315 lines
11 KiB
TypeScript
315 lines
11 KiB
TypeScript
|
|
/**
 * Phase 2 of the legacy migration: pull signed EOI PDFs + berth spec PDFs from
 * the LEGACY MinIO (`client-portal` bucket) and deposit them into the CRM's own
 * storage, linking them to the already-migrated deals + berths.
 *
 * Two storage worlds, kept strictly separate:
 *   - LEGACY read : a dedicated `minio` Client using LEGACY_MINIO_* env.
 *   - CRM write   : `getStorageBackend()` (the CRM's own configured storage).
 * ⚠ We NEVER route legacy creds through getStorageBackend — that would
 *   write INTO prod. LEGACY_MINIO_* is distinct from the CRM's MINIO_*.
 *
 * Idempotent + re-runnable: an EOI is skipped once its `documents.signedFileId`
 * is set; a berth is skipped once it has a `currentPdfVersionId`.
 *
 * Run AFTER `migrate-from-nocodb.ts --apply`:
 *   LEGACY_MINIO_ACCESS_KEY=… LEGACY_MINIO_SECRET_KEY=… \
 *     pnpm tsx scripts/migration/backfill-documents.ts --port-slug port-nimara [--dry-run]
 */
|
||
|
|
import 'dotenv/config';
|
||
|
|
import { randomUUID } from 'node:crypto';
|
||
|
|
import { Client as MinioClient } from 'minio';
|
||
|
|
import { and, eq, isNull } from 'drizzle-orm';
|
||
|
|
|
||
|
|
import { db, closeDb } from '@/lib/db';
|
||
|
|
import { ports } from '@/lib/db/schema/ports';
|
||
|
|
import { berths } from '@/lib/db/schema/berths';
|
||
|
|
import { documents, files } from '@/lib/db/schema/documents';
|
||
|
|
import { clients } from '@/lib/db/schema/clients';
|
||
|
|
import { getStorageBackend } from '@/lib/storage';
|
||
|
|
import { buildStoragePath } from '@/lib/minio';
|
||
|
|
import { ensureEntityFolder } from '@/lib/services/document-folders.service';
|
||
|
|
import { uploadBerthPdf } from '@/lib/services/berth-pdf.service';
|
||
|
|
import { normalizeName } from '@/lib/dedup/normalize';
|
||
|
|
import { SUPER_ADMIN_USER_ID } from '@/lib/db/seed-bootstrap';
|
||
|
|
|
||
|
|
/**
 * `--dry-run` flag. When set, the backfill loops count what they would attach
 * but `continue` before any download, upload, or database write.
 */
const DRY = process.argv.includes('--dry-run');

/**
 * Port slug taken from `--port-slug <slug>`. Falls back to `port-nimara` when
 * the flag is absent or is the last argument (no value after it).
 */
const slugArg = ((): string => {
  const flagAt = process.argv.indexOf('--port-slug');
  if (flagAt < 0) return 'port-nimara';
  return process.argv[flagAt + 1] ?? 'port-nimara';
})();
|
||
|
|
|
||
|
|
/** Legacy bucket to read from; `LEGACY_MINIO_BUCKET` overrides the `client-portal` default. */
const LEGACY_BUCKET = process.env.LEGACY_MINIO_BUCKET ?? 'client-portal';

/**
 * Dedicated MinIO client for the LEGACY store, configured only from
 * LEGACY_MINIO_* variables. Always connects over TLS on port 443. Missing
 * credentials become empty strings here; `main()` refuses to run without them.
 */
const legacy = (() => {
  const env = process.env;
  return new MinioClient({
    endPoint: env.LEGACY_MINIO_ENDPOINT ?? 's3.portnimara.com',
    port: 443,
    useSSL: true,
    accessKey: env.LEGACY_MINIO_ACCESS_KEY ?? '',
    secretKey: env.LEGACY_MINIO_SECRET_KEY ?? '',
  });
})();
|
||
|
|
|
||
|
|
/**
 * Levenshtein edit distance between `a` and `b`, compared per UTF-16 code unit.
 * Used for conservative fuzzy name matching against legacy spelling/format
 * drift (Koshbin↔Khoshbin, Costanzo↔Constanzo).
 *
 * @returns the minimum number of single-character insertions, deletions and
 *   substitutions that turn `a` into `b`.
 */
function lev(a: string, b: string): number {
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // One rolling row: before processing a[i], row[j] is the distance between
  // a[0..i) and b[0..j).
  const row: number[] = [];
  for (let j = 0; j <= b.length; j++) row.push(j);

  for (let i = 0; i < a.length; i++) {
    // `diagonal` carries the previous row's value at column j while row[j]
    // itself is overwritten with the current row's value.
    let diagonal = row[0]!;
    row[0] = i + 1;
    for (let j = 0; j < b.length; j++) {
      const above = row[j + 1]!; // previous row, same column (deletion)
      const left = row[j]!; // current row, previous column (insertion)
      const substitution = diagonal + (a[i] === b[j] ? 0 : 1);
      row[j + 1] = Math.min(above + 1, left + 1, substitution);
      diagonal = above;
    }
  }
  return row[b.length]!;
}
|
||
|
|
|
||
|
|
/**
 * Drains a readable stream and resolves with all of its data as one Buffer.
 *
 * Chunks are coerced to Buffer before concatenation: a stream that emits
 * strings (object-mode streams, or one with an encoding set) would otherwise
 * make `Buffer.concat` throw inside the `end` listener, which surfaces as an
 * uncaught exception instead of a rejection. String chunks are encoded as
 * UTF-8.
 *
 * @param stream readable stream to consume; it is read to its end.
 * @returns a promise for the concatenated bytes (empty Buffer for an empty stream).
 *   Rejects with the stream's `error` event payload.
 */
function streamToBuffer(stream: NodeJS.ReadableStream): Promise<Buffer> {
  return new Promise((resolve, reject) => {
    const chunks: Buffer[] = [];
    stream.on('data', (chunk: Buffer | Uint8Array | string) => {
      chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    });
    stream.on('end', () => resolve(Buffer.concat(chunks)));
    stream.on('error', reject);
  });
}
|
||
|
|
|
||
|
|
/** One non-folder object listed from the legacy bucket. */
interface LegacyObject {
  /** Full object key within the bucket. */
  name: string;
  /** Size as reported by the listing; 0 when the listing gave none. */
  size: number;
}
|
||
|
|
/**
 * Recursively lists the legacy bucket under `prefix`.
 *
 * Entries without a name and entries whose name ends in `/` are dropped; a
 * missing size is reported as 0.
 *
 * @param prefix key prefix to list under (e.g. `EOIs/`).
 * @returns every remaining object, in the order the listing emitted them.
 *   Rejects if the listing stream emits `error`.
 */
function listLegacy(prefix: string): Promise<LegacyObject[]> {
  return new Promise<LegacyObject[]>((resolve, reject) => {
    const found: LegacyObject[] = [];
    legacy
      .listObjectsV2(LEGACY_BUCKET, prefix, true)
      .on('data', (item) => {
        const key = item.name;
        if (!key || key.endsWith('/')) return;
        found.push({ name: key, size: item.size ?? 0 });
      })
      .on('end', () => resolve(found))
      .on('error', reject);
  });
}
|
||
|
|
|
||
|
|
/**
 * Looks up a port by its slug.
 *
 * @param slug value matched against `ports.slug`.
 * @returns the port's `id` and `slug`.
 * @throws Error when no port row has that slug.
 */
async function resolvePort(slug: string): Promise<{ id: string; slug: string }> {
  const matches = await db
    .select({ id: ports.id, slug: ports.slug })
    .from(ports)
    .where(eq(ports.slug, slug))
    .limit(1);

  const port = matches[0];
  if (port === undefined) {
    throw new Error(`No port with slug "${slug}"`);
  }
  return port;
}
|
||
|
|
|
||
|
|
// ─── Berth PDFs ──────────────────────────────────────────────────────────────
// client-portal/Berth-PDFs/<ts>-Berth_Spec_Sheet_<Mooring>.pdf → berth by mooring.

/**
 * Attaches legacy berth spec-sheet PDFs to the port's berths.
 *
 * Every `.pdf` under `Berth-PDFs/` is matched to a berth by the mooring taken
 * from its file name (upper-cased, zeros directly after the letters removed,
 * e.g. `a01` → `A1`). Berths that had a `currentPdfVersionId` when the berth
 * list was loaded are skipped. In dry-run mode matches are counted but nothing
 * is downloaded or uploaded.
 *
 * @param port the port whose berths are candidates.
 * @returns counts: `total` PDFs listed, `attached` (or would-be attached in a
 *   dry run), `skipped` (berth already had a PDF), and `unmatched` (file name
 *   did not fit the pattern, or no berth has that mooring).
 */
async function backfillBerthPdfs(port: { id: string; slug: string }) {
  const listing = await listLegacy('Berth-PDFs/');
  const pdfBlobs = listing.filter((blob) => /\.pdf$/i.test(blob.name));

  const berthRows = await db
    .select({ id: berths.id, mooring: berths.mooringNumber, cur: berths.currentPdfVersionId })
    .from(berths)
    .where(eq(berths.portId, port.id));

  type BerthRow = (typeof berthRows)[number];
  const byMooring = new Map<BerthRow['mooring'], BerthRow>();
  for (const row of berthRows) byMooring.set(row.mooring, row);

  const tally = { attached: 0, skipped: 0, unmatched: 0 };

  for (const blob of pdfBlobs) {
    const parsed = /Berth_Spec_Sheet_([A-Za-z]+\d+)\.pdf$/i.exec(blob.name);
    if (parsed === null) {
      tally.unmatched += 1;
      continue;
    }

    // Canonical mooring form: upper-case letters, no zero padding on the number.
    const mooring = parsed[1]!.toUpperCase().replace(/([A-Z]+)0*(\d+)/, '$1$2');

    const berth = byMooring.get(mooring);
    if (berth === undefined) {
      console.log(` [berth] no berth for mooring "${mooring}" (${blob.name})`);
      tally.unmatched += 1;
      continue;
    }

    if (berth.cur) {
      tally.skipped += 1;
      continue;
    }

    if (!DRY) {
      const legacyStream = await legacy.getObject(LEGACY_BUCKET, blob.name);
      const bytes = await streamToBuffer(legacyStream);
      await uploadBerthPdf({
        berthId: berth.id,
        portId: port.id,
        buffer: bytes,
        fileName: blob.name.split('/').pop() ?? `${mooring}.pdf`,
        uploadedBy: SUPER_ADMIN_USER_ID,
      });
    }
    tally.attached += 1;
  }

  return { total: pdfBlobs.length, ...tally };
}
|
||
|
|
|
||
|
|
// ─── Signed EOIs ─────────────────────────────────────────────────────────────
// client-portal/EOIs/<Client Name>/<file>.pdf → match by normalized client name.

/** Best legacy PDF found so far for one normalized client-folder name. */
interface LegacyEoiCandidate {
  /** Full object key in the legacy bucket. */
  key: string;
  /** Object size as listed. */
  size: number;
  /** Whether the path outside the client folder contains "sign". */
  signed: boolean;
}

/**
 * Returns `key` with its client-folder segment (the second path segment)
 * removed, so keyword tests see only the top-level prefix and the file path.
 * Without this, a client folder such as "Signe_…" or "Designs_…" would make
 * every file in it look like a signed EOI. Keys with fewer than three
 * segments are returned unchanged.
 */
function stripClientFolder(key: string): string {
  const parts = key.split('/');
  if (parts.length < 3) return key;
  return [parts[0], ...parts.slice(2)].join('/');
}

/**
 * Picks the best PDF per normalized client-folder name.
 *
 * A "signed" file always beats an unsigned one; among signed files the largest
 * wins (the full signed PDF). Among unsigned files the first one seen is kept.
 */
function indexEoiCandidates(objs: readonly LegacyObject[]): Map<string, LegacyEoiCandidate> {
  const byName = new Map<string, LegacyEoiCandidate>();
  for (const o of objs) {
    const parts = o.name.split('/'); // <prefix> / <Name> / <file>.pdf
    if (parts.length < 3) continue;
    const folder = (parts[1] ?? '').replace(/_/g, ' '); // "Matt_Ciaccio" → "Matt Ciaccio"
    const norm = normalizeName(folder).display;
    if (!norm) continue;

    const signed = /sign/i.test(stripClientFolder(o.name));
    const prev = byName.get(norm);
    // The earlier rule compared sizes only, so a smaller signed PDF could never
    // replace an unsigned one that happened to be listed first.
    const better = !prev || (signed && (!prev.signed || o.size > prev.size));
    if (better) byName.set(norm, { key: o.name, size: o.size, signed });
  }
  return byName;
}

/**
 * Finds the legacy PDF for a normalized client name: exact match first, then a
 * conservative fuzzy fallback (names of 6+ characters, best edit distance ≤ 2
 * on the full name; the first candidate at the best distance wins).
 */
function findEoiCandidate(
  byName: ReadonlyMap<string, LegacyEoiCandidate>,
  target: string,
): LegacyEoiCandidate | undefined {
  const exact = byName.get(target);
  if (exact || target.length < 6) return exact;

  let best: LegacyEoiCandidate | undefined;
  let bestDist = 3;
  for (const [name, candidate] of byName) {
    const d = lev(name, target);
    if (d < bestDist) {
      bestDist = d;
      best = candidate;
    }
  }
  return best;
}

/**
 * Attaches legacy signed-EOI PDFs to migrated EOI documents that have no
 * `signedFileId` yet.
 *
 * For each such document the client's name is normalized and matched against
 * the legacy client folders. On a match the PDF is copied into CRM storage, a
 * `files` row is created in the client's entity folder, and the document is
 * linked to it and marked `completed` / manual upload — the row insert and the
 * document update run in one transaction. In dry-run mode matches are counted
 * but nothing is downloaded or written.
 *
 * @param port the port whose EOI documents are candidates.
 * @returns `totalBlobs` considered, `indexedClients` (distinct client folders),
 *   `candidates` (documents needing a file), `attached`, `unmatched`, and
 *   `unresolved` — the client names for which no legacy PDF was found.
 */
async function backfillEois(port: { id: string; slug: string }) {
  // Signed EOIs live under EOIs/<Name>/ and (some) under Client Documents/<Name>/.
  const listed = [...(await listLegacy('EOIs/')), ...(await listLegacy('Client Documents/'))];
  // The keyword test ignores the client folder name: everything under EOIs/
  // still qualifies via the prefix, while Client Documents/ files must carry
  // "eoi" or "sign" in their own path.
  const objs = listed.filter(
    (o) => /\.pdf$/i.test(o.name) && /eoi|sign/i.test(stripClientFolder(o.name)),
  );
  const byName = indexEoiCandidates(objs);

  // Migrated EOI documents missing a signed file.
  const docRows = await db
    .select({ id: documents.id, interestId: documents.interestId, clientId: documents.clientId })
    .from(documents)
    .where(
      and(
        eq(documents.portId, port.id),
        eq(documents.documentType, 'eoi'),
        isNull(documents.signedFileId),
      ),
    );

  const backend = await getStorageBackend();
  let attached = 0;
  let unmatched = 0;
  const unresolved: string[] = [];

  for (const doc of docRows) {
    const clientId = doc.clientId;
    if (!clientId) {
      unmatched++;
      continue;
    }
    const [c] = await db
      .select({ name: clients.fullName })
      .from(clients)
      .where(eq(clients.id, clientId))
      .limit(1);
    if (!c) {
      unmatched++;
      continue;
    }

    const match = findEoiCandidate(byName, normalizeName(c.name).display);
    if (!match) {
      unresolved.push(c.name);
      unmatched++;
      continue;
    }
    if (DRY) {
      attached++;
      continue;
    }

    // Pull legacy bytes → write to CRM storage → files row → link signedFileId.
    const buf = await streamToBuffer(await legacy.getObject(LEGACY_BUCKET, match.key));
    const key = buildStoragePath(port.slug, 'eoi-signed', doc.id, randomUUID(), 'pdf');
    const putRes = await backend.put(key, buf, {
      contentType: 'application/pdf',
      sizeBytes: buf.length,
    });

    // File into the client's entity folder (mirrors handleDocumentCompleted's
    // owner-folder filing). files.interestId still scopes the row to the deal;
    // interest "Deal" folders aren't system-managed (chk_system_folder_shape).
    const folder = await ensureEntityFolder(port.id, 'client', clientId, SUPER_ADMIN_USER_ID);
    const fileName = match.key.split('/').pop() ?? 'eoi-signed.pdf';

    await db.transaction(async (tx) => {
      const [f] = await tx
        .insert(files)
        .values({
          portId: port.id,
          filename: fileName,
          originalName: fileName,
          storagePath: putRes.key,
          mimeType: 'application/pdf',
          sizeBytes: String(putRes.sizeBytes),
          category: 'eoi',
          folderId: folder.id,
          clientId,
          interestId: doc.interestId,
          // NOTE(review): the rest of this script attributes writes to
          // SUPER_ADMIN_USER_ID — confirm 'system' is intended here.
          uploadedBy: 'system',
        })
        .returning({ id: files.id });
      if (!f) throw new Error('files insert returned no row');
      await tx
        .update(documents)
        .set({ signedFileId: f.id, status: 'completed', isManualUpload: true })
        .where(eq(documents.id, doc.id));
    });
    attached++;
  }

  return {
    totalBlobs: objs.length,
    indexedClients: byName.size,
    candidates: docRows.length,
    attached,
    unmatched,
    unresolved,
  };
}
|
||
|
|
|
||
|
|
/**
 * Script entry point: checks the legacy credentials, resolves the target port,
 * runs the berth-PDF backfill and then the signed-EOI backfill, and prints a
 * summary of each. Lists at most 25 client names that had no matching legacy
 * PDF. Exits with status 1 when the legacy credentials are missing, otherwise
 * closes the database and exits with status 0.
 */
async function main() {
  const { LEGACY_MINIO_ACCESS_KEY: accessKey, LEGACY_MINIO_SECRET_KEY: secretKey } = process.env;
  if (!(accessKey && secretKey)) {
    console.error(
      'Set LEGACY_MINIO_ACCESS_KEY + LEGACY_MINIO_SECRET_KEY (legacy MinIO read creds).',
    );
    process.exit(1);
  }

  const port = await resolvePort(slugArg);
  const dryTag = DRY ? '(DRY RUN)' : '';
  console.log(`[backfill] port=${port.slug} legacy-bucket=${LEGACY_BUCKET} ${dryTag}`);

  console.log('[backfill] Berth PDFs…');
  const berthRes = await backfillBerthPdfs(port);
  console.log(
    ` berth PDFs: ${berthRes.total} blobs → ${berthRes.attached} attached, ${berthRes.skipped} already had one, ${berthRes.unmatched} unmatched`,
  );

  console.log('[backfill] Signed EOIs…');
  const eoiRes = await backfillEois(port);
  console.log(
    ` EOIs: ${eoiRes.totalBlobs} blobs (${eoiRes.indexedClients} client folders) · ${eoiRes.candidates} migrated EOI docs needing a file → ${eoiRes.attached} attached, ${eoiRes.unmatched} unmatched`,
  );

  const { unresolved } = eoiRes;
  if (unresolved.length > 0) {
    console.log(` ⚠ EOI docs with no name-matched legacy PDF (${unresolved.length}):`);
    // Cap the listing so a large miss count does not flood the console.
    unresolved.slice(0, 25).forEach((clientName) => {
      console.log(` - ${clientName}`);
    });
  }

  await closeDb();
  process.exit(0);
}
|
||
|
|
|
||
|
|
/**
 * Last-resort failure handler for the script: logs the error, closes the
 * database (ignoring any failure while closing), and exits with status 1.
 */
async function abortBackfill(err: unknown): Promise<never> {
  console.error('[backfill] failed:', err);
  await closeDb().catch(() => {});
  process.exit(1);
}

main().catch(abortBackfill);
|