feat(migration): document backfill — legacy MinIO → CRM storage (Phase 2)
backfill-documents.ts pulls signed EOI PDFs + berth spec PDFs from the legacy MinIO (client-portal bucket; read-only via dedicated LEGACY_MINIO_* creds) and deposits them into the CRM (getStorageBackend), linking: - berth PDFs → berth_pdf_versions + berths.current_pdf_version_id (mooring from filename; 113/113 matched) - signed EOIs → documents.signed_file_id + status=completed + a files row filed into the client folder (exact name + conservative lev<=2 fuzzy; 33 linked) Idempotent (skips when signedFileId / current_pdf_version_id already set). Strictly prod-READ-only; all writes local (dev storage_backend=filesystem). Unmatched EOIs reported (mostly in-flight deals w/ no signed PDF yet + old-LOI docs in the NocoDB attachment bucket). Adds probe-minio.ts (read-only bucket inventory). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
314
scripts/migration/backfill-documents.ts
Normal file
314
scripts/migration/backfill-documents.ts
Normal file
@@ -0,0 +1,314 @@
|
||||
/**
|
||||
* Phase 2 of the legacy migration: pull signed EOI PDFs + berth spec PDFs from
|
||||
* the LEGACY MinIO (`client-portal` bucket) and deposit them into the CRM's own
|
||||
* storage, linking them to the already-migrated deals + berths.
|
||||
*
|
||||
* Two storage worlds, kept strictly separate:
|
||||
* - LEGACY read : a dedicated `minio` Client using LEGACY_MINIO_* env.
|
||||
* - CRM write : `getStorageBackend()` (the CRM's own configured storage).
|
||||
* ⚠ We NEVER route legacy creds through getStorageBackend — that would
|
||||
* write INTO prod. LEGACY_MINIO_* is distinct from the CRM's MINIO_*.
|
||||
*
|
||||
* Idempotent + re-runnable: an EOI is skipped once its `documents.signedFileId`
|
||||
* is set; a berth is skipped once it has a `currentPdfVersionId`.
|
||||
*
|
||||
* Run AFTER `migrate-from-nocodb.ts --apply`:
|
||||
* LEGACY_MINIO_ACCESS_KEY=… LEGACY_MINIO_SECRET_KEY=… \
|
||||
* pnpm tsx scripts/migration/backfill-documents.ts --port-slug port-nimara [--dry-run]
|
||||
*/
|
||||
import 'dotenv/config';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { Client as MinioClient } from 'minio';
|
||||
import { and, eq, isNull } from 'drizzle-orm';
|
||||
|
||||
import { db, closeDb } from '@/lib/db';
|
||||
import { ports } from '@/lib/db/schema/ports';
|
||||
import { berths } from '@/lib/db/schema/berths';
|
||||
import { documents, files } from '@/lib/db/schema/documents';
|
||||
import { clients } from '@/lib/db/schema/clients';
|
||||
import { getStorageBackend } from '@/lib/storage';
|
||||
import { buildStoragePath } from '@/lib/minio';
|
||||
import { ensureEntityFolder } from '@/lib/services/document-folders.service';
|
||||
import { uploadBerthPdf } from '@/lib/services/berth-pdf.service';
|
||||
import { normalizeName } from '@/lib/dedup/normalize';
|
||||
import { SUPER_ADMIN_USER_ID } from '@/lib/db/seed-bootstrap';
|
||||
|
||||
const DRY = process.argv.includes('--dry-run');
|
||||
const slugArg = (() => {
|
||||
const i = process.argv.indexOf('--port-slug');
|
||||
return i >= 0 ? (process.argv[i + 1] ?? 'port-nimara') : 'port-nimara';
|
||||
})();
|
||||
|
||||
const LEGACY_BUCKET = process.env.LEGACY_MINIO_BUCKET ?? 'client-portal';
|
||||
const legacy = new MinioClient({
|
||||
endPoint: process.env.LEGACY_MINIO_ENDPOINT ?? 's3.portnimara.com',
|
||||
port: 443,
|
||||
useSSL: true,
|
||||
accessKey: process.env.LEGACY_MINIO_ACCESS_KEY ?? '',
|
||||
secretKey: process.env.LEGACY_MINIO_SECRET_KEY ?? '',
|
||||
});
|
||||
|
||||
/** Levenshtein edit distance — conservative fuzzy name matching for legacy
|
||||
* spelling/format drift (Koshbin↔Khoshbin, Costanzo↔Constanzo). */
|
||||
function lev(a: string, b: string): number {
|
||||
const m = a.length;
|
||||
const n = b.length;
|
||||
if (!m) return n;
|
||||
if (!n) return m;
|
||||
let prev = Array.from({ length: n + 1 }, (_, i) => i);
|
||||
for (let i = 1; i <= m; i++) {
|
||||
const cur = [i];
|
||||
for (let j = 1; j <= n; j++) {
|
||||
cur[j] = Math.min(
|
||||
prev[j]! + 1,
|
||||
cur[j - 1]! + 1,
|
||||
prev[j - 1]! + (a[i - 1] === b[j - 1] ? 0 : 1),
|
||||
);
|
||||
}
|
||||
prev = cur;
|
||||
}
|
||||
return prev[n]!;
|
||||
}
|
||||
|
||||
function streamToBuffer(stream: NodeJS.ReadableStream): Promise<Buffer> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const chunks: Buffer[] = [];
|
||||
stream.on('data', (c: Buffer) => chunks.push(c));
|
||||
stream.on('end', () => resolve(Buffer.concat(chunks)));
|
||||
stream.on('error', reject);
|
||||
});
|
||||
}
|
||||
|
||||
interface LegacyObject {
|
||||
name: string;
|
||||
size: number;
|
||||
}
|
||||
function listLegacy(prefix: string): Promise<LegacyObject[]> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const out: LegacyObject[] = [];
|
||||
const stream = legacy.listObjectsV2(LEGACY_BUCKET, prefix, true);
|
||||
stream.on('data', (o) => {
|
||||
if (o.name && !o.name.endsWith('/')) out.push({ name: o.name, size: o.size ?? 0 });
|
||||
});
|
||||
stream.on('end', () => resolve(out));
|
||||
stream.on('error', reject);
|
||||
});
|
||||
}
|
||||
|
||||
async function resolvePort(slug: string): Promise<{ id: string; slug: string }> {
|
||||
const [p] = await db
|
||||
.select({ id: ports.id, slug: ports.slug })
|
||||
.from(ports)
|
||||
.where(eq(ports.slug, slug))
|
||||
.limit(1);
|
||||
if (!p) throw new Error(`No port with slug "${slug}"`);
|
||||
return p;
|
||||
}
|
||||
|
||||
// ─── Berth PDFs ──────────────────────────────────────────────────────────────
|
||||
// client-portal/Berth-PDFs/<ts>-Berth_Spec_Sheet_<Mooring>.pdf → berth by mooring.
|
||||
async function backfillBerthPdfs(port: { id: string; slug: string }) {
|
||||
const objs = (await listLegacy('Berth-PDFs/')).filter((o) => /\.pdf$/i.test(o.name));
|
||||
const berthRows = await db
|
||||
.select({ id: berths.id, mooring: berths.mooringNumber, cur: berths.currentPdfVersionId })
|
||||
.from(berths)
|
||||
.where(eq(berths.portId, port.id));
|
||||
const byMooring = new Map(berthRows.map((b) => [b.mooring, b]));
|
||||
|
||||
let attached = 0;
|
||||
let skipped = 0;
|
||||
let unmatched = 0;
|
||||
for (const o of objs) {
|
||||
const m = o.name.match(/Berth_Spec_Sheet_([A-Za-z]+\d+)\.pdf$/i);
|
||||
if (!m) {
|
||||
unmatched++;
|
||||
continue;
|
||||
}
|
||||
const mooring = `${m[1]!.replace(/[a-z]+/g, (s) => s.toUpperCase())}`
|
||||
.toUpperCase()
|
||||
.replace(/([A-Z]+)0*(\d+)/, '$1$2');
|
||||
const berth = byMooring.get(mooring);
|
||||
if (!berth) {
|
||||
console.log(` [berth] no berth for mooring "${mooring}" (${o.name})`);
|
||||
unmatched++;
|
||||
continue;
|
||||
}
|
||||
if (berth.cur) {
|
||||
skipped++;
|
||||
continue;
|
||||
}
|
||||
if (DRY) {
|
||||
attached++;
|
||||
continue;
|
||||
}
|
||||
const buf = await streamToBuffer(await legacy.getObject(LEGACY_BUCKET, o.name));
|
||||
await uploadBerthPdf({
|
||||
berthId: berth.id,
|
||||
portId: port.id,
|
||||
buffer: buf,
|
||||
fileName: o.name.split('/').pop() ?? `${mooring}.pdf`,
|
||||
uploadedBy: SUPER_ADMIN_USER_ID,
|
||||
});
|
||||
attached++;
|
||||
}
|
||||
return { total: objs.length, attached, skipped, unmatched };
|
||||
}
|
||||
|
||||
// ─── Signed EOIs ─────────────────────────────────────────────────────────────
|
||||
// client-portal/EOIs/<Client Name>/<file>.pdf → match by normalized client name.
|
||||
async function backfillEois(port: { id: string; slug: string }) {
|
||||
// Signed EOIs live under EOIs/<Name>/ and (some) under Client Documents/<Name>/.
|
||||
const objs = [...(await listLegacy('EOIs/')), ...(await listLegacy('Client Documents/'))].filter(
|
||||
(o) => /\.pdf$/i.test(o.name) && /eoi|sign/i.test(o.name),
|
||||
);
|
||||
// Index the best signed PDF per normalized folder (client) name.
|
||||
const byName = new Map<string, { key: string; size: number }>();
|
||||
for (const o of objs) {
|
||||
const parts = o.name.split('/'); // <prefix> / <Name> / <file>.pdf
|
||||
if (parts.length < 3) continue;
|
||||
const folder = (parts[1] ?? '').replace(/_/g, ' '); // "Matt_Ciaccio" → "Matt Ciaccio"
|
||||
const norm = normalizeName(folder).display;
|
||||
if (!norm) continue;
|
||||
const isSigned = /sign/i.test(o.name);
|
||||
const prev = byName.get(norm);
|
||||
// Prefer a "signed" file; among those, the largest (the full signed PDF).
|
||||
if (!prev || (isSigned && o.size > prev.size)) byName.set(norm, { key: o.name, size: o.size });
|
||||
}
|
||||
|
||||
// Migrated EOI documents missing a signed file.
|
||||
const docRows = await db
|
||||
.select({ id: documents.id, interestId: documents.interestId, clientId: documents.clientId })
|
||||
.from(documents)
|
||||
.where(
|
||||
and(
|
||||
eq(documents.portId, port.id),
|
||||
eq(documents.documentType, 'eoi'),
|
||||
isNull(documents.signedFileId),
|
||||
),
|
||||
);
|
||||
|
||||
const backend = await getStorageBackend();
|
||||
let attached = 0;
|
||||
let unmatched = 0;
|
||||
const unresolved: string[] = [];
|
||||
for (const doc of docRows) {
|
||||
const clientId = doc.clientId;
|
||||
if (!clientId) {
|
||||
unmatched++;
|
||||
continue;
|
||||
}
|
||||
const [c] = await db
|
||||
.select({ name: clients.fullName })
|
||||
.from(clients)
|
||||
.where(eq(clients.id, clientId))
|
||||
.limit(1);
|
||||
if (!c) {
|
||||
unmatched++;
|
||||
continue;
|
||||
}
|
||||
const target = normalizeName(c.name).display;
|
||||
let match = byName.get(target);
|
||||
if (!match && target.length >= 6) {
|
||||
// Conservative fuzzy fallback: best edit-distance ≤ 2 on the full name.
|
||||
let bestDist = 3;
|
||||
for (const [name, v] of byName) {
|
||||
const d = lev(name, target);
|
||||
if (d < bestDist) {
|
||||
bestDist = d;
|
||||
match = v;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!match) {
|
||||
unresolved.push(c.name);
|
||||
unmatched++;
|
||||
continue;
|
||||
}
|
||||
if (DRY) {
|
||||
attached++;
|
||||
continue;
|
||||
}
|
||||
// Pull legacy bytes → write to CRM storage → files row → link signedFileId.
|
||||
const buf = await streamToBuffer(await legacy.getObject(LEGACY_BUCKET, match.key));
|
||||
const key = buildStoragePath(port.slug, 'eoi-signed', doc.id, randomUUID(), 'pdf');
|
||||
const putRes = await backend.put(key, buf, {
|
||||
contentType: 'application/pdf',
|
||||
sizeBytes: buf.length,
|
||||
});
|
||||
// File into the client's entity folder (mirrors handleDocumentCompleted's
|
||||
// owner-folder filing). files.interestId still scopes the row to the deal;
|
||||
// interest "Deal" folders aren't system-managed (chk_system_folder_shape).
|
||||
const folder = await ensureEntityFolder(port.id, 'client', clientId, SUPER_ADMIN_USER_ID);
|
||||
const fileName = match.key.split('/').pop() ?? 'eoi-signed.pdf';
|
||||
await db.transaction(async (tx) => {
|
||||
const [f] = await tx
|
||||
.insert(files)
|
||||
.values({
|
||||
portId: port.id,
|
||||
filename: fileName,
|
||||
originalName: fileName,
|
||||
storagePath: putRes.key,
|
||||
mimeType: 'application/pdf',
|
||||
sizeBytes: String(putRes.sizeBytes),
|
||||
category: 'eoi',
|
||||
folderId: folder.id,
|
||||
clientId,
|
||||
interestId: doc.interestId,
|
||||
uploadedBy: 'system',
|
||||
})
|
||||
.returning({ id: files.id });
|
||||
if (!f) throw new Error('files insert returned no row');
|
||||
await tx
|
||||
.update(documents)
|
||||
.set({ signedFileId: f.id, status: 'completed', isManualUpload: true })
|
||||
.where(eq(documents.id, doc.id));
|
||||
});
|
||||
attached++;
|
||||
}
|
||||
return {
|
||||
totalBlobs: objs.length,
|
||||
indexedClients: byName.size,
|
||||
candidates: docRows.length,
|
||||
attached,
|
||||
unmatched,
|
||||
unresolved,
|
||||
};
|
||||
}
|
||||
|
||||
async function main() {
|
||||
if (!process.env.LEGACY_MINIO_ACCESS_KEY || !process.env.LEGACY_MINIO_SECRET_KEY) {
|
||||
console.error(
|
||||
'Set LEGACY_MINIO_ACCESS_KEY + LEGACY_MINIO_SECRET_KEY (legacy MinIO read creds).',
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
const port = await resolvePort(slugArg);
|
||||
console.log(
|
||||
`[backfill] port=${port.slug} legacy-bucket=${LEGACY_BUCKET} ${DRY ? '(DRY RUN)' : ''}`,
|
||||
);
|
||||
|
||||
console.log('[backfill] Berth PDFs…');
|
||||
const berthRes = await backfillBerthPdfs(port);
|
||||
console.log(
|
||||
` berth PDFs: ${berthRes.total} blobs → ${berthRes.attached} attached, ${berthRes.skipped} already had one, ${berthRes.unmatched} unmatched`,
|
||||
);
|
||||
|
||||
console.log('[backfill] Signed EOIs…');
|
||||
const eoiRes = await backfillEois(port);
|
||||
console.log(
|
||||
` EOIs: ${eoiRes.totalBlobs} blobs (${eoiRes.indexedClients} client folders) · ${eoiRes.candidates} migrated EOI docs needing a file → ${eoiRes.attached} attached, ${eoiRes.unmatched} unmatched`,
|
||||
);
|
||||
if (eoiRes.unresolved.length > 0) {
|
||||
console.log(` ⚠ EOI docs with no name-matched legacy PDF (${eoiRes.unresolved.length}):`);
|
||||
for (const n of eoiRes.unresolved.slice(0, 25)) console.log(` - ${n}`);
|
||||
}
|
||||
|
||||
await closeDb();
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
main().catch(async (err) => {
|
||||
console.error('[backfill] failed:', err);
|
||||
await closeDb().catch(() => {});
|
||||
process.exit(1);
|
||||
});
|
||||
102
scripts/migration/probe-minio.ts
Normal file
102
scripts/migration/probe-minio.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
/**
|
||||
* Read-only MinIO inventory for the legacy → new-CRM migration (Phase 2 sizing).
|
||||
*
|
||||
* Lists every bucket the creds can see, then for the document buckets
|
||||
* (`client-portal`, `signatures`) groups objects by top-level prefix with
|
||||
* counts + sizes + samples — so we can see exactly where the EOIs, berth
|
||||
* PDFs, receipts and business-card images live before backfilling them.
|
||||
*
|
||||
* Secret-free: reads creds from env. Run with:
|
||||
* MINIO_ACCESS_KEY=... MINIO_SECRET_KEY=... \
|
||||
* pnpm tsx scripts/migration/probe-minio.ts
|
||||
*
|
||||
* Strictly read-only (listBuckets + listObjectsV2). No writes.
|
||||
*/
|
||||
import { Client } from 'minio';
|
||||
|
||||
const endPoint = process.env.MINIO_ENDPOINT || 's3.portnimara.com';
|
||||
const accessKey = process.env.MINIO_ACCESS_KEY;
|
||||
const secretKey = process.env.MINIO_SECRET_KEY;
|
||||
|
||||
if (!accessKey || !secretKey) {
|
||||
console.error('Set MINIO_ACCESS_KEY and MINIO_SECRET_KEY');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const client = new Client({ endPoint, port: 443, useSSL: true, accessKey, secretKey });
|
||||
|
||||
interface PrefixStat {
|
||||
count: number;
|
||||
bytes: number;
|
||||
samples: string[];
|
||||
}
|
||||
|
||||
async function inventory(bucket: string) {
|
||||
const byPrefix = new Map<string, PrefixStat>();
|
||||
let total = 0;
|
||||
let totalBytes = 0;
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const stream = client.listObjectsV2(bucket, '', true);
|
||||
stream.on('data', (o) => {
|
||||
if (!o.name) return;
|
||||
total++;
|
||||
totalBytes += o.size || 0;
|
||||
const top = o.name.includes('/') ? o.name.split('/')[0] + '/' : '(root)';
|
||||
const e = byPrefix.get(top) || { count: 0, bytes: 0, samples: [] };
|
||||
e.count++;
|
||||
e.bytes += o.size || 0;
|
||||
if (e.samples.length < 4) e.samples.push(`${o.name} (${o.size}b)`);
|
||||
byPrefix.set(top, e);
|
||||
});
|
||||
stream.on('end', () => resolve());
|
||||
stream.on('error', reject);
|
||||
});
|
||||
return { bucket, total, totalBytes, byPrefix };
|
||||
}
|
||||
|
||||
const mb = (b: number) => (b / 1e6).toFixed(1);
|
||||
|
||||
async function main() {
|
||||
console.log(`MinIO @ ${endPoint}\n`);
|
||||
|
||||
let buckets: string[] = [];
|
||||
try {
|
||||
const list = await client.listBuckets();
|
||||
buckets = list.map((b) => b.name);
|
||||
console.log('=== all buckets visible to these creds ===');
|
||||
for (const b of list) console.log(` ${b.name}`);
|
||||
} catch (err) {
|
||||
console.log(`listBuckets failed: ${(err as Error).message}`);
|
||||
}
|
||||
|
||||
const targets = (process.env.MINIO_BUCKETS || 'client-portal,signatures')
|
||||
.split(',')
|
||||
.map((s) => s.trim());
|
||||
|
||||
for (const bucket of targets) {
|
||||
if (buckets.length && !buckets.includes(bucket)) {
|
||||
console.log(`\n=== bucket: ${bucket} — NOT VISIBLE to these creds ===`);
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
const inv = await inventory(bucket);
|
||||
console.log(
|
||||
`\n=== bucket: ${inv.bucket} — ${inv.total} objects, ${mb(inv.totalBytes)} MB ===`,
|
||||
);
|
||||
const rows = [...inv.byPrefix.entries()].sort((a, z) => z[1].count - a[1].count);
|
||||
for (const [prefix, e] of rows) {
|
||||
console.log(
|
||||
` ${prefix.padEnd(30)} ${String(e.count).padStart(5)} obj ${mb(e.bytes).padStart(8)} MB`,
|
||||
);
|
||||
for (const s of e.samples) console.log(` e.g. ${s}`);
|
||||
}
|
||||
} catch (err) {
|
||||
console.log(`\n=== bucket: ${bucket} — ERROR: ${(err as Error).message} ===`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error('probe-minio failed:', err);
|
||||
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user