Files
pn-new-crm/scripts/backfill-document-folders.ts
Matt e5e2e68e5d fix(documents): backfill CLI --port arg guard
--port without a value (or with a --flag value) previously silently
fell back to all-ports mode because process.argv[indexOf+1] was
undefined. Now exits 1 with an explicit error. Hardens the script
before it gets wired into deploy in Task 17.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 12:25:22 +02:00

175 lines
6.0 KiB
TypeScript

/**
* Idempotent backfill: ensure every port has the three system roots
* (Clients / Companies / Yachts), every entity with attached files
* has a per-entity subfolder, every file with entity FKs has
* `folder_id` set, and every signed file from a completed workflow
* has the workflow's entity FKs propagated onto it.
*
* Safe to re-run: all writes target only rows where the relevant
* column is NULL. Per-port `pg_advisory_xact_lock` serializes
* concurrent runs.
*
* Usage:
* pnpm tsx scripts/backfill-document-folders.ts
* pnpm tsx scripts/backfill-document-folders.ts --port <portId>
*/
import 'dotenv/config';
import { and, eq, isNotNull, isNull, sql } from 'drizzle-orm';
import { db } from '@/lib/db';
import { ports } from '@/lib/db/schema/ports';
import { files, documents } from '@/lib/db/schema/documents';
import {
ensureSystemRoots,
ensureEntityFolder,
type EntityType,
} from '@/lib/services/document-folders.service';
import { logger } from '@/lib/logger';
/**
 * Caller-supplied knobs for {@link runBackfill}. Both fields are optional;
 * an empty object runs the backfill across every port.
 */
export interface BackfillOptions {
  /**
   * Restrict the run to a single port. When omitted, every row in the
   * `ports` table is processed.
   */
  portId?: string;

  /**
   * User ID recorded in `created_by` for any folders created. When omitted,
   * the backfill uses the literal `'system-backfill'`.
   */
  systemUserId?: string;
}
/**
 * Pick the single owning entity for a row that carries nullable entity FKs.
 *
 * Precedence is client, then company, then yacht. Returns `null` when none
 * of the three FKs is set (empty strings count as unset).
 */
function resolveEntityOwner(row: {
  clientId?: string | null;
  companyId?: string | null;
  yachtId?: string | null;
}): { type: EntityType; id: string } | null {
  if (row.clientId) return { type: 'client', id: row.clientId };
  if (row.companyId) return { type: 'company', id: row.companyId };
  if (row.yachtId) return { type: 'yacht', id: row.yachtId };
  return null;
}

/**
 * Idempotent document-folder backfill.
 *
 * For each port (just `opts.portId` when given, otherwise every row in
 * `ports`) the following runs inside one transaction, guarded by a per-port
 * `pg_advisory_xact_lock`:
 *
 *  1. Ensure the port's system root folders exist.
 *  2. For every completed document with a signed file, copy the document's
 *     owning entity FK onto the signed file row if that FK is still NULL.
 *  3. For every file that has an owning entity but no `folder_id`, ensure
 *     the entity folder exists and point the file at it.
 *
 * Every UPDATE is guarded by an `IS NULL` check on the column it writes, so
 * re-running never overwrites a value that is already set.
 *
 * @param opts - Optional port filter and acting user ID; see
 *   {@link BackfillOptions}.
 * @returns Resolves once every selected port has been processed. Rejects if
 *   a port's transaction fails; per-file failures in step 3 are logged and
 *   skipped instead.
 */
export async function runBackfill(opts: BackfillOptions = {}): Promise<void> {
  const portRows = opts.portId
    ? [{ id: opts.portId }]
    : await db.select({ id: ports.id }).from(ports);
  const systemUser = opts.systemUserId ?? 'system-backfill';

  for (const { id: portId } of portRows) {
    await db.transaction(async (tx) => {
      // Serialize concurrent runs on a per-port lock so two simultaneous
      // backfills can't race on folder inserts. The lock is transaction
      // scoped, so it is released when this transaction ends.
      await tx.execute(sql`SELECT pg_advisory_xact_lock(hashtext(${portId})::bigint)`);

      // ── Step 1: Ensure system roots exist for this port ──────────────────
      // NOTE(review): the helper is not handed `tx`, so it presumably runs on
      // its own connection outside this transaction — confirm.
      await ensureSystemRoots(portId, systemUser);

      // ── Step 2: Propagate entity FKs from completed workflows onto their
      //            signed file rows (pre-auto-deposit legacy completions). ──
      const completedDocs = await tx
        .select({
          id: documents.id,
          signedFileId: documents.signedFileId,
          clientId: documents.clientId,
          companyId: documents.companyId,
          yachtId: documents.yachtId,
        })
        .from(documents)
        .where(
          and(
            eq(documents.portId, portId),
            eq(documents.status, 'completed'),
            isNotNull(documents.signedFileId),
          ),
        );

      for (const d of completedDocs) {
        // Redundant with the SQL filter, but narrows the type for the compiler.
        if (!d.signedFileId) continue;
        const owner = resolveEntityOwner(d);
        if (!owner) continue;

        // Build the update payload with ONLY the matching FK so plain values
        // (never column references) are passed to .set().
        const update =
          owner.type === 'client'
            ? { clientId: owner.id }
            : owner.type === 'company'
              ? { companyId: owner.id }
              : { yachtId: owner.id };
        const matchingFkColumn =
          owner.type === 'client'
            ? files.clientId
            : owner.type === 'company'
              ? files.companyId
              : files.yachtId;

        // Only fill the FK when it is still NULL, so re-runs are no-ops.
        await tx
          .update(files)
          .set(update)
          .where(
            and(eq(files.id, d.signedFileId), eq(files.portId, portId), isNull(matchingFkColumn)),
          );
      }

      // ── Step 3: For every file with entity FKs but no folder_id,
      //            create the entity subfolder and set folder_id. ──────────
      // Only the columns needed below are fetched.
      const fileRows = await tx
        .select({
          id: files.id,
          clientId: files.clientId,
          companyId: files.companyId,
          yachtId: files.yachtId,
        })
        .from(files)
        .where(and(eq(files.portId, portId), isNull(files.folderId)));

      for (const f of fileRows) {
        const owner = resolveEntityOwner(f);
        if (!owner) continue;
        try {
          // NOTE(review): like ensureSystemRoots, this helper is not handed
          // `tx`; presumably the folder is created outside this transaction.
          const folder = await ensureEntityFolder(portId, owner.type, owner.id, systemUser);
          // Guard on folder_id IS NULL so a folder assigned by another writer
          // after the SELECT above is never overwritten.
          await tx
            .update(files)
            .set({ folderId: folder.id })
            .where(and(eq(files.id, f.id), eq(files.portId, portId), isNull(files.folderId)));
        } catch (err) {
          // Best-effort: log and skip rather than abort the whole port.
          logger.warn({ err, fileId: f.id, portId }, 'backfill: ensureEntityFolder failed');
        }
      }
    });
    logger.info({ portId }, 'backfill: port complete');
  }
}
// ── CLI entry point ────────────────────────────────────────────────────────────
// tsx compiles TypeScript to CJS at runtime, so `require.main === module`
// is the standard guard. The test suite imports `runBackfill` as a named
// export; only a direct CLI invocation enters this block.
//
// Accepted forms:
//   --port <portId>
//   --port=<portId>
// A `--port` flag with no usable value exits 1 instead of silently falling
// back to all-ports mode.
if (require.main === module) {
  const PORT_FLAG = '--port';
  const argv = process.argv;
  let portId: string | undefined;

  const flagIndex = argv.indexOf(PORT_FLAG);
  // Equals form; previously unrecognised, which meant `--port=abc` quietly
  // ran the backfill against every port.
  const inlineArg = argv.find((arg) => arg.startsWith(`${PORT_FLAG}=`));

  if (flagIndex !== -1) {
    const next = argv[flagIndex + 1];
    // Missing value, or the next token is another flag.
    if (!next || next.startsWith('--')) {
      logger.error('--port requires a value');
      process.exit(1);
    }
    portId = next;
  } else if (inlineArg !== undefined) {
    const value = inlineArg.slice(PORT_FLAG.length + 1);
    if (!value) {
      logger.error('--port requires a value');
      process.exit(1);
    }
    portId = value;
  }

  runBackfill({ portId })
    .then(() => {
      // eslint-disable-next-line no-console
      console.log('Backfill complete');
      process.exit(0);
    })
    .catch((err) => {
      logger.error({ err }, 'Backfill failed');
      process.exit(1);
    });
}