feat(documents): backfill script for system roots + entity folders

Idempotent one-time backfill that runs as part of the deploy:
  1. Ensures Clients/Companies/Yachts roots per port.
  2. Copies entity FKs from completed workflows onto signed file rows
     (legacy completions ran before the auto-deposit handler shipped).
  3. Ensures per-entity subfolders for every entity with attached
     files and sets files.folder_id.

Each port is processed under pg_advisory_xact_lock(hashtext(portId)::bigint)
so concurrent runs serialize. Safe to re-run: step 2 only fills entity FK
columns that are still NULL, and step 3 only selects files whose folder_id
IS NULL.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-11 12:19:15 +02:00
parent ae3f483cb6
commit d68d8e5a79
2 changed files with 400 additions and 0 deletions

View File

@@ -0,0 +1,175 @@
/**
* Idempotent backfill: ensure every port has the three system roots
* (Clients / Companies / Yachts), every entity with attached files
* has a per-entity subfolder, every file with entity FKs has
* `folder_id` set, and every signed file from a completed workflow
* has the workflow's entity FKs propagated onto it.
*
* Safe to re-run: all writes target only rows where the relevant
* column is NULL. Per-port `pg_advisory_xact_lock` serializes
* concurrent runs.
*
* Usage:
* pnpm tsx scripts/backfill-document-folders.ts
* pnpm tsx scripts/backfill-document-folders.ts --port <portId>
*/
import 'dotenv/config';
import { and, eq, isNotNull, isNull, sql } from 'drizzle-orm';
import { db } from '@/lib/db';
import { ports } from '@/lib/db/schema/ports';
import { files, documents } from '@/lib/db/schema/documents';
import {
ensureSystemRoots,
ensureEntityFolder,
type EntityType,
} from '@/lib/services/document-folders.service';
import { logger } from '@/lib/logger';
/**
 * Options accepted by {@link runBackfill}. Every field is optional; calling
 * with no options backfills all ports.
 */
export interface BackfillOptions {
  /**
   * Restrict the backfill to this single port. When omitted, every row in
   * the `ports` table is processed.
   */
  portId?: string;
  /**
   * User ID passed to the folder service as the creator of any folders the
   * backfill has to create (recorded in `created_by`). When omitted,
   * `runBackfill` falls back to the literal `'system-backfill'`.
   */
  systemUserId?: string;
}
/** The three nullable entity FK columns shared by `documents` and `files` rows. */
interface EntityFkRow {
  clientId: string | null;
  companyId: string | null;
  yachtId: string | null;
}

/** The single entity a file is filed under. */
interface EntityOwner {
  type: EntityType;
  id: string;
}

/**
 * Picks the owning entity for a row, with precedence client → company → yacht.
 * Returns `null` when none of the three FKs is set.
 */
function resolveEntityOwner(row: EntityFkRow): EntityOwner | null {
  if (row.clientId) return { type: 'client', id: row.clientId };
  if (row.companyId) return { type: 'company', id: row.companyId };
  if (row.yachtId) return { type: 'yacht', id: row.yachtId };
  return null;
}

/**
 * Idempotent backfill of document folders. See the module-level JSDoc for the
 * full description of each step.
 *
 * For each port (just `opts.portId` when given, otherwise every row in
 * `ports`) a transaction is opened that:
 *   1. takes `pg_advisory_xact_lock(hashtext(portId)::bigint)`,
 *   2. calls `ensureSystemRoots`,
 *   3. copies the owning entity FK from each completed document onto its
 *      signed file, only where that FK column on the file is still NULL,
 *   4. resolves the entity folder for every file in the port that has an
 *      entity FK but no `folder_id`, and sets `folder_id` (only where it is
 *      still NULL at write time).
 *
 * A failure of `ensureEntityFolder` for one file is logged and that file is
 * skipped. Any failure of a statement issued on the transaction itself
 * propagates and aborts that port.
 *
 * NOTE(review): `ensureSystemRoots` / `ensureEntityFolder` are not handed
 * `tx`, so they presumably write through their own connection and are not
 * rolled back with this transaction — confirm against the service.
 * NOTE(review): the `'system-backfill'` fallback is only valid if
 * `created_by` accepts a value that is not an existing user id — confirm
 * against the schema (the test suite always passes a real user id).
 *
 * @param opts Optional port filter and creator user id.
 * @throws Whatever the database driver or `ensureSystemRoots` throws; the
 *   current port's transaction is rolled back and later ports are not run.
 */
export async function runBackfill(opts: BackfillOptions = {}): Promise<void> {
  type FolderId = Awaited<ReturnType<typeof ensureEntityFolder>>['id'];

  const portRows = opts.portId
    ? [{ id: opts.portId }]
    : await db.select({ id: ports.id }).from(ports);
  const systemUser = opts.systemUserId ?? 'system-backfill';

  for (const { id: portId } of portRows) {
    await db.transaction(async (tx) => {
      // Serialize concurrent runs on a per-port lock so two simultaneous
      // backfills can't race on folder inserts. The lock is released
      // automatically when the transaction ends.
      await tx.execute(sql`SELECT pg_advisory_xact_lock(hashtext(${portId})::bigint)`);

      // ── Step 1: Ensure system roots exist for this port ──────────────────
      await ensureSystemRoots(portId, systemUser);

      // ── Step 2: Propagate entity FKs from completed workflows onto their
      //            signed file rows (pre-auto-deposit legacy completions). ──
      const completedDocs = await tx
        .select({
          id: documents.id,
          signedFileId: documents.signedFileId,
          clientId: documents.clientId,
          companyId: documents.companyId,
          yachtId: documents.yachtId,
        })
        .from(documents)
        .where(
          and(
            eq(documents.portId, portId),
            eq(documents.status, 'completed'),
            isNotNull(documents.signedFileId),
          ),
        );

      for (const doc of completedDocs) {
        // The query already filters on IS NOT NULL; this narrows the type.
        if (!doc.signedFileId) continue;
        const owner = resolveEntityOwner(doc);
        if (!owner) continue;

        // Build the update object with ONLY the matching FK column so we
        // never pass column references to .set().
        const update =
          owner.type === 'client'
            ? { clientId: owner.id }
            : owner.type === 'company'
              ? { companyId: owner.id }
              : { yachtId: owner.id };
        const matchingFkColumn =
          owner.type === 'client'
            ? files.clientId
            : owner.type === 'company'
              ? files.companyId
              : files.yachtId;

        await tx
          .update(files)
          .set(update)
          .where(
            and(
              eq(files.id, doc.signedFileId),
              eq(files.portId, portId),
              // Never overwrite an FK that is already populated.
              isNull(matchingFkColumn),
            ),
          );
      }

      // ── Step 3: For every file with entity FKs but no folder_id,
      //            create the entity subfolder and set folder_id. ──────────
      // Runs on `tx`, so it sees the FKs written by step 2.
      const unfiledFiles = await tx
        .select({
          id: files.id,
          clientId: files.clientId,
          companyId: files.companyId,
          yachtId: files.yachtId,
        })
        .from(files)
        .where(and(eq(files.portId, portId), isNull(files.folderId)));

      // Successful folder lookups, keyed by "<type>:<id>", so an entity with
      // many files triggers only one ensureEntityFolder call per port.
      const folderIdByOwner = new Map<string, FolderId>();

      for (const file of unfiledFiles) {
        const owner = resolveEntityOwner(file);
        if (!owner) continue;

        const ownerKey = `${owner.type}:${owner.id}`;
        let folderId = folderIdByOwner.get(ownerKey);
        if (folderId === undefined) {
          try {
            const folder = await ensureEntityFolder(portId, owner.type, owner.id, systemUser);
            folderId = folder.id;
          } catch (err) {
            // Best-effort: log and skip rather than abort the whole port.
            // Failures are not cached, so the next file of the same entity
            // retries.
            logger.warn({ err, fileId: file.id, portId }, 'backfill: ensureEntityFolder failed');
            continue;
          }
          folderIdByOwner.set(ownerKey, folderId);
        }

        // Deliberately outside the try/catch: once a statement on `tx` fails,
        // PostgreSQL rejects every later statement in that transaction, so
        // swallowing the error would only hide a rolled-back port.
        await tx
          .update(files)
          .set({ folderId })
          .where(
            and(
              eq(files.id, file.id),
              eq(files.portId, portId),
              // Re-check at write time so a folder assigned since the SELECT
              // above is never overwritten.
              isNull(files.folderId),
            ),
          );
      }
    });
    logger.info({ portId }, 'backfill: port complete');
  }
}
// ── CLI entry point ────────────────────────────────────────────────────────────
// tsx compiles TypeScript to CJS at runtime, so `require.main === module`
// is the standard guard. The test suite imports `runBackfill` as a named
// export; the CLI invocation hits this block and runs main().

/**
 * CLI driver: parses an optional `--port <portId>` argument from
 * `process.argv`, runs the backfill, prints a completion line and exits
 * with status 0.
 *
 * @throws Error when `--port` is present but not followed by a value (or is
 *   followed by another `--flag`). Without this check a bare `--port` would
 *   leave `portId` undefined and silently backfill every port.
 */
async function main(): Promise<void> {
  const flagIndex = process.argv.indexOf('--port');
  let portId: string | undefined;
  if (flagIndex !== -1) {
    const value = process.argv[flagIndex + 1];
    if (!value || value.startsWith('--')) {
      throw new Error('--port requires a <portId> value');
    }
    portId = value;
  }

  await runBackfill({ portId });
  // eslint-disable-next-line no-console
  console.log('Backfill complete');
  // Exit explicitly rather than waiting for the event loop to drain.
  process.exit(0);
}
// Run main() only when this file is the process entry point; importing the
// module (as the test suite does) must not start a backfill.
// eslint-disable-next-line @typescript-eslint/no-require-imports
if (require.main === module) {
  // Log the failure through the structured logger, then exit non-zero.
  const reportAndExit = (err: unknown): void => {
    logger.error({ err }, 'Backfill failed');
    process.exit(1);
  };
  main().catch(reportAndExit);
}

View File

@@ -0,0 +1,225 @@
/**
* Task 11 — backfill-document-folders integration tests.
*
* Five cases:
* 1. Creates system roots and entity subfolders.
* 2. Sets files.folder_id from entity FKs.
* 3. Copies entity FKs from completed workflows onto signed files.
* 4. Idempotent — second run produces the same result.
* 5. Port isolation — does not touch other ports.
*/
import { describe, it, expect, beforeAll, beforeEach } from 'vitest';
import { and, eq } from 'drizzle-orm';
import { db } from '@/lib/db';
import { documentFolders, files, documents } from '@/lib/db/schema/documents';
import { user } from '@/lib/db/schema/users';
import { runBackfill } from '../../scripts/backfill-document-folders';
import { makePort, makeClient } from '../helpers/factories';
/**
 * ID of an existing user row, resolved once before the suite runs. The tests
 * use it for `uploadedBy` / `createdBy` and as the backfill's `systemUserId`.
 */
let TEST_USER_ID = '';

beforeAll(async () => {
  // Any user will do; take the first row the database returns.
  const candidates = await db.select({ id: user.id }).from(user).limit(1);
  const firstUser = candidates[0];
  if (!firstUser) {
    throw new Error('No user available; run pnpm db:seed first');
  }
  TEST_USER_ID = firstUser.id;
});
describe('backfill-document-folders · runBackfill', () => {
  let portId: string;

  /**
   * Inserts a minimal `files` row (no `folderId`) and returns it.
   * `clientId` is left NULL unless supplied.
   */
  async function insertFile(args: { portId: string; name: string; clientId?: string }) {
    const [row] = await db
      .insert(files)
      .values({
        portId: args.portId,
        clientId: args.clientId ?? null,
        filename: args.name,
        originalName: args.name,
        storagePath: `${args.portId}/${args.name}`,
        storageBucket: 'crm-files',
        uploadedBy: TEST_USER_ID,
      })
      .returning();
    if (!row) throw new Error('files insert returned no row');
    return row;
  }

  /** Re-reads a file row, failing the test if it no longer exists. */
  async function reloadFile(fileId: string, filePortId: string) {
    const row = await db.query.files.findFirst({
      where: and(eq(files.id, fileId), eq(files.portId, filePortId)),
    });
    if (!row) throw new Error(`file ${fileId} not found in port ${filePortId}`);
    return row;
  }

  /** Looks up the per-client entity folder for a port, if one exists. */
  function findClientFolder(folderPortId: string, clientId: string) {
    return db.query.documentFolders.findFirst({
      where: and(
        eq(documentFolders.portId, folderPortId),
        eq(documentFolders.entityType, 'client'),
        eq(documentFolders.entityId, clientId),
      ),
    });
  }

  /** All folder rows belonging to a port. */
  function listFolders(folderPortId: string) {
    return db.select().from(documentFolders).where(eq(documentFolders.portId, folderPortId));
  }

  beforeEach(async () => {
    const port = await makePort();
    portId = port.id;
    // Clean up any folders left by a prior test on this port.
    await db.delete(documentFolders).where(eq(documentFolders.portId, portId));
  });

  // ── Test 1: Creates system roots and entity subfolders ─────────────────────
  it('creates the three system roots and a client entity subfolder', async () => {
    const client = await makeClient({ portId });
    // A file linked to the client is what makes the backfill create the subfolder.
    await insertFile({ portId, name: 'test.pdf', clientId: client.id });

    await runBackfill({ portId, systemUserId: TEST_USER_ID });

    const roots = await db
      .select()
      .from(documentFolders)
      .where(and(eq(documentFolders.portId, portId), eq(documentFolders.entityType, 'root')));
    expect(roots).toHaveLength(3);
    const rootNames = roots.map((r) => r.name).sort();
    expect(rootNames).toEqual(['Clients', 'Companies', 'Yachts']);

    const entityFolder = await findClientFolder(portId, client.id);
    expect(entityFolder).toBeDefined();
    expect(entityFolder?.entityType).toBe('client');
    expect(entityFolder?.entityId).toBe(client.id);
  });

  // ── Test 2: Sets files.folder_id from entity FKs ────────────────────────────
  it('sets files.folder_id for files that have entity FKs but no folder_id', async () => {
    const client = await makeClient({ portId });
    const fileRow = await insertFile({ portId, name: 'contract.pdf', clientId: client.id });
    expect(fileRow.folderId).toBeNull();

    await runBackfill({ portId, systemUserId: TEST_USER_ID });

    const updated = await reloadFile(fileRow.id, portId);
    expect(updated.folderId).not.toBeNull();

    // The assigned folder must be the client's entity subfolder.
    const entityFolder = await findClientFolder(portId, client.id);
    expect(entityFolder).toBeDefined();
    expect(updated.folderId).toBe(entityFolder?.id);
  });

  // ── Test 3: Copies entity FKs from completed workflows onto signed files ────
  it('propagates entity FKs from completed workflow onto signed file and sets folder_id', async () => {
    const client = await makeClient({ portId });
    // No clientId on the file — simulates a legacy completion from before
    // entity FKs were propagated automatically.
    const signedFile = await insertFile({ portId, name: 'signed-eoi.pdf' });
    expect(signedFile.clientId).toBeNull();

    // Completed workflow document pointing at the orphaned file.
    await db.insert(documents).values({
      portId,
      documentType: 'eoi',
      title: 'EOI for client',
      status: 'completed',
      signedFileId: signedFile.id,
      clientId: client.id,
      createdBy: TEST_USER_ID,
    });

    await runBackfill({ portId, systemUserId: TEST_USER_ID });

    const updatedFile = await reloadFile(signedFile.id, portId);
    // The backfill should have set the clientId on the signed file…
    expect(updatedFile.clientId).toBe(client.id);
    // …and then filed it under that client's entity subfolder specifically.
    const entityFolder = await findClientFolder(portId, client.id);
    expect(entityFolder).toBeDefined();
    expect(updatedFile.folderId).toBe(entityFolder?.id);
  });

  // ── Test 4: Idempotent ────────────────────────────────────────────────────────
  it('is idempotent — running twice produces the same number of folder rows', async () => {
    const client = await makeClient({ portId });
    const fileRow = await insertFile({ portId, name: 'idempotent.pdf', clientId: client.id });

    await runBackfill({ portId, systemUserId: TEST_USER_ID });
    const foldersAfterFirst = await listFolders(portId);
    const fileAfterFirst = await reloadFile(fileRow.id, portId);

    await runBackfill({ portId, systemUserId: TEST_USER_ID });
    const foldersAfterSecond = await listFolders(portId);
    const fileAfterSecond = await reloadFile(fileRow.id, portId);

    expect(foldersAfterSecond).toHaveLength(foldersAfterFirst.length);
    // Same folder rows, not merely the same count.
    expect(foldersAfterSecond.map((f) => f.id).sort()).toEqual(
      foldersAfterFirst.map((f) => f.id).sort(),
    );
    // The file must stay in the folder the first run assigned.
    expect(fileAfterFirst.folderId).not.toBeNull();
    expect(fileAfterSecond.folderId).toBe(fileAfterFirst.folderId);
  });

  // ── Test 5: Port isolation ────────────────────────────────────────────────────
  it('does not create folders for a different port when only portId is supplied', async () => {
    const otherPort = await makePort();
    await db.delete(documentFolders).where(eq(documentFolders.portId, otherPort.id));
    const otherClient = await makeClient({ portId: otherPort.id });
    const otherFile = await insertFile({
      portId: otherPort.id,
      name: 'other-port.pdf',
      clientId: otherClient.id,
    });

    // Run backfill only for the main portId, NOT the otherPort.
    await runBackfill({ portId, systemUserId: TEST_USER_ID });

    // The other port should have zero folders — the backfill was not run for it.
    const otherPortFolders = await listFolders(otherPort.id);
    expect(otherPortFolders).toHaveLength(0);

    // …and its file must not have been filed anywhere.
    const otherFileAfter = await reloadFile(otherFile.id, otherPort.id);
    expect(otherFileAfter.folderId).toBeNull();
  });
});