/**
 * Storage backend migration core. The CLI in `scripts/migrate-storage.ts` and
 * the admin API at `/api/v1/admin/storage/migrate` both call `runMigration()`
 * here, so behaviour is identical regardless of trigger.
 *
 * See docs/berth-recommender-and-pdf-plan.md §4.7a + §14.9a for the contract.
 */
|
||
|
|
|
||
|
|
import { createHash } from 'node:crypto';
|
||
|
|
import { statfs } from 'node:fs/promises';
|
||
|
|
import { Readable } from 'node:stream';
|
||
|
|
|
||
|
|
import { and, eq, isNull, sql } from 'drizzle-orm';
|
||
|
|
|
||
|
|
import { db } from '@/lib/db';
|
||
|
|
import { systemSettings } from '@/lib/db/schema/system';
|
||
|
|
|
||
|
|
import { FilesystemBackend } from './filesystem';
|
||
|
|
import { resetStorageBackendCache, type StorageBackend, type StorageBackendName } from './index';
|
||
|
|
import { S3Backend } from './s3';
|
||
|
|
|
||
|
|
// ─── tables to walk ─────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/**
 * Describes one database table whose rows point at a stored file. The
 * migration reads these column names to build its raw SQL listing query.
 */
export interface StorageKeyTable {
  /** Name of the table to read. */
  table: string;

  /** Name of the column that holds the storage key (`storage_key` for all new tables). */
  keyColumn: string;

  /** Name of the primary-key column; its value identifies the row in the progress markers. */
  pkColumn: string;

  /** Name of a content-type column, if the table has one, so the target backend can persist Content-Type. */
  contentTypeColumn?: string;
}
|
||
|
|
|
||
|
|
/**
 * Registry of file-bearing tables the migration walks.
 *
 * The list is empty in Phase 6a; `berth_pdf_versions` and `brochure_versions`
 * arrive in Phase 6b. Register every new file-bearing table here. Tables are
 * read through raw SQL by name, so the migration does not have to import each
 * domain's Drizzle schema.
 */
export const TABLES_WITH_STORAGE_KEYS: StorageKeyTable[] = [
  // { table: 'berth_pdf_versions', keyColumn: 'storage_key', pkColumn: 'id', contentTypeColumn: 'content_type' },
  // { table: 'brochure_versions', keyColumn: 'storage_key', pkColumn: 'id', contentTypeColumn: 'content_type' },
];
|
||
|
|
|
||
|
|
/**
 * Key of the Postgres advisory lock that `runMigration()` takes with
 * `pg_try_advisory_lock` and releases with `pg_advisory_unlock`.
 */
const ADVISORY_LOCK_KEY = 0xc700_0a01;
|
||
|
|
|
||
|
|
// ─── helpers ────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/** Validated command-line arguments, as returned by `parseArgs()`. */
interface CliArgs {
  /** Backend named by `--from`. */
  from: StorageBackendName;

  /** Backend named by `--to`. */
  to: StorageBackendName;

  /** True when `--dry-run` was passed. */
  dryRun: boolean;
}
|
||
|
|
|
||
|
|
/**
 * Parse the migration CLI flags: `--from <backend>`, `--to <backend>` and the
 * optional `--dry-run`.
 *
 * Tokens that look like options (leading `-`) but are not recognised are
 * rejected, so a misspelled `--dry-run` can never fall through to a live
 * migration. Tokens that do not start with `-` are ignored.
 *
 * @param argv Raw argument tokens.
 * @returns The validated arguments; `dryRun` defaults to `false`.
 * @throws Error when an unknown option is present, when `--from` or `--to` is
 *   missing or is not `s3` / `filesystem`, or when both name the same backend.
 */
export function parseArgs(argv: string[]): CliArgs {
  const isBackendName = (value: string | undefined): value is StorageBackendName =>
    value === 's3' || value === 'filesystem';

  let from: string | undefined;
  let to: string | undefined;
  let dryRun = false;

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === '--dry-run') dryRun = true;
    // `++i` consumes the flag's value; a missing value stays undefined and
    // is caught by the validation below.
    else if (arg === '--from') from = argv[++i];
    else if (arg === '--to') to = argv[++i];
    else if (arg !== undefined && arg.startsWith('-')) {
      throw new Error(`Unknown option: ${arg}`);
    }
  }

  if (!from || !to || !isBackendName(from)) {
    throw new Error('Usage: --from s3|filesystem --to s3|filesystem [--dry-run]');
  }
  if (!isBackendName(to)) {
    throw new Error('--to must be s3 or filesystem');
  }
  if (from === to) {
    throw new Error('--from and --to must differ');
  }
  return { from, to, dryRun };
}
|
||
|
|
|
||
|
|
/**
 * Create the `_storage_migration_progress` bookkeeping table if it is not
 * there yet. Each row is keyed by `(table_name, row_pk)` and stores the
 * storage key, sha256 and byte size of one migrated file.
 */
async function ensureProgressTable(): Promise<void> {
  const createProgressTable = sql`
    CREATE TABLE IF NOT EXISTS _storage_migration_progress (
      table_name text NOT NULL,
      row_pk text NOT NULL,
      storage_key text NOT NULL,
      sha256 text NOT NULL,
      size_bytes bigint NOT NULL,
      migrated_at timestamptz NOT NULL DEFAULT now(),
      PRIMARY KEY (table_name, row_pk)
    )
  `;
  await db.execute(createProgressTable);
}
|
||
|
|
|
||
|
|
/**
 * Normalise a `db.execute()` result to a plain array of rows.
 *
 * @param result Either an array of rows or an object carrying a `rows` property.
 * @returns The array itself, the `rows` property, or `[]` when neither is present.
 */
function rowsOf(result: unknown): unknown[] {
  if (Array.isArray(result)) {
    return result;
  }
  if (result == null) {
    return [];
  }
  const { rows } = result as { rows?: unknown[] };
  return rows ?? [];
}
|
||
|
|
|
||
|
|
/**
 * Check whether a progress marker exists for the given row.
 *
 * @param tableName Table the row belongs to.
 * @param pk Primary-key value of the row, as text.
 * @returns True when `_storage_migration_progress` has a row for `(tableName, pk)`.
 */
async function isRowMigrated(tableName: string, pk: string): Promise<boolean> {
  const lookup = sql`
    SELECT 1 FROM _storage_migration_progress
    WHERE table_name = ${tableName} AND row_pk = ${pk}
    LIMIT 1
  `;
  const matches = rowsOf(await db.execute(lookup));
  return matches.length > 0;
}
|
||
|
|
|
||
|
|
/**
 * Record a progress marker for one migrated row. An existing marker for the
 * same `(tableName, pk)` is left untouched (`ON CONFLICT ... DO NOTHING`).
 *
 * @param tableName Table the row belongs to.
 * @param pk Primary-key value of the row, as text.
 * @param key Storage key of the copied file.
 * @param sha256 Hex sha256 of the copied bytes.
 * @param sizeBytes Size of the copied file in bytes.
 */
async function markRowMigrated(
  tableName: string,
  pk: string,
  key: string,
  sha256: string,
  sizeBytes: number,
): Promise<void> {
  const insertMarker = sql`
    INSERT INTO _storage_migration_progress (table_name, row_pk, storage_key, sha256, size_bytes)
    VALUES (${tableName}, ${pk}, ${key}, ${sha256}, ${sizeBytes})
    ON CONFLICT (table_name, row_pk) DO NOTHING
  `;
  await db.execute(insertMarker);
}
|
||
|
|
|
||
|
|
/** One file-bearing row found by `listKeysFor()`. */
interface RowRef {
  /** Table the row was read from. */
  tableName: string;

  /** Primary-key value of the row, stringified. */
  pk: string;

  /** Storage key of the file, stringified. */
  key: string;

  /** Content type of the file; `listKeysFor()` falls back to `application/octet-stream`. */
  contentType: string;
}
|
||
|
|
|
||
|
|
/**
 * List every row of `tbl` whose storage-key column is not NULL.
 *
 * The table and column names are spliced into raw SQL without quoting or
 * escaping, so they must come from trusted configuration.
 *
 * @param tbl Table descriptor naming the table and its columns.
 * @returns One `RowRef` per row. `pk` and `key` are stringified; `contentType`
 *   is `application/octet-stream` unless the row has a non-empty string
 *   content type.
 */
async function listKeysFor(tbl: StorageKeyTable): Promise<RowRef[]> {
  const { table, keyColumn, pkColumn, contentTypeColumn } = tbl;
  const ctSelect = contentTypeColumn ? `, ${contentTypeColumn} as content_type` : '';
  const query = sql.raw(
    `SELECT ${pkColumn} as pk, ${keyColumn} as key${ctSelect}
     FROM ${table}
     WHERE ${keyColumn} IS NOT NULL`,
  );

  type ListedRow = { pk: unknown; key: unknown; content_type?: unknown };
  const listed = rowsOf(await db.execute(query)) as ListedRow[];

  const refs: RowRef[] = [];
  for (const row of listed) {
    const declaredType = row.content_type;
    const hasDeclaredType = typeof declaredType === 'string' && declaredType.length > 0;
    refs.push({
      tableName: table,
      pk: String(row.pk),
      key: String(row.key),
      contentType: hasDeclaredType ? declaredType : 'application/octet-stream',
    });
  }
  return refs;
}
|
||
|
|
|
||
|
|
// ─── streaming + sha256 verify ──────────────────────────────────────────────
|
||
|
|
|
||
|
|
/**
 * Copy one file from `source` to `target` and verify it by sha256.
 *
 * The source object is read fully into memory, because `put()` is handed a
 * single buffer together with its digest and size. The object is then read
 * back from `target` and hashed incrementally, chunk by chunk, so the
 * verification pass never holds a second full copy in memory.
 *
 * @param source Backend to read `ref.key` from.
 * @param target Backend to write `ref.key` to.
 * @param ref Row whose file is being copied.
 * @returns Hex sha256 and byte length of the copied file.
 * @throws Error when the digest reported by `put()` or the digest of the
 *   read-back bytes differs from the digest of the source bytes.
 */
export async function copyAndVerify(
  source: StorageBackend,
  target: StorageBackend,
  ref: RowRef,
): Promise<{ sha256: string; sizeBytes: number }> {
  // Streams may yield strings as well as Buffers; normalise before use.
  const asBuffer = (chunk: unknown): Buffer =>
    Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string);

  const stream = await source.get(ref.key);
  const chunks: Buffer[] = [];
  for await (const chunk of stream as Readable) {
    chunks.push(asBuffer(chunk));
  }
  const buffer = Buffer.concat(chunks);
  const sha256 = createHash('sha256').update(buffer).digest('hex');

  const putResult = await target.put(ref.key, buffer, {
    contentType: ref.contentType,
    sha256,
    sizeBytes: buffer.length,
  });
  if (putResult.sha256 !== sha256) {
    throw new Error(`sha256 mismatch on put for ${ref.tableName}/${ref.pk}`);
  }

  // Re-fetch from the target and verify a second time. Only the digest is
  // needed, so feed the hash directly instead of buffering the object again.
  const verifyStream = await target.get(ref.key);
  const verifyHash = createHash('sha256');
  for await (const chunk of verifyStream as Readable) {
    verifyHash.update(asBuffer(chunk));
  }
  if (verifyHash.digest('hex') !== sha256) {
    throw new Error(`sha256 mismatch after round-trip for ${ref.tableName}/${ref.pk} (${ref.key})`);
  }
  return { sha256, sizeBytes: buffer.length };
}
|
||
|
|
|
||
|
|
// ─── pre-flight ─────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/**
 * Report the free space of the filesystem that contains `rootPath`.
 *
 * @param rootPath Path passed to `statfs`.
 * @returns `bavail * bsize` from the `statfs` result, as a number of bytes.
 */
async function freeBytesAt(rootPath: string): Promise<number> {
  const { bavail, bsize } = await statfs(rootPath);
  return Number(bavail) * Number(bsize);
}
|
||
|
|
|
||
|
|
/**
 * Point the `storage_backend` system setting that has no port (`portId` NULL)
 * at `target`, then reset the storage backend cache.
 *
 * The setting row is updated when it exists and inserted otherwise.
 *
 * @param target Backend name to store as the setting value.
 * @param userId Recorded as `updatedBy` on the setting row.
 */
async function flipBackendSetting(target: StorageBackendName, userId: string): Promise<void> {
  // Built fresh for each query rather than sharing one condition object.
  const portlessBackendSetting = () =>
    and(eq(systemSettings.key, 'storage_backend'), isNull(systemSettings.portId));

  const current = await db.query.systemSettings.findFirst({ where: portlessBackendSetting() });

  if (!current) {
    await db.insert(systemSettings).values({
      key: 'storage_backend',
      value: target,
      portId: null,
      updatedBy: userId,
    });
  } else {
    await db
      .update(systemSettings)
      .set({ value: target, updatedBy: userId, updatedAt: new Date() })
      .where(portlessBackendSetting());
  }

  resetStorageBackendCache();
}
|
||
|
|
|
||
|
|
// ─── main ───────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/** Input to `runMigration()`. */
export interface MigrationOptions {
  /** Backend to copy files from. */
  from: StorageBackendName;

  /** Backend to copy files to; it becomes the active backend after a real run. */
  to: StorageBackendName;

  /** When true, nothing is copied and the backend setting is not flipped. */
  dryRun: boolean;

  /** Test override: use this backend as the source instead of building one from `from`. */
  source?: StorageBackend;

  /** Test override: use this backend as the target instead of building one from `to`. */
  target?: StorageBackend;

  /** User id recorded on the settings change; defaults to `cli:migrate-storage`. */
  userId?: string;
}
|
||
|
|
|
||
|
|
/** Summary returned by `runMigration()`. */
export interface MigrationResult {
  /** Number of rows listed across all registered tables. */
  rowsConsidered: number;

  /** Number of rows copied and verified during this run. */
  rowsMigrated: number;

  /** Number of rows skipped because a progress marker already existed. */
  rowsSkippedAlreadyDone: number;

  /** Bytes copied in a real run, or bytes that would be copied in a dry run. */
  totalBytes: number;

  /** True when the `storage_backend` setting was switched to the target. */
  flipped: boolean;

  /** Echo of the `dryRun` option. */
  dryRun: boolean;
}
|
||
|
|
|
||
|
|
/**
 * Copy every registered file-bearing row from one storage backend to another,
 * then switch the `storage_backend` setting to the target.
 *
 * Steps, all performed while holding a Postgres advisory lock:
 *  1. Ensure the progress table exists.
 *  2. For each table in `TABLES_WITH_STORAGE_KEYS`, list its rows and set
 *     aside those that already have a progress marker.
 *  3. When the target is a filesystem backend, refuse to continue unless the
 *     free space is at least 120% of the bytes still to be copied.
 *  4. Copy and verify each pending row and write its progress marker. In a
 *     dry run the sizes are only summed.
 *  5. Unless this is a dry run, flip the backend setting.
 *
 * @param opts Migration direction, dry-run flag, optional backend overrides
 *   and audit user id.
 * @returns Counters for the run; `flipped` is true only for a real run.
 * @throws Error when the advisory lock is already held, when free disk space
 *   is insufficient, or when any copy or verification fails.
 */
export async function runMigration(opts: MigrationOptions): Promise<MigrationResult> {
  // NOTE(review): pg_try_advisory_lock is session-scoped. If `db` is backed by
  // a connection pool, the unlock in `finally` may run on a different
  // connection than the one that took the lock — confirm how `db` is set up.
  const lockResult = await db.execute(sql`SELECT pg_try_advisory_lock(${ADVISORY_LOCK_KEY}) as ok`);
  const lockRows = rowsOf(lockResult) as Array<{ ok: boolean }>;
  if (!lockRows[0]?.ok) {
    throw new Error('Could not acquire storage migration advisory lock');
  }

  try {
    await ensureProgressTable();

    const source = opts.source ?? (await buildBackendForMigration(opts.from));
    const target = opts.target ?? (await buildBackendForMigration(opts.to));

    // Measure free space where the files will land. A target built here uses
    // STORAGE_FILESYSTEM_ROOT when it is set; the default root and an
    // injected target have no path known to this function, so the working
    // directory is measured for them.
    // NOTE(review): assumes the configured root directory already exists by
    // the time the pre-flight runs — confirm FilesystemBackend.create() creates it.
    const diskCheckPath =
      opts.target === undefined
        ? (process.env.STORAGE_FILESYSTEM_ROOT ?? process.cwd())
        : process.cwd();

    let rowsConsidered = 0;
    let rowsMigrated = 0;
    let rowsSkippedAlreadyDone = 0;
    let totalBytes = 0;

    for (const tbl of TABLES_WITH_STORAGE_KEYS) {
      const refs = await listKeysFor(tbl);
      rowsConsidered += refs.length;

      // Set aside rows copied by an earlier run so that a resumed migration
      // only budgets disk space for what is still to be copied.
      // NOTE(review): markers are keyed by (table, pk) only and do not record
      // the migration direction, so markers left by an earlier migration make
      // a later reverse migration skip those rows — confirm how markers are
      // meant to be cleared.
      const pending: RowRef[] = [];
      for (const ref of refs) {
        if (await isRowMigrated(ref.tableName, ref.pk)) {
          rowsSkippedAlreadyDone += 1;
        } else {
          pending.push(ref);
        }
      }

      // Pre-flight free-disk check when target is filesystem.
      if (opts.to === 'filesystem' && target instanceof FilesystemBackend) {
        const heads = await Promise.all(
          pending.map((r) => source.head(r.key).then((h) => h?.sizeBytes ?? 0)),
        );
        const sumBytes = heads.reduce((a, b) => a + b, 0);
        const free = await freeBytesAt(diskCheckPath);
        if (free < sumBytes * 1.2) {
          throw new Error(
            `Insufficient disk: need ${Math.round(sumBytes / 1e6)}MB + 20% margin, have ${Math.round(free / 1e6)}MB free`,
          );
        }
      }

      for (const ref of pending) {
        if (opts.dryRun) {
          // Dry run: report what would be copied without touching the target.
          const head = await source.head(ref.key);
          totalBytes += head?.sizeBytes ?? 0;
          continue;
        }
        const { sha256, sizeBytes } = await copyAndVerify(source, target, ref);
        // The marker is written only after the copy verified.
        await markRowMigrated(ref.tableName, ref.pk, ref.key, sha256, sizeBytes);
        rowsMigrated += 1;
        totalBytes += sizeBytes;
      }
    }

    let flipped = false;
    if (!opts.dryRun) {
      await flipBackendSetting(opts.to, opts.userId ?? 'cli:migrate-storage');
      flipped = true;
    }

    return {
      rowsConsidered,
      rowsMigrated,
      rowsSkippedAlreadyDone,
      totalBytes,
      flipped,
      dryRun: opts.dryRun,
    };
  } finally {
    await db.execute(sql`SELECT pg_advisory_unlock(${ADVISORY_LOCK_KEY})`);
  }
}
|
||
|
|
|
||
|
|
/**
 * Construct the backend instance for one side of the migration.
 *
 * @param name Which backend to build.
 * @returns A `FilesystemBackend` rooted at `STORAGE_FILESYSTEM_ROOT` (or
 *   `./storage` when that is unset) for `filesystem`; otherwise an
 *   `S3Backend` created with an empty options object.
 */
async function buildBackendForMigration(name: StorageBackendName): Promise<StorageBackend> {
  if (name !== 'filesystem') {
    return S3Backend.create({});
  }

  const root = process.env.STORAGE_FILESYSTEM_ROOT ?? './storage';
  return FilesystemBackend.create({ root, proxyHmacSecretEncrypted: null });
}
|