Files
pn-new-crm/src/lib/storage/migrate.ts

319 lines
11 KiB
TypeScript
Raw Normal View History

feat(storage): pluggable s3-or-filesystem backend + migration CLI + admin UI Phase 6a from docs/berth-recommender-and-pdf-plan.md §4.7a + §14.9a. Lays the storage groundwork for Phase 6b/7 file-bearing schemas (per-berth PDFs, brochures) without touching those domains yet. New files: - src/lib/storage/index.ts StorageBackend interface + per-process factory keyed on system_settings. - src/lib/storage/s3.ts S3-compatible backend (MinIO/AWS/B2/R2/ Wasabi/Tigris) wrapping the existing minio JS client. Includes a healthCheck() used by the admin "Test connection" button. - src/lib/storage/filesystem.ts Local filesystem backend with all §14.9a mitigations baked in. - src/lib/storage/migrate.ts Shared migration core — pg_advisory_lock, per-row resumable progress markers, sha256 round-trip verification, atomic storage_backend flip on success. - scripts/migrate-storage.ts Thin CLI shim around runMigration(). - src/app/api/storage/[token]/route.ts Filesystem proxy GET. Verifies HMAC, enforces single-use replay protection via Redis SET NX, streams via NextResponse ReadableStream with explicit Content-Type + Content-Disposition. Node runtime only. - src/app/api/v1/admin/storage/route.ts GET status + POST connection test. - src/app/api/v1/admin/storage/migrate/route.ts Super-admin-only POST that runs the exact same runMigration() as the CLI. - src/app/(dashboard)/[portSlug]/admin/storage/page.tsx Super-admin admin UI (current backend, capacity stats, switch button with dry-run, test connection, backup hint). - src/components/admin/storage-admin-panel.tsx Client component for the page above. §14.9a critical mitigations implemented: - Path-traversal: storage keys validated against ^[a-zA-Z0-9/_.-]+$; `..`, `.`, `//`, leading `/`, and overlength keys rejected. - Realpath: storage root realpath'd at create time, every per-key resolution checked against the realpath'd prefix. - Storage root created (or chmod'd) to 0o700. 
- Multi-node refusal: FilesystemBackend.create() throws when MULTI_NODE_DEPLOYMENT=true. - HMAC token: sha256-HMAC over the (key, expiry, nonce, filename, content-type) payload. Verified with timingSafeEqual; bad sig, expired, or invalid-key payloads all return 403. - Single-use replay: token body cached in Redis SET NX EX 1800s. - sha256 round-trip: copyAndVerify() re-fetches from the target after put() and aborts the migration on any mismatch. - Free-disk pre-flight: when migrating to filesystem, sums byte counts via source.head() and aborts if free space < total * 1.2. - pg_advisory_lock(0xc7000a01) prevents concurrent migrations. - Resumable: per-row progress markers in _storage_migration_progress. system_settings keys read by the factory (jsonb, no schema change): storage_backend, storage_s3_endpoint, storage_s3_region, storage_s3_bucket, storage_s3_access_key, storage_s3_secret_key_encrypted, storage_s3_force_path_style, storage_filesystem_root, storage_proxy_hmac_secret_encrypted. Defaults: storage_backend=`s3`, storage_filesystem_root=`./storage` (./storage added to .gitignore). Tests added (34 tests, all green): - tests/unit/storage/filesystem-backend.test.ts — key validation allow/reject matrix, realpath escape, 0o700 perms, multi-node refusal, HMAC token sign/verify/tamper/expire/invalid-key. - tests/unit/storage/copy-and-verify.test.ts — sha256 mismatch on round-trip aborts the migration. - tests/integration/storage/proxy-route.test.ts — happy path, wrong HMAC secret, expired token, replay rejection. Phase 6a ships zero file-bearing tables — TABLES_WITH_STORAGE_KEYS is intentionally empty. berth_pdf_versions and brochure_versions land in Phase 6b and join the list there. Existing s3_key columns: only gdpr_export_jobs.storage_key, already named correctly — no rename needed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 03:15:59 +02:00
/**
* Storage backend migration core. The CLI in `scripts/migrate-storage.ts` and
* the admin API at `/api/v1/admin/storage/migrate` both call `runMigration()`
* here, so behaviour is identical regardless of trigger.
*
* See docs/berth-recommender-and-pdf-plan.md §4.7a + §14.9a for the contract.
*/
import { createHash } from 'node:crypto';
import { statfs } from 'node:fs/promises';
import { Readable } from 'node:stream';
import { and, eq, isNull, sql } from 'drizzle-orm';
import { db } from '@/lib/db';
import { systemSettings } from '@/lib/db/schema/system';
import { FilesystemBackend } from './filesystem';
import { resetStorageBackendCache, type StorageBackend, type StorageBackendName } from './index';
import { S3Backend } from './s3';
// ─── tables to walk ─────────────────────────────────────────────────────────
/**
 * Describes one database table whose rows reference stored objects, so the
 * migration can walk it without importing that table's schema.
 */
export interface StorageKeyTable {
/** Table name; interpolated verbatim into the raw SQL built by `listKeysFor`. */
table: string;
/** Column name holding the storage key (always `storage_key` going forward). */
keyColumn: string;
/** Primary-key column for per-row progress markers. */
pkColumn: string;
/** Optional content-type column (lets the target backend persist Content-Type). */
contentTypeColumn?: string;
}
/**
 * Tables that `runMigration()` walks when copying stored objects.
 *
 * Phase 6a ships an empty list; `berth_pdf_versions` and `brochure_versions`
 * land in Phase 6b. Add new entries here when new file-bearing tables are
 * introduced. The migration script reads each named table via raw SQL so it
 * does not need to import every domain's Drizzle schema.
 */
export const TABLES_WITH_STORAGE_KEYS: StorageKeyTable[] = [
// { table: 'berth_pdf_versions', keyColumn: 'storage_key', pkColumn: 'id', contentTypeColumn: 'content_type' },
// { table: 'brochure_versions', keyColumn: 'storage_key', pkColumn: 'id', contentTypeColumn: 'content_type' },
];
/**
 * Key passed to `pg_try_advisory_lock` / `pg_advisory_unlock` in
 * `runMigration()`; a migration that cannot take this lock aborts.
 */
const ADVISORY_LOCK_KEY = 0xc7000a01;
// ─── helpers ────────────────────────────────────────────────────────────────
/** Validated command-line arguments produced by `parseArgs`. */
interface CliArgs {
/** Backend named by `--from`. */
from: StorageBackendName;
/** Backend named by `--to`; `parseArgs` rejects a value equal to `from`. */
to: StorageBackendName;
/** True when `--dry-run` was present on the command line. */
dryRun: boolean;
}
/**
 * Turns a raw argument vector into validated migration arguments.
 *
 * Recognised flags are `--from <name>`, `--to <name>` and `--dry-run`; any
 * other token is ignored. When a flag is repeated, the last occurrence wins.
 *
 * @param argv Argument tokens to scan.
 * @returns The parsed `from`, `to` and `dryRun` values.
 * @throws Error with the usage text when `--from`/`--to` is missing or
 *   `--from` is not a known backend; a dedicated message when `--to` is not a
 *   known backend or when both name the same backend.
 */
export function parseArgs(argv: string[]): CliArgs {
  let from: string | undefined;
  let to: string | undefined;
  let dryRun = false;

  let cursor = 0;
  while (cursor < argv.length) {
    switch (argv[cursor]) {
      case '--dry-run':
        dryRun = true;
        break;
      case '--from':
        // The value is the next token (undefined when the flag is last).
        cursor += 1;
        from = argv[cursor];
        break;
      case '--to':
        cursor += 1;
        to = argv[cursor];
        break;
      default:
        break;
    }
    cursor += 1;
  }

  const isBackendName = (value: string | undefined): boolean =>
    value === 's3' || value === 'filesystem';

  if (!from || !to || !isBackendName(from)) {
    throw new Error('Usage: --from s3|filesystem --to s3|filesystem [--dry-run]');
  }
  if (!isBackendName(to)) {
    throw new Error('--to must be s3 or filesystem');
  }
  if (from === to) {
    throw new Error('--from and --to must differ');
  }

  return { from, to, dryRun } as CliArgs;
}
/**
 * Issues a `CREATE TABLE IF NOT EXISTS` for `_storage_migration_progress`,
 * the table that holds one marker row per (table_name, row_pk) pair.
 */
async function ensureProgressTable(): Promise<void> {
  // Statement text is kept exactly as-is; only the call shape differs.
  const createProgressTable = sql`
CREATE TABLE IF NOT EXISTS _storage_migration_progress (
table_name text NOT NULL,
row_pk text NOT NULL,
storage_key text NOT NULL,
sha256 text NOT NULL,
size_bytes bigint NOT NULL,
migrated_at timestamptz NOT NULL DEFAULT now(),
PRIMARY KEY (table_name, row_pk)
)
`;
  await db.execute(createProgressTable);
}
/**
 * Extracts the row list from a query result of unknown shape.
 *
 * An array is returned unchanged; otherwise the value's `rows` property is
 * returned, falling back to an empty array when the value or its `rows`
 * property is null/undefined.
 */
function rowsOf(result: unknown): unknown[] {
  if (Array.isArray(result)) {
    return result;
  }
  if (result == null) {
    return [];
  }
  const { rows } = result as { rows?: unknown[] };
  return rows ?? [];
}
/**
 * Reports whether `_storage_migration_progress` already holds a marker for
 * the given table name and primary key.
 *
 * @param tableName Value matched against the `table_name` column.
 * @param pk Value matched against the `row_pk` column.
 * @returns True when at least one matching marker row exists.
 */
async function isRowMigrated(tableName: string, pk: string): Promise<boolean> {
  const markerLookup = sql`
SELECT 1 FROM _storage_migration_progress
WHERE table_name = ${tableName} AND row_pk = ${pk}
LIMIT 1
`;
  const matches = rowsOf(await db.execute(markerLookup));
  return matches.length !== 0;
}
/**
 * Records a progress marker for one copied row. An existing marker for the
 * same (table_name, row_pk) is left untouched (`ON CONFLICT ... DO NOTHING`).
 *
 * @param tableName Table the row belongs to.
 * @param pk Primary key of the row, as text.
 * @param key Storage key that was copied.
 * @param sha256 Hex digest stored alongside the marker.
 * @param sizeBytes Byte count stored alongside the marker.
 */
async function markRowMigrated(
  tableName: string,
  pk: string,
  key: string,
  sha256: string,
  sizeBytes: number,
): Promise<void> {
  const insertMarker = sql`
INSERT INTO _storage_migration_progress (table_name, row_pk, storage_key, sha256, size_bytes)
VALUES (${tableName}, ${pk}, ${key}, ${sha256}, ${sizeBytes})
ON CONFLICT (table_name, row_pk) DO NOTHING
`;
  await db.execute(insertMarker);
}
/** One row that references a stored object, as produced by `listKeysFor`. */
interface RowRef {
/** Name of the table the row was read from. */
tableName: string;
/** Primary-key value of the row, converted with `String()`. */
pk: string;
/** Storage key read from the row, converted with `String()`. */
key: string;
/** Content type for the object; `application/octet-stream` when the row supplies none. */
contentType: string;
}
/**
 * Reads every row of `tbl` whose key column is non-null and converts it into
 * a `RowRef`.
 *
 * The table and column names are spliced directly into a raw SQL string, so
 * they must come from trusted configuration.
 *
 * @param tbl Table descriptor naming the table, key, pk and optional
 *   content-type columns.
 * @returns One `RowRef` per selected row; the content type defaults to
 *   `application/octet-stream` when the column is absent, non-string or empty.
 */
async function listKeysFor(tbl: StorageKeyTable): Promise<RowRef[]> {
  const { table, keyColumn, pkColumn, contentTypeColumn } = tbl;

  let selectList = `${pkColumn} as pk, ${keyColumn} as key`;
  if (contentTypeColumn) {
    selectList += `, ${contentTypeColumn} as content_type`;
  }
  const queryText = [
    `SELECT ${selectList}`,
    `FROM ${table}`,
    `WHERE ${keyColumn} IS NOT NULL`,
  ].join('\n');

  const fetched = rowsOf(await db.execute(sql.raw(queryText))) as Array<{
    pk: unknown;
    key: unknown;
    content_type?: unknown;
  }>;

  const refs: RowRef[] = [];
  for (const row of fetched) {
    const declaredType = row.content_type;
    const hasDeclaredType = typeof declaredType === 'string' && declaredType.length > 0;
    refs.push({
      tableName: table,
      pk: String(row.pk),
      key: String(row.key),
      contentType: hasDeclaredType ? declaredType : 'application/octet-stream',
    });
  }
  return refs;
}
// ─── streaming + sha256 verify ──────────────────────────────────────────────
/** Normalises one stream chunk to a Buffer (string chunks are encoded as UTF-8). */
function chunkToBuffer(chunk: unknown): Buffer {
  return Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string);
}

/** Reads `stream` to its end and returns all chunks concatenated into one Buffer. */
async function drainToBuffer(stream: Readable): Promise<Buffer> {
  const parts: Buffer[] = [];
  for await (const chunk of stream) {
    parts.push(chunkToBuffer(chunk));
  }
  return Buffer.concat(parts);
}

/** Computes the hex sha256 of `stream` chunk by chunk, without retaining the bytes. */
async function sha256OfStream(stream: Readable): Promise<string> {
  const hash = createHash('sha256');
  for await (const chunk of stream) {
    hash.update(chunkToBuffer(chunk));
  }
  return hash.digest('hex');
}

/**
 * Copy one object from `source` to `target` under the same key and verify it
 * twice.
 *
 * The source object is read fully into memory (it has to be handed to
 * `target.put` as a single buffer) and hashed. The digest reported by
 * `target.put` must match, and the object is then re-fetched from the target
 * and hashed again to catch storage-side corruption. The re-fetch is hashed
 * incrementally, so only one copy of the object is held in memory.
 *
 * @param source Backend the object is read from.
 * @param target Backend the object is written to and re-read from.
 * @param ref Row reference supplying the key, content type and the
 *   table/pk used in error messages.
 * @returns The sha256 hex digest and byte length of the copied object.
 * @throws Error when the digest returned by `put` or the digest of the
 *   re-fetched object differs from the digest of the source bytes.
 */
export async function copyAndVerify(
  source: StorageBackend,
  target: StorageBackend,
  ref: RowRef,
): Promise<{ sha256: string; sizeBytes: number }> {
  const buffer = await drainToBuffer((await source.get(ref.key)) as Readable);
  const sha256 = createHash('sha256').update(buffer).digest('hex');

  const putResult = await target.put(ref.key, buffer, {
    contentType: ref.contentType,
    sha256,
    sizeBytes: buffer.length,
  });
  if (putResult.sha256 !== sha256) {
    throw new Error(`sha256 mismatch on put for ${ref.tableName}/${ref.pk}`);
  }

  // Re-fetch from the target and verify a second time.
  const verifySha = await sha256OfStream((await target.get(ref.key)) as Readable);
  if (verifySha !== sha256) {
    throw new Error(`sha256 mismatch after round-trip for ${ref.tableName}/${ref.pk} (${ref.key})`);
  }

  return { sha256, sizeBytes: buffer.length };
}
// ─── pre-flight ─────────────────────────────────────────────────────────────
/**
 * Returns the product of the `bavail` and `bsize` figures that `statfs`
 * reports for `rootPath`, i.e. the free space available there in bytes.
 *
 * @param rootPath Path handed to `statfs`.
 */
async function freeBytesAt(rootPath: string): Promise<number> {
  const { bavail, bsize } = await statfs(rootPath);
  return Number(bavail) * Number(bsize);
}
/**
 * Writes `target` as the value of the `storage_backend` setting row whose
 * `portId` is null, updating the row when it exists and inserting it
 * otherwise, then resets the storage backend cache.
 *
 * @param target Backend name to store.
 * @param userId Written to the row's `updatedBy` column.
 */
async function flipBackendSetting(target: StorageBackendName, userId: string): Promise<void> {
  // Built fresh for each statement, matching the original's two separate conditions.
  const backendSettingRow = () =>
    and(eq(systemSettings.key, 'storage_backend'), isNull(systemSettings.portId));

  const current = await db.query.systemSettings.findFirst({ where: backendSettingRow() });

  if (!current) {
    await db.insert(systemSettings).values({
      key: 'storage_backend',
      value: target,
      portId: null,
      updatedBy: userId,
    });
  } else {
    await db
      .update(systemSettings)
      .set({ value: target, updatedBy: userId, updatedAt: new Date() })
      .where(backendSettingRow());
  }

  resetStorageBackendCache();
}
// ─── main ───────────────────────────────────────────────────────────────────
/** Inputs accepted by `runMigration()`. */
export interface MigrationOptions {
/** Backend to read from; used to build the source backend unless `source` is given. */
from: StorageBackendName;
/** Backend to write to; also the value stored in the `storage_backend` setting on success. */
to: StorageBackendName;
/** When true, object sizes are tallied but nothing is copied and the setting is not changed. */
dryRun: boolean;
/** Override for tests. */
source?: StorageBackend;
/** Override for tests. */
target?: StorageBackend;
/** Audit user id. Falls back to `'cli:migrate-storage'` when omitted. */
userId?: string;
}
/** Summary returned by `runMigration()`. */
export interface MigrationResult {
/** Number of rows with a storage key found across all walked tables. */
rowsConsidered: number;
/** Number of rows copied, verified and marked during this run. */
rowsMigrated: number;
/** Number of rows skipped because a progress marker already existed. */
rowsSkippedAlreadyDone: number;
/** Bytes copied this run; in a dry run, the sizes reported by the source for rows not yet migrated. */
totalBytes: number;
/** True when the `storage_backend` setting was switched to the target backend. */
flipped: boolean;
/** Echo of the `dryRun` option the migration was invoked with. */
dryRun: boolean;
}
/**
 * Copies every stored object referenced by `TABLES_WITH_STORAGE_KEYS` from the
 * source backend to the target backend and, unless this is a dry run,
 * switches the `storage_backend` setting to `opts.to`.
 *
 * Order of work:
 *   1. Take the advisory lock (fail fast if another holder has it).
 *   2. Walk all tables and split rows into "already marked" and "pending".
 *   3. When the target is the filesystem backend, run ONE free-disk
 *      pre-flight over the pending rows only, before anything is copied.
 *      Rows that already have a progress marker are on the target already,
 *      so counting them would make a resumed migration fail spuriously.
 *   4. Copy + verify + mark each pending row (or, in a dry run, only tally
 *      the sizes reported by the source).
 *   5. Flip the backend setting (skipped in a dry run).
 *
 * @param opts Migration options; see `MigrationOptions`.
 * @returns Row and byte counters plus whether the setting was flipped.
 * @throws Error when the advisory lock is unavailable, when free disk space
 *   is below pending bytes * 1.2, or when any copy fails verification.
 */
export async function runMigration(opts: MigrationOptions): Promise<MigrationResult> {
  // NOTE(review): lock and unlock are separate `db.execute` calls. Postgres
  // advisory locks are held per session; if `db` is backed by a connection
  // pool the two calls may land on different sessions — confirm.
  const lockResult = await db.execute(sql`SELECT pg_try_advisory_lock(${ADVISORY_LOCK_KEY}) as ok`);
  const lockRows = rowsOf(lockResult) as Array<{ ok: boolean }>;
  if (!lockRows[0]?.ok) {
    throw new Error('Could not acquire storage migration advisory lock');
  }

  try {
    await ensureProgressTable();
    const source = opts.source ?? (await buildBackendForMigration(opts.from));
    const target = opts.target ?? (await buildBackendForMigration(opts.to));

    let rowsConsidered = 0;
    let rowsMigrated = 0;
    let rowsSkippedAlreadyDone = 0;
    let totalBytes = 0;

    // Pass 1: find the rows that still need copying, across all tables.
    // NOTE(review): markers are keyed by (table, pk) only, not by migration
    // direction or storage key — a later migration the other way would see
    // these rows as done. Confirm whether markers should be cleared on success.
    const pending: Awaited<ReturnType<typeof listKeysFor>> = [];
    for (const tbl of TABLES_WITH_STORAGE_KEYS) {
      const refs = await listKeysFor(tbl);
      rowsConsidered += refs.length;
      for (const ref of refs) {
        if (await isRowMigrated(ref.tableName, ref.pk)) {
          rowsSkippedAlreadyDone += 1;
        } else {
          pending.push(ref);
        }
      }
    }

    // Pre-flight free-disk check when target is filesystem. Runs once, before
    // the first copy, and only over bytes that still have to be written.
    if (pending.length > 0 && opts.to === 'filesystem' && target instanceof FilesystemBackend) {
      const sizes = await Promise.all(
        pending.map(async (ref) => (await source.head(ref.key))?.sizeBytes ?? 0),
      );
      const sumBytes = sizes.reduce((a, b) => a + b, 0);
      // NOTE(review): free space is measured at process.cwd(); if the
      // filesystem backend's root is on another volume this is the wrong
      // figure — confirm.
      const free = await freeBytesAt(process.cwd());
      if (free < sumBytes * 1.2) {
        throw new Error(
          `Insufficient disk: need ${Math.round(sumBytes / 1e6)}MB + 20% margin, have ${Math.round(free / 1e6)}MB free`,
        );
      }
    }

    // Pass 2: copy (or, for a dry run, just measure) each pending row.
    for (const ref of pending) {
      if (opts.dryRun) {
        const head = await source.head(ref.key);
        totalBytes += head?.sizeBytes ?? 0;
        continue;
      }
      const { sha256, sizeBytes } = await copyAndVerify(source, target, ref);
      // Marker is written only after the round-trip verification passed, so
      // an interrupted run resumes at the first unverified row.
      await markRowMigrated(ref.tableName, ref.pk, ref.key, sha256, sizeBytes);
      rowsMigrated += 1;
      totalBytes += sizeBytes;
    }

    let flipped = false;
    if (!opts.dryRun) {
      await flipBackendSetting(opts.to, opts.userId ?? 'cli:migrate-storage');
      flipped = true;
    }

    return {
      rowsConsidered,
      rowsMigrated,
      rowsSkippedAlreadyDone,
      totalBytes,
      flipped,
      dryRun: opts.dryRun,
    };
  } finally {
    await db.execute(sql`SELECT pg_advisory_unlock(${ADVISORY_LOCK_KEY})`);
  }
}
/**
 * Builds the backend instance for one side of a migration.
 *
 * `'filesystem'` yields a `FilesystemBackend` rooted at
 * `STORAGE_FILESYSTEM_ROOT` (default `./storage`) with no proxy HMAC secret;
 * any other name yields an `S3Backend` created with an empty options object.
 *
 * @param name Which backend to build.
 */
async function buildBackendForMigration(name: StorageBackendName): Promise<StorageBackend> {
  if (name !== 'filesystem') {
    return S3Backend.create({});
  }
  const root = process.env.STORAGE_FILESYSTEM_ROOT ?? './storage';
  return FilesystemBackend.create({ root, proxyHmacSecretEncrypted: null });
}