feat(storage): pluggable s3-or-filesystem backend + migration CLI + admin UI
Phase 6a from docs/berth-recommender-and-pdf-plan.md §4.7a + §14.9a. Lays
the storage groundwork for Phase 6b/7 file-bearing schemas (per-berth PDFs,
brochures) without touching those domains yet.
New files:
- src/lib/storage/index.ts StorageBackend interface + per-process
factory keyed on system_settings.
- src/lib/storage/s3.ts S3-compatible backend (MinIO/AWS/B2/R2/
Wasabi/Tigris) wrapping the existing minio
JS client. Includes a healthCheck() used
by the admin "Test connection" button.
- src/lib/storage/filesystem.ts Local filesystem backend with all §14.9a
mitigations baked in.
- src/lib/storage/migrate.ts Shared migration core — pg_try_advisory_lock,
per-row resumable progress markers,
sha256 round-trip verification, atomic
storage_backend flip on success.
- scripts/migrate-storage.ts Thin CLI shim around runMigration().
- src/app/api/storage/[token]/route.ts
Filesystem proxy GET. Verifies HMAC,
enforces single-use replay protection
via Redis SET NX, streams via NextResponse
ReadableStream with explicit Content-Type
+ Content-Disposition. Node runtime only.
- src/app/api/v1/admin/storage/route.ts
GET status + POST connection test.
- src/app/api/v1/admin/storage/migrate/route.ts
Super-admin-only POST that runs the
exact same runMigration() as the CLI.
- src/app/(dashboard)/[portSlug]/admin/storage/page.tsx
Super-admin admin UI (current backend,
capacity stats, switch button with
dry-run, test connection, backup hint).
- src/components/admin/storage-admin-panel.tsx
Client component for the page above.
§14.9a critical mitigations implemented:
- Path-traversal: storage keys validated against ^[a-zA-Z0-9/_.-]+$;
`..`, `.`, `//`, leading `/`, and overlength keys rejected.
- Realpath: storage root realpath'd at create time, every per-key
resolution checked against the realpath'd prefix.
- Storage root created (or chmod'd) to 0o700.
- Multi-node refusal: FilesystemBackend.create() throws when
MULTI_NODE_DEPLOYMENT=true.
- HMAC token: sha256-HMAC over the (key, expiry, nonce, filename,
content-type) payload. Verified with timingSafeEqual; bad sig,
expired, or invalid-key payloads all return 403.
- Single-use replay: token body cached in Redis SET NX EX 1800s.
- sha256 round-trip: copyAndVerify() re-fetches from the target after
put() and aborts the migration on any mismatch.
- Free-disk pre-flight: when migrating to filesystem, sums byte counts
via source.head() and aborts if free space < total * 1.2.
- pg_try_advisory_lock(0xc7000a01) prevents concurrent migrations: a second run fails immediately instead of waiting for the lock.
- Resumable: per-row progress markers in _storage_migration_progress.
system_settings keys read by the factory (jsonb, no schema change):
storage_backend, storage_s3_endpoint, storage_s3_region,
storage_s3_bucket, storage_s3_access_key,
storage_s3_secret_key_encrypted, storage_s3_force_path_style,
storage_filesystem_root, storage_proxy_hmac_secret_encrypted.
Defaults: storage_backend=`s3`, storage_filesystem_root=`./storage`
(./storage added to .gitignore).
Tests added (34 tests, all green):
- tests/unit/storage/filesystem-backend.test.ts — key validation
allow/reject matrix, realpath escape, 0o700 perms, multi-node
refusal, HMAC token sign/verify/tamper/expire/invalid-key.
- tests/unit/storage/copy-and-verify.test.ts — sha256 mismatch on
round-trip aborts the migration.
- tests/integration/storage/proxy-route.test.ts — happy path, wrong
HMAC secret, expired token, replay rejection.
Phase 6a ships zero file-bearing tables — TABLES_WITH_STORAGE_KEYS is
intentionally empty. berth_pdf_versions and brochure_versions land in
Phase 6b and join the list there. Existing s3_key columns: only
gdpr_export_jobs.storage_key, already named correctly — no rename needed.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
318
src/lib/storage/migrate.ts
Normal file
318
src/lib/storage/migrate.ts
Normal file
@@ -0,0 +1,318 @@
|
||||
/**
|
||||
* Storage backend migration core. The CLI in `scripts/migrate-storage.ts` and
|
||||
* the admin API at `/api/v1/admin/storage/migrate` both call `runMigration()`
|
||||
* here, so behaviour is identical regardless of trigger.
|
||||
*
|
||||
* See docs/berth-recommender-and-pdf-plan.md §4.7a + §14.9a for the contract.
|
||||
*/
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
import { statfs } from 'node:fs/promises';
|
||||
import { Readable } from 'node:stream';
|
||||
|
||||
import { and, eq, isNull, sql } from 'drizzle-orm';
|
||||
|
||||
import { db } from '@/lib/db';
|
||||
import { systemSettings } from '@/lib/db/schema/system';
|
||||
|
||||
import { FilesystemBackend } from './filesystem';
|
||||
import { resetStorageBackendCache, type StorageBackend, type StorageBackendName } from './index';
|
||||
import { S3Backend } from './s3';
|
||||
|
||||
// ─── tables to walk ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Describes one database table whose rows point at stored objects, so the
 * migration can enumerate the keys it has to copy.
 */
export interface StorageKeyTable {
  /** Table name; interpolated verbatim into the raw SQL that lists its keys. */
  table: string;

  /** Column name holding the storage key (always `storage_key` going forward). */
  keyColumn: string;

  /** Primary-key column; its stringified value identifies the row in progress markers. */
  pkColumn: string;

  /** Optional content-type column (lets the target backend persist Content-Type). */
  contentTypeColumn?: string;
}
|
||||
|
||||
/**
 * Tables the migration walks. Phase 6a deliberately ships this list empty:
 * `berth_pdf_versions` and `brochure_versions` arrive in Phase 6b and should be
 * registered here, as should any later file-bearing table. Each table is read
 * through raw SQL, so no domain Drizzle schema has to be imported.
 */
export const TABLES_WITH_STORAGE_KEYS: StorageKeyTable[] = [
  // { table: 'berth_pdf_versions', keyColumn: 'storage_key', pkColumn: 'id', contentTypeColumn: 'content_type' },
  // { table: 'brochure_versions', keyColumn: 'storage_key', pkColumn: 'id', contentTypeColumn: 'content_type' },
];

/** Key handed to `pg_try_advisory_lock` / `pg_advisory_unlock` by `runMigration()`. */
const ADVISORY_LOCK_KEY = 0xc7000a01;
|
||||
|
||||
// ─── helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Parsed command-line arguments, as produced by `parseArgs()`. */
interface CliArgs {
  /** Backend named by `--from`. */
  from: StorageBackendName;

  /** Backend named by `--to`. */
  to: StorageBackendName;

  /** True when `--dry-run` was passed. */
  dryRun: boolean;
}
|
||||
|
||||
/**
 * Parse the migration CLI arguments.
 *
 * Recognised options are `--from <backend>`, `--to <backend>` and `--dry-run`,
 * where a backend is `s3` or `filesystem`. Tokens that do not look like options
 * (and a bare `--` separator) are ignored, but an unrecognised `--option` is
 * rejected: silently dropping a mistyped `--dry-run` would turn a rehearsal
 * into a real migration.
 *
 * @param argv Argument tokens to scan.
 * @returns The validated `from` / `to` / `dryRun` triple.
 * @throws Error when `--from` or `--to` is missing or not a known backend, when
 *   both name the same backend, or when an unknown `--option` is present.
 */
export function parseArgs(argv: string[]): CliArgs {
  const isBackendName = (value: string | undefined): value is StorageBackendName =>
    value === 's3' || value === 'filesystem';

  let from: string | undefined;
  let to: string | undefined;
  let dryRun = false;

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--dry-run') dryRun = true;
    else if (a === '--from') from = argv[++i];
    else if (a === '--to') to = argv[++i];
    else if (a !== undefined && a.length > 2 && a.startsWith('--')) {
      // Fail loudly on typos such as `--dryrun` instead of ignoring them.
      throw new Error(
        `Unknown option ${a}. Usage: --from s3|filesystem --to s3|filesystem [--dry-run]`,
      );
    }
  }

  if (!from || !to || !isBackendName(from)) {
    throw new Error('Usage: --from s3|filesystem --to s3|filesystem [--dry-run]');
  }
  if (!isBackendName(to)) {
    throw new Error('--to must be s3 or filesystem');
  }
  if (from === to) {
    throw new Error('--from and --to must differ');
  }
  return { from, to, dryRun };
}
|
||||
|
||||
/**
 * Create the `_storage_migration_progress` bookkeeping table if it does not
 * exist yet. One row per (table_name, row_pk) records the key, sha256 and size
 * of an object that has been copied.
 */
async function ensureProgressTable(): Promise<void> {
  const createProgressTable = sql`
    CREATE TABLE IF NOT EXISTS _storage_migration_progress (
      table_name text NOT NULL,
      row_pk text NOT NULL,
      storage_key text NOT NULL,
      sha256 text NOT NULL,
      size_bytes bigint NOT NULL,
      migrated_at timestamptz NOT NULL DEFAULT now(),
      PRIMARY KEY (table_name, row_pk)
    )
  `;
  await db.execute(createProgressTable);
}
|
||||
|
||||
/**
 * Normalise a `db.execute()` result to a plain array of rows.
 *
 * Accepts either a bare array of rows or an object carrying a `rows` array.
 * Anything else — including an object whose `rows` property is not an array —
 * yields an empty array, so callers can always rely on array methods.
 *
 * @param result Raw value returned by the database driver.
 * @returns The row array, or `[]` when none can be found.
 */
function rowsOf(result: unknown): unknown[] {
  if (Array.isArray(result)) return result;
  const rows = (result as { rows?: unknown } | null | undefined)?.rows;
  // Guard the declared return type: never hand back a non-array `rows` value.
  return Array.isArray(rows) ? rows : [];
}
|
||||
|
||||
/**
 * Report whether a progress marker already exists for the given row.
 *
 * @param tableName Table the row belongs to.
 * @param pk Stringified primary key of the row.
 * @returns True when `_storage_migration_progress` holds a matching marker.
 */
async function isRowMigrated(tableName: string, pk: string): Promise<boolean> {
  const markerLookup = sql`
    SELECT 1 FROM _storage_migration_progress
    WHERE table_name = ${tableName} AND row_pk = ${pk}
    LIMIT 1
  `;
  const matches = rowsOf(await db.execute(markerLookup));
  return matches.length > 0;
}
|
||||
|
||||
/**
 * Record that a row's object has been copied. An existing marker for the same
 * (table_name, row_pk) is left untouched (`ON CONFLICT ... DO NOTHING`).
 *
 * @param tableName Table the row belongs to.
 * @param pk Stringified primary key of the row.
 * @param key Storage key that was copied.
 * @param sha256 Hex digest of the copied bytes.
 * @param sizeBytes Size of the copied object in bytes.
 */
async function markRowMigrated(
  tableName: string,
  pk: string,
  key: string,
  sha256: string,
  sizeBytes: number,
): Promise<void> {
  const insertMarker = sql`
    INSERT INTO _storage_migration_progress (table_name, row_pk, storage_key, sha256, size_bytes)
    VALUES (${tableName}, ${pk}, ${key}, ${sha256}, ${sizeBytes})
    ON CONFLICT (table_name, row_pk) DO NOTHING
  `;
  await db.execute(insertMarker);
}
|
||||
|
||||
/** One stored object to migrate, as discovered by `listKeysFor()`. */
interface RowRef {
  /** Table the row was read from. */
  tableName: string;

  /** Primary-key value of the row, stringified. */
  pk: string;

  /** Storage key of the object, stringified. */
  key: string;

  /** Content type to store with the copy; `application/octet-stream` when the row has none. */
  contentType: string;
}
|
||||
|
||||
/**
 * List every row of `tbl` that has a non-NULL storage key.
 *
 * The table and column names are interpolated into raw SQL, so they must come
 * from trusted configuration (the `TABLES_WITH_STORAGE_KEYS` list), never from
 * user input.
 *
 * @param tbl Table descriptor naming the key, primary-key and optional
 *   content-type columns.
 * @returns One `RowRef` per row; rows without a non-empty string content type
 *   fall back to `application/octet-stream`.
 */
async function listKeysFor(tbl: StorageKeyTable): Promise<RowRef[]> {
  const { table, keyColumn, pkColumn, contentTypeColumn } = tbl;

  const selectList = [`${pkColumn} as pk`, `${keyColumn} as key`];
  if (contentTypeColumn) {
    selectList.push(`${contentTypeColumn} as content_type`);
  }

  const query = sql.raw(
    `SELECT ${selectList.join(', ')}
     FROM ${table}
     WHERE ${keyColumn} IS NOT NULL`,
  );
  const found = rowsOf(await db.execute(query)) as Array<{
    pk: unknown;
    key: unknown;
    content_type?: unknown;
  }>;

  const refs: RowRef[] = [];
  for (const row of found) {
    let contentType = 'application/octet-stream';
    if (typeof row.content_type === 'string' && row.content_type.length > 0) {
      contentType = row.content_type;
    }
    refs.push({
      tableName: table,
      pk: String(row.pk),
      key: String(row.key),
      contentType,
    });
  }
  return refs;
}
|
||||
|
||||
// ─── streaming + sha256 verify ──────────────────────────────────────────────
|
||||
|
||||
/**
 * Read `stream` to the end and return its bytes together with their sha256.
 * The whole payload is held in memory.
 */
async function readAllWithSha256(stream: unknown): Promise<{ buffer: Buffer; sha256: string }> {
  const chunks: Buffer[] = [];
  for await (const chunk of stream as Readable) {
    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string));
  }
  const buffer = Buffer.concat(chunks);
  return { buffer, sha256: createHash('sha256').update(buffer).digest('hex') };
}

/**
 * Copy one object from `source` to `target` and verify it twice.
 *
 * The object is read fully into memory (it is not streamed through), hashed,
 * and written with `target.put()`. The digest reported by `put()` is compared
 * with the source digest, then the object is fetched back from the target and
 * hashed again to catch storage-side corruption.
 *
 * @param source Backend to read `ref.key` from.
 * @param target Backend to write `ref.key` to.
 * @param ref Row reference supplying the key, content type and the
 *   table/pk used in error messages.
 * @returns The sha256 hex digest and byte length of the copied object.
 * @throws Error when the digest returned by `put()` or the digest of the
 *   re-fetched object differs from the source digest.
 */
export async function copyAndVerify(
  source: StorageBackend,
  target: StorageBackend,
  ref: RowRef,
): Promise<{ sha256: string; sizeBytes: number }> {
  const { buffer, sha256 } = await readAllWithSha256(await source.get(ref.key));

  const putResult = await target.put(ref.key, buffer, {
    contentType: ref.contentType,
    sha256,
    sizeBytes: buffer.length,
  });
  if (putResult.sha256 !== sha256) {
    throw new Error(`sha256 mismatch on put for ${ref.tableName}/${ref.pk}`);
  }

  // Re-fetch from the target and verify a second time.
  const roundTrip = await readAllWithSha256(await target.get(ref.key));
  if (roundTrip.sha256 !== sha256) {
    throw new Error(`sha256 mismatch after round-trip for ${ref.tableName}/${ref.pk} (${ref.key})`);
  }

  return { sha256, sizeBytes: buffer.length };
}
|
||||
|
||||
// ─── pre-flight ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Free space, in bytes, on the filesystem containing `rootPath`: the `statfs`
 * count of blocks available to unprivileged users times the block size.
 *
 * @param rootPath Path on the filesystem to inspect.
 */
async function freeBytesAt(rootPath: string): Promise<number> {
  const { bavail, bsize } = await statfs(rootPath);
  return Number(bavail) * Number(bsize);
}
|
||||
|
||||
/**
 * Point the global `storage_backend` setting (the row with a NULL port id) at
 * `target`, inserting the row if it does not exist, then reset the cached
 * storage backend so the new value is picked up.
 *
 * @param target Backend name to store.
 * @param userId Recorded as `updatedBy` on the settings row.
 */
async function flipBackendSetting(target: StorageBackendName, userId: string): Promise<void> {
  // Matches the global (port-less) storage_backend row.
  const globalBackendRow = () =>
    and(eq(systemSettings.key, 'storage_backend'), isNull(systemSettings.portId));

  const current = await db.query.systemSettings.findFirst({ where: globalBackendRow() });

  if (!current) {
    await db.insert(systemSettings).values({
      key: 'storage_backend',
      value: target,
      portId: null,
      updatedBy: userId,
    });
  } else {
    await db
      .update(systemSettings)
      .set({ value: target, updatedBy: userId, updatedAt: new Date() })
      .where(globalBackendRow());
  }

  resetStorageBackendCache();
}
|
||||
|
||||
// ─── main ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Inputs to `runMigration()`. */
export interface MigrationOptions {
  /** Backend to copy from; used to build the source unless `source` is supplied. */
  from: StorageBackendName;

  /** Backend to copy to; also the value written to `storage_backend` on success. */
  to: StorageBackendName;

  /** When true, nothing is copied and the backend setting is not flipped. */
  dryRun: boolean;

  /** Override for tests: use this backend instead of building one from `from`. */
  source?: StorageBackend;

  /** Override for tests: use this backend instead of building one from `to`. */
  target?: StorageBackend;

  /** Audit user id; `runMigration()` falls back to `cli:migrate-storage` when omitted. */
  userId?: string;
}
|
||||
|
||||
/** Summary returned by `runMigration()`. */
export interface MigrationResult {
  /** Rows with a storage key found across all registered tables. */
  rowsConsidered: number;

  /** Rows copied and verified during this run. */
  rowsMigrated: number;

  /** Rows skipped because a progress marker already existed. */
  rowsSkippedAlreadyDone: number;

  /** Bytes copied; in a dry run, the summed `head()` sizes of the rows that would be copied. */
  totalBytes: number;

  /** True when the `storage_backend` setting was switched to the target. */
  flipped: boolean;

  /** Echo of the `dryRun` option the run was started with. */
  dryRun: boolean;
}
|
||||
|
||||
/**
 * Copy every registered storage object from one backend to another, then
 * switch the `storage_backend` setting to the target.
 *
 * Steps, all performed while holding the migration advisory lock:
 *  1. Ensure the progress table exists and build (or accept) both backends.
 *  2. List every row of every table in `TABLES_WITH_STORAGE_KEYS` and set aside
 *     the rows that already have a progress marker.
 *  3. When the target is the filesystem backend, check once — before anything
 *     is written — that free space covers the pending bytes plus a 20% margin.
 *  4. Copy and verify each pending row, recording a marker after each one. In
 *     a dry run only the sizes are summed.
 *  5. Unless this is a dry run, flip the backend setting.
 *
 * @param opts Direction, dry-run flag, optional backend overrides and audit user.
 * @returns Counters describing what was considered, copied and skipped.
 * @throws Error when the advisory lock is already held, when free disk space is
 *   insufficient, or when any copy fails verification (the setting is then
 *   left unchanged).
 */
export async function runMigration(opts: MigrationOptions): Promise<MigrationResult> {
  // NOTE(review): pg_try_advisory_lock takes a session-level lock. If `db` is
  // backed by a connection pool, the unlock in `finally` may run on a different
  // connection than the one holding the lock — confirm how `db` is configured.
  const lockResult = await db.execute(sql`SELECT pg_try_advisory_lock(${ADVISORY_LOCK_KEY}) as ok`);
  const lockRows = rowsOf(lockResult) as Array<{ ok: boolean }>;
  if (!lockRows[0]?.ok) {
    throw new Error('Could not acquire storage migration advisory lock');
  }

  try {
    await ensureProgressTable();

    const source = opts.source ?? (await buildBackendForMigration(opts.from));
    const target = opts.target ?? (await buildBackendForMigration(opts.to));

    let rowsConsidered = 0;
    let rowsMigrated = 0;
    let rowsSkippedAlreadyDone = 0;
    let totalBytes = 0;

    // Pass 1: enumerate all tables up front so the pre-flight below sees the
    // whole workload rather than one table at a time.
    // NOTE(review): markers are keyed only by (table_name, row_pk) and nothing
    // in this function clears them, so a later migration in the opposite
    // direction would appear to skip every row — confirm how markers are reset.
    const pending: RowRef[] = [];
    for (const tbl of TABLES_WITH_STORAGE_KEYS) {
      const refs = await listKeysFor(tbl);
      rowsConsidered += refs.length;
      for (const ref of refs) {
        if (await isRowMigrated(ref.tableName, ref.pk)) {
          rowsSkippedAlreadyDone += 1;
        } else {
          pending.push(ref);
        }
      }
    }

    // Pre-flight free-disk check when target is filesystem. Only rows still to
    // be copied count: on a resumed run the earlier copies already occupy disk.
    if (opts.to === 'filesystem' && target instanceof FilesystemBackend) {
      const sizes = await Promise.all(
        pending.map(async (ref) => (await source.head(ref.key))?.sizeBytes ?? 0),
      );
      const sumBytes = sizes.reduce((a, b) => a + b, 0);
      // Measure the volume that will receive the files. When this function
      // built the target itself, the root is the one buildBackendForMigration()
      // uses (keep the two in sync); for an injected target the root is not
      // known here, so fall back to the working directory.
      const preflightPath = opts.target
        ? process.cwd()
        : (process.env.STORAGE_FILESYSTEM_ROOT ?? './storage');
      const free = await freeBytesAt(preflightPath);
      if (free < sumBytes * 1.2) {
        throw new Error(
          `Insufficient disk: need ${Math.round(sumBytes / 1e6)}MB + 20% margin, have ${Math.round(free / 1e6)}MB free`,
        );
      }
    }

    // Pass 2: copy (or, in a dry run, just size) each pending row.
    for (const ref of pending) {
      if (opts.dryRun) {
        const head = await source.head(ref.key);
        totalBytes += head?.sizeBytes ?? 0;
        continue;
      }
      const { sha256, sizeBytes } = await copyAndVerify(source, target, ref);
      await markRowMigrated(ref.tableName, ref.pk, ref.key, sha256, sizeBytes);
      rowsMigrated += 1;
      totalBytes += sizeBytes;
    }

    let flipped = false;
    if (!opts.dryRun) {
      await flipBackendSetting(opts.to, opts.userId ?? 'cli:migrate-storage');
      flipped = true;
    }

    return {
      rowsConsidered,
      rowsMigrated,
      rowsSkippedAlreadyDone,
      totalBytes,
      flipped,
      dryRun: opts.dryRun,
    };
  } finally {
    await db.execute(sql`SELECT pg_advisory_unlock(${ADVISORY_LOCK_KEY})`);
  }
}
|
||||
|
||||
/**
 * Construct the backend used for one side of a migration.
 *
 * `filesystem` is rooted at `STORAGE_FILESYSTEM_ROOT` (default `./storage`) and
 * created without a proxy HMAC secret; any other name yields an S3 backend
 * created from an empty options object.
 *
 * @param name Which backend to build.
 */
async function buildBackendForMigration(name: StorageBackendName): Promise<StorageBackend> {
  if (name !== 'filesystem') {
    return S3Backend.create({});
  }
  const root = process.env.STORAGE_FILESYSTEM_ROOT ?? './storage';
  return FilesystemBackend.create({ root, proxyHmacSecretEncrypted: null });
}
|
||||
Reference in New Issue
Block a user