/**
 * GDPR client-data export orchestration.
 *
 * `requestGdprExport()` creates a row, queues a BullMQ job, and returns. The
 * `processGdprExportJob()` handler builds the bundle, ZIPs JSON+HTML into
 * MinIO, optionally emails the client a download link, and updates the
 * row to status='ready' or 'sent'.
 *
 * Bundles are kept for 30 days then expired by maintenance (the
 * gdpr_exports.expires_at column is the cleanup target).
 */

import archiver from 'archiver';
import { eq, and } from 'drizzle-orm';
import { PassThrough } from 'node:stream';

import { db } from '@/lib/db';
import { gdprExports, type GdprExport } from '@/lib/db/schema/gdpr';
import { clients, clientContacts } from '@/lib/db/schema/clients';
import { ports } from '@/lib/db/schema/ports';
import { env } from '@/lib/env';
import { NotFoundError, ValidationError } from '@/lib/errors';
import { logger } from '@/lib/logger';
import { minioClient, getPresignedUrl } from '@/lib/minio';
import { getQueue } from '@/lib/queue';
import { createAuditLog } from '@/lib/audit';
import { buildClientBundle, renderBundleHtml } from '@/lib/services/gdpr-bundle-builder';

/** Days after which a finished bundle's `expiresAt` falls due. */
const EXPIRY_DAYS = 30;
const PRESIGN_EXPIRY_SECONDS = 7 * 24 * 60 * 60; // 7 days for the email link

/**
 * Hard cap on the produced ZIP. Article-15 bundles are JSON+HTML only (no
 * receipts/contracts) so even very active clients land at <1 MB; anything
 * larger is almost certainly an unbounded relation we forgot to cap.
 * Failing the job is safer than uploading a 500MB blob to MinIO + emailing
 * an unsuspecting client a download link of mystery size.
 */
const MAX_BUNDLE_BYTES = 50 * 1024 * 1024;

interface RequestExportInput {
  clientId: string;
  portId: string;
  requestedBy: string;
  /** When true, the bundle is emailed to the client's primary address once ready. */
  emailToClient: boolean;
  /**
   * Override recipient (e.g. lawyer or agent). When set, takes precedence
   * over the client's primary email.
   */
  emailOverride?: string | null;
  ipAddress: string;
  userAgent: string;
}

export interface RequestExportResult {
  export: GdprExport;
}

/**
 * Looks up the client's primary email contact row, if one exists.
 * Shared by the request-time validation and the send-time recipient lookup
 * so both apply exactly the same filter.
 */
function findPrimaryEmailContact(clientId: string) {
  return db.query.clientContacts.findFirst({
    where: and(
      eq(clientContacts.clientId, clientId),
      eq(clientContacts.channel, 'email'),
      eq(clientContacts.isPrimary, true),
    ),
  });
}

/**
 * Records a new export request and queues the job that builds it.
 *
 * @param input - Client/port identifiers, the requesting user, delivery
 *   options, and the request metadata written to the audit log.
 * @returns The inserted `gdprExports` row (created with status 'pending').
 * @throws NotFoundError when the client is missing or belongs to another port.
 * @throws ValidationError when email delivery is requested but there is
 *   neither an override address nor a primary email contact.
 */
export async function requestGdprExport(input: RequestExportInput): Promise<RequestExportResult> {
  const client = await db.query.clients.findFirst({
    where: eq(clients.id, input.clientId),
  });
  // A client from a different port is reported as "not found".
  if (!client || client.portId !== input.portId) throw new NotFoundError('Client');

  // Fail fast: without an override we need a primary email to send to later.
  if (input.emailToClient && !input.emailOverride) {
    const primary = await findPrimaryEmailContact(input.clientId);
    if (!primary) {
      throw new ValidationError(
        'Client has no primary email contact — provide an emailOverride or add one before exporting.',
      );
    }
  }

  const [row] = await db
    .insert(gdprExports)
    .values({
      portId: input.portId,
      clientId: input.clientId,
      requestedBy: input.requestedBy,
      status: 'pending',
    })
    .returning();
  if (!row) throw new Error('Failed to create export row');

  // Not awaited: the audit write is started but the request does not wait on it.
  void createAuditLog({
    userId: input.requestedBy,
    portId: input.portId,
    action: 'request_gdpr_export',
    entityType: 'client',
    entityId: input.clientId,
    metadata: { exportId: row.id, emailToClient: input.emailToClient },
    ipAddress: input.ipAddress,
    userAgent: input.userAgent,
  });

  await getQueue('export').add('gdpr-export', {
    exportId: row.id,
    portId: input.portId,
    clientId: input.clientId,
    emailToClient: input.emailToClient,
    emailOverride: input.emailOverride ?? null,
  });

  return { export: row };
}

interface ProcessJobInput {
  exportId: string;
  portId: string;
  clientId: string;
  emailToClient: boolean;
  emailOverride: string | null;
}

/**
 * Worker entry point. Loads the bundle, ZIPs it, uploads to MinIO,
 * (optionally) emails the client. Failures mark the row 'failed' with
 * the truncated error.
 *
 * @param input - Job payload as queued by `requestGdprExport()`.
 * @throws Rethrows any failure after recording it on the row, so the queue
 *   can apply its retry policy.
 */
export async function processGdprExportJob(input: ProcessJobInput): Promise<void> {
  await db
    .update(gdprExports)
    .set({ status: 'building' })
    .where(eq(gdprExports.id, input.exportId));

  try {
    const bundle = await buildClientBundle(input.clientId, input.portId);
    const json = JSON.stringify(bundle, null, 2);
    const html = renderBundleHtml(bundle);

    // Stream a ZIP into a buffer. Receipts/contracts are not included
    // here — they live on file rows referenced by the bundle and would
    // bloat the archive. Add them later if Article-15 requests demand.
    const zip = archiver('zip', { zlib: { level: 9 } });
    const sink = new PassThrough();
    const chunks: Buffer[] = [];
    sink.on('data', (c: Buffer) => chunks.push(c));
    const done = new Promise<Buffer>((resolve, reject) => {
      sink.on('end', () => resolve(Buffer.concat(chunks)));
      sink.on('error', reject);
      zip.on('error', reject);
    });

    zip.pipe(sink);
    zip.append(json, { name: 'client.json' });
    zip.append(html, { name: 'client.html' });
    zip.append(
      `Personal data export for client ${input.clientId}\nGenerated ${bundle.meta.generatedAt}\n`,
      { name: 'README.txt' },
    );

    // Await both promises together: if finalize() rejects, `done` may reject
    // too, and awaiting them one after the other would leave that second
    // rejection without a handler.
    const [buffer] = await Promise.all([done, zip.finalize()]);

    if (buffer.length > MAX_BUNDLE_BYTES) {
      throw new Error(
        `GDPR bundle exceeded ${MAX_BUNDLE_BYTES} bytes (got ${buffer.length}); refusing to upload`,
      );
    }

    // The object key is namespaced by port slug; falls back to 'unknown'
    // when the port row (or its slug) cannot be resolved.
    const port = await db.query.ports.findFirst({ where: eq(ports.id, input.portId) });
    const portSlug = port?.slug ?? 'unknown';
    const storageKey = `${portSlug}/gdpr-exports/${input.clientId}/${input.exportId}.zip`;

    await minioClient.putObject(env.MINIO_BUCKET, storageKey, buffer, buffer.length, {
      'Content-Type': 'application/zip',
      'Content-Disposition': `attachment; filename="gdpr-export-${input.clientId}.zip"`,
    });

    const expiresAt = new Date(Date.now() + EXPIRY_DAYS * 24 * 60 * 60 * 1000);
    await db
      .update(gdprExports)
      .set({
        status: 'ready',
        storageKey,
        sizeBytes: buffer.length,
        readyAt: new Date(),
        expiresAt,
      })
      .where(eq(gdprExports.id, input.exportId));

    if (input.emailToClient) {
      await emailExport(input, storageKey);
    }
  } catch (err) {
    logger.error({ err, exportId: input.exportId }, 'GDPR export job failed');
    await db
      .update(gdprExports)
      .set({
        status: 'failed',
        // Stored message is capped at 1000 characters.
        error: err instanceof Error ? err.message.slice(0, 1000) : 'Unknown error',
      })
      .where(eq(gdprExports.id, input.exportId));
    throw err; // let BullMQ retry per the queue config
  }
}

/**
 * Emails a presigned download link for an uploaded bundle and marks the row
 * 'sent'. When no recipient can be resolved, logs a warning and returns
 * without sending or updating the row.
 *
 * @param input - Job payload; supplies the override address and identifiers.
 * @param storageKey - Object key of the uploaded ZIP to presign.
 */
async function emailExport(input: ProcessJobInput, storageKey: string): Promise<void> {
  // Resolve the recipient: explicit override beats primary contact.
  let recipient = input.emailOverride;
  if (!recipient) {
    const primary = await findPrimaryEmailContact(input.clientId);
    recipient = primary?.value ?? null;
  }
  if (!recipient) {
    logger.warn(
      { exportId: input.exportId, clientId: input.clientId },
      'GDPR export ready but no email recipient — skipping send',
    );
    return;
  }

  const url = await getPresignedUrl(storageKey, PRESIGN_EXPIRY_SECONDS);
  const client = await db.query.clients.findFirst({ where: eq(clients.id, input.clientId) });
  const name = client?.fullName ?? 'there';
  const expiry = new Date(Date.now() + PRESIGN_EXPIRY_SECONDS * 1000).toUTCString();

  const subject = 'Your personal data export is ready';
  // NOTE(review): markup reconstructed around the surviving copy — confirm
  // against the intended template. Every interpolated value is HTML-escaped,
  // including the URL placed in the href attribute.
  const html = `
    <div>
      <p>Hello ${escapeHtml(name)},</p>
      <p>You requested a copy of the personal data we hold about you. The export is ready and contains:</p>
      <ul>
        <li><code>client.json</code> — machine-readable data dump</li>
        <li><code>client.html</code> — same data as a printable web page</li>
      </ul>
      <p><a href="${escapeHtml(url)}">Download the export</a> (ZIP, expires ${escapeHtml(expiry)})</p>
      <p>If you have any questions, reply to this email.</p>
    </div>
  `;
  const text = `Your personal data export is ready: ${url}\nThe link expires ${expiry}.`;

  // The email module is imported dynamically at call time.
  const { sendEmail } = await import('@/lib/email/index');
  await sendEmail(recipient, subject, html, undefined, text, input.portId);

  await db
    .update(gdprExports)
    .set({ status: 'sent', sentAt: new Date(), sentTo: recipient })
    .where(eq(gdprExports.id, input.exportId));
}

/**
 * Escapes a value for safe inclusion in HTML text or a quoted attribute.
 * `null`/`undefined` become an empty string; anything else is stringified.
 * `&` is replaced first so the entities produced by later steps are not
 * double-escaped.
 */
function escapeHtml(s: unknown): string {
  if (s === null || s === undefined) return '';
  return String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Lists exports for a client (most-recent first) — feeds the admin "history" UI.
 * Returns at most 25 rows.
 *
 * @throws NotFoundError when the client is missing or belongs to another port.
 */
export async function listClientExports(clientId: string, portId: string) {
  const client = await db.query.clients.findFirst({ where: eq(clients.id, clientId) });
  if (!client || client.portId !== portId) throw new NotFoundError('Client');
  return db.query.gdprExports.findMany({
    where: and(eq(gdprExports.clientId, clientId), eq(gdprExports.portId, portId)),
    orderBy: (t, { desc }) => [desc(t.createdAt)],
    limit: 25,
  });
}

/**
 * Generates a fresh signed URL for an existing ready/sent export.
 *
 * @throws NotFoundError when no export with this id exists for the port.
 * @throws ValidationError when the export has no stored object or its
 *   status is neither 'ready' nor 'sent'.
 */
export async function getExportDownloadUrl(exportId: string, portId: string): Promise<string> {
  const row = await db.query.gdprExports.findFirst({
    where: and(eq(gdprExports.id, exportId), eq(gdprExports.portId, portId)),
  });
  if (!row) throw new NotFoundError('Export');
  if (!row.storageKey || (row.status !== 'ready' && row.status !== 'sent')) {
    throw new ValidationError('Export is not ready to download');
  }
  return getPresignedUrl(row.storageKey, PRESIGN_EXPIRY_SECONDS);
}