import { Worker, type Job } from 'bullmq';
import { env } from '@/lib/env';
import { and, eq, lt, isNotNull } from 'drizzle-orm';
import type { ConnectionOptions } from 'bullmq';

import { db } from '@/lib/db';
import { formSubmissions } from '@/lib/db/schema/documents';
import { gdprExports } from '@/lib/db/schema/gdpr';
import { aiUsageLedger } from '@/lib/db/schema/ai-usage';
import { auditLogs, errorEvents } from '@/lib/db/schema/system';
import { websiteSubmissions } from '@/lib/db/schema/website-submissions';
import { logger } from '@/lib/logger';
import { attachWorkerAudit } from '@/lib/queue/audit-helpers';
import { getStorageBackend } from '@/lib/storage';
import { QUEUE_CONFIGS } from '@/lib/queue';

/** AI usage rows older than this are deleted by the retention job. */
const AI_USAGE_RETENTION_DAYS = 90;

/** error_events rows older than this are pruned. Migration 0040 declares
 *  this contract; the worker had no implementation until now. */
const ERROR_EVENTS_RETENTION_DAYS = 90;

/** audit_logs rows older than this are pruned. Mirrors error_events.
 *  Metadata is masked at insert time but older rows have no operational
 *  value past the window and represent residual stale-PII exposure. */
const AUDIT_LOGS_RETENTION_DAYS = 90;

/** Raw website inquiry payloads (website_submissions) - kept long enough
 *  to investigate "why didn't this lead reach the CRM" inbound questions
 *  but not indefinitely. 180d aligns with the typical sales cycle. */
const WEBSITE_SUBMISSIONS_RETENTION_DAYS = 180;

/** Milliseconds in one 24-hour day; used for retention cutoff arithmetic. */
const MS_PER_DAY = 24 * 60 * 60 * 1000;

/**
 * Computes the timestamp before which rows fall outside a retention window.
 *
 * @param days - Length of the retention window in 24-hour days.
 * @param now - Reference instant in epoch milliseconds; defaults to the current time.
 * @returns The instant exactly `days` * 24h before `now`.
 */
function retentionCutoff(days: number, now: number = Date.now()): Date {
  return new Date(now - days * MS_PER_DAY);
}

/**
 * Worker for the `maintenance` queue. Dispatches on `job.name`:
 *
 * - `currency-refresh`, `database-backup`, `alerts-evaluate`, `bounce-poll`:
 *   delegate to a lazily imported service.
 * - `form-expiry-check`: flips pending form submissions past `expiresAt` to `expired`.
 * - `analytics-refresh`: refreshes snapshots for every port, port by port.
 * - `expense-dedup-scan`: runs duplicate detection for `job.data.expenseId`.
 * - `gdpr-export-cleanup`: deletes expired export objects and their rows.
 * - `*-retention`: delete rows older than the matching `*_RETENTION_DAYS` constant.
 *
 * Unknown job names are logged at warn level and the job completes normally.
 * Errors thrown by a case are not caught here, except where a case explicitly
 * handles per-item failures (analytics refresh, GDPR cleanup).
 */
export const maintenanceWorker = new Worker(
  'maintenance',
  async (job: Job) => {
    logger.info({ jobId: job.id, jobName: job.name }, 'Processing maintenance job');

    switch (job.name) {
      case 'currency-refresh': {
        const { refreshRates } = await import('@/lib/services/currency');
        await refreshRates();
        break;
      }

      case 'database-backup': {
        // Scheduled full-bundle backup pushed to every enabled destination.
        // No-op until an admin turns the schedule on AND enables a destination
        // (`backup_schedule` setting + `backup_destinations`). Replaces the
        // previous silent no-op (this case did not exist before).
        const { runScheduledBackupPush } =
          await import('@/lib/services/backup-destinations.service');
        const summary = await runScheduledBackupPush();
        logger.info(summary, 'Scheduled backup push complete');
        break;
      }

      case 'form-expiry-check': {
        const result = await db
          .update(formSubmissions)
          .set({ status: 'expired' })
          .where(
            and(eq(formSubmissions.status, 'pending'), lt(formSubmissions.expiresAt, new Date())),
          )
          .returning({ id: formSubmissions.id });
        logger.info({ expired: result.length }, 'Form expiry check complete');
        break;
      }

      case 'alerts-evaluate': {
        const { runAlertEngine } = await import('@/lib/services/alert-engine');
        const summary = await runAlertEngine();
        logger.info(summary, 'Alert engine sweep complete');
        break;
      }

      case 'bounce-poll': {
        const { processImapBouncePoll } = await import('@/jobs/processors/imap-bounce-poller');
        await processImapBouncePoll();
        break;
      }

      case 'analytics-refresh': {
        const { ports } = await import('@/lib/db/schema/ports');
        const { refreshSnapshotsForPort } = await import('@/lib/services/analytics.service');
        const allPorts = await db.select({ id: ports.id }).from(ports);
        for (const p of allPorts) {
          // One port failing must not stop the remaining ports from refreshing.
          try {
            await refreshSnapshotsForPort(p.id);
          } catch (err) {
            logger.warn({ portId: p.id, err }, 'Analytics refresh failed for port');
          }
        }
        logger.info({ count: allPorts.length }, 'Analytics snapshot refresh complete');
        break;
      }

      case 'expense-dedup-scan': {
        // `job.data` may be absent if the job was enqueued without a payload;
        // read it defensively so that case reaches the warning below instead
        // of throwing a TypeError on destructuring.
        const payload: { expenseId?: string } | null | undefined = job.data;
        const expenseId = payload?.expenseId;
        if (!expenseId) {
          logger.warn({ jobId: job.id }, 'expense-dedup-scan missing expenseId');
          break;
        }
        const { markBestDuplicate } = await import('@/lib/services/expense-dedup.service');
        const matchedId = await markBestDuplicate(expenseId);
        logger.info({ expenseId, matchedId: matchedId ?? null }, 'expense-dedup-scan complete');
        break;
      }

      case 'gdpr-export-cleanup': {
        // GDPR Article 17 (right to erasure): when an export expires we must
        // actually delete the bytes, not just mark a flag. Pulls every row
        // past expiresAt with a storage_key, removes the stored object, then
        // deletes the row. Rows without a storage key are not selected here.
        const expired = await db
          .select({ id: gdprExports.id, storageKey: gdprExports.storageKey })
          .from(gdprExports)
          .where(
            and(
              isNotNull(gdprExports.expiresAt),
              lt(gdprExports.expiresAt, new Date()),
              isNotNull(gdprExports.storageKey),
            ),
          );

        // Resolved lazily on first use and then reused for the rest of the
        // run. Resolution happens inside the per-row try so a backend failure
        // is still counted as a failed row rather than failing the whole job.
        let storage: Awaited<ReturnType<typeof getStorageBackend>> | undefined;
        let removed = 0;
        let failed = 0;
        for (const row of expired) {
          try {
            if (row.storageKey) {
              storage ??= await getStorageBackend();
              await storage.delete(row.storageKey);
            }
            // Only drop the row once the object delete has succeeded, so a
            // failed delete leaves the row in place for the next sweep.
            await db.delete(gdprExports).where(eq(gdprExports.id, row.id));
            removed++;
          } catch (err) {
            failed++;
            logger.warn({ err, exportId: row.id }, 'Failed to clean up GDPR export');
          }
        }
        logger.info({ removed, failed, total: expired.length }, 'GDPR export cleanup complete');
        break;
      }

      case 'ai-usage-retention': {
        // Trim ai_usage_ledger to the retention window. Older rows aren't
        // useful for budget rollups (which always operate on the current
        // period) and bloat both the table and admin breakdown queries.
        const cutoff = retentionCutoff(AI_USAGE_RETENTION_DAYS);
        const result = await db
          .delete(aiUsageLedger)
          .where(lt(aiUsageLedger.createdAt, cutoff))
          .returning({ id: aiUsageLedger.id });
        logger.info(
          { deleted: result.length, retentionDays: AI_USAGE_RETENTION_DAYS },
          'AI usage retention sweep complete',
        );
        break;
      }

      case 'error-events-retention': {
        // Honor the contract from migration 0040: error_events older than
        // ERROR_EVENTS_RETENTION_DAYS get dropped. Otherwise the table
        // grows unbounded and the admin error log becomes unusable.
        const cutoff = retentionCutoff(ERROR_EVENTS_RETENTION_DAYS);
        const result = await db
          .delete(errorEvents)
          .where(lt(errorEvents.createdAt, cutoff))
          .returning({ requestId: errorEvents.requestId });
        logger.info(
          { deleted: result.length, retentionDays: ERROR_EVENTS_RETENTION_DAYS },
          'Error events retention sweep complete',
        );
        break;
      }

      case 'audit-logs-retention': {
        const cutoff = retentionCutoff(AUDIT_LOGS_RETENTION_DAYS);
        const result = await db
          .delete(auditLogs)
          .where(lt(auditLogs.createdAt, cutoff))
          .returning({ id: auditLogs.id });
        logger.info(
          { deleted: result.length, retentionDays: AUDIT_LOGS_RETENTION_DAYS },
          'Audit logs retention sweep complete',
        );
        break;
      }

      case 'website-submissions-retention': {
        // Raw inquiry payloads from the marketing-site dual-write. Keep
        // long enough to debug capture issues but not forever - these
        // rows include reCAPTCHA + IP + UA metadata.
        const cutoff = retentionCutoff(WEBSITE_SUBMISSIONS_RETENTION_DAYS);
        const result = await db
          .delete(websiteSubmissions)
          .where(lt(websiteSubmissions.receivedAt, cutoff))
          .returning({ id: websiteSubmissions.id });
        logger.info(
          { deleted: result.length, retentionDays: WEBSITE_SUBMISSIONS_RETENTION_DAYS },
          'Website submissions retention sweep complete',
        );
        break;
      }

      default:
        logger.warn({ jobName: job.name }, 'Unknown maintenance job');
    }
  },
  {
    connection: { url: env.REDIS_URL } as ConnectionOptions,
    concurrency: QUEUE_CONFIGS.maintenance.concurrency,
  },
);

// `job` may be undefined in the 'failed' event, hence the optional chaining.
maintenanceWorker.on('failed', (job, err) => {
  logger.error({ jobId: job?.id, jobName: job?.name, err }, 'Maintenance job failed');
});

attachWorkerAudit(maintenanceWorker, 'maintenance');