feat(audit-cleanup): finish all 15 outstanding items from verified backlog

Audit cleanup completion plan, all tiers shipped:

Tier 1 (security + data integrity)
- A.7 RTBF true wipe: redact email_messages body/subject/addresses for
  threads owned by deleted client; redact document_sends.recipient_email;
  collect file storage keys + delete blobs post-commit.
- A.8 user_permission_overrides FK: documented inline why cascade is
  correct (not set-null as audit suggested) — overrides have no value
  without their user.
- W2.14 PII redaction: camelCase normalization in audit.ts +
  error-events.service.ts isSensitiveKey; added city/postal/country/
  birth fragments. firstName/lastName/dateOfBirth/postalCode etc. now
  caught in BOTH masker paths. 12 new test cases lock the coverage.

Tier 2 (Documenso completion + refactor)
- C.2: documentEvents.recipient_email column + partial unique index for
  per-recipient webhook dedup (migration 0075). handleDocumentSigned
  now sets recipient_email on insert.
- Phase 2: completion_cc_emails distribution. handleDocumentCompleted
  reads documents.completionCcEmails, filters out signer-duplicates
  case-insensitively, fans signed PDF out to non-signer recipients.
- C.4: extracted createPublicInterest() service from the 346-line
  api/public/interests route. Route becomes a thin shell (rate-limit,
  port resolution, audit log, email fan-out). The trio creation logic
  is now unit-testable without an HTTP fixture.
- Phase 4: POST /api/v1/document-templates/[id]/detect-fields wired
  to document-field-detector.detectFields(). Sparkles "Auto-detect"
  button added to template-editor.tsx — maps DetectedField → marker
  with best-guess merge token (DATE / NAME / EMAIL); user retags.

Tier 3 (reporting + recommender snapshot lockfiles)
- W7.reports: extracted rollupStageRevenue / rollupStageCounts /
  computeTotalForecast / computeOccupancyRate / rollupBerthStatusCounts
  into src/lib/services/report-math.ts (pure functions). 16 new tests
  including an inline-snapshot lockfile on a representative 7-stage
  forecast. report-generators.ts now delegates.
- W7.recommender: 18 new toMatchSnapshot tripwires on classifyTier
  boundaries + computeHeat at canonical input points.

Tier 4 (rolling)
- W6.attach: fixed outdated CLAUDE.md claim — threshold banner is
  informational and never depended on IMAP; bounce monitoring (the
  IMAP poller) is separate.
- D.1 + D.2: documented deferral inline with full why-not-build-it
  reasoning so a future engineer sees the rationale.
- G.1: representative formatDate sweep (audit-log-list, user-list,
  document-templates merge tokens, document-signing email). The
  remaining ~100 call sites will be converted on a rolling basis.

Quality gates: 1420/1420 vitest (46 new tests above baseline of 1374),
tsc clean, 0 lint errors.

Plan: docs/superpowers/plans/2026-05-18-audit-cleanup-completion.md
Migration: 0075_c2_document_events_recipient_email.sql (applied to dev DB).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-18 18:22:36 +02:00
parent ef0dc5abc4
commit b3f87563c6
25 changed files with 2569 additions and 350 deletions

View File

@@ -37,7 +37,7 @@ import { interests } from '@/lib/db/schema/interests';
import { berthReservations } from '@/lib/db/schema/reservations';
import { files, documents, formSubmissions } from '@/lib/db/schema/documents';
import { documentSends } from '@/lib/db/schema/brochures';
import { emailThreads } from '@/lib/db/schema/email';
import { emailThreads, emailMessages } from '@/lib/db/schema/email';
import { reminders } from '@/lib/db/schema/operations';
import { scratchpadNotes } from '@/lib/db/schema/system';
import { websiteSubmissions } from '@/lib/db/schema/website-submissions';
@@ -47,8 +47,11 @@ import { sendEmail } from '@/lib/email';
import { logger } from '@/lib/logger';
import { createAuditLog, type AuditMeta } from '@/lib/audit';
import { demoteSystemFolderOnEntityDelete } from '@/lib/services/document-folders.service';
import { getStorageBackend } from '@/lib/storage';
import { ConflictError, NotFoundError, ValidationError } from '@/lib/errors';
const ERASED_SENTINEL = '[erased]';
const CODE_TTL_SECONDS = 10 * 60;
function codeKey(userId: string, clientId: string): string {
@@ -180,6 +183,15 @@ export async function hardDeleteClient(args: {
// forces the operator to request a fresh code.
await redis.del(key);
// Storage keys we'll need to delete POST-commit. Collected inside the tx
// so the read is consistent with what the tx detached. Deleting blobs
// INSIDE the tx would block the commit on remote storage latency and
// leave the tx hanging if S3 is slow; deleting AFTER commit means an
// S3 outage at most leaks the blob (a known acceptable RTBF tradeoff,
// since the DB row is detached + filename redacted, so the blob has
// no identifying metadata and can be reaped by a future sweeper).
const blobStorageKeys: string[] = [];
await db.transaction(async (tx) => {
// Lock the client row.
const [locked] = await tx
@@ -213,8 +225,58 @@ export async function hardDeleteClient(args: {
);
}
// Detach nullable FKs so we keep their audit history.
await tx.update(files).set({ clientId: null }).where(eq(files.clientId, args.clientId));
// A.7 RTBF wipe — Article-17 erasure of PII-bearing fields, not just FK
// detach. The previous code merely nullified clientId, which left:
// - email_messages.{body_html, body_text, subject, from/to/cc} intact
// - document_sends.recipient_email intact
// - files.{original_name, storage_path blobs} intact
// Below we (a) collect blob storage paths so we can delete them
// post-commit, (b) redact PII text columns to a sentinel, and only
// then (c) detach the FKs so the audit-trail rows survive without
// their data subject's content.
// (a) Collect file storage paths for post-commit blob deletion, and
// redact the stored filenames (which may themselves contain PII like
// "alice-smith-passport.pdf") while detaching the rows.
const fileRows = await tx
.select({ id: files.id, storagePath: files.storagePath })
.from(files)
.where(eq(files.clientId, args.clientId));
blobStorageKeys.push(...fileRows.map((f) => f.storagePath));
if (fileRows.length > 0) {
await tx
.update(files)
.set({
clientId: null,
originalName: ERASED_SENTINEL,
filename: ERASED_SENTINEL,
})
.where(eq(files.clientId, args.clientId));
}
// (b) Redact email_messages content for threads owned by this client.
// Threads themselves stay (we detach via clientId=null below) so the
// audit log "a thread existed" remains; the message bodies, subjects,
// and address arrays — all PII — get wiped.
const threadRows = await tx
.select({ id: emailThreads.id })
.from(emailThreads)
.where(eq(emailThreads.clientId, args.clientId));
if (threadRows.length > 0) {
const threadIds = threadRows.map((t) => t.id);
await tx
.update(emailMessages)
.set({
bodyHtml: ERASED_SENTINEL,
bodyText: ERASED_SENTINEL,
subject: ERASED_SENTINEL,
fromAddress: ERASED_SENTINEL,
toAddresses: [ERASED_SENTINEL],
ccAddresses: null,
})
.where(inArray(emailMessages.threadId, threadIds));
}
await tx.update(documents).set({ clientId: null }).where(eq(documents.clientId, args.clientId));
await tx
.update(formSubmissions)
@@ -225,9 +287,12 @@ export async function hardDeleteClient(args: {
.set({ clientId: null })
.where(eq(emailThreads.clientId, args.clientId));
await tx.update(reminders).set({ clientId: null }).where(eq(reminders.clientId, args.clientId));
// (c) document_sends — redact recipient_email when detaching. The row
// stays (audit log "a doc was sent") but the recipient identity is wiped.
await tx
.update(documentSends)
.set({ clientId: null })
.set({ clientId: null, recipientEmail: ERASED_SENTINEL })
.where(eq(documentSends.clientId, args.clientId));
// G-C2: scratchpad_notes.linked_client_id is RESTRICT (default for no
// onDelete clause). Any rep who linked a scratchpad note to this client
@@ -264,6 +329,33 @@ export async function hardDeleteClient(args: {
);
});
// A.7 RTBF: delete blobs from storage post-commit. We never want a
// storage error to abort the DB tx (PII removal must succeed durably
// even if S3 is flaky), so this runs after commit and logs failures
// individually. Blobs that survive a failed delete are left for a
// sweeper job to reap. NOTE(review): the files rows appear to be kept
// (detached + redacted), so such blobs are still row-referenced —
// confirm a sweeper exists that handles this case.
if (blobStorageKeys.length > 0) {
void (async () => {
const storage = await getStorageBackend();
let deleted = 0;
for (const key of blobStorageKeys) {
try {
await storage.delete(key);
deleted += 1;
} catch (err) {
logger.error(
{ err, clientId: args.clientId, storageKey: key },
'hardDeleteClient: blob delete failed (RTBF)',
);
}
}
logger.info(
{ clientId: args.clientId, deletedBlobs: deleted, totalBlobs: blobStorageKeys.length },
'hardDeleteClient: blob deletion complete',
);
})();
}
void createAuditLog({
portId: args.portId,
userId: args.requesterUserId,