fix(audit): bounce/email — M8 (Message-ID port-safe bounce match), L16 (recipient validation, CRLF, header trust note)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 13:18:20 +02:00
parent cc5c053a79
commit fd69a75980
4 changed files with 253 additions and 30 deletions
--- a/src/lib/email/bounce-parser.ts
+++ b/src/lib/email/bounce-parser.ts
@@ -34,10 +34,87 @@ export interface ParsedBounce {
  reason: string;
  /** Inbound message-id (or in-reply-to header) for cross-reference. */
  inReplyTo: string | null;
+  /**
+   * Candidate Message-IDs of the *original* (bounced) message, gathered
+   * from the NDR's `In-Reply-To` + `References` headers AND from the
+   * returned-headers block embedded in the DSN body (RFC 3464 carries the
+   * failed message's headers in a `message/rfc822` part). The poller
+   * matches these against `document_sends.message_id` to pin a bounce to
+   * the exact originating send (and therefore the correct port) without
+   * having to guess by recipient + time window. Angle brackets/whitespace
+   * are stripped and values lowercased so comparison is normalization-safe.
+   */
+  originalMessageIds: string[];
  /** SMTP status code (e.g. "5.1.1") if the NDR carried one. */
  statusCode: string | null;
 }

+/**
+ * Loose shape check, not a full RFC 5322 parser: exactly one `@`, no
+ * whitespace, a dot in the domain part, at most 254 characters. Deliberately
+ * identical to the regex used on the outbound side (`document-sends.service.ts`).
+ * The `originalRecipient` we parse comes from an attacker-controllable
+ * inbound NDR body, so it MUST pass this before it lands in a DB query.
+ * `<`, `>` and quotes still match - escape on render (audit L16a).
+ */
+const RFC5322_EMAIL = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
+
+function isValidEmail(email: string | null | undefined): email is string {
+  return Boolean(email) && email!.length <= 254 && RFC5322_EMAIL.test(email!);
+}
+
+/** Trim, strip surrounding angle brackets and lowercase a Message-ID for
+ *  normalization-safe equality. Returns null for empty/garbage input. */
+function normalizeMessageId(raw: string | null | undefined): string | null {
+  if (!raw) return null;
+  const trimmed = raw
+    .trim()
+    .replace(/^<+|>+$/g, '')
+    .trim()
+    .toLowerCase();
+  // A Message-ID must contain an '@' and have no internal whitespace; reject
+  // anything that doesn't look like one so we never query on noise.
+  if (!trimmed || /\s/.test(trimmed) || !trimmed.includes('@')) return null;
+  return trimmed;
+}
+
+/**
+ * Collect every plausible original-message-id from the parsed NDR:
+ *   1. `In-Reply-To` (single id) and `References` (one or many) headers.
+ *   2. Angle-bracketed `Message-ID:` header lines found in `bodyText` -
+ *      many MTAs inline the failed message's headers in the DSN body rather
+ *      than setting In-Reply-To on the NDR.
+ * Deduplicated + normalized; insertion order is preserved.
+ */
+function collectOriginalMessageIds(
+  inReplyTo: string | null,
+  references: string[] | string | undefined,
+  bodyText: string,
+): string[] {
+  const out = new Set<string>();
+  const push = (v: string | null | undefined): void => {
+    const n = normalizeMessageId(v);
+    if (n) out.add(n);
+  };
+
+  push(inReplyTo);
+  if (Array.isArray(references)) {
+    for (const r of references) push(r);
+  } else if (typeof references === 'string') {
+    // A single `References` header may carry space-separated ids.
+    for (const r of references.split(/\s+/)) push(r);
+  }
+
+  // Returned `Message-ID:` header(s) embedded in the DSN body.
+  const re = /^\s*Message-ID:\s*(<[^>\r\n]+>)/gim;
+  let m: RegExpExecArray | null;
+  while ((m = re.exec(bodyText)) !== null) {
+    push(m[1]);
+  }
+
+  return [...out];
+}
+
 const HARD_BOUNCE_STATUSES = new Set([
  '5.0.0',
  '5.1.1', // mailbox does not exist
@@ -116,6 +193,7 @@ export async function parseBounce(raw: string | Buffer): Promise<ParsedBounce> {
      bounceClass: 'unknown',
      reason: 'Failed to parse message',
      inReplyTo: null,
+      originalMessageIds: [],
      statusCode: null,
    };
  }
@@ -123,13 +201,20 @@ export async function parseBounce(raw: string | Buffer): Promise<ParsedBounce> {
  const subject = parsed.subject ?? '';
  const bodyText = parsed.text ?? '';
  const inReplyTo = (parsed.inReplyTo as string | undefined) ?? null;
+  const originalMessageIds = collectOriginalMessageIds(inReplyTo, parsed.references, bodyText);

  if (looksLikeOoo(subject, bodyText)) {
+    const oooFrom = parsed.from?.value[0]?.address ?? null;
    return {
-      originalRecipient: parsed.from?.value[0]?.address ?? null,
+      // OOO auto-replies come *from* the recipient, so the From address is
+      // the "original recipient". Validate it the same way as a bounce
+      // recipient (audit L16a) - an invalid value would only ever pollute a
+      // string we don't act on for OOO, but we keep the contract uniform.
+      originalRecipient: isValidEmail(oooFrom) ? oooFrom : null,
      bounceClass: 'ooo',
      reason: 'Out-of-office auto-reply',
      inReplyTo,
+      originalMessageIds,
      statusCode: null,
    };
  }
@@ -137,7 +222,11 @@ export async function parseBounce(raw: string | Buffer): Promise<ParsedBounce> {
  // Try to walk the multipart/report DSN structure first; falls back to
  // plain-text heuristics for non-RFC-compliant Outlook NDRs.
  const statusCode = extractStatusFromBody(bodyText);
-  const originalRecipient = extractRecipientFromBody(bodyText);
+  const rawRecipient = extractRecipientFromBody(bodyText);
+  // The recipient is parsed out of an attacker-controllable inbound body;
+  // reject anything that isn't a syntactically valid address before it can
+  // reach a DB query or a notification string (audit L16a).
+  const originalRecipient = isValidEmail(rawRecipient) ? rawRecipient : null;

  const cls = classifyByStatus(statusCode);

@@ -154,6 +243,7 @@ export async function parseBounce(raw: string | Buffer): Promise<ParsedBounce> {
      bounceClass: 'unknown',
      reason: 'No bounce indicators detected',
      inReplyTo,
+      originalMessageIds,
      statusCode,
    };
  }
@@ -163,6 +253,7 @@ export async function parseBounce(raw: string | Buffer): Promise<ParsedBounce> {
    bounceClass: cls ?? 'hard',
    reason: deriveReason(statusCode, bodyText, subject),
    inReplyTo,
+    originalMessageIds,
    statusCode,
  };
 }
--- a/src/lib/email/shell.ts
+++ b/src/lib/email/shell.ts
@@ -50,6 +50,17 @@ export function renderShell({ title, body, branding }: ShellOpts): string {
  // any host). Mail clients have no app origin, so re-absolutize here.
  const logoUrl = absolutizeBrandingUrl(branding?.logoUrl ?? DEFAULT_LOGO_URL);
  const backgroundUrl = absolutizeBrandingUrl(branding?.backgroundUrl ?? DEFAULT_BACKGROUND_URL);
+  // SECURITY / trust boundary (audit L16b): `emailHeaderHtml` / `emailFooterHtml`
+  // are admin-authored branding HTML and are interpolated RAW into the email
+  // body below (intentional - admins legitimately need to paste a styled
+  // legal footer / marketing strip). Authoring them is gated on the
+  // `manage_settings` permission, so the worst case is the highest-privileged
+  // user injecting markup into mail sent from a tenant they already control -
+  // not a cross-tenant or privilege-escalation vector (each port reads only
+  // its own settings rows). If a future tenant model has MULTIPLE admins with
+  // mutually-distrusting `manage_settings` holders, allowlist-sanitize these
+  // two fields here (e.g. via a sanitize-html allowlist of safe tags/attrs)
+  // before interpolation. Until then we keep them raw by design.
  const headerHtml = branding?.emailHeaderHtml ?? '';
  const footerHtml = branding?.emailFooterHtml ?? '';

--- a/src/lib/email/template-overrides.ts
+++ b/src/lib/email/template-overrides.ts
@@ -28,14 +28,32 @@ export async function loadSubjectOverride(
  return typeof value === 'string' && value.trim() ? value : null;
 }

-/** Synchronous client-side helper for substituting {{token}} placeholders. */
+/**
+ * Substitute {{token}} placeholders in an email subject line (synchronous).
+ * Placeholders whose token is undefined/null are left in place verbatim.
+ *
+ * Defensive CRLF neutralization (audit L16c): a subject is an email *header*,
+ * and a CR/LF inside a substituted token value is the classic header-injection
+ * primitive (smuggle a `Bcc:` / second header / a fake body). nodemailer
+ * already neutralizes CR/LF in header values before transmission, but a
+ * token value can flow from user-controlled data (client name, berth label,
+ * …), so each run of C0 control characters (CR, LF and TAB included) or
+ * DEL in a substituted value is collapsed to a single space here too, as
+ * defense in depth. The static template text is admin-authored and left
+ * untouched.
+ */
 export function applySubjectTokens(
  template: string,
  tokens: Record<string, string | number | undefined>,
 ): string {
  return template.replace(/\{\{(\w+)\}\}/g, (match, name: string) => {
    const v = tokens[name];
-    return v === undefined || v === null ? match : String(v);
+    if (v === undefined || v === null) return match;
+    // Replace CR/LF (and the rest of the C0/DEL control range) with a single
+    // space so a multi-line token value can never break the subject onto a
+    // new header line.
+
+    return String(v).replace(/[\x00-\x1f\x7f]+/g, ' ');
  });
 }