fix(audit): bounce/email — M8 (Message-ID port-safe bounce match), L16 (recipient validation, CRLF, header trust note)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-02 13:18:20 +02:00
parent cc5c053a79
commit fd69a75980
4 changed files with 253 additions and 30 deletions

View File

@@ -34,10 +34,87 @@ export interface ParsedBounce {
reason: string;
/** Inbound message-id (or in-reply-to header) for cross-reference. */
inReplyTo: string | null;
/**
* Candidate Message-IDs of the *original* (bounced) message, gathered
* from the NDR's `In-Reply-To` + `References` headers AND from the
* returned-headers block embedded in the DSN body (RFC 3464 carries the
* failed message's headers in a `message/rfc822` part). The poller
* matches these against `document_sends.message_id` to pin a bounce to
* the exact originating send (and therefore the correct port) without
* having to guess by recipient + time window. Angle brackets/whitespace
* are stripped and values lowercased so comparison is normalization-safe.
*/
originalMessageIds: string[];
/** SMTP status code (e.g. "5.1.1") if the NDR carried one. */
statusCode: string | null;
}
/**
 * Pragmatic (RFC 5322-ish, not a full grammar) e-mail syntax pattern:
 * exactly one `@`, no whitespace anywhere, and at least one `.` inside the
 * domain part.
 *
 * Per the audit note (L16a) this is meant to stay identical to the strict
 * pattern used on the outbound side (`document-sends.service.ts`), so an
 * address refused at send time cannot be accepted here either.
 */
const RFC5322_EMAIL = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;

/**
 * Type guard for a syntactically acceptable e-mail address.
 *
 * The recipient parsed out of an inbound NDR body is attacker-controllable,
 * so callers use this gate before the value reaches a DB query or a
 * user-facing notification string.
 *
 * @param email Candidate address; `null`, `undefined` and `''` are rejected.
 * @returns `true` (narrowing to `string`) when the value is non-empty, at
 *   most 254 characters long, and matches {@link RFC5322_EMAIL}.
 */
function isValidEmail(email: string | null | undefined): email is string {
  // Covers null, undefined and the empty string in one guard.
  if (typeof email !== 'string' || email === '') return false;
  // Upper bound on the total address length accepted by this validator.
  if (email.length > 254) return false;
  return RFC5322_EMAIL.test(email);
}
/**
 * Canonicalize a Message-ID so two spellings of the same id compare equal.
 *
 * Surrounding whitespace and any run of leading `<` / trailing `>` are
 * removed, then the value is lowercased.
 *
 * @param raw Header value or captured token; may be `null`/`undefined`.
 * @returns The canonical id, or `null` when the input is empty or does not
 *   look like a Message-ID (no `@`, or whitespace left inside the value).
 */
function normalizeMessageId(raw: string | null | undefined): string | null {
  if (!raw) return null;

  const unwrapped = raw.trim().replace(/^<+|>+$/g, '');
  const id = unwrapped.trim().toLowerCase();

  // Only values shaped like a Message-ID are returned, so callers never
  // query on noise: non-empty, contains '@', no internal whitespace.
  const looksLikeMessageId = id.length > 0 && id.includes('@') && !/\s/.test(id);
  return looksLikeMessageId ? id : null;
}
/**
 * Collect every plausible Message-ID of the original (bounced) message from
 * a parsed NDR. Sources, in order:
 *   1. The `In-Reply-To` header. RFC 5322 allows it to carry more than one
 *      msg-id, so the value is split on whitespace rather than treated as a
 *      single id (a multi-id value would otherwise be rejected wholesale by
 *      the normalizer's "no internal whitespace" rule).
 *   2. The `References` header, given either as one whitespace-separated
 *      string or as an array; array entries are split on whitespace too, in
 *      case an entry still holds several ids.
 *   3. `Message-ID:` header lines embedded in the body text (the returned
 *      headers of the failed message).
 *
 * @param inReplyTo Raw `In-Reply-To` header value, or `null` when absent.
 * @param references Raw `References` value(s), or `undefined` when absent.
 * @param bodyText Plain-text body to scan for embedded `Message-ID:` lines.
 * @returns Normalized, de-duplicated ids in first-seen order.
 */
function collectOriginalMessageIds(
  inReplyTo: string | null,
  references: string[] | string | undefined,
  bodyText: string,
): string[] {
  const ids = new Set<string>();

  // Add one candidate; anything the normalizer rejects is dropped.
  const add = (candidate: string | null | undefined): void => {
    const id = normalizeMessageId(candidate);
    if (id) ids.add(id);
  };

  // Add every whitespace-separated candidate found in a header value.
  const addAll = (headerValue: string | null | undefined): void => {
    if (typeof headerValue !== 'string') return;
    for (const token of headerValue.split(/\s+/)) add(token);
  };

  addAll(inReplyTo);

  if (Array.isArray(references)) {
    for (const entry of references) addAll(entry);
  } else {
    addAll(references);
  }

  // Returned `Message-ID:` header(s) embedded in the DSN body. The capture
  // is a single bracketed id, so it is added as-is (not tokenised).
  const embeddedHeader = /^\s*Message-ID:\s*(<[^>\r\n]+>)/gim;
  for (const match of bodyText.matchAll(embeddedHeader)) {
    add(match[1]);
  }

  return [...ids];
}
const HARD_BOUNCE_STATUSES = new Set([
'5.0.0',
'5.1.1', // mailbox does not exist
@@ -116,6 +193,7 @@ export async function parseBounce(raw: string | Buffer): Promise<ParsedBounce> {
bounceClass: 'unknown',
reason: 'Failed to parse message',
inReplyTo: null,
originalMessageIds: [],
statusCode: null,
};
}
@@ -123,13 +201,20 @@ export async function parseBounce(raw: string | Buffer): Promise<ParsedBounce> {
const subject = parsed.subject ?? '';
const bodyText = parsed.text ?? '';
const inReplyTo = (parsed.inReplyTo as string | undefined) ?? null;
const originalMessageIds = collectOriginalMessageIds(inReplyTo, parsed.references, bodyText);
if (looksLikeOoo(subject, bodyText)) {
const oooFrom = parsed.from?.value[0]?.address ?? null;
return {
originalRecipient: parsed.from?.value[0]?.address ?? null,
// OOO auto-replies come *from* the recipient, so the From address is
// the "original recipient". Validate it the same way as a bounce
// recipient (audit L16a) - an invalid value would only ever pollute a
// string we don't act on for OOO, but we keep the contract uniform.
originalRecipient: isValidEmail(oooFrom) ? oooFrom : null,
bounceClass: 'ooo',
reason: 'Out-of-office auto-reply',
inReplyTo,
originalMessageIds,
statusCode: null,
};
}
@@ -137,7 +222,11 @@ export async function parseBounce(raw: string | Buffer): Promise<ParsedBounce> {
// Try to walk the multipart/report DSN structure first; falls back to
// plain-text heuristics for non-RFC-compliant Outlook NDRs.
const statusCode = extractStatusFromBody(bodyText);
const originalRecipient = extractRecipientFromBody(bodyText);
const rawRecipient = extractRecipientFromBody(bodyText);
// The recipient is parsed out of an attacker-controllable inbound body;
// reject anything that isn't a syntactically valid address before it can
// reach a DB query or a notification string (audit L16a).
const originalRecipient = isValidEmail(rawRecipient) ? rawRecipient : null;
const cls = classifyByStatus(statusCode);
@@ -154,6 +243,7 @@ export async function parseBounce(raw: string | Buffer): Promise<ParsedBounce> {
bounceClass: 'unknown',
reason: 'No bounce indicators detected',
inReplyTo,
originalMessageIds,
statusCode,
};
}
@@ -163,6 +253,7 @@ export async function parseBounce(raw: string | Buffer): Promise<ParsedBounce> {
bounceClass: cls ?? 'hard',
reason: deriveReason(statusCode, bodyText, subject),
inReplyTo,
originalMessageIds,
statusCode,
};
}

View File

@@ -50,6 +50,17 @@ export function renderShell({ title, body, branding }: ShellOpts): string {
// any host). Mail clients have no app origin, so re-absolutize here.
const logoUrl = absolutizeBrandingUrl(branding?.logoUrl ?? DEFAULT_LOGO_URL);
const backgroundUrl = absolutizeBrandingUrl(branding?.backgroundUrl ?? DEFAULT_BACKGROUND_URL);
// SECURITY / trust boundary (audit L16b): `emailHeaderHtml` / `emailFooterHtml`
// are admin-authored branding HTML and are interpolated RAW into the email
// body below (intentional - admins legitimately need to paste a styled
// legal footer / marketing strip). Authoring them is gated on the
// `manage_settings` permission, so the worst case is a self-XSS by the
// highest-privileged user on a tenant they already control - not a
// cross-tenant or privilege-escalation vector (each port reads only its
// own settings rows). If a future tenant model has MULTIPLE admins with
// mutually-distrusting `manage_settings` holders, allowlist-sanitize these
// two fields here (e.g. via a sanitize-html allowlist of safe tags/attrs)
// before interpolation. Until then we keep them raw by design.
const headerHtml = branding?.emailHeaderHtml ?? '';
const footerHtml = branding?.emailFooterHtml ?? '';

View File

@@ -28,14 +28,32 @@ export async function loadSubjectOverride(
return typeof value === 'string' && value.trim() ? value : null;
}
/**
 * Synchronous helper for substituting `{{token}}` placeholders in an email
 * subject line.
 *
 * Defensive CRLF neutralization (audit L16c): a subject is an email *header*,
 * and a CR/LF inside a substituted token value is the classic
 * header-injection primitive (smuggling a `Bcc:`, a second header, or a fake
 * body). Token values can originate from user-controlled data, so every
 * substituted value has each run of C0 control characters and DEL
 * (`\x00-\x1f`, `\x7f`) replaced by a single space here, as defense in depth
 * rather than relying solely on the mail transport to sanitize headers.
 * The static template text is left untouched.
 *
 * Only the `tokens` object's own properties are considered, so a placeholder
 * such as `{{constructor}}` or `{{toString}}` is never resolved to a member
 * inherited from `Object.prototype`.
 *
 * @param template Subject template containing zero or more `{{name}}`
 *   placeholders (`name` matches `\w+`).
 * @param tokens Values keyed by placeholder name.
 * @returns The template with every known placeholder substituted;
 *   placeholders whose token is missing, `undefined` or `null` are left
 *   verbatim.
 */
export function applySubjectTokens(
  template: string,
  tokens: Record<string, string | number | undefined>,
): string {
  return template.replace(/\{\{(\w+)\}\}/g, (placeholder: string, name: string) => {
    // Ignore inherited members (constructor, toString, __proto__, ...).
    if (!Object.prototype.hasOwnProperty.call(tokens, name)) return placeholder;

    const value = tokens[name];
    // `== null` covers both undefined and a null that slipped past the types.
    if (value == null) return placeholder;

    // Collapse each run of control characters into one space so a multi-line
    // token value can never break the subject onto a new header line.
    return String(value).replace(/[\x00-\x1f\x7f]+/g, ' ');
  });
}