// pn-new-crm/tests/unit/markdown-email-sanitization.test.ts

/**
 * Phase 7 §14.7 critical mitigation: body markdown XSS sanitization.
 *
 * Every code path that turns rep-authored markdown into the email's
 * `html` body is required to go through `renderEmailBody()`. These tests
 * are the canary — if any future change to the renderer lets a known XSS
 * payload through, the test breaks before the change ships.
 */
import { describe, expect, it } from 'vitest';

import {
  EMAIL_BODY_MAX_BYTES,
  expandMergeTokens,
  extractTokens,
  findUnresolvedTokens,
  renderEmailBody,
} from '@/lib/utils/markdown-email';

/**
 * Canary suite: feeds known XSS payloads through `renderEmailBody()` and
 * checks that the output carries only the entity-escaped form of each
 * payload — never a live tag, and never a link with a dangerous scheme.
 */
describe('renderEmailBody — XSS payload coverage', () => {
  /**
   * Asserts that `tag` survives only in escaped form: the output has no
   * literal `<tag` opener and does contain the `&lt;tag` equivalent.
   */
  const expectTagEscaped = (rendered: string, tag: string): void => {
    expect(rendered).not.toContain(`<${tag}`);
    expect(rendered).toContain(`&lt;${tag}`);
  };

  it('escapes <script> tags so they render as text, not active script', () => {
    const rendered = renderEmailBody('Hi <script>alert(1)</script> there');
    // Checked with the closing bracket included, so the full opener is pinned.
    expect(rendered).not.toContain('<script>');
    expect(rendered).toContain('&lt;script&gt;');
  });

  it('escapes onerror handlers in img tags', () => {
    const payload = '<img src=x onerror=alert(1)>';
    expectTagEscaped(renderEmailBody(payload), 'img');
  });

  it('strips javascript: URLs from markdown links', () => {
    const rendered = renderEmailBody('[click](javascript:alert(1))');
    // Neither the scheme nor an anchor element may be emitted ...
    expect(rendered).not.toContain('javascript:');
    expect(rendered).not.toContain('<a ');
    // ... but the link label is still shown as plain text.
    expect(rendered).toContain('click');
  });

  it('strips data: URLs from markdown links', () => {
    const payload = '[bad](data:text/html,<script>alert(1)</script>)';
    const rendered = renderEmailBody(payload);
    expect(rendered).not.toContain('<a ');
    // The script embedded in the URL must not leak out as a live tag either.
    expect(rendered).not.toContain('<script');
  });

  it('allows https:// URLs in markdown links', () => {
    const rendered = renderEmailBody('[example](https://example.com)');
    expect(rendered).toContain('<a href="https://example.com"');
    expect(rendered).toContain('rel="noopener noreferrer"');
  });

  it('allows mailto: URLs in markdown links', () => {
    const rendered = renderEmailBody('[reach me](mailto:hi@example.com)');
    expect(rendered).toContain('<a href="mailto:hi@example.com"');
  });

  it('escapes <iframe> tags', () => {
    const payload = '<iframe src="https://evil.com"></iframe>';
    expectTagEscaped(renderEmailBody(payload), 'iframe');
  });

  it('escapes <style> blocks', () => {
    const payload = '<style>body{background:red}</style>';
    expectTagEscaped(renderEmailBody(payload), 'style');
  });

  it('escapes attribute-style XSS attempts (no live <svg> tag survives)', () => {
    const rendered = renderEmailBody('"><svg onload=alert(1)>');
    // What matters is the angle bracket: an unescaped "<svg" is what a
    // browser would parse as a tag, whereas the word "onload" alone is inert.
    expectTagEscaped(rendered, 'svg');
    // The leading double quote of the breakout attempt is encoded as well.
    expect(rendered).toContain('&quot;');
  });

  it('escapes the polyglot from CWE-79 reference samples', () => {
    const polyglot = `'\`<img src=x onerror=alert(1)>"<svg/onload=alert(1)>"`;
    const rendered = renderEmailBody(polyglot);
    // Handlers can only fire from unescaped tags, so it is enough to prove
    // that neither tag opener survives with a raw `<`.
    expectTagEscaped(rendered, 'img');
    expectTagEscaped(rendered, 'svg');
  });

  it('rejects bodies above 50KB', () => {
    // One character past the exported limit.
    const oversized = 'x'.repeat(EMAIL_BODY_MAX_BYTES + 1);
    const render = () => renderEmailBody(oversized);
    expect(render).toThrow(/maximum length/);
  });
});

/**
 * Positive-path checks: the supported inline and block markdown constructs
 * each map to the expected HTML fragment.
 */
describe('renderEmailBody — markdown rules', () => {
  it('renders **bold** as <strong>', () => {
    const rendered = renderEmailBody('this is **bold**');
    expect(rendered).toContain('<strong>bold</strong>');
  });

  it('renders *italic* as <em>', () => {
    const rendered = renderEmailBody('this is *italic*');
    expect(rendered).toContain('<em>italic</em>');
  });

  it('renders `code` spans', () => {
    const rendered = renderEmailBody('use `apiFetch`');
    expect(rendered).toContain('<code>apiFetch</code>');
  });

  it('splits paragraphs on blank lines', () => {
    // A blank line between the two runs of text yields two <p> elements.
    const rendered = renderEmailBody('para one\n\npara two');
    expect(rendered).toContain('<p>para one</p>');
    expect(rendered).toContain('<p>para two</p>');
  });

  it('converts single newlines to <br>', () => {
    // A lone newline stays inside the paragraph as a hard line break.
    const rendered = renderEmailBody('line one\nline two');
    expect(rendered).toContain('line one<br>line two');
  });
});

/**
 * Covers the `{{token}}` helpers: extraction, expansion, unresolved-token
 * reporting, and the escaping applied to substituted values.
 */
describe('merge token helpers', () => {
  it('extracts tokens from a body', () => {
    const body = 'Hi {{client.fullName}} re {{berth.mooringNumber}}.';
    // Tokens come back with their braces, in order of appearance.
    expect(extractTokens(body)).toEqual(['{{client.fullName}}', '{{berth.mooringNumber}}']);
  });

  it('expands tokens that have values', () => {
    const template = 'Hi {{client.fullName}}';
    const expanded = expandMergeTokens(template, {
      '{{client.fullName}}': 'Jane Doe',
    });
    expect(expanded).toBe('Hi Jane Doe');
  });

  it('leaves unresolved tokens intact', () => {
    const template = 'Hi {{client.fullName}} {{missing}}';
    const expanded = expandMergeTokens(template, {
      '{{client.fullName}}': 'Jane',
    });
    // `{{missing}}` has no value supplied, so it passes through verbatim.
    expect(expanded).toBe('Hi Jane {{missing}}');
  });

  it('reports unresolved tokens', () => {
    const template = 'Hi {{a}} {{b}} {{c}}';
    const missing = findUnresolvedTokens(template, {
      '{{a}}': 'value',
    });
    expect(missing).toEqual(['{{b}}', '{{c}}']);
  });

  // Audit-final v2: merge values can originate from low-trust data (for
  // example an imported client.fullName). Such a value must NOT be able to
  // smuggle a link or emphasis into the rendered email, so markdown control
  // characters in the value are entity-encoded during substitution.
  it('escapes markdown control chars inside merge values', () => {
    const template = 'Hi {{client.fullName}}, welcome.';
    const expanded = expandMergeTokens(template, {
      '{{client.fullName}}': '[click here](https://attacker.tld)',
    });
    // With the brackets entity-encoded, the raw markdown link syntax is
    // gone and the link rule has nothing to match.
    expect(expanded).not.toContain('[click here](https://attacker.tld)');
    expect(expanded).toContain('&#91;click here&#93;');

    const rendered = renderEmailBody(expanded);
    expect(rendered).not.toContain('<a href="https://attacker.tld');
    // The label text itself is still present in the rendered output.
    expect(rendered).toContain('click here');
  });

  it('escapes nested {{token}} forms in merge values to prevent re-expansion shenanigans', () => {
    const template = 'Hi {{a}}';
    const expanded = expandMergeTokens(template, { '{{a}}': '{{secret_token}}' });
    // Braces and the underscore all appear as numeric character references.
    expect(expanded).toContain('&#123;&#123;secret&#95;token&#125;&#125;');
    expect(expanded).not.toContain('{{secret_token}}');
  });
});