fix(audit): reports workers — M9 (no duplicate scheduled emails), L5 (idempotent render artefacts), L6 (atomic schedule claim), L7 (per-port notification From)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-02 13:07:30 +02:00
parent 64c73a5d77
commit cc5c053a79
4 changed files with 280 additions and 115 deletions

View File

@@ -17,6 +17,8 @@
* land alongside the builder UI (P4+).
*/
import { createHash } from 'node:crypto';
import { and, eq } from 'drizzle-orm';
import { db } from '@/lib/db';
@@ -85,6 +87,28 @@ function rowsToCsv(rows: Array<Array<string | number | null | undefined>>): Buff
return Buffer.from(lines.join('\r\n') + '\r\n', 'utf-8');
}
/**
 * L5 — derives a stable `files.id` for the artefact of a report run.
 *
 * Why: rendering can crash after `backend.put` but before the `files`
 * insert / status write, and BullMQ then retries the job (reports
 * maxAttempts 3). The storage key embeds the file id, so computing the
 * id purely from the run id makes every attempt target the same storage
 * key and the same primary key: the blob is overwritten in place and the
 * repeated insert is absorbed by `onConflictDoNothing`, instead of
 * leaving an orphaned blob plus a dangling `files` row.
 *
 * How: SHA-256 over `report-run:<runId>`, keeping the first 32 hex
 * digits (128 bits) and grouping them 8-4-4-4-12 so the value has the
 * textual shape of a UUID. The RFC version/variant nibbles are not
 * forced, so this is an opaque, collision-resistant UUID-shaped key
 * rather than a genuine UUIDv4.
 *
 * @param runId - id of the report run the artefact belongs to
 * @returns 36-character lowercase hex string in 8-4-4-4-12 form
 */
function deterministicFileId(runId: string): string {
  const digest = createHash('sha256').update(`report-run:${runId}`).digest('hex');

  // Walk the digest left to right, cutting one UUID group per width.
  const groupWidths = [8, 4, 4, 4, 12];
  const groups: string[] = [];
  let offset = 0;
  for (const width of groupWidths) {
    groups.push(digest.slice(offset, offset + width));
    offset += width;
  }

  return groups.join('-');
}
const REPORT_RENDER_MAP: Record<string, KindRenderer> = {
dashboard: {
fetchData: fetchPipelineData as KindRenderer['fetchData'],
@@ -289,7 +313,10 @@ export async function renderReportRun(reportRunId: string): Promise<ReportRun> {
contentType = 'application/pdf';
}
const fileId = crypto.randomUUID();
// L5: deterministic per-run id + storage key so a retry after a
// mid-render crash overwrites the same blob and no-ops the duplicate
// `files` insert instead of leaking an orphan.
const fileId = deterministicFileId(run.id);
const storagePath = buildStoragePath(port.slug, 'reports', run.id, fileId, extension);
const backend = await getStorageBackend();
@@ -299,18 +326,21 @@ export async function renderReportRun(reportRunId: string): Promise<ReportRun> {
});
putStoragePath = storagePath;
await db.insert(files).values({
id: fileId,
portId: run.portId,
filename: `${run.kind}-${run.id.slice(0, 8)}.${extension}`,
originalName: `${run.kind}-report.${extension}`,
mimeType: contentType,
sizeBytes: String(bytes.length),
storagePath,
storageBucket: env.MINIO_BUCKET,
category: 'misc',
uploadedBy: run.triggeredByUserId ?? 'system',
});
await db
.insert(files)
.values({
id: fileId,
portId: run.portId,
filename: `${run.kind}-${run.id.slice(0, 8)}.${extension}`,
originalName: `${run.kind}-report.${extension}`,
mimeType: contentType,
sizeBytes: String(bytes.length),
storagePath,
storageBucket: env.MINIO_BUCKET,
category: 'misc',
uploadedBy: run.triggeredByUserId ?? 'system',
})
.onConflictDoNothing();
const updated = await updateReportRunStatus(run.id, run.portId, {
status: 'complete',
@@ -342,8 +372,16 @@ export async function renderReportRun(reportRunId: string): Promise<ReportRun> {
/**
* Schedule-driven email side effect. Looks up the schedule's recipients
* and ships an email with the rendered PDF attached. Stamps `emailedAt`
* on success; logs + rethrows on failure so BullMQ retries.
* and ships an email with the rendered PDF attached.
*
* M9 — idempotent against BullMQ retries (reports maxAttempts 3):
* - Early-returns when `emailedAt` is already stamped, so a retry never
* re-blasts recipients who already received the report.
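* - Stamps `emailedAt` BEFORE the recipient loop, so a transient SMTP
* failure on recipient N does not re-send to 1..N-1 on the next
* attempt — a failed send is lost rather than duplicated. Trade-off:
* if the worker dies mid-loop, the remaining recipients are skipped
* as well, because the retry hits the `emailedAt` early return.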
* - Treats per-recipient send failures as logged-not-thrown so one bad
* address cannot re-trigger the whole loop on retry.
*/
export async function emailReportRun(reportRunId: string): Promise<void> {
const run = await db.query.reportRuns.findFirst({
@@ -355,6 +393,14 @@ export async function emailReportRun(reportRunId: string): Promise<void> {
internalMessage: `Cannot email report ${run.id} — status=${run.status}, storageKey=${run.storageKey}`,
});
}
// M9: already emailed on a prior attempt — never re-send on BullMQ retry.
if (run.emailedAt) {
logger.info(
{ reportRunId: run.id, emailedAt: run.emailedAt },
'Report already emailed; skipping (M9 idempotency)',
);
return;
}
if (!run.scheduleId) {
logger.info({ reportRunId: run.id }, 'Skipping email for user-triggered report (no schedule)');
return;
@@ -392,16 +438,29 @@ export async function emailReportRun(reportRunId: string): Promise<void> {
const subject = `${port.name} · ${run.kind} report`;
const html = `<p>Your scheduled ${run.kind} report is attached.</p>`;
for (const recipient of recipients) {
await sendEmail(recipient.email, subject, html, undefined, undefined, run.portId, [
{ fileId: fileRow.id, filename: fileRow.originalName ?? `${run.kind}-report.pdf` },
]);
}
// M9: stamp `emailedAt` BEFORE sending so a transient SMTP failure
// mid-loop cannot cause a BullMQ retry to re-send to the recipients
// who already received the report (the early-return above now fires).
await updateReportRunStatus(run.id, run.portId, {
status: 'complete',
emailedAt: new Date(),
});
// M9: log-not-throw per recipient — one bad address must not re-blast
// the rest on retry (the run is already marked emailed). At-most-once
// delivery per recipient; a failed send is logged and dropped.
for (const recipient of recipients) {
try {
await sendEmail(recipient.email, subject, html, undefined, undefined, run.portId, [
{ fileId: fileRow.id, filename: fileRow.originalName ?? `${run.kind}-report.pdf` },
]);
} catch (err) {
logger.error(
{ err, reportRunId: run.id, recipient: recipient.email },
'Failed to email scheduled report to recipient; skipping (run already marked emailed)',
);
}
}
}
/**
@@ -456,7 +515,10 @@ async function renderStandaloneReportRun(run: ReportRun): Promise<ReportRun> {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const bytes = (await renderToBuffer(element as any)) as Buffer;
const fileId = crypto.randomUUID();
// L5: deterministic per-run id + storage key so a retry after a
// mid-render crash overwrites the same blob and no-ops the duplicate
// `files` insert instead of leaking an orphan.
const fileId = deterministicFileId(run.id);
const storagePath = buildStoragePath(port.slug, 'reports', run.id, fileId, 'pdf');
const backend = await getStorageBackend();
await backend.put(storagePath, bytes, {
@@ -465,18 +527,21 @@ async function renderStandaloneReportRun(run: ReportRun): Promise<ReportRun> {
});
putStoragePath = storagePath;
await db.insert(files).values({
id: fileId,
portId: run.portId,
filename: `${run.kind}-${run.id.slice(0, 8)}.pdf`,
originalName: `${run.kind}-report.pdf`,
mimeType: 'application/pdf',
sizeBytes: String(bytes.length),
storagePath,
storageBucket: env.MINIO_BUCKET,
category: 'misc',
uploadedBy: run.triggeredByUserId ?? 'system',
});
await db
.insert(files)
.values({
id: fileId,
portId: run.portId,
filename: `${run.kind}-${run.id.slice(0, 8)}.pdf`,
originalName: `${run.kind}-report.pdf`,
mimeType: 'application/pdf',
sizeBytes: String(bytes.length),
storagePath,
storageBucket: env.MINIO_BUCKET,
category: 'misc',
uploadedBy: run.triggeredByUserId ?? 'system',
})
.onConflictDoNothing();
const updated = await updateReportRunStatus(run.id, run.portId, {
status: 'complete',

View File

@@ -1,3 +1,5 @@
import { createHash } from 'node:crypto';
import { and, desc, eq } from 'drizzle-orm';
import type { ReactElement } from 'react';
import type { DocumentProps } from '@react-pdf/renderer';
@@ -193,6 +195,24 @@ export async function getDownloadUrl(reportId: string, portId: string) {
// ─── generateReport ───────────────────────────────────────────────────────────
/**
 * L5 — derives a stable `files.id` for the artefact of a legacy report job.
 *
 * Why: `generateReport` can crash after `backend.put` but before the
 * `generatedReports` status write, and BullMQ then retries the job
 * (reports maxAttempts 3). The storage key embeds the file id, so
 * computing the id purely from the job id makes the retry reuse the same
 * storage key and the same primary key: the blob is overwritten in place
 * and the repeated `files` insert no-ops via `onConflictDoNothing`,
 * leaving no orphaned blob or dangling row.
 *
 * How: SHA-256 over `generated-report:<reportJobId>`, truncated to the
 * first 32 hex digits and hyphenated 8-4-4-4-12 to look like a UUID.
 * The RFC version/variant nibbles are not forced, so treat the result
 * as an opaque UUID-shaped key.
 *
 * @param reportJobId - id of the `generatedReports` job being rendered
 * @returns 36-character lowercase hex string in 8-4-4-4-12 form
 */
function deterministicReportFileId(reportJobId: string): string {
  const hex32 = createHash('sha256')
    .update(`generated-report:${reportJobId}`)
    .digest('hex')
    .slice(0, 32);

  // A hex digest is always 64 chars of [0-9a-f], so the 32-char prefix
  // always matches and is re-emitted with the UUID hyphenation.
  return hex32.replace(/^(.{8})(.{4})(.{4})(.{4})(.{12})$/, '$1-$2-$3-$4-$5');
}
export async function generateReport(reportJobId: string): Promise<void> {
// 1. Fetch the generatedReports record
const report = await db.query.generatedReports.findFirst({
@@ -203,6 +223,15 @@ export async function generateReport(reportJobId: string): Promise<void> {
throw new NotFoundError('report job');
}
// L5: idempotency early-return. If a prior attempt already produced the
// artefact (status='ready' with a fileId), a BullMQ retry must not
// re-render + re-upload + re-insert a second `files` row — that leaks an
// orphaned blob. Bail out treating the run as already done.
if (report.status === 'ready' && report.fileId) {
logger.info({ reportJobId }, 'Report already generated; skipping re-render (L5 idempotency)');
return;
}
const { portId, reportType, name, parameters, requestedBy } = report;
try {
@@ -260,8 +289,11 @@ export async function generateReport(reportJobId: string): Promise<void> {
) => ReactElement<DocumentProps>;
const pdfBytes = await renderPdf(renderFn(data, ctx));
// 8. Build storage path
const fileId = crypto.randomUUID();
// 8. Build storage path.
// L5: deterministic per-job file id + storage key so a retry after a
// mid-render crash overwrites the same blob and no-ops the duplicate
// `files` insert instead of leaking an orphan.
const fileId = deterministicReportFileId(reportJobId);
const storagePath = buildStoragePath(portSlug, 'reports', reportJobId, fileId, 'pdf');
// 9. Upload PDF via the active storage backend (filesystem or s3)
@@ -272,8 +304,8 @@ export async function generateReport(reportJobId: string): Promise<void> {
sizeBytes: buffer.length,
});
// 10. Insert into files table
const [fileRecord] = await db
// 10. Insert into files table (idempotent on retry via deterministic id)
let [fileRecord] = await db
.insert(files)
.values({
id: fileId,
@@ -287,8 +319,19 @@ export async function generateReport(reportJobId: string): Promise<void> {
category: 'misc',
uploadedBy: requestedBy,
})
.onConflictDoNothing()
.returning();
// L5: a retry that already inserted the file row on a prior attempt
// (but crashed before the generatedReports status write) gets an empty
// returning() from onConflictDoNothing — re-fetch the existing row
// rather than failing.
if (!fileRecord) {
fileRecord = await db.query.files.findFirst({
where: and(eq(files.id, fileId), eq(files.portId, portId)),
});
}
if (!fileRecord) {
throw new CodedError('INSERT_RETURNING_EMPTY', {
internalMessage: 'Failed to insert file record for generated report',