fix(audit): reports workers — M9 (no duplicate scheduled emails), L5 (idempotent render artefacts), L6 (atomic schedule claim), L7 (per-port notification From)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-02 13:07:30 +02:00
parent 64c73a5d77
commit cc5c053a79
4 changed files with 280 additions and 115 deletions

View File

@@ -92,10 +92,17 @@ export const notificationsWorker = new Worker(
? await getPortBrandingConfig(notif.portId).catch(() => null)
: null;
const prefix = portBrand?.appName?.trim() || 'CRM';
// L7: pass `portId` (6th positional arg) so `getPortEmailConfig`
// resolves the notification's per-port send-from identity instead
// of falling back to the global default From. `from`/`text` stay
// undefined.
await sendEmail(
authUser.email,
`[${prefix}] ${notif.title}`,
`<p>${bodyText}</p>${linkHtml}`,
undefined,
undefined,
notif.portId ?? undefined,
);
await db

View File

@@ -22,69 +22,92 @@ export const reportsWorker = new Worker(
// weekly/monthly reports that's an instant flood of dupe
// emails to recipients. Now we compute the next fire from
// the cron expression and UPDATE the row atomically.
//
// L6: this poller does a select-due → per-row update. With a
// single `crm-worker` (concurrency 1) that's safe, but the moment
// `MULTI_NODE_DEPLOYMENT` adds a second replica two pollers would
// both read the same due rows and double-fire (duplicate runs +
// email blasts). We now atomically CLAIM due rows with
// `FOR UPDATE SKIP LOCKED` inside a transaction: a concurrent
// replica skips rows this tx already holds, so each due row is
// claimed by exactly one poller. `nextRunAt` is row-specific
// (cron-derived) so we keep the per-row update — the row lock,
// not a bulk UPDATE, is what makes the claim atomic. Enqueues are
// deferred to AFTER commit so a rolled-back claim never leaves an
// orphaned generate-report job.
const { db } = await import('@/lib/db');
const { scheduledReports } = await import('@/lib/db/schema/operations');
const { generatedReports } = await import('@/lib/db/schema/operations');
const { eq, and, lte } = await import('drizzle-orm');
const { CronExpressionParser } = await import('cron-parser');
const dueReports = await db
.select()
.from(scheduledReports)
.where(
and(eq(scheduledReports.isActive, true), lte(scheduledReports.nextRunAt, new Date())),
);
const enqueueIds: string[] = [];
for (const report of dueReports) {
const { getQueue } = await import('@/lib/queue');
await db.transaction(async (tx) => {
const dueReports = await tx
.select()
.from(scheduledReports)
.where(
and(eq(scheduledReports.isActive, true), lte(scheduledReports.nextRunAt, new Date())),
)
.for('update', { skipLocked: true });
// Compute next_run_at BEFORE the enqueue so a failure in the
// parse path (malformed cron) doesn't get repeat-fired.
let nextRunAt: Date | null = null;
try {
nextRunAt = CronExpressionParser.parse(report.schedule, {
currentDate: new Date(),
tz: process.env.SCHEDULER_TZ ?? 'Europe/Warsaw',
})
.next()
.toDate();
} catch (err) {
logger.error(
{ err, reportId: report.id, schedule: report.schedule },
'Failed to parse cron schedule for scheduled report; pausing it',
);
// Disable the row so we don't re-attempt the malformed cron
// every minute.
await db
for (const report of dueReports) {
// Compute next_run_at BEFORE the enqueue so a schedule whose cron
// fails to parse is paused and skipped — it is never enqueued and is
// not re-attempted on every poll.
let nextRunAt: Date | null = null;
try {
nextRunAt = CronExpressionParser.parse(report.schedule, {
currentDate: new Date(),
tz: process.env.SCHEDULER_TZ ?? 'Europe/Warsaw',
})
.next()
.toDate();
} catch (err) {
logger.error(
{ err, reportId: report.id, schedule: report.schedule },
'Failed to parse cron schedule for scheduled report; pausing it',
);
// Disable the row so we don't re-attempt the malformed cron
// every minute.
await tx
.update(scheduledReports)
.set({ isActive: false, updatedAt: new Date() })
.where(eq(scheduledReports.id, report.id));
continue;
}
await tx
.update(scheduledReports)
.set({ isActive: false, updatedAt: new Date() })
.set({ nextRunAt, updatedAt: new Date() })
.where(eq(scheduledReports.id, report.id));
continue;
const [genReport] = await tx
.insert(generatedReports)
.values({
portId: report.portId,
scheduledReportId: report.id,
reportType: report.reportType,
name: `${report.name} - ${new Date().toISOString().split('T')[0]}`,
status: 'queued',
parameters: (report.config as Record<string, unknown>) ?? {},
requestedBy: report.createdBy,
})
.returning();
if (genReport) {
enqueueIds.push(genReport.id);
}
}
});
await db
.update(scheduledReports)
.set({ nextRunAt, updatedAt: new Date() })
.where(eq(scheduledReports.id, report.id));
const [genReport] = await db
.insert(generatedReports)
.values({
portId: report.portId,
scheduledReportId: report.id,
reportType: report.reportType,
name: `${report.name} - ${new Date().toISOString().split('T')[0]}`,
status: 'queued',
parameters: (report.config as Record<string, unknown>) ?? {},
requestedBy: report.createdBy,
})
.returning();
if (genReport) {
if (enqueueIds.length > 0) {
const { getQueue } = await import('@/lib/queue');
for (const genReportId of enqueueIds) {
await getQueue('reports').add(
'generate-report',
{ reportJobId: genReport.id },
{ jobId: `generate-report:${genReport.id}` },
{ reportJobId: genReportId },
{ jobId: `generate-report:${genReportId}` },
);
}
}
@@ -102,46 +125,73 @@ export const reportsWorker = new Worker(
case 'report-schedules-poll': {
// Scan report_schedules due to fire, mint a report_runs row per
// schedule, advance next_run_at by cadence math, enqueue render.
//
// L6: same select-due → per-row update shape as the legacy poller
// above. Safe under the single `crm-worker` (concurrency 1) today,
// but double-fires under multiple replicas once
// `MULTI_NODE_DEPLOYMENT` is on. We atomically CLAIM due rows in a
// `FOR UPDATE SKIP LOCKED` transaction that ALSO advances
// `nextRunAt`/`lastRunAt` (and pauses templateless rows). Because
// the claim advances `nextRunAt` past `now`, a concurrent replica
// re-polling immediately afterwards no longer sees the row as due,
// and `SKIP LOCKED` keeps two pollers from claiming the same row
// mid-flight. The heavier per-row work (`createReportRun` + render
// enqueue) runs AFTER commit on the claimed rows — `createReportRun`
// is a service that uses its own db handle, and advancing the fire
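// time before minting already preserves the "no-op doesn't slip"
// rule: because `nextRunAt` was advanced in the claim, a downstream
// mint failure is not re-fired on the next poll — the schedule fires
// again at its next cadence slot.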
const { db } = await import('@/lib/db');
const { reportSchedules, reportTemplates } = await import('@/lib/db/schema/reports');
const { createReportRun } = await import('@/lib/services/report-runs.service');
const { nextRunFor } = await import('@/lib/services/report-schedules.service');
const { and, eq, lte } = await import('drizzle-orm');
type ReportSchedule = import('@/lib/db/schema/reports').ReportSchedule;
type ReportTemplate = import('@/lib/db/schema/reports').ReportTemplate;
const now = new Date();
const due = await db
.select()
.from(reportSchedules)
.where(and(eq(reportSchedules.enabled, true), lte(reportSchedules.nextRunAt, now)));
for (const schedule of due) {
const template = await db.query.reportTemplates.findFirst({
where: eq(reportTemplates.id, schedule.templateId),
});
if (!template) {
logger.warn(
{ scheduleId: schedule.id, templateId: schedule.templateId },
'Skipping schedule: template missing (likely archived); pausing',
);
await db
const claimed: Array<{ schedule: ReportSchedule; template: ReportTemplate }> = [];
await db.transaction(async (tx) => {
const due = await tx
.select()
.from(reportSchedules)
.where(and(eq(reportSchedules.enabled, true), lte(reportSchedules.nextRunAt, now)))
.for('update', { skipLocked: true });
for (const schedule of due) {
const template = await tx.query.reportTemplates.findFirst({
where: eq(reportTemplates.id, schedule.templateId),
});
if (!template) {
logger.warn(
{ scheduleId: schedule.id, templateId: schedule.templateId },
'Skipping schedule: template missing (likely archived); pausing',
);
await tx
.update(reportSchedules)
.set({ enabled: false, updatedAt: new Date() })
.where(eq(reportSchedules.id, schedule.id));
continue;
}
// Compute the next fire BEFORE the enqueue so a downstream
// failure (storage outage, etc.) doesn't pin the schedule on
// the same tick — preserves the "no-op doesn't slip" rule.
await tx
.update(reportSchedules)
.set({ enabled: false, updatedAt: new Date() })
.set({
lastRunAt: now,
nextRunAt: nextRunFor(schedule.cadence as Parameters<typeof nextRunFor>[0], now),
updatedAt: new Date(),
})
.where(eq(reportSchedules.id, schedule.id));
continue;
claimed.push({ schedule, template });
}
});
// Compute the next fire BEFORE the enqueue so a downstream
// failure (storage outage, etc.) doesn't pin the schedule on
// the same tick — preserves the "no-op doesn't slip" rule.
await db
.update(reportSchedules)
.set({
lastRunAt: now,
nextRunAt: nextRunFor(schedule.cadence as Parameters<typeof nextRunFor>[0], now),
updatedAt: new Date(),
})
.where(eq(reportSchedules.id, schedule.id));
for (const { schedule, template } of claimed) {
try {
const { REPORT_KINDS } = await import('@/lib/validators/reports');
const kindNarrowed = (REPORT_KINDS as readonly string[]).includes(template.kind)