perf(audit-tier-3): bulk-fetch the five hot N+1 loops

Replaces per-row fan-out with grouped queries / inArray pre-fetches
across the five dashboard + cron hotspots flagged in the audit
(MED §13 / HIGH §11–14):

* reminders.processFollowUpReminders — was 3 round trips per
  enabled-and-due interest.  Now: filter in JS, single clients
  bulk-fetch, single reminders bulk-insert, single interests
  bulk-update, one summary socket emit.  1k due interests: 6 round
  trips total instead of 3000+.
* portal.getClientInvoices — was a full-table scan filtered in JS.
  Now an inArray push-down on lower(billingEmail) + defensive
  limit(100).  After 12mo this would have been the worst portal
  endpoint.
* interest-scoring.calculateBulkScores — was 6N round trips
  (1 redis + 1 findFirst + 4 counts per interest).  Now 4 grouped
  count queries on the port's interest set + a single redis pipeline
  to refresh the cache.  1k interests: ~7 round trips.
* document-reminders.processReminderQueue — was 5N round trips per
  cron tick (port + template + lastReminder + pendingSigners + send
  per doc).  Now hoists port + per-type template map + grouped
  lastReminder + bulk pendingSigners; per-row work collapses to a
  Map.get and the documenso send.  500 docs: ~7 round trips.
* inquiry-notifications.sendInquiryNotifications — was sequential
  createNotification + emailQueue.add per recipient inside a public
  POST.  Now dispatched concurrently via Promise.all, so a 20-user
  port no longer blocks the public inquiry POST on ~80 sequential
  round trips.

Test status: 1168/1168 vitest, tsc clean.

Refs: docs/audit-comprehensive-2026-05-05.md HIGH §§11–14 (auditor-I
Issues 1–4) + MED §13 (auditor-I Issue 5).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Matt Ciaccio
2026-05-05 20:41:23 +02:00
parent d3a6a9beef
commit 7854cbabe4
5 changed files with 351 additions and 64 deletions

View File

@@ -1,4 +1,4 @@
import { and, count, eq, gte, isNull } from 'drizzle-orm';
import { and, count, eq, gte, inArray, isNull } from 'drizzle-orm';
import { db } from '@/lib/db';
import { redis } from '@/lib/redis';
@@ -212,25 +212,161 @@ export async function calculateInterestScore(
// ─── Bulk scoring ─────────────────────────────────────────────────────────────
/**
* Score every active (non-archived) interest in a port.
*
* The previous implementation fanned out one scoring call per interest,
* each issuing 1 redis read + 1 interests.findFirst + 4 count queries →
* 6N round trips per dashboard render (≈6000 for a 1k-interest port).
* Cold-cache flushes pegged the API for a couple of seconds.
*
* The new path replaces those 4N count queries with 4 grouped queries
* (one per dimension, filtered by inArray on the port's interest ids —
* or client ids for email threads) and merges the counts in JS.
*
* The redis cache is NOT read on this path: every score is recomputed
* from the grouped counts, and the results are then written back to
* redis in a single pipeline. That write is fire-and-forget; a failure
* is logged as a warning and does not affect the returned scores.
*
* @param portId - Port whose non-archived interests are scored.
* @returns One `{ interestId, score }` entry per non-archived interest
*   in the port; an empty array when the port has none.
*/
export async function calculateBulkScores(
portId: string,
): Promise<Array<{ interestId: string; score: InterestScore }>> {
// Load every column the in-memory scoring below needs in one query, so
// no per-interest follow-up read is required.
const allInterests = await db
// NOTE(review): the next line looks like the pre-change (removed) side
// of the diff whose +/- markers were lost — confirm.
.select({ id: interests.id })
.select({
id: interests.id,
clientId: interests.clientId,
pipelineStage: interests.pipelineStage,
createdAt: interests.createdAt,
eoiStatus: interests.eoiStatus,
contractStatus: interests.contractStatus,
depositStatus: interests.depositStatus,
dateEoiSigned: interests.dateEoiSigned,
dateContractSigned: interests.dateContractSigned,
dateDepositReceived: interests.dateDepositReceived,
})
.from(interests)
.where(and(eq(interests.portId, portId), isNull(interests.archivedAt)));
// NOTE(review): the next 5 lines look like the pre-change per-interest
// fan-out (removed side of the diff) — confirm.
const results = await Promise.allSettled(
allInterests.map(async (i) => {
const score = await calculateInterestScore(i.id, portId);
return { interestId: i.id, score };
}),
// Nothing to score: return early so the inArray filters below are never
// built from an empty id list.
if (allInterests.length === 0) return [];
const ids = allInterests.map((i) => i.id);
// Distinct, non-falsy client ids. Email threads are looked up by client
// rather than by interest, so they need their own key set.
const clientIds = Array.from(
new Set(allInterests.map((i) => i.clientId).filter((v): v is string => Boolean(v))),
);
// Lower bound of the activity window used by the notes, reminders and
// email-thread counts: now minus 30 days.
const thirtyDaysAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000);
// Four grouped aggregates against the port's interest set, issued
// concurrently. Each filters on `interest_id` (or `client_id` for the
// email-threads case) — no per-row round trips.
const [notesGrouped, remindersGrouped, emailsGrouped, berthLinksGrouped] = await Promise.all([
// Notes created within the window, per interest.
db
.select({ interestId: interestNotes.interestId, value: count() })
.from(interestNotes)
.where(
and(inArray(interestNotes.interestId, ids), gte(interestNotes.createdAt, thirtyDaysAgo)),
)
.groupBy(interestNotes.interestId),
// Reminders with status 'completed' and completedAt within the window,
// per interest.
db
.select({ interestId: reminders.interestId, value: count() })
.from(reminders)
.where(
and(
inArray(reminders.interestId, ids),
eq(reminders.status, 'completed'),
gte(reminders.completedAt, thirtyDaysAgo),
),
)
.groupBy(reminders.interestId),
// Email threads of this port with lastMessageAt within the window, per
// client. Skipped (resolved to an empty list) when no interest has a
// client id.
clientIds.length > 0
? db
.select({ clientId: emailThreads.clientId, value: count() })
.from(emailThreads)
.where(
and(
inArray(emailThreads.clientId, clientIds),
eq(emailThreads.portId, portId),
gte(emailThreads.lastMessageAt, thirtyDaysAgo),
),
)
.groupBy(emailThreads.clientId)
: Promise.resolve([] as Array<{ clientId: string | null; value: number }>),
// Berth links per interest — no time window applied.
db
.select({ interestId: interestBerths.interestId, value: count() })
.from(interestBerths)
.where(inArray(interestBerths.interestId, ids))
.groupBy(interestBerths.interestId),
]);
// Index each grouped result by its key for constant-time lookup while
// scoring. Rows whose key is null are dropped.
const notesByInterest = new Map(
notesGrouped
.filter((r): r is { interestId: string; value: number } => r.interestId !== null)
.map((r) => [r.interestId, r.value]),
);
const remindersByInterest = new Map(
remindersGrouped
.filter((r): r is { interestId: string; value: number } => r.interestId !== null)
.map((r) => [r.interestId, r.value]),
);
const emailsByClient = new Map(
emailsGrouped
.filter((r): r is { clientId: string; value: number } => r.clientId !== null)
.map((r) => [r.clientId, r.value]),
);
const berthLinksByInterest = new Map(
berthLinksGrouped
.filter((r): r is { interestId: string; value: number } => r.interestId !== null)
.map((r) => [r.interestId, r.value]),
);
// NOTE(review): the next 4 lines look like the pre-change result
// unwrapping (removed side of the diff) — confirm.
return results
.filter(
(r): r is PromiseFulfilledResult<{ interestId: string; score: InterestScore }> =>
r.status === 'fulfilled',
// Divisor used to scale rawTotal to a percentage below.
// NOTE(review): presumably the maximum attainable rawTotal (engagement
// is capped at 100 and berthLinked at 25 here, leaving 300 for the
// three helper scores) — confirm against the score* helpers.
const RAW_MAX = 425;
// One timestamp shared by every score in this batch; the ISO form is
// what gets serialised into the cache payload.
const calculatedAt = new Date();
const calculatedAtIso = calculatedAt.toISOString();
const scored = allInterests.map((interest) => {
const pipelineAge = scorePipelineAge(interest.createdAt);
const stageSpeed = scoreStageSpeed(interest.createdAt, interest.pipelineStage);
const documentCompleteness = scoreDocumentCompleteness({
eoiStatus: interest.eoiStatus,
contractStatus: interest.contractStatus,
depositStatus: interest.depositStatus,
dateEoiSigned: interest.dateEoiSigned,
dateContractSigned: interest.dateContractSigned,
dateDepositReceived: interest.dateDepositReceived,
});
// Interests with no grouped row had no matching activity → count 0.
const notesCount = notesByInterest.get(interest.id) ?? 0;
const remindersCount = remindersByInterest.get(interest.id) ?? 0;
// Email activity is keyed by client, so interests that share a client
// receive the same email count; interests without a client get 0.
const emailCount = interest.clientId ? (emailsByClient.get(interest.clientId) ?? 0) : 0;
// Engagement: 10 points per note (cap 50), 5 per email thread (cap 30),
// 10 per completed reminder (cap 20); the sum is capped at 100.
const notesScore = Math.min(notesCount * 10, 50);
const emailScore = Math.min(emailCount * 5, 30);
const remindersScore = Math.min(remindersCount * 10, 20);
const engagement = Math.min(notesScore + emailScore + remindersScore, 100);
// Flat 25 points when the interest has at least one berth link.
const berthLinked = (berthLinksByInterest.get(interest.id) ?? 0) > 0 ? 25 : 0;
const rawTotal = pipelineAge + stageSpeed + documentCompleteness + engagement + berthLinked;
// Scale against RAW_MAX and round to a whole number.
const totalScore = Math.round((rawTotal / RAW_MAX) * 100);
const score: InterestScore = {
totalScore,
breakdown: { pipelineAge, stageSpeed, documentCompleteness, engagement, berthLinked },
calculatedAt,
};
return { interestId: interest.id, score };
});
// Refresh the redis cache for each interest in a single pipeline so
// single-interest reads downstream short-circuit the per-row queries.
// Fire-and-forget — the promise is intentionally not awaited; bulk
// scoring's correctness doesn't depend on the cache write succeeding,
// and a failure is only logged as a warning.
redis
.pipeline(
scored.map(({ interestId, score }) => [
'setex',
SCORE_KEY(portId, interestId),
SCORE_TTL,
// calculatedAt is overridden with its ISO-string form in the payload.
JSON.stringify({ ...score, calculatedAt: calculatedAtIso }),
]),
)
// NOTE(review): the next line looks like the tail of the pre-change
// return expression (removed side of the diff) — confirm.
.map((r) => r.value);
.exec()
.catch((err) => logger.warn({ err, portId }, 'Redis bulk cache write failed'));
return scored;
}