Files
pn-new-crm/src/lib/services/system-monitoring.service.ts
Matt 221ae5784e chore(autonomous-session): consolidate uncommitted work from prior session
Bundles the prior autonomous-session output that was sitting unstaged:

- Em-dash sweep across src/ + tests/ (en-dash/em-dash to hyphen, ~2280 instances)
- country-flag-icons rollout (CountryFlag component, replaces emoji glyphs that
  never rendered on Windows; lazy-loads the 3x2 SVG index as a single chunk
  after the per-subpath dynamic-import approach silently failed in webpack)
- Admin IA Phase 1+2: 7-domain regroup, 41 to 38 pages, /admin/berths index,
  redirects (ocr to ai, reports to dashboard, invitations to users),
  docs/admin-ia-proposal.md
- Per-template email tester (registry + endpoint + UI on Email admin page)
- Cancel-document mode picker (delete-from-Documenso vs keep-for-audit)
- Dashboard PDF report: 25 widgets, SVG charts, date-range picker, 11 resolvers
- Customize-widgets per-region sortables at xl+ (charts/rails/feed); single
  flat sortable below xl when the layout stacks; per-viewport saved orders
- Audit doc updates capturing each shipped item
- Lint fixes: react-compiler immutability in DonutChart (reduce instead of
  let-reassign), set-state-in-effect disables in CountryFlag and
  UploadForSigning preview-bytes effect, unused 'confirm' destructures in
  interest contract + reservation tabs, unescaped apostrophe in test-template
  card copy
2026-05-23 00:52:59 +02:00

423 lines
14 KiB
TypeScript

import { db } from '@/lib/db';
import { auditLogs, errorEvents } from '@/lib/db/schema';
import { redis } from '@/lib/redis';
import { getQueue, QUEUE_CONFIGS, type QueueName } from '@/lib/queue';
import { createAuditLog } from '@/lib/audit';
import { env } from '@/lib/env';
import { sql, desc, eq } from 'drizzle-orm';
import { NotFoundError } from '@/lib/errors';
import { logger } from '@/lib/logger';
// ─── Types ────────────────────────────────────────────────────────────────────
/** Outcome of probing one backing service during a health check. */
export interface ServiceStatus {
  /** Display name of the probed service (e.g. `PostgreSQL`, `Redis`). */
  name: string;
  /** Verdict of the probe. */
  status: 'healthy' | 'degraded' | 'down';
  /** Elapsed wall-clock time of the probe, in milliseconds. */
  responseTimeMs: number;
  /** Optional explanation; the checkers fill it with the error message on failure. */
  details?: string;
}
/** Aggregated result of one health-check run across all probed services. */
export interface HealthStatus {
  /** Worst status among `services`: `down` beats `degraded` beats `healthy`. */
  overall: 'healthy' | 'degraded' | 'down';
  /** One entry per probed service. */
  services: ServiceStatus[];
  /** Moment the aggregated result was assembled. */
  checkedAt: Date;
}
/** Per-queue job counters, one number per job state. */
export interface QueueStatus {
  /** Queue name. */
  name: string;
  /** Jobs in the `waiting` state. */
  waiting: number;
  /** Jobs in the `active` state. */
  active: number;
  /** Jobs in the `completed` state. */
  completed: number;
  /** Jobs in the `failed` state. */
  failed: number;
  /** Jobs in the `delayed` state. */
  delayed: number;
}
/** Compact, display-oriented view of a single queue job. */
export interface QueueJobSummary {
  /** Job id; an empty string when the underlying job has none. */
  id: string;
  /** Job name as reported by the queue. */
  name: string;
  /** Job payload, possibly shortened for display. */
  data: unknown;
  /** The job state that was queried when this summary was built. */
  status: string;
  /** Job timestamp as reported by the queue (numeric; presumably epoch ms). */
  timestamp: number | undefined;
  /** When processing started, if it has. */
  processedOn: number | undefined;
  /** When the job finished, if it has. */
  finishedOn: number | undefined;
  /** Failure reason reported by the queue, if any. */
  failedReason: string | undefined;
}
/** One page of job summaries plus the paging information used to fetch it. */
export interface PaginatedQueueJobs {
  /** Jobs on the requested page. */
  jobs: QueueJobSummary[];
  /** Total number of jobs in the queried state. */
  total: number;
  /** 1-based page number that was requested. */
  page: number;
  /** Page size that was requested. */
  limit: number;
}
/** Snapshot of realtime socket connectivity. */
export interface ConnectionStatus {
  /** Number of sockets currently known to the socket server. */
  totalConnections: number;
}
/** A single entry in the merged "recent errors" feed. */
export interface RecentError {
  /** Identifier of the entry within its source. */
  id: string;
  /** Where the entry originated: audit log, job queue, or captured request. */
  source: 'audit' | 'queue' | 'request';
  /** Human-readable one-line summary. */
  message: string;
  /** When the error occurred; the feed is sorted on this field. */
  timestamp: Date;
  /** Free-form extra context attached by the source. */
  metadata?: Record<string, unknown>;
  /**
   * Present on `source: 'request'` entries so the UI can deep-link to
   * /admin/errors/<requestId>.
   */
  requestId?: string;
  /** Present on `source: 'request'` entries. */
  statusCode?: number;
  /** Present on `source: 'request'` entries. */
  errorCode?: string | null;
}
// ─── Timeout helper ───────────────────────────────────────────────────────────
/**
 * Settles with the outcome of `promise`, or rejects with a
 * `Timed out after <ms>ms` error if `promise` has not settled within `ms`.
 *
 * The timer is cleared as soon as the race settles, so a fast probe does not
 * leave a pending timer behind for the remainder of the timeout window.
 * The wrapped promise itself is not cancelled on timeout; its eventual
 * result is simply ignored.
 *
 * @param promise - The operation to guard.
 * @param ms - Timeout in milliseconds.
 * @returns The resolved value of `promise`.
 * @throws Error when the timeout elapses first; otherwise whatever `promise` rejects with.
 */
function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(`Timed out after ${ms}ms`)), ms);
  });
  return Promise.race([promise, timeout]).finally(() => {
    // Always release the timer, whichever side won the race.
    clearTimeout(timer);
  });
}
// ─── healthCheck ──────────────────────────────────────────────────────────────
/**
 * Probes PostgreSQL, Redis, the storage backend and Documenso concurrently
 * and folds the individual verdicts into one overall status.
 *
 * @returns The per-service results, the worst-case overall status, and the
 *   time the result was assembled.
 */
export async function healthCheck(): Promise<HealthStatus> {
  const settled = await Promise.allSettled([
    checkPostgres(),
    checkRedis(),
    checkMinio(),
    checkDocumenso(),
  ]);

  const services: ServiceStatus[] = [];
  for (const outcome of settled) {
    if (outcome.status === 'fulfilled') {
      services.push(outcome.value);
      continue;
    }
    // Defensive only: every checker catches its own errors.
    services.push({
      name: 'unknown',
      status: 'down',
      responseTimeMs: 0,
      details: String(outcome.reason),
    });
  }

  // Worst status wins: down > degraded > healthy.
  let overall: HealthStatus['overall'] = 'healthy';
  if (services.some((svc) => svc.status === 'down')) {
    overall = 'down';
  } else if (services.some((svc) => svc.status === 'degraded')) {
    overall = 'degraded';
  }

  return { overall, services, checkedAt: new Date() };
}
/**
 * Probes PostgreSQL by running `SELECT 1` with a 5 second timeout.
 *
 * @returns `healthy` when the query completes, `down` (with the error
 *   message in `details`) when it fails or times out. Never throws.
 */
async function checkPostgres(): Promise<ServiceStatus> {
  const name = 'PostgreSQL';
  const startedAt = Date.now();
  try {
    await withTimeout(db.execute(sql`SELECT 1`), 5000);
  } catch (err) {
    return {
      name,
      status: 'down',
      responseTimeMs: Date.now() - startedAt,
      details: err instanceof Error ? err.message : 'Unknown error',
    };
  }
  return { name, status: 'healthy', responseTimeMs: Date.now() - startedAt };
}
/**
 * Probes Redis with a `PING` under a 5 second timeout.
 *
 * @returns `healthy` when the reply is `PONG`, `degraded` for any other
 *   reply, `down` (with the error message in `details`) when the call
 *   fails or times out. Never throws.
 */
async function checkRedis(): Promise<ServiceStatus> {
  const name = 'Redis';
  const startedAt = Date.now();
  try {
    const reply = await withTimeout(redis.ping(), 5000);
    if (reply === 'PONG') {
      return { name, status: 'healthy', responseTimeMs: Date.now() - startedAt };
    }
    return { name, status: 'degraded', responseTimeMs: Date.now() - startedAt };
  } catch (err) {
    const details = err instanceof Error ? err.message : 'Unknown error';
    return { name, status: 'down', responseTimeMs: Date.now() - startedAt, details };
  }
}
/**
 * Probes the ACTIVE storage backend (S3 or filesystem) through the storage
 * abstraction, so a deployment running on the filesystem still gets a
 * meaningful "Storage" row rather than a misleading "MinIO down".
 *
 * The probe key is a sentinel that is never written; `head()` returning
 * null for a missing object still counts as healthy because the
 * connection itself worked.
 *
 * @returns `healthy` when `head()` completes within 5 seconds, otherwise
 *   `down` with the error message in `details`. Never throws.
 */
async function checkMinio(): Promise<ServiceStatus> {
  const name = 'Storage';
  const startedAt = Date.now();
  try {
    const { getStorageBackend } = await import('@/lib/storage');
    const storage = await getStorageBackend();
    await withTimeout(storage.head('__health_probe__'), 5000);
  } catch (err) {
    return {
      name,
      status: 'down',
      responseTimeMs: Date.now() - startedAt,
      details: err instanceof Error ? err.message : 'Unknown error',
    };
  }
  return { name, status: 'healthy', responseTimeMs: Date.now() - startedAt };
}
/**
 * Probes Documenso by requesting `<DOCUMENSO_API_URL>/api/v1/health`,
 * aborting the request after 5 seconds.
 *
 * @returns `healthy` for an OK response, `degraded` for any other HTTP
 *   response, `down` (with the error message in `details`) when the
 *   request fails or is aborted. Never throws.
 */
async function checkDocumenso(): Promise<ServiceStatus> {
  const name = 'Documenso';
  const startedAt = Date.now();
  const abort = new AbortController();
  const abortTimer = setTimeout(() => abort.abort(), 5000);
  try {
    const response = await fetch(`${env.DOCUMENSO_API_URL}/api/v1/health`, {
      method: 'GET',
      signal: abort.signal,
    });
    return {
      name,
      status: response.ok ? 'healthy' : 'degraded',
      responseTimeMs: Date.now() - startedAt,
    };
  } catch (err) {
    return {
      name,
      status: 'down',
      responseTimeMs: Date.now() - startedAt,
      details: err instanceof Error ? err.message : 'Unreachable',
    };
  } finally {
    // Runs on every path, so the abort timer never outlives the probe.
    clearTimeout(abortTimer);
  }
}
// ─── getQueueDashboard ────────────────────────────────────────────────────────
/**
 * Collects job counters for every queue listed in `QUEUE_CONFIGS`.
 *
 * Queues are queried concurrently. A queue whose counters cannot be read is
 * logged and reported with all-zero counters instead of failing the call.
 *
 * @returns One entry per configured queue, in configuration order.
 */
export async function getQueueDashboard(): Promise<QueueStatus[]> {
  const names = Object.keys(QUEUE_CONFIGS) as QueueName[];

  const readCounts = async (name: QueueName): Promise<QueueStatus> => {
    const counts = await getQueue(name).getJobCounts(
      'waiting',
      'active',
      'completed',
      'failed',
      'delayed',
    );
    return {
      name,
      waiting: counts.waiting ?? 0,
      active: counts.active ?? 0,
      completed: counts.completed ?? 0,
      failed: counts.failed ?? 0,
      delayed: counts.delayed ?? 0,
    };
  };

  const settled = await Promise.allSettled(names.map((name) => readCounts(name)));

  const dashboard: QueueStatus[] = [];
  settled.forEach((outcome, index) => {
    if (outcome.status === 'fulfilled') {
      dashboard.push(outcome.value);
      return;
    }
    const name = names[index] ?? 'unknown';
    logger.warn({ queue: name, err: outcome.reason }, 'Failed to get queue counts');
    dashboard.push({ name, waiting: 0, active: 0, completed: 0, failed: 0, delayed: 0 });
  });
  return dashboard;
}
// ─── getQueueJobs ─────────────────────────────────────────────────────────────
type JobStatus = 'waiting' | 'active' | 'completed' | 'failed' | 'delayed';

/**
 * Produces a display-safe version of a job payload.
 *
 * - Payloads whose JSON form is at most 500 characters are returned as-is.
 * - Larger payloads are returned as a string: the first 500 characters of
 *   the JSON form followed by `...(truncated)`. (A cut-off JSON string cannot
 *   be parsed back into an object, so the preview stays a string.)
 * - Payloads with no JSON form (`undefined`, circular structures, BigInt, ...)
 *   are reported as `[unparseable]`.
 */
function summarizeJobData(data: unknown): unknown {
  const maxChars = 500;
  let serialized: string | undefined;
  try {
    // JSON.stringify returns undefined (not a string) for e.g. `undefined`.
    serialized = JSON.stringify(data) as string | undefined;
  } catch {
    return '[unparseable]';
  }
  if (typeof serialized !== 'string') return '[unparseable]';
  if (serialized.length <= maxChars) return data;
  return `${serialized.slice(0, maxChars)}...(truncated)`;
}

/**
 * Returns one page of jobs in the given state from a queue.
 *
 * @param queueName - Queue to read from.
 * @param status - Job state to list; defaults to `failed`.
 * @param page - 1-based page number.
 * @param limit - Page size.
 * @returns The job summaries for the page, the total number of jobs in that
 *   state, and the `page`/`limit` that were requested.
 */
export async function getQueueJobs(
  queueName: QueueName,
  status: JobStatus = 'failed',
  page = 1,
  limit = 20,
): Promise<PaginatedQueueJobs> {
  const queue = getQueue(queueName);
  // getJobs takes an inclusive [start, end] index range.
  const start = (page - 1) * limit;
  const end = start + limit - 1;

  // The page contents and the total are independent reads.
  const [jobs, counts] = await Promise.all([
    queue.getJobs([status], start, end),
    queue.getJobCounts(status),
  ]);
  const total = counts[status] ?? 0;

  const summaries: QueueJobSummary[] = jobs.map((job) => ({
    id: job.id ?? '',
    name: job.name,
    // Shorten large payloads so the response stays small.
    data: summarizeJobData(job.data),
    status,
    timestamp: job.timestamp,
    processedOn: job.processedOn ?? undefined,
    finishedOn: job.finishedOn ?? undefined,
    failedReason: job.failedReason ?? undefined,
  }));

  return { jobs: summaries, total, page, limit };
}
// ─── retryJob ─────────────────────────────────────────────────────────────────
/**
 * Re-queues a job and records the action in the audit log.
 *
 * The audit entry is written fire-and-forget: the call is not awaited.
 *
 * @param queueName - Queue that owns the job.
 * @param jobId - Id of the job to retry.
 * @param userId - User performing the action, recorded in the audit log.
 * @throws NotFoundError when the queue has no job with that id.
 */
export async function retryJob(queueName: QueueName, jobId: string, userId: string): Promise<void> {
  const target = await getQueue(queueName).getJob(jobId);
  if (!target) {
    throw new NotFoundError('queue job');
  }

  await target.retry();

  void createAuditLog({
    userId,
    portId: null,
    action: 'update',
    entityType: 'queue_job',
    entityId: jobId,
    metadata: { queueName, jobName: target.name, action: 'retry' },
    ipAddress: 'system',
    userAgent: 'system',
  });
}
// ─── deleteJob ────────────────────────────────────────────────────────────────
/**
 * Removes a job from its queue and records the action in the audit log.
 *
 * The audit entry is written fire-and-forget: the call is not awaited.
 *
 * @param queueName - Queue that owns the job.
 * @param jobId - Id of the job to remove.
 * @param userId - User performing the action, recorded in the audit log.
 * @throws NotFoundError when the queue has no job with that id.
 */
export async function deleteJob(
  queueName: QueueName,
  jobId: string,
  userId: string,
): Promise<void> {
  const target = await getQueue(queueName).getJob(jobId);
  if (!target) {
    throw new NotFoundError('queue job');
  }

  await target.remove();

  void createAuditLog({
    userId,
    portId: null,
    action: 'delete',
    entityType: 'queue_job',
    entityId: jobId,
    metadata: { queueName, jobName: target.name, action: 'delete' },
    ipAddress: 'system',
    userAgent: 'system',
  });
}
// ─── getActiveConnections ─────────────────────────────────────────────────────
/**
 * Counts the sockets currently known to the socket server.
 *
 * Best-effort: if the socket module cannot be loaded or queried, the
 * function reports zero connections instead of failing.
 *
 * @returns The number of sockets returned by `fetchSockets()`, or 0 on any error.
 */
export async function getActiveConnections(): Promise<ConnectionStatus> {
  let totalConnections = 0;
  try {
    // Loaded lazily, at call time, rather than at module import.
    const { getIO } = await import('@/lib/socket/server');
    const connected = await getIO().fetchSockets();
    totalConnections = connected.length;
  } catch {
    totalConnections = 0;
  }
  return { totalConnections };
}
// ─── getRecentErrors ──────────────────────────────────────────────────────────
/**
 * Builds the one-line summary for a captured request error:
 * `<method> <path> <statusCode> <detail>`, with trailing whitespace removed
 * when there is no detail.
 */
function formatRequestErrorMessage(
  method: string | null,
  path: string | null,
  statusCode: number,
  detail: string | null | undefined,
): string {
  return `${method} ${path} ${statusCode} ${detail ?? ''}`.trim();
}

/**
 * Returns the most recent errors across three sources, newest first:
 *
 * 1. `permission_denied` rows from the audit log,
 * 2. failed jobs from every configured queue (up to 5 per queue),
 * 3. captured request errors from the `error_events` table.
 *
 * A queue whose failed jobs cannot be read is logged and skipped; it does
 * not fail the whole call.
 *
 * @param limit - Maximum rows fetched from each database source and maximum
 *   size of the merged result.
 * @returns At most `limit` entries sorted by timestamp descending.
 */
export async function getRecentErrors(limit = 20): Promise<RecentError[]> {
  // Permission-denied audit log entries.
  const auditErrors = await db
    .select({
      id: auditLogs.id,
      action: auditLogs.action,
      entityType: auditLogs.entityType,
      entityId: auditLogs.entityId,
      metadata: auditLogs.metadata,
      createdAt: auditLogs.createdAt,
    })
    .from(auditLogs)
    .where(eq(auditLogs.action, 'permission_denied'))
    .orderBy(desc(auditLogs.createdAt))
    .limit(limit);

  const auditResults: RecentError[] = auditErrors.map((row) => ({
    id: row.id,
    source: 'audit' as const,
    message: `Permission denied on ${row.entityType}`,
    timestamp: row.createdAt,
    metadata: (row.metadata as Record<string, unknown>) ?? {},
  }));

  // Failed jobs from all queues (sample - top 5 per queue).
  const queueNames = Object.keys(QUEUE_CONFIGS) as QueueName[];
  const failedJobResults = await Promise.allSettled(
    queueNames.map(async (name) => {
      const queue = getQueue(name);
      const jobs = await queue.getJobs(['failed'], 0, 4);
      return jobs.map(
        (job): RecentError => ({
          id: `${name}:${job.id ?? ''}`,
          source: 'queue',
          message: `Queue job failed: ${job.name} in ${name}`,
          // Prefer the finish time; fall back to the job's own timestamp.
          timestamp: job.finishedOn ? new Date(job.finishedOn) : new Date(job.timestamp),
          metadata: { queueName: name, failedReason: job.failedReason },
        }),
      );
    }),
  );

  const queueErrors: RecentError[] = [];
  failedJobResults.forEach((result, index) => {
    if (result.status === 'fulfilled') {
      queueErrors.push(...result.value);
      return;
    }
    // Keep the feed available, but leave a trace of the unreadable queue.
    logger.warn(
      { queue: queueNames[index] ?? 'unknown', err: result.reason },
      'Failed to get failed jobs for recent errors',
    );
  });

  // Captured requests from the per-request error_events table -
  // this is the deepest source: full stack head + body excerpt + path.
  // The dedicated /admin/errors page paginates this; here we surface
  // the most recent for the dashboard.
  const requestErrorRows = await db
    .select({
      requestId: errorEvents.requestId,
      statusCode: errorEvents.statusCode,
      method: errorEvents.method,
      path: errorEvents.path,
      errorName: errorEvents.errorName,
      errorMessage: errorEvents.errorMessage,
      metadata: errorEvents.metadata,
      createdAt: errorEvents.createdAt,
    })
    .from(errorEvents)
    .orderBy(desc(errorEvents.createdAt))
    .limit(limit);

  const requestErrors: RecentError[] = requestErrorRows.map((row) => {
    const meta = (row.metadata as Record<string, unknown>) ?? {};
    return {
      id: row.requestId,
      source: 'request' as const,
      message: formatRequestErrorMessage(
        row.method,
        row.path,
        row.statusCode,
        row.errorMessage ?? row.errorName,
      ),
      timestamp: row.createdAt,
      metadata: meta,
      requestId: row.requestId,
      statusCode: row.statusCode,
      errorCode: typeof meta.code === 'string' ? meta.code : null,
    };
  });

  // Merge and sort the combined list by timestamp descending.
  const combined = [...auditResults, ...queueErrors, ...requestErrors].sort(
    (a, b) => b.timestamp.getTime() - a.timestamp.getTime(),
  );
  return combined.slice(0, limit);
}