Files
pn-new-crm/src/lib/services/system-monitoring.service.ts

378 lines
12 KiB
TypeScript
Raw Normal View History

import { db } from '@/lib/db';
import { auditLogs } from '@/lib/db/schema';
import { redis } from '@/lib/redis';
import { minioClient } from '@/lib/minio/index';
import { getQueue, QUEUE_CONFIGS, type QueueName } from '@/lib/queue';
import { createAuditLog } from '@/lib/audit';
import { env } from '@/lib/env';
import { sql, desc, eq } from 'drizzle-orm';
import { logger } from '@/lib/logger';
// ─── Types ────────────────────────────────────────────────────────────────────
/** Outcome of probing one backing service. */
export interface ServiceStatus {
  /** Display name of the probed service, e.g. `PostgreSQL`. */
  name: string;
  /** Probe verdict for this service. */
  status: 'healthy' | 'degraded' | 'down';
  /** Wall-clock duration of the probe, in milliseconds. */
  responseTimeMs: number;
  /** Optional free-form detail; the checkers in this file fill it with the failure message. */
  details?: string;
}
/** Aggregated result of one health-check run. */
export interface HealthStatus {
  /** Worst status across all probed services. */
  overall: 'healthy' | 'degraded' | 'down';
  /** One entry per probed service. */
  services: ServiceStatus[];
  /** Moment the aggregated result was assembled. */
  checkedAt: Date;
}
/** Per-queue job counters, one number per job state. */
export interface QueueStatus {
  /** Queue identifier. */
  name: string;
  /** Jobs in the `waiting` state. */
  waiting: number;
  /** Jobs in the `active` state. */
  active: number;
  /** Jobs in the `completed` state. */
  completed: number;
  /** Jobs in the `failed` state. */
  failed: number;
  /** Jobs in the `delayed` state. */
  delayed: number;
}
/** Compact view of a single queue job, suitable for listing. */
export interface QueueJobSummary {
  /** Job identifier (empty string when the job has no id). */
  id: string;
  /** Job name. */
  name: string;
  /** Job payload, possibly shortened by the producer of this summary. */
  data: unknown;
  /** Job state the summary was listed under. */
  status: string;
  /** Job `timestamp` value as reported by the queue. */
  timestamp: number | undefined;
  /** Job `processedOn` value, if any. */
  processedOn: number | undefined;
  /** Job `finishedOn` value, if any. */
  finishedOn: number | undefined;
  /** Failure reason reported by the queue, if any. */
  failedReason: string | undefined;
}
/** One page of job summaries plus the paging parameters that produced it. */
export interface PaginatedQueueJobs {
  /** Jobs on this page. */
  jobs: QueueJobSummary[];
  /** Total number of jobs in the requested state. */
  total: number;
  /** 1-based page number. */
  page: number;
  /** Page size. */
  limit: number;
}
/** Snapshot of realtime connection usage. */
export interface ConnectionStatus {
  /** Number of currently connected sockets. */
  totalConnections: number;
}
/** A recent error surfaced from either the audit log or a job queue. */
export interface RecentError {
  /** Identifier of the underlying record. */
  id: string;
  /** Where the error was collected from. */
  source: 'audit' | 'queue';
  /** Human-readable description. */
  message: string;
  /** When the error occurred. */
  timestamp: Date;
  /** Optional extra context attached to the error. */
  metadata?: Record<string, unknown>;
}
// ─── Timeout helper ───────────────────────────────────────────────────────────
/**
 * Races `promise` against a timer.
 *
 * Resolves or rejects with whatever `promise` settles to if that happens
 * first; otherwise rejects with `Error("Timed out after {ms}ms")`.
 *
 * The timer is cleared as soon as the race settles, so a fast operation does
 * not leave a pending timer behind for the remainder of `ms`. The underlying
 * operation itself is NOT cancelled on timeout; only the wait is abandoned.
 *
 * @param promise - Operation to wait for.
 * @param ms - Maximum time to wait, in milliseconds.
 * @returns A promise that settles like `promise`, or rejects on timeout.
 */
function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(`Timed out after ${ms}ms`)), ms);
  });

  return Promise.race([promise, timeout]).finally(() => {
    // Release the timer whichever side won the race.
    if (timer !== undefined) clearTimeout(timer);
  });
}
// ─── healthCheck ──────────────────────────────────────────────────────────────
/**
 * Probes PostgreSQL, Redis, MinIO and Documenso concurrently and folds the
 * individual results into one overall status.
 *
 * The overall status is `down` if any service is down, otherwise `degraded`
 * if any service is degraded, otherwise `healthy`.
 *
 * @returns The per-service results, the overall verdict and the check time.
 */
export async function healthCheck(): Promise<HealthStatus> {
  const settled = await Promise.allSettled([
    checkPostgres(),
    checkRedis(),
    checkMinio(),
    checkDocumenso(),
  ]);

  const services: ServiceStatus[] = [];
  for (const outcome of settled) {
    if (outcome.status === 'rejected') {
      // Defensive only: every checker catches its own errors.
      services.push({
        name: 'unknown',
        status: 'down',
        responseTimeMs: 0,
        details: String(outcome.reason),
      });
      continue;
    }
    services.push(outcome.value);
  }

  let overall: HealthStatus['overall'] = 'healthy';
  if (services.some((service) => service.status === 'down')) {
    overall = 'down';
  } else if (services.some((service) => service.status === 'degraded')) {
    overall = 'degraded';
  }

  return { overall, services, checkedAt: new Date() };
}
/**
 * Probes the database by running `SELECT 1` with a 5000 ms time limit.
 *
 * Never throws: any failure (including the timeout) is reported as a `down`
 * status carrying the error message.
 */
async function checkPostgres(): Promise<ServiceStatus> {
  const startedAt = Date.now();
  const elapsedMs = (): number => Date.now() - startedAt;

  try {
    await withTimeout(db.execute(sql`SELECT 1`), 5000);
  } catch (err) {
    return {
      name: 'PostgreSQL',
      status: 'down',
      responseTimeMs: elapsedMs(),
      details: err instanceof Error ? err.message : 'Unknown error',
    };
  }

  return { name: 'PostgreSQL', status: 'healthy', responseTimeMs: elapsedMs() };
}
/**
 * Probes Redis with a `PING`, limited to 5000 ms.
 *
 * A reply of exactly `PONG` is `healthy`; any other reply is `degraded`.
 * Never throws: failures (including the timeout) are reported as `down`.
 */
async function checkRedis(): Promise<ServiceStatus> {
  const startedAt = Date.now();

  try {
    const reply = await withTimeout(redis.ping(), 5000);
    return {
      name: 'Redis',
      status: reply === 'PONG' ? 'healthy' : 'degraded',
      responseTimeMs: Date.now() - startedAt,
    };
  } catch (err) {
    const details = err instanceof Error ? err.message : 'Unknown error';
    return {
      name: 'Redis',
      status: 'down',
      responseTimeMs: Date.now() - startedAt,
      details,
    };
  }
}
/**
 * Probes MinIO by asking whether the configured bucket (`env.MINIO_BUCKET`)
 * exists, limited to 5000 ms.
 *
 * - `healthy`: the call succeeded and the bucket exists.
 * - `degraded`: the server answered but reported that the bucket does not
 *   exist (storage is reachable yet unusable for this application).
 * - `down`: the call failed or timed out; `details` carries the message.
 *
 * Never throws.
 */
async function checkMinio(): Promise<ServiceStatus> {
  const startedAt = Date.now();

  try {
    const bucketPresent = await withTimeout(minioClient.bucketExists(env.MINIO_BUCKET), 5000);
    if (!bucketPresent) {
      // Reachable server, missing bucket: do not report this as healthy.
      return {
        name: 'MinIO',
        status: 'degraded',
        responseTimeMs: Date.now() - startedAt,
        details: `Bucket "${env.MINIO_BUCKET}" does not exist`,
      };
    }
    return { name: 'MinIO', status: 'healthy', responseTimeMs: Date.now() - startedAt };
  } catch (err) {
    return {
      name: 'MinIO',
      status: 'down',
      responseTimeMs: Date.now() - startedAt,
      details: err instanceof Error ? err.message : 'Unknown error',
    };
  }
}
/**
 * Probes Documenso with `GET {DOCUMENSO_API_URL}/api/v1/health`, aborting the
 * request after 5000 ms.
 *
 * An OK response (`res.ok`) is `healthy`; any other response is `degraded`.
 * Never throws: network errors and aborts are reported as `down`.
 */
async function checkDocumenso(): Promise<ServiceStatus> {
  const startedAt = Date.now();
  const aborter = new AbortController();
  const abortTimer = setTimeout(() => aborter.abort(), 5000);

  try {
    const response = await fetch(`${env.DOCUMENSO_API_URL}/api/v1/health`, {
      signal: aborter.signal,
      method: 'GET',
    });
    return {
      name: 'Documenso',
      status: response.ok ? 'healthy' : 'degraded',
      responseTimeMs: Date.now() - startedAt,
    };
  } catch (err) {
    return {
      name: 'Documenso',
      status: 'down',
      responseTimeMs: Date.now() - startedAt,
      details: err instanceof Error ? err.message : 'Unreachable',
    };
  } finally {
    // Always release the abort timer, on success and on failure alike.
    clearTimeout(abortTimer);
  }
}
// ─── getQueueDashboard ────────────────────────────────────────────────────────
/**
 * Collects job counters for every queue listed in `QUEUE_CONFIGS`.
 *
 * Queues are queried concurrently. If fetching counts for a queue fails, a
 * warning is logged and that queue is reported with all counters at zero, so
 * the result always has one entry per configured queue, in configuration
 * order.
 */
export async function getQueueDashboard(): Promise<QueueStatus[]> {
  const queueNames = Object.keys(QUEUE_CONFIGS) as QueueName[];

  const settled = await Promise.allSettled(
    queueNames.map(async (queueName) => {
      const counts = await getQueue(queueName).getJobCounts(
        'waiting',
        'active',
        'completed',
        'failed',
        'delayed',
      );
      return {
        name: queueName,
        waiting: counts.waiting ?? 0,
        active: counts.active ?? 0,
        completed: counts.completed ?? 0,
        failed: counts.failed ?? 0,
        delayed: counts.delayed ?? 0,
      } satisfies QueueStatus;
    }),
  );

  const dashboard: QueueStatus[] = [];
  settled.forEach((outcome, index) => {
    if (outcome.status === 'fulfilled') {
      dashboard.push(outcome.value);
      return;
    }

    // Keep the queue visible on the dashboard even though its counts are unavailable.
    const name = queueNames[index] ?? 'unknown';
    logger.warn({ queue: name, err: outcome.reason }, 'Failed to get queue counts');
    dashboard.push({ name, waiting: 0, active: 0, completed: 0, failed: 0, delayed: 0 });
  });

  return dashboard;
}
// ─── getQueueJobs ─────────────────────────────────────────────────────────────
type JobStatus = 'waiting' | 'active' | 'completed' | 'failed' | 'delayed';

/**
 * Returns `data` unchanged when its JSON form is at most 500 characters;
 * otherwise returns the first 500 characters of the JSON text followed by
 * `...(truncated)`, as a string.
 *
 * A cut-off JSON document is not valid JSON, so the preview is deliberately a
 * plain string rather than a re-parsed object. Values that cannot be
 * serialized (e.g. circular structures) yield `'[unparseable]'`.
 */
function truncateJobData(data: unknown): unknown {
  const maxChars = 500;
  try {
    // JSON.stringify returns undefined for undefined/functions/symbols at runtime.
    const serialized: string | undefined = JSON.stringify(data);
    if (serialized === undefined || serialized.length <= maxChars) return data;
    return `${serialized.slice(0, maxChars)}...(truncated)`;
  } catch {
    return '[unparseable]';
  }
}

/**
 * Lists one page of jobs in a given state for a queue.
 *
 * @param queueName - Queue to inspect.
 * @param status - Job state to list (defaults to `failed`).
 * @param page - 1-based page number; non-finite or values below 1 fall back to 1.
 * @param limit - Page size; non-finite or values below 1 fall back to 20.
 * @returns The job summaries for the page, the total number of jobs in that
 *   state, and the effective `page` / `limit` that were used.
 */
export async function getQueueJobs(
  queueName: QueueName,
  status: JobStatus = 'failed',
  page = 1,
  limit = 20,
): Promise<PaginatedQueueJobs> {
  // Guard against zero/negative/NaN paging values, which would otherwise
  // produce a negative index range.
  const safePage = Number.isFinite(page) && page >= 1 ? Math.floor(page) : 1;
  const safeLimit = Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : 20;

  const queue = getQueue(queueName);
  const start = (safePage - 1) * safeLimit;
  const end = start + safeLimit - 1;

  // The page and the total are independent lookups; fetch them together.
  const [jobs, counts] = await Promise.all([
    queue.getJobs([status], start, end),
    queue.getJobCounts(status),
  ]);
  const total = counts[status] ?? 0;

  const summaries: QueueJobSummary[] = jobs.map((job) => ({
    id: job.id ?? '',
    name: job.name,
    // Truncate job data to prevent huge payloads
    data: truncateJobData(job.data),
    status,
    timestamp: job.timestamp,
    processedOn: job.processedOn ?? undefined,
    finishedOn: job.finishedOn ?? undefined,
    failedReason: job.failedReason ?? undefined,
  }));

  return { jobs: summaries, total, page: safePage, limit: safeLimit };
}
// ─── retryJob ─────────────────────────────────────────────────────────────────
/**
 * Retries a job and records the action in the audit log.
 *
 * The audit entry is written fire-and-forget (not awaited) after the retry
 * call has completed.
 *
 * @param queueName - Queue that owns the job.
 * @param jobId - Identifier of the job to retry.
 * @param userId - User performing the action, stored on the audit entry.
 * @throws Error if no job with `jobId` exists in the queue.
 */
export async function retryJob(
  queueName: QueueName,
  jobId: string,
  userId: string,
): Promise<void> {
  const target = await getQueue(queueName).getJob(jobId);
  if (!target) {
    throw new Error(`Job ${jobId} not found in queue ${queueName}`);
  }

  await target.retry();

  void createAuditLog({
    userId,
    portId: null,
    action: 'update',
    entityType: 'queue_job',
    entityId: jobId,
    metadata: { queueName, jobName: target.name, action: 'retry' },
    ipAddress: 'system',
    userAgent: 'system',
  });
}
// ─── deleteJob ────────────────────────────────────────────────────────────────
/**
 * Removes a job from its queue and records the action in the audit log.
 *
 * The audit entry is written fire-and-forget (not awaited) after the removal
 * call has completed.
 *
 * @param queueName - Queue that owns the job.
 * @param jobId - Identifier of the job to remove.
 * @param userId - User performing the action, stored on the audit entry.
 * @throws Error if no job with `jobId` exists in the queue.
 */
export async function deleteJob(
  queueName: QueueName,
  jobId: string,
  userId: string,
): Promise<void> {
  const target = await getQueue(queueName).getJob(jobId);
  if (!target) {
    throw new Error(`Job ${jobId} not found in queue ${queueName}`);
  }

  await target.remove();

  void createAuditLog({
    userId,
    portId: null,
    action: 'delete',
    entityType: 'queue_job',
    entityId: jobId,
    metadata: { queueName, jobName: target.name, action: 'delete' },
    ipAddress: 'system',
    userAgent: 'system',
  });
}
// ─── getActiveConnections ─────────────────────────────────────────────────────
/**
 * Counts the sockets currently known to the socket server.
 *
 * The socket server module is loaded lazily. Any failure along the way
 * (loading the module, obtaining the server, fetching sockets) is treated as
 * "no connections" and reported as zero rather than thrown.
 */
export async function getActiveConnections(): Promise<ConnectionStatus> {
  let totalConnections = 0;

  try {
    const { getIO } = await import('@/lib/socket/server');
    const connected = await getIO().fetchSockets();
    totalConnections = connected.length;
  } catch {
    // Best-effort metric: fall through with the zero default.
  }

  return { totalConnections };
}
// ─── getRecentErrors ──────────────────────────────────────────────────────────
/**
 * Builds a combined, newest-first list of recent errors from two sources:
 *
 * 1. Audit-log rows whose action is `permission_denied` (up to `limit` rows).
 * 2. Failed jobs from every queue in `QUEUE_CONFIGS` (a sample of up to five
 *    per queue).
 *
 * A queue whose failed jobs cannot be fetched is skipped, and a warning is
 * logged so the gap is visible instead of silently shrinking the result.
 *
 * @param limit - Maximum number of entries to return (default 20).
 * @returns At most `limit` errors, sorted by timestamp descending.
 */
export async function getRecentErrors(limit = 20): Promise<RecentError[]> {
  // Fetch permission-denied audit log entries
  const auditErrors = await db
    .select({
      id: auditLogs.id,
      action: auditLogs.action,
      entityType: auditLogs.entityType,
      entityId: auditLogs.entityId,
      metadata: auditLogs.metadata,
      createdAt: auditLogs.createdAt,
    })
    .from(auditLogs)
    .where(eq(auditLogs.action, 'permission_denied'))
    .orderBy(desc(auditLogs.createdAt))
    .limit(limit);

  const auditResults: RecentError[] = auditErrors.map((row) => ({
    id: row.id,
    source: 'audit' as const,
    message: `Permission denied on ${row.entityType}`,
    timestamp: row.createdAt,
    metadata: (row.metadata as Record<string, unknown>) ?? {},
  }));

  // Fetch failed jobs from all queues (sample — top 5 per queue)
  const queueNames = Object.keys(QUEUE_CONFIGS) as QueueName[];
  const failedJobResults = await Promise.allSettled(
    queueNames.map(async (name) => {
      const queue = getQueue(name);
      const jobs = await queue.getJobs(['failed'], 0, 4);
      return jobs.map(
        (job): RecentError => ({
          id: `${name}:${job.id ?? ''}`,
          source: 'queue',
          message: `Queue job failed: ${job.name} in ${name}`,
          // Prefer the finish time; fall back to the job's own timestamp.
          timestamp: job.finishedOn ? new Date(job.finishedOn) : new Date(job.timestamp),
          metadata: { queueName: name, failedReason: job.failedReason },
        }),
      );
    }),
  );

  const queueErrors: RecentError[] = [];
  failedJobResults.forEach((outcome, index) => {
    if (outcome.status === 'fulfilled') {
      queueErrors.push(...outcome.value);
      return;
    }
    // Stay best-effort, but surface the failure like getQueueDashboard does.
    logger.warn(
      { queue: queueNames[index] ?? 'unknown', err: outcome.reason },
      'Failed to get failed jobs for recent errors',
    );
  });

  // Merge and sort combined list by timestamp descending
  const combined = [...auditResults, ...queueErrors].sort(
    (a, b) => b.timestamp.getTime() - a.timestamp.getTime(),
  );
  return combined.slice(0, limit);
}