// MOPC-App/src/server/services/ai-evaluation-summary.ts

/**
* AI-Powered Evaluation Summary Service
*
* Generates AI summaries of jury evaluations for a project in a given round.
* Combines OpenAI analysis with server-side scoring pattern calculations.
*
* GDPR Compliance:
* - All evaluation data is anonymized before AI processing
* - No juror names, emails, or identifiers are sent to OpenAI
* - Only scores, feedback text, and binary decisions are included
*/
import { TRPCError } from '@trpc/server'
import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import { sanitizeText } from './anonymization'
import type { PrismaClient, Prisma } from '@prisma/client'
// ─── Types ──────────────────────────────────────────────────────────────────
/**
 * Shape of an evaluation row as selected by `generateSummary`, including the
 * juror relation. The `assignment.user` identity must be stripped (via
 * `anonymizeEvaluations`) before any of this data reaches OpenAI.
 */
interface EvaluationForSummary {
  id: string
  // Per-criterion scores keyed by criterion id (matches CriterionDef.id).
  criterionScoresJson: Record<string, number> | null
  globalScore: number | null
  binaryDecision: boolean | null
  feedbackText: string | null
  assignment: {
    user: {
      id: string
      name: string | null
      email: string
    }
  }
}
/**
 * Evaluation data after anonymization — contains no juror identifiers or
 * evaluation ids, so it is safe to serialize into the AI prompt.
 */
interface AnonymizedEvaluation {
  criterionScores: Record<string, number> | null
  globalScore: number | null
  binaryDecision: boolean | null
  feedbackText: string | null
}
/** One criterion definition from the round's active evaluation form. */
interface CriterionDef {
  id: string
  label: string
}
/** Structure the AI is instructed to return (see `buildSummaryPrompt`). */
interface AIResponsePayload {
  overallAssessment: string
  strengths: string[]
  weaknesses: string[]
  themes: Array<{
    theme: string
    sentiment: 'positive' | 'negative' | 'mixed'
    // Number of evaluators mentioning this theme, as estimated by the AI.
    frequency: number
  }>
  recommendation: string
}
/** Aggregates computed server-side (no AI) by `computeScoringPatterns`. */
interface ScoringPatterns {
  // Mean of non-null global scores, or null when none exist.
  averageGlobalScore: number | null
  // 1.0 = full agreement; see computeScoringPatterns for the formula.
  consensus: number
  // Mean score per criterion, keyed by the criterion's human-readable label.
  criterionAverages: Record<string, number>
  evaluatorCount: number
}
/** Result returned to the caller after the summary is upserted. */
export interface EvaluationSummaryResult {
  id: string
  projectId: string
  roundId: string
  summaryJson: AIResponsePayload & { scoringPatterns: ScoringPatterns }
  generatedAt: Date
  model: string
  tokensUsed: number
}
// ─── Anonymization ──────────────────────────────────────────────────────────
/**
 * Strip juror identity (the `assignment.user` name/email relation) from
 * evaluations, keeping only scores, the binary decision, and sanitized
 * feedback text.
 *
 * GDPR: this is the single choke point evaluation data must pass through
 * before being serialized into an OpenAI prompt — the returned objects carry
 * no user identifiers and no evaluation ids.
 *
 * @param evaluations - Submitted evaluations including the juror relation.
 * @returns One anonymized record per evaluation, in the same order.
 */
export function anonymizeEvaluations(
  evaluations: EvaluationForSummary[]
): AnonymizedEvaluation[] {
  return evaluations.map((ev) => ({
    // The input type already declares Record<string, number> | null,
    // so the previous `as` assertion was redundant.
    criterionScores: ev.criterionScoresJson,
    globalScore: ev.globalScore,
    binaryDecision: ev.binaryDecision,
    // Free-text feedback may contain names typed by jurors; scrub it.
    feedbackText: ev.feedbackText ? sanitizeText(ev.feedbackText) : null,
  }))
}
// ─── Prompt Building ────────────────────────────────────────────────────────
/**
 * Assemble the prompt sent to OpenAI for evaluation-summary generation.
 *
 * Embeds only anonymized evaluation records and a sanitized project title,
 * and instructs the model to answer with a JSON object matching
 * AIResponsePayload.
 */
export function buildSummaryPrompt(
  anonymizedEvaluations: AnonymizedEvaluation[],
  projectTitle: string,
  criteriaLabels: string[]
): string {
  // Name the pieces first so the template below stays readable.
  const title = sanitizeText(projectTitle)
  const criteriaList = criteriaLabels.join(', ')
  const evaluationCount = anonymizedEvaluations.length
  const evaluationPayload = JSON.stringify(anonymizedEvaluations, null, 2)
  return `You are analyzing jury evaluations for a project competition.
PROJECT: "${title}"
EVALUATION CRITERIA: ${criteriaList}
EVALUATIONS (${evaluationCount} total):
${evaluationPayload}
Analyze these evaluations and return a JSON object with this exact structure:
{
"overallAssessment": "A 2-3 sentence summary of how the project was evaluated overall",
"strengths": ["strength 1", "strength 2", ...],
"weaknesses": ["weakness 1", "weakness 2", ...],
"themes": [
{ "theme": "theme name", "sentiment": "positive" | "negative" | "mixed", "frequency": <number of evaluators mentioning this> }
],
"recommendation": "A brief recommendation based on the evaluation consensus"
}
Guidelines:
- Base your analysis only on the provided evaluation data
- Identify common themes across evaluator feedback
- Note areas of agreement and disagreement
- Keep the assessment objective and balanced
- Do not include any personal identifiers`
}
// ─── Scoring Patterns (Server-Side) ─────────────────────────────────────────
/**
 * Compute aggregate scoring patterns from submitted evaluations.
 *
 * Pure, server-side computation — no AI involved. Produces:
 * - `averageGlobalScore`: mean of non-null global scores, or null when no
 *   evaluation carries a global score.
 * - `consensus`: `1 - stdDev / 4.5`, clamped to [0, 1] and rounded to two
 *   decimals; 4.5 is the maximum possible standard deviation on the 1-10
 *   scoring scale, so 1.0 means full agreement. Zero or one score counts
 *   trivially as full consensus.
 * - `criterionAverages`: mean score per criterion, keyed by the criterion's
 *   label; criteria nobody scored are omitted.
 * - `evaluatorCount`: total number of evaluations considered.
 *
 * @param evaluations - Submitted evaluations for one project/round.
 * @param criteriaLabels - Criterion definitions from the active form.
 */
export function computeScoringPatterns(
  evaluations: EvaluationForSummary[],
  criteriaLabels: CriterionDef[]
): ScoringPatterns {
  const globalScores = evaluations
    .map((e) => e.globalScore)
    .filter((s): s is number => s !== null)
  // Average global score
  const averageGlobalScore =
    globalScores.length > 0
      ? globalScores.reduce((a, b) => a + b, 0) / globalScores.length
      : null
  // Consensus: 1 - normalized standard deviation (1.0 = full consensus)
  let consensus = 1
  if (globalScores.length > 1 && averageGlobalScore !== null) {
    const variance =
      globalScores.reduce(
        (sum, score) => sum + Math.pow(score - averageGlobalScore, 2),
        0
      ) / globalScores.length
    const stdDev = Math.sqrt(variance)
    // Normalize by the scoring scale (1-10, so max possible std dev is 4.5)
    consensus = Math.max(0, 1 - stdDev / 4.5)
  }
  // Criterion averages, keyed by human-readable label
  const criterionAverages: Record<string, number> = {}
  for (const criterion of criteriaLabels) {
    const scores: number[] = []
    for (const ev of evaluations) {
      // criterionScoresJson is already typed Record<string, number> | null;
      // read the key once so the undefined-check narrows correctly under
      // noUncheckedIndexedAccess (the old double index did not).
      const score = ev.criterionScoresJson?.[criterion.id]
      if (score !== undefined) {
        scores.push(score)
      }
    }
    if (scores.length > 0) {
      criterionAverages[criterion.label] =
        scores.reduce((a, b) => a + b, 0) / scores.length
    }
  }
  return {
    averageGlobalScore,
    consensus: Math.round(consensus * 100) / 100,
    criterionAverages,
    evaluatorCount: evaluations.length,
  }
}
// ─── Main Orchestrator ──────────────────────────────────────────────────────
/**
 * Generate an AI-powered evaluation summary for a project in a round.
 *
 * Flow:
 *   1. Load the project and its SUBMITTED evaluations for the round.
 *   2. Anonymize evaluations (GDPR — no juror identifiers leave the server).
 *   3. Ask OpenAI for a qualitative summary (JSON mode).
 *   4. Compute scoring patterns server-side (no AI).
 *   5. Upsert the merged summary (one per project/round) and log usage.
 *
 * @throws TRPCError NOT_FOUND when the project does not exist.
 * @throws TRPCError BAD_REQUEST when no submitted evaluations exist.
 * @throws TRPCError PRECONDITION_FAILED when OpenAI is not configured.
 * @throws TRPCError INTERNAL_SERVER_ERROR on AI call or parse failure.
 */
export async function generateSummary({
  projectId,
  roundId,
  userId,
  prisma,
}: {
  projectId: string
  roundId: string
  userId: string
  prisma: PrismaClient
}): Promise<EvaluationSummaryResult> {
  // 1. Fetch project with evaluations and form criteria
  const project = await prisma.project.findUnique({
    where: { id: projectId },
    select: {
      id: true,
      title: true,
      // NOTE(review): roundId is selected but never compared against the
      // `roundId` argument — confirm whether a mismatch should be rejected.
      roundId: true,
    },
  })
  if (!project) {
    throw new TRPCError({ code: 'NOT_FOUND', message: 'Project not found' })
  }
  // Fetch submitted evaluations for this project in this round.
  // Only SUBMITTED status is summarized; drafts are excluded.
  const evaluations = await prisma.evaluation.findMany({
    where: {
      status: 'SUBMITTED',
      assignment: {
        projectId,
        roundId,
      },
    },
    select: {
      id: true,
      criterionScoresJson: true,
      globalScore: true,
      binaryDecision: true,
      feedbackText: true,
      // Juror identity is fetched here but stripped by anonymizeEvaluations
      // before anything is sent to OpenAI.
      assignment: {
        select: {
          user: {
            select: { id: true, name: true, email: true },
          },
        },
      },
    },
  })
  if (evaluations.length === 0) {
    throw new TRPCError({
      code: 'BAD_REQUEST',
      message: 'No submitted evaluations found for this project in this round',
    })
  }
  // Get evaluation form criteria for this round; fall back to an empty
  // list when no active form exists (criterion averages will be empty).
  const form = await prisma.evaluationForm.findFirst({
    where: { roundId, isActive: true },
    select: { criteriaJson: true },
  })
  const criteria: CriterionDef[] = form?.criteriaJson
    ? (form.criteriaJson as unknown as CriterionDef[])
    : []
  const criteriaLabels = criteria.map((c) => c.label)
  // 2. Anonymize evaluations (GDPR choke point)
  const typedEvaluations = evaluations as unknown as EvaluationForSummary[]
  const anonymized = anonymizeEvaluations(typedEvaluations)
  // 3. Build prompt and call OpenAI
  const openai = await getOpenAI()
  if (!openai) {
    throw new TRPCError({
      code: 'PRECONDITION_FAILED',
      message: 'OpenAI is not configured. Please set up your API key in Settings.',
    })
  }
  const model = await getConfiguredModel(AI_MODELS.QUICK)
  const prompt = buildSummaryPrompt(anonymized, project.title, criteriaLabels)
  let aiResponse: AIResponsePayload
  let tokensUsed = 0
  try {
    const params = buildCompletionParams(model, {
      messages: [
        { role: 'user', content: prompt },
      ],
      jsonMode: true,
      temperature: 0.3,
      maxTokens: 2000,
    })
    const response = await openai.chat.completions.create(params)
    // Capture token usage before parsing, so a parse failure can still log
    // the tokens that were actually consumed by the request.
    const usage = extractTokenUsage(response)
    tokensUsed = usage.totalTokens
    const content = response.choices[0]?.message?.content
    if (!content) {
      throw new Error('Empty response from AI')
    }
    // NOTE(review): parsed JSON is cast, not validated, against
    // AIResponsePayload — consider schema validation before trusting it.
    aiResponse = JSON.parse(content) as AIResponsePayload
  } catch (error) {
    // JSON.parse failures take a dedicated parse-error path: the API call
    // itself succeeded, so the consumed tokens are still logged.
    if (error instanceof SyntaxError) {
      const parseError = createParseError(error.message)
      logAIError('EvaluationSummary', 'generateSummary', parseError)
      await logAIUsage({
        userId,
        action: 'EVALUATION_SUMMARY',
        entityType: 'Project',
        entityId: projectId,
        model,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: tokensUsed,
        itemsProcessed: 0,
        status: 'ERROR',
        errorMessage: parseError.message,
      })
      throw new TRPCError({
        code: 'INTERNAL_SERVER_ERROR',
        message: 'Failed to parse AI response. Please try again.',
      })
    }
    // All other failures (network, rate limit, empty response) are
    // classified into a presentable message; no tokens assumed consumed.
    const classified = classifyAIError(error)
    logAIError('EvaluationSummary', 'generateSummary', classified)
    await logAIUsage({
      userId,
      action: 'EVALUATION_SUMMARY',
      entityType: 'Project',
      entityId: projectId,
      model,
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
      itemsProcessed: 0,
      status: 'ERROR',
      errorMessage: classified.message,
    })
    throw new TRPCError({
      code: 'INTERNAL_SERVER_ERROR',
      message: classified.message,
    })
  }
  // 4. Compute scoring patterns (server-side, no AI)
  const scoringPatterns = computeScoringPatterns(typedEvaluations, criteria)
  // 5. Merge AI output with computed patterns and upsert — one summary per
  // project/round; regeneration overwrites the previous summary.
  const summaryJson = {
    ...aiResponse,
    scoringPatterns,
  }
  const summaryJsonValue = summaryJson as unknown as Prisma.InputJsonValue
  const summary = await prisma.evaluationSummary.upsert({
    where: {
      projectId_roundId: { projectId, roundId },
    },
    create: {
      projectId,
      roundId,
      summaryJson: summaryJsonValue,
      generatedById: userId,
      model,
      tokensUsed,
    },
    update: {
      summaryJson: summaryJsonValue,
      generatedAt: new Date(),
      generatedById: userId,
      model,
      tokensUsed,
    },
  })
  // 6. Log AI usage (success path)
  await logAIUsage({
    userId,
    action: 'EVALUATION_SUMMARY',
    entityType: 'Project',
    entityId: projectId,
    model,
    promptTokens: 0, // Detailed breakdown not always available
    completionTokens: 0,
    totalTokens: tokensUsed,
    itemsProcessed: evaluations.length,
    status: 'SUCCESS',
  })
  return {
    id: summary.id,
    projectId: summary.projectId,
    roundId: summary.roundId,
    summaryJson: summaryJson as AIResponsePayload & { scoringPatterns: ScoringPatterns },
    generatedAt: summary.generatedAt,
    model: summary.model,
    tokensUsed: summary.tokensUsed,
  }
}