406 lines
12 KiB
TypeScript
406 lines
12 KiB
TypeScript
|
|
/**
|
||
|
|
* AI-Powered Evaluation Summary Service
|
||
|
|
*
|
||
|
|
* Generates AI summaries of jury evaluations for a project in a given round.
|
||
|
|
* Combines OpenAI analysis with server-side scoring pattern calculations.
|
||
|
|
*
|
||
|
|
* GDPR Compliance:
|
||
|
|
* - All evaluation data is anonymized before AI processing
|
||
|
|
* - No juror names, emails, or identifiers are sent to OpenAI
|
||
|
|
* - Only scores, feedback text, and binary decisions are included
|
||
|
|
*/
|
||
|
|
|
||
|
|
import { TRPCError } from '@trpc/server'
|
||
|
|
import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
|
||
|
|
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
|
||
|
|
import { classifyAIError, createParseError, logAIError } from './ai-errors'
|
||
|
|
import { sanitizeText } from './anonymization'
|
||
|
|
import type { PrismaClient, Prisma } from '@prisma/client'
|
||
|
|
|
||
|
|
// ─── Types ──────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
// Shape of an evaluation row as selected by generateSummary's
// prisma.evaluation.findMany call. Note this still carries juror identity
// (assignment.user) — it MUST pass through anonymizeEvaluations() before
// any of it is sent to OpenAI.
interface EvaluationForSummary {
  id: string
  // Per-criterion scores keyed by criterion id (untyped JSON column in the DB).
  criterionScoresJson: Record<string, number> | null
  globalScore: number | null
  binaryDecision: boolean | null
  // Free-text juror feedback; sanitized before AI processing.
  feedbackText: string | null
  // Juror identity — GDPR-sensitive; stripped by anonymizeEvaluations().
  assignment: {
    user: {
      id: string
      name: string | null
      email: string
    }
  }
}
|
||
|
|
|
||
|
|
// Evaluation payload after anonymization: scores, decision, and sanitized
// feedback only — no juror identifiers. This is the only evaluation shape
// that is serialized into the OpenAI prompt.
interface AnonymizedEvaluation {
  criterionScores: Record<string, number> | null
  globalScore: number | null
  binaryDecision: boolean | null
  // Feedback run through sanitizeText() to strip residual identifiers.
  feedbackText: string | null
}
|
||
|
|
|
||
|
|
// A single criterion definition from the evaluation form's criteriaJson column.
interface CriterionDef {
  id: string // key used to look up scores in criterionScoresJson
  label: string // human-readable name, used in prompts and criterionAverages keys
}
|
||
|
|
|
||
|
|
// JSON structure the model is instructed to return (see the template in
// buildSummaryPrompt). Parsed straight from the completion with JSON.parse;
// NOTE(review): not schema-validated at runtime — a parseable but
// wrong-shaped response flows through unchecked.
interface AIResponsePayload {
  overallAssessment: string
  strengths: string[]
  weaknesses: string[]
  themes: Array<{
    theme: string
    sentiment: 'positive' | 'negative' | 'mixed'
    frequency: number // number of evaluators mentioning this theme
  }>
  recommendation: string
}
|
||
|
|
|
||
|
|
// Deterministic statistics computed server-side (no AI) by
// computeScoringPatterns; merged into the stored summary alongside the
// model's analysis.
interface ScoringPatterns {
  // Mean of all non-null global scores, or null when none were given.
  averageGlobalScore: number | null
  // 0..1 agreement measure: 1 - (stdDev of global scores / 4.5), floored at 0.
  // 1.0 means full consensus (also the value when fewer than 2 scores exist).
  consensus: number
  // Average score per criterion, keyed by criterion *label* (not id).
  criterionAverages: Record<string, number>
  evaluatorCount: number
}
|
||
|
|
|
||
|
|
// Result returned to callers of generateSummary; mirrors the row persisted
// via prisma.evaluationSummary.upsert.
export interface EvaluationSummaryResult {
  id: string
  projectId: string
  roundId: string
  // AI analysis merged with the server-side scoring patterns.
  summaryJson: AIResponsePayload & { scoringPatterns: ScoringPatterns }
  generatedAt: Date
  model: string // model identifier actually used for the completion
  tokensUsed: number // total tokens reported by the API (0 if unavailable)
}
|
||
|
|
|
||
|
|
// ─── Anonymization ──────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Strip juror names/emails from evaluations, keeping only scores and feedback.
|
||
|
|
*/
|
||
|
|
export function anonymizeEvaluations(
|
||
|
|
evaluations: EvaluationForSummary[]
|
||
|
|
): AnonymizedEvaluation[] {
|
||
|
|
return evaluations.map((ev) => ({
|
||
|
|
criterionScores: ev.criterionScoresJson as Record<string, number> | null,
|
||
|
|
globalScore: ev.globalScore,
|
||
|
|
binaryDecision: ev.binaryDecision,
|
||
|
|
feedbackText: ev.feedbackText ? sanitizeText(ev.feedbackText) : null,
|
||
|
|
}))
|
||
|
|
}
|
||
|
|
|
||
|
|
// ─── Prompt Building ────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Build the OpenAI prompt for evaluation summary generation.
|
||
|
|
*/
|
||
|
|
export function buildSummaryPrompt(
|
||
|
|
anonymizedEvaluations: AnonymizedEvaluation[],
|
||
|
|
projectTitle: string,
|
||
|
|
criteriaLabels: string[]
|
||
|
|
): string {
|
||
|
|
const sanitizedTitle = sanitizeText(projectTitle)
|
||
|
|
|
||
|
|
return `You are analyzing jury evaluations for a project competition.
|
||
|
|
|
||
|
|
PROJECT: "${sanitizedTitle}"
|
||
|
|
|
||
|
|
EVALUATION CRITERIA: ${criteriaLabels.join(', ')}
|
||
|
|
|
||
|
|
EVALUATIONS (${anonymizedEvaluations.length} total):
|
||
|
|
${JSON.stringify(anonymizedEvaluations, null, 2)}
|
||
|
|
|
||
|
|
Analyze these evaluations and return a JSON object with this exact structure:
|
||
|
|
{
|
||
|
|
"overallAssessment": "A 2-3 sentence summary of how the project was evaluated overall",
|
||
|
|
"strengths": ["strength 1", "strength 2", ...],
|
||
|
|
"weaknesses": ["weakness 1", "weakness 2", ...],
|
||
|
|
"themes": [
|
||
|
|
{ "theme": "theme name", "sentiment": "positive" | "negative" | "mixed", "frequency": <number of evaluators mentioning this> }
|
||
|
|
],
|
||
|
|
"recommendation": "A brief recommendation based on the evaluation consensus"
|
||
|
|
}
|
||
|
|
|
||
|
|
Guidelines:
|
||
|
|
- Base your analysis only on the provided evaluation data
|
||
|
|
- Identify common themes across evaluator feedback
|
||
|
|
- Note areas of agreement and disagreement
|
||
|
|
- Keep the assessment objective and balanced
|
||
|
|
- Do not include any personal identifiers`
|
||
|
|
}
|
||
|
|
|
||
|
|
// ─── Scoring Patterns (Server-Side) ─────────────────────────────────────────
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Compute scoring patterns from evaluations without AI.
|
||
|
|
*/
|
||
|
|
export function computeScoringPatterns(
|
||
|
|
evaluations: EvaluationForSummary[],
|
||
|
|
criteriaLabels: CriterionDef[]
|
||
|
|
): ScoringPatterns {
|
||
|
|
const globalScores = evaluations
|
||
|
|
.map((e) => e.globalScore)
|
||
|
|
.filter((s): s is number => s !== null)
|
||
|
|
|
||
|
|
// Average global score
|
||
|
|
const averageGlobalScore =
|
||
|
|
globalScores.length > 0
|
||
|
|
? globalScores.reduce((a, b) => a + b, 0) / globalScores.length
|
||
|
|
: null
|
||
|
|
|
||
|
|
// Consensus: 1 - normalized standard deviation (1.0 = full consensus)
|
||
|
|
let consensus = 1
|
||
|
|
if (globalScores.length > 1 && averageGlobalScore !== null) {
|
||
|
|
const variance =
|
||
|
|
globalScores.reduce(
|
||
|
|
(sum, score) => sum + Math.pow(score - averageGlobalScore, 2),
|
||
|
|
0
|
||
|
|
) / globalScores.length
|
||
|
|
const stdDev = Math.sqrt(variance)
|
||
|
|
// Normalize by the scoring scale (1-10, so max possible std dev is ~4.5)
|
||
|
|
consensus = Math.max(0, 1 - stdDev / 4.5)
|
||
|
|
}
|
||
|
|
|
||
|
|
// Criterion averages
|
||
|
|
const criterionAverages: Record<string, number> = {}
|
||
|
|
for (const criterion of criteriaLabels) {
|
||
|
|
const scores: number[] = []
|
||
|
|
for (const ev of evaluations) {
|
||
|
|
const criterionScores = ev.criterionScoresJson as Record<string, number> | null
|
||
|
|
if (criterionScores && criterionScores[criterion.id] !== undefined) {
|
||
|
|
scores.push(criterionScores[criterion.id])
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if (scores.length > 0) {
|
||
|
|
criterionAverages[criterion.label] =
|
||
|
|
scores.reduce((a, b) => a + b, 0) / scores.length
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return {
|
||
|
|
averageGlobalScore,
|
||
|
|
consensus: Math.round(consensus * 100) / 100,
|
||
|
|
criterionAverages,
|
||
|
|
evaluatorCount: evaluations.length,
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// ─── Main Orchestrator ──────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Generate an AI-powered evaluation summary for a project in a round.
|
||
|
|
*/
|
||
|
|
export async function generateSummary({
|
||
|
|
projectId,
|
||
|
|
roundId,
|
||
|
|
userId,
|
||
|
|
prisma,
|
||
|
|
}: {
|
||
|
|
projectId: string
|
||
|
|
roundId: string
|
||
|
|
userId: string
|
||
|
|
prisma: PrismaClient
|
||
|
|
}): Promise<EvaluationSummaryResult> {
|
||
|
|
// 1. Fetch project with evaluations and form criteria
|
||
|
|
const project = await prisma.project.findUnique({
|
||
|
|
where: { id: projectId },
|
||
|
|
select: {
|
||
|
|
id: true,
|
||
|
|
title: true,
|
||
|
|
roundId: true,
|
||
|
|
},
|
||
|
|
})
|
||
|
|
|
||
|
|
if (!project) {
|
||
|
|
throw new TRPCError({ code: 'NOT_FOUND', message: 'Project not found' })
|
||
|
|
}
|
||
|
|
|
||
|
|
// Fetch submitted evaluations for this project in this round
|
||
|
|
const evaluations = await prisma.evaluation.findMany({
|
||
|
|
where: {
|
||
|
|
status: 'SUBMITTED',
|
||
|
|
assignment: {
|
||
|
|
projectId,
|
||
|
|
roundId,
|
||
|
|
},
|
||
|
|
},
|
||
|
|
select: {
|
||
|
|
id: true,
|
||
|
|
criterionScoresJson: true,
|
||
|
|
globalScore: true,
|
||
|
|
binaryDecision: true,
|
||
|
|
feedbackText: true,
|
||
|
|
assignment: {
|
||
|
|
select: {
|
||
|
|
user: {
|
||
|
|
select: { id: true, name: true, email: true },
|
||
|
|
},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
})
|
||
|
|
|
||
|
|
if (evaluations.length === 0) {
|
||
|
|
throw new TRPCError({
|
||
|
|
code: 'BAD_REQUEST',
|
||
|
|
message: 'No submitted evaluations found for this project in this round',
|
||
|
|
})
|
||
|
|
}
|
||
|
|
|
||
|
|
// Get evaluation form criteria for this round
|
||
|
|
const form = await prisma.evaluationForm.findFirst({
|
||
|
|
where: { roundId, isActive: true },
|
||
|
|
select: { criteriaJson: true },
|
||
|
|
})
|
||
|
|
|
||
|
|
const criteria: CriterionDef[] = form?.criteriaJson
|
||
|
|
? (form.criteriaJson as unknown as CriterionDef[])
|
||
|
|
: []
|
||
|
|
const criteriaLabels = criteria.map((c) => c.label)
|
||
|
|
|
||
|
|
// 2. Anonymize evaluations
|
||
|
|
const typedEvaluations = evaluations as unknown as EvaluationForSummary[]
|
||
|
|
const anonymized = anonymizeEvaluations(typedEvaluations)
|
||
|
|
|
||
|
|
// 3. Build prompt and call OpenAI
|
||
|
|
const openai = await getOpenAI()
|
||
|
|
if (!openai) {
|
||
|
|
throw new TRPCError({
|
||
|
|
code: 'PRECONDITION_FAILED',
|
||
|
|
message: 'OpenAI is not configured. Please set up your API key in Settings.',
|
||
|
|
})
|
||
|
|
}
|
||
|
|
|
||
|
|
const model = await getConfiguredModel(AI_MODELS.QUICK)
|
||
|
|
const prompt = buildSummaryPrompt(anonymized, project.title, criteriaLabels)
|
||
|
|
|
||
|
|
let aiResponse: AIResponsePayload
|
||
|
|
let tokensUsed = 0
|
||
|
|
|
||
|
|
try {
|
||
|
|
const params = buildCompletionParams(model, {
|
||
|
|
messages: [
|
||
|
|
{ role: 'user', content: prompt },
|
||
|
|
],
|
||
|
|
jsonMode: true,
|
||
|
|
temperature: 0.3,
|
||
|
|
maxTokens: 2000,
|
||
|
|
})
|
||
|
|
|
||
|
|
const response = await openai.chat.completions.create(params)
|
||
|
|
const usage = extractTokenUsage(response)
|
||
|
|
tokensUsed = usage.totalTokens
|
||
|
|
|
||
|
|
const content = response.choices[0]?.message?.content
|
||
|
|
if (!content) {
|
||
|
|
throw new Error('Empty response from AI')
|
||
|
|
}
|
||
|
|
|
||
|
|
aiResponse = JSON.parse(content) as AIResponsePayload
|
||
|
|
} catch (error) {
|
||
|
|
if (error instanceof SyntaxError) {
|
||
|
|
const parseError = createParseError(error.message)
|
||
|
|
logAIError('EvaluationSummary', 'generateSummary', parseError)
|
||
|
|
|
||
|
|
await logAIUsage({
|
||
|
|
userId,
|
||
|
|
action: 'EVALUATION_SUMMARY',
|
||
|
|
entityType: 'Project',
|
||
|
|
entityId: projectId,
|
||
|
|
model,
|
||
|
|
promptTokens: 0,
|
||
|
|
completionTokens: 0,
|
||
|
|
totalTokens: tokensUsed,
|
||
|
|
itemsProcessed: 0,
|
||
|
|
status: 'ERROR',
|
||
|
|
errorMessage: parseError.message,
|
||
|
|
})
|
||
|
|
|
||
|
|
throw new TRPCError({
|
||
|
|
code: 'INTERNAL_SERVER_ERROR',
|
||
|
|
message: 'Failed to parse AI response. Please try again.',
|
||
|
|
})
|
||
|
|
}
|
||
|
|
|
||
|
|
const classified = classifyAIError(error)
|
||
|
|
logAIError('EvaluationSummary', 'generateSummary', classified)
|
||
|
|
|
||
|
|
await logAIUsage({
|
||
|
|
userId,
|
||
|
|
action: 'EVALUATION_SUMMARY',
|
||
|
|
entityType: 'Project',
|
||
|
|
entityId: projectId,
|
||
|
|
model,
|
||
|
|
promptTokens: 0,
|
||
|
|
completionTokens: 0,
|
||
|
|
totalTokens: 0,
|
||
|
|
itemsProcessed: 0,
|
||
|
|
status: 'ERROR',
|
||
|
|
errorMessage: classified.message,
|
||
|
|
})
|
||
|
|
|
||
|
|
throw new TRPCError({
|
||
|
|
code: 'INTERNAL_SERVER_ERROR',
|
||
|
|
message: classified.message,
|
||
|
|
})
|
||
|
|
}
|
||
|
|
|
||
|
|
// 4. Compute scoring patterns (server-side, no AI)
|
||
|
|
const scoringPatterns = computeScoringPatterns(typedEvaluations, criteria)
|
||
|
|
|
||
|
|
// 5. Merge and upsert
|
||
|
|
const summaryJson = {
|
||
|
|
...aiResponse,
|
||
|
|
scoringPatterns,
|
||
|
|
}
|
||
|
|
|
||
|
|
const summaryJsonValue = summaryJson as unknown as Prisma.InputJsonValue
|
||
|
|
|
||
|
|
const summary = await prisma.evaluationSummary.upsert({
|
||
|
|
where: {
|
||
|
|
projectId_roundId: { projectId, roundId },
|
||
|
|
},
|
||
|
|
create: {
|
||
|
|
projectId,
|
||
|
|
roundId,
|
||
|
|
summaryJson: summaryJsonValue,
|
||
|
|
generatedById: userId,
|
||
|
|
model,
|
||
|
|
tokensUsed,
|
||
|
|
},
|
||
|
|
update: {
|
||
|
|
summaryJson: summaryJsonValue,
|
||
|
|
generatedAt: new Date(),
|
||
|
|
generatedById: userId,
|
||
|
|
model,
|
||
|
|
tokensUsed,
|
||
|
|
},
|
||
|
|
})
|
||
|
|
|
||
|
|
// 6. Log AI usage
|
||
|
|
await logAIUsage({
|
||
|
|
userId,
|
||
|
|
action: 'EVALUATION_SUMMARY',
|
||
|
|
entityType: 'Project',
|
||
|
|
entityId: projectId,
|
||
|
|
model,
|
||
|
|
promptTokens: 0, // Detailed breakdown not always available
|
||
|
|
completionTokens: 0,
|
||
|
|
totalTokens: tokensUsed,
|
||
|
|
itemsProcessed: evaluations.length,
|
||
|
|
status: 'SUCCESS',
|
||
|
|
})
|
||
|
|
|
||
|
|
return {
|
||
|
|
id: summary.id,
|
||
|
|
projectId: summary.projectId,
|
||
|
|
roundId: summary.roundId,
|
||
|
|
summaryJson: summaryJson as AIResponsePayload & { scoringPatterns: ScoringPatterns },
|
||
|
|
generatedAt: summary.generatedAt,
|
||
|
|
model: summary.model,
|
||
|
|
tokensUsed: summary.tokensUsed,
|
||
|
|
}
|
||
|
|
}
|