// MOPC-App/src/server/services/ai-evaluation-summary.ts

/**
* AI-Powered Evaluation Summary Service
*
* Generates AI summaries of jury evaluations for a project in a given round.
* Combines OpenAI analysis with server-side scoring pattern calculations.
*
* GDPR Compliance:
* - All evaluation data is anonymized before AI processing
* - No juror names, emails, or identifiers are sent to OpenAI
* - Only scores, feedback text, and binary decisions are included
*/
import { TRPCError } from '@trpc/server'
import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import { sanitizeText } from './anonymization'
import type { PrismaClient, Prisma } from '@prisma/client'
// ─── Types ──────────────────────────────────────────────────────────────────
/**
 * Shape of an evaluation row as selected by `generateSummary`, including the
 * juror relation. The `assignment.user` identity must be stripped (via
 * `anonymizeEvaluations`) before any of this data reaches OpenAI.
 */
interface EvaluationForSummary {
  id: string
  // Per-criterion scores keyed by criterion id (matches CriterionDef.id).
  criterionScoresJson: Record<string, number> | null
  globalScore: number | null
  binaryDecision: boolean | null
  feedbackText: string | null
  assignment: {
    user: {
      id: string
      name: string | null
      email: string
    }
  }
}
/**
 * Evaluation data after anonymization — contains no juror identifiers or
 * evaluation ids, so it is safe to serialize into the AI prompt.
 */
interface AnonymizedEvaluation {
  criterionScores: Record<string, number> | null
  globalScore: number | null
  binaryDecision: boolean | null
  feedbackText: string | null
}
/** One criterion definition from the round's active evaluation form. */
interface CriterionDef {
  id: string
  label: string
}
/** Structure the AI is instructed to return (see `buildSummaryPrompt`). */
interface AIResponsePayload {
  overallAssessment: string
  strengths: string[]
  weaknesses: string[]
  themes: Array<{
    theme: string
    sentiment: 'positive' | 'negative' | 'mixed'
    // Number of evaluators mentioning this theme, as estimated by the AI.
    frequency: number
  }>
  recommendation: string
}
/** Aggregates computed server-side (no AI) by `computeScoringPatterns`. */
interface ScoringPatterns {
  // Mean of non-null global scores, or null when none exist.
  averageGlobalScore: number | null
  // 1.0 = full agreement; see computeScoringPatterns for the formula.
  consensus: number
  // Mean score per criterion, keyed by the criterion's human-readable label.
  criterionAverages: Record<string, number>
  evaluatorCount: number
}
/** Result returned to the caller after the summary is upserted. */
export interface EvaluationSummaryResult {
  id: string
  projectId: string
  roundId: string
  summaryJson: AIResponsePayload & { scoringPatterns: ScoringPatterns }
  generatedAt: Date
  model: string
  tokensUsed: number
}
// ─── Anonymization ──────────────────────────────────────────────────────────
/**
 * Strip juror identity (the `assignment.user` name/email relation) from
 * evaluations, keeping only scores, the binary decision, and sanitized
 * feedback text.
 *
 * GDPR: this is the single choke point evaluation data must pass through
 * before being serialized into an OpenAI prompt — the returned objects carry
 * no user identifiers and no evaluation ids.
 *
 * @param evaluations - Submitted evaluations including the juror relation.
 * @returns One anonymized record per evaluation, in the same order.
 */
export function anonymizeEvaluations(
  evaluations: EvaluationForSummary[]
): AnonymizedEvaluation[] {
  return evaluations.map((ev) => ({
    // The input type already declares Record<string, number> | null,
    // so the previous `as` assertion was redundant.
    criterionScores: ev.criterionScoresJson,
    globalScore: ev.globalScore,
    binaryDecision: ev.binaryDecision,
    // Free-text feedback may contain names typed by jurors; scrub it.
    feedbackText: ev.feedbackText ? sanitizeText(ev.feedbackText) : null,
  }))
}
// ─── Prompt Building ────────────────────────────────────────────────────────
/**
 * Assemble the prompt sent to OpenAI for evaluation-summary generation.
 *
 * Embeds only anonymized evaluation records and a sanitized project title,
 * and instructs the model to answer with a JSON object matching
 * AIResponsePayload.
 */
export function buildSummaryPrompt(
  anonymizedEvaluations: AnonymizedEvaluation[],
  projectTitle: string,
  criteriaLabels: string[]
): string {
  // Name the pieces first so the template below stays readable.
  const title = sanitizeText(projectTitle)
  const criteriaList = criteriaLabels.join(', ')
  const evaluationCount = anonymizedEvaluations.length
  const evaluationPayload = JSON.stringify(anonymizedEvaluations, null, 2)
  return `You are analyzing jury evaluations for a project competition.
PROJECT: "${title}"
EVALUATION CRITERIA: ${criteriaList}
EVALUATIONS (${evaluationCount} total):
${evaluationPayload}
Analyze these evaluations and return a JSON object with this exact structure:
{
"overallAssessment": "A 2-3 sentence summary of how the project was evaluated overall",
"strengths": ["strength 1", "strength 2", ...],
"weaknesses": ["weakness 1", "weakness 2", ...],
"themes": [
{ "theme": "theme name", "sentiment": "positive" | "negative" | "mixed", "frequency": <number of evaluators mentioning this> }
],
"recommendation": "A brief recommendation based on the evaluation consensus"
}
Guidelines:
- Base your analysis only on the provided evaluation data
- Identify common themes across evaluator feedback
- Note areas of agreement and disagreement
- Keep the assessment objective and balanced
- Do not include any personal identifiers`
}
// ─── Scoring Patterns (Server-Side) ─────────────────────────────────────────
/**
 * Compute aggregate scoring patterns from submitted evaluations.
 *
 * Pure, server-side computation — no AI involved. Produces:
 * - `averageGlobalScore`: mean of non-null global scores, or null when no
 *   evaluation carries a global score.
 * - `consensus`: `1 - stdDev / 4.5`, clamped to [0, 1] and rounded to two
 *   decimals; 4.5 is the maximum possible standard deviation on the 1-10
 *   scoring scale, so 1.0 means full agreement. Zero or one score counts
 *   trivially as full consensus.
 * - `criterionAverages`: mean score per criterion, keyed by the criterion's
 *   label; criteria nobody scored are omitted.
 * - `evaluatorCount`: total number of evaluations considered.
 *
 * @param evaluations - Submitted evaluations for one project/round.
 * @param criteriaLabels - Criterion definitions from the active form.
 */
export function computeScoringPatterns(
  evaluations: EvaluationForSummary[],
  criteriaLabels: CriterionDef[]
): ScoringPatterns {
  const globalScores = evaluations
    .map((e) => e.globalScore)
    .filter((s): s is number => s !== null)
  // Average global score
  const averageGlobalScore =
    globalScores.length > 0
      ? globalScores.reduce((a, b) => a + b, 0) / globalScores.length
      : null
  // Consensus: 1 - normalized standard deviation (1.0 = full consensus)
  let consensus = 1
  if (globalScores.length > 1 && averageGlobalScore !== null) {
    const variance =
      globalScores.reduce(
        (sum, score) => sum + Math.pow(score - averageGlobalScore, 2),
        0
      ) / globalScores.length
    const stdDev = Math.sqrt(variance)
    // Normalize by the scoring scale (1-10, so max possible std dev is 4.5)
    consensus = Math.max(0, 1 - stdDev / 4.5)
  }
  // Criterion averages, keyed by human-readable label
  const criterionAverages: Record<string, number> = {}
  for (const criterion of criteriaLabels) {
    const scores: number[] = []
    for (const ev of evaluations) {
      // criterionScoresJson is already typed Record<string, number> | null;
      // read the key once so the undefined-check narrows correctly under
      // noUncheckedIndexedAccess (the old double index did not).
      const score = ev.criterionScoresJson?.[criterion.id]
      if (score !== undefined) {
        scores.push(score)
      }
    }
    if (scores.length > 0) {
      criterionAverages[criterion.label] =
        scores.reduce((a, b) => a + b, 0) / scores.length
    }
  }
  return {
    averageGlobalScore,
    consensus: Math.round(consensus * 100) / 100,
    criterionAverages,
    evaluatorCount: evaluations.length,
  }
}
// ─── Main Orchestrator ──────────────────────────────────────────────────────
/**
 * Generate an AI-powered evaluation summary for a project in a round.
 *
 * Flow:
 *   1. Load the project and its SUBMITTED evaluations for the round.
 *   2. Anonymize evaluations (GDPR — no juror identifiers leave the server).
 *   3. Ask OpenAI for a qualitative summary (JSON mode).
 *   4. Compute scoring patterns server-side (no AI).
 *   5. Upsert the merged summary (one per project/round) and log usage.
 *
 * @throws TRPCError NOT_FOUND when the project does not exist.
 * @throws TRPCError BAD_REQUEST when no submitted evaluations exist.
 * @throws TRPCError PRECONDITION_FAILED when OpenAI is not configured.
 * @throws TRPCError INTERNAL_SERVER_ERROR on AI call or parse failure.
 */
export async function generateSummary({
  projectId,
  roundId,
  userId,
  prisma,
}: {
  projectId: string
  roundId: string
  userId: string
  prisma: PrismaClient
}): Promise<EvaluationSummaryResult> {
  // 1. Fetch project with evaluations and form criteria
  const project = await prisma.project.findUnique({
    where: { id: projectId },
    select: {
      id: true,
      title: true,
      // NOTE(review): roundId is selected but never compared against the
      // `roundId` argument — confirm whether a mismatch should be rejected.
      roundId: true,
    },
  })
  if (!project) {
    throw new TRPCError({ code: 'NOT_FOUND', message: 'Project not found' })
  }
  // Fetch submitted evaluations for this project in this round.
  // Only SUBMITTED status is summarized; drafts are excluded.
  const evaluations = await prisma.evaluation.findMany({
    where: {
      status: 'SUBMITTED',
      assignment: {
        projectId,
        roundId,
      },
    },
    select: {
      id: true,
      criterionScoresJson: true,
      globalScore: true,
      binaryDecision: true,
      feedbackText: true,
      // Juror identity is fetched here but stripped by anonymizeEvaluations
      // before anything is sent to OpenAI.
      assignment: {
        select: {
          user: {
            select: { id: true, name: true, email: true },
          },
        },
      },
    },
  })
  if (evaluations.length === 0) {
    throw new TRPCError({
      code: 'BAD_REQUEST',
      message: 'No submitted evaluations found for this project in this round',
    })
  }
  // Get evaluation form criteria for this round; fall back to an empty
  // list when no active form exists (criterion averages will be empty).
  const form = await prisma.evaluationForm.findFirst({
    where: { roundId, isActive: true },
    select: { criteriaJson: true },
  })
  const criteria: CriterionDef[] = form?.criteriaJson
    ? (form.criteriaJson as unknown as CriterionDef[])
    : []
  const criteriaLabels = criteria.map((c) => c.label)
  // 2. Anonymize evaluations (GDPR choke point)
  const typedEvaluations = evaluations as unknown as EvaluationForSummary[]
  const anonymized = anonymizeEvaluations(typedEvaluations)
  // 3. Build prompt and call OpenAI
  const openai = await getOpenAI()
  if (!openai) {
    throw new TRPCError({
      code: 'PRECONDITION_FAILED',
      message: 'OpenAI is not configured. Please set up your API key in Settings.',
    })
  }
  const model = await getConfiguredModel(AI_MODELS.QUICK)
  const prompt = buildSummaryPrompt(anonymized, project.title, criteriaLabels)
  let aiResponse: AIResponsePayload
  let tokensUsed = 0
  try {
    const params = buildCompletionParams(model, {
      messages: [
        { role: 'user', content: prompt },
      ],
      jsonMode: true,
      temperature: 0.3,
      maxTokens: 2000,
    })
    const response = await openai.chat.completions.create(params)
    // Capture token usage before parsing, so a parse failure can still log
    // the tokens that were actually consumed by the request.
    const usage = extractTokenUsage(response)
    tokensUsed = usage.totalTokens
    const content = response.choices[0]?.message?.content
    if (!content) {
      throw new Error('Empty response from AI')
    }
    // NOTE(review): parsed JSON is cast, not validated, against
    // AIResponsePayload — consider schema validation before trusting it.
    aiResponse = JSON.parse(content) as AIResponsePayload
  } catch (error) {
    // JSON.parse failures take a dedicated parse-error path: the API call
    // itself succeeded, so the consumed tokens are still logged.
    if (error instanceof SyntaxError) {
      const parseError = createParseError(error.message)
      logAIError('EvaluationSummary', 'generateSummary', parseError)
      await logAIUsage({
        userId,
        action: 'EVALUATION_SUMMARY',
        entityType: 'Project',
        entityId: projectId,
        model,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: tokensUsed,
        itemsProcessed: 0,
        status: 'ERROR',
        errorMessage: parseError.message,
      })
      throw new TRPCError({
        code: 'INTERNAL_SERVER_ERROR',
        message: 'Failed to parse AI response. Please try again.',
      })
    }
    // All other failures (network, rate limit, empty response) are
    // classified into a presentable message; no tokens assumed consumed.
    const classified = classifyAIError(error)
    logAIError('EvaluationSummary', 'generateSummary', classified)
    await logAIUsage({
      userId,
      action: 'EVALUATION_SUMMARY',
      entityType: 'Project',
      entityId: projectId,
      model,
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
      itemsProcessed: 0,
      status: 'ERROR',
      errorMessage: classified.message,
    })
    throw new TRPCError({
      code: 'INTERNAL_SERVER_ERROR',
      message: classified.message,
    })
  }
  // 4. Compute scoring patterns (server-side, no AI)
  const scoringPatterns = computeScoringPatterns(typedEvaluations, criteria)
  // 5. Merge AI output with computed patterns and upsert — one summary per
  // project/round; regeneration overwrites the previous summary.
  const summaryJson = {
    ...aiResponse,
    scoringPatterns,
  }
  const summaryJsonValue = summaryJson as unknown as Prisma.InputJsonValue
  const summary = await prisma.evaluationSummary.upsert({
    where: {
      projectId_roundId: { projectId, roundId },
    },
    create: {
      projectId,
      roundId,
      summaryJson: summaryJsonValue,
      generatedById: userId,
      model,
      tokensUsed,
    },
    update: {
      summaryJson: summaryJsonValue,
      generatedAt: new Date(),
      generatedById: userId,
      model,
      tokensUsed,
    },
  })
  // 6. Log AI usage (success path)
  await logAIUsage({
    userId,
    action: 'EVALUATION_SUMMARY',
    entityType: 'Project',
    entityId: projectId,
    model,
    promptTokens: 0, // Detailed breakdown not always available
    completionTokens: 0,
    totalTokens: tokensUsed,
    itemsProcessed: evaluations.length,
    status: 'SUCCESS',
  })
  return {
    id: summary.id,
    projectId: summary.projectId,
    roundId: summary.roundId,
    summaryJson: summaryJson as AIResponsePayload & { scoringPatterns: ScoringPatterns },
    generatedAt: summary.generatedAt,
    model: summary.model,
    tokensUsed: summary.tokensUsed,
  }
}