/**
 * Data Anonymization Service
 *
 * Strips PII (names, emails, etc.) from data before sending to AI services.
 * Returns ID mappings for de-anonymization of results.
 */

/** Juror as exposed to the AI: no name, no email, no real ID. */
export interface AnonymizedJuror {
  anonymousId: string
  expertiseTags: string[]
  currentAssignmentCount: number
  maxAssignments: number | null
}

/** Project as exposed to the AI: sanitized text and a generic team label. */
export interface AnonymizedProject {
  anonymousId: string
  title: string
  description: string | null
  tags: string[]
  teamName: string | null
}

/** Links a juror's anonymous ID back to the real ID. */
export interface JurorMapping {
  anonymousId: string
  realId: string
}

/** Links a project's anonymous ID back to the real ID. */
export interface ProjectMapping {
  anonymousId: string
  realId: string
}

/** Anonymized records plus the mappings needed to reverse the anonymization. */
export interface AnonymizationResult {
  jurors: AnonymizedJuror[]
  projects: AnonymizedProject[]
  jurorMappings: JurorMapping[]
  projectMappings: ProjectMapping[]
}

/**
 * Juror data from database
 */
interface JurorInput {
  id: string
  name?: string | null
  email: string
  expertiseTags: string[]
  maxAssignments?: number | null
  _count?: { assignments: number }
}

/**
 * Project data from database
 */
interface ProjectInput {
  id: string
  title: string
  description?: string | null
  tags: string[]
  teamName?: string | null
}

// Non-global patterns: safe to use with `.test()`, which would otherwise carry
// `lastIndex` state between calls on a global regex.
const EMAIL_PATTERN = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/
const PHONE_PATTERN = /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/
const URL_PATTERN = /https?:\/\/[^\s]+/

// Global copies used only with `String.prototype.replace` to scrub every match.
const EMAIL_PATTERN_GLOBAL = new RegExp(EMAIL_PATTERN.source, 'g')
const PHONE_PATTERN_GLOBAL = new RegExp(PHONE_PATTERN.source, 'g')
const URL_PATTERN_GLOBAL = new RegExp(URL_PATTERN.source, 'g')

// Patterns that validateAnonymization treats as leaked PII.
const PII_DETECTION_PATTERNS: readonly RegExp[] = [EMAIL_PATTERN, PHONE_PATTERN]

/**
 * Build a sequential anonymous ID such as `juror_001`.
 *
 * @param prefix - Entity prefix (`juror` or `project`)
 * @param index - Zero-based position of the entity in its input array
 */
function buildAnonymousId(prefix: string, index: number): string {
  // Positions are 1-based and padded to at least three digits.
  return `${prefix}_${(index + 1).toString().padStart(3, '0')}`
}

/**
 * Anonymize juror and project data for AI processing
 *
 * This function:
 * 1. Strips all PII (names, emails) from juror data
 * 2. Replaces real IDs with sequential anonymous IDs
 * 3. Keeps only expertise tags and assignment counts
 * 4. Returns mappings for de-anonymization
 *
 * Tag arrays are copied, so the result never aliases the caller's arrays.
 * Tag contents are passed through unchanged; only project title and
 * description are run through the text sanitizer.
 *
 * @param jurors - Juror records; only tags and assignment counts are kept
 * @param projects - Project records; title/description are sanitized
 * @returns Anonymized records plus mappings. The mappings contain real IDs,
 *   so they are presumably meant to stay out of the AI payload.
 */
export function anonymizeForAI(
  jurors: JurorInput[],
  projects: ProjectInput[]
): AnonymizationResult {
  const jurorMappings: JurorMapping[] = []
  const projectMappings: ProjectMapping[] = []

  // Anonymize jurors
  const anonymizedJurors: AnonymizedJuror[] = jurors.map((juror, index) => {
    const anonymousId = buildAnonymousId('juror', index)
    jurorMappings.push({ anonymousId, realId: juror.id })

    return {
      anonymousId,
      expertiseTags: [...juror.expertiseTags],
      currentAssignmentCount: juror._count?.assignments ?? 0,
      maxAssignments: juror.maxAssignments ?? null,
    }
  })

  // Anonymize projects (keep content but replace IDs)
  const anonymizedProjects: AnonymizedProject[] = projects.map(
    (project, index) => {
      const anonymousId = buildAnonymousId('project', index)
      projectMappings.push({ anonymousId, realId: project.id })

      return {
        anonymousId,
        title: sanitizeText(project.title),
        description: project.description
          ? sanitizeText(project.description)
          : null,
        tags: [...project.tags],
        // Replace specific team names with generic identifier
        teamName: project.teamName ? `Team ${index + 1}` : null,
      }
    }
  )

  return {
    jurors: anonymizedJurors,
    projects: anonymizedProjects,
    jurorMappings,
    projectMappings,
  }
}

/**
 * De-anonymize AI results back to real IDs
 *
 * Each result is returned with two extra fields, `realJurorId` and
 * `realProjectId`. An anonymous ID that has no mapping is passed through
 * unchanged as the "real" ID.
 *
 * @param results - AI results keyed by anonymous juror/project IDs
 * @param jurorMappings - Juror mappings produced by anonymizeForAI
 * @param projectMappings - Project mappings produced by anonymizeForAI
 * @returns Copies of the results augmented with the resolved real IDs
 */
export function deanonymizeResults<
  T extends { jurorId: string; projectId: string }
>(
  results: T[],
  jurorMappings: JurorMapping[],
  projectMappings: ProjectMapping[]
): (T & { realJurorId: string; realProjectId: string })[] {
  const jurorMap = new Map<string, string>(
    jurorMappings.map((m): [string, string] => [m.anonymousId, m.realId])
  )
  const projectMap = new Map<string, string>(
    projectMappings.map((m): [string, string] => [m.anonymousId, m.realId])
  )

  return results.map((result) => ({
    ...result,
    // `??` falls back only when the key is absent from the map.
    realJurorId: jurorMap.get(result.jurorId) ?? result.jurorId,
    realProjectId: projectMap.get(result.projectId) ?? result.projectId,
  }))
}

/**
 * Sanitize text to remove potential PII patterns
 * Removes URLs, emails, and phone numbers from text
 *
 * @param text - Free text such as a project title or description
 * @returns The text with every match replaced by a bracketed placeholder
 */
function sanitizeText(text: string): string {
  // URLs go first: the email/phone placeholders contain a space, so scrubbing
  // an email or digit run inside a URL beforehand would stop the URL match
  // early and leave fragments such as "[url removed] removed]".
  return text
    .replace(URL_PATTERN_GLOBAL, '[url removed]')
    .replace(EMAIL_PATTERN_GLOBAL, '[email removed]')
    .replace(PHONE_PATTERN_GLOBAL, '[phone removed]')
}

/**
 * Validate that data has been properly anonymized
 * Returns true if no PII patterns are detected
 *
 * Checks for email and phone patterns in juror expertise tags and in project
 * title, description, team name and tags. Empty, null or undefined text
 * counts as clean.
 *
 * @param data - Result to inspect, typically the output of anonymizeForAI
 * @returns `false` as soon as any checked field matches a PII pattern
 */
export function validateAnonymization(data: AnonymizationResult): boolean {
  const isClean = (text: string | null | undefined): boolean =>
    !text || !PII_DETECTION_PATTERNS.some((pattern) => pattern.test(text))

  // Check jurors (expertise tags are their only text field)
  for (const juror of data.jurors) {
    if (!juror.expertiseTags.every((tag) => isClean(tag))) return false
  }

  // Check projects, including every free-text field
  for (const project of data.projects) {
    if (!isClean(project.title)) return false
    if (!isClean(project.description)) return false
    if (!isClean(project.teamName)) return false
    if (!project.tags.every((tag) => isClean(tag))) return false
  }

  return true
}