MOPC-App/src/server/services/anonymization.ts

212 lines
5.1 KiB
TypeScript
Raw Normal View History

/**
* Data Anonymization Service
*
* Strips PII (names, emails, etc.) from data before sending to AI services.
* Returns ID mappings for de-anonymization of results.
*/
/**
 * Juror record in the shape handed to AI services.
 *
 * Declares no name or email field. `anonymizeForAI` builds these records
 * from `JurorInput` rows.
 */
export interface AnonymizedJuror {
/** Stand-in identifier; `anonymizeForAI` generates values such as `juror_001`. */
anonymousId: string
/** Expertise tags; `anonymizeForAI` copies these through without sanitizing them. */
expertiseTags: string[]
/** Assignments already held; `anonymizeForAI` uses 0 when the input carries no count. */
currentAssignmentCount: number
/** Assignment cap; `anonymizeForAI` stores null when the input has none. */
maxAssignments: number | null
}
/**
 * Project record in the shape handed to AI services.
 *
 * `anonymizeForAI` builds these records from `ProjectInput` rows.
 */
export interface AnonymizedProject {
/** Stand-in identifier; `anonymizeForAI` generates values such as `project_001`. */
anonymousId: string
/** Project title; `anonymizeForAI` runs it through `sanitizeText`. */
title: string
/** Project description run through `sanitizeText`, or null when the input has none. */
description: string | null
/** Project tags; `anonymizeForAI` copies these through without sanitizing them. */
tags: string[]
/** Generic placeholder of the form `Team N` written by `anonymizeForAI`, or null when the input has no team name. */
teamName: string | null
}
/**
 * Links one anonymous juror ID to the real juror ID it replaced.
 * Produced by `anonymizeForAI` and consumed by `deanonymizeResults`.
 */
export interface JurorMapping {
/** Generated stand-in ID (e.g. `juror_001`). */
anonymousId: string
/** The `id` of the original juror input. */
realId: string
}
/**
 * Links one anonymous project ID to the real project ID it replaced.
 * Produced by `anonymizeForAI` and consumed by `deanonymizeResults`.
 */
export interface ProjectMapping {
/** Generated stand-in ID (e.g. `project_001`). */
anonymousId: string
/** The `id` of the original project input. */
realId: string
}
/**
 * Everything `anonymizeForAI` returns: the anonymized records plus the
 * mappings needed to translate anonymous IDs back to real ones.
 */
export interface AnonymizationResult {
/** Anonymized jurors, in the same order as the input jurors. */
jurors: AnonymizedJuror[]
/** Anonymized projects, in the same order as the input projects. */
projects: AnonymizedProject[]
/** One entry per juror, pairing its anonymous ID with its real ID. */
jurorMappings: JurorMapping[]
/** One entry per project, pairing its anonymous ID with its real ID. */
projectMappings: ProjectMapping[]
}
/**
 * Juror data from database, as accepted by `anonymizeForAI`.
 *
 * `anonymizeForAI` reads only `id`, `expertiseTags`, `maxAssignments` and
 * `_count.assignments`; `name` and `email` are never copied to the output.
 */
interface JurorInput {
/** Real juror ID; recorded only in the juror mappings. */
id: string
/** PII: not read by `anonymizeForAI`. */
name?: string | null
/** PII: not read by `anonymizeForAI`. */
email: string
/** Copied unchanged into the anonymized juror. */
expertiseTags: string[]
/** Assignment cap; a missing value becomes null in the anonymized juror. */
maxAssignments?: number | null
/** Optional count wrapper; `assignments` becomes `currentAssignmentCount` (0 when absent). */
_count?: {
assignments: number
}
}
/**
 * Project data from database, as accepted by `anonymizeForAI`.
 */
interface ProjectInput {
/** Real project ID; recorded only in the project mappings. */
id: string
/** Free text; passed through `sanitizeText` before being returned. */
title: string
/** Free text; passed through `sanitizeText` when non-empty, otherwise returned as null. */
description?: string | null
/** Copied unchanged into the anonymized project. */
tags: string[]
/** Replaced by a generic `Team N` placeholder when non-empty, otherwise returned as null. */
teamName?: string | null
}
/**
 * Build the AI-safe view of a set of jurors and projects.
 *
 * - Jurors: only `id`, `expertiseTags`, `maxAssignments` and the assignment
 *   count are read; `name` and `email` are never touched. A missing count
 *   becomes 0 and a missing cap becomes null.
 * - Projects: `title` and a non-empty `description` go through `sanitizeText`;
 *   a non-empty `teamName` is swapped for `Team N`, where N is the project's
 *   1-based position. Empty or missing description / team name become null.
 * - Tags (juror and project) are passed through as-is.
 * - Real IDs are replaced by `juror_NNN` / `project_NNN` (1-based position,
 *   zero-padded to at least three digits).
 *
 * @param jurors Juror rows to anonymize.
 * @param projects Project rows to anonymize.
 * @returns The anonymized records, in input order, plus the anonymous-to-real
 *   ID mappings needed by `deanonymizeResults`.
 */
export function anonymizeForAI(
  jurors: JurorInput[],
  projects: ProjectInput[]
): AnonymizationResult {
  // 1-based, zero-padded sequence label, e.g. ('juror', 0) -> 'juror_001'.
  const sequenceId = (prefix: string, position: number): string => {
    const ordinal = String(position + 1)
    return `${prefix}_${ordinal.padStart(3, '0')}`
  }

  const jurorMappings: JurorMapping[] = []
  const anonymizedJurors = jurors.map(
    ({ id, expertiseTags, maxAssignments, _count }, position): AnonymizedJuror => {
      const anonymousId = sequenceId('juror', position)
      jurorMappings.push({ anonymousId, realId: id })

      const currentAssignmentCount = _count?.assignments ?? 0
      return {
        anonymousId,
        expertiseTags,
        currentAssignmentCount,
        maxAssignments: maxAssignments ?? null,
      }
    }
  )

  const projectMappings: ProjectMapping[] = []
  const anonymizedProjects = projects.map(
    (source, position): AnonymizedProject => {
      const anonymousId = sequenceId('project', position)
      projectMappings.push({ anonymousId, realId: source.id })

      const title = sanitizeText(source.title)

      let description: string | null = null
      if (source.description) {
        description = sanitizeText(source.description)
      }

      // The real team name never leaves this function; only a numbered
      // placeholder does.
      let teamName: string | null = null
      if (source.teamName) {
        teamName = `Team ${position + 1}`
      }

      return { anonymousId, title, description, tags: source.tags, teamName }
    }
  )

  return {
    jurors: anonymizedJurors,
    projects: anonymizedProjects,
    jurorMappings,
    projectMappings,
  }
}
/**
 * De-anonymize AI results back to real IDs.
 *
 * Each result is shallow-copied and extended with `realJurorId` and
 * `realProjectId`, looked up from the supplied mappings. The original
 * anonymous `jurorId` / `projectId` fields are left in place.
 *
 * An ID that has no mapping entry is passed through unchanged, so callers
 * that must reject unknown IDs need to check for that themselves.
 *
 * @param results AI output rows keyed by anonymous juror/project IDs.
 * @param jurorMappings Anonymous-to-real juror ID pairs from `anonymizeForAI`.
 * @param projectMappings Anonymous-to-real project ID pairs from `anonymizeForAI`.
 * @returns A new array, in input order, of results with the real IDs added.
 */
export function deanonymizeResults<T extends { jurorId: string; projectId: string }>(
  results: T[],
  jurorMappings: JurorMapping[],
  projectMappings: ProjectMapping[]
): (T & { realJurorId: string; realProjectId: string })[] {
  const jurorMap = new Map(
    jurorMappings.map((m): [string, string] => [m.anonymousId, m.realId])
  )
  const projectMap = new Map(
    projectMappings.map((m): [string, string] => [m.anonymousId, m.realId])
  )
  return results.map((result) => ({
    ...result,
    // `??` rather than `||`: fall back only when the ID is genuinely
    // unmapped, not when the mapped value happens to be falsy.
    realJurorId: jurorMap.get(result.jurorId) ?? result.jurorId,
    realProjectId: projectMap.get(result.projectId) ?? result.projectId,
  }))
}
/**
 * Sanitize text to remove potential PII patterns.
 *
 * Replaces, in this order:
 * 1. `http://` / `https://` URLs with `[url removed]`
 * 2. email addresses with `[email removed]`
 * 3. phone-like digit groups with `[phone removed]`
 *
 * URLs must be handled first. The email and phone placeholders contain a
 * space, so replacing an email or digit run that sits inside a URL would
 * split the URL in two; the URL pattern would then stop at that space and
 * leave the tail of the URL in the output.
 *
 * NOTE(review): the phone pattern matches any run shaped like 3-3-4 digits
 * with an optional 1-3 digit prefix, so it can also hit non-phone numbers.
 *
 * @param text Free text that may contain contact details.
 * @returns The text with every match replaced by its placeholder.
 */
function sanitizeText(text: string): string {
  // Remove URLs first (see the ordering note above)
  let sanitized = text.replace(
    /https?:\/\/[^\s]+/g,
    '[url removed]'
  )
  // Remove email addresses
  sanitized = sanitized.replace(
    /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
    '[email removed]'
  )
  // Remove phone numbers (various formats)
  sanitized = sanitized.replace(
    /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g,
    '[phone removed]'
  )
  return sanitized
}
/**
 * Validate that data has been properly anonymized.
 *
 * Scans every free-text field of the result for email addresses, phone-like
 * digit groups and `http(s)://` URLs, the same three patterns that
 * `sanitizeText` removes. Fields checked:
 * - jurors: each entry of `expertiseTags`
 * - projects: `title`, `description`, `teamName` and each entry of `tags`
 *
 * Empty, null or undefined text counts as clean. The ID mappings are not
 * inspected.
 *
 * @param data Output of `anonymizeForAI` (or any value of the same shape).
 * @returns true if no PII pattern is detected in any checked field.
 */
export function validateAnonymization(data: AnonymizationResult): boolean {
  // Non-global regexes: `test` keeps no state between calls.
  const piiPatterns = [
    /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/, // Email
    /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/, // Phone
    /https?:\/\/[^\s]+/, // URL
  ]

  const isClean = (text: string | null | undefined): boolean =>
    !text || !piiPatterns.some((pattern) => pattern.test(text))

  // Jurors carry no free text other than their expertise tags.
  const jurorsClean = data.jurors.every((juror) =>
    juror.expertiseTags.every((tag) => isClean(tag))
  )
  if (!jurorsClean) return false

  return data.projects.every(
    (project) =>
      isClean(project.title) &&
      isClean(project.description) &&
      isClean(project.teamName) &&
      project.tags.every((tag) => isClean(tag))
  )
}