212 lines
5.1 KiB
TypeScript
212 lines
5.1 KiB
TypeScript
|
|
/**
 * Data Anonymization Service
 *
 * Strips PII (names, emails, etc.) from data before it is sent to AI services.
 * Returns ID mappings so that AI results can be de-anonymized afterwards.
 */
|
||
|
|
|
||
|
|
/**
 * Juror record in the shape that is handed to AI services.
 * It has no name or email field.
 */
export interface AnonymizedJuror {
  /** Placeholder identifier that stands in for the juror's real ID. */
  anonymousId: string
  /** Expertise tags carried over from the juror record. */
  expertiseTags: string[]
  /** Number of assignments the juror currently holds. */
  currentAssignmentCount: number
  /** Upper bound on assignments, or `null` when no limit is given. */
  maxAssignments: number | null
}
|
||
|
|
|
||
|
|
/**
 * Project record in the shape that is handed to AI services.
 */
export interface AnonymizedProject {
  /** Placeholder identifier that stands in for the project's real ID. */
  anonymousId: string
  /** Project title. */
  title: string
  /** Project description, or `null` when there is none. */
  description: string | null
  /** Tags attached to the project. */
  tags: string[]
  /** Team label, or `null` when the project has no team name. */
  teamName: string | null
}
|
||
|
|
|
||
|
|
/**
 * Links a juror's placeholder ID to the real ID it replaced.
 */
export interface JurorMapping {
  /** Placeholder ID used in the anonymized data. */
  anonymousId: string
  /** The juror's original ID. */
  realId: string
}
|
||
|
|
|
||
|
|
/**
 * Links a project's placeholder ID to the real ID it replaced.
 */
export interface ProjectMapping {
  /** Placeholder ID used in the anonymized data. */
  anonymousId: string
  /** The project's original ID. */
  realId: string
}
|
||
|
|
|
||
|
|
/**
 * Everything produced by one anonymization pass: the anonymized records
 * and the mappings needed to translate placeholder IDs back to real IDs.
 */
export interface AnonymizationResult {
  /** Anonymized juror records. */
  jurors: AnonymizedJuror[]
  /** Anonymized project records. */
  projects: AnonymizedProject[]
  /** Placeholder-to-real ID pairs for jurors. */
  jurorMappings: JurorMapping[]
  /** Placeholder-to-real ID pairs for projects. */
  projectMappings: ProjectMapping[]
}
|
||
|
|
|
||
|
|
/**
 * Juror data from the database, before anonymization.
 */
interface JurorInput {
  /** Real juror ID. */
  id: string
  /** Juror's name (PII), if present. */
  name?: string | null
  /** Juror's email address (PII). */
  email: string
  /** Expertise tags for the juror. */
  expertiseTags: string[]
  /** Upper bound on assignments, if any. */
  maxAssignments?: number | null
  /** Optional count object; `assignments` is the juror's assignment count. */
  _count?: { assignments: number }
}
|
||
|
|
|
||
|
|
/**
 * Project data from the database, before anonymization.
 */
interface ProjectInput {
  /** Real project ID. */
  id: string
  /** Project title as stored. */
  title: string
  /** Project description, if present. */
  description?: string | null
  /** Tags attached to the project. */
  tags: string[]
  /** Name of the submitting team, if present. */
  teamName?: string | null
}
|
||
|
|
|
||
|
|
/**
 * Anonymize juror and project data for AI processing.
 *
 * This function:
 * 1. Never copies juror names or emails to the output; only expertise tags
 *    and assignment counts are kept.
 * 2. Replaces real IDs with sequential, 1-based placeholder IDs
 *    (`juror_001`, `project_001`, ...) assigned in input order.
 * 3. Passes project titles, descriptions and every tag (juror and project)
 *    through `sanitizeText`, so free text cannot carry emails, phone
 *    numbers or URLs.
 * 4. Replaces team names with a generic `Team N` label.
 * 5. Returns mappings for de-anonymization.
 *
 * @param jurors - Juror records to anonymize.
 * @param projects - Project records to anonymize.
 * @returns The anonymized records together with placeholder-to-real ID
 *   mappings, each in the same order as its input array.
 */
export function anonymizeForAI(
  jurors: JurorInput[],
  projects: ProjectInput[]
): AnonymizationResult {
  // Placeholder IDs are 1-based and zero-padded to at least three digits.
  const placeholderId = (prefix: string, index: number): string =>
    `${prefix}_${(index + 1).toString().padStart(3, '0')}`

  // Tags are free text, so they are scrubbed like titles and descriptions.
  // map() also yields a fresh array, so the output never aliases the input.
  const scrubTags = (tags: string[]): string[] =>
    tags.map((tag) => sanitizeText(tag))

  const jurorMappings: JurorMapping[] = []
  const projectMappings: ProjectMapping[] = []

  // Anonymize jurors: name and email are simply not copied over.
  const anonymizedJurors: AnonymizedJuror[] = jurors.map((juror, index) => {
    const anonymousId = placeholderId('juror', index)
    jurorMappings.push({ anonymousId, realId: juror.id })

    return {
      anonymousId,
      expertiseTags: scrubTags(juror.expertiseTags),
      currentAssignmentCount: juror._count?.assignments ?? 0,
      maxAssignments: juror.maxAssignments ?? null,
    }
  })

  // Anonymize projects (keep content but replace IDs).
  const anonymizedProjects: AnonymizedProject[] = projects.map(
    (project, index) => {
      const anonymousId = placeholderId('project', index)
      projectMappings.push({ anonymousId, realId: project.id })

      return {
        anonymousId,
        title: sanitizeText(project.title),
        // An absent or empty description is reported as null.
        description: project.description
          ? sanitizeText(project.description)
          : null,
        tags: scrubTags(project.tags),
        // Replace specific team names with a generic identifier.
        teamName: project.teamName ? `Team ${index + 1}` : null,
      }
    }
  )

  return {
    jurors: anonymizedJurors,
    projects: anonymizedProjects,
    jurorMappings,
    projectMappings,
  }
}
|
||
|
|
|
||
|
|
/**
 * De-anonymize AI results back to real IDs.
 *
 * Each result is returned as a shallow copy with two extra fields,
 * `realJurorId` and `realProjectId`, looked up from the supplied mappings.
 * When a placeholder ID has no mapping entry, the placeholder itself is
 * used as the fallback value. If the same placeholder appears more than
 * once in a mapping list, the last entry wins.
 *
 * @param results - AI results that reference jurors and projects by placeholder ID.
 * @param jurorMappings - Placeholder-to-real ID pairs for jurors.
 * @param projectMappings - Placeholder-to-real ID pairs for projects.
 * @returns The results, in the same order, extended with the real IDs.
 */
export function deanonymizeResults<T extends { jurorId: string; projectId: string }>(
  results: T[],
  jurorMappings: JurorMapping[],
  projectMappings: ProjectMapping[]
): (T & { realJurorId: string; realProjectId: string })[] {
  const jurorMap = new Map<string, string>(
    jurorMappings.map((m): [string, string] => [m.anonymousId, m.realId])
  )
  const projectMap = new Map<string, string>(
    projectMappings.map((m): [string, string] => [m.anonymousId, m.realId])
  )

  return results.map((result) => ({
    ...result,
    // `??` falls back only when the mapping is missing, so a mapped value
    // is always honoured even if it is an empty string.
    realJurorId: jurorMap.get(result.jurorId) ?? result.jurorId,
    realProjectId: projectMap.get(result.projectId) ?? result.projectId,
  }))
}
|
||
|
|
|
||
|
|
/**
 * Sanitize text to remove potential PII patterns.
 *
 * Replaces URLs, email addresses and phone numbers with the placeholders
 * `[url removed]`, `[email removed]` and `[phone removed]`.
 *
 * URLs are handled first on purpose. A URL may itself contain an email
 * address or a long digit run; if those were rewritten first, the inserted
 * placeholder (which contains a space) would cut the URL match short and
 * leave fragments such as `[url removed] removed]` in the output.
 *
 * @param text - Free text to scrub.
 * @returns The text with every matched pattern replaced by its placeholder.
 */
function sanitizeText(text: string): string {
  return (
    text
      // URLs: everything from the scheme up to the next whitespace.
      .replace(/https?:\/\/[^\s]+/g, '[url removed]')
      // Email addresses.
      .replace(
        /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
        '[email removed]'
      )
      // Phone numbers (various formats).
      .replace(
        /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g,
        '[phone removed]'
      )
  )
}
|
||
|
|
|
||
|
|
/**
 * Check anonymized data for leftover PII patterns.
 *
 * Juror expertise tags and project titles, descriptions and tags are
 * tested against an email pattern and a phone-number pattern. Empty,
 * `null` and `undefined` values count as clean.
 *
 * @param data - Anonymized data to inspect.
 * @returns `true` when none of the inspected strings matches a pattern,
 *   `false` as soon as one does.
 */
export function validateAnonymization(data: AnonymizationResult): boolean {
  const emailPattern = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/
  const phonePattern = /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/

  // A value is clean when it is empty/absent or matches neither pattern.
  const isClean = (value: string | null | undefined): boolean => {
    if (!value) return true
    return !(emailPattern.test(value) || phonePattern.test(value))
  }

  // Jurors carry no free text other than their expertise tags.
  const jurorsAreClean = data.jurors.every((juror) =>
    juror.expertiseTags.every((tag) => isClean(tag))
  )
  if (!jurorsAreClean) return false

  // Projects: title, description and each tag.
  return data.projects.every(
    (project) =>
      isClean(project.title) &&
      isClean(project.description) &&
      project.tags.every((tag) => isClean(tag))
  )
}
|