Compare commits

...

4 Commits

Author SHA1 Message Date
Matt e5b7cdf670 Add document analysis: page count, text extraction & language detection
Build and Push Docker Image / build (push) Failing after 11s Details
Introduces a document analyzer service that extracts page count (via pdf-parse),
text preview, and detected language (via franc) from uploaded files. Analysis runs
automatically on upload (configurable via SystemSettings) and can be triggered
retroactively for existing files. Results are displayed as badges in the FileViewer
and fed to AI screening for language-based filtering criteria.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 10:08:20 +01:00
Matt 90f36ac9b2 Retroactive auto-PASS for projects with complete documents
Wire batchCheckRequirementsAndTransition into round activation and reopen
so pre-existing projects that already have all required docs get auto-
passed. Also adds checkDocumentCompletion endpoint for manual sweeps on
already-active rounds.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 10:08:20 +01:00
Matt a921731c52 Pass tag confidence scores to AI assignment for weighted matching
The AI assignment path was receiving project tags as flat strings, losing
the confidence scores from AI tagging. Now both the GPT path and the
fallback algorithm weight tag matches by confidence — a 0.9 tag matters
more than a 0.5 one.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 10:08:20 +01:00
Matt fc8e58f985 Auto-transition projects to PASSED when all required documents uploaded
Add checkRequirementsAndTransition() to round-engine that checks if all
required FileRequirements for a round are satisfied by uploaded files.
When all are met and the project is PENDING/IN_PROGRESS, it auto-
transitions to PASSED. Also adds batchCheckRequirementsAndTransition()
for bulk operations.

Wired into:
- file.adminUploadForRoundRequirement (admin bulk upload)
- applicant.saveFileMetadata (applicant self-upload)

Non-fatal: failures in the check never break the upload itself.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 10:08:20 +01:00
17 changed files with 806 additions and 20 deletions

48
package-lock.json generated
View File

@ -49,6 +49,7 @@
"cmdk": "^1.0.4", "cmdk": "^1.0.4",
"csv-parse": "^6.1.0", "csv-parse": "^6.1.0",
"date-fns": "^4.1.0", "date-fns": "^4.1.0",
"franc": "^6.2.0",
"html2canvas": "^1.4.1", "html2canvas": "^1.4.1",
"jspdf": "^4.1.0", "jspdf": "^4.1.0",
"jspdf-autotable": "^5.0.7", "jspdf-autotable": "^5.0.7",
@ -6147,6 +6148,16 @@
"react-dom": "^18 || ^19 || ^19.0.0-rc" "react-dom": "^18 || ^19 || ^19.0.0-rc"
} }
}, },
"node_modules/collapse-white-space": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/collapse-white-space/-/collapse-white-space-2.1.0.tgz",
"integrity": "sha512-loKTxY1zCOuG4j9f6EPnuyyYkf58RnhhWTvRoZEokgB+WbdXehfjFviyOVYkqzEWz1Q5kRiZdBYS5SwxbQYwzw==",
"license": "MIT",
"funding": {
"type": "github",
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/color-convert": { "node_modules/color-convert": {
"version": "2.0.1", "version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@ -7736,6 +7747,19 @@
} }
} }
}, },
"node_modules/franc": {
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/franc/-/franc-6.2.0.tgz",
"integrity": "sha512-rcAewP7PSHvjq7Kgd7dhj82zE071kX5B4W1M4ewYMf/P+i6YsDQmj62Xz3VQm9zyUzUXwhIde/wHLGCMrM+yGg==",
"license": "MIT",
"dependencies": {
"trigram-utils": "^2.0.0"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/fsevents": { "node_modules/fsevents": {
"version": "2.3.2", "version": "2.3.2",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
@ -10441,6 +10465,16 @@
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
"license": "MIT" "license": "MIT"
}, },
"node_modules/n-gram": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/n-gram/-/n-gram-2.0.2.tgz",
"integrity": "sha512-S24aGsn+HLBxUGVAUFOwGpKs7LBcG4RudKU//eWzt/mQ97/NMKQxDWHyHx63UNWk/OOdihgmzoETn1tf5nQDzQ==",
"license": "MIT",
"funding": {
"type": "github",
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/nanoid": { "node_modules/nanoid": {
"version": "3.3.11", "version": "3.3.11",
"resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
@ -13110,6 +13144,20 @@
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
"license": "MIT" "license": "MIT"
}, },
"node_modules/trigram-utils": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/trigram-utils/-/trigram-utils-2.0.1.tgz",
"integrity": "sha512-nfWIXHEaB+HdyslAfMxSqWKDdmqY9I32jS7GnqpdWQnLH89r6A5sdk3fDVYqGAZ0CrT8ovAFSAo6HRiWcWNIGQ==",
"license": "MIT",
"dependencies": {
"collapse-white-space": "^2.0.0",
"n-gram": "^2.0.0"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/trim-lines": { "node_modules/trim-lines": {
"version": "3.0.1", "version": "3.0.1",
"resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz",

View File

@ -62,6 +62,7 @@
"cmdk": "^1.0.4", "cmdk": "^1.0.4",
"csv-parse": "^6.1.0", "csv-parse": "^6.1.0",
"date-fns": "^4.1.0", "date-fns": "^4.1.0",
"franc": "^6.2.0",
"html2canvas": "^1.4.1", "html2canvas": "^1.4.1",
"jspdf": "^4.1.0", "jspdf": "^4.1.0",
"jspdf-autotable": "^5.0.7", "jspdf-autotable": "^5.0.7",

View File

@ -0,0 +1,5 @@
-- AlterTable: add document-analysis columns to "ProjectFile"
-- (text preview, detected language + confidence, analysis timestamp).
-- Single ALTER with multiple ADD COLUMN clauses; equivalent to four
-- separate statements on PostgreSQL.
ALTER TABLE "ProjectFile"
    ADD COLUMN "textPreview"    TEXT,
    ADD COLUMN "detectedLang"   TEXT,
    ADD COLUMN "langConfidence" DOUBLE PRECISION,
    ADD COLUMN "analyzedAt"     TIMESTAMP(3);

View File

@ -689,6 +689,12 @@ model ProjectFile {
size Int // bytes size Int // bytes
pageCount Int? // Number of pages (PDFs, presentations, etc.) pageCount Int? // Number of pages (PDFs, presentations, etc.)
// Document analysis (optional, populated by document-analyzer service)
textPreview String? @db.Text // First ~2000 chars of extracted text
detectedLang String? // ISO 639-3 code (e.g. 'eng', 'fra', 'und')
langConfidence Float? // 0.0–1.0 confidence
analyzedAt DateTime? // When analysis last ran
// MinIO location // MinIO location
bucket String bucket String
objectKey String objectKey String

View File

@ -49,7 +49,10 @@ import {
Heart, Heart,
Crown, Crown,
UserPlus, UserPlus,
Loader2,
ScanSearch,
} from 'lucide-react' } from 'lucide-react'
import { toast } from 'sonner'
import { formatDate, formatDateOnly } from '@/lib/utils' import { formatDate, formatDateOnly } from '@/lib/utils'
interface PageProps { interface PageProps {
@ -529,6 +532,8 @@ function ProjectDetailContent({ projectId }: { projectId: string }) {
<AnimatedCard index={4}> <AnimatedCard index={4}>
<Card> <Card>
<CardHeader> <CardHeader>
<div className="flex items-center justify-between">
<div>
<CardTitle className="flex items-center gap-2.5 text-lg"> <CardTitle className="flex items-center gap-2.5 text-lg">
<div className="rounded-lg bg-rose-500/10 p-1.5"> <div className="rounded-lg bg-rose-500/10 p-1.5">
<FileText className="h-4 w-4 text-rose-500" /> <FileText className="h-4 w-4 text-rose-500" />
@ -538,6 +543,9 @@ function ProjectDetailContent({ projectId }: { projectId: string }) {
<CardDescription> <CardDescription>
Project documents and materials organized by competition round Project documents and materials organized by competition round
</CardDescription> </CardDescription>
</div>
<AnalyzeDocumentsButton projectId={projectId} onComplete={() => utils.file.listByProject.invalidate({ projectId })} />
</div>
</CardHeader> </CardHeader>
<CardContent className="space-y-6"> <CardContent className="space-y-6">
{/* Requirements organized by round */} {/* Requirements organized by round */}
@ -664,6 +672,11 @@ function ProjectDetailContent({ projectId }: { projectId: string }) {
size: f.size, size: f.size,
bucket: f.bucket, bucket: f.bucket,
objectKey: f.objectKey, objectKey: f.objectKey,
pageCount: f.pageCount,
textPreview: f.textPreview,
detectedLang: f.detectedLang,
langConfidence: f.langConfidence,
analyzedAt: f.analyzedAt ? String(f.analyzedAt) : null,
}))} }))}
/> />
</div> </div>
@ -847,6 +860,36 @@ function ProjectDetailSkeleton() {
) )
} }
/**
 * Admin action button that (re)runs document analysis for every file in a
 * project. Reports the outcome via toast and calls `onComplete` so the
 * parent can invalidate/refresh its file listing.
 */
function AnalyzeDocumentsButton({ projectId, onComplete }: { projectId: string; onComplete: () => void }) {
  const analyzeMutation = trpc.file.analyzeProjectFiles.useMutation({
    onSuccess: (result) => {
      const plural = result.analyzed !== 1 ? 's' : ''
      const failedSuffix = result.failed > 0 ? ` (${result.failed} failed)` : ''
      toast.success(`Analyzed ${result.analyzed} file${plural}${failedSuffix}`)
      onComplete()
    },
    onError: (error) => {
      toast.error(error.message || 'Analysis failed')
    },
  })

  const busy = analyzeMutation.isPending

  return (
    <Button
      variant="outline"
      size="sm"
      onClick={() => analyzeMutation.mutate({ projectId })}
      disabled={busy}
    >
      {busy ? (
        <Loader2 className="mr-2 h-4 w-4 animate-spin" />
      ) : (
        <ScanSearch className="mr-2 h-4 w-4" />
      )}
      {busy ? 'Analyzing...' : 'Analyze Documents'}
    </Button>
  )
}
export default function ProjectDetailPage({ params }: PageProps) { export default function ProjectDetailPage({ params }: PageProps) {
const { id } = use(params) const { id } = use(params)

View File

@ -1599,7 +1599,7 @@ function FilteringRulesSection({ roundId }: { roundId: string }) {
className="text-sm" className="text-sm"
/> />
<p className="text-xs text-muted-foreground mt-1"> <p className="text-xs text-muted-foreground mt-1">
The AI has access to: category, country, region, founded year, ocean issue, tags, description, file details (type, page count, size), and team size. The AI has access to: category, country, region, founded year, ocean issue, tags, description, file details (type, page count, size, detected language), and team size.
</p> </p>
</div> </div>

View File

@ -65,6 +65,12 @@ interface ProjectFile {
isLate?: boolean isLate?: boolean
requirementId?: string | null requirementId?: string | null
requirement?: FileRequirementInfo | null requirement?: FileRequirementInfo | null
// Document analysis fields
pageCount?: number | null
textPreview?: string | null
detectedLang?: string | null
langConfidence?: number | null
analyzedAt?: Date | string | null
} }
interface RoundGroup { interface RoundGroup {
@ -270,6 +276,25 @@ function FileItem({ file }: { file: ProjectFile }) {
</Badge> </Badge>
)} )}
<span>{formatFileSize(file.size)}</span> <span>{formatFileSize(file.size)}</span>
{file.pageCount != null && (
<Badge variant="outline" className="text-xs gap-1">
<FileText className="h-3 w-3" />
{file.pageCount} {file.pageCount === 1 ? 'page' : 'pages'}
</Badge>
)}
{file.detectedLang && file.detectedLang !== 'und' && (
<Badge
variant="outline"
className={cn('text-xs font-mono uppercase', {
'border-green-300 text-green-700 bg-green-50': file.langConfidence != null && file.langConfidence >= 0.8,
'border-amber-300 text-amber-700 bg-amber-50': file.langConfidence != null && file.langConfidence >= 0.4 && file.langConfidence < 0.8,
'border-red-300 text-red-700 bg-red-50': file.langConfidence != null && file.langConfidence < 0.4,
})}
title={`Language: ${file.detectedLang} (${Math.round((file.langConfidence ?? 0) * 100)}% confidence)`}
>
{file.detectedLang.toUpperCase()}
</Badge>
)}
</div> </div>
</div> </div>

View File

@ -6,6 +6,7 @@ import { getPresignedUrl, generateObjectKey } from '@/lib/minio'
import { sendStyledNotificationEmail, sendTeamMemberInviteEmail } from '@/lib/email' import { sendStyledNotificationEmail, sendTeamMemberInviteEmail } from '@/lib/email'
import { logAudit } from '@/server/utils/audit' import { logAudit } from '@/server/utils/audit'
import { createNotification } from '../services/in-app-notification' import { createNotification } from '../services/in-app-notification'
import { checkRequirementsAndTransition } from '../services/round-engine'
// Bucket for applicant submissions // Bucket for applicant submissions
export const SUBMISSIONS_BUCKET = 'mopc-submissions' export const SUBMISSIONS_BUCKET = 'mopc-submissions'
@ -410,6 +411,24 @@ export const applicantRouter = router({
}, },
}) })
// Auto-transition: if uploading against a round requirement, check completion
if (roundId && requirementId) {
await checkRequirementsAndTransition(
projectId,
roundId,
ctx.user.id,
ctx.prisma,
)
}
// Auto-analyze document (fire-and-forget, delayed for presigned upload)
import('../services/document-analyzer').then(({ analyzeFileDelayed, isAutoAnalysisEnabled }) =>
isAutoAnalysisEnabled().then((enabled) => {
if (enabled) analyzeFileDelayed(file.id).catch((err) =>
console.warn('[DocAnalyzer] Post-upload analysis failed:', err))
})
).catch(() => {})
return file return file
}), }),

View File

@ -74,10 +74,22 @@ async function runAIAssignmentJob(jobId: string, roundId: string, userId: string
description: true, description: true,
tags: true, tags: true,
teamName: true, teamName: true,
projectTags: {
select: { tag: { select: { name: true } }, confidence: true },
},
_count: { select: { assignments: { where: { roundId } } } }, _count: { select: { assignments: { where: { roundId } } } },
}, },
}) })
// Enrich projects with tag confidence data for AI matching
const projectsWithConfidence = projects.map((p) => ({
...p,
tagConfidences: p.projectTags.map((pt) => ({
name: pt.tag.name,
confidence: pt.confidence,
})),
}))
const existingAssignments = await prisma.assignment.findMany({ const existingAssignments = await prisma.assignment.findMany({
where: { roundId }, where: { roundId },
select: { userId: true, projectId: true }, select: { userId: true, projectId: true },
@ -124,7 +136,7 @@ async function runAIAssignmentJob(jobId: string, roundId: string, userId: string
const result = await generateAIAssignments( const result = await generateAIAssignments(
jurors, jurors,
projects, projectsWithConfidence,
constraints, constraints,
userId, userId,
roundId, roundId,

View File

@ -3,6 +3,7 @@ import { TRPCError } from '@trpc/server'
import { router, protectedProcedure, adminProcedure } from '../trpc' import { router, protectedProcedure, adminProcedure } from '../trpc'
import { getPresignedUrl, generateObjectKey, deleteObject, BUCKET_NAME } from '@/lib/minio' import { getPresignedUrl, generateObjectKey, deleteObject, BUCKET_NAME } from '@/lib/minio'
import { logAudit } from '../utils/audit' import { logAudit } from '../utils/audit'
import { checkRequirementsAndTransition } from '../services/round-engine'
export const fileRouter = router({ export const fileRouter = router({
/** /**
@ -205,6 +206,14 @@ export const fileRouter = router({
userAgent: ctx.userAgent, userAgent: ctx.userAgent,
}) })
// Auto-analyze document (fire-and-forget, delayed for presigned upload)
import('../services/document-analyzer').then(({ analyzeFileDelayed, isAutoAnalysisEnabled }) =>
isAutoAnalysisEnabled().then((enabled) => {
if (enabled) analyzeFileDelayed(file.id).catch((err) =>
console.warn('[DocAnalyzer] Post-upload analysis failed:', err))
})
).catch(() => {})
return { return {
uploadUrl, uploadUrl,
file, file,
@ -1200,6 +1209,14 @@ export const fileRouter = router({
userAgent: ctx.userAgent, userAgent: ctx.userAgent,
}) })
// Auto-analyze document (fire-and-forget, delayed for presigned upload)
import('../services/document-analyzer').then(({ analyzeFileDelayed, isAutoAnalysisEnabled }) =>
isAutoAnalysisEnabled().then((enabled) => {
if (enabled) analyzeFileDelayed(file.id).catch((err) =>
console.warn('[DocAnalyzer] Post-upload analysis failed:', err))
})
).catch(() => {})
return { uploadUrl, file } return { uploadUrl, file }
}), }),
@ -1501,6 +1518,22 @@ export const fileRouter = router({
userAgent: ctx.userAgent, userAgent: ctx.userAgent,
}) })
// Auto-transition: check if all required documents are now uploaded
await checkRequirementsAndTransition(
input.projectId,
input.roundId,
ctx.user.id,
ctx.prisma,
)
// Auto-analyze document (fire-and-forget, delayed for presigned upload)
import('../services/document-analyzer').then(({ analyzeFileDelayed, isAutoAnalysisEnabled }) =>
isAutoAnalysisEnabled().then((enabled) => {
if (enabled) analyzeFileDelayed(file.id).catch((err) =>
console.warn('[DocAnalyzer] Post-upload analysis failed:', err))
})
).catch(() => {})
return { uploadUrl, file } return { uploadUrl, file }
}), }),
@ -1536,4 +1569,25 @@ export const fileRouter = router({
) )
return results return results
}), }),
/**
 * Analyze every file belonging to one project (page count, language,
 * text preview). Retroactive by design: files that were already analyzed
 * are re-analyzed.
 */
analyzeProjectFiles: adminProcedure
  .input(z.object({ projectId: z.string() }))
  .mutation(async ({ input }) => {
    // Lazy import keeps the analyzer (and its pdf-parse/franc deps) out of
    // the router's cold-start path.
    const analyzer = await import('../services/document-analyzer')
    return analyzer.analyzeProjectFiles(input.projectId)
  }),

/**
 * Platform-wide batch analysis of files never analyzed before — the
 * retroactive sweep for uploads that predate this feature.
 */
analyzeAllFiles: adminProcedure
  .mutation(async () => {
    const analyzer = await import('../services/document-analyzer')
    return analyzer.analyzeAllUnanalyzed()
  }),
}) })

View File

@ -69,6 +69,8 @@ export async function runFilteringJob(jobId: string, roundId: string, userId: st
mimeType: true, mimeType: true,
size: true, size: true,
pageCount: true, pageCount: true,
detectedLang: true,
langConfidence: true,
objectKey: true, objectKey: true,
roundId: true, roundId: true,
createdAt: true, createdAt: true,

View File

@ -263,4 +263,41 @@ export const roundEngineRouter = router({
return { success: true, removedCount: deleted.count } return { success: true, removedCount: deleted.count }
}), }),
/**
 * Retroactive document sweep: auto-PASS every PENDING/IN_PROGRESS project
 * in this round that already has all required documents uploaded.
 * Intended for rounds activated before the auto-transition feature shipped.
 */
checkDocumentCompletion: adminProcedure
  .input(z.object({ roundId: z.string() }))
  .mutation(async ({ ctx, input }) => {
    const { batchCheckRequirementsAndTransition } = await import('../services/round-engine')

    // Only projects that could still transition are candidates.
    const candidates = await ctx.prisma.projectRoundState.findMany({
      where: {
        roundId: input.roundId,
        state: { in: ['PENDING', 'IN_PROGRESS'] },
      },
      select: { projectId: true },
    })

    const candidateIds = candidates.map((ps: { projectId: string }) => ps.projectId)
    if (candidateIds.length === 0) {
      return { transitionedCount: 0, checkedCount: 0, projectIds: [] }
    }

    const batchResult = await batchCheckRequirementsAndTransition(
      input.roundId,
      candidateIds,
      ctx.user.id,
      ctx.prisma,
    )

    return {
      transitionedCount: batchResult.transitionedCount,
      checkedCount: candidateIds.length,
      projectIds: batchResult.projectIds,
    }
  }),
}) })

View File

@ -38,7 +38,7 @@ const ASSIGNMENT_SYSTEM_PROMPT = `You are an expert jury assignment optimizer fo
Match jurors to projects based on expertise alignment, workload balance, and coverage requirements. Match jurors to projects based on expertise alignment, workload balance, and coverage requirements.
## Matching Criteria (Weighted) ## Matching Criteria (Weighted)
- Expertise Match (50%): How well juror tags/expertise align with project topics - Expertise Match (50%): How well juror tags/expertise align with project topics. Project tags include a confidence score (0-1) — weight higher-confidence tags more heavily as they are more reliably assigned. A tag with confidence 0.9 is a strong signal; one with 0.5 is uncertain.
- Workload Balance (30%): Distribute assignments evenly; prefer jurors below capacity - Workload Balance (30%): Distribute assignments evenly; prefer jurors below capacity
- Minimum Target (20%): Prioritize jurors who haven't reached their minimum assignment count - Minimum Target (20%): Prioritize jurors who haven't reached their minimum assignment count
@ -99,6 +99,7 @@ interface ProjectForAssignment {
title: string title: string
description?: string | null description?: string | null
tags: string[] tags: string[]
tagConfidences?: Array<{ name: string; confidence: number }>
teamName?: string | null teamName?: string | null
_count?: { _count?: {
assignments: number assignments: number
@ -539,7 +540,7 @@ export function generateFallbackAssignments(
return { return {
juror, juror,
score: calculateExpertiseScore(juror.expertiseTags, project.tags), score: calculateExpertiseScore(juror.expertiseTags, project.tags, project.tagConfidences),
loadScore: calculateLoadScore(currentLoad, maxLoad), loadScore: calculateLoadScore(currentLoad, maxLoad),
underMinBonus: calculateUnderMinBonus(currentLoad, minTarget), underMinBonus: calculateUnderMinBonus(currentLoad, minTarget),
} }
@ -586,24 +587,44 @@ export function generateFallbackAssignments(
/** /**
* Calculate expertise match score based on tag overlap * Calculate expertise match score based on tag overlap
* When tagConfidences are available, weights matches by confidence
*/ */
function calculateExpertiseScore( function calculateExpertiseScore(
jurorTags: string[], jurorTags: string[],
projectTags: string[] projectTags: string[],
tagConfidences?: Array<{ name: string; confidence: number }>
): number { ): number {
if (jurorTags.length === 0 || projectTags.length === 0) { if (jurorTags.length === 0 || projectTags.length === 0) {
return 0.5 // Neutral score if no tags return 0.5 // Neutral score if no tags
} }
const jurorTagsLower = new Set(jurorTags.map((t) => t.toLowerCase())) const jurorTagsLower = new Set(jurorTags.map((t) => t.toLowerCase()))
// If we have confidence data, use weighted scoring
if (tagConfidences && tagConfidences.length > 0) {
let weightedMatches = 0
let totalWeight = 0
for (const tc of tagConfidences) {
totalWeight += tc.confidence
if (jurorTagsLower.has(tc.name.toLowerCase())) {
weightedMatches += tc.confidence
}
}
if (totalWeight === 0) return 0.5
const weightedRatio = weightedMatches / totalWeight
const hasExpertise = weightedMatches > 0 ? 0.2 : 0
return Math.min(1, weightedRatio * 0.8 + hasExpertise)
}
// Fallback: unweighted matching using flat tags
const matchingTags = projectTags.filter((t) => const matchingTags = projectTags.filter((t) =>
jurorTagsLower.has(t.toLowerCase()) jurorTagsLower.has(t.toLowerCase())
) )
// Score based on percentage of project tags matched
const matchRatio = matchingTags.length / projectTags.length const matchRatio = matchingTags.length / projectTags.length
// Boost for having expertise, even if not all match
const hasExpertise = matchingTags.length > 0 ? 0.2 : 0 const hasExpertise = matchingTags.length > 0 ? 0.2 : 0
return Math.min(1, matchRatio * 0.8 + hasExpertise) return Math.min(1, matchRatio * 0.8 + hasExpertise)

View File

@ -179,10 +179,11 @@ Return a JSON object with this exact structure:
- founded_year: when the company/initiative was founded (use for age checks) - founded_year: when the company/initiative was founded (use for age checks)
- ocean_issue: the ocean conservation area - ocean_issue: the ocean conservation area
- file_count, file_types: uploaded documents summary - file_count, file_types: uploaded documents summary
- files[]: per-file details with file_type, page_count (if known), size_kb, round_name (which round the file was submitted for), and is_current_round flag - files[]: per-file details with file_type, page_count (if known), size_kb, detected_lang (ISO 639-3 language code like 'eng', 'fra'), lang_confidence (0-1), round_name (which round the file was submitted for), and is_current_round flag
- description: project summary text - description: project summary text
- tags: topic tags - tags: topic tags
- If document content is provided (text_content field in files), use it for deeper analysis. Pay SPECIAL ATTENTION to files from the current round (is_current_round=true) as they are the most recent and relevant submissions. - If document content is provided (text_content field in files), use it for deeper analysis. Pay SPECIAL ATTENTION to files from the current round (is_current_round=true) as they are the most recent and relevant submissions.
- If detected_lang is provided, use it to evaluate language requirements (e.g. 'eng' = English, 'fra' = French). lang_confidence indicates detection reliability.
## Guidelines ## Guidelines
- Evaluate ONLY against the provided criteria, not your own standards - Evaluate ONLY against the provided criteria, not your own standards

View File

@ -52,7 +52,7 @@ export interface AnonymizedProject {
anonymousId: string anonymousId: string
title: string title: string
description: string | null description: string | null
tags: string[] tags: Array<{ name: string; confidence: number }>
teamName: string | null teamName: string | null
} }
@ -83,6 +83,8 @@ export interface AnonymizedFileInfo {
file_type: string // FileType enum value file_type: string // FileType enum value
page_count: number | null // Number of pages if known page_count: number | null // Number of pages if known
size_kb: number // File size in KB size_kb: number // File size in KB
detected_lang?: string | null // ISO 639-3 language code (e.g. 'eng', 'fra')
lang_confidence?: number | null // 0.0–1.0 confidence score
round_name?: string | null // Which round the file was submitted for round_name?: string | null // Which round the file was submitted for
is_current_round?: boolean // Whether this file belongs to the current filtering/evaluation round is_current_round?: boolean // Whether this file belongs to the current filtering/evaluation round
text_content?: string // Extracted text content (when aiParseFiles is enabled) text_content?: string // Extracted text content (when aiParseFiles is enabled)
@ -209,6 +211,7 @@ interface ProjectInput {
title: string title: string
description?: string | null description?: string | null
tags: string[] tags: string[]
tagConfidences?: Array<{ name: string; confidence: number }>
teamName?: string | null teamName?: string | null
} }
@ -253,7 +256,9 @@ export function anonymizeForAI(
description: project.description description: project.description
? truncateAndSanitize(project.description, DESCRIPTION_LIMITS.ASSIGNMENT) ? truncateAndSanitize(project.description, DESCRIPTION_LIMITS.ASSIGNMENT)
: null, : null,
tags: project.tags, tags: project.tagConfidences && project.tagConfidences.length > 0
? project.tagConfidences
: project.tags.map((t) => ({ name: t, confidence: 1.0 })),
teamName: project.teamName ? `Team ${index + 1}` : null, teamName: project.teamName ? `Team ${index + 1}` : null,
} }
} }
@ -306,6 +311,8 @@ export function anonymizeProjectForAI(
file_type: f.fileType ?? 'OTHER', file_type: f.fileType ?? 'OTHER',
page_count: f.pageCount ?? null, page_count: f.pageCount ?? null,
size_kb: Math.round((f.size ?? 0) / 1024), size_kb: Math.round((f.size ?? 0) / 1024),
...(f.detectedLang ? { detected_lang: f.detectedLang } : {}),
...(f.langConfidence != null ? { lang_confidence: f.langConfidence } : {}),
...(f.roundName ? { round_name: f.roundName } : {}), ...(f.roundName ? { round_name: f.roundName } : {}),
...(f.isCurrentRound !== undefined ? { is_current_round: f.isCurrentRound } : {}), ...(f.isCurrentRound !== undefined ? { is_current_round: f.isCurrentRound } : {}),
...(f.textContent ? { text_content: f.textContent } : {}), ...(f.textContent ? { text_content: f.textContent } : {}),
@ -524,7 +531,7 @@ export function validateAnonymization(data: AnonymizationResult): boolean {
if (!checkText(project.title)) return false if (!checkText(project.title)) return false
if (!checkText(project.description)) return false if (!checkText(project.description)) return false
for (const tag of project.tags) { for (const tag of project.tags) {
if (!checkText(tag)) return false if (!checkText(typeof tag === 'string' ? tag : tag.name)) return false
} }
} }

View File

@ -0,0 +1,367 @@
/**
* Document Analyzer Service
*
* Extracts metadata from uploaded files:
* - Page count (PDFs)
* - Text preview (first ~2000 chars)
* - Language detection via franc
*
* Runs optionally on upload (controlled by SystemSettings) and
* retroactively via admin endpoint.
*/
import { getStorageProvider } from '@/lib/storage'
import { isParseableMimeType } from './file-content-extractor'
import { prisma } from '@/lib/prisma'
const TEXT_PREVIEW_LIMIT = 2000
const BATCH_SIZE = 10
// ─── Types ──────────────────────────────────────────────────────────────────
export type AnalysisResult = {
fileId: string
pageCount: number | null
textPreview: string | null
detectedLang: string | null
langConfidence: number | null
error?: string
}
// ─── Language Detection ──────────────────────────────────────────────────────
/**
 * Detect the language of `text` using franc.
 *
 * Returns an ISO 639-3 code and a 0–1 confidence. `francAll` yields
 * [code, score] pairs with scores normalized so the best match is 1.0
 * (the original header claimed a distance-based "lower = better" score,
 * which contradicted the code below — the top score IS the confidence).
 * Texts shorter than 20 chars after trimming are reported as
 * undetermined ('und'): detection is unreliable on tiny samples.
 */
async function detectLanguage(
  text: string
): Promise<{ lang: string; confidence: number }> {
  if (!text || text.trim().length < 20) {
    return { lang: 'und', confidence: 0 }
  }

  // Detection quality plateaus quickly; a 5000-char sample keeps it fast.
  const sample = text.slice(0, 5000)

  const { francAll } = await import('franc')
  const results = francAll(sample, { minLength: 10 })

  if (!results || results.length === 0 || results[0][0] === 'und') {
    return { lang: 'und', confidence: 0 }
  }

  const [topLang, topScore] = results[0]

  // Clamp defensively to [0, 1] and round to two decimals for storage.
  const confidence = Math.max(0, Math.min(1, topScore))
  return { lang: topLang, confidence: Math.round(confidence * 100) / 100 }
}
// ─── Core Analysis ──────────────────────────────────────────────────────────
/**
 * Run content analysis for a single stored object.
 *
 * Downloads the file from storage and extracts:
 *  - page count (PDFs only, via pdf-parse)
 *  - a text preview capped at TEXT_PREVIEW_LIMIT chars
 *  - detected language + confidence (when >= 20 chars of text exist)
 *
 * Never throws: unsupported mime types and download/parse failures are
 * reported via the `error` field; fields extracted before a failure are
 * kept in the returned result.
 *
 * NOTE(review): `bucket` is currently unused — `storage.getObject` is keyed
 * by objectKey alone; presumably the provider implies the bucket. Confirm.
 */
export async function analyzeFileContent(
  objectKey: string,
  bucket: string,
  mimeType: string,
  fileName: string,
  fileId: string
): Promise<AnalysisResult> {
  // Mutated in place as extraction progresses so the catch block can
  // return partial data alongside the error.
  const result: AnalysisResult = {
    fileId,
    pageCount: null,
    textPreview: null,
    detectedLang: null,
    langConfidence: null,
  }

  if (!isParseableMimeType(mimeType)) {
    return { ...result, error: 'Unsupported mime type for analysis' }
  }

  try {
    const storage = await getStorageProvider()
    const buffer = await storage.getObject(objectKey)

    let text = ''
    let pages: number | null = null

    if (mimeType === 'application/pdf') {
      // pdf-parse ships CJS; unwrap a possible default export.
      const pdfParseModule = await import('pdf-parse')
      const pdfParse =
        typeof pdfParseModule === 'function'
          ? pdfParseModule
          : (pdfParseModule as any).default ?? pdfParseModule
      const parsed = await pdfParse(buffer)
      text = parsed.text || ''
      pages = parsed.numpages ?? null
    } else {
      // Text-based files (plain text, CSV, markdown, HTML, RTF)
      text = buffer.toString('utf-8')
    }

    result.pageCount = pages

    // Preview: first TEXT_PREVIEW_LIMIT chars, only if there is real content.
    if (text.trim()) {
      result.textPreview =
        text.length > TEXT_PREVIEW_LIMIT ? text.slice(0, TEXT_PREVIEW_LIMIT) : text
    }

    // Language detection needs a minimum amount of text to be meaningful.
    if (text.trim().length >= 20) {
      const { lang, confidence } = await detectLanguage(text)
      result.detectedLang = lang
      result.langConfidence = confidence
    }

    return result
  } catch (error) {
    console.warn(
      `[DocAnalyzer] Failed to analyze ${fileName}:`,
      error instanceof Error ? error.message : error
    )
    return {
      ...result,
      error: error instanceof Error ? error.message : 'Analysis failed',
    }
  }
}
// ─── DB-Integrated Operations ───────────────────────────────────────────────
/**
 * Analyze a single file by ID and persist the results to ProjectFile.
 *
 * Looks up the file's storage coordinates, runs analyzeFileContent, then
 * writes the extracted fields back. `analyzedAt` is always stamped so
 * retroactive sweeps can tell the file has been visited — but when the
 * analysis itself failed, previously stored values are preserved instead
 * of being overwritten with nulls (the original code clobbered a prior
 * successful analysis on any transient failure).
 */
export async function analyzeFile(fileId: string): Promise<AnalysisResult> {
  const file = await prisma.projectFile.findUnique({
    where: { id: fileId },
    select: {
      id: true,
      objectKey: true,
      bucket: true,
      mimeType: true,
      fileName: true,
    },
  })

  if (!file) {
    return {
      fileId,
      pageCount: null,
      textPreview: null,
      detectedLang: null,
      langConfidence: null,
      error: 'File not found',
    }
  }

  const result = await analyzeFileContent(
    file.objectKey,
    file.bucket,
    file.mimeType,
    file.fileName,
    file.id
  )

  // Persist: on success write everything; on failure only stamp analyzedAt
  // so earlier analysis data is not wiped by a failed retry.
  await prisma.projectFile.update({
    where: { id: fileId },
    data: result.error
      ? { analyzedAt: new Date() }
      : {
          pageCount: result.pageCount,
          textPreview: result.textPreview,
          detectedLang: result.detectedLang,
          langConfidence: result.langConfidence,
          analyzedAt: new Date(),
        },
  })

  return result
}
/**
 * Analyze a file after waiting `delayMs` milliseconds.
 *
 * Intended for post-upload use: with presigned-URL uploads the DB record
 * can exist before the object has actually landed in storage, so we give
 * the upload a moment to complete before reading.
 */
export async function analyzeFileDelayed(
  fileId: string,
  delayMs = 3000
): Promise<AnalysisResult> {
  await new Promise<void>((resolve) => setTimeout(resolve, delayMs))
  return analyzeFile(fileId)
}
/**
 * Analyze all files belonging to a project and persist the results.
 *
 * Files are processed in batches of BATCH_SIZE via Promise.allSettled so
 * one failing file never aborts the run. Non-parseable files are stamped
 * with `analyzedAt` (excluding them from future retroactive sweeps) and
 * reported via `skipped`.
 *
 * Fix: previously 'skipped' outcomes were silently dropped — counted
 * neither as analyzed nor failed, and absent from the return value —
 * inconsistent with analyzeAllUnanalyzed. The `skipped` field is additive,
 * so existing callers reading analyzed/failed/total are unaffected.
 *
 * @param projectId - Project whose files should be analyzed.
 * @returns Counts of analyzed / failed / skipped files and the total.
 */
export async function analyzeProjectFiles(
  projectId: string
): Promise<{ analyzed: number; failed: number; skipped: number; total: number }> {
  const files = await prisma.projectFile.findMany({
    where: { projectId },
    select: {
      id: true,
      objectKey: true,
      bucket: true,
      mimeType: true,
      fileName: true,
    },
  })
  let analyzed = 0
  let failed = 0
  let skipped = 0
  // Process in batches to bound concurrent storage/DB load.
  for (let i = 0; i < files.length; i += BATCH_SIZE) {
    const batch = files.slice(i, i + BATCH_SIZE)
    const results = await Promise.allSettled(
      batch.map(async (file) => {
        if (!isParseableMimeType(file.mimeType)) {
          // Mark non-parseable files as analyzed (no data) so they drop
          // out of future "unanalyzed" queries.
          await prisma.projectFile.update({
            where: { id: file.id },
            data: { analyzedAt: new Date() },
          })
          return 'skipped'
        }
        const result = await analyzeFileContent(
          file.objectKey,
          file.bucket,
          file.mimeType,
          file.fileName,
          file.id
        )
        await prisma.projectFile.update({
          where: { id: file.id },
          data: {
            pageCount: result.pageCount,
            textPreview: result.textPreview,
            detectedLang: result.detectedLang,
            langConfidence: result.langConfidence,
            analyzedAt: new Date(),
          },
        })
        return result.error ? 'failed' : 'analyzed'
      })
    )
    for (const r of results) {
      if (r.status === 'fulfilled') {
        if (r.value === 'analyzed') analyzed++
        else if (r.value === 'failed') failed++
        else if (r.value === 'skipped') skipped++
      } else {
        failed++
      }
    }
  }
  return { analyzed, failed, skipped, total: files.length }
}
/**
 * Retroactive sweep: analyze every file whose `analyzedAt` is still null.
 *
 * Newest files are handled first. Work proceeds in batches of BATCH_SIZE
 * to keep memory and connection usage bounded, with per-batch progress
 * logging. Non-parseable files are stamped and counted as skipped.
 */
export async function analyzeAllUnanalyzed(): Promise<{
  analyzed: number
  failed: number
  skipped: number
  total: number
}> {
  const pending = await prisma.projectFile.findMany({
    where: { analyzedAt: null },
    select: {
      id: true,
      objectKey: true,
      bucket: true,
      mimeType: true,
      fileName: true,
    },
    orderBy: { createdAt: 'desc' },
  })
  const tally = { analyzed: 0, failed: 0, skipped: 0 }
  for (let offset = 0; offset < pending.length; offset += BATCH_SIZE) {
    const chunk = pending.slice(offset, offset + BATCH_SIZE)
    const outcomes = await Promise.allSettled(
      chunk.map(async (file) => {
        if (!isParseableMimeType(file.mimeType)) {
          // Stamp non-parseable files so they drop out of future sweeps.
          await prisma.projectFile.update({
            where: { id: file.id },
            data: { analyzedAt: new Date() },
          })
          return 'skipped'
        }
        const analysis = await analyzeFileContent(
          file.objectKey,
          file.bucket,
          file.mimeType,
          file.fileName,
          file.id
        )
        await prisma.projectFile.update({
          where: { id: file.id },
          data: {
            pageCount: analysis.pageCount,
            textPreview: analysis.textPreview,
            detectedLang: analysis.detectedLang,
            langConfidence: analysis.langConfidence,
            analyzedAt: new Date(),
          },
        })
        return analysis.error ? 'failed' : 'analyzed'
      })
    )
    for (const outcome of outcomes) {
      if (outcome.status === 'rejected') {
        tally.failed++
      } else if (outcome.value === 'analyzed') {
        tally.analyzed++
      } else if (outcome.value === 'failed') {
        tally.failed++
      } else if (outcome.value === 'skipped') {
        tally.skipped++
      }
    }
    console.log(
      `[DocAnalyzer] Batch progress: ${offset + chunk.length}/${pending.length} (${tally.analyzed} analyzed, ${tally.skipped} skipped, ${tally.failed} failed)`
    )
  }
  return { ...tally, total: pending.length }
}
/**
 * Whether automatic post-upload file analysis is enabled.
 *
 * Reads the `file_analysis_auto_enabled` SystemSettings key. Anything
 * other than the literal string 'false' — including a missing row or a
 * lookup error — counts as enabled (fail open).
 */
export async function isAutoAnalysisEnabled(): Promise<boolean> {
  try {
    const row = await prisma.systemSettings.findUnique({
      where: { key: 'file_analysis_auto_enabled' },
    })
    if (row?.value === 'false') return false
    return true
  } catch {
    // A settings lookup failure should never block analysis.
    return true
  }
}

View File

@ -143,6 +143,24 @@ export async function activateRound(
detailsJson: { name: round.name, roundType: round.roundType }, detailsJson: { name: round.name, roundType: round.roundType },
}) })
// Retroactive check: auto-PASS any projects that already have all required docs uploaded
// Non-fatal — runs after activation so it never blocks the transition
try {
const projectStates = await prisma.projectRoundState.findMany({
where: { roundId, state: { in: ['PENDING', 'IN_PROGRESS'] } },
select: { projectId: true },
})
if (projectStates.length > 0) {
const projectIds = projectStates.map((ps: { projectId: string }) => ps.projectId)
const result = await batchCheckRequirementsAndTransition(roundId, projectIds, actorId, prisma)
if (result.transitionedCount > 0) {
console.log(`[RoundEngine] On activation: auto-passed ${result.transitionedCount} projects with complete documents`)
}
}
} catch (retroError) {
console.error('[RoundEngine] Retroactive document check failed (non-fatal):', retroError)
}
return { return {
success: true, success: true,
round: { id: updated.id, status: updated.status }, round: { id: updated.id, status: updated.status },
@ -429,6 +447,23 @@ export async function reopenRound(
}, },
}) })
// Retroactive check: auto-PASS any projects that already have all required docs
try {
const projectStates = await prisma.projectRoundState.findMany({
where: { roundId, state: { in: ['PENDING', 'IN_PROGRESS'] } },
select: { projectId: true },
})
if (projectStates.length > 0) {
const projectIds = projectStates.map((ps: { projectId: string }) => ps.projectId)
const batchResult = await batchCheckRequirementsAndTransition(roundId, projectIds, actorId, prisma)
if (batchResult.transitionedCount > 0) {
console.log(`[RoundEngine] On reopen: auto-passed ${batchResult.transitionedCount} projects with complete documents`)
}
}
} catch (retroError) {
console.error('[RoundEngine] Retroactive document check on reopen failed (non-fatal):', retroError)
}
return { return {
success: true, success: true,
round: { id: result.updated.id, status: result.updated.status }, round: { id: result.updated.id, status: result.updated.status },
@ -625,6 +660,109 @@ export async function getProjectRoundState(
}) })
} }
// ─── Auto-Transition on Document Completion ─────────────────────────────────
/**
 * Auto-pass check: if a project has uploaded a file for every required
 * FileRequirement of a round, and its round state is still PENDING or
 * IN_PROGRESS, transition it to PASSED.
 *
 * Invoked after file uploads (admin bulk upload or applicant upload).
 * Non-fatal by design: all errors are caught, logged, and reported as
 * "no transition" so upload flows are never blocked.
 */
export async function checkRequirementsAndTransition(
  projectId: string,
  roundId: string,
  actorId: string,
  prisma: PrismaClient | any,
): Promise<{ transitioned: boolean; newState?: string }> {
  try {
    // Required FileRequirements for this round; none → nothing to enforce.
    const required = await prisma.fileRequirement.findMany({
      where: { roundId, isRequired: true },
      select: { id: true },
    })
    if (required.length === 0) {
      return { transitioned: false }
    }
    const requiredIds = required.map((r: { id: string }) => r.id)
    // Files this project has uploaded against those requirements.
    const uploads = await prisma.projectFile.findMany({
      where: {
        projectId,
        roundId,
        requirementId: { in: requiredIds },
      },
      select: { requirementId: true },
    })
    const satisfied = new Set(
      uploads
        .map((f: { requirementId: string | null }) => f.requirementId)
        .filter(Boolean)
    )
    // Every required requirement needs at least one uploaded file.
    if (!requiredIds.every((id: string) => satisfied.has(id))) {
      return { transitioned: false }
    }
    // Only auto-advance projects that are still PENDING or IN_PROGRESS;
    // never touch terminal or reviewer-set states.
    const stateRow = await prisma.projectRoundState.findUnique({
      where: { projectId_roundId: { projectId, roundId } },
      select: { state: true },
    })
    if (!stateRow || !['PENDING', 'IN_PROGRESS'].includes(stateRow.state)) {
      return { transitioned: false }
    }
    // All requirements met — move the project to PASSED.
    const outcome = await transitionProject(projectId, roundId, 'PASSED' as ProjectRoundStateValue, actorId, prisma)
    if (outcome.success) {
      console.log(`[RoundEngine] Auto-transitioned project ${projectId} to PASSED in round ${roundId} (all ${required.length} requirements met)`)
      return { transitioned: true, newState: 'PASSED' }
    }
    return { transitioned: false }
  } catch (error) {
    // Non-fatal — log and continue.
    console.error('[RoundEngine] checkRequirementsAndTransition failed:', error)
    return { transitioned: false }
  }
}
/**
 * Batch variant: run the document-completion check for every given
 * project in a round, transitioning each one that qualifies.
 *
 * Projects are checked sequentially; each per-project check is
 * independently non-fatal, so one failure never skips the rest.
 */
export async function batchCheckRequirementsAndTransition(
  roundId: string,
  projectIds: string[],
  actorId: string,
  prisma: PrismaClient | any,
): Promise<{ transitionedCount: number; projectIds: string[] }> {
  const passed: string[] = []
  for (const id of projectIds) {
    const outcome = await checkRequirementsAndTransition(id, roundId, actorId, prisma)
    if (outcome.transitioned) {
      passed.push(id)
    }
  }
  if (passed.length > 0) {
    console.log(`[RoundEngine] Batch auto-transition: ${passed.length}/${projectIds.length} projects moved to PASSED in round ${roundId}`)
  }
  return { transitionedCount: passed.length, projectIds: passed }
}
// ─── Internals ────────────────────────────────────────────────────────────── // ─── Internals ──────────────────────────────────────────────────────────────
function isTerminalState(state: ProjectRoundStateValue): boolean { function isTerminalState(state: ProjectRoundStateValue): boolean {