From dda554df84e319543ca35a3dc002923cf995bcba Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 12 May 2026 22:43:59 +0200 Subject: [PATCH] feat(deps): @faker-js/faker wide-synthetic seed for load testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New seed harness for stress-testing list pages, search, analytics under realistic volumes. Faker-driven, deterministic via fixed seed, idempotent via `clients.source_details = 'wide-synthetic'` marker. - `src/lib/db/seed-wide-synthetic-data.ts` — generator (1000 clients default, override via `WIDE_SEED_COUNT`) - `src/lib/db/seed-wide-synthetic.ts` — entrypoint - `pnpm db:seed:wide-synthetic` script Distribution: - 70% of clients get an interest (spread across pipeline stages) - ~50% of those interests link to a real berth - Acquisition source weighted: 55% website / 25% referral / 15% broker / 5% manual - Locale-aware names/emails/phones/addresses via faker Curated synthetic seed (`seed-synthetic-data.ts`) and realistic seed (`seed-data.ts`) are untouched — this is a third axis for volume testing, not a replacement. Verified: tsc clean, build green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- package.json | 2 + pnpm-lock.yaml | 9 + src/lib/db/seed-wide-synthetic-data.ts | 221 +++++++++++++++++++++++++ src/lib/db/seed-wide-synthetic.ts | 57 +++++++ 4 files changed, 289 insertions(+) create mode 100644 src/lib/db/seed-wide-synthetic-data.ts create mode 100644 src/lib/db/seed-wide-synthetic.ts diff --git a/package.json b/package.json index f5b217af..09fe38c8 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,7 @@ "db:seed": "tsx src/lib/db/seed.ts", "db:seed:realistic": "tsx src/lib/db/seed.ts", "db:seed:synthetic": "tsx src/lib/db/seed-synthetic.ts", + "db:seed:wide-synthetic": "tsx src/lib/db/seed-wide-synthetic.ts", "db:reset": "tsx scripts/db-reset.ts --confirm", "db:reseed:realistic": "pnpm db:reset && pnpm db:seed:realistic", "db:reseed:synthetic": "pnpm db:reset && pnpm db:seed:synthetic", @@ -117,6 +118,7 @@ }, "devDependencies": { "@axe-core/playwright": "^4.11.3", + "@faker-js/faker": "^10.4.0", "@hookform/devtools": "^4.4.0", "@next/bundle-analyzer": "^16.2.6", "@playwright/test": "^1.60.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 277481b6..5f42d3f3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -275,6 +275,9 @@ importers: '@axe-core/playwright': specifier: ^4.11.3 version: 4.11.3(playwright-core@1.60.0) + '@faker-js/faker': + specifier: ^10.4.0 + version: 10.4.0 '@hookform/devtools': specifier: ^4.4.0 version: 4.4.0(@types/react@19.2.14)(react-dom@19.2.6(react@19.2.6))(react@19.2.6) @@ -905,6 +908,10 @@ packages: '@noble/hashes': optional: true + '@faker-js/faker@10.4.0': + resolution: {integrity: sha512-sDBWI3yLy8EcDzgobvJTWq1MJYzAkQdpjXuPukga9wXonhpMRvd1Izuo2Qgwey2OiEoRIBr35RMU9HJRoOHzpw==} + engines: {node: ^20.19.0 || ^22.13.0 || ^23.5.0 || >=24.0.0, npm: '>=10'} + '@fastify/otel@0.18.0': resolution: {integrity: sha512-3TASCATfw+ctICSb4ymrv7iCm0qJ0N9CarB+CZ7zIJ7KqNbwI5JjyDL1/sxoC0ccTO1Zyd1iQ+oqncPg5FJXaA==} peerDependencies: @@ -7454,6 +7461,8 @@ snapshots: 
optionalDependencies: '@noble/hashes': 2.2.0 + '@faker-js/faker@10.4.0': {} + '@fastify/otel@0.18.0(@opentelemetry/api@1.9.1)': dependencies: '@opentelemetry/api': 1.9.1 diff --git a/src/lib/db/seed-wide-synthetic-data.ts b/src/lib/db/seed-wide-synthetic-data.ts new file mode 100644 index 00000000..200cc382 --- /dev/null +++ b/src/lib/db/seed-wide-synthetic-data.ts @@ -0,0 +1,221 @@ +/** + * Wide synthetic seed for load + perf testing. + * + * Faker-generated bulk client records (default 1000, configurable via + * `WIDE_SEED_COUNT` env var). Designed to exercise pagination, list- + * page rendering, search indexing, and analytics aggregations under + * realistic-ish volumes. + * + * Distinct from `seed-synthetic-data.ts` which is hand-curated for + * every-pipeline-stage end-to-end test coverage. This file is for + * "what happens when there are 10k rows" stress tests; the curated + * seed stays the source of truth for selector-keyed Playwright runs. + * + * Idempotent at the row-count level: stamps `clients.source_details` + * with `wide-synthetic` and skips ports already at or above the + * target count. + * + * Run via `pnpm db:seed:wide-synthetic`. Override count via env: + * `WIDE_SEED_COUNT=5000 pnpm db:seed:wide-synthetic` + */ + +import { faker } from '@faker-js/faker'; +import { and, eq, count } from 'drizzle-orm'; + +import { db } from './index'; +import { withTransaction } from './utils'; +import { + clients, + clientContacts, + clientAddresses, + berths, + interests, + interestBerths, +} from './schema'; +import { PIPELINE_STAGES, type PipelineStage } from '@/lib/constants'; + +// Stable seed = deterministic output across runs of the same WIDE_SEED_COUNT. +// Bump if the distribution feels stale. +const FAKER_SEED = 20260512; + +// Marker stamped on every wide-synthetic client so the idempotency +// check + future cleanup query can find them without colliding with +// either the curated synthetic seed or any real records. 
+const WIDE_MARKER = 'wide-synthetic';
+
+// Acquisition source distribution roughly matching how a real marina
+// funnel breaks down — most opportunity comes through the website, then
+// referrals, then brokers, then manual entry. Tweak when product data
+// gives us better numbers.
+const SOURCE_DISTRIBUTION: Array<{
+  source: 'website' | 'referral' | 'broker' | 'manual';
+  weight: number;
+}> = [
+  { source: 'website', weight: 0.55 },
+  { source: 'referral', weight: 0.25 },
+  { source: 'broker', weight: 0.15 },
+  { source: 'manual', weight: 0.05 },
+];
+
+/**
+ * Picks one item at random with probability proportional to its `weight`.
+ *
+ * Draws a single float between 0 and 1 from the shared faker PRNG and walks
+ * the cumulative weights, so the weights are expected to sum to ~1. When
+ * floating-point rounding leaves the draw above the final cumulative weight,
+ * the last item is returned.
+ *
+ * @param items - Candidates to choose from; must not be empty.
+ * @returns The chosen item.
+ * @throws Error when `items` is empty.
+ */
+function pickWeighted<T extends { weight: number }>(items: readonly T[]): T {
+  const fallback = items[items.length - 1];
+  if (fallback === undefined) {
+    throw new Error('pickWeighted: items must not be empty');
+  }
+
+  const r = faker.number.float({ min: 0, max: 1 });
+  let acc = 0;
+  for (const item of items) {
+    acc += item.weight;
+    if (r <= acc) return item;
+  }
+  return fallback;
+}
+
+/** Row counts inserted by a single `seedWideSyntheticPortData` call. */
+export interface WideSeedSummary {
+  /** Client rows inserted by this call. */
+  clients: number;
+  /** Interest rows inserted by this call. */
+  interests: number;
+}
+
+/**
+ * Tops a port up to `targetClients` wide-synthetic clients.
+ *
+ * Every generated client gets an email contact, a phone contact and a
+ * primary address. ~70% also get an interest at a random pipeline stage,
+ * and ~50% of those interests are linked to a random berth of the port.
+ *
+ * Rows are written in transactions of up to 100 clients. Row `i` is always
+ * generated from `FAKER_SEED + i`, so a run that resumes on a partially
+ * seeded port continues the sequence instead of replaying the first rows.
+ *
+ * @param portId - Port whose marked clients are counted and inserted.
+ * @param portSlug - Only used to prefix log output.
+ * @param targetClients - Desired number of wide-synthetic clients in the port.
+ * @returns `null` when the port already holds at least `targetClients`
+ *   marked clients; otherwise the row counts inserted by this call (both
+ *   zero when the port has no berths).
+ */
+export async function seedWideSyntheticPortData(
+  portId: string,
+  portSlug: string,
+  targetClients: number,
+): Promise<WideSeedSummary | null> {
+  // Idempotency: count existing wide-marker rows. Skip if we'd be at
+  // or above the target count.
+  const [existing] = await db
+    .select({ count: count() })
+    .from(clients)
+    .where(and(eq(clients.portId, portId), eq(clients.sourceDetails, WIDE_MARKER)));
+  const existingCount = existing?.count ?? 0;
+
+  if (existingCount >= targetClients) {
+    return null;
+  }
+
+  const portBerths = await db
+    .select({ id: berths.id })
+    .from(berths)
+    .where(eq(berths.portId, portId));
+
+  if (portBerths.length === 0) {
+    console.warn(` [${portSlug}] no berths in port — wide seed skipping`);
+    return { clients: 0, interests: 0 };
+  }
+
+  let clientsInserted = 0;
+  let interestsInserted = 0;
+
+  // Chunked transactions: one tx per 100 rows so a mid-run failure
+  // doesn't roll back the entire batch and force a from-scratch retry.
+  const CHUNK = 100;
+
+  // Start at the existing row count so a re-run only adds the missing rows.
+  for (let chunkStart = existingCount; chunkStart < targetClients; chunkStart += CHUNK) {
+    const chunkEnd = Math.min(chunkStart + CHUNK, targetClients);
+
+    await withTransaction(async (tx) => {
+      for (let i = chunkStart; i < chunkEnd; i++) {
+        // Seed from the row index rather than once per call: seeding once
+        // would make a resumed run regenerate the same people as rows 0..n.
+        faker.seed(FAKER_SEED + i);
+
+        const sourceChoice = pickWeighted(SOURCE_DISTRIBUTION);
+        const firstName = faker.person.firstName();
+        const lastName = faker.person.lastName();
+        const fullName = `${firstName} ${lastName}`;
+        const email = faker.internet.email({ firstName, lastName }).toLowerCase();
+        const phone = faker.phone.number({ style: 'international' });
+        const country = faker.location.countryCode('alpha-2');
+        const city = faker.location.city();
+        const street = faker.location.streetAddress();
+        const postalCode = faker.location.zipCode();
+
+        // Back-date creation by 1–365 days relative to the time of the run.
+        const createdDaysAgo = faker.number.int({ min: 1, max: 365 });
+        const createdAt = new Date(Date.now() - createdDaysAgo * 86_400_000);
+
+        const [client] = await tx
+          .insert(clients)
+          .values({
+            portId,
+            fullName,
+            source: sourceChoice.source,
+            sourceDetails: WIDE_MARKER,
+            nationalityIso: country,
+            preferredContactMethod: 'email',
+            preferredLanguage: 'en',
+            createdAt,
+            updatedAt: createdAt,
+          })
+          .returning({ id: clients.id });
+
+        if (!client) continue;
+        clientsInserted++;
+
+        await tx.insert(clientContacts).values([
+          {
+            clientId: client.id,
+            channel: 'email',
+            value: email,
+            label: 'primary',
+            isPrimary: true,
+          },
+          {
+            clientId: client.id,
+            channel: 'phone',
+            value: phone,
+            label: 'primary',
+            isPrimary: false,
+          },
+        ]);
+
+        await tx.insert(clientAddresses).values({
+          clientId: client.id,
+          portId,
+          label: 'Primary',
+          streetAddress: street,
+          city,
+          postalCode,
+          countryIso: country,
+          isPrimary: true,
+        });
+
+        // ~70% of clients get an interest, spread across pipeline stages.
+        // The remaining ~30% sit as raw leads (open/no-interest state).
+        if (faker.number.float({ min: 0, max: 1 }) < 0.7) {
+          const stage = faker.helpers.arrayElement(PIPELINE_STAGES) as PipelineStage;
+
+          const [interest] = await tx
+            .insert(interests)
+            .values({
+              portId,
+              clientId: client.id,
+              yachtId: null,
+              pipelineStage: stage,
+              leadCategory:
+                stage === 'open'
+                  ? 'general_interest'
+                  : stage === 'details_sent' || stage === 'in_communication'
+                    ? 'specific_qualified'
+                    : 'hot_lead',
+              source: sourceChoice.source,
+              createdAt,
+              updatedAt: createdAt,
+            })
+            .returning({ id: interests.id });
+
+          if (interest) {
+            interestsInserted++;
+            // ~50% of interests link to a randomly chosen berth of this
+            // port; the link is made regardless of pipeline stage.
+            if (faker.number.float({ min: 0, max: 1 }) < 0.5) {
+              const berth = faker.helpers.arrayElement(portBerths);
+              await tx.insert(interestBerths).values({
+                interestId: interest.id,
+                berthId: berth.id,
+                isPrimary: true,
+                isSpecificInterest: false,
+                isInEoiBundle: false,
+              });
+            }
+          }
+        }
+      }
+    });
+  }
+
+  return { clients: clientsInserted, interests: interestsInserted };
+}
diff --git a/src/lib/db/seed-wide-synthetic.ts b/src/lib/db/seed-wide-synthetic.ts
new file mode 100644
index 00000000..9c2f2fff
--- /dev/null
+++ b/src/lib/db/seed-wide-synthetic.ts
@@ -0,0 +1,67 @@
+/**
+ * Wide synthetic seed entrypoint.
+ *
+ * Bootstraps ports + roles + super-admin profile (idempotent), then
+ * generates faker-driven bulk client + interest rows per port. Default
+ * 1000 clients/port; override via `WIDE_SEED_COUNT`.
+ *
+ * Run with: pnpm db:seed:wide-synthetic
+ */
+
+import 'dotenv/config';
+import { seedBootstrap } from './seed-bootstrap';
+import { seedWideSyntheticPortData, type WideSeedSummary } from './seed-wide-synthetic-data';
+
+const DEFAULT_COUNT = 1000;
+
+/**
+ * Seeds every bootstrapped port with wide synthetic data and prints a summary.
+ *
+ * The per-port target comes from `WIDE_SEED_COUNT` (falling back to
+ * `DEFAULT_COUNT`) and must be a positive integer; anything else logs an
+ * error and exits with code 1. Ports are seeded one after another, and the
+ * process exits with code 0 once the summary has been printed.
+ */
+async function seed(): Promise<void> {
+  const target = Number(process.env.WIDE_SEED_COUNT ?? DEFAULT_COUNT);
+  // Reject fractional values too: the target is compared against row counts
+  // and used as a loop bound, so it has to be a whole number.
+  if (!Number.isInteger(target) || target < 1) {
+    console.error(`Invalid WIDE_SEED_COUNT: ${process.env.WIDE_SEED_COUNT}`);
+    process.exit(1);
+  }
+
+  console.log(`Seeding Port Nimara CRM (wide synthetic — ${target} clients/port)...`);
+
+  const portIds = await seedBootstrap();
+
+  console.log('');
+  console.log('Seeding per-port wide synthetic data...');
+
+  const summaries: Array<{ name: string; summary: WideSeedSummary | null }> = [];
+  for (const p of portIds) {
+    console.log(` [${p.slug}] seeding ${target} synthetic clients...`);
+    const summary = await seedWideSyntheticPortData(p.id, p.slug, target);
+    summaries.push({ name: p.name, summary });
+  }
+
+  console.log('');
+  console.log('─── Summary ───────────────────────────────────────────────');
+  for (const s of summaries) {
+    if (s.summary === null) {
+      console.log(` ✓ Port "${s.name}" - already at target count (skipped)`);
+    } else {
+      const x = s.summary;
+      console.log(` ✓ Port "${s.name}" - +${x.clients} clients, +${x.interests} interests`);
+    }
+  }
+  console.log('');
+  console.log('Wide synthetic seed complete!');
+
+  process.exit(0);
+}
+
+seed().catch((err) => {
+  console.error('Wide synthetic seed failed:', err);
+  process.exit(1);
+});