From 2c57082d8da9758e83283b58339fbd28760fa2fc Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 14 May 2026 22:40:24 +0200 Subject: [PATCH] =?UTF-8?q?fix(P1):=20postgres-js=20pool=20reliability=20?= =?UTF-8?q?=E2=80=94=20F8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the audit the dev server twice entered a stuck state where every query 500'd with `write CONNECT_TIMEOUT` while the DB was healthy (1/100 connections used, queryable from psql immediately). The Docker bridge can silently drop TCP sockets and postgres-js holds the stale handles until max_lifetime expires. - connect_timeout: 10s → 5s (fail fast) - max_lifetime: 30min → 10min (recycle before staleness accumulates) - onnotice: surface non-NOTICE messages (e.g. WARNING) for visibility; plain NOTICE is filtered out Reduces the window of stuck state. Full recovery still requires a restart if the pool hard-fails. pgbouncer in production is the proper long-term answer; this is the safe one-file change. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lib/db/index.ts | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/lib/db/index.ts b/src/lib/db/index.ts index 3322dd9f..a400b54a 100644 --- a/src/lib/db/index.ts +++ b/src/lib/db/index.ts @@ -29,11 +29,31 @@ const connectionString = process.env.DATABASE_URL!; // during clients-page fanout without log-storm. const POOL_MAX = process.env.NODE_ENV === 'development' ? 30 : 20; +// Pool reliability hardening (post-audit F8): +// During the audit the dev server twice entered a stuck state where every +// query 500'd with `write CONNECT_TIMEOUT` while the DB was healthy +// (1 of 100 connections used, queryable from psql immediately). +// The Docker bridge can silently drop TCP sockets and postgres-js's pool +// holds onto the stale handles until max_lifetime expires. +// - connect_timeout: 5s so failures surface fast instead of stalling +// requests for 10s before erroring. 
+// - max_lifetime: 10min so connections recycle before stale sockets +// accumulate. Was 30min — too long for the Docker socket-drop pattern. +// - onnotice: surfaces postgres messages other than plain NOTICE (e.g. WARNING) that +// we'd otherwise miss (extension warnings, deprecation hints). const queryClient = postgres(connectionString, { max: POOL_MAX, idle_timeout: 20, - connect_timeout: 10, - max_lifetime: 60 * 30, + connect_timeout: 5, + max_lifetime: 60 * 10, + onnotice: (notice) => { + // postgres-js types `notice` as `unknown`; the runtime shape is + // { severity, code, message, ... }. Plain NOTICE is skipped; any other severity is logged via console.warn. + const n = notice as { severity?: string; message?: string }; + if (n.severity && n.severity !== 'NOTICE') { + console.warn(`[postgres ${n.severity}] ${n.message ?? ''}`); + } + }, connection: { // ms values per postgres.js types; these become Postgres GUC settings // applied at session start.