From 5eedd04c544828314fa9de0c4dded67e68ebd3b5 Mon Sep 17 00:00:00 2001 From: Kalven Schraut Date: Fri, 12 Jun 2026 18:35:28 -0500 Subject: [PATCH 1/3] Fix multi-host connection retries never advancing through the host list The local retries counter was read in error() and tryNext() but never incremented (only options.shared.retries, the backoff counter, was), so with multiple hosts: - connect errors were swallowed forever, leaving queries hanging indefinitely when every host is down instead of rejecting once the host list is exhausted - target_session_attrs: 'prefer-standby' kept terminating perfectly good primary connections forever when no standby was reachable Reset retries when an initial query starts a connection cycle, increment it on every closed attempt during establishment, and surface the error once every host has been tried. prefer-standby now insists on a standby only during a first pass over the host list and accepts any server on a second pass, like libpq. Route connect timeouts through error() so a timed out host fails over to the next host the same way a refused host does (fixes #988). Co-Authored-By: Claude Fable 5 --- src/connection.js | 14 ++++++++++---- tests/index.js | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/src/connection.js b/src/connection.js index 1b1cccde..5dc36e16 100644 --- a/src/connection.js +++ b/src/connection.js @@ -69,6 +69,9 @@ function Connection(options, queues = {}, { onopen = noop, onend = noop, onclose target_session_attrs } = options + // prefer-standby does a second pass over the host list accepting any server type (like libpq) + const maxHostAttempts = host.length * (target_session_attrs === 'prefer-standby' ? 2 : 1) + const sent = Queue() , id = uid++ , backend = { pid: null, secret: null } @@ -112,6 +115,7 @@ function Connection(options, queues = {}, { onopen = noop, onend = noop, onclose idleTimer, connect(query) { initial = query + retries = 0 reconnect() }, terminate, @@ -259,7 +263,7 @@ function Connection(options, queues = {}, { onopen = noop, onend = noop, onclose } function connectTimedOut() { - errored(Errors.connection('CONNECT_TIMEOUT', options, socket)) + error(Errors.connection('CONNECT_TIMEOUT', options, socket)) socket.destroy() } @@ -379,7 +383,7 @@ function Connection(options, queues = {}, { onopen = noop, onend = noop, onclose } function error(err) { - if (connection.queue === queues.connecting && options.host[retries + 1]) + if (connection.queue === queues.connecting && retries + 1 < maxHostAttempts) return errored(err) @@ -447,8 +451,10 @@ function Connection(options, queues = {}, { onopen = noop, onend = noop, onclose socket.removeAllListeners() socket = null - if (initial) + if (initial) { + retries++ return reconnect() + } !hadError && (query || sent.length) && error(Errors.connection('CONNECTION_CLOSED', options, socket)) closedTime = performance.now() @@ -793,7 +799,7 @@ function Connection(options, queues = {}, { onopen = noop, onend = noop, onclose (x === 'read-only' && xs.default_transaction_read_only === 'off') || (x === 'primary' && xs.in_hot_standby === 'on') || (x === 'standby' && xs.in_hot_standby === 'off') || - (x === 'prefer-standby' && xs.in_hot_standby === 'off' && options.host[retries]) + (x === 'prefer-standby' && xs.in_hot_standby === 'off' && retries < host.length) ) } diff --git a/tests/index.js b/tests/index.js index 23e6c4d4..ec66666f 100644 --- a/tests/index.js +++ b/tests/index.js @@ -1901,6 +1901,40 @@ t('Multiple hosts', { return [[id1, id2, id1].join(','), result.join(',')] }) +t('Multiple hosts errors when all hosts are down', { timeout: 10 }, async() => { + const sql = postgres({ ...options, host: ['localhost', 'localhost'], port: [1, 2], connect_timeout: 1 }) + return ['ECONNREFUSED', await sql`select 1`.catch(e => e.code)] +}) + +t('Multiple hosts continues to next host after connect timeout', { timeout: 10 }, async() => { + const server = net.createServer() + server.listen() + const sql = postgres({ ...options, host: ['127.0.0.1', 'localhost'], port: [server.address().port, 5432], connect_timeout: 1 }) + const x = (await sql`select 1 as x`)[0].x + server.close() + await sql.end() + return [1, x] +}) + +t('prefer-standby connects to the primary when the standby host is down', { timeout: 10 }, async() => { + const sql = postgres({ ...options, host: ['localhost', 'localhost'], port: [1, 5432], target_session_attrs: 'prefer-standby', connect_timeout: 1 }) + const x = (await sql`select 1 as x`)[0].x + await sql.end() + return [1, x] +}) + +t('prefer-standby connects to a primary when no host is a standby', { timeout: 10 }, async() => { + const sql = postgres({ idle_timeout, max: 1, host: ['localhost', 'localhost'], port: [5432, 5433], target_session_attrs: 'prefer-standby' }) + const x = (await sql`select 1 as x`)[0].x + await sql.end() + return [1, x] +}) + +t('target_session_attrs standby errors when no host is a standby', { timeout: 10 }, async() => { + const sql = postgres({ idle_timeout, max: 1, host: ['localhost', 'localhost'], port: [5432, 5433], target_session_attrs: 'standby' }) + return ['CONNECTION_DESTROYED', await sql`select 1`.catch(e => e.code)] +}) + t('Escaping supports schemas and tables', async() => { await sql`create schema a` await sql`create table a.b (c int)` From 36a9938807d6f59b8aefe7e0418e88e4f86af3c3 Mon Sep 17 00:00:00 2001 From: Kalven Schraut Date: Fri, 12 Jun 2026 18:48:28 -0500 Subject: [PATCH 2/3] Allow host/port arrays in Options types Multi-host connections are documented in the README and supported at runtime (BaseOptions already types them), but the user-facing Options interface narrowed host/port back to scalars, so object-form multi-host config failed to type check. Co-Authored-By: Claude Fable 5 --- types/index.d.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/types/index.d.ts b/types/index.d.ts index 13c3432f..9bf064ce 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -345,9 +345,9 @@ declare namespace postgres { interface Options> extends Partial> { /** @inheritdoc */ - host?: string | undefined; + host?: string | string[] | undefined; /** @inheritdoc */ - port?: number | undefined; + port?: number | number[] | undefined; /** @inheritdoc */ path?: string | undefined; /** Password of database user (an alias for `password`) */ From aeced3b543378264d63992caa7a495f942cdc52f Mon Sep 17 00:00:00 2001 From: Kalven Schraut Date: Fri, 26 Jun 2026 11:25:36 -0500 Subject: [PATCH 3/3] test: more cases covered --- tests/index.js | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/index.js b/tests/index.js index ec66666f..6bfbd1a0 100644 --- a/tests/index.js +++ b/tests/index.js @@ -1935,6 +1935,53 @@ t('target_session_attrs standby errors when no host is a standby', { timeout: 10 return ['CONNECTION_DESTROYED', await sql`select 1`.catch(e => e.code)] }) +t('Multiple hosts rejects within connect_timeout × hosts when every host times out', { timeout: 10 }, async() => { + // Two hosts that accept the TCP connection but never speak the Postgres protocol, so each + // attempt connect-timeouts (the #988 path, not ECONNREFUSED). When `error()` wrongly assumed + // "another host to try", connect_timeout was defeated and the query hung forever. It must now + // reject, bounded by ~connect_timeout × hosts, instead of hanging. + const connect_timeout = 0.3 + const a = net.createServer().listen() + const b = net.createServer().listen() + const host = ['127.0.0.1', '127.0.0.1'] + const port = [a.address().port, b.address().port] + const sql = postgres({ ...options, host, port, connect_timeout }) + + const start = Date.now() + const code = await sql`select 1`.catch(e => e.code) + const elapsed = (Date.now() - start) / 1000 + + a.close() + b.close() + await sql.end() + + const bounded = code === 'CONNECT_TIMEOUT' && elapsed < connect_timeout * host.length + 1 + return ['CONNECT_TIMEOUT bounded', bounded ? 'CONNECT_TIMEOUT bounded' : `${code} after ${elapsed.toFixed(2)}s`] +}) + +t('prefer-standby exhausts the standby-only first pass before accepting a primary', { timeout: 10 }, async() => { + // host[0] is the real primary; host[1] is a probe that counts connections and drops them. + // With prefer-standby and no standby available, libpq-style semantics require a full first + // pass that rejects primaries, then a second pass that accepts any server. So the probe must + // be reached exactly once (pass one skips past the primary) before the primary is accepted on + // pass two. probe === 0 would mean the primary was wrongly accepted on the first pass; a probe + // count that never settles (looping forever) is the original "retries never increments" bug. + let probe = 0 + const server = net.createServer(socket => (probe++, socket.destroy())).listen() + const sql = postgres({ + ...options, + host: ['localhost', '127.0.0.1'], + port: [5432, server.address().port], + target_session_attrs: 'prefer-standby' + }) + + const x = (await sql`select 1 as x`)[0].x + server.close() + await sql.end() + + return ['1,1', [x, probe].join(',')] +}) + t('Escaping supports schemas and tables', async() => { await sql`create schema a` await sql`create table a.b (c int)`