From ab054edb198d71d3ee9a9fcfcda7867051598a41 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Thu, 7 May 2026 19:16:25 +0500 Subject: [PATCH 1/4] zodan: add sync_event handshake to Phase 9 sub_create loop. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 9 created enabled new→existing subscriptions and returned without confirming any peer had begun applying. Mirror Phases 6/8. --- samples/Z0DAN/zodan.sql | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/samples/Z0DAN/zodan.sql b/samples/Z0DAN/zodan.sql index 98d109f0..bef1891c 100644 --- a/samples/Z0DAN/zodan.sql +++ b/samples/Z0DAN/zodan.sql @@ -1744,6 +1744,10 @@ DECLARE rec RECORD; subscription_count integer := 0; sub_name text; + sync_lsn pg_lsn; + remotesql text; + timeout_ms integer := 180; -- 3 minutes + sync_ok text; BEGIN RAISE NOTICE 'Phase 9: Creating subscriptions from all other nodes to new node'; @@ -1768,12 +1772,43 @@ BEGIN verb -- verbose ); RAISE NOTICE ' ✓ %', rpad('Creating subscription ' || sub_name || ' on node ' || rec.node_name || '...', 120, ' '); - PERFORM pg_sleep(5); subscription_count := subscription_count + 1; EXCEPTION WHEN OTHERS THEN RAISE EXCEPTION ' ✗ %', rpad('Creating subscription ' || sub_name || ' on node ' || rec.node_name || ' (error: ' || SQLERRM || ')', 120, ' '); END; + + -- Failover sync handshake: emit sync_event on new (provider) and + -- wait for it on rec.node (subscriber) so that Phase 9 cannot + -- return until each existing node has actually started applying + -- from a known LSN on the new node. + BEGIN + remotesql := 'SELECT spock.sync_event();'; + IF verb THEN + RAISE NOTICE ' Remote SQL for sync_event on new node %: %', new_node_name, remotesql; + END IF; + SELECT * FROM dblink(new_node_dsn, remotesql) AS t(lsn pg_lsn) INTO sync_lsn; + RAISE NOTICE ' OK: %', rpad('Triggered sync_event on new node ' || new_node_name || ' (LSN: ' || sync_lsn || ')...', 120, ' '); + EXCEPTION + WHEN OTHERS THEN + RAISE EXCEPTION ' ✗ %', rpad('Triggering sync_event on new node ' || new_node_name || ' for ' || sub_name || ' (error: ' || SQLERRM || ')', 120, ' '); + END; + + BEGIN + remotesql := format('CALL spock.wait_for_sync_event(true, %L, %L::pg_lsn, %s, true);', + new_node_name, sync_lsn, timeout_ms); + IF verb THEN + RAISE NOTICE ' Remote SQL for wait_for_sync_event on %: %', rec.node_name, remotesql; + END IF; + SELECT * INTO sync_ok FROM dblink(rec.dsn, remotesql) AS t(result text); + IF sync_ok IS NULL OR sync_ok::boolean IS NOT TRUE THEN + RAISE EXCEPTION 'wait_for_sync_event timed out for % on node %', new_node_name, rec.node_name; + END IF; + RAISE NOTICE ' OK: %', rpad('Sync event from ' || new_node_name || ' confirmed on node ' || rec.node_name, 120, ' '); + EXCEPTION + WHEN OTHERS THEN + RAISE EXCEPTION ' ✗ %', rpad('Unable to wait for sync event from ' || new_node_name || ' on node ' || rec.node_name || ' (error: ' || SQLERRM || ')', 120, ' '); + END; END LOOP; IF subscription_count = 0 THEN From f7719322ea73e540ebf67509f6c9c35657c60860 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Thu, 7 May 2026 20:45:19 +0500 Subject: [PATCH 2/4] zodan: mark logical slots with FAILOVER on PG17+ create_replication_slot now detects the remote PG version and passes failover := true on PG17+, matching spock_sync.c. --- samples/Z0DAN/zodan.sql | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/samples/Z0DAN/zodan.sql b/samples/Z0DAN/zodan.sql index bef1891c..e72f2528 100644 --- a/samples/Z0DAN/zodan.sql +++ b/samples/Z0DAN/zodan.sql @@ -353,9 +353,10 @@ LANGUAGE plpgsql AS $$ DECLARE - remotesql text; - result RECORD; - exists_count int; + remotesql text; + result RECORD; + exists_count int; + remote_version int; BEGIN -- ============================================================================ -- Step 1: Check if replication slot already exists on remote node @@ -383,11 +384,27 @@ BEGIN -- ============================================================================ -- Step 2: Build remote SQL for replication slot creation + -- On PG17+ pass failover := true so the slot is picked up by the + -- native slotsync worker (sync_replication_slots = on) and is + -- synchronized to physical standbys, matching what Spock does + -- on its own CREATE_REPLICATION_SLOT path (see spock_sync.c). -- ============================================================================ - remotesql := format( - 'SELECT slot_name, lsn FROM pg_create_logical_replication_slot(%L, %L)', - slot_name, plugin - ); + SELECT v INTO remote_version + FROM dblink(node_dsn, 'SHOW server_version_num') AS t(v int); + + IF remote_version >= 170000 THEN + remotesql := format( + 'SELECT slot_name, lsn ' + 'FROM pg_create_logical_replication_slot(%L, %L, false, false, true)', + slot_name, plugin + ); + ELSE + remotesql := format( + 'SELECT slot_name, lsn ' + 'FROM pg_create_logical_replication_slot(%L, %L)', + slot_name, plugin + ); + END IF; IF verb THEN RAISE NOTICE '[QUERY] %', remotesql; From 5648cad7266504187ff6d1a254eacc912e3f079f Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Fri, 8 May 2026 19:17:22 +0500 Subject: [PATCH 3/4] spock_sync: switch out of ErrorContext before CopyErrorData in cswp Also revert Phase 9 sync_event handshake; deadlocks against cswp. --- samples/Z0DAN/zodan.sql | 40 ++++------------------------------------ src/spock_sync.c | 13 ++++++++++++- 2 files changed, 16 insertions(+), 37 deletions(-) diff --git a/samples/Z0DAN/zodan.sql b/samples/Z0DAN/zodan.sql index e72f2528..f7da08eb 100644 --- a/samples/Z0DAN/zodan.sql +++ b/samples/Z0DAN/zodan.sql @@ -1761,10 +1761,6 @@ DECLARE rec RECORD; subscription_count integer := 0; sub_name text; - sync_lsn pg_lsn; - remotesql text; - timeout_ms integer := 180; -- 3 minutes - sync_ok text; BEGIN RAISE NOTICE 'Phase 9: Creating subscriptions from all other nodes to new node'; @@ -1789,43 +1785,15 @@ BEGIN verb -- verbose ); RAISE NOTICE ' ✓ %', rpad('Creating subscription ' || sub_name || ' on node ' || rec.node_name || '...', 120, ' '); + -- Allow the apply worker on rec.node time to come up and + -- create its slot on new_node before the next iteration + -- (and before subsequent zodan phases poke this state). + PERFORM pg_sleep(5); subscription_count := subscription_count + 1; EXCEPTION WHEN OTHERS THEN RAISE EXCEPTION ' ✗ %', rpad('Creating subscription ' || sub_name || ' on node ' || rec.node_name || ' (error: ' || SQLERRM || ')', 120, ' '); END; - - -- Failover sync handshake: emit sync_event on new (provider) and - -- wait for it on rec.node (subscriber) so that Phase 9 cannot - -- return until each existing node has actually started applying - -- from a known LSN on the new node. - BEGIN - remotesql := 'SELECT spock.sync_event();'; - IF verb THEN - RAISE NOTICE ' Remote SQL for sync_event on new node %: %', new_node_name, remotesql; - END IF; - SELECT * FROM dblink(new_node_dsn, remotesql) AS t(lsn pg_lsn) INTO sync_lsn; - RAISE NOTICE ' OK: %', rpad('Triggered sync_event on new node ' || new_node_name || ' (LSN: ' || sync_lsn || ')...', 120, ' '); - EXCEPTION - WHEN OTHERS THEN - RAISE EXCEPTION ' ✗ %', rpad('Triggering sync_event on new node ' || new_node_name || ' for ' || sub_name || ' (error: ' || SQLERRM || ')', 120, ' '); - END; - - BEGIN - remotesql := format('CALL spock.wait_for_sync_event(true, %L, %L::pg_lsn, %s, true);', - new_node_name, sync_lsn, timeout_ms); - IF verb THEN - RAISE NOTICE ' Remote SQL for wait_for_sync_event on %: %', rec.node_name, remotesql; - END IF; - SELECT * INTO sync_ok FROM dblink(rec.dsn, remotesql) AS t(result text); - IF sync_ok IS NULL OR sync_ok::boolean IS NOT TRUE THEN - RAISE EXCEPTION 'wait_for_sync_event timed out for % on node %', new_node_name, rec.node_name; - END IF; - RAISE NOTICE ' OK: %', rpad('Sync event from ' || new_node_name || ' confirmed on node ' || rec.node_name, 120, ' '); - EXCEPTION - WHEN OTHERS THEN - RAISE EXCEPTION ' ✗ %', rpad('Unable to wait for sync event from ' || new_node_name || ' on node ' || rec.node_name || ' (error: ' || SQLERRM || ')', 120, ' '); - END; END LOOP; IF subscription_count = 0 THEN diff --git a/src/spock_sync.c b/src/spock_sync.c index 99a97aaa..50d16971 100644 --- a/src/spock_sync.c +++ b/src/spock_sync.c @@ -1483,7 +1483,17 @@ spock_sync_subscription(SpockSubscription *sub) } PG_CATCH(); { - ErrorData *edata = CopyErrorData(); + MemoryContext savecxt; + ErrorData *edata; + + /* + * CopyErrorData() requires that we are NOT running in + * ErrorContext, otherwise its assertion in elog.c trips on + * cassert builds and the apply worker dies with SIGABRT. + * Switch into our long-lived sync context first. + */ + savecxt = MemoryContextSwitchTo(myctx); + edata = CopyErrorData(); FlushErrorState(); elog(LOG, "SPOCK cswp error sub=%s slot=%s: %s", @@ -1502,6 +1512,7 @@ spock_sync_subscription(SpockSubscription *sub) } FreeErrorData(edata); + MemoryContextSwitchTo(savecxt); PG_RE_THROW(); } PG_END_TRY(); From ec3b18a9f5a7512048917b344651e59989760e76 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Fri, 8 May 2026 20:11:20 +0500 Subject: [PATCH 4/4] Failover test added. --- tests/tap/t/018_failover_slots.pl | 64 +++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/tap/t/018_failover_slots.pl b/tests/tap/t/018_failover_slots.pl index 3f1a0256..e0417290 100644 --- a/tests/tap/t/018_failover_slots.pl +++ b/tests/tap/t/018_failover_slots.pl @@ -390,6 +390,70 @@ sub wait_until { }); ok($data_ok, 'Row (1, before_failover) replicated n1 -> n2 before failover'); +# ========================================================================== +# 14b. REGRESSION: read-only standby is queryable while spock is loaded +# +# A customer reported that after enabling spock with logical slot failover, +# the hot_standby could not be queried — basic SELECTs failed because of +# spock interactions on a recovery backend. Re-running the full +# slot-failover dance is not enough; we need explicit assertions that the +# standby answers user SELECT, spock catalog SELECT, and pg_replication_slots +# while it's still in recovery. Without these checks a future regression +# could quietly reintroduce the same bug. +# ========================================================================== + +# Wait for the standby to apply the row we just wrote on n1. +my $primary_wal_lsn = scalar_query(1, "SELECT pg_current_wal_lsn()"); +$primary_wal_lsn =~ s/\s+//g; +my $standby_caught_up = wait_until(60, 2, sub { + my $rl = qport($pg_bin, $host, $standby_port, $dbname, $db_user, + "SELECT pg_last_wal_replay_lsn() >= '$primary_wal_lsn'::pg_lsn"); + $rl =~ s/\s+//g; + return $rl eq 't'; +}); +ok($standby_caught_up, + "Standby applied WAL up to primary lsn $primary_wal_lsn"); + +# Standby must still be in recovery — confirms hot_standby mode and that +# no spock hook accidentally took the standby out of recovery. +my $still_in_recovery = qport($pg_bin, $host, $standby_port, + $dbname, $db_user, "SELECT pg_is_in_recovery()"); +$still_in_recovery =~ s/\s+//g; +is($still_in_recovery, 't', + 'Read-only standby is still in recovery (hot_standby mode)'); + +# 1) User-table SELECT against the standby returns the committed row. +my $val_on_standby = qport($pg_bin, $host, $standby_port, + $dbname, $db_user, "SELECT val FROM failover_test WHERE id = 1"); +$val_on_standby =~ s/\s+//g; +is($val_on_standby, 'before_failover', + 'Read-only standby returns committed user data (SELECT works)'); + +# 2) Spock catalog SELECT against the standby — the original customer +# failure mode was that spock.* reads errored out on a recovery backend. +my $standby_node_count = qport($pg_bin, $host, $standby_port, + $dbname, $db_user, "SELECT count(*) FROM spock.node"); +$standby_node_count =~ s/\s+//g; +ok(($standby_node_count =~ /^\d+$/) && $standby_node_count >= 1, + "Read-only standby returns spock.node ($standby_node_count rows)"); + +# 3) The synced logical slot is visible on the standby. +my $standby_slot_count = qport($pg_bin, $host, $standby_port, + $dbname, $db_user, + "SELECT count(*) FROM pg_replication_slots WHERE slot_name = '$slot_name'"); +$standby_slot_count =~ s/\s+//g; +is($standby_slot_count, '1', + "Read-only standby returns synced slot '$slot_name' via pg_replication_slots"); + +# 4) Writes are rejected — the standby must remain read-only. +my $write_rc = system( + "$pg_bin/psql -X -h $host -p $standby_port -d $dbname -U $db_user " + . "-v ON_ERROR_STOP=1 " + . "-c \"INSERT INTO failover_test VALUES (999, 'must_fail')\" " + . ">/dev/null 2>&1"); +isnt($write_rc, 0, + 'Write against read-only standby is rejected (read-only enforced)'); + # ========================================================================== # 15. Verify invalidation_reason is NULL (slot is healthy on standby) # ==========================================================================