Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
dd2335d
Fix segfault in Postgres 18.2
mason-sharp Feb 8, 2026
aeb4d12
Require dbname value in a DSN string.
danolivo Nov 11, 2025
018412e
Block zodan if lolor is installed on new node (#245)
mason-sharp Oct 27, 2025
0c4f40c
SPOC-307: Add lolor check to zodan.py health-check (#260)
mason-sharp Nov 14, 2025
c04914e
Plain Z0DAN SYNC test for the 2+1 node configuration.
danolivo Nov 4, 2025
334a71d
Bug with not using source node to wait on sync (#281)
mason-sharp Dec 9, 2025
669a30c
Simplify the Z0DAN add_node process.
danolivo Dec 10, 2025
41dfdb1
Harmonize zodan.sql and zodan.py
mason-sharp Jan 6, 2026
b12d094
Cleanup the Z0DAN add_node script.
danolivo Dec 10, 2025
c7c4ee2
Fix Z0DAN sync origin advancement to use slot creation LSN
rasifr Jan 5, 2026
d007128
Port Z0DAN sync LSN fix to Python script
rasifr Jan 7, 2026
f3c3bca
zodan.sql - add subscription health check (Spoc 294). (#266)
danolivo Jan 23, 2026
44fcdda
Bugfix: Z0DAN should unify approach to build slot names by calling bu…
danolivo Jan 23, 2026
a7d2a40
Comment out 012 in schedule
mason-sharp Feb 7, 2026
73bba17
Remove sql/spock--5.0.4--5.0.5.sql
mason-sharp Feb 8, 2026
1e9e9a1
Avoid changing C files for 5.0.5
mason-sharp Feb 8, 2026
a762254
Fix progress tracking
mason-sharp Feb 8, 2026
96a81ed
Address codacy strlen complaint
mason-sharp Feb 8, 2026
fcfecbd
Prepare v5.0.5
mason-sharp Feb 8, 2026
7f4420e
Update init_1.out for 5.0.5
mason-sharp Feb 9, 2026
528fe90
Arrange patch 010 to satisfy back-ported commit aae05622a7c "Prevent …
danolivo Feb 10, 2026
3c42150
SPOC-442: Use NULL for unknown local_origin instead of -1
rasifr Feb 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ REGRESS = preseed infofuncs init_fail init preseed_check basic conflict_secondar
toasted replication_set matview bidirectional primary_key \
interfaces foreign_key copy sequence triggers parallel functions row_filter \
row_filter_sampling att_list column_filter apply_delay \
extended node_origin_cascade multiple_upstreams tuple_origin autoddl \
extended progress_tracking node_origin_cascade multiple_upstreams tuple_origin autoddl \
drop

# The following test cases are disabled while developing.
Expand Down
8 changes: 7 additions & 1 deletion docs/spock_release_notes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Spock Release Notes

## Spock 5.0.5 on Feb 12, 2026

* Fix segfault that occurs when using new Postgres minor releases like 18.2.
* Zero Downtime Add Node (Zodan) minor bug fixes and improvements.
* Updated documentation.


## Spock 5.0.4 on Oct 8, 2025

* Reduce memory usage for transactions with many inserts.
Expand All @@ -17,7 +24,6 @@
- Fix bug where spock incorrectly outputs a message that DDL was replicated when a transaction is executing in repair mode.



## v5.0.3 on Sep 26, 2025

* Spock 5.0.3 adds support for Postgres 18.
Expand Down
4 changes: 2 additions & 2 deletions include/spock.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
#include "spock_fe.h"
#include "spock_node.h"

#define SPOCK_VERSION "5.0.4"
#define SPOCK_VERSION_NUM 50004
#define SPOCK_VERSION "5.0.5"
#define SPOCK_VERSION_NUM 50005

#define EXTENSION_NAME "spock"

Expand Down
123 changes: 62 additions & 61 deletions patches/15/pg15-010-allow_logical_decoding_on_standbys.diff
Original file line number Diff line number Diff line change
Expand Up @@ -247,77 +247,78 @@ index 466f30c22d..b67ad3abe4 100644
slot = MyReplicationSlot;

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 80d96db8eb..2d5b2945ed 100644
index 1f4aad52c4a..623967e4fa1 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -40,6 +40,7 @@
#include <sys/stat.h>

#include "access/transam.h"
+#include "access/xlogrecovery.h"
#include "access/xlog_internal.h"
#include "common/string.h"
#include "miscadmin.h"
@@ -1174,37 +1175,28 @@ ReplicationSlotReserveWal(void)
/*
* For logical slots log a standby snapshot and start logical decoding
* at exactly that position. That allows the slot to start up more
- * quickly.
+ * quickly. But on a standby we cannot do WAL writes, so just use the
+ * replay pointer; effectively, an attempt to create a logical slot on
+ * standby will cause it to wait for an xl_running_xact record to be
+ * logged independently on the primary, so that a snapshot can be
+ * built using the record.
*
- * That's not needed (or indeed helpful) for physical slots as they'll
- * start replay at the last logged checkpoint anyway. Instead return
- * the location of the last redo LSN. While that slightly increases
- * the chance that we have to retry, it's where a base backup has to
- * start replay at.
+ * None of this is needed (or indeed helpful) for physical slots as
+ * they'll start replay at the last logged checkpoint anyway. Instead
+ * return the location of the last redo LSN. While that slightly
+ * increases the chance that we have to retry, it's where a base
+ * backup has to start replay at.
*/
- if (!RecoveryInProgress() && SlotIsLogical(slot))
- {
- XLogRecPtr flushptr;
@@ -1238,37 +1239,29 @@ ReplicationSlotReserveWal(void)
LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);

/*
- * For logical slots log a standby snapshot and start logical decoding at
- * exactly that position. That allows the slot to start up more quickly.
+ * For logical slots log a standby snapshot and start logical decoding
+ * at exactly that position. That allows the slot to start up more
+ * quickly. But on a standby we cannot do WAL writes, so just use the
+ * replay pointer; effectively, an attempt to create a logical slot on
+ * standby will cause it to wait for an xl_running_xact record to be
+ * logged independently on the primary, so that a snapshot can be
+ * built using the record.
*
- * That's not needed (or indeed helpful) for physical slots as they'll
- * start replay at the last logged checkpoint anyway. Instead return the
- * location of the last redo LSN, where a base backup has to start replay
- * at.
+ * None of this is needed (or indeed helpful) for physical slots as
+ * they'll start replay at the last logged checkpoint anyway. Instead
+ * return the location of the last redo LSN, where a base backup has
+ * to start replay at.
*/
- if (!RecoveryInProgress() && SlotIsLogical(slot))
- {
- XLogRecPtr flushptr;
-
- /* start at current insert position */
+ if (SlotIsPhysical(slot))
+ restart_lsn = GetRedoRecPtr();
+ else if (RecoveryInProgress())
+ restart_lsn = GetXLogReplayRecPtr(NULL);
+ else
restart_lsn = GetXLogInsertRecPtr();
- SpinLockAcquire(&slot->mutex);
- slot->data.restart_lsn = restart_lsn;
- SpinLockRelease(&slot->mutex);
- /* start at current insert position */
+ if (SlotIsPhysical(slot))
+ restart_lsn = GetRedoRecPtr();
+ else if (RecoveryInProgress())
+ restart_lsn = GetXLogReplayRecPtr(NULL);
+ else
restart_lsn = GetXLogInsertRecPtr();
- SpinLockAcquire(&slot->mutex);
- slot->data.restart_lsn = restart_lsn;
- SpinLockRelease(&slot->mutex);
-
- /* make sure we have enough information to start */
- flushptr = LogStandbySnapshot();
- /* and make sure it's fsynced to disk */
- XLogFlush(flushptr);
- }
- else
- {
- restart_lsn = GetRedoRecPtr();
- SpinLockAcquire(&slot->mutex);
- slot->data.restart_lsn = restart_lsn;
- SpinLockRelease(&slot->mutex);
- }
+ SpinLockAcquire(&slot->mutex);
+ slot->data.restart_lsn = restart_lsn;
+ SpinLockRelease(&slot->mutex);
/* prevent WAL removal as fast as possible */
ReplicationSlotsComputeRequiredLSN();
@@ -1220,6 +1212,17 @@ ReplicationSlotReserveWal(void)
if (XLogGetLastRemovedSegno() < segno)
break;
}
- /* make sure we have enough information to start */
- flushptr = LogStandbySnapshot();

- /* and make sure it's fsynced to disk */
- XLogFlush(flushptr);
- }
- else
- {
- restart_lsn = GetRedoRecPtr();
- SpinLockAcquire(&slot->mutex);
- slot->data.restart_lsn = restart_lsn;
- SpinLockRelease(&slot->mutex);
- }
+ SpinLockAcquire(&slot->mutex);
+ slot->data.restart_lsn = restart_lsn;
+ SpinLockRelease(&slot->mutex);

/* prevent WAL removal as fast as possible */
ReplicationSlotsComputeRequiredLSN();
@@ -1280,6 +1273,17 @@ ReplicationSlotReserveWal(void)
NameStr(slot->data.name));

LWLockRelease(ReplicationSlotAllocationLock);
+
+ if (!RecoveryInProgress() && SlotIsLogical(slot))
+ {
Expand All @@ -330,7 +331,7 @@ index 80d96db8eb..2d5b2945ed 100644
+ XLogFlush(flushptr);
+ }
}

/*
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 269914bce2..3c39fe20cb 100644
Expand Down
4 changes: 4 additions & 0 deletions samples/Z0DAN/n1.pgb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Custom pgbench script for node n1: pick a random account id in the
-- 1..50000 range, then apply a balance update followed by its inverse
-- (net-zero change to the account, but two replicated UPDATEs).
\set aid random(1, 50000)

UPDATE pgbench_accounts SET abalance = abalance + :aid WHERE aid = :aid;
UPDATE pgbench_accounts SET abalance = abalance - :aid WHERE aid = :aid;
4 changes: 4 additions & 0 deletions samples/Z0DAN/n2.pgb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Custom pgbench script for node n2: pick a random account id in the
-- 50001..100000 range, then apply a balance update followed by its inverse
-- (net-zero change to the account, but two replicated UPDATEs).
\set aid random(50001, 100000)

UPDATE pgbench_accounts SET abalance = abalance + :aid WHERE aid = :aid;
UPDATE pgbench_accounts SET abalance = abalance - :aid WHERE aid = :aid;
98 changes: 98 additions & 0 deletions samples/Z0DAN/wait_subscription.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
-- wait_subscription: block until logical replication from remote_node_name
-- into the local node has fully caught up (lag reaches zero), or until the
-- optional timeout expires.
--
-- Parameters:
--   remote_node_name - spock node name of the publisher to wait for.
--   report_it        - when true, emit a progress NOTICE on every poll cycle.
--   timeout          - maximum total wait; '0 second' (default) waits forever.
--   delay            - sleep between polls, in seconds.
--
-- Returns the remaining lag in bytes: 0 on success, or the last observed lag
-- when the timeout expired.  Raises an exception if the publisher stops
-- advancing while we are still waiting for its first substantial transaction.
CREATE OR REPLACE FUNCTION wait_subscription(
    remote_node_name Name,
    report_it Boolean DEFAULT false,
    timeout Interval DEFAULT '0 second',
    delay Real DEFAULT 1.
) RETURNS bigint AS $$
DECLARE
    state Record;
    lag Bigint := 1;                            -- non-zero so the loop runs at least once
    end_time Timestamp := 'infinity';           -- made finite only when a timeout is given
    time_remained Interval;                     -- stays NULL when no timeout was requested
    local_node_name Name;
    wal_sender_timeout Real;                    -- wal_sender_timeout GUC, in seconds
    prev_received_lsn pg_lsn := '0/0'::pg_lsn;  -- progress detector for the stuck check
BEGIN
    -- Resolve the local node name: spock.local_node.node_id -> spock.node.node_name.
    SELECT node_name FROM spock.node
    WHERE node_id = (SELECT node_id FROM spock.local_node)
    INTO local_node_name;

    -- wal_sender_timeout expressed in seconds: pg_sleep() takes seconds, so no
    -- unit scaling is applied here (the earlier *1024 made the keepalive sleep
    -- below roughly a thousand times too long).
    SELECT EXTRACT(epoch FROM current_setting('wal_sender_timeout')::Interval)::real
    INTO wal_sender_timeout;

    -- Calculate the end time, if a timeout was requested.
    IF timeout > '0 second' THEN
        end_time := now() + timeout;
    END IF;

    WHILE lag > 0 LOOP

        -- Enforce the timeout.  Skip the computation entirely when no timeout
        -- was requested: on servers older than PG 17, subtracting from an
        -- 'infinity' timestamp raises "cannot subtract infinite timestamps".
        IF timeout > '0 second' THEN
            SELECT end_time - clock_timestamp() INTO time_remained;
            IF time_remained < '0 second' THEN
                -- Timed out: report the last observed (non-zero) lag.
                RETURN lag;
            END IF;
        END IF;

        -- NOTE: Remember, an apply group may contain more than a single worker.
        SELECT
            MAX(remote_insert_lsn) AS remote_write_lsn,
            MAX(received_lsn) AS received_lsn
        FROM spock.lag_tracker
        WHERE origin_name = remote_node_name AND receiver_name = local_node_name
        INTO state;

        -- Special case: nothing arrived yet.  A missing lag_tracker row yields
        -- NULLs (MAX over zero rows), which means the same thing here.
        IF (state.received_lsn IS NULL OR state.received_lsn = '0/0'::pg_lsn) THEN
            IF report_it = true THEN
                raise NOTICE 'Replication % -> %: waiting WAL ... . Time remained: % (HH24:MI:SS)',
                    remote_node_name, local_node_name,
                    to_char(time_remained, 'HH24:MI:SS');
            END IF;
            PERFORM pg_sleep(delay);
            CONTINUE;
        END IF;

        -- Special case: no transactions have been executed on the remote yet.
        IF (state.remote_write_lsn = '0/0'::pg_lsn) THEN
            IF report_it = true THEN
                raise NOTICE 'Replication % -> %: waiting anything substantial ... Received LSN: %. Time remained: % (HH24:MI:SS)',
                    remote_node_name, local_node_name, state.received_lsn,
                    to_char(time_remained, 'HH24:MI:SS');
            END IF;

            -- Check for any progress.  This runs regardless of report_it
            -- (previously the reporting branch bypassed it via an early
            -- CONTINUE, so the stuck check never fired when reporting).
            IF (state.received_lsn = prev_received_lsn) THEN
                raise EXCEPTION 'Replication % -> %: publisher seems to be stuck',
                    remote_node_name, local_node_name;
            END IF;

            -- We have progress; wait further.
            prev_received_lsn = state.received_lsn;
            -- Sleep past two wal_sender_timeout periods so a 'keepalive'
            -- message has had a chance to advance received_lsn before the
            -- next progress check.
            PERFORM pg_sleep(wal_sender_timeout * 2);

            PERFORM pg_sleep(delay);
            CONTINUE;
        END IF;

        SELECT MAX(remote_insert_lsn - received_lsn) FROM spock.lag_tracker
        WHERE origin_name = remote_node_name AND receiver_name = local_node_name
        INTO lag;

        IF report_it = true THEN
            raise NOTICE 'Replication % -> %: current lag % MB, Time remained: % (HH24:MI:SS)',
                remote_node_name, local_node_name, lag/1024/1024,
                to_char(time_remained, 'HH24:MI:SS');
        END IF;

        PERFORM pg_sleep(delay);
    END LOOP;

    RETURN lag;
END
$$ LANGUAGE plpgsql VOLATILE;
Loading
Loading