From d2fefc7f59cfd4773dc2f84e1258a011d8e01ab3 Mon Sep 17 00:00:00 2001 From: Mason Sharp Date: Sun, 5 Apr 2026 19:34:56 -0700 Subject: [PATCH 01/11] Add regression tests for read-only mode (spock.readonly GUC) Cover all three modes (off, local, all), superuser exemption, backward-compatible 'user' alias, user-imposed transaction_read_only preservation, and ALTER SYSTEM with separate non-superuser connection. Co-Authored-By: Claude Opus 4.6 (1M context) --- Makefile | 2 +- tests/regress/expected/read_only.out | 223 +++++++++++++++++++++++++++ tests/regress/sql/read_only.sql | 146 ++++++++++++++++++ 3 files changed, 370 insertions(+), 1 deletion(-) create mode 100644 tests/regress/expected/read_only.out create mode 100644 tests/regress/sql/read_only.sql diff --git a/Makefile b/Makefile index 0e0917c3..772daa4b 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ REGRESS = preseed infofuncs init_fail init preseed_check basic conflict_secondar interfaces foreign_key copy sequence triggers parallel functions row_filter \ row_filter_sampling att_list column_filter apply_delay \ extended node_origin_cascade multiple_upstreams tuple_origin autoddl \ - sync_event sync_table generated_columns spill_transaction drop + sync_event sync_table generated_columns spill_transaction read_only drop # The following test cases are disabled while developing. # diff --git a/tests/regress/expected/read_only.out b/tests/regress/expected/read_only.out new file mode 100644 index 00000000..1d31e599 --- /dev/null +++ b/tests/regress/expected/read_only.out @@ -0,0 +1,223 @@ +-- Tests for read-only mode (spock.readonly GUC) +-- Verifies three modes (off, local, all), superuser exemption, +-- backward-compatible 'user' alias, and user-imposed transaction_read_only. +SELECT * FROM spock_regress_variables() +\gset +\c :provider_dsn +-- Store superuser name for reconnections after user switching +SELECT current_user AS regress_su +\gset +-- Setup: test table with grants for nonsuper +CREATE TABLE public.ro_test (id int primary key, data text); +GRANT ALL ON public.ro_test TO nonsuper; +-- Verify default state is 'off' +SHOW spock.readonly; + spock.readonly +---------------- + off +(1 row) + +--- +--- Test: non-superuser can write when readonly is off +--- +SET ROLE nonsuper; +INSERT INTO ro_test VALUES (1, 'nonsuper writes when off'); +RESET ROLE; +SELECT count(*) FROM ro_test; + count +------- + 1 +(1 row) + +--- +--- Test: set readonly to 'local', superuser can still write +--- +SET spock.readonly = 'local'; +SHOW spock.readonly; + spock.readonly +---------------- + local +(1 row) + +INSERT INTO ro_test VALUES (2, 'superuser writes in local'); +SELECT count(*) FROM ro_test; + count +------- + 2 +(1 row) + +--- +--- Test: non-superuser blocked from DML and DDL in 'local' mode +--- +\set VERBOSITY terse +SET ROLE nonsuper; +-- INSERT blocked +INSERT INTO ro_test VALUES (3, 'should fail'); +ERROR: cannot execute INSERT in a read-only transaction +-- UPDATE blocked +UPDATE ro_test SET data = 'fail' WHERE id = 1; +ERROR: cannot execute UPDATE in a read-only transaction +-- DELETE blocked +DELETE FROM ro_test WHERE id = 1; +ERROR: cannot execute DELETE in a read-only transaction +-- TRUNCATE blocked +TRUNCATE ro_test; +ERROR: cannot execute TRUNCATE TABLE in a read-only transaction +-- DDL blocked +CREATE TABLE public.ro_fail (id int); +ERROR: cannot execute CREATE TABLE in a read-only transaction +-- Non-superuser can still SELECT +SELECT count(*) FROM ro_test; + count +------- + 2 +(1 row) + +-- Non-superuser cannot change spock.readonly +SET spock.readonly = 'off'; +ERROR: permission denied to set parameter "spock.readonly" +RESET ROLE; +\set VERBOSITY default +--- +--- Test: backward-compatible 'user' alias maps to 'local' +--- +SET spock.readonly = 'user'; +SHOW spock.readonly; + spock.readonly +---------------- + local +(1 row) + +\set VERBOSITY terse +SET ROLE nonsuper; +INSERT INTO ro_test VALUES (4, 'fail user alias'); +ERROR: cannot execute INSERT in a read-only transaction +RESET ROLE; +\set VERBOSITY default +--- +--- Test: reset to 'off', non-superuser can write again +--- +SET spock.readonly = 'off'; +SHOW spock.readonly; + spock.readonly +---------------- + off +(1 row) + +SET ROLE nonsuper; +INSERT INTO ro_test VALUES (3, 'nonsuper after reset'); +RESET ROLE; +SELECT count(*) FROM ro_test; + count +------- + 3 +(1 row) + +--- +--- Test: 'all' mode - non-superuser blocked, superuser exempt +--- +SET spock.readonly = 'all'; +SHOW spock.readonly; + spock.readonly +---------------- + all +(1 row) + +-- Superuser can write in 'all' mode +INSERT INTO ro_test VALUES (4, 'superuser in all'); +-- Non-superuser blocked +\set VERBOSITY terse +SET ROLE nonsuper; +INSERT INTO ro_test VALUES (5, 'fail all mode'); +ERROR: cannot execute INSERT in a read-only transaction +RESET ROLE; +\set VERBOSITY default +SELECT count(*) FROM ro_test; + count +------- + 4 +(1 row) + +--- +--- Test: user-imposed transaction_read_only is respected when spock.readonly = off +--- +SET spock.readonly = 'off'; +\set VERBOSITY terse +BEGIN; +SET LOCAL transaction_read_only = on; +INSERT INTO ro_test VALUES (5, 'fail user readonly'); +ERROR: cannot execute INSERT in a read-only transaction +ROLLBACK; +\set VERBOSITY default +--- +--- Test: ALTER SYSTEM with separate non-superuser connection +--- +ALTER SYSTEM SET spock.readonly = 'local'; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +SELECT pg_sleep(0.5); + pg_sleep +---------- + +(1 row) + +-- New connection as nonsuper inherits system-level setting +\c regression nonsuper +SHOW spock.readonly; + spock.readonly +---------------- + local +(1 row) + +\set VERBOSITY terse +INSERT INTO ro_test VALUES (5, 'fail new conn'); +ERROR: cannot execute INSERT in a read-only transaction +\set VERBOSITY default +SELECT count(*) FROM ro_test; + count +------- + 4 +(1 row) + +-- Superuser not blocked +\c regression :regress_su +INSERT INTO ro_test VALUES (5, 'superuser alter system'); +SELECT count(*) FROM ro_test; + count +------- + 5 +(1 row) + +-- Reset system setting +ALTER SYSTEM RESET spock.readonly; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +SELECT pg_sleep(0.5); + pg_sleep +---------- + +(1 row) + +-- Non-superuser can write after ALTER SYSTEM RESET +\c regression nonsuper +INSERT INTO ro_test VALUES (6, 'nonsuper after sys reset'); +SELECT count(*) FROM ro_test; + count +------- + 6 +(1 row) + +--- +--- Cleanup +--- +\c regression :regress_su +SET spock.readonly = 'off'; +DROP TABLE public.ro_test; diff --git a/tests/regress/sql/read_only.sql b/tests/regress/sql/read_only.sql new file mode 100644 index 00000000..4b82eccb --- /dev/null +++ b/tests/regress/sql/read_only.sql @@ -0,0 +1,146 @@ +-- Tests for read-only mode (spock.readonly GUC) +-- Verifies three modes (off, local, all), superuser exemption, +-- backward-compatible 'user' alias, and user-imposed transaction_read_only. +SELECT * FROM spock_regress_variables() +\gset + +\c :provider_dsn +-- Store superuser name for reconnections after user switching +SELECT current_user AS regress_su +\gset + +-- Setup: test table with grants for nonsuper +CREATE TABLE public.ro_test (id int primary key, data text); +GRANT ALL ON public.ro_test TO nonsuper; + +-- Verify default state is 'off' +SHOW spock.readonly; + +--- +--- Test: non-superuser can write when readonly is off +--- +SET ROLE nonsuper; +INSERT INTO ro_test VALUES (1, 'nonsuper writes when off'); +RESET ROLE; +SELECT count(*) FROM ro_test; + +--- +--- Test: set readonly to 'local', superuser can still write +--- +SET spock.readonly = 'local'; +SHOW spock.readonly; + +INSERT INTO ro_test VALUES (2, 'superuser writes in local'); +SELECT count(*) FROM ro_test; + +--- +--- Test: non-superuser blocked from DML and DDL in 'local' mode +--- +\set VERBOSITY terse +SET ROLE nonsuper; +-- INSERT blocked +INSERT INTO ro_test VALUES (3, 'should fail'); +-- UPDATE blocked +UPDATE ro_test SET data = 'fail' WHERE id = 1; +-- DELETE blocked +DELETE FROM ro_test WHERE id = 1; +-- TRUNCATE blocked +TRUNCATE ro_test; +-- DDL blocked +CREATE TABLE public.ro_fail (id int); +-- Non-superuser can still SELECT +SELECT count(*) FROM ro_test; +-- Non-superuser cannot change spock.readonly +SET spock.readonly = 'off'; +RESET ROLE; +\set VERBOSITY default + +--- +--- Test: backward-compatible 'user' alias maps to 'local' +--- +SET spock.readonly = 'user'; +SHOW spock.readonly; + +\set VERBOSITY terse +SET ROLE nonsuper; +INSERT INTO ro_test VALUES (4, 'fail user alias'); +RESET ROLE; +\set VERBOSITY default + +--- +--- Test: reset to 'off', non-superuser can write again +--- +SET spock.readonly = 'off'; +SHOW spock.readonly; + +SET ROLE nonsuper; +INSERT INTO ro_test VALUES (3, 'nonsuper after reset'); +RESET ROLE; +SELECT count(*) FROM ro_test; + +--- +--- Test: 'all' mode - non-superuser blocked, superuser exempt +--- +SET spock.readonly = 'all'; +SHOW spock.readonly; + +-- Superuser can write in 'all' mode +INSERT INTO ro_test VALUES (4, 'superuser in all'); + +-- Non-superuser blocked +\set VERBOSITY terse +SET ROLE nonsuper; +INSERT INTO ro_test VALUES (5, 'fail all mode'); +RESET ROLE; +\set VERBOSITY default + +SELECT count(*) FROM ro_test; + +--- +--- Test: user-imposed transaction_read_only is respected when spock.readonly = off +--- +SET spock.readonly = 'off'; +\set VERBOSITY terse +BEGIN; +SET LOCAL transaction_read_only = on; +INSERT INTO ro_test VALUES (5, 'fail user readonly'); +ROLLBACK; +\set VERBOSITY default + +--- +--- Test: ALTER SYSTEM with separate non-superuser connection +--- +ALTER SYSTEM SET spock.readonly = 'local'; +SELECT pg_reload_conf(); +SELECT pg_sleep(0.5); + +-- New connection as nonsuper inherits system-level setting +\c regression nonsuper +SHOW spock.readonly; + +\set VERBOSITY terse +INSERT INTO ro_test VALUES (5, 'fail new conn'); +\set VERBOSITY default +SELECT count(*) FROM ro_test; + +-- Superuser not blocked +\c regression :regress_su +INSERT INTO ro_test VALUES (5, 'superuser alter system'); +SELECT count(*) FROM ro_test; + +-- Reset system setting +ALTER SYSTEM RESET spock.readonly; +SELECT pg_reload_conf(); +SELECT pg_sleep(0.5); + +-- Non-superuser can write after ALTER SYSTEM RESET +\c regression nonsuper +INSERT INTO ro_test VALUES (6, 'nonsuper after sys reset'); +SELECT count(*) FROM ro_test; + +--- +--- Cleanup +--- +\c regression :regress_su +SET spock.readonly = 'off'; +DROP TABLE public.ro_test; From eae0f80367d8cd9c93890e35fe979b7217c6ac7e Mon Sep 17 00:00:00 2001 From: Mason Sharp Date: Mon, 6 Apr 2026 15:50:08 -0700 Subject: [PATCH 02/11] Expand read-only test coverage per PR review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Utility commands: verify VACUUM and ANALYZE pass through (native Postgres behavior, no PreventCommandIfReadOnly for maintenance ops) - SECURITY DEFINER/INVOKER: both blocked — XactReadOnly is a transaction-level flag, SECURITY DEFINER only switches effective user for permission checks (also native Postgres behavior) - Replication: apply worker continues in 'local' mode, pauses gracefully in 'all' mode, resumes after reset with no data loss - Auto DDL: blocked DDL not replicated when read-only is active - repair_mode(): superuser can make local-only writes that are not replicated to subscribers - Remove pg_sleep after pg_reload_conf in ALTER SYSTEM tests - Change test table ownership to nonsuper for VACUUM/ANALYZE tests Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/regress/expected/read_only.out | 286 +++++++++++++++++++++++++-- tests/regress/sql/read_only.sql | 190 +++++++++++++++++- 2 files changed, 454 insertions(+), 22 deletions(-) diff --git a/tests/regress/expected/read_only.out b/tests/regress/expected/read_only.out index 1d31e599..119ad5be 100644 --- a/tests/regress/expected/read_only.out +++ b/tests/regress/expected/read_only.out @@ -1,15 +1,16 @@ -- Tests for read-only mode (spock.readonly GUC) -- Verifies three modes (off, local, all), superuser exemption, --- backward-compatible 'user' alias, and user-imposed transaction_read_only. +-- backward-compatible 'user' alias, utility commands, SECURITY DEFINER, +-- user-imposed transaction_read_only, and replication behavior. SELECT * FROM spock_regress_variables() \gset \c :provider_dsn -- Store superuser name for reconnections after user switching SELECT current_user AS regress_su \gset --- Setup: test table with grants for nonsuper +-- Setup: test table owned by nonsuper (allows VACUUM/ANALYZE tests) CREATE TABLE public.ro_test (id int primary key, data text); -GRANT ALL ON public.ro_test TO nonsuper; +ALTER TABLE public.ro_test OWNER TO nonsuper; -- Verify default state is 'off' SHOW spock.readonly; spock.readonly @@ -66,6 +67,14 @@ ERROR: cannot execute TRUNCATE TABLE in a read-only transaction -- DDL blocked CREATE TABLE public.ro_fail (id int); ERROR: cannot execute CREATE TABLE in a read-only transaction +--- +--- Test: utility commands allowed in 'local' mode (still as nonsuper) +--- Spock sets XactReadOnly = true and delegates enforcement entirely to +--- Postgres. These commands are allowed because Postgres does not call +--- PreventCommandIfReadOnly() for maintenance operations. +--- +VACUUM ro_test; +ANALYZE ro_test; -- Non-superuser can still SELECT SELECT count(*) FROM ro_test; count @@ -139,9 +148,60 @@ SELECT count(*) FROM ro_test; (1 row) --- ---- Test: user-imposed transaction_read_only is respected when spock.readonly = off +--- Test: SECURITY DEFINER function does NOT bypass read-only +--- This is native Postgres behavior: XactReadOnly is a transaction-level +--- flag, and SECURITY DEFINER only switches the effective user for +--- permission checks — it does not change the transaction's read-only +--- state. Both SECURITY DEFINER and SECURITY INVOKER are blocked. --- +SET spock.readonly = 'local'; +CREATE OR REPLACE FUNCTION public.ro_secdef_write() +RETURNS void LANGUAGE plpgsql SECURITY DEFINER AS $$ +BEGIN + INSERT INTO ro_test VALUES (99, 'security definer write'); +END; +$$; +GRANT EXECUTE ON FUNCTION public.ro_secdef_write() TO nonsuper; +-- SECURITY DEFINER: still blocked despite definer being superuser +SET ROLE nonsuper; +\set VERBOSITY terse +SELECT public.ro_secdef_write(); +ERROR: cannot execute INSERT in a read-only transaction +\set VERBOSITY default +RESET ROLE; +SELECT count(*) FROM ro_test WHERE id = 99; + count +------- + 0 +(1 row) + +-- SECURITY INVOKER: also blocked (caller is non-superuser) +CREATE OR REPLACE FUNCTION public.ro_secinv_write() +RETURNS void LANGUAGE plpgsql SECURITY INVOKER AS $$ +BEGIN + INSERT INTO ro_test VALUES (98, 'security invoker write'); +END; +$$; +GRANT EXECUTE ON FUNCTION public.ro_secinv_write() TO nonsuper; +SET ROLE nonsuper; +\set VERBOSITY terse +SELECT public.ro_secinv_write(); +ERROR: cannot execute INSERT in a read-only transaction +\set VERBOSITY default +RESET ROLE; +SELECT count(*) FROM ro_test WHERE id = 98; + count +------- + 0 +(1 row) + +-- Cleanup +DROP FUNCTION public.ro_secdef_write(); +DROP FUNCTION public.ro_secinv_write(); SET spock.readonly = 'off'; +--- +--- Test: user-imposed transaction_read_only is respected when spock.readonly = off +--- \set VERBOSITY terse BEGIN; SET LOCAL transaction_read_only = on; @@ -159,12 +219,6 @@ SELECT pg_reload_conf(); t (1 row) -SELECT pg_sleep(0.5); - pg_sleep ----------- - -(1 row) - -- New connection as nonsuper inherits system-level setting \c regression nonsuper SHOW spock.readonly; @@ -200,12 +254,6 @@ SELECT pg_reload_conf(); t (1 row) -SELECT pg_sleep(0.5); - pg_sleep ----------- - -(1 row) - -- Non-superuser can write after ALTER SYSTEM RESET \c regression nonsuper INSERT INTO ro_test VALUES (6, 'nonsuper after sys reset'); @@ -215,6 +263,212 @@ SELECT count(*) FROM ro_test; 6 (1 row) +--- +--- Test: replication still works when subscriber is in 'local' mode +--- +-- Create a replicated table for testing +\c regression :regress_su +SELECT spock.replicate_ddl($$ + CREATE TABLE public.ro_repl_test (id int primary key, data text); +$$); + replicate_ddl +--------------- + t +(1 row) + +SELECT * FROM spock.repset_add_table('default', 'ro_repl_test'); + repset_add_table +------------------ + t +(1 row) + +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + wait_slot_confirm_lsn +----------------------- + +(1 row) + +-- Set subscriber to 'local' readonly +\c :subscriber_dsn +ALTER SYSTEM SET spock.readonly = 'local'; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +-- Insert on provider and wait for replication +\c :provider_dsn +INSERT INTO ro_repl_test VALUES (1, 'local mode repl'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + wait_slot_confirm_lsn +----------------------- + +(1 row) + +-- Data should arrive: 'local' mode does not block apply workers +\c :subscriber_dsn +SELECT * FROM ro_repl_test ORDER BY id; + id | data +----+----------------- + 1 | local mode repl +(1 row) + +--- +--- Test: auto DDL blocked in read-only mode, nothing replicated +--- +\c :provider_dsn +SET spock.enable_ddl_replication = on; +SET spock.readonly = 'local'; +-- Non-superuser tries DDL with auto DDL enabled: blocked by read-only +\set VERBOSITY terse +SET ROLE nonsuper; +CREATE TABLE public.ro_autoddl_test (id int); +ERROR: cannot execute CREATE TABLE in a read-only transaction +RESET ROLE; +\set VERBOSITY default +-- Table was never created on provider +SELECT count(*) FROM information_schema.tables + WHERE table_schema = 'public' AND table_name = 'ro_autoddl_test'; + count +------- + 0 +(1 row) + +-- Nothing replicated to subscriber +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + wait_slot_confirm_lsn +----------------------- + +(1 row) + +\c :subscriber_dsn +SELECT count(*) FROM information_schema.tables + WHERE table_schema = 'public' AND table_name = 'ro_autoddl_test'; + count +------- + 0 +(1 row) + +--- +--- Test: repair_mode() allows local-only writes in read-only mode +--- +\c :provider_dsn +-- Provider still in readonly = 'local' from above +-- Superuser uses repair_mode for a local-only fix that won't replicate +BEGIN; +SELECT spock.repair_mode(true) \gset +INSERT INTO ro_repl_test VALUES (99, 'repair data'); +SELECT spock.repair_mode(false) \gset +COMMIT; +-- Data exists on provider +SELECT count(*) FROM ro_repl_test WHERE id = 99; + count +------- + 1 +(1 row) + +-- But NOT replicated to subscriber +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + wait_slot_confirm_lsn +----------------------- + +(1 row) + +\c :subscriber_dsn +SELECT count(*) FROM ro_repl_test WHERE id = 99; + count +------- + 0 +(1 row) + +-- Reset provider state +\c :provider_dsn +SET spock.readonly = 'off'; +SET spock.enable_ddl_replication = off; +--- +--- Test: replication paused when subscriber is in 'all' mode +--- +ALTER SYSTEM SET spock.readonly = 'all'; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +-- Give apply worker time to detect config change and enter readonly mode +SELECT pg_sleep(2); + pg_sleep +---------- + +(1 row) + +\c :provider_dsn +INSERT INTO ro_repl_test VALUES (2, 'all mode repl'); +-- Slot should not advance: wait with short timeout +\set VERBOSITY terse +BEGIN; +SET LOCAL statement_timeout = '5s'; +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); +ERROR: canceling statement due to statement timeout +ROLLBACK; +\set VERBOSITY default +-- Data should NOT have arrived on subscriber +\c :subscriber_dsn +SELECT * FROM ro_repl_test ORDER BY id; + id | data +----+----------------- + 1 | local mode repl +(1 row) + +--- +--- Test: replication resumes after resetting readonly +--- +ALTER SYSTEM RESET spock.readonly; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +\c :provider_dsn +BEGIN; +SET LOCAL statement_timeout = '30s'; +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + wait_slot_confirm_lsn +----------------------- + +(1 row) + +COMMIT; +\c :subscriber_dsn +SELECT * FROM ro_repl_test ORDER BY id; + id | data +----+----------------- + 1 | local mode repl + 2 | all mode repl +(2 rows) + +-- Cleanup replicated table and subscriber readonly state +ALTER SYSTEM RESET spock.readonly; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +\c :provider_dsn +\set VERBOSITY terse +SELECT spock.replicate_ddl($$ + DROP TABLE public.ro_repl_test CASCADE; +$$); +NOTICE: drop cascades to table ro_repl_test membership in replication set default + replicate_ddl +--------------- + t +(1 row) + +\set VERBOSITY default --- --- Cleanup --- diff --git a/tests/regress/sql/read_only.sql b/tests/regress/sql/read_only.sql index 4b82eccb..964456e8 100644 --- a/tests/regress/sql/read_only.sql +++ b/tests/regress/sql/read_only.sql @@ -1,6 +1,7 @@ -- Tests for read-only mode (spock.readonly GUC) -- Verifies three modes (off, local, all), superuser exemption, --- backward-compatible 'user' alias, and user-imposed transaction_read_only. +-- backward-compatible 'user' alias, utility commands, SECURITY DEFINER, +-- user-imposed transaction_read_only, and replication behavior. SELECT * FROM spock_regress_variables() \gset @@ -9,9 +10,9 @@ SELECT * FROM spock_regress_variables() SELECT current_user AS regress_su \gset --- Setup: test table with grants for nonsuper +-- Setup: test table owned by nonsuper (allows VACUUM/ANALYZE tests) CREATE TABLE public.ro_test (id int primary key, data text); -GRANT ALL ON public.ro_test TO nonsuper; +ALTER TABLE public.ro_test OWNER TO nonsuper; -- Verify default state is 'off' SHOW spock.readonly; @@ -48,6 +49,16 @@ DELETE FROM ro_test WHERE id = 1; TRUNCATE ro_test; -- DDL blocked CREATE TABLE public.ro_fail (id int); + +--- +--- Test: utility commands allowed in 'local' mode (still as nonsuper) +--- Spock sets XactReadOnly = true and delegates enforcement entirely to +--- Postgres. These commands are allowed because Postgres does not call +--- PreventCommandIfReadOnly() for maintenance operations. +--- +VACUUM ro_test; +ANALYZE ro_test; + -- Non-superuser can still SELECT SELECT count(*) FROM ro_test; -- Non-superuser cannot change spock.readonly @@ -97,9 +108,54 @@ RESET ROLE; SELECT count(*) FROM ro_test; --- ---- Test: user-imposed transaction_read_only is respected when spock.readonly = off +--- Test: SECURITY DEFINER function does NOT bypass read-only +--- This is native Postgres behavior: XactReadOnly is a transaction-level +--- flag, and SECURITY DEFINER only switches the effective user for +--- permission checks — it does not change the transaction's read-only +--- state. Both SECURITY DEFINER and SECURITY INVOKER are blocked. --- +SET spock.readonly = 'local'; + +CREATE OR REPLACE FUNCTION public.ro_secdef_write() +RETURNS void LANGUAGE plpgsql SECURITY DEFINER AS $$ +BEGIN + INSERT INTO ro_test VALUES (99, 'security definer write'); +END; +$$; +GRANT EXECUTE ON FUNCTION public.ro_secdef_write() TO nonsuper; + +-- SECURITY DEFINER: still blocked despite definer being superuser +SET ROLE nonsuper; +\set VERBOSITY terse +SELECT public.ro_secdef_write(); +\set VERBOSITY default +RESET ROLE; +SELECT count(*) FROM ro_test WHERE id = 99; + +-- SECURITY INVOKER: also blocked (caller is non-superuser) +CREATE OR REPLACE FUNCTION public.ro_secinv_write() +RETURNS void LANGUAGE plpgsql SECURITY INVOKER AS $$ +BEGIN + INSERT INTO ro_test VALUES (98, 'security invoker write'); +END; +$$; +GRANT EXECUTE ON FUNCTION public.ro_secinv_write() TO nonsuper; + +SET ROLE nonsuper; +\set VERBOSITY terse +SELECT public.ro_secinv_write(); +\set VERBOSITY default +RESET ROLE; +SELECT count(*) FROM ro_test WHERE id = 98; + +-- Cleanup +DROP FUNCTION public.ro_secdef_write(); +DROP FUNCTION public.ro_secinv_write(); SET spock.readonly = 'off'; + +--- +--- Test: user-imposed transaction_read_only is respected when spock.readonly = off +--- \set VERBOSITY terse BEGIN; SET LOCAL transaction_read_only = on; @@ -112,7 +168,6 @@ ROLLBACK; --- ALTER SYSTEM SET spock.readonly = 'local'; SELECT pg_reload_conf(); -SELECT pg_sleep(0.5); -- New connection as nonsuper inherits system-level setting \c regression nonsuper @@ -131,13 +186,136 @@ SELECT count(*) FROM ro_test; -- Reset system setting ALTER SYSTEM RESET spock.readonly; SELECT pg_reload_conf(); -SELECT pg_sleep(0.5); -- Non-superuser can write after ALTER SYSTEM RESET \c regression nonsuper INSERT INTO ro_test VALUES (6, 'nonsuper after sys reset'); SELECT count(*) FROM ro_test; +--- +--- Test: replication still works when subscriber is in 'local' mode +--- + +-- Create a replicated table for testing +\c regression :regress_su +SELECT spock.replicate_ddl($$ + CREATE TABLE public.ro_repl_test (id int primary key, data text); +$$); +SELECT * FROM spock.repset_add_table('default', 'ro_repl_test'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + +-- Set subscriber to 'local' readonly +\c :subscriber_dsn +ALTER SYSTEM SET spock.readonly = 'local'; +SELECT pg_reload_conf(); + +-- Insert on provider and wait for replication +\c :provider_dsn +INSERT INTO ro_repl_test VALUES (1, 'local mode repl'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + +-- Data should arrive: 'local' mode does not block apply workers +\c :subscriber_dsn +SELECT * FROM ro_repl_test ORDER BY id; + +--- +--- Test: auto DDL blocked in read-only mode, nothing replicated +--- +\c :provider_dsn +SET spock.enable_ddl_replication = on; +SET spock.readonly = 'local'; + +-- Non-superuser tries DDL with auto DDL enabled: blocked by read-only +\set VERBOSITY terse +SET ROLE nonsuper; +CREATE TABLE public.ro_autoddl_test (id int); +RESET ROLE; +\set VERBOSITY default + +-- Table was never created on provider +SELECT count(*) FROM information_schema.tables + WHERE table_schema = 'public' AND table_name = 'ro_autoddl_test'; + +-- Nothing replicated to subscriber +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); +\c :subscriber_dsn +SELECT count(*) FROM information_schema.tables + WHERE table_schema = 'public' AND table_name = 'ro_autoddl_test'; + +--- +--- Test: repair_mode() allows local-only writes in read-only mode +--- +\c :provider_dsn +-- Provider still in readonly = 'local' from above +-- Superuser uses repair_mode for a local-only fix that won't replicate +BEGIN; +SELECT spock.repair_mode(true) \gset +INSERT INTO ro_repl_test VALUES (99, 'repair data'); +SELECT spock.repair_mode(false) \gset +COMMIT; + +-- Data exists on provider +SELECT count(*) FROM ro_repl_test WHERE id = 99; + +-- But NOT replicated to subscriber +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); +\c :subscriber_dsn +SELECT count(*) FROM ro_repl_test WHERE id = 99; + +-- Reset provider state +\c :provider_dsn +SET spock.readonly = 'off'; +SET spock.enable_ddl_replication = off; + +--- +--- Test: replication paused when subscriber is in 'all' mode +--- +ALTER SYSTEM SET spock.readonly = 'all'; +SELECT pg_reload_conf(); +-- Give apply worker time to detect config change and enter readonly mode +SELECT pg_sleep(2); + +\c :provider_dsn +INSERT INTO ro_repl_test VALUES (2, 'all mode repl'); + +-- Slot should not advance: wait with short timeout +\set VERBOSITY terse +BEGIN; +SET LOCAL statement_timeout = '5s'; +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); +ROLLBACK; +\set VERBOSITY default + +-- Data should NOT have arrived on subscriber +\c :subscriber_dsn +SELECT * FROM ro_repl_test ORDER BY id; + +--- +--- Test: replication resumes after resetting readonly +--- +ALTER SYSTEM RESET spock.readonly; +SELECT pg_reload_conf(); + +\c :provider_dsn +BEGIN; +SET LOCAL statement_timeout = '30s'; +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); +COMMIT; + +\c :subscriber_dsn +SELECT * FROM ro_repl_test ORDER BY id; + +-- Cleanup replicated table and subscriber readonly state +ALTER SYSTEM RESET spock.readonly; +SELECT pg_reload_conf(); + +\c :provider_dsn +\set VERBOSITY terse +SELECT spock.replicate_ddl($$ + DROP TABLE public.ro_repl_test CASCADE; +$$); +\set VERBOSITY default + --- --- Cleanup --- From 628cb65d7ad6c90f8c82c98be249895180e98943 Mon Sep 17 00:00:00 2001 From: Mason Sharp Date: Tue, 7 Apr 2026 08:08:49 -0700 Subject: [PATCH 03/11] Address PR feedback for read-only regression tests - ALTER SYSTEM for 'all' mode now runs on subscriber - Add walsender PID check to prove apply worker survives readonly (wait, not crash-and-restart) - Combine VACUUM and ANALYZE into single VACUUM ANALYZE command - Reword utility command comment to explain reasoning (no schema change, no WAL that affects replication) rather than implementation details Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/regress/expected/read_only.out | 23 ++++++++++++++++++----- tests/regress/sql/read_only.sql | 21 ++++++++++++++++----- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/tests/regress/expected/read_only.out b/tests/regress/expected/read_only.out index 119ad5be..33a29bad 100644 --- a/tests/regress/expected/read_only.out +++ b/tests/regress/expected/read_only.out @@ -69,12 +69,12 @@ CREATE TABLE public.ro_fail (id int); ERROR: cannot execute CREATE TABLE in a read-only transaction --- --- Test: utility commands allowed in 'local' mode (still as nonsuper) ---- Spock sets XactReadOnly = true and delegates enforcement entirely to ---- Postgres. These commands are allowed because Postgres does not call ---- PreventCommandIfReadOnly() for maintenance operations. +--- VACUUM ANALYZE does not alter the schema or generate WAL that would +--- interfere with logical replication, so Postgres allows it in read-only +--- transactions. Spock delegates to Postgres here and could add further +--- restrictions if needed. --- -VACUUM ro_test; -ANALYZE ro_test; +VACUUM ANALYZE ro_test; -- Non-superuser can still SELECT SELECT count(*) FROM ro_test; count @@ -388,7 +388,13 @@ SET spock.readonly = 'off'; SET spock.enable_ddl_replication = off; --- --- Test: replication paused when subscriber is in 'all' mode +--- The apply worker should gently wait (not crash or flood logs). --- +-- Record walsender PID: if the apply worker crashes, the replication +-- connection drops and the walsender restarts with a new PID. +SELECT pid AS repl_pid FROM pg_stat_replication LIMIT 1 +\gset +\c :subscriber_dsn ALTER SYSTEM SET spock.readonly = 'all'; SELECT pg_reload_conf(); pg_reload_conf @@ -413,6 +419,13 @@ SELECT spock.wait_slot_confirm_lsn(NULL, NULL); ERROR: canceling statement due to statement timeout ROLLBACK; \set VERBOSITY default +-- Same walsender PID proves the apply worker stayed alive (gentle wait) +SELECT :repl_pid = pid AS worker_survived FROM pg_stat_replication LIMIT 1; + worker_survived +----------------- + t +(1 row) + -- Data should NOT have arrived on subscriber \c :subscriber_dsn SELECT * FROM ro_repl_test ORDER BY id; diff --git a/tests/regress/sql/read_only.sql b/tests/regress/sql/read_only.sql index 964456e8..efec6230 100644 --- a/tests/regress/sql/read_only.sql +++ b/tests/regress/sql/read_only.sql @@ -52,12 +52,12 @@ CREATE TABLE public.ro_fail (id int); --- --- Test: utility commands allowed in 'local' mode (still as nonsuper) ---- Spock sets XactReadOnly = true and delegates enforcement entirely to ---- Postgres. These commands are allowed because Postgres does not call ---- PreventCommandIfReadOnly() for maintenance operations. +--- VACUUM ANALYZE does not alter the schema or generate WAL that would +--- interfere with logical replication, so Postgres allows it in read-only +--- transactions. Spock delegates to Postgres here and could add further +--- restrictions if needed. --- -VACUUM ro_test; -ANALYZE ro_test; +VACUUM ANALYZE ro_test; -- Non-superuser can still SELECT SELECT count(*) FROM ro_test; @@ -269,7 +269,15 @@ SET spock.enable_ddl_replication = off; --- --- Test: replication paused when subscriber is in 'all' mode +--- The apply worker should gently wait (not crash or flood logs). --- + +-- Record walsender PID: if the apply worker crashes, the replication +-- connection drops and the walsender restarts with a new PID. +SELECT pid AS repl_pid FROM pg_stat_replication LIMIT 1 +\gset + +\c :subscriber_dsn ALTER SYSTEM SET spock.readonly = 'all'; SELECT pg_reload_conf(); -- Give apply worker time to detect config change and enter readonly mode @@ -286,6 +294,9 @@ SELECT spock.wait_slot_confirm_lsn(NULL, NULL); ROLLBACK; \set VERBOSITY default +-- Same walsender PID proves the apply worker stayed alive (gentle wait) +SELECT :repl_pid = pid AS worker_survived FROM pg_stat_replication LIMIT 1; + -- Data should NOT have arrived on subscriber \c :subscriber_dsn SELECT * FROM ro_repl_test ORDER BY id; From 3bb8ad8492aa63d06c9edbac2e3feb9d575197a3 Mon Sep 17 00:00:00 2001 From: Mason Sharp Date: Tue, 7 Apr 2026 16:23:32 -0700 Subject: [PATCH 04/11] Add conflict types documentation and report update_exists to PostgreSQL log - New docs/conflict_types.md covering all 7 conflict types, resolution strategies, frozen tuple handling, and comparison with PostgreSQL 18 - Detect secondary unique constraint violations during UPDATE apply and report them as update_exists to the PostgreSQL log and PG18 stats - Origin-differs events (update_origin_differs, delete_origin_differs) are no longer written to spock.resolutions when the local tuple wins; previously only the remote-wins case was excluded - Add conflict_types.md to mkdocs.yml nav and cross-link with conflicts.md Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/conflict_types.md | 256 +++++++++++++++++++++++++++++++++++++++++ docs/conflicts.md | 128 ++++++++++++++------- mkdocs.yml | 3 +- src/spock_apply_heap.c | 44 ++++++- src/spock_conflict.c | 13 ++- 5 files changed, 396 insertions(+), 48 deletions(-) create mode 100644 docs/conflict_types.md diff --git a/docs/conflict_types.md b/docs/conflict_types.md new file mode 100644 index 00000000..190d1835 --- /dev/null +++ b/docs/conflict_types.md @@ -0,0 +1,256 @@ +## Conflict Types + +For conflict *avoidance* using delta-apply columns, see +[Conflict Avoidance and Delta-Apply Columns](conflicts.md). + +In a multi-master replication environment, conflicts occur when two or +more nodes make changes to the same row (as identified by a primary key +or replica identity) at overlapping times. Spock detects these conflicts +during the apply phase on the subscribing node and resolves them +according to a configurable resolution strategy. + +Each conflict is classified as one of the types described below. +Resolvable conflicts are recorded in the `spock.resolutions` table +(when `spock.save_resolutions` is enabled); non-resolvable conflicts +are recorded in `spock.exception_log`. + +### Summary Table + +| Conflict Type | DML | Resolvable | +|----------------------------|-----------|-------------------------------------| +| `insert_exists` | INSERT | Yes | +| `update_origin_differs` | UPDATE | N/A (normal flow, not recorded) | +| `update_exists` | UPDATE | No (unique constraint violated), saved in `spock.exception_log` | +| `update_missing` | UPDATE | No (row not found), saved in `spock.exception_log` | +| `delete_origin_differs` | DELETE | N/A (normal flow, not recorded) | +| `delete_missing` | DELETE | Yes | +| `delete_exists` | DELETE | Yes | + +A conflict is **resolvable** when Spock can automatically choose a +winning tuple and continue replication without operator intervention. +`update_missing` and `update_exists` are not resolvable and result in +an ERROR that is recorded in `spock.exception_log`. + +--- + +### `insert_exists` + +A remote INSERT arrives but a row with the same primary key (or replica +identity) already exists on the local node. This typically happens when +two nodes independently insert a row with the same key. + +Spock detects the conflict by looking up the incoming tuple against +unique indexes (primary key, replica identity, and optionally all +unique constraints when `spock.check_all_uc_indexes` is enabled). + +**Resolution:** The configurable conflict resolver (default +`last_update_wins`) compares the commit timestamps of the local and +remote rows. The winner's values are applied as an UPDATE to the +existing row. + +--- + +### `update_origin_differs` + +A remote UPDATE targets a row whose local copy was last modified by a +different replication origin. In a multi-master topology this is the +normal flow of replication -- two nodes each modify a row and the +changes propagate -- so Spock does not treat it as a true conflict. +PostgreSQL's native logical replication does classify it as a conflict, +however, so Spock tracks the type for completeness and supports +optional logging via the `spock.log_origin_change` GUC. + +Spock detects this by comparing `replorigin_session_origin` (the +source of the incoming change) with the origin recorded on the local +tuple. Changes from the same origin or the same local transaction are +silently applied without any conflict reporting. + +**Resolution:** The configurable conflict resolver (default +`last_update_wins`) determines the winner. Since this is normal +replication flow, the event is not written to `spock.resolutions` +regardless of which side wins. + +--- + +### `update_exists` + +A remote UPDATE is applied successfully via the replica identity, but +the new row values violate a unique constraint on a different index. +For example, the UPDATE changes a column that is part of a secondary +unique index and the new value collides with another existing row. + +This conflict type matches the definition used by PostgreSQL 18's +native logical replication. Spock reports it to the PostgreSQL server +log and PG18 conflict statistics when the unique constraint violation +is detected. + +**Resolution:** This conflict is **not** automatically resolvable. The +error is written to `spock.exception_log`. + +--- + +### `update_missing` + +A remote UPDATE cannot find the target row on the local node. The row +may have been deleted locally, or it may never have arrived due to a +replication gap. + +Spock retries the lookup several times (with short waits) in case the +row is being inserted by a concurrent transaction. If the row still +cannot be found after retries, the conflict is raised. + +**Resolution:** This conflict is **not resolvable**. Spock raises an +ERROR, which is logged to `spock.exception_log`. + +--- + +### `delete_origin_differs` + +A remote DELETE targets a row that exists locally but was last modified +by a different replication origin. As with `update_origin_differs`, +this is normal replication flow rather than a true conflict -- one node +deletes a row while another node's earlier update is still in flight. +Spock tracks the type because PostgreSQL's native logical replication +considers it a conflict, and supports optional logging via +`spock.log_origin_change`. + +If the local tuple came from the same origin as the incoming delete, +or from the same local transaction, the delete is applied silently +with no conflict reporting. + +**Resolution:** The configurable conflict resolver (default +`last_update_wins`) determines the winner. When the remote DELETE +wins, the delete proceeds. When the local tuple wins (it is newer), +the delete is skipped and the row is preserved; this case is reported +as `delete_exists` instead. + +--- + +### `delete_missing` + +A remote DELETE cannot find the target row on the local node. The row +was likely already deleted by a local or other replicated transaction. + +As with `update_missing`, Spock retries the lookup several times +before confirming the row is absent. + +**Resolution:** The conflict is automatically resolved by skipping the +delete (`skip`). Since the row is already gone, the desired end state +has been achieved. The event is recorded in the `spock.resolutions` +table. + +--- + +### `delete_exists` + +A remote DELETE arrives but the local copy of the row has a more +recent commit timestamp than the delete. This is unique to Spock and +does not have a counterpart in native PostgreSQL logical replication. + +This occurs when one node deletes a row while another node updates it +with a later timestamp. The delete is the older operation, so the +updated row should be preserved. + +**Resolution:** The delete is skipped and the local (newer) row is +kept (`skip` / `keep_local`). The event is recorded in the +`spock.resolutions` table. + +--- + +### Conflict Resolution Strategies + +The `spock.conflict_resolution` GUC controls how resolvable conflicts +(all types except `update_missing` and `update_exists`) are decided: + +| Strategy | Behavior | +|-----------------------|-----------------------------------------------------------| +| `last_update_wins` | The row with the most recent commit timestamp wins (default). | +| `first_update_wins` | The row with the earliest commit timestamp wins. | +| `apply_remote` | Always apply the incoming remote change. | +| `keep_local` | Always keep the local row. | +| `error` | Raise an ERROR on any conflict. | + +The timestamp-based strategies (`last_update_wins` and +`first_update_wins`) require `track_commit_timestamp = on` in +`postgresql.conf`. + +**Tiebreaker:** When two rows have identical commit timestamps, Spock +uses the `tiebreaker` value from the `spock.node` configuration to +determine a winner. The node with the lower tiebreaker value wins. By +default the tiebreaker is set to the node's unique ID. + +### Frozen Tuples and Missing Timestamp Data + +Timestamp-based conflict resolution depends on commit timestamp and +origin information being available for the local tuple. There are two +cases where this information is missing: + +**Frozen tuples.** PostgreSQL periodically "freezes" old row versions, +replacing their transaction ID with `FrozenTransactionId`. Once +frozen, the original commit timestamp and origin can no longer be +retrieved. When Spock encounters a frozen local tuple during conflict +resolution, it treats the local timestamp as zero. Since any real +remote timestamp is greater than zero, the remote change always wins. +No conflict is logged because the local origin could not be +determined. + +**`track_commit_timestamp = off`.** If commit timestamp tracking is +disabled, Spock cannot retrieve origin or timestamp information for +any local tuple. In this case, Spock copies the remote origin and +timestamp into the local values. The same-origin check then sees +matching origins and treats the change as normal replication flow -- +no conflict is detected or logged. This effectively makes conflict +resolution invisible: the remote change is always applied silently. + +For reliable conflict detection and resolution, +`track_commit_timestamp` must be set to `on`. + +### Conflict Logging + +- **`spock.save_resolutions`** (default `off`) -- When enabled, + resolved conflicts are written to the `spock.resolutions` table + with full tuple details in JSON format. +- **`spock.conflict_log_level`** (default `LOG`) -- Controls the + PostgreSQL log level at which detected conflicts are reported. Set + to a level below `log_min_messages` to suppress log output. +- **`spock.log_origin_change`** (default `none`) -- Controls whether + `update_origin_differs` and `delete_origin_differs` events (normal + replication flow) are logged to the PostgreSQL server log. These + events are never written to `spock.resolutions`. Options: `none`, + `remote_only_differs`, `since_sub_creation`. + +### Comparison with PostgreSQL 18 Native Logical Replication + +PostgreSQL 18 introduced built-in conflict detection for logical +replication. Spock's conflict types are aligned with PostgreSQL's +definitions (same names, same enum ordering) so that the two systems +report conflicts in a consistent way. The key differences are in how +each system *resolves* conflicts and where it *records* them. + +| Conflict Type | PostgreSQL 18 | Spock (with last update wins) | +|-------------------------|----------------------------------------|----------------------------------------------| +| `insert_exists` | Logs and raises ERROR. | Resolves via `last_update_wins`; transforms INSERT into UPDATE of the winning tuple. | +| `update_origin_differs` | Logs and always applies the remote tuple. | Resolves via `last_update_wins`; local tuple can win. Treated as normal replication flow (not a true conflict) with optional logging via `log_origin_change`. | +| `update_exists` | Detects unique constraint violation on updated row; logs. | Logs and records in `spock.exception_log`. | +| `update_missing` | Logs and skips. | Logs and records in `spock.exception_log`. | +| `delete_origin_differs` | Logs and always applies the delete. | Resolves via `last_update_wins`; local tuple can win (reported as `delete_exists`). Treated as normal replication flow (not a true conflict) with optional logging. | +| `delete_missing` | Logs and skips. | Logs and skips. Records in `spock.resolutions`. | +| `delete_exists` | No equivalent. | Unique to Spock. The local row is newer than the remote DELETE, so the delete is skipped and the row is preserved. | + +**Resolution.** PostgreSQL 18 does not resolve conflicts -- it either +applies the remote change unconditionally or skips the operation, and +logs the event. Spock adds configurable resolution strategies (default +`last_update_wins`) with a tiebreaker mechanism, allowing the local +tuple to win when it is more recent. + +**Persistence.** PostgreSQL 18 writes conflicts only to the PostgreSQL +server log. Spock additionally persists certain conflicts in the +`spock.resolutions` table (with full tuple details in JSON) -- +specifically `insert_exists`, `delete_missing`, and `delete_exists` -- +and non-resolvable conflicts (`update_missing`, `update_exists`) in +`spock.exception_log`. Origin-differs events are not persisted to +either table. + +**Statistics.** On PostgreSQL 18, Spock reports all conflict types to +the native `pgstat` subscription conflict statistics, so they appear +in the same views used by built-in logical replication. diff --git a/docs/conflicts.md b/docs/conflicts.md index d48c5bbd..90f9d7c5 100644 --- a/docs/conflicts.md +++ b/docs/conflicts.md @@ -1,72 +1,120 @@ ## Conflict-Free Delta-Apply Columns (Conflict Avoidance) -Conflicts can arise if a node is subscribed to multiple providers, or when local writes happen on a subscriber node. Without Spock, logical replication can also encounter issues when resolving the value of a running sum (such as a YTD balance). - -!!! example - - Suppose that a running bank account sum contains a balance of `$1,000`. Two transactions "conflict" because they overlap with each from two different multi-master nodes. Transaction A is a `$1,000` withdrawal from the account. Transaction B is also a `$1,000` withdrawal from the account. The correct balance is `$-1,000`. Our Delta-Apply algorithm fixes this problem and highly conflicting workloads with this scenario (like a tpc-c like benchmark) now run correctly at lightning speeds. +For a complete reference on conflict types, resolution strategies, and +comparison with PostgreSQL 18, see +[Conflict Types and Resolution](conflict_types.md). +Conflicts can arise if a node is subscribed to multiple providers, or when +local writes happen on a subscriber node. Without Spock, logical replication +can also encounter issues when resolving the value of a running sum (such as +a YTD balance). - In the past, Postgres users have relied on special data types and numbering schemes to help prevent conflicts. Unlike other solutions, Spock's powerful and simple conflict-free delta-apply columns instead manage data update conflicts with logic: +!!! example - - the old value is captured in WAL files. - - a new value comes in from a transaction. - - before the new value overwrites the old value, a delta value is created from the above two steps; that delta is correctly applied. + Suppose that a running bank account sum contains a balance of + `$1,000`. Two transactions conflict because they overlap with each from + two different multi-master nodes. Transaction A is a `$1,000` withdrawal + from the account. Transaction B is also a `$1,000` withdrawal from the + account. The correct balance is `$-1,000`. The Delta-Apply algorithm + fixes this problem and highly conflicting workloads with this scenario + (for example, a tpc-c like benchmark) now run correctly at lightning + speeds. + +In the past, Postgres users have relied on special data types and numbering +schemes to help prevent conflicts. Unlike other solutions, Spock's powerful +and simple conflict-free delta-apply columns instead manage data update +conflicts with the following logic: + +- the old value is captured in WAL files. +- a new value comes in from a transaction. +- before the new value overwrites the old value, a delta value is created + from the above two steps; that delta is correctly applied. !!! example - To simplify: *local_value + (new_value - old_value)* + To simplify: local_value + (new_value - old_value) -Note that on a conflicting transaction, the delta-apply column will be correctly calculated and applied. +Note that on a conflicting transaction, the delta-apply column will be +correctly calculated and applied. -To make a column a conflict-free delta-apply column, ensuring that the value replicated is the delta of the committed changes (the old value plus or minus any new value) to a given record, you need to apply the following settings to the column: `log_old_value=true, delta_apply_function=spock.delta_apply`. For example: +To make a column a conflict-free delta-apply column, ensuring that the value +replicated is the delta of the committed changes (the old value plus or +minus any new value) to a given record, you need to apply the following +settings to the column: `log_old_value=true, +delta_apply_function=spock.delta_apply`. For example: ```sql -ALTER TABLE pgbench_accounts ALTER COLUMN abalance SET (log_old_value=true, delta_apply_function=spock.delta_apply); -ALTER TABLE pgbench_branches ALTER COLUMN bbalance SET (log_old_value=true, delta_apply_function=spock.delta_apply); -ALTER TABLE pgbench_tellers ALTER COLUMN tbalance SET (log_old_value=true, delta_apply_function=spock.delta_apply); +ALTER TABLE pgbench_accounts ALTER COLUMN abalance + SET (log_old_value=true, delta_apply_function=spock.delta_apply); +ALTER TABLE pgbench_branches ALTER COLUMN bbalance + SET (log_old_value=true, delta_apply_function=spock.delta_apply); +ALTER TABLE pgbench_tellers ALTER COLUMN tbalance + SET (log_old_value=true, delta_apply_function=spock.delta_apply); ``` -As a special safety-valve feature, if you ever need to re-set a `log_old_value` column you can temporarily alter the column to `log_old_value` is `false`. +As a special safety-valve feature, if you ever need to re-set a +`log_old_value` column you can temporarily alter the column to +`log_old_value` is `false`. ### Conflict Configuration Options -You can configure some Spock extension behaviors with configuration options that are set in the `postgresql.conf` file or via `ALTER SYSTEM SET`: - -- `spock.conflict_resolution = last_update_wins` - Sets the resolution method to `last_update_wins` for any detected conflicts between local data and incoming changes - the version of data with newest commit timestamp is kept. Note that the `track_commit_timestamp` PostgreSQL setting must also be `enabled`. - -- `spock.conflict_log_level` - Sets the log level for reporting detected conflicts. The default is `LOG`. If the parameter is set to a value lower than `log_min_messages`, resolved conflicts are not written to the server log. Accepted values are: - - - The [possible values](https://www.postgresql.org/docs/15/runtime-config-logging.html#RUNTIME-CONFIG-SEVERITY-LEVELS) are the same as for the `log_min_messages` PostgreSQL setting. - - This setting is used primarily to suppress logging of conflicts. - -- `spock.batch_inserts` - Tells Spock to use a batch insert mechanism if possible. The batch mechanism uses PostgreSQL internal batch insert mode (also used by the `COPY` command). The default is `on`. +You can configure some Spock extension behaviors with configuration options +that are set in the `postgresql.conf` file or via `ALTER SYSTEM SET`: + +- `spock.conflict_resolution = last_update_wins` sets the resolution method + to `last_update_wins` for any detected conflicts between local data and + incoming changes; the version of data with newest commit timestamp is + kept; note that the `track_commit_timestamp` PostgreSQL setting must also + be `enabled`. +- `spock.conflict_log_level` sets the log level for reporting detected + conflicts; the default is `LOG`; if the parameter is set to a value lower + than `log_min_messages`, resolved conflicts are not written to the server + log; accepted values are the same as for the `log_min_messages` PostgreSQL + setting (see [possible + values](https://www.postgresql.org/docs/15/runtime-config-logging.html#RUNTIME-CONFIG-SEVERITY-LEVELS)); + this setting is used primarily to suppress logging of conflicts. +- `spock.batch_inserts` tells Spock to use a batch insert mechanism if + possible; the batch mechanism uses PostgreSQL internal batch insert mode + (also used by the `COPY` command); the default is `on`. ### Handling `INSERT-RowExists` or `INSERT/INSERT` Conflicts -If Spock encounters a conflict caused by a constraint violation (unique constraint, primary key, or replication identity) Spock can automatically transform an `INSERT` into an `UPDATE`, updating all columns of the existing row. - -By default, when an `INSERT` statement targets a row that already exists, Spock detects the conflict and transforms the operation into an `UPDATE` statement, applying changes to all columns of the existing row. The event is recorded in the `spock.resolutions` table. +If Spock encounters a conflict caused by a constraint violation (unique +constraint, primary key, or replication identity) Spock can automatically +transform an `INSERT` into an `UPDATE`, updating all columns of the existing +row. -Extended constraint violation behavior is controlled by the `spock.check_all_uc_indexes` parameter. The default value is `off`; when set to `on`, Spock will: +By default, when an `INSERT` statement targets a row that already exists, +Spock detects the conflict and transforms the operation into an `UPDATE` +statement, applying changes to all columns of the existing row. The event is +recorded in the `spock.resolutions` table. -* Scan all valid unique constraints (as well as the primary key and replica identity). -* Scan non-null unique constraints (including the primary key / replica identity index) in OID order. -* Locate and resolve conflicting rows encountered during an `INSERT` statement. +Extended constraint violation behavior is controlled by the +`spock.check_all_uc_indexes` parameter. The default value is `off`; when set +to `on`, Spock will perform the following actions: +- scan all valid unique constraints (as well as the primary key and replica + identity). +- scan non-null unique constraints (including the primary key or replica + identity index) in OID order. +- locate and resolve conflicting rows encountered during an `INSERT` + statement. !!! warning -If `spock.check_all_uc_indexes` is `enabled`, Spock will resolve only the first conflict identified, using Last-Write-Wins logic. If a second conflict occurs, an exception is recorded in the `spock.resolutions` table as either `Keep-Local` or `Apply-Remote`. + If `spock.check_all_uc_indexes` is `enabled`, Spock will resolve only + the first conflict identified, using Last-Write-Wins logic. If a second + conflict occurs, an exception is recorded in the `spock.resolutions` + table as either `Keep-Local` or `Apply-Remote`. -This feature is experimental; enable this feature at your own risk. + This feature is experimental; enable this feature at your own risk. ### Handling `DELETE-RowMissing` Conflicts -Given that the purpose of the DELETE statement is to remove the row anyway, we do not log these as exceptions, since the desired outcome of removing the row has obviously been achieved, one way or the other. Instead `DELETE-RowMissing` conflicts are automatically written to the `spock.resolutions` table since the desired result has been achieved. \ No newline at end of file +Given that the purpose of the DELETE statement is to remove the row anyway, +we do not log these as exceptions, since the desired outcome of removing the +row has obviously been achieved, one way or the other. Instead +`DELETE-RowMissing` conflicts are automatically written to the +`spock.resolutions` table since the desired result has been achieved. \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index f0285ec9..03544c66 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -54,7 +54,8 @@ nav: - Creating a Two-Node Cluster: two_node_cluster.md - Using Advanced Configuration Options: configuring.md - Upgrading a Spock Installation: upgrading_spock.md - - Spock's Conflict Avoidance Options: conflicts.md + - Conflict Types and Resolution: conflict_types.md + - Conflict Avoidance and Delta-Apply Columns: conflicts.md - Spock's Management Features: - Managing a Spock Installation: managing/index.md - Replicating Partitioned Tables: managing/partition_mgmt.md diff --git a/src/spock_apply_heap.c b/src/spock_apply_heap.c index 0d50dbc4..cf2befd4 100644 --- a/src/spock_apply_heap.c +++ b/src/spock_apply_heap.c @@ -819,8 +819,48 @@ spock_handle_conflict_and_apply(SpockRelation *rel, EState *estate, BeginInternalSubTransaction("SpockDeltaApply"); EvalPlanQualSetSlot(epqstate, remoteslot); - ExecSimpleRelationUpdate(relinfo, estate, epqstate, - localslot, remoteslot); + + PG_TRY(); + { + ExecSimpleRelationUpdate(relinfo, estate, epqstate, + localslot, remoteslot); + } + PG_CATCH(); + { + /* + * If the UPDATE's new values violated a unique constraint, + * report it as an update_exists conflict before re-throwing. + * This matches PG18 native conflict detection behavior. + * + * We cannot safely call spock_report_conflict() here because + * the executor may have invalidated tuple slot data during its + * partial execution. Use a simple elog that is similar instead + */ + if (!is_insert) + { + ErrorData *edata; + + MemoryContextSwitchTo(MessageContext); + edata = CopyErrorData(); + + if (edata->sqlerrcode == ERRCODE_UNIQUE_VIOLATION) + { + elog(spock_conflict_log_level, + "CONFLICT: detected %s on %s.%s: %s", + SpockConflictTypeName(SPOCK_CT_UPDATE_EXISTS), + rel->nspname, + RelationGetRelationName(rel->rel), + edata->message); +#if PG_VERSION_NUM >= 180000 + spock_stat_report_subscription_conflict( + MyApplyWorker->subid, SPOCK_CT_UPDATE_EXISTS); +#endif + } + FreeErrorData(edata); + } + PG_RE_THROW(); + } + PG_END_TRY(); if (is_delta_apply) { diff --git a/src/spock_conflict.c b/src/spock_conflict.c index 46d6e9e3..93989696 100644 --- a/src/spock_conflict.c +++ b/src/spock_conflict.c @@ -386,15 +386,18 @@ spock_report_conflict(SpockConflictType conflict_type, if (conflict_type == SPOCK_CT_UPDATE_EXISTS) conflict_type = SPOCK_CT_UPDATE_ORIGIN_DIFFERS; + /* + * Origin-differs is normal replication flow, not a true conflict. + * Do not write to spock.resolutions regardless of which side wins. + */ + save_in_resolutions = false; + if (resolution == SpockResolution_ApplyRemote) { /* - * Remote tuple wins — this is normal replication flow, not a true - * conflict. Do not write to spock.resolutions, but optionally - * log to the PostgreSQL log based on the GUC setting. + * Remote tuple wins — optionally log to the PostgreSQL log + * based on the log_origin_change GUC setting. */ - save_in_resolutions = false; - if (!found_local_origin) return; From 3d2619f8219054f9e936f8d2d5f480592c5f3d88 Mon Sep 17 00:00:00 2001 From: Mason Sharp Date: Tue, 7 Apr 2026 21:48:17 -0700 Subject: [PATCH 05/11] Add update_exists conflict test to conflict_stat regression suite Tests that a secondary unique constraint violation during UPDATE apply is detected, logged to spock.exception_log, and counted in PG18 conflict statistics (confl_update_exists). Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/regress/expected/conflict_stat.out | 112 ++++++++++++++++++++++- tests/regress/sql/conflict_stat.sql | 68 +++++++++++++- 2 files changed, 178 insertions(+), 2 deletions(-) diff --git a/tests/regress/expected/conflict_stat.out b/tests/regress/expected/conflict_stat.out index 9d96cce4..b46c8b26 100644 --- a/tests/regress/expected/conflict_stat.out +++ b/tests/regress/expected/conflict_stat.out @@ -208,9 +208,119 @@ SELECT spock.reset_subscription_stats(:test_sub_id); (1 row) --- Cleanup +-- ============================================================ +-- Test UPDATE_EXISTS: an UPDATE from the provider violates a secondary +-- unique constraint on the subscriber. The apply worker detects the +-- unique violation, logs update_exists to the PostgreSQL log, counts +-- it in conflict stats, and records it in exception_log. +-- Default exception_behaviour is TRANSDISCARD: the worker errors on +-- the first attempt, restarts, replays read-only logging each failing +-- row to exception_log, then advances the LSN. +-- ============================================================ +\c :provider_dsn +-- Create a table with a secondary unique index +SELECT spock.replicate_ddl($$ + CREATE TABLE public.conflict_ue_test ( + id integer PRIMARY KEY, + uval integer NOT NULL, + data text + ); + CREATE UNIQUE INDEX conflict_ue_test_uval_idx + ON public.conflict_ue_test (uval); +$$); + replicate_ddl +--------------- + t +(1 row) + +SELECT * FROM spock.repset_add_table('default', 'conflict_ue_test'); + repset_add_table +------------------ + t +(1 row) + +-- Seed rows on both sides via replication +INSERT INTO conflict_ue_test VALUES (1, 100, 'row1'); +INSERT INTO conflict_ue_test VALUES (2, 200, 'row2'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + wait_slot_confirm_lsn +----------------------- + +(1 row) + +\c :subscriber_dsn +-- Insert a row only on the subscriber to set up the conflict +INSERT INTO conflict_ue_test VALUES (3, 300, 'sub_only'); +SELECT * FROM conflict_ue_test ORDER BY id; + id | uval | data +----+------+---------- + 1 | 100 | row1 + 2 | 200 | row2 + 3 | 300 | sub_only +(3 rows) + +-- Reset stats +SELECT spock.reset_subscription_stats(:test_sub_id); + reset_subscription_stats +-------------------------- + +(1 row) + +TRUNCATE spock.exception_log; +\c :provider_dsn +-- This UPDATE sets uval=300 on row id=2. It succeeds on the provider +-- (no row with uval=300 here), but on the subscriber row id=3 already +-- has uval=300, so the secondary unique index is violated. +UPDATE conflict_ue_test SET uval = 300, data = 'should_conflict' WHERE id = 2; +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + wait_slot_confirm_lsn +----------------------- + +(1 row) + +\c :subscriber_dsn +-- Row id=2 should still have its original value (update was discarded) +SELECT * FROM conflict_ue_test ORDER BY id; + id | uval | data +----+------+---------- + 1 | 100 | row1 + 2 | 200 | row2 + 3 | 300 | sub_only +(3 rows) + +-- The conflict should be logged in exception_log +SELECT operation, table_name FROM spock.exception_log; + operation | table_name +-----------+------------------ + UPDATE | conflict_ue_test +(1 row) + +-- Verify UPDATE_EXISTS counter incremented +SELECT confl_update_exists +FROM spock.get_subscription_stats(:test_sub_id); + confl_update_exists +--------------------- + 1 +(1 row) + +SELECT spock.reset_subscription_stats(:test_sub_id); + reset_subscription_stats +-------------------------- + +(1 row) + TRUNCATE spock.exception_log; +-- Cleanup \c :provider_dsn +SELECT spock.replicate_ddl($$ DROP TABLE public.conflict_ue_test CASCADE; $$); +NOTICE: drop cascades to table conflict_ue_test membership in replication set default + replicate_ddl +--------------- + t +(1 row) + +-- Cleanup original test table +TRUNCATE spock.exception_log; SELECT spock.replicate_ddl($$ DROP TABLE public.conflict_stat_test CASCADE; $$); NOTICE: drop cascades to table conflict_stat_test membership in replication set default replicate_ddl diff --git a/tests/regress/sql/conflict_stat.sql b/tests/regress/sql/conflict_stat.sql index 7ffb5ccb..8f8959a7 100644 --- a/tests/regress/sql/conflict_stat.sql +++ b/tests/regress/sql/conflict_stat.sql @@ -140,7 +140,73 @@ FROM spock.get_subscription_stats(:test_sub_id); SELECT spock.reset_subscription_stats(:test_sub_id); --- Cleanup +-- ============================================================ +-- Test UPDATE_EXISTS: an UPDATE from the provider violates a secondary +-- unique constraint on the subscriber. The apply worker detects the +-- unique violation, logs update_exists to the PostgreSQL log, counts +-- it in conflict stats, and records it in exception_log. +-- Default exception_behaviour is TRANSDISCARD: the worker errors on +-- the first attempt, restarts, replays read-only logging each failing +-- row to exception_log, then advances the LSN. +-- ============================================================ + +\c :provider_dsn + +-- Create a table with a secondary unique index +SELECT spock.replicate_ddl($$ + CREATE TABLE public.conflict_ue_test ( + id integer PRIMARY KEY, + uval integer NOT NULL, + data text + ); + CREATE UNIQUE INDEX conflict_ue_test_uval_idx + ON public.conflict_ue_test (uval); +$$); + +SELECT * FROM spock.repset_add_table('default', 'conflict_ue_test'); + +-- Seed rows on both sides via replication +INSERT INTO conflict_ue_test VALUES (1, 100, 'row1'); +INSERT INTO conflict_ue_test VALUES (2, 200, 'row2'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + +\c :subscriber_dsn + +-- Insert a row only on the subscriber to set up the conflict +INSERT INTO conflict_ue_test VALUES (3, 300, 'sub_only'); +SELECT * FROM conflict_ue_test ORDER BY id; + +-- Reset stats +SELECT spock.reset_subscription_stats(:test_sub_id); TRUNCATE spock.exception_log; + +\c :provider_dsn + +-- This UPDATE sets uval=300 on row id=2. It succeeds on the provider +-- (no row with uval=300 here), but on the subscriber row id=3 already +-- has uval=300, so the secondary unique index is violated. +UPDATE conflict_ue_test SET uval = 300, data = 'should_conflict' WHERE id = 2; +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + +\c :subscriber_dsn + +-- Row id=2 should still have its original value (update was discarded) +SELECT * FROM conflict_ue_test ORDER BY id; + +-- The conflict should be logged in exception_log +SELECT operation, table_name FROM spock.exception_log; + +-- Verify UPDATE_EXISTS counter incremented +SELECT confl_update_exists +FROM spock.get_subscription_stats(:test_sub_id); + +SELECT spock.reset_subscription_stats(:test_sub_id); +TRUNCATE spock.exception_log; + +-- Cleanup \c :provider_dsn +SELECT spock.replicate_ddl($$ DROP TABLE public.conflict_ue_test CASCADE; $$); + +-- Cleanup original test table +TRUNCATE spock.exception_log; SELECT spock.replicate_ddl($$ DROP TABLE public.conflict_stat_test CASCADE; $$); From 25feecc4830b1ab52bb315baf0289f68719d6cac Mon Sep 17 00:00:00 2001 From: Mason Sharp Date: Tue, 7 Apr 2026 21:59:54 -0700 Subject: [PATCH 06/11] Fix incorrect exception recording destination in conflicts.md When check_all_uc_indexes resolves the first conflict and a second unique constraint violation occurs, the error is recorded in spock.exception_log, not spock.resolutions. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/conflicts.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conflicts.md b/docs/conflicts.md index 90f9d7c5..73ebcac4 100644 --- a/docs/conflicts.md +++ b/docs/conflicts.md @@ -105,8 +105,8 @@ to `on`, Spock will perform the following actions: If `spock.check_all_uc_indexes` is `enabled`, Spock will resolve only the first conflict identified, using Last-Write-Wins logic. If a second - conflict occurs, an exception is recorded in the `spock.resolutions` - table as either `Keep-Local` or `Apply-Remote`. + unique constraint conflict occurs, an error is raised and recorded in + `spock.exception_log`. This feature is experimental; enable this feature at your own risk. From e7a3344836e1d00d6ecb73855986c43402bc21ce Mon Sep 17 00:00:00 2001 From: Asif Rehman Date: Wed, 8 Apr 2026 18:35:11 +0500 Subject: [PATCH 07/11] Add spock.resolutions_retention_days GUC and cleanup_resolutions() Introduces spock.resolutions_retention_days (int, default 100, min 0) to control how long rows are kept in spock.resolutions. Rows older than the configured window are deleted automatically by the apply worker at most once per day. Setting the GUC to 0 disables automatic cleanup entirely. Also adds spock.cleanup_resolutions(), a superuser-only SQL function that returns the number of rows deleted, for manual invocation. A log_time index is added to both the fresh-install and upgrade scripts to keep the daily DELETE efficient on large resolutions tables. Co-Authored-By: Claude Sonnet 4.6 --- Makefile | 3 +- include/spock_conflict.h | 2 + sql/spock--5.0.6--6.0.0-devel.sql | 9 ++ sql/spock--6.0.0-devel.sql | 6 ++ src/spock.c | 11 +++ src/spock_apply.c | 24 +++++ src/spock_conflict.c | 144 ++++++++++++++++++++++++++++++ 7 files changed, 198 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 772daa4b..96fe47be 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,8 @@ REGRESS = preseed infofuncs init_fail init preseed_check basic conflict_secondar interfaces foreign_key copy sequence triggers parallel functions row_filter \ row_filter_sampling att_list column_filter apply_delay \ extended node_origin_cascade multiple_upstreams tuple_origin autoddl \ - sync_event sync_table generated_columns spill_transaction read_only drop + sync_event sync_table generated_columns spill_transaction read_only \ + resolutions_retention drop # The following test cases are disabled while developing. # diff --git a/include/spock_conflict.h b/include/spock_conflict.h index ba196579..a6fe9b6c 100644 --- a/include/spock_conflict.h +++ b/include/spock_conflict.h @@ -42,6 +42,7 @@ typedef enum extern int spock_conflict_resolver; extern int spock_conflict_log_level; extern bool spock_save_resolutions; +extern int spock_resolutions_retention_days; /* * We want to eventually match native PostgreSQL conflict types, @@ -161,5 +162,6 @@ extern bool spock_conflict_resolver_check_hook(int *newval, void **extra, extern void tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple); +extern uint64 spock_cleanup_resolutions(void); #endif /* SPOCK_CONFLICT_H */ diff --git a/sql/spock--5.0.6--6.0.0-devel.sql b/sql/spock--5.0.6--6.0.0-devel.sql index 23492975..2ec45d2d 100644 --- a/sql/spock--5.0.6--6.0.0-devel.sql +++ b/sql/spock--5.0.6--6.0.0-devel.sql @@ -177,6 +177,15 @@ SET conflict_type = CASE conflict_type ELSE conflict_type END; +-- Add index on log_time to support efficient TTL-based cleanup +CREATE INDEX ON spock.resolutions (log_time); + +-- Manual cleanup function for the resolutions table +CREATE FUNCTION spock.cleanup_resolutions() +RETURNS bigint VOLATILE +LANGUAGE c AS 'MODULE_PATHNAME', 'spock_cleanup_resolutions_sql'; +REVOKE ALL ON FUNCTION spock.cleanup_resolutions() FROM PUBLIC; + -- ---- -- Subscription conflict statistics -- ---- diff --git a/sql/spock--6.0.0-devel.sql b/sql/spock--6.0.0-devel.sql index 882082e2..89d8ec9b 100644 --- a/sql/spock--6.0.0-devel.sql +++ b/sql/spock--6.0.0-devel.sql @@ -351,6 +351,12 @@ CREATE TABLE spock.resolutions ( PRIMARY KEY(id, node_name) ) WITH (user_catalog_table=true); +CREATE INDEX ON spock.resolutions (log_time); + +CREATE FUNCTION spock.cleanup_resolutions() +RETURNS bigint VOLATILE +LANGUAGE c AS 'MODULE_PATHNAME', 'spock_cleanup_resolutions_sql'; +REVOKE ALL ON FUNCTION spock.cleanup_resolutions() FROM PUBLIC; CREATE VIEW spock.TABLES AS WITH set_relations AS ( diff --git a/src/spock.c b/src/spock.c index 1683e303..582525cf 100644 --- a/src/spock.c +++ b/src/spock.c @@ -1012,6 +1012,17 @@ _PG_init(void) 0, NULL, NULL, NULL); + DefineCustomIntVariable("spock.resolutions_retention_days", + "Number of days to retain rows in spock." CATALOG_LOGTABLE " table. " + "Rows older than this are deleted periodically by the apply worker. " + "Set to 0 to disable automatic cleanup.", + NULL, + &spock_resolutions_retention_days, + 100, 0, INT_MAX, + PGC_SUSET, + 0, + NULL, NULL, NULL); + DefineCustomBoolVariable("spock.enable_quiet_mode", "Reduce message verbosity for cleaner output", "When enabled, downgrades DDL replication INFO/WARNING messages to LOG level " diff --git a/src/spock_apply.c b/src/spock_apply.c index f78cf19d..cabdefcd 100644 --- a/src/spock_apply.c +++ b/src/spock_apply.c @@ -218,6 +218,9 @@ static dlist_head sync_replica_lsn = DLIST_STATIC_INIT(sync_replica_lsn); static XLogRecPtr skip_xact_finish_lsn = InvalidXLogRecPtr; #define is_skipping_changes() (unlikely(!XLogRecPtrIsInvalid(skip_xact_finish_lsn))) +/* How often the apply worker runs spock_cleanup_resolutions() (milliseconds). */ +#define RESOLUTIONS_CLEANUP_INTERVAL_MS (86400L * 1000L) + /* * Whereas MessageContext is used for the duration of a transaction, * ApplyOperationContext can be used for individual operations @@ -2947,6 +2950,7 @@ apply_work(PGconn *streamConn) XLogRecPtr last_received = InvalidXLogRecPtr; XLogRecPtr last_inserted = InvalidXLogRecPtr; TimestampTz last_receive_timestamp = GetCurrentTimestamp(); + TimestampTz last_cleanup_timestamp = 0; bool need_replay; ErrorData *edata = NULL; @@ -3050,6 +3054,26 @@ apply_work(PGconn *streamConn) } } + /* + * Periodically clean up old rows from spock.resolutions. We run + * at most once per day regardless of whether the worker is idle + * or processing traffic. spock_cleanup_resolutions() manages its + * own transaction and error handling. + */ + if (!IsTransactionState() && + spock_resolutions_retention_days > 0) + { + TimestampTz cleanup_due; + + cleanup_due = TimestampTzPlusMilliseconds(last_cleanup_timestamp, + RESOLUTIONS_CLEANUP_INTERVAL_MS); + if (GetCurrentTimestamp() >= cleanup_due) + { + spock_cleanup_resolutions(); + last_cleanup_timestamp = GetCurrentTimestamp(); + } + } + Assert(CurrentMemoryContext == MessageContext); for (;;) diff --git a/src/spock_conflict.c b/src/spock_conflict.c index 93989696..b7695991 100644 --- a/src/spock_conflict.c +++ b/src/spock_conflict.c @@ -87,6 +87,7 @@ SpockConflictTypeName(SpockConflictType t) int spock_conflict_resolver = SPOCK_RESOLVE_LAST_UPDATE_WINS; int spock_conflict_log_level = LOG; bool spock_save_resolutions = false; +int spock_resolutions_retention_days = 100; static Datum spock_conflict_row_to_json(Datum row, bool row_isnull, bool *ret_isnull); @@ -882,6 +883,149 @@ tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple) } } +/* + * Delete rows from spock.resolutions that are older than + * spock.resolutions_retention_days. Returns the number of rows deleted. + * + * Caller must have an active transaction and snapshot (SPI requirement). + * Errors propagate to the caller; no error suppression here. + */ +static uint64 +spock_cleanup_resolutions_core(void) +{ + int ret; + uint64 ndeleted; + StringInfoData cmd; + + initStringInfo(&cmd); + appendStringInfo(&cmd, + "DELETE FROM spock.%s WHERE log_time < now() - '%d days'::interval", + CATALOG_LOGTABLE, spock_resolutions_retention_days); + + if (SPI_connect() != SPI_OK_CONNECT) + { + pfree(cmd.data); + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("SPOCK: SPI_connect failed in spock_cleanup_resolutions"))); + } + + ret = SPI_execute(cmd.data, false, 0); + pfree(cmd.data); + + if (ret != SPI_OK_DELETE) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("SPOCK: unexpected SPI result %d in spock_cleanup_resolutions", + ret))); + + ndeleted = SPI_processed; + SPI_finish(); + + elog(DEBUG1, "SPOCK: cleaned up " UINT64_FORMAT " row(s) from spock.%s", + ndeleted, CATALOG_LOGTABLE); + + return ndeleted; +} + +/* + * spock_cleanup_resolutions + * + * Apply worker entry point. Manages its own transaction so it can be called + * from the background loop where no transaction is active. Errors are + * downgraded to WARNING so a transient failure does not disrupt replication; + * the worker will retry on the next daily cycle. + */ +uint64 +spock_cleanup_resolutions(void) +{ + uint64 ndeleted = 0; + MemoryContext oldcontext; + + if (spock_resolutions_retention_days <= 0) + return 0; + + /* + * Save the caller's memory context (MessageContext in the apply worker) + * before entering PG_TRY. + */ + oldcontext = CurrentMemoryContext; + + /* + * The entire transaction lifetime lives inside PG_TRY so that errors + * from StartTransactionCommand() or PushActiveSnapshot() — not just SPI + * execution failures — are also caught and downgraded to WARNING. + * + * SetCurrentStatementStartTimestamp() must precede StartTransactionCommand() + * so the transaction's cached current_timestamp is initialised correctly. + * PushActiveSnapshot() is required by SPI_execute (it asserts an active + * snapshot exists). + */ + PG_TRY(); + { + SetCurrentStatementStartTimestamp(); + StartTransactionCommand(); + PushActiveSnapshot(GetTransactionSnapshot()); + + /* do the cleanup */ + ndeleted = spock_cleanup_resolutions_core(); + + PopActiveSnapshot(); + CommitTransactionCommand(); + } + PG_CATCH(); + { + ErrorData *edata; + + MemoryContextSwitchTo(oldcontext); + edata = CopyErrorData(); + FlushErrorState(); + + /* + * Abort only if a transaction was actually started. If the error + * occurred in SetCurrentStatementStartTimestamp() or before + * StartTransactionCommand() completed, there may be no transaction + * to abort. AbortCurrentTransaction() also handles SPI and snapshot + * cleanup via AtEOXact_SPI() and AtAbort_Snapshot(), avoiding + * double-cleanup if core() already called SPI_finish() before + * CommitTransactionCommand() threw. + */ + if (IsTransactionState()) + AbortCurrentTransaction(); + + ereport(WARNING, + (errcode(edata->sqlerrcode), + errmsg("%s", edata->message))); + FreeErrorData(edata); + } + PG_END_TRY(); + MemoryContextSwitchTo(oldcontext); + + return ndeleted; +} + +/* + * spock_cleanup_resolutions_sql + * + * SQL-callable entry point. The executor already provides an active + * transaction, so we call the core function directly. Any error propagates + * to the caller normally — no silent transaction poisoning. + */ +PG_FUNCTION_INFO_V1(spock_cleanup_resolutions_sql); +Datum +spock_cleanup_resolutions_sql(PG_FUNCTION_ARGS) +{ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to call spock.cleanup_resolutions()"))); + + if (spock_resolutions_retention_days <= 0) + PG_RETURN_INT64(0); + + PG_RETURN_INT64((int64) spock_cleanup_resolutions_core()); +} + /* * Convert the target row to json form if it isn't null. */ From aa6d5e5b50ae1be9c06c9d98c33d09b00dbfd96a Mon Sep 17 00:00:00 2001 From: Asif Rehman Date: Wed, 8 Apr 2026 18:35:11 +0500 Subject: [PATCH 08/11] docs: document spock.resolutions_retention_days and cleanup_resolutions() Co-Authored-By: Claude Sonnet 4.6 --- docs/configuring.md | 16 ++++++++ .../functions/spock_cleanup_resolutions.md | 41 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 docs/spock_functions/functions/spock_cleanup_resolutions.md diff --git a/docs/configuring.md b/docs/configuring.md index de7240a3..5d94c5f1 100644 --- a/docs/configuring.md +++ b/docs/configuring.md @@ -258,6 +258,22 @@ The following configuration values are possible: logs all conflict resolutions to the `spock.resolutions` table. This option can only be set when the postmaster starts. +### `spock.resolutions_retention_days` + +`spock.resolutions_retention_days` controls how long rows are kept in the +`spock.resolutions` table. Rows with a `log_time` older than this many days +are deleted automatically by the apply worker, which runs the cleanup at most +once per day. The default is `100` days. Set to `0` to disable automatic +cleanup entirely. + +This GUC has no effect when `spock.save_resolutions` is `off`. + +Cleanup can also be triggered manually at any time by a superuser: + +```sql +SELECT spock.cleanup_resolutions(); +``` + ### `spock.stats_max_entries` `spock.stats_max_entries` specifies the maximum number of entries that can diff --git a/docs/spock_functions/functions/spock_cleanup_resolutions.md b/docs/spock_functions/functions/spock_cleanup_resolutions.md new file mode 100644 index 00000000..9e44560c --- /dev/null +++ b/docs/spock_functions/functions/spock_cleanup_resolutions.md @@ -0,0 +1,41 @@ +## NAME + +spock.cleanup_resolutions() + +### SYNOPSIS + +spock.cleanup_resolutions() + +### RETURNS + +bigint — the number of rows deleted from `spock.resolutions`. + +### DESCRIPTION + +Deletes rows from `spock.resolutions` whose `log_time` is older than the +value configured by `spock.resolutions_retention_days`. Returns the number +of rows deleted. + +This function is a superuser-only manual trigger for the same cleanup that +the apply worker runs automatically once per day. It is useful for +immediate cleanup via `pg_cron` or when the apply worker has not been +running. + +The function respects both `spock.save_resolutions` and +`spock.resolutions_retention_days`. If either setting disables cleanup +(`save_resolutions = off` or `resolutions_retention_days = 0`), the +function returns `0` without deleting anything. + +### ARGUMENTS + +None. + +### EXAMPLE + +Delete conflict history rows older than the configured retention window: + + SELECT spock.cleanup_resolutions(); + +### SEE ALSO + +`spock.save_resolutions`, `spock.resolutions_retention_days` From b9adb1d602df8eb077d6caf5e1fb96c30f2fff79 Mon Sep 17 00:00:00 2001 From: Asif Rehman Date: Wed, 8 Apr 2026 18:35:11 +0500 Subject: [PATCH 09/11] test: add resolutions_retention regression test Co-Authored-By: Claude Sonnet 4.6 --- .../expected/resolutions_retention.out | 199 ++++++++++++++++++ tests/regress/sql/resolutions_retention.sql | 106 ++++++++++ 2 files changed, 305 insertions(+) create mode 100644 tests/regress/expected/resolutions_retention.out create mode 100644 tests/regress/sql/resolutions_retention.sql diff --git a/tests/regress/expected/resolutions_retention.out b/tests/regress/expected/resolutions_retention.out new file mode 100644 index 00000000..cbf0af16 --- /dev/null +++ b/tests/regress/expected/resolutions_retention.out @@ -0,0 +1,199 @@ +-- resolutions_retention: test spock.resolutions_retention_days GUC and +-- spock.cleanup_resolutions() SQL function. +SELECT * FROM spock_regress_variables() +\gset +-- Configure GUCs up front on both nodes +\c :provider_dsn +ALTER SYSTEM SET spock.save_resolutions = on; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +\c :subscriber_dsn +ALTER SYSTEM SET spock.save_resolutions = on; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +SELECT pg_sleep(1); + pg_sleep +---------- + +(1 row) + +TRUNCATE spock.resolutions; +-- Setup: create a table and seed it on both sides to enable conflict generation +\c :provider_dsn +SELECT spock.replicate_ddl($$ + CREATE TABLE retention_test (id int PRIMARY KEY, data text); +$$); + replicate_ddl +--------------- + t +(1 row) + +SELECT * FROM spock.repset_add_table('default', 'retention_test'); + repset_add_table +------------------ + t +(1 row) + +INSERT INTO retention_test VALUES (1, 'one'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + wait_slot_confirm_lsn +----------------------- + +(1 row) + +-- Generate an insert_exists conflict: insert same PK on subscriber first, +-- then provider insert arrives and conflicts. +\c :subscriber_dsn +INSERT INTO retention_test VALUES (2, 'sub-two'); +\c :provider_dsn +INSERT INTO retention_test VALUES (2, 'pub-two'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + wait_slot_confirm_lsn +----------------------- + +(1 row) + +\c :subscriber_dsn +-- Expect 1 conflict row (insert_exists) +SELECT conflict_type FROM spock.resolutions WHERE relname = 'public.retention_test'; + conflict_type +--------------- + insert_exists +(1 row) + +-- Backdate that row to 60 days ago to simulate aged history +UPDATE spock.resolutions +SET log_time = now() - '60 days'::interval +WHERE relname = 'public.retention_test'; +-- Expect 1 row total (the 60-day-old one) +SELECT COUNT(*) AS total FROM spock.resolutions WHERE relname = 'public.retention_test'; + total +------- + 1 +(1 row) + +-- Set retention to 30 days: the 60-day-old row falls outside the window +-- (60 > 30) so cleanup will delete it. +SET spock.resolutions_retention_days = 30; +SELECT spock.cleanup_resolutions() AS rows_deleted; + rows_deleted +-------------- + 1 +(1 row) + +-- Expect 0 rows remaining +SELECT COUNT(*) AS remaining FROM spock.resolutions WHERE relname = 'public.retention_test'; + remaining +----------- + 0 +(1 row) + +-- Generate a fresh conflict for subsequent tests +INSERT INTO retention_test VALUES (3, 'sub-three'); +\c :provider_dsn +INSERT INTO retention_test VALUES (3, 'pub-three'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + wait_slot_confirm_lsn +----------------------- + +(1 row) + +\c :subscriber_dsn +-- Expect 1 recent row +SELECT COUNT(*) AS total FROM spock.resolutions WHERE relname = 'public.retention_test'; + total +------- + 1 +(1 row) + +-- Test that retention_days = 0 disables cleanup: backdate the row so it +-- would be deleted if cleanup ran, then verify the guard prevents deletion. +UPDATE spock.resolutions +SET log_time = now() - '999 days'::interval +WHERE relname = 'public.retention_test'; +SET spock.resolutions_retention_days = 0; +SELECT spock.cleanup_resolutions() AS rows_deleted; + rows_deleted +-------------- + 0 +(1 row) + +-- Row should still be there +SELECT COUNT(*) AS remaining FROM spock.resolutions WHERE relname = 'public.retention_test'; + remaining +----------- + 1 +(1 row) + +-- Test that cleanup runs even when save_resolutions=off: logging controls new +-- inserts only; cleanup is driven solely by retention_days. +UPDATE spock.resolutions +SET log_time = now() - '999 days'::interval +WHERE relname = 'public.retention_test'; +SET spock.resolutions_retention_days = 30; +ALTER SYSTEM SET spock.save_resolutions = off; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +SELECT pg_sleep(1); + pg_sleep +---------- + +(1 row) + +SELECT spock.cleanup_resolutions() AS rows_deleted; + rows_deleted +-------------- + 1 +(1 row) + +-- Row should be deleted (save_resolutions=off does not suppress cleanup) +SELECT COUNT(*) AS remaining FROM spock.resolutions WHERE relname = 'public.retention_test'; + remaining +----------- + 0 +(1 row) + +-- Cleanup +\c :provider_dsn +SELECT * FROM spock.repset_remove_table('default', 'retention_test'); + repset_remove_table +--------------------- + t +(1 row) + +SELECT spock.replicate_ddl($$ + DROP TABLE retention_test CASCADE; +$$); + replicate_ddl +--------------- + t +(1 row) + +ALTER SYSTEM SET spock.save_resolutions = off; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +\c :subscriber_dsn +RESET spock.resolutions_retention_days; +ALTER SYSTEM SET spock.save_resolutions = off; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + diff --git a/tests/regress/sql/resolutions_retention.sql b/tests/regress/sql/resolutions_retention.sql new file mode 100644 index 00000000..ce3fed96 --- /dev/null +++ b/tests/regress/sql/resolutions_retention.sql @@ -0,0 +1,106 @@ +-- resolutions_retention: test spock.resolutions_retention_days GUC and +-- spock.cleanup_resolutions() SQL function. +SELECT * FROM spock_regress_variables() +\gset + +-- Configure GUCs up front on both nodes +\c :provider_dsn +ALTER SYSTEM SET spock.save_resolutions = on; +SELECT pg_reload_conf(); + +\c :subscriber_dsn +ALTER SYSTEM SET spock.save_resolutions = on; +SELECT pg_reload_conf(); +SELECT pg_sleep(1); + +TRUNCATE spock.resolutions; + +-- Setup: create a table and seed it on both sides to enable conflict generation +\c :provider_dsn +SELECT spock.replicate_ddl($$ + CREATE TABLE retention_test (id int PRIMARY KEY, data text); +$$); +SELECT * FROM spock.repset_add_table('default', 'retention_test'); +INSERT INTO retention_test VALUES (1, 'one'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + +-- Generate an insert_exists conflict: insert same PK on subscriber first, +-- then provider insert arrives and conflicts. +\c :subscriber_dsn +INSERT INTO retention_test VALUES (2, 'sub-two'); + +\c :provider_dsn +INSERT INTO retention_test VALUES (2, 'pub-two'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + +\c :subscriber_dsn +-- Expect 1 conflict row (insert_exists) +SELECT conflict_type FROM spock.resolutions WHERE relname = 'public.retention_test'; + +-- Backdate that row to 60 days ago to simulate aged history +UPDATE spock.resolutions +SET log_time = now() - '60 days'::interval +WHERE relname = 'public.retention_test'; + +-- Expect 1 row total (the 60-day-old one) +SELECT COUNT(*) AS total FROM spock.resolutions WHERE relname = 'public.retention_test'; + +-- Set retention to 30 days: the 60-day-old row falls outside the window +-- (60 > 30) so cleanup will delete it. +SET spock.resolutions_retention_days = 30; +SELECT spock.cleanup_resolutions() AS rows_deleted; + +-- Expect 0 rows remaining +SELECT COUNT(*) AS remaining FROM spock.resolutions WHERE relname = 'public.retention_test'; + +-- Generate a fresh conflict for subsequent tests +INSERT INTO retention_test VALUES (3, 'sub-three'); + +\c :provider_dsn +INSERT INTO retention_test VALUES (3, 'pub-three'); +SELECT spock.wait_slot_confirm_lsn(NULL, NULL); + +\c :subscriber_dsn +-- Expect 1 recent row +SELECT COUNT(*) AS total FROM spock.resolutions WHERE relname = 'public.retention_test'; + +-- Test that retention_days = 0 disables cleanup: backdate the row so it +-- would be deleted if cleanup ran, then verify the guard prevents deletion. +UPDATE spock.resolutions +SET log_time = now() - '999 days'::interval +WHERE relname = 'public.retention_test'; +SET spock.resolutions_retention_days = 0; +SELECT spock.cleanup_resolutions() AS rows_deleted; + +-- Row should still be there +SELECT COUNT(*) AS remaining FROM spock.resolutions WHERE relname = 'public.retention_test'; + +-- Test that cleanup runs even when save_resolutions=off: logging controls new +-- inserts only; cleanup is driven solely by retention_days. +UPDATE spock.resolutions +SET log_time = now() - '999 days'::interval +WHERE relname = 'public.retention_test'; + +SET spock.resolutions_retention_days = 30; +ALTER SYSTEM SET spock.save_resolutions = off; +SELECT pg_reload_conf(); +SELECT pg_sleep(1); + +SELECT spock.cleanup_resolutions() AS rows_deleted; + +-- Row should be deleted (save_resolutions=off does not suppress cleanup) +SELECT COUNT(*) AS remaining FROM spock.resolutions WHERE relname = 'public.retention_test'; + +-- Cleanup +\c :provider_dsn +SELECT * FROM spock.repset_remove_table('default', 'retention_test'); +SELECT spock.replicate_ddl($$ + DROP TABLE retention_test CASCADE; +$$); +ALTER SYSTEM SET spock.save_resolutions = off; +SELECT pg_reload_conf(); + +\c :subscriber_dsn +RESET spock.resolutions_retention_days; +ALTER SYSTEM SET spock.save_resolutions = off; +SELECT pg_reload_conf(); From 2f007c312bccb65a19776870ef7be7474e38c71c Mon Sep 17 00:00:00 2001 From: Asif Rehman Date: Sun, 12 Apr 2026 13:38:00 +0500 Subject: [PATCH 10/11] Address PR 412 review: GUC level, optional days arg, doc fix - Change resolutions_retention_days from PGC_SUSET to PGC_SIGHUP to prevent session-level SET; apply worker picks up changes via reload - Add optional days arg to cleanup_resolutions(); overrides GUC when set - Fix docs: remove incorrect save_resolutions claim; clarify precedence - Update regression test to use ALTER SYSTEM SET + pg_reload_conf() instead of session-level SET/RESET Co-Authored-By: Claude Sonnet 4.6 --- .../functions/spock_cleanup_resolutions.md | 23 +++++++------ sql/spock--5.0.6--6.0.0-devel.sql | 4 +-- sql/spock--6.0.0-devel.sql | 4 +-- src/spock.c | 2 +- src/spock_conflict.c | 18 ++++++++--- .../expected/resolutions_retention.out | 32 ++++++++++++++++--- tests/regress/sql/resolutions_retention.sql | 12 ++++--- 7 files changed, 68 insertions(+), 27 deletions(-) diff --git a/docs/spock_functions/functions/spock_cleanup_resolutions.md b/docs/spock_functions/functions/spock_cleanup_resolutions.md index 9e44560c..28836245 100644 --- a/docs/spock_functions/functions/spock_cleanup_resolutions.md +++ b/docs/spock_functions/functions/spock_cleanup_resolutions.md @@ -4,7 +4,7 @@ spock.cleanup_resolutions() ### SYNOPSIS -spock.cleanup_resolutions() +spock.cleanup_resolutions([days integer]) ### RETURNS @@ -13,22 +13,23 @@ bigint — the number of rows deleted from `spock.resolutions`. ### DESCRIPTION Deletes rows from `spock.resolutions` whose `log_time` is older than the -value configured by `spock.resolutions_retention_days`. Returns the number -of rows deleted. +retention window. Returns the number of rows deleted. This function is a superuser-only manual trigger for the same cleanup that the apply worker runs automatically once per day. It is useful for immediate cleanup via `pg_cron` or when the apply worker has not been running. -The function respects both `spock.save_resolutions` and -`spock.resolutions_retention_days`. If either setting disables cleanup -(`save_resolutions = off` or `resolutions_retention_days = 0`), the -function returns `0` without deleting anything. +When `days` is provided it takes precedence over `spock.resolutions_retention_days`, +including when the GUC is set to 0 (automatic cleanup disabled). If `days` is +omitted, the GUC value is used; if the GUC is also 0, the function returns `0` +without deleting anything. ### ARGUMENTS -None. +| Argument | Type | Default | Description | +|----------|------|---------|-------------| +| `days` | `integer` | `NULL` | Retention window in days. Overrides `spock.resolutions_retention_days` for this call. Pass an explicit value to perform a one-off cleanup when automatic cleanup is disabled (`resolutions_retention_days = 0`). | ### EXAMPLE @@ -36,6 +37,10 @@ Delete conflict history rows older than the configured retention window: SELECT spock.cleanup_resolutions(); +Delete rows older than 60 days, regardless of the GUC setting: + + SELECT spock.cleanup_resolutions(60); + ### SEE ALSO -`spock.save_resolutions`, `spock.resolutions_retention_days` +`spock.resolutions_retention_days` diff --git a/sql/spock--5.0.6--6.0.0-devel.sql b/sql/spock--5.0.6--6.0.0-devel.sql index 2ec45d2d..ab8c214a 100644 --- a/sql/spock--5.0.6--6.0.0-devel.sql +++ b/sql/spock--5.0.6--6.0.0-devel.sql @@ -181,10 +181,10 @@ END; CREATE INDEX ON spock.resolutions (log_time); -- Manual cleanup function for the resolutions table -CREATE FUNCTION spock.cleanup_resolutions() +CREATE FUNCTION spock.cleanup_resolutions(days integer DEFAULT NULL) RETURNS bigint VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'spock_cleanup_resolutions_sql'; -REVOKE ALL ON FUNCTION spock.cleanup_resolutions() FROM PUBLIC; +REVOKE ALL ON FUNCTION spock.cleanup_resolutions(integer) FROM PUBLIC; -- ---- -- Subscription conflict statistics diff --git a/sql/spock--6.0.0-devel.sql b/sql/spock--6.0.0-devel.sql index 89d8ec9b..719191b9 100644 --- a/sql/spock--6.0.0-devel.sql +++ b/sql/spock--6.0.0-devel.sql @@ -353,10 +353,10 @@ CREATE TABLE spock.resolutions ( ) WITH (user_catalog_table=true); CREATE INDEX ON spock.resolutions (log_time); -CREATE FUNCTION spock.cleanup_resolutions() +CREATE FUNCTION spock.cleanup_resolutions(days integer DEFAULT NULL) RETURNS bigint VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'spock_cleanup_resolutions_sql'; -REVOKE ALL ON FUNCTION spock.cleanup_resolutions() FROM PUBLIC; +REVOKE ALL ON FUNCTION spock.cleanup_resolutions(integer) FROM PUBLIC; CREATE VIEW spock.TABLES AS WITH set_relations AS ( diff --git a/src/spock.c b/src/spock.c index 582525cf..74519213 100644 --- a/src/spock.c +++ b/src/spock.c @@ -1019,7 +1019,7 @@ _PG_init(void) NULL, &spock_resolutions_retention_days, 100, 0, INT_MAX, - PGC_SUSET, + PGC_SIGHUP, 0, NULL, NULL, NULL); diff --git a/src/spock_conflict.c b/src/spock_conflict.c index b7695991..c9fb4f1e 100644 --- a/src/spock_conflict.c +++ b/src/spock_conflict.c @@ -891,7 +891,7 @@ tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple) * Errors propagate to the caller; no error suppression here. */ static uint64 -spock_cleanup_resolutions_core(void) +spock_cleanup_resolutions_core(int days) { int ret; uint64 ndeleted; @@ -900,7 +900,7 @@ spock_cleanup_resolutions_core(void) initStringInfo(&cmd); appendStringInfo(&cmd, "DELETE FROM spock.%s WHERE log_time < now() - '%d days'::interval", - CATALOG_LOGTABLE, spock_resolutions_retention_days); + CATALOG_LOGTABLE, days); if (SPI_connect() != SPI_OK_CONNECT) { @@ -968,7 +968,7 @@ spock_cleanup_resolutions(void) PushActiveSnapshot(GetTransactionSnapshot()); /* do the cleanup */ - ndeleted = spock_cleanup_resolutions_core(); + ndeleted = spock_cleanup_resolutions_core(spock_resolutions_retention_days); PopActiveSnapshot(); CommitTransactionCommand(); @@ -1010,20 +1010,28 @@ spock_cleanup_resolutions(void) * SQL-callable entry point. The executor already provides an active * transaction, so we call the core function directly. Any error propagates * to the caller normally — no silent transaction poisoning. + * + * The optional 'days' argument overrides spock.resolutions_retention_days for + * this call. This is useful when automatic cleanup is disabled (retention = 0) + * but the operator wants a one-off purge with a specific retention window. */ PG_FUNCTION_INFO_V1(spock_cleanup_resolutions_sql); Datum spock_cleanup_resolutions_sql(PG_FUNCTION_ARGS) { + int days; + if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to call spock.cleanup_resolutions()"))); - if (spock_resolutions_retention_days <= 0) + days = PG_ARGISNULL(0) ? spock_resolutions_retention_days : PG_GETARG_INT32(0); + + if (days <= 0) PG_RETURN_INT64(0); - PG_RETURN_INT64((int64) spock_cleanup_resolutions_core()); + PG_RETURN_INT64((int64) spock_cleanup_resolutions_core(days)); } /* diff --git a/tests/regress/expected/resolutions_retention.out b/tests/regress/expected/resolutions_retention.out index cbf0af16..3b149cc7 100644 --- a/tests/regress/expected/resolutions_retention.out +++ b/tests/regress/expected/resolutions_retention.out @@ -82,7 +82,13 @@ SELECT COUNT(*) AS total FROM spock.resolutions WHERE relname = 'public.retentio -- Set retention to 30 days: the 60-day-old row falls outside the window -- (60 > 30) so cleanup will delete it. -SET spock.resolutions_retention_days = 30; +ALTER SYSTEM SET spock.resolutions_retention_days = 30; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + SELECT spock.cleanup_resolutions() AS rows_deleted; rows_deleted -------------- @@ -119,7 +125,13 @@ SELECT COUNT(*) AS total FROM spock.resolutions WHERE relname = 'public.retentio UPDATE spock.resolutions SET log_time = now() - '999 days'::interval WHERE relname = 'public.retention_test'; -SET spock.resolutions_retention_days = 0; +ALTER SYSTEM SET spock.resolutions_retention_days = 0; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + SELECT spock.cleanup_resolutions() AS rows_deleted; rows_deleted -------------- @@ -138,7 +150,13 @@ SELECT COUNT(*) AS remaining FROM spock.resolutions WHERE relname = 'public.rete UPDATE spock.resolutions SET log_time = now() - '999 days'::interval WHERE relname = 'public.retention_test'; -SET spock.resolutions_retention_days = 30; +ALTER SYSTEM SET spock.resolutions_retention_days = 30; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + ALTER SYSTEM SET spock.save_resolutions = off; SELECT pg_reload_conf(); pg_reload_conf @@ -189,7 +207,13 @@ SELECT pg_reload_conf(); (1 row) \c :subscriber_dsn -RESET spock.resolutions_retention_days; +ALTER SYSTEM RESET spock.resolutions_retention_days; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + ALTER SYSTEM SET spock.save_resolutions = off; SELECT pg_reload_conf(); pg_reload_conf diff --git a/tests/regress/sql/resolutions_retention.sql b/tests/regress/sql/resolutions_retention.sql index ce3fed96..fae8deba 100644 --- a/tests/regress/sql/resolutions_retention.sql +++ b/tests/regress/sql/resolutions_retention.sql @@ -47,7 +47,8 @@ SELECT COUNT(*) AS total FROM spock.resolutions WHERE relname = 'public.retentio -- Set retention to 30 days: the 60-day-old row falls outside the window -- (60 > 30) so cleanup will delete it. -SET spock.resolutions_retention_days = 30; +ALTER SYSTEM SET spock.resolutions_retention_days = 30; +SELECT pg_reload_conf(); SELECT spock.cleanup_resolutions() AS rows_deleted; -- Expect 0 rows remaining @@ -69,7 +70,8 @@ SELECT COUNT(*) AS total FROM spock.resolutions WHERE relname = 'public.retentio UPDATE spock.resolutions SET log_time = now() - '999 days'::interval WHERE relname = 'public.retention_test'; -SET spock.resolutions_retention_days = 0; +ALTER SYSTEM SET spock.resolutions_retention_days = 0; +SELECT pg_reload_conf(); SELECT spock.cleanup_resolutions() AS rows_deleted; -- Row should still be there @@ -81,7 +83,8 @@ UPDATE spock.resolutions SET log_time = now() - '999 days'::interval WHERE relname = 'public.retention_test'; -SET spock.resolutions_retention_days = 30; +ALTER SYSTEM SET spock.resolutions_retention_days = 30; +SELECT pg_reload_conf(); ALTER SYSTEM SET spock.save_resolutions = off; SELECT pg_reload_conf(); SELECT pg_sleep(1); @@ -101,6 +104,7 @@ ALTER SYSTEM SET spock.save_resolutions = off; SELECT pg_reload_conf(); \c :subscriber_dsn -RESET spock.resolutions_retention_days; +ALTER SYSTEM RESET spock.resolutions_retention_days; +SELECT pg_reload_conf(); ALTER SYSTEM SET spock.save_resolutions = off; SELECT pg_reload_conf(); From 2afc55b2fae1e3e7a3ba8c70be94fc70cc446c30 Mon Sep 17 00:00:00 2001 From: Asif Rehman Date: Sun, 12 Apr 2026 13:57:03 +0500 Subject: [PATCH 11/11] test: fix resolutions_retention for PGC_SIGHUP reload timing PGC_SIGHUP GUC changes require ALTER SYSTEM + pg_reload_conf() and a pg_sleep(1) to ensure the SIGHUP is processed by the backend before cleanup_resolutions() reads the C variable. Without the sleep the reload arrives one statement late, causing wrong rows_deleted counts. Also fix trailing-space alignment in pg_reload_conf and pg_sleep column headers to match actual psql aligned-format output. Co-Authored-By: Claude Sonnet 4.6 --- .../expected/resolutions_retention.out | 26 ++++++++++++++++--- tests/regress/sql/resolutions_retention.sql | 3 +++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/tests/regress/expected/resolutions_retention.out b/tests/regress/expected/resolutions_retention.out index 3b149cc7..0719aeb4 100644 --- a/tests/regress/expected/resolutions_retention.out +++ b/tests/regress/expected/resolutions_retention.out @@ -84,11 +84,17 @@ SELECT COUNT(*) AS total FROM spock.resolutions WHERE relname = 'public.retentio -- (60 > 30) so cleanup will delete it. ALTER SYSTEM SET spock.resolutions_retention_days = 30; SELECT pg_reload_conf(); - pg_reload_conf + pg_reload_conf ---------------- t (1 row) +SELECT pg_sleep(1); + pg_sleep +---------- + +(1 row) + SELECT spock.cleanup_resolutions() AS rows_deleted; rows_deleted -------------- @@ -127,11 +133,17 @@ SET log_time = now() - '999 days'::interval WHERE relname = 'public.retention_test'; ALTER SYSTEM SET spock.resolutions_retention_days = 0; SELECT pg_reload_conf(); - pg_reload_conf + pg_reload_conf ---------------- t (1 row) +SELECT pg_sleep(1); + pg_sleep +---------- + +(1 row) + SELECT spock.cleanup_resolutions() AS rows_deleted; rows_deleted -------------- @@ -152,11 +164,17 @@ SET log_time = now() - '999 days'::interval WHERE relname = 'public.retention_test'; ALTER SYSTEM SET spock.resolutions_retention_days = 30; SELECT pg_reload_conf(); - pg_reload_conf + pg_reload_conf ---------------- t (1 row) +SELECT pg_sleep(1); + pg_sleep +---------- + +(1 row) + ALTER SYSTEM SET spock.save_resolutions = off; SELECT pg_reload_conf(); pg_reload_conf @@ -209,7 +227,7 @@ SELECT pg_reload_conf(); \c :subscriber_dsn ALTER SYSTEM RESET spock.resolutions_retention_days; SELECT pg_reload_conf(); - pg_reload_conf + pg_reload_conf ---------------- t (1 row) diff --git a/tests/regress/sql/resolutions_retention.sql b/tests/regress/sql/resolutions_retention.sql index fae8deba..9a1e9419 100644 --- a/tests/regress/sql/resolutions_retention.sql +++ b/tests/regress/sql/resolutions_retention.sql @@ -49,6 +49,7 @@ SELECT COUNT(*) AS total FROM spock.resolutions WHERE relname = 'public.retentio -- (60 > 30) so cleanup will delete it. ALTER SYSTEM SET spock.resolutions_retention_days = 30; SELECT pg_reload_conf(); +SELECT pg_sleep(1); SELECT spock.cleanup_resolutions() AS rows_deleted; -- Expect 0 rows remaining @@ -72,6 +73,7 @@ SET log_time = now() - '999 days'::interval WHERE relname = 'public.retention_test'; ALTER SYSTEM SET spock.resolutions_retention_days = 0; SELECT pg_reload_conf(); +SELECT pg_sleep(1); SELECT spock.cleanup_resolutions() AS rows_deleted; -- Row should still be there @@ -85,6 +87,7 @@ WHERE relname = 'public.retention_test'; ALTER SYSTEM SET spock.resolutions_retention_days = 30; SELECT pg_reload_conf(); +SELECT pg_sleep(1); ALTER SYSTEM SET spock.save_resolutions = off; SELECT pg_reload_conf(); SELECT pg_sleep(1);