diff --git a/src/backend/distributed/utils/resource_lock.c b/src/backend/distributed/utils/resource_lock.c
index 1dbc84c42b2..9edfc4943a9 100644
--- a/src/backend/distributed/utils/resource_lock.c
+++ b/src/backend/distributed/utils/resource_lock.c
@@ -405,7 +405,17 @@ LockShardListMetadataOnWorkers(LOCKMODE lockmode, List *shardIntervalList)
 
 	appendStringInfo(lockCommand, "])");
 
-	SendCommandToWorkersWithMetadata(lockCommand->data);
+	/*
+	 * Disable idle_in_transaction_session_timeout on metadata workers before
+	 * acquiring locks. In block_writes mode, these connections stay open for
+	 * the entire shard copy which can take hours for large shards. Without
+	 * this, the timeout would kill the connection and fail the move.
+	 * SET LOCAL scopes the change to this transaction only.
+	 */
+	List *commandList = list_make2(
+		"SET LOCAL idle_in_transaction_session_timeout = 0",
+		lockCommand->data);
+	SendCommandListToWorkersWithMetadata(commandList);
 }
 
 
diff --git a/src/test/regress/expected/shard_move_constraints_blocking.out b/src/test/regress/expected/shard_move_constraints_blocking.out
index 66dec069e7a..2d82c0e8278 100644
--- a/src/test/regress/expected/shard_move_constraints_blocking.out
+++ b/src/test/regress/expected/shard_move_constraints_blocking.out
@@ -399,3 +399,107 @@ drop cascades to table "blocking shard Move Fkeys Indexes".reference_table
 drop cascades to table "blocking shard Move Fkeys Indexes".reference_table_8970028
 drop cascades to table "blocking shard Move Fkeys Indexes".index_backed_rep_identity
 DROP ROLE mx_rebalancer_blocking_role_ent;
+-- Test: block_writes shard move succeeds even when workers have a low
+-- idle_in_transaction_session_timeout. LockShardListMetadataOnWorkers opens
+-- coordinated transactions on ALL metadata workers before the data copy.
+-- Workers not involved in the copy sit idle-in-transaction for the entire
+-- duration. Without the SET LOCAL override, the timeout would kill those
+-- connections and fail the move.
+SET citus.next_shard_id TO 8980000;
+SET citus.shard_count TO 4;
+SET citus.shard_replication_factor TO 1;
+CREATE SCHEMA blocking_move_idle_timeout;
+SET search_path TO blocking_move_idle_timeout;
+-- set a very low idle_in_transaction_session_timeout on all nodes
+SELECT 1 FROM run_command_on_all_nodes(
+    'ALTER SYSTEM SET idle_in_transaction_session_timeout = ''1s''');
+ ?column?
+---------------------------------------------------------------------
+        1
+        1
+        1
+(3 rows)
+
+SELECT 1 FROM run_command_on_all_nodes('SELECT pg_reload_conf()');
+ ?column?
+---------------------------------------------------------------------
+        1
+        1
+        1
+(3 rows)
+
+-- allow the reload to take effect
+SELECT pg_sleep(0.5);
+ pg_sleep
+---------------------------------------------------------------------
+
+(1 row)
+
+-- Helper that sleeps for the given number of seconds and returns TRUE.
+-- Used in a NOT VALID check constraint below so that COPY (which fires
+-- check constraints) introduces a per-row delay during the shard move,
+-- making the data-copy phase reliably exceed the 1s timeout.
+CREATE FUNCTION sleep_and_true(float8) RETURNS boolean LANGUAGE plpgsql AS $$
+BEGIN
+    PERFORM pg_sleep($1);
+    RETURN true;
+END;
+$$;
+CREATE TABLE test_move(id int PRIMARY KEY, val text);
+SELECT create_distributed_table('test_move', 'id');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO test_move SELECT i, 'val_' || i FROM generate_series(1, 100) i;
+-- Add a per-row delay constraint after inserting data (NOT VALID skips
+-- checking existing rows). COPY during the shard move fires check constraints,
+-- so each copied row will sleep for 0.1 s, ensuring the copy takes > 1 s and
+-- the idle timeout would kill uninvolved metadata workers without the fix.
+-- Use the schema-qualified function name so the constraint propagates to
+-- workers correctly regardless of their search_path.
+ALTER TABLE test_move ADD CONSTRAINT slow_copy
+    CHECK (blocking_move_idle_timeout.sleep_and_true(0.1)) NOT VALID;
+-- move a shard using block_writes; should succeed despite the 1s timeout
+SELECT citus_move_shard_placement(8980000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes');
+ citus_move_shard_placement
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT public.wait_for_resource_cleanup();
+ wait_for_resource_cleanup
+---------------------------------------------------------------------
+
+(1 row)
+
+-- verify data integrity after move
+SELECT count(*) FROM test_move;
+ count
+---------------------------------------------------------------------
+   100
+(1 row)
+
+-- cleanup: restore idle_in_transaction_session_timeout
+SELECT 1 FROM run_command_on_all_nodes(
+    'ALTER SYSTEM RESET idle_in_transaction_session_timeout');
+ ?column?
+---------------------------------------------------------------------
+        1
+        1
+        1
+(3 rows)
+
+SELECT 1 FROM run_command_on_all_nodes('SELECT pg_reload_conf()');
+ ?column?
+--------------------------------------------------------------------- + 1 + 1 + 1 +(3 rows) + +DROP SCHEMA blocking_move_idle_timeout CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to function sleep_and_true(double precision) +drop cascades to table test_move diff --git a/src/test/regress/sql/shard_move_constraints_blocking.sql b/src/test/regress/sql/shard_move_constraints_blocking.sql index 66b58f42b9c..af8312ac76e 100644 --- a/src/test/regress/sql/shard_move_constraints_blocking.sql +++ b/src/test/regress/sql/shard_move_constraints_blocking.sql @@ -222,3 +222,61 @@ ALTER TABLE sensors_2020_01_01 DROP CONSTRAINT fkey_from_child_to_child; \c - postgres - :master_port DROP SCHEMA "blocking shard Move Fkeys Indexes" CASCADE; DROP ROLE mx_rebalancer_blocking_role_ent; + +-- Test: block_writes shard move succeeds even when workers have a low +-- idle_in_transaction_session_timeout. LockShardListMetadataOnWorkers opens +-- coordinated transactions on ALL metadata workers before the data copy. +-- Workers not involved in the copy sit idle-in-transaction for the entire +-- duration. Without the SET LOCAL override, the timeout would kill those +-- connections and fail the move. +SET citus.next_shard_id TO 8980000; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; + +CREATE SCHEMA blocking_move_idle_timeout; +SET search_path TO blocking_move_idle_timeout; + +-- set a very low idle_in_transaction_session_timeout on all nodes +SELECT 1 FROM run_command_on_all_nodes( + 'ALTER SYSTEM SET idle_in_transaction_session_timeout = ''1s'''); +SELECT 1 FROM run_command_on_all_nodes('SELECT pg_reload_conf()'); +-- allow the reload to take effect +SELECT pg_sleep(0.5); + +-- Helper that sleeps for the given number of seconds and returns TRUE. 
+-- Used in a NOT VALID check constraint below so that COPY (which fires +-- check constraints) introduces a per-row delay during the shard move, +-- making the data-copy phase reliably exceed the 1s timeout. +CREATE FUNCTION sleep_and_true(float8) RETURNS boolean LANGUAGE plpgsql AS $$ +BEGIN + PERFORM pg_sleep($1); + RETURN true; +END; +$$; + +CREATE TABLE test_move(id int PRIMARY KEY, val text); +SELECT create_distributed_table('test_move', 'id'); +INSERT INTO test_move SELECT i, 'val_' || i FROM generate_series(1, 100) i; + +-- Add a per-row delay constraint after inserting data (NOT VALID skips +-- checking existing rows). COPY during the shard move fires check constraints, +-- so each copied row will sleep for 0.1 s, ensuring the copy takes > 1 s and +-- the idle timeout would kill uninvolved metadata workers without the fix. +-- Use the schema-qualified function name so the constraint propagates to +-- workers correctly regardless of their search_path. +ALTER TABLE test_move ADD CONSTRAINT slow_copy + CHECK (blocking_move_idle_timeout.sleep_and_true(0.1)) NOT VALID; + +-- move a shard using block_writes; should succeed despite the 1s timeout +SELECT citus_move_shard_placement(8980000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes'); +SELECT public.wait_for_resource_cleanup(); + +-- verify data integrity after move +SELECT count(*) FROM test_move; + +-- cleanup: restore idle_in_transaction_session_timeout +SELECT 1 FROM run_command_on_all_nodes( + 'ALTER SYSTEM RESET idle_in_transaction_session_timeout'); +SELECT 1 FROM run_command_on_all_nodes('SELECT pg_reload_conf()'); + +DROP SCHEMA blocking_move_idle_timeout CASCADE;