From b5baa551c9d5da2fb5b3aaff14276096e6f4b48f Mon Sep 17 00:00:00 2001 From: Jaideep Karande Date: Tue, 10 Feb 2026 18:50:38 +0530 Subject: [PATCH] PXC-4799: PXC member used for backups fails with Inconsistency when pending DDL clashes with FTRWL Problem: LOCK..BACKUP with FTWRL and DDL on remote node caused inconsistent state. Description: FTWRL caused MDL lock and waited for DDL to finish in the PXC. FTWRL specifically waits in desync_and_pause->pause statement waiting for concurrent pause statement to finish. Resolution: DDLs cannot be executed in parallel with FTWRL in local node it will fail. So on local node ER_CANT_UPDATE_WITH_READLOCK is seen post FTWRL FLUSH TABLES WITH READ LOCK; --error ER_CANT_UPDATE_WITH_READLOCK CREATE TABLE t1 (id INT PRIMARY KEY, a INT); However in PXC remote node not aware of FTWRL on another node accepts the DDL and sends across the cluster opening a pandora of unknown conditions. So, we prevent FLUSH TABLES WITH READ LOCK when the session already holds explicit table locks. We also check that pause will block; with ongoing TO isolation, MDL locks are likely to conflict with FTWRL, so it is best to fail FTWRL. By rejecting FLUSH TABLES WITH READ LOCK in this situation, we avoid the deadlock-like condition and allow the remote DDL to proceed to avoid situations like below: NODE-2 LOCK INSTANCE FOR BACKUP; NODE-1 CREATE TABLE t_synch1 (id INT PRIMARY KEY, a INT); NODE-2 FLUSH TABLES WITH READ LOCK; NODE-2 UNLOCK TABLE; UNLOCK INSTANCE; NOTE: Even with this safeguard, if the DDL waiting for MDL exceeds lock_wait_timeout, the DDL will fail while waiting for the maitenance operations to finish. FLUSH or DDL will not block so the session can execute UNLOCK for the DDL to complete. This behavior is specific to PXC. In standalone PS/PXC servers, such conflicting operations fail naturally, but in multi-node PXC deployments, DDL may arrive from another node, making this protection necessary. --- .gitmodules | 4 +- include/wsrep/provider.hpp | 1 + include/wsrep/server_state.hpp | 12 ++++++ src/server_state.cpp | 76 ++++++++++++++++++++++++++++++++++ src/wsrep_provider_v26.cpp | 7 ++++ src/wsrep_provider_v26.hpp | 1 + wsrep-API/v26 | 2 +- 7 files changed, 100 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 43609d0b..30061dfe 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "wsrep-API/v26"] path = wsrep-API/v26 - url = https://github.com/percona/wsrep-API.git - branch = percona-4.x-8.0 + url = https://github.com/jaideepkarande/wsrep-API.git + branch = PXC-4799-4x-80 diff --git a/include/wsrep/provider.hpp b/include/wsrep/provider.hpp index b9f64a81..771af2c2 100644 --- a/include/wsrep/provider.hpp +++ b/include/wsrep/provider.hpp @@ -324,6 +324,7 @@ namespace wsrep virtual int resync() = 0; virtual wsrep::seqno pause() = 0; + virtual wsrep::seqno try_pause() = 0; virtual int resume() = 0; // Applier interface diff --git a/include/wsrep/server_state.hpp b/include/wsrep/server_state.hpp index 17c44ec9..3e0b8186 100644 --- a/include/wsrep/server_state.hpp +++ b/include/wsrep/server_state.hpp @@ -499,6 +499,18 @@ namespace wsrep */ wsrep::seqno desync_and_pause(); + /** + * Try to desync and pause the provider without blocking. + * + * This call relies on provider-level try_pause() which should return + * immediately with a negative value (e.g. -EAGAIN) if pausing would + * block. On such condition this method returns undefined seqno. + * + * @return Pause seqno on success, undefined seqno if pausing would + * block or fails. + */ + wsrep::seqno try_desync_and_pause(); + /** * Resume and resync the provider on one go. Prior this * call the provider must have been both desynced and paused, diff --git a/src/server_state.cpp b/src/server_state.cpp index d337887a..1fe44dbd 100644 --- a/src/server_state.cpp +++ b/src/server_state.cpp @@ -636,6 +636,82 @@ wsrep::seqno wsrep::server_state::desync_and_pause() return ret; } +/* + In try_desync_and_pause function instead of calling + server_state::pause/try_pause we have merged 2 functions into one and added + pause pre-check first followed by desynch and actual try_pause. + So desync_and_pause and try_desync_and_pause functions are same just that + in try_desync_and_pause we call try_pause instead of pause and + handle the case when try_pause returns state where the pausing would block. + In that case we roll back the desync(refer pause, resume and + resume_and_resync function) and return undefined seqno to indicate failure. + If try_pause is successful then we return the pause seqno as before. +*/ +wsrep::seqno wsrep::server_state::try_desync_and_pause() +{ + wsrep::log_info() << "Attempting to desync and pause the provider (non-blocking)"; + + wsrep::unique_lock lock(mutex_); + + if (!pause_seqno_.is_undefined()) + { + wsrep::log_info() << "Provider already paused at: " << pause_seqno_; + return pause_seqno_; + } + + if (state(lock) != s_synced) + { + wsrep::log_info() << "Cannot pause: server not in synced state"; + return wsrep::seqno::undefined(); + } + + while (pause_count_ > 0) + { + cond_.wait(lock); + if (!pause_seqno_.is_undefined()) + { + wsrep::log_info() << "Provider already paused at: " << pause_seqno_; + return pause_seqno_; + } + } + + ++pause_count_; + assert(pause_seqno_.is_undefined()); + lock.unlock(); + + // Desync may fail transiently; tolerate if we can pause. + bool const desync_successful(desync() == 0); + if (!desync_successful) + { + WSREP_LOG_DEBUG(wsrep::log::debug_log_level(), + wsrep::log::debug_level_server_state, + "Failed to desync server before try_pause"); + } + + wsrep::seqno ret(provider().try_pause()); + + lock.lock(); + if (ret.get() < 0) + { + --pause_count_; + cond_.notify_all(); // Not sure this is needed since we never paused. + lock.unlock(); + + if (desync_successful) + { + resync(); + } + return wsrep::seqno::undefined(); + } + + pause_seqno_ = ret; + desynced_on_pause_ = desync_successful; + lock.unlock(); + + wsrep::log_info() << "Provider paused at: " << ret; + return ret; +} + void wsrep::server_state::resume_and_resync() { wsrep::log_info() << "Resuming and resyncing the provider"; diff --git a/src/wsrep_provider_v26.cpp b/src/wsrep_provider_v26.cpp index 2967765f..643aebfd 100644 --- a/src/wsrep_provider_v26.cpp +++ b/src/wsrep_provider_v26.cpp @@ -975,6 +975,13 @@ wsrep::seqno wsrep::wsrep_provider_v26::pause() return wsrep::seqno(wsrep_->pause(wsrep_)); } +wsrep::seqno wsrep::wsrep_provider_v26::try_pause() +{ + return (wsrep_ && wsrep_->try_pause) + ? wsrep::seqno(wsrep_->try_pause(wsrep_)) + : wsrep::seqno::undefined(); +} + int wsrep::wsrep_provider_v26::resume() { return (wsrep_->resume(wsrep_) != WSREP_OK); diff --git a/src/wsrep_provider_v26.hpp b/src/wsrep_provider_v26.hpp index 815f578e..0d2e2774 100644 --- a/src/wsrep_provider_v26.hpp +++ b/src/wsrep_provider_v26.hpp @@ -48,6 +48,7 @@ namespace wsrep int desync() WSREP_OVERRIDE; int resync() WSREP_OVERRIDE; wsrep::seqno pause() WSREP_OVERRIDE; + wsrep::seqno try_pause() WSREP_OVERRIDE; int resume() WSREP_OVERRIDE; enum wsrep::provider::status diff --git a/wsrep-API/v26 b/wsrep-API/v26 index 0c31e780..28330057 160000 --- a/wsrep-API/v26 +++ b/wsrep-API/v26 @@ -1 +1 @@ -Subproject commit 0c31e7805835bf21a0231e773dda5106707999d7 +Subproject commit 28330057b7a1542996b9348031490f47b86a50d9