diff --git a/simplyblock_core/controllers/lvol_controller.py b/simplyblock_core/controllers/lvol_controller.py index 87fd2ad99..c8917abbb 100755 --- a/simplyblock_core/controllers/lvol_controller.py +++ b/simplyblock_core/controllers/lvol_controller.py @@ -685,7 +685,16 @@ def add_lvol_ha(name, size, host_id_or_name, ha_type, pool_id_or_name, use_comp= logger.exception(msg) return False, msg - lvol.write_to_db(db_controller.kv_store) + if lvol.namespace: + # Namespaced lvol: verify the subsystem slot count atomically before + # writing. FDB's OCC retries the losing transaction when two parallel + # creates race for the last free slot, so max_namespace_per_subsys is + # never exceeded. + if not db_controller.write_lvol_with_ns_check(lvol): + logger.error("Subsystem %s reached capacity during concurrent create", lvol.nqn) + return False, "Subsystem namespace limit reached concurrently; retry" + else: + lvol.write_to_db(db_controller.kv_store) if ha_type == "single": if host_node.status == StorageNode.STATUS_ONLINE: diff --git a/simplyblock_core/controllers/snapshot_controller.py b/simplyblock_core/controllers/snapshot_controller.py index e727e1838..01f5bd8b0 100755 --- a/simplyblock_core/controllers/snapshot_controller.py +++ b/simplyblock_core/controllers/snapshot_controller.py @@ -749,7 +749,16 @@ def clone(snapshot_id, clone_name, new_size=0, pvc_name=None, pvc_namespace=None logger.exception(msg) return False, msg - lvol.write_to_db(db_controller.kv_store) + if lvol.namespace: + # Namespaced clone: verify the subsystem slot count atomically before + # writing. FDB's OCC retries the losing transaction when two parallel + # clones race for the last free slot, so max_namespace_per_subsys is + # never exceeded. + if not db_controller.write_lvol_with_ns_check(lvol): + logger.error("Subsystem %s reached capacity during concurrent clone", lvol.nqn) + return False, "Subsystem namespace limit reached concurrently; retry the clone" + else: + lvol.write_to_db(db_controller.kv_store) if lvol.ha_type == "single": lvol_bdev, error = lvol_controller.add_lvol_on_node(lvol, snode) diff --git a/simplyblock_core/db_controller.py b/simplyblock_core/db_controller.py index e3e27fc00..309bb65c9 100644 --- a/simplyblock_core/db_controller.py +++ b/simplyblock_core/db_controller.py @@ -427,6 +427,41 @@ def release_backup_chain_locks(self, snapshot_ids): transactional = fdb.transactional(DBController._release_backup_chain_locks_tx) transactional(self, self.kv_store, ordered_snapshot_ids) + # ---- Subsystem Namespace Slot Guard (Single FDB Transaction) ---- + + def _write_lvol_with_ns_check_tx(self, tr, node_id, nqn, max_ns, lvol_key, lvol_data): + """Count active namespaces for *nqn* on *node_id* inside a single FDB + transaction and write the new lvol record only if the subsystem still + has room. Because the range-read and the write share one transaction, + FDB's OCC serialises concurrent writers to the same subsystem + automatically — no explicit lock needed. + Returns False (without writing) when the subsystem is already full. + """ + live = 0 + for _, v in tr.get_range_startswith(b"object/LVol/"): + d = json.loads(v) + if (d.get("node_id") == node_id + and d.get("nqn") == nqn + and d.get("status") not in (LVol.STATUS_IN_DELETION, LVol.STATUS_DELETED)): + live += 1 + if live >= max_ns: + return False + tr[lvol_key] = lvol_data + return True + + def write_lvol_with_ns_check(self, lvol): + """Write a namespaced lvol to FDB while atomically verifying the + per-subsystem namespace limit is not exceeded. Use in place of + lvol.write_to_db() when lvol.namespace is set (namespaced placement). + """ + fn = fdb.transactional(DBController._write_lvol_with_ns_check_tx) + return fn( + self, self.kv_store, + lvol.node_id, lvol.nqn, lvol.max_namespace_per_subsys, + lvol.get_db_id().encode(), + json.dumps(lvol.to_dict()).encode(), + ) + # ---- Pre-Restart Guard (Single FDB Transaction) ---- def _try_set_node_restarting_tx(self, tr, cluster_id, node_id):