From 124673e3aa33e48da119f8496c78753de9818182 Mon Sep 17 00:00:00 2001 From: Apinant U-suwantim Date: Fri, 13 Feb 2026 07:04:45 +0700 Subject: [PATCH] fix: retry mkfs on next reconciliation if interrupted Both MkfsUtils.makeFileSystemOnMarked() and DrbdLayer.condInitialOrSkipSync() clear their one-shot gate flags before mkfs runs. If mkfs is interrupted (timeout or failure while the satellite stays running), the flags are already cleared and mkfs is never retried, leaving the DRBD device without a filesystem and the volume stuck in FailedMount. Clear both flags only after mkfs succeeds: - MkfsUtils: move disableCheckFileSystem() from before the mkfs loop to after it completes. If mkfs throws, the exception exits the method before the flag is cleared, so the next reconciliation retries. - DrbdLayer: move unsetCreatePrimary() from before the mkfs block to after it completes. This keeps the createPrimary gate open on failure so the DRBD path re-enters on the next device manager run. The existing blkid check (hasFileSystem) already guards against reformatting volumes that have a filesystem, so volumes that were successfully formatted during a partial run are not reformatted on retry. 
--- .../src/main/java/com/linbit/linstor/layer/drbd/DrbdLayer.java | 2 +- .../main/java/com/linbit/linstor/storage/utils/MkfsUtils.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/satellite/src/main/java/com/linbit/linstor/layer/drbd/DrbdLayer.java b/satellite/src/main/java/com/linbit/linstor/layer/drbd/DrbdLayer.java index 31d9b5c56..e0e4bd613 100644 --- a/satellite/src/main/java/com/linbit/linstor/layer/drbd/DrbdLayer.java +++ b/satellite/src/main/java/com/linbit/linstor/layer/drbd/DrbdLayer.java @@ -1770,7 +1770,6 @@ private void condInitialOrSkipSync(DrbdRscData drbdRscData) // Set the resource primary (--force) to trigger an initial sync of all // fat provisioned volumes - rsc.unsetCreatePrimary(); if (haveFatVlm) { errorReporter.logTrace("Setting resource primary on %s", drbdRscData.getSuffixedResourceName()); @@ -1795,6 +1794,7 @@ private void condInitialOrSkipSync(DrbdRscData drbdRscData) throw new StorageException("Failed to become secondary again after creating filesystem", exc); } } + rsc.unsetCreatePrimary(); } } catch (InvalidKeyException invalidKeyExc) diff --git a/server/src/main/java/com/linbit/linstor/storage/utils/MkfsUtils.java b/server/src/main/java/com/linbit/linstor/storage/utils/MkfsUtils.java index d23ec534d..f9ad9c228 100644 --- a/server/src/main/java/com/linbit/linstor/storage/utils/MkfsUtils.java +++ b/server/src/main/java/com/linbit/linstor/storage/utils/MkfsUtils.java @@ -133,7 +133,6 @@ public static void makeFileSystemOnMarked( { if (rsc.getLayerData(wrkCtx).checkFileSystem()) { - rsc.getLayerData(wrkCtx).disableCheckFileSystem(); for (AbsVolume vlm : rsc.streamVolumes().collect(Collectors.toList())) { VolumeDefinition vlmDfn = vlm.getVolumeDefinition(); @@ -251,6 +250,7 @@ public static void makeFileSystemOnMarked( // else Check for mismatch? } } + rsc.getLayerData(wrkCtx).disableCheckFileSystem(); } }