-
Notifications
You must be signed in to change notification settings - Fork 271
feat: add checkpoint/resume system #196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
giveen
wants to merge
7
commits into
SnaffCon:master
Choose a base branch
from
giveen:checkpoint
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
1b73518
feat: add checkpoint/resume system
giveen 36ac4cc
build: add compiled Release exe and cross-platform build fixes
giveen 05a50f3
fix: move checkpoint mark to end of WalkTree; add load-time dedup
giveen c15d40e
fix: resolve two resume bugs
giveen 317923a
Fix checkpoint review issues
giveen a90e58a
fix: checkpoint correctness bugs in SCCM early-return and partial-loa…
giveen 8ce9631
fix: minor correctness and noise fixes
giveen File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.Runtime.Serialization; | ||
|
|
||
| namespace SnaffCore.Checkpoint | ||
| { | ||
| /// <summary> | |
| /// Data contract for the state persisted in a checkpoint file: a timestamp | |
| /// plus the directories and computers recorded as already processed. | |
| /// </summary> | |
| [DataContract] | |
| public class CheckpointData | |
| { | |
|     /// <summary> | |
|     /// Creates a snapshot whose two lists start out empty rather than null. | |
|     /// </summary> | |
|     public CheckpointData() | |
|     { | |
|         ScannedDirectories = new List<string>(); | |
|         ScannedComputers = new List<string>(); | |
|     } | |
|     /// <summary>Timestamp recorded when the checkpoint was written.</summary> | |
|     [DataMember] | |
|     public DateTime CheckpointTime { get; set; } | |
|     /// <summary> | |
|     /// Paths of the directories recorded as scanned. | |
|     /// NOTE(review): the consumer that skips these on resume is not part | |
|     /// of this class - confirm against the tree-walking code. | |
|     /// </summary> | |
|     [DataMember] | |
|     public List<string> ScannedDirectories { get; set; } | |
|     /// <summary> | |
|     /// Hostnames / IPs of the computers recorded as scanned. | |
|     /// NOTE(review): the consumer that skips these on resume is not part | |
|     /// of this class - confirm against the share-discovery code. | |
|     /// </summary> | |
|     [DataMember] | |
|     public List<string> ScannedComputers { get; set; } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,297 @@ | ||
| using System; | ||
| using System.Collections.Concurrent; | ||
| using System.IO; | ||
| using System.Runtime.Serialization.Json; | ||
| using System.Text; | ||
| using SnaffCore.Concurrency; | ||
|
|
||
| namespace SnaffCore.Checkpoint | ||
| { | ||
| /// <summary> | ||
| /// Thread-safe singleton that tracks scan progress and handles periodic | ||
| /// checkpointing to disk. Use <see cref="Initialize"/> before starting a | ||
| /// scan and <see cref="GetInstance"/> everywhere else. | ||
| /// </summary> | ||
| public class CheckpointManager | ||
| { | ||
| // ------------------------------------------------------------------ // | ||
| // Singleton plumbing // | ||
| // ------------------------------------------------------------------ // | ||
|
|
||
| private static volatile CheckpointManager _instance; | ||
| private static readonly object _createLock = new object(); | ||
|
|
||
| /// <summary> | |
| /// Returns the manager most recently created by Initialize, or null when | |
| /// Initialize has not been called yet. | |
| /// </summary> | |
| public static CheckpointManager GetInstance() | |
| { | |
|     return _instance; | |
| } | |
|
|
||
| /// <summary> | |
| /// Builds a new manager for the given checkpoint path and publishes it as | |
| /// the singleton, replacing any instance created by an earlier call. | |
| /// The constructor restores state when a checkpoint file already exists. | |
| /// </summary> | |
| /// <param name="checkpointFilePath">Checkpoint file path, or a directory in which to place it.</param> | |
| /// <returns>The newly created manager.</returns> | |
| public static CheckpointManager Initialize(string checkpointFilePath) | |
| { | |
|     // Creation and publication happen under one lock so concurrent | |
|     // callers each get back the instance they themselves created. | |
|     lock (_createLock) | |
|     { | |
|         var created = new CheckpointManager(checkpointFilePath); | |
|         _instance = created; | |
|         return created; | |
|     } | |
| } | |
|
|
||
| // ------------------------------------------------------------------ // | ||
| // State // | ||
| // ------------------------------------------------------------------ // | ||
|
|
||
| // Use ConcurrentDictionary as a thread-safe HashSet. | ||
| private readonly ConcurrentDictionary<string, byte> _scannedDirectories | ||
| = new ConcurrentDictionary<string, byte>(StringComparer.OrdinalIgnoreCase); | ||
|
|
||
| private readonly ConcurrentDictionary<string, byte> _scannedComputers | ||
| = new ConcurrentDictionary<string, byte>(StringComparer.OrdinalIgnoreCase); | ||
|
|
||
| private readonly string _filePath; | ||
| private readonly object _saveLock = new object(); | ||
|
|
||
| // ------------------------------------------------------------------ // | ||
| // Public properties // | ||
| // ------------------------------------------------------------------ // | ||
|
|
||
| /// <summary>Path of the checkpoint file this manager reads and writes.</summary> | |
| public string FilePath | |
| { | |
|     get { return _filePath; } | |
| } | |
| /// <summary>True once an existing checkpoint file has been loaded successfully.</summary> | |
| public bool IsRestoring { get; private set; } | |
| /// <summary> | |
| /// Number of directory entries currently recorded, whether restored from | |
| /// a checkpoint or marked during this run. | |
| /// </summary> | |
| public int ScannedDirectoryCount | |
| { | |
|     get { return _scannedDirectories.Count; } | |
| } | |
| /// <summary> | |
| /// Number of computer entries currently recorded, whether restored from | |
| /// a checkpoint or marked during this run. | |
| /// </summary> | |
| public int ScannedComputerCount | |
| { | |
|     get { return _scannedComputers.Count; } | |
| } | |
|
|
||
| // ------------------------------------------------------------------ // | ||
| // Constructor // | ||
| // ------------------------------------------------------------------ // | ||
|
|
||
| /// <summary> | |
| /// Resolves the checkpoint file location and, when that file already | |
| /// exists, restores the recorded state from it. | |
| /// </summary> | |
| /// <param name="filePath">Checkpoint file path, or an existing directory to hold it.</param> | |
| private CheckpointManager(string filePath) | |
| { | |
|     // An existing directory (e.g. "." or "C:\Logs") is not usable as the | |
|     // checkpoint file itself, so a default file name is placed inside it. | |
|     _filePath = Directory.Exists(filePath) | |
|         ? Path.Combine(filePath, "snaffler_checkpoint.json") | |
|         : filePath; | |
|     if (!File.Exists(_filePath)) | |
|     { | |
|         return; | |
|     } | |
|     TryLoad(); | |
| } | |
|
|
||
| // ------------------------------------------------------------------ // | ||
| // Directory tracking // | ||
| // ------------------------------------------------------------------ // | ||
|
|
||
| /// <summary> | |
| /// Reports whether the directory is recorded as scanned, either restored | |
| /// from a checkpoint or marked during this run. | |
| /// </summary> | |
| /// <param name="path">Directory path; null or blank input yields false.</param> | |
| /// <returns>True when the normalised path is in the recorded set.</returns> | |
| public bool IsDirectoryScanned(string path) | |
| { | |
|     if (string.IsNullOrWhiteSpace(path)) | |
|     { | |
|         return false; | |
|     } | |
|     string key = NormalisePath(path); | |
|     return _scannedDirectories.ContainsKey(key); | |
| } | |
|
|
||
| /// <summary> | |
| /// Records the directory as scanned. Null or blank paths are ignored, | |
| /// and re-marking an already recorded path is a no-op. | |
| /// </summary> | |
| /// <param name="path">Directory path to record.</param> | |
| public void MarkDirectoryScanned(string path) | |
| { | |
|     if (string.IsNullOrWhiteSpace(path)) | |
|     { | |
|         return; | |
|     } | |
|     string key = NormalisePath(path); | |
|     _scannedDirectories.TryAdd(key, 0); | |
| } | |
|
|
||
| // ------------------------------------------------------------------ // | ||
| // Computer tracking // | ||
| // ------------------------------------------------------------------ // | ||
|
|
||
| /// <summary> | |
| /// Reports whether the computer is recorded as scanned, either restored | |
| /// from a checkpoint or marked during this run. | |
| /// </summary> | |
| /// <param name="computer">Hostname or IP; null or blank input yields false.</param> | |
| /// <returns>True when the normalised host is in the recorded set.</returns> | |
| public bool IsComputerScanned(string computer) | |
| { | |
|     if (string.IsNullOrWhiteSpace(computer)) | |
|     { | |
|         return false; | |
|     } | |
|     string key = NormaliseHost(computer); | |
|     return _scannedComputers.ContainsKey(key); | |
| } | |
|
|
||
| /// <summary> | |
| /// Records the computer as scanned. Null or blank names are ignored, | |
| /// and re-marking an already recorded host is a no-op. | |
| /// </summary> | |
| /// <param name="computer">Hostname or IP to record.</param> | |
| public void MarkComputerScanned(string computer) | |
| { | |
|     if (string.IsNullOrWhiteSpace(computer)) | |
|     { | |
|         return; | |
|     } | |
|     string key = NormaliseHost(computer); | |
|     _scannedComputers.TryAdd(key, 0); | |
| } | |
|
|
||
| // ------------------------------------------------------------------ // | ||
| // Persistence // | ||
| // ------------------------------------------------------------------ // | ||
|
|
||
| /// <summary> | |
| /// Writes the currently recorded directories and computers to the | |
| /// checkpoint file. Concurrent callers are serialised by a lock. Any | |
| /// failure is reported through the message queue and is not rethrown. | |
| /// </summary> | |
| public void SaveCheckpoint() | |
| { | |
|     lock (_saveLock) | |
|     { | |
|         try | |
|         { | |
|             var snapshot = new CheckpointData(); | |
|             snapshot.CheckpointTime = DateTime.UtcNow; | |
|             snapshot.ScannedDirectories = new System.Collections.Generic.List<string>(_scannedDirectories.Keys); | |
|             snapshot.ScannedComputers = new System.Collections.Generic.List<string>(_scannedComputers.Keys); | |
|             string payload = Serialise(snapshot); | |
|             // The JSON goes to a side file first and is only then swapped | |
|             // into place, so a process killed mid-write leaves the | |
|             // previous checkpoint untouched. | |
|             string staging = _filePath + ".tmp"; | |
|             string backup = _filePath + ".bak"; | |
|             File.WriteAllText(staging, payload, Encoding.UTF8); | |
|             if (!File.Exists(_filePath)) | |
|             { | |
|                 // First save: there is nothing to replace yet, so a plain | |
|                 // move of the staging file is used. | |
|                 File.Move(staging, _filePath); | |
|             } | |
|             else | |
|             { | |
|                 // Later saves: swap in the new file and keep the old one | |
|                 // as a .bak copy. | |
|                 File.Replace(staging, _filePath, backup); | |
|             } | |
|             BlockingMq.GetMq()?.Info( | |
|                 string.Format("[Checkpoint] Saved checkpoint ({0} dirs, {1} computers) → {2}", | |
|                     snapshot.ScannedDirectories.Count, | |
|                     snapshot.ScannedComputers.Count, | |
|                     _filePath)); | |
|         } | |
|         catch (Exception failure) | |
|         { | |
|             BlockingMq.GetMq()?.Error("[Checkpoint] Failed to save checkpoint: " + failure.Message); | |
|         } | |
|     } | |
| } | |
|
|
||
| // ------------------------------------------------------------------ // | ||
| // Private helpers // | ||
| // ------------------------------------------------------------------ // | ||
|
|
||
| /// <summary> | |
| /// Restores the recorded directories and computers from the checkpoint | |
| /// file, prunes directory entries that are covered by a recorded ancestor, | |
| /// and sets IsRestoring. Any failure is reported on the console and | |
| /// leaves the manager empty, with IsRestoring false. | |
| /// </summary> | |
| private void TryLoad() | |
| { | |
|     try | |
|     { | |
|         string json = File.ReadAllText(_filePath, Encoding.UTF8); | |
|         CheckpointData data = Deserialise(json); | |
|         if (data == null) return; | |
|         // Tolerate a missing list, and skip null elements: one malformed | |
|         // entry should not throw and discard the whole checkpoint. | |
|         if (data.ScannedDirectories != null) | |
|         { | |
|             foreach (string d in data.ScannedDirectories) | |
|             { | |
|                 if (d != null) | |
|                     _scannedDirectories.TryAdd(NormalisePath(d), 0); | |
|             } | |
|         } | |
|         if (data.ScannedComputers != null) | |
|         { | |
|             foreach (string c in data.ScannedComputers) | |
|             { | |
|                 if (c != null) | |
|                     _scannedComputers.TryAdd(NormaliseHost(c), 0); | |
|             } | |
|         } | |
|         // Deduplication: drop every directory whose ancestor is also | |
|         // recorded. Example: with both \\srv\share and \\srv\share\sub | |
|         // present, \\srv\share\sub is redundant. | |
|         // | |
|         // The entries are sorted so that all descendants of a path follow | |
|         // it directly, then pruned in a single linear pass. Plain ordinal | |
|         // order is not enough because '\' (ASCII 92) sorts after digits | |
|         // and uppercase letters ("SHARE2" < "SHARE\A"), so each entry gets | |
|         // a sort key in which both separators become '\x01', which is | |
|         // lower than every printable character. The keys are computed | |
|         // once per entry rather than inside the comparison callback. | |
|         var keySnapshot = _scannedDirectories.Keys; | |
|         string[] dirs = new string[keySnapshot.Count]; | |
|         keySnapshot.CopyTo(dirs, 0); | |
|         string[] sortKeys = new string[dirs.Length]; | |
|         for (int i = 0; i < dirs.Length; i++) | |
|         { | |
|             sortKeys[i] = dirs[i].Replace('\\', '\x01').Replace('/', '\x01'); | |
|         } | |
|         // Sorts dirs into the order defined by sortKeys. | |
|         Array.Sort(sortKeys, dirs, StringComparer.Ordinal); | |
|         int pruned = 0; | |
|         string lastKept = null; | |
|         foreach (string dir in dirs) | |
|         { | |
|             bool isDescendant = lastKept != null && | |
|                 (dir.StartsWith(lastKept + "\\", StringComparison.OrdinalIgnoreCase) || | |
|                  dir.StartsWith(lastKept + "/", StringComparison.OrdinalIgnoreCase)); | |
|             if (isDescendant) | |
|             { | |
|                 // lastKept is deliberately left unchanged so deeper | |
|                 // descendants are still matched against the same ancestor. | |
|                 byte ignored; | |
|                 _scannedDirectories.TryRemove(dir, out ignored); | |
|                 pruned++; | |
|             } | |
|             else | |
|             { | |
|                 lastKept = dir; | |
|             } | |
|         } | |
|         IsRestoring = true; | |
|         Console.WriteLine(string.Format( | |
|             "[Checkpoint] Loaded checkpoint from {0} (written {1} UTC).", | |
|             _filePath, | |
|             data.CheckpointTime.ToString("u"))); | |
|         Console.WriteLine(string.Format( | |
|             "[Checkpoint] Resuming – will skip {0} directories and {1} computers ({2} redundant dir entries pruned).", | |
|             _scannedDirectories.Count, | |
|             _scannedComputers.Count, | |
|             pruned)); | |
|     } | |
|     catch (Exception ex) | |
|     { | |
|         // A partially applied load is rolled back so the run starts clean. | |
|         Console.WriteLine("[Checkpoint] WARNING – could not load checkpoint (" + ex.Message + "). Starting fresh."); | |
|         _scannedDirectories.Clear(); | |
|         _scannedComputers.Clear(); | |
|         IsRestoring = false; | |
|     } | |
| } | |
|
|
||
| /// <summary> | |
| /// Canonical form used for directory keys: trailing '\' and '/' characters | |
| /// are removed and the remainder is upper-cased (invariant culture). | |
| /// </summary> | |
| /// <param name="p">Path to normalise; must not be null.</param> | |
| /// <returns>The normalised path; a path made only of separators becomes empty.</returns> | |
| private static string NormalisePath(string p) | |
| { | |
|     char[] separators = { '\\', '/' }; | |
|     string withoutTrailing = p.TrimEnd(separators); | |
|     return withoutTrailing.ToUpperInvariant(); | |
| } | |
|
|
||
| /// <summary> | |
| /// Canonical form used for computer keys: surrounding whitespace is | |
| /// removed and the remainder is lower-cased (invariant culture). | |
| /// </summary> | |
| /// <param name="h">Hostname or IP to normalise; must not be null.</param> | |
| /// <returns>The normalised host string.</returns> | |
| private static string NormaliseHost(string h) | |
| { | |
|     string trimmed = h.Trim(); | |
|     return trimmed.ToLowerInvariant(); | |
| } | |
|
|
||
| /// <summary> | |
| /// Converts a checkpoint snapshot to its JSON text. | |
| /// DataContractJsonSerializer is used so no extra NuGet dependency is needed. | |
| /// </summary> | |
| /// <param name="data">Snapshot to serialise.</param> | |
| /// <returns>The JSON document as a string.</returns> | |
| private static string Serialise(CheckpointData data) | |
| { | |
|     using (var buffer = new MemoryStream()) | |
|     { | |
|         var serializer = new DataContractJsonSerializer(typeof(CheckpointData)); | |
|         serializer.WriteObject(buffer, data); | |
|         // The serializer emits bytes; decode them as UTF-8 to get text. | |
|         byte[] utf8 = buffer.ToArray(); | |
|         return Encoding.UTF8.GetString(utf8); | |
|     } | |
| } | |
|
|
||
| /// <summary> | |
| /// Parses checkpoint JSON text back into a snapshot object. | |
| /// </summary> | |
| /// <param name="json">JSON text, as produced by Serialise.</param> | |
| /// <returns>The object read by the serializer, cast to CheckpointData.</returns> | |
| private static CheckpointData Deserialise(string json) | |
| { | |
|     // The serializer reads from a stream, so the text is encoded to | |
|     // UTF-8 bytes first. | |
|     byte[] utf8 = Encoding.UTF8.GetBytes(json); | |
|     using (var input = new MemoryStream(utf8)) | |
|     { | |
|         var serializer = new DataContractJsonSerializer(typeof(CheckpointData)); | |
|         object graph = serializer.ReadObject(input); | |
|         return (CheckpointData)graph; | |
|     } | |
| } | |
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.