Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions KtsuTools.FileDedupe/FileDedupeService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Copyright (c) ktsu.dev
// All rights reserved.
// Licensed under the MIT license.

namespace KtsuTools.FileDedupe;

using System.Collections.Concurrent;
using System.Collections.ObjectModel;
using System.Security.Cryptography;
using ktsu.Semantics.Paths;
using Spectre.Console;

/// <summary>
/// A set of files with byte-identical content, identified by SHA256.
/// </summary>
/// <param name="Hash">Hex-encoded SHA256 digest shared by every file in the group.</param>
/// <param name="FileSize">Size in bytes recorded for one member of the group when the group was built.</param>
/// <param name="Files">
/// Paths of the files in the group. <c>FileDedupeService.PlanAsync</c> orders them by file-name
/// length, so the first entry is the copy it chooses to keep.
/// </param>
public sealed record DuplicateGroup(string Hash, long FileSize, Collection<string> Files);

/// <summary>
/// Result of a dedupe planning pass.
/// </summary>
/// <param name="Groups">Every set of byte-identical files that was found.</param>
/// <param name="Keepers">The one file per group that is retained.</param>
/// <param name="Removals">The files that would be deleted when the plan is applied.</param>
public sealed record DedupePlan(
    IReadOnlyList<DuplicateGroup> Groups,
    IReadOnlyList<string> Keepers,
    IReadOnlyList<string> Removals)
{
    /// <summary>
    /// Gets the number of bytes occupied by redundant copies: for each group, the file size
    /// multiplied by the number of files beyond the first.
    /// </summary>
    /// <remarks>
    /// Computed on access rather than captured at construction, so a copy made with
    /// <c>plan with { Groups = ... }</c> reports a value that matches its own groups.
    /// </remarks>
    public long WastedBytes => ComputeWastedBytes(Groups);

    /// <summary>Sums the bytes taken by every file after the first in each group.</summary>
    private static long ComputeWastedBytes(IReadOnlyList<DuplicateGroup> groups)
    {
        long total = 0;
        foreach (DuplicateGroup g in groups)
        {
            // An empty group has no redundant copies; never let it contribute a negative amount.
            total += g.FileSize * Math.Max(0, g.Files.Count - 1);
        }

        return total;
    }
}

/// <summary>
/// Summary figures for a <see cref="DedupePlan"/>, as produced by <c>FileDedupeService.Summarize</c>.
/// </summary>
/// <param name="FilesScanned">Number of files scanned; <c>Summarize</c> passes through the value its caller supplies.</param>
/// <param name="DuplicateGroups">Number of duplicate groups in the plan.</param>
/// <param name="RedundantFiles">Sum over all groups of (file count - 1).</param>
/// <param name="WastedBytes">The plan's <see cref="DedupePlan.WastedBytes"/> value.</param>
/// <param name="CountByExtension">
/// Number of files in duplicate groups per file extension. <c>Summarize</c> builds it with a
/// case-insensitive key comparer and counts files without an extension under "(none)".
/// </param>
public sealed record DedupeStats(
int FilesScanned,
int DuplicateGroups,
int RedundantFiles,
long WastedBytes,
Dictionary<string, int> CountByExtension);

/// <summary>
/// Scans a directory tree, groups files by SHA256, and applies (or previews)
/// "shortest filename wins" deduplication.
/// </summary>
public class FileDedupeService
{
#pragma warning disable CA1822 // instance method required for DI injection
    /// <summary>
    /// Hashes every readable file under <paramref name="path"/> and builds a plan that keeps,
    /// for each set of byte-identical files, the one with the shortest file name.
    /// </summary>
    /// <param name="path">Root directory to scan recursively.</param>
    /// <param name="ct">Token that cancels the hashing pass.</param>
    /// <returns>
    /// The duplicate groups with their keepers and removals. The plan is empty (and an error is
    /// written to the console) when the directory does not exist.
    /// </returns>
    public async Task<DedupePlan> PlanAsync(AbsoluteDirectoryPath path, CancellationToken ct = default)
#pragma warning restore CA1822
    {
        Ensure.NotNull(path);

        string root = path.ToString();
        if (!Directory.Exists(root))
        {
            AnsiConsole.MarkupLine($"[red]Error: Directory '{root.EscapeMarkup()}' does not exist.[/]");
            return new DedupePlan([], [], []);
        }

        // Same matching rules as GetFiles(root, "*", SearchOption.AllDirectories) (hidden and
        // system files included), except that a subdirectory we are not allowed to read is
        // skipped instead of aborting the whole scan. Unreadable files are skipped below too.
        EnumerationOptions scanOptions = new()
        {
            RecurseSubdirectories = true,
            IgnoreInaccessible = true,
            AttributesToSkip = 0,
            MatchType = MatchType.Win32,
        };

        IEnumerable<string> files = Directory.EnumerateFiles(root, "*", scanOptions);

        ConcurrentDictionary<string, ConcurrentBag<(string Path, long Size)>> byHash = new();

        await Parallel.ForEachAsync(
            files,
            new ParallelOptions { CancellationToken = ct, MaxDegreeOfParallelism = Environment.ProcessorCount },
            async (file, token) =>
            {
                try
                {
                    await using FileStream stream = File.OpenRead(file);
                    byte[] hashBytes = await SHA256.HashDataAsync(stream, token).ConfigureAwait(false);
                    string hash = Convert.ToHexString(hashBytes);
                    long size = new FileInfo(file).Length;

                    ConcurrentBag<(string Path, long Size)> bag = byHash.GetOrAdd(hash, _ => []);
                    bag.Add((file, size));
                }
                catch (IOException)
                {
                    // Skip unreadable files (locked, deleted mid-scan, etc.).
                }
                catch (UnauthorizedAccessException)
                {
                    // Skip files we are not permitted to read.
                }
            }).ConfigureAwait(false);

        List<DuplicateGroup> groups = [];
        List<string> keepers = [];
        List<string> removals = [];

        // The concurrent collections enumerate in an arbitrary order; sort by hash so the same
        // tree always yields the same plan.
        foreach ((string hash, ConcurrentBag<(string Path, long Size)> entries) in byHash.OrderBy(pair => pair.Key, StringComparer.Ordinal))
        {
            if (entries.Count < 2)
            {
                continue;
            }

            // Shortest file name wins. The final ordinal tie-break makes the keeper deterministic
            // even when two paths differ only by letter case.
            List<string> paths = [.. entries.Select(e => e.Path)
                .OrderBy(p => Path.GetFileName(p).Length)
                .ThenBy(p => p, StringComparer.OrdinalIgnoreCase)
                .ThenBy(p => p, StringComparer.Ordinal)];

            long size = entries.First().Size;
            groups.Add(new DuplicateGroup(hash, size, [.. paths]));

            keepers.Add(paths[0]);
            removals.AddRange(paths.Skip(1));
        }

        return new DedupePlan(groups, keepers, removals);
    }

#pragma warning disable CA1822
    /// <summary>
    /// Condenses a plan into headline statistics.
    /// </summary>
    /// <param name="plan">Plan to summarize.</param>
    /// <param name="filesScanned">Number of files that were scanned; reported back unchanged.</param>
    /// <returns>
    /// Group count, redundant-file count, wasted bytes, and a per-extension count of every file
    /// that belongs to a duplicate group (extension keys compare case-insensitively).
    /// </returns>
    public DedupeStats Summarize(DedupePlan plan, int filesScanned)
#pragma warning restore CA1822
    {
        Ensure.NotNull(plan);

        Dictionary<string, int> byExt = new(StringComparer.OrdinalIgnoreCase);
        int redundant = 0;

        foreach (DuplicateGroup group in plan.Groups)
        {
            redundant += group.Files.Count - 1;
            foreach (string file in group.Files)
            {
                string ext = Path.GetExtension(file);
                if (string.IsNullOrEmpty(ext))
                {
                    ext = "(none)";
                }

                byExt[ext] = byExt.TryGetValue(ext, out int c) ? c + 1 : 1;
            }
        }

        return new DedupeStats(filesScanned, plan.Groups.Count, redundant, plan.WastedBytes, byExt);
    }

#pragma warning disable CA1822
    /// <summary>
    /// Deletes the files listed in <see cref="DedupePlan.Removals"/>.
    /// </summary>
    /// <param name="plan">Plan whose removals are applied.</param>
    /// <returns>The number of files that existed and were deleted.</returns>
    /// <remarks>
    /// A removal is skipped, with a console message, when the file is already gone, when the
    /// copy kept for its group no longer exists, or when the delete fails with an I/O or
    /// access error.
    /// </remarks>
    public int DeleteRedundant(DedupePlan plan)
#pragma warning restore CA1822
    {
        Ensure.NotNull(plan);

        Dictionary<string, string> keeperByRemoval = MapRemovalsToKeepers(plan);

        int deleted = 0;
        foreach (string path in plan.Removals)
        {
            // Time may have passed since planning; never delete a duplicate whose kept copy
            // has disappeared, or the content could be lost entirely.
            if (keeperByRemoval.TryGetValue(path, out string? keeper) && !File.Exists(keeper))
            {
                ReportSkip(path, "kept copy no longer exists");
                continue;
            }

            // File.Delete does not throw for a missing file, so check first to keep the
            // returned count honest.
            if (!File.Exists(path))
            {
                ReportSkip(path, "file no longer exists");
                continue;
            }

            try
            {
                File.Delete(path);
                deleted++;
            }
            catch (IOException ex)
            {
                ReportSkip(path, ex.Message);
            }
            catch (UnauthorizedAccessException ex)
            {
                ReportSkip(path, ex.Message);
            }
        }

        return deleted;
    }

    /// <summary>
    /// Maps each non-kept file of every group to that group's keeper. Groups without a
    /// listed keeper contribute nothing, so their removals are not guarded.
    /// </summary>
    private static Dictionary<string, string> MapRemovalsToKeepers(DedupePlan plan)
    {
        HashSet<string> keepers = new(plan.Keepers, StringComparer.Ordinal);
        Dictionary<string, string> map = new(StringComparer.Ordinal);

        foreach (DuplicateGroup group in plan.Groups)
        {
            string? keeper = group.Files.FirstOrDefault(keepers.Contains);
            if (keeper is null)
            {
                continue;
            }

            foreach (string file in group.Files)
            {
                if (!string.Equals(file, keeper, StringComparison.Ordinal))
                {
                    map[file] = keeper;
                }
            }
        }

        return map;
    }

    /// <summary>Writes a "skip" line for a file that was not deleted.</summary>
    private static void ReportSkip(string path, string reason) =>
        AnsiConsole.MarkupLine($" [yellow]skip[/] {path.EscapeMarkup()}: {reason.EscapeMarkup()}");
}
19 changes: 19 additions & 0 deletions KtsuTools.FileDedupe/KtsuTools.FileDedupe.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<Project>
<!-- Project file for KtsuTools.FileDedupe: imports the .NET SDK and ktsu.Sdk. -->
<Sdk Name="Microsoft.NET.Sdk" />
<Sdk Name="ktsu.Sdk" />

<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<!-- NOTE(review): the empty TargetFrameworks presumably clears a multi-target list supplied by ktsu.Sdk so that only net10.0 is built; confirm against the SDK. -->
<TargetFrameworks></TargetFrameworks>
</PropertyGroup>

<ItemGroup>
<!-- No Version attributes here; presumably versions are managed centrally (e.g. Directory.Packages.props); verify. -->
<PackageReference Include="Spectre.Console" />
<PackageReference Include="ktsu.Semantics.Paths" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\KtsuTools.Core\KtsuTools.Core.csproj" />
</ItemGroup>

</Project>
48 changes: 48 additions & 0 deletions KtsuTools.Merge/DiffStyleParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (c) ktsu.dev
// All rights reserved.
// Licensed under the MIT license.

namespace KtsuTools.Merge;

using System;

/// <summary>
/// Converts between <see cref="DiffStyle"/> values and the textual names used in CLI flags
/// and persisted batch configs (e.g. "side-by-side", "git").
/// </summary>
public static class DiffStyleParser
{
    /// <summary>Canonical name of <see cref="DiffStyle.SideBySide"/>.</summary>
    public const string SideBySideName = "side-by-side";

    /// <summary>Canonical name of <see cref="DiffStyle.Git"/>.</summary>
    public const string GitName = "git";

    /// <summary>
    /// Parses a style name, ignoring case and surrounding whitespace.
    /// </summary>
    /// <param name="value">Name to parse; null or blank selects the side-by-side default.</param>
    /// <param name="style">The parsed style, or <see cref="DiffStyle.SideBySide"/> when the name is unknown.</param>
    /// <returns><c>false</c> only when a non-blank name is not recognised.</returns>
    public static bool TryParse(string? value, out DiffStyle style)
    {
        // Side-by-side is the answer for blank input, for its own aliases, and the fallback
        // for unknown names, so start from it and only override for the git aliases.
        style = DiffStyle.SideBySide;

        if (string.IsNullOrWhiteSpace(value))
        {
            return true;
        }

        string normalized = value.Trim().ToLowerInvariant();

        if (normalized is SideBySideName or "sidebyside" or "side")
        {
            return true;
        }

        if (normalized is GitName or "unified")
        {
            style = DiffStyle.Git;
            return true;
        }

        return false;
    }

    /// <summary>
    /// Returns the canonical name for <paramref name="style"/>; any value other than
    /// <see cref="DiffStyle.Git"/> maps to <see cref="SideBySideName"/>.
    /// </summary>
    public static string ToCanonicalString(DiffStyle style)
    {
        return style == DiffStyle.Git ? GitName : SideBySideName;
    }
}
84 changes: 75 additions & 9 deletions KtsuTools.Merge/MergeService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ public enum BlockChoice
Skip,
}

/// <summary>
/// Diff rendering style for conflict display.
/// </summary>
public enum DiffStyle
{
/// <summary>DiffPlex side-by-side renderer (default; preserves prior behaviour).</summary>
SideBySide,

/// <summary>Git-style line diff ('+'/'-' prefixed lines) built with DiffPlex <c>InlineDiffBuilder</c>.</summary>
Git,
}

/// <summary>
/// Service for N-way iterative file merging with interactive conflict resolution.
/// </summary>
Expand All @@ -51,10 +63,11 @@ public class MergeService
/// </summary>
/// <param name="directory">Absolute root directory under which to search.</param>
/// <param name="filename">Glob pattern to match against filenames.</param>
/// <param name="diffStyle">How to render conflict diffs.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Exit code (0 for success).</returns>
#pragma warning disable CA1822 // Mark members as static - instance method required for DI injection
public async Task<int> RunMergeAsync(AbsoluteDirectoryPath directory, string filename, CancellationToken ct = default)
public async Task<int> RunMergeAsync(AbsoluteDirectoryPath directory, string filename, DiffStyle diffStyle = DiffStyle.SideBySide, CancellationToken ct = default)
#pragma warning restore CA1822
{
Ensure.NotNull(directory);
Expand Down Expand Up @@ -113,7 +126,7 @@ public async Task<int> RunMergeAsync(AbsoluteDirectoryPath directory, string fil
string content2 = await File.ReadAllTextAsync(bestPair.FilePath2, ct).ConfigureAwait(false);

// Show diff
ShowDiff(content1, content2, bestPair.FilePath1, bestPair.FilePath2);
ShowDiff(content1, content2, bestPair.FilePath1, bestPair.FilePath2, diffStyle);

// Interactive merge
string mergedContent = InteractiveMerge(content1, content2);
Expand Down Expand Up @@ -235,15 +248,34 @@ private static double CalculateSimilarity(string content1, string content2)
return Math.Max(0.0, (double)unchangedLines / totalLines);
}

private static void ShowDiff(string content1, string content2, string path1, string path2)
{
SideBySideDiffBuilder diffBuilder = new(new Differ());
SideBySideDiffModel diff = diffBuilder.BuildDiffModel(content1, content2);
private const int DiffLineCap = 50;

private static void ShowDiff(string content1, string content2, string path1, string path2, DiffStyle style)
{
AnsiConsole.MarkupLine($"[dim]--- {Path.GetFileName(path1).EscapeMarkup()}[/]");
AnsiConsole.MarkupLine($"[dim]+++ {Path.GetFileName(path2).EscapeMarkup()}[/]");

int maxLines = Math.Min(50, Math.Max(diff.OldText.Lines.Count, diff.NewText.Lines.Count));
switch (style)
{
case DiffStyle.Git:
ShowUnifiedDiff(content1, content2);
break;
case DiffStyle.SideBySide:
default:
ShowSideBySideDiff(content1, content2);
break;
}

AnsiConsole.WriteLine();
}

private static void ShowSideBySideDiff(string content1, string content2)
{
SideBySideDiffBuilder diffBuilder = new(new Differ());
SideBySideDiffModel diff = diffBuilder.BuildDiffModel(content1, content2);

int total = Math.Max(diff.OldText.Lines.Count, diff.NewText.Lines.Count);
int maxLines = Math.Min(DiffLineCap, total);

for (int i = 0; i < maxLines; i++)
{
Expand All @@ -268,12 +300,46 @@ private static void ShowDiff(string content1, string content2, string path1, str
}
}

if (Math.Max(diff.OldText.Lines.Count, diff.NewText.Lines.Count) > maxLines)
if (total > maxLines)
{
AnsiConsole.MarkupLine("[dim]... (truncated)[/]");
}
}

AnsiConsole.WriteLine();
/// <summary>
/// Writes an inline diff of the two texts to the console: '+' (green) for inserted lines,
/// '-' (red) for deleted or modified lines, and a dimmed leading space for unchanged lines.
/// Stops with a truncation marker once <c>DiffLineCap</c> lines have been counted.
/// </summary>
private static void ShowUnifiedDiff(string content1, string content2)
{
    DiffPaneModel model = InlineDiffBuilder.Diff(content1, content2);
    int shown = 0;

    foreach (DiffPiece piece in model.Lines)
    {
        if (shown >= DiffLineCap)
        {
            AnsiConsole.MarkupLine("[dim]... (truncated)[/]");
            return;
        }

        // Placeholder lines are not rendered and do not count toward the cap.
        if (piece.Type == ChangeType.Imaginary)
        {
            continue;
        }

        string escaped = piece.Text?.EscapeMarkup() ?? string.Empty;
        string? markup = piece.Type switch
        {
            ChangeType.Inserted => $"[green]+{escaped}[/]",
            ChangeType.Deleted or ChangeType.Modified => $"[red]-{escaped}[/]",
            ChangeType.Unchanged => $"[dim] {escaped}[/]",
            _ => null,
        };

        if (markup is not null)
        {
            AnsiConsole.MarkupLine(markup);
        }

        shown++;
    }
}

private static string InteractiveMerge(string content1, string content2)
Expand Down
Loading
Loading