From 780b2931c86fec79cb5f9602d1d76c08b627f321 Mon Sep 17 00:00:00 2001 From: Simon Cropp Date: Tue, 30 Jun 2026 22:33:52 +1000 Subject: [PATCH] Reduce redundant buffering and tree walks in conversion - Fuse revision-marker stripping and drawing-id collection into a single document.xml traversal (WordRevisionMarkers.StripAndCollectDrawingIds) - Stream the normalized PNG IDAT straight to the target instead of staging it in a second MemoryStream; ~44% fewer allocations per image - Presize the nested-zip recursion buffer to the entry's uncompressed length - Add PngNormalizerBench --- src/Benchmarks/PngNormalizerBench.cs | 20 ++++ src/Benchmarks/SampleXml.cs | 48 ++++++++ .../DeterministicPackage.cs | 4 +- .../DeterministicPackage_Convert.cs | 25 +++- .../Patching/DocumentPatcher.cs | 26 ++--- .../Patching/WordRevisionMarkers.cs | 64 +++++++--- src/DeterministicIoPackaging/PngNormalizer.cs | 109 ++++++++++-------- 7 files changed, 213 insertions(+), 83 deletions(-) create mode 100644 src/Benchmarks/PngNormalizerBench.cs diff --git a/src/Benchmarks/PngNormalizerBench.cs b/src/Benchmarks/PngNormalizerBench.cs new file mode 100644 index 0000000..346f3c1 --- /dev/null +++ b/src/Benchmarks/PngNormalizerBench.cs @@ -0,0 +1,20 @@ +[MemoryDiagnoser] +public class PngNormalizerBench +{ + byte[] png = null!; + + [Params(64 * 1024, 1024 * 1024)] + public int ImageBytes { get; set; } + + [GlobalSetup] + public void Setup() => + png = SampleXml.BuildPng(ImageBytes); + + [Benchmark] + public void Normalize() + { + using var source = new MemoryStream(png, writable: false); + using var target = new MemoryStream(); + PngNormalizer.Normalize(source, target); + } +} diff --git a/src/Benchmarks/SampleXml.cs b/src/Benchmarks/SampleXml.cs index 264320c..dd84eea 100644 --- a/src/Benchmarks/SampleXml.cs +++ b/src/Benchmarks/SampleXml.cs @@ -1,3 +1,5 @@ +using System.Buffers.Binary; + static class SampleXml { static XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"; @@ -193,4 +195,50 @@ static void WriteEntry(Archive zip, string name, XDocument xml) using var writer = new StreamWriter(stream, new UTF8Encoding(false)); xml.Save(writer, SaveOptions.DisableFormatting); } + + // Builds a minimal PNG (signature + IHDR + IDAT + IEND) whose IDAT carries a + // zlib-compressed payload of `rawBytes` bytes — that payload is what + // PngNormalizer decompresses and rewrites as stored zlib blocks. Chunk CRCs + // are left zero: the normalizer reads chunk lengths and re-derives the IDAT + // CRC itself, so the input value is irrelevant for benchmarking it. + public static byte[] BuildPng(int rawBytes) + { + var raw = new byte[rawBytes]; + for (var i = 0; i < raw.Length; i++) + { + raw[i] = (byte) (i * 31 + 7); + } + + byte[] idat; + using (var compressed = new MemoryStream()) + { + using (var zlib = new ZLibStream(compressed, CompressionLevel.Optimal, leaveOpen: true)) + { + zlib.Write(raw, 0, raw.Length); + } + + idat = compressed.ToArray(); + } + + using var png = new MemoryStream(); + ReadOnlySpan signature = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; + png.Write(signature); + WriteChunk(png, "IHDR"u8, new byte[13]); + WriteChunk(png, "IDAT"u8, idat); + WriteChunk(png, "IEND"u8, []); + return png.ToArray(); + } + + static void WriteChunk(Stream png, ReadOnlySpan type, ReadOnlySpan data) + { + Span header = stackalloc byte[4]; + BinaryPrimitives.WriteInt32BigEndian(header, data.Length); + png.Write(header); + png.Write(type); + png.Write(data); + + // CRC placeholder (PngNormalizer does not validate input chunk CRCs). + header.Clear(); + png.Write(header); + } } diff --git a/src/DeterministicIoPackaging/DeterministicPackage.cs b/src/DeterministicIoPackaging/DeterministicPackage.cs index 697cb2e..0d1fad9 100644 --- a/src/DeterministicIoPackaging/DeterministicPackage.cs +++ b/src/DeterministicIoPackaging/DeterministicPackage.cs @@ -78,7 +78,7 @@ static void DuplicateEntry(Entry sourceEntry, Archive targetArchive, PatcherSet return; } - CopyOrRecurseZip(sourceStream, targetStream); + CopyOrRecurseZip(sourceStream, targetStream, sourceEntry.Length); } static async Task DuplicateEntryAsync(Entry sourceEntry, Archive targetArchive, PatcherSet currentPatchers, Cancel cancel) @@ -115,7 +115,7 @@ static async Task DuplicateEntryAsync(Entry sourceEntry, Archive targetArchive, return; } - await CopyOrRecurseZipAsync(sourceStream, targetStream, cancel); + await CopyOrRecurseZipAsync(sourceStream, targetStream, sourceEntry.Length, cancel); } static bool IsSpreadsheetXml(Entry entry) => diff --git a/src/DeterministicIoPackaging/DeterministicPackage_Convert.cs b/src/DeterministicIoPackaging/DeterministicPackage_Convert.cs index 6174364..e091ed3 100644 --- a/src/DeterministicIoPackaging/DeterministicPackage_Convert.cs +++ b/src/DeterministicIoPackaging/DeterministicPackage_Convert.cs @@ -76,14 +76,17 @@ static bool LooksLikeZip(byte[] head, int length) // packages flow through with whatever non-deterministic deflate/timestamps // their producer emitted, defeating the deterministic guarantee for the // outer package. - static void CopyOrRecurseZip(Stream source, Stream target) + static void CopyOrRecurseZip(Stream source, Stream target, long sourceLength) { var head = new byte[4]; var read = ReadUpTo(source, head, 4); if (LooksLikeZip(head, read)) { - using var buffer = new MemoryStream(); + // The whole entry (head + remainder) is buffered for the recursive + // Convert. Its uncompressed size is known from the central directory, + // so presize the buffer to avoid MemoryStream's grow-and-copy churn. + using var buffer = new MemoryStream(InitialCapacity(sourceLength)); buffer.Write(head, 0, read); source.CopyTo(buffer); buffer.Position = 0; @@ -100,14 +103,16 @@ static void CopyOrRecurseZip(Stream source, Stream target) source.CopyTo(target); } - static async Task CopyOrRecurseZipAsync(Stream source, Stream target, Cancel cancel) + static async Task CopyOrRecurseZipAsync(Stream source, Stream target, long sourceLength, Cancel cancel) { var head = new byte[4]; var read = await ReadUpToAsync(source, head, 4, cancel); if (LooksLikeZip(head, read)) { - using var buffer = new MemoryStream(); + // See CopyOrRecurseZip: presize the recursion buffer to the entry's + // known uncompressed size to avoid grow-and-copy reallocations. + using var buffer = new MemoryStream(InitialCapacity(sourceLength)); await buffer.WriteAsync(head, 0, read, cancel); await source.CopyToAsync(buffer, cancel); buffer.Position = 0; @@ -124,6 +129,18 @@ static async Task CopyOrRecurseZipAsync(Stream source, Stream target, Cancel can await source.CopyToAsync(target, cancel); } + // Clamp a ZipArchiveEntry.Length to a valid MemoryStream initial capacity. + // 0 (the parameterless-constructor default) for unknown/oversized lengths. + static int InitialCapacity(long sourceLength) + { + if (sourceLength is > 0 and <= int.MaxValue) + { + return (int) sourceLength; + } + + return 0; + } + static int ReadUpTo(Stream source, byte[] buffer, int count) => source.ReadAtLeast(buffer.AsSpan(0, count), count, throwOnEndOfStream: false); diff --git a/src/DeterministicIoPackaging/Patching/DocumentPatcher.cs b/src/DeterministicIoPackaging/Patching/DocumentPatcher.cs index ae48c9b..307f401 100644 --- a/src/DeterministicIoPackaging/Patching/DocumentPatcher.cs +++ b/src/DeterministicIoPackaging/Patching/DocumentPatcher.cs @@ -14,27 +14,15 @@ public void PatchXml(XDocument xml, string entryName) { var root = xml.Root!; - WordRevisionMarkers.Strip(xml); - - // Collect id attributes in one Descendants() walk rather than two. - // Preserves the original ordering: all wp:docPr ids first, then - // pic:cNvPr ids, numbering continuing from where the docPrs left off. - // Storing the XAttribute directly avoids a redundant Attribute("id") - // lookup during renumbering. + // Strip revision markers and collect the drawing-id attributes in a + // single tree traversal. word/document.xml is the largest content part, + // so fusing these two passes avoids walking it twice. Ordering is + // preserved: all wp:docPr ids first, then pic:cNvPr ids, numbering + // continuing from where the docPrs left off. Storing the XAttribute + // directly avoids a redundant Attribute("id") lookup during renumbering. var docPrIds = new List(); var picIds = new List(); - foreach (var element in root.Descendants()) - { - var name = element.Name; - if (name == wpDocPr) - { - docPrIds.Add(element.Attribute("id")!); - } - else if (name == picCNvPr) - { - picIds.Add(element.Attribute("id")!); - } - } + WordRevisionMarkers.StripAndCollectDrawingIds(root, wpDocPr, picCNvPr, docPrIds, picIds); var index = 1; foreach (var attr in docPrIds) diff --git a/src/DeterministicIoPackaging/Patching/WordRevisionMarkers.cs b/src/DeterministicIoPackaging/Patching/WordRevisionMarkers.cs index d6b567d..b26558b 100644 --- a/src/DeterministicIoPackaging/Patching/WordRevisionMarkers.cs +++ b/src/DeterministicIoPackaging/Patching/WordRevisionMarkers.cs @@ -33,24 +33,62 @@ public static void Strip(XDocument xml) return; } - // Walk the attribute linked-list manually rather than via LINQ + - // ToList(). Capturing NextAttribute before Remove() lets us mutate - // safely without per-element allocations — common case is zero - // matching attributes, so this method used to allocate a Where - // iterator and a List for every node in the document. foreach (var element in root.DescendantsAndSelf()) { - var attr = element.FirstAttribute; - while (attr != null) + StripAttributes(element); + } + } + + // Strips revision markers and, in the same traversal, collects the + // wp:docPr / pic:cNvPr id attributes DocumentPatcher renumbers. + // word/document.xml is the largest part in a .docx, so folding the strip + // and the id collection into a single Descendants() walk avoids traversing + // the whole tree twice. The collection order is identical to a standalone + // root.Descendants() pass: all docPr ids then all pic ids in document order. + public static void StripAndCollectDrawingIds( + XElement root, + XName docPrName, + XName cNvPrName, + List docPrIds, + List picIds) + { + // Process the root's own attributes first, mirroring Strip's use of + // DescendantsAndSelf. The root of word/document.xml is w:document — + // never a drawing element — so it needs stripping, not id collection. + StripAttributes(root); + + foreach (var element in root.Descendants()) + { + StripAttributes(element); + + var name = element.Name; + if (name == docPrName) + { + docPrIds.Add(element.Attribute("id")!); + } + else if (name == cNvPrName) { - var next = attr.NextAttribute; - if (attributesToRemove.Contains(attr.Name)) - { - attr.Remove(); - } + picIds.Add(element.Attribute("id")!); + } + } + } - attr = next; + // Walk the attribute linked-list manually rather than via LINQ + ToList(). + // Capturing NextAttribute before Remove() lets us mutate safely without + // per-element allocations — the common case is zero matching attributes, so + // a Where iterator + List per node would be pure waste. + static void StripAttributes(XElement element) + { + var attr = element.FirstAttribute; + while (attr != null) + { + var next = attr.NextAttribute; + if (attributesToRemove.Contains(attr.Name)) + { + attr.Remove(); } + + attr = next; } } } diff --git a/src/DeterministicIoPackaging/PngNormalizer.cs b/src/DeterministicIoPackaging/PngNormalizer.cs index b689109..d1631dd 100644 --- a/src/DeterministicIoPackaging/PngNormalizer.cs +++ b/src/DeterministicIoPackaging/PngNormalizer.cs @@ -1,7 +1,5 @@ using System.Buffers.Binary; -namespace DeterministicIoPackaging; - static class PngNormalizer { static readonly byte[] pngSignature = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; @@ -98,56 +96,77 @@ static void WriteNormalizedIdat(Stream target, byte[] zlibBytes, int zlibLength) var raw = decompressedStream.GetBuffer(); var rawLength = (int) decompressedStream.Length; - // Write raw zlib stored format to avoid framework DEFLATE differences. - // Format: CMF(0x78) FLG(0x01) + DEFLATE stored blocks + Adler-32 - using var compressOutput = new MemoryStream(); - // CMF: deflate, 32K window - compressOutput.WriteByte(0x78); - // FLG: no dict, check bits make CMF*256+FLG divisible by 31 - compressOutput.WriteByte(0x01); + // Emit the IDAT chunk straight to target instead of staging the whole + // normalized zlib stream in a second MemoryStream. The chunk length is + // computed analytically and a single Crc32 accumulates the same bytes as + // they are written, so this avoids buffering a second full copy of the + // image and two extra passes over it (GetBuffer for the write + the CRC). + // + // Raw zlib stored format (avoids framework DEFLATE differences): + // CMF(0x78) FLG(0x01) + DEFLATE stored blocks + Adler-32. + // Each stored block is a 5-byte header (BFINAL/BTYPE + LEN + NLEN) plus + // its payload; a zero-length image still emits one empty final block. + var blockCount = rawLength == 0 ? 1 : (rawLength + 65534) / 65535; + var idatLength = 2 + blockCount * 5 + rawLength + 4; - // Write DEFLATE stored blocks (max 65535 bytes each) - var offset = 0; - while (offset < rawLength) - { - var blockSize = Math.Min(rawLength - offset, 65535); - var isFinal = offset + blockSize >= rawLength; - compressOutput.WriteByte(isFinal ? (byte) 1 : (byte) 0); // BFINAL + BTYPE=00 - compressOutput.WriteByte((byte) (blockSize & 0xFF)); - compressOutput.WriteByte((byte) (blockSize >> 8)); - compressOutput.WriteByte((byte) (~blockSize & 0xFF)); - compressOutput.WriteByte((byte) ((~blockSize >> 8) & 0xFF)); - compressOutput.Write(raw, offset, blockSize); - offset += blockSize; - } + var crc = new Crc32(); + + // Chunk length (big-endian) + "IDAT". The CRC covers the type + data, not the length. + Span lengthAndType = stackalloc byte[8]; + BinaryPrimitives.WriteInt32BigEndian(lengthAndType, idatLength); + idatType.AsSpan().CopyTo(lengthAndType.Slice(4)); + target.Write(lengthAndType); + crc.Append(lengthAndType.Slice(4, 4)); + + // Reusable scratch for the 2-byte zlib header and the 5-byte block headers. + Span block = stackalloc byte[5]; + // CMF: deflate, 32K window. FLG: no dict, CMF*256+FLG divisible by 31. + block[0] = 0x78; + block[1] = 0x01; + target.Write(block.Slice(0, 2)); + crc.Append(block.Slice(0, 2)); if (rawLength == 0) { - // Empty data: single final stored block with length 0 - compressOutput.WriteByte(1); - compressOutput.Write([0, 0, 0xFF, 0xFF], 0, 4); + // Empty data: single final stored block with length 0. + block[0] = 1; + block[1] = 0; + block[2] = 0; + block[3] = 0xFF; + block[4] = 0xFF; + target.Write(block); + crc.Append(block); + } + else + { + // DEFLATE stored blocks (max 65535 bytes each). + var offset = 0; + while (offset < rawLength) + { + var blockSize = Math.Min(rawLength - offset, 65535); + var isFinal = offset + blockSize >= rawLength; + block[0] = isFinal ? (byte) 1 : (byte) 0; // BFINAL + BTYPE=00 + block[1] = (byte) (blockSize & 0xFF); + block[2] = (byte) (blockSize >> 8); + block[3] = (byte) (~blockSize & 0xFF); + block[4] = (byte) ((~blockSize >> 8) & 0xFF); + target.Write(block); + crc.Append(block); + + target.Write(raw, offset, blockSize); + crc.Append(raw.AsSpan(offset, blockSize)); + offset += blockSize; + } } - // Adler-32 checksum - var adler = Adler32(raw, rawLength); - compressOutput.WriteByte((byte) (adler >> 24)); - compressOutput.WriteByte((byte) (adler >> 16)); - compressOutput.WriteByte((byte) (adler >> 8)); - compressOutput.WriteByte((byte) adler); - - var length = (int) compressOutput.Length; - var header = new byte[4]; - BinaryPrimitives.WriteInt32BigEndian(header, length); - target.Write(header); - target.Write(idatType); - target.Write(compressOutput.GetBuffer(), 0, length); + // Adler-32 checksum, then the chunk's CRC-32 (over type + data). + Span trailer = stackalloc byte[4]; + BinaryPrimitives.WriteUInt32BigEndian(trailer, Adler32(raw, rawLength)); + target.Write(trailer); + crc.Append(trailer); - var crc = new Crc32(); - crc.Append(idatType); - crc.Append(compressOutput.GetBuffer().AsSpan(0, length)); - var crcBytes = new byte[4]; - BinaryPrimitives.WriteUInt32BigEndian(crcBytes, crc.GetCurrentHashAsUInt32()); - target.Write(crcBytes); + BinaryPrimitives.WriteUInt32BigEndian(trailer, crc.GetCurrentHashAsUInt32()); + target.Write(trailer); } // Adler-32 checksum as defined in RFC 1950.