From a521859e2a57598deaf026274c5bd4d1a9e658d3 Mon Sep 17 00:00:00 2001 From: Serge Koudoro Date: Fri, 6 Feb 2026 11:58:03 -0500 Subject: [PATCH] BF: improve header reading. The ZIP spec allows local headers to have different extra fields than central directory entries which was causing a crash. --- trx/tests/test_memmap.py | 108 +++++++++++++++++++++++++++++++++++++++ trx/trx_file_memmap.py | 26 ++++++++-- 2 files changed, 130 insertions(+), 4 deletions(-) diff --git a/trx/tests/test_memmap.py b/trx/tests/test_memmap.py index ed1f581..0d9488f 100644 --- a/trx/tests/test_memmap.py +++ b/trx/tests/test_memmap.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- +import json import os +import struct import tempfile import zipfile @@ -476,6 +478,112 @@ def test__ensure_little_endian_big_endian_input(): assert result[0] == 0x12345678 +def test_load_zip_with_local_header_extra_field(): + """Test loading ZIP where local header has extra field not in central dir. + + Regression test for a bug where zip_info.FileHeader() was used to calculate + data offset. The ZIP spec allows local headers to have different extra + fields than central directory entries. The fix reads the actual local + file header to get the correct offset. + """ + positions = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32) + offsets = np.array([0, 2], dtype=np.uint64) + header = { + "DIMENSIONS": [10, 10, 10], + "VOXEL_TO_RASMM": np.eye(4).tolist(), + "NB_VERTICES": 2, + "NB_STREAMLINES": 1, + } + + with tempfile.TemporaryDirectory() as tmp_dir: + trx_path = os.path.join(tmp_dir, "test.trx") + + # Build ZIP with extra bytes in local headers but not central directory + with open(trx_path, "wb") as f: + local_info = [] + extra = b"\x00\x00\x04\x00TEST" # 8-byte extra field + + for name, data in [ + ("header.json", json.dumps(header).encode()), + ("positions.3.float32", positions.tobytes()), + ("offsets.uint64", offsets.tobytes()), + ]: + offset = f.tell() + fname = name.encode() + crc = zipfile.crc32(data) + # Local header WITH extra field + f.write( + struct.pack( + "<4sHHHHHIIIHH", + b"PK\x03\x04", + 20, + 0, + 0, + 0, + 0, + crc, + len(data), + len(data), + len(fname), + len(extra), + ) + ) + f.write(fname) + f.write(extra) + f.write(data) + local_info.append((name, offset, crc, len(data))) + + cd_start = f.tell() + for name, offset, crc, size in local_info: + fname = name.encode() + # Central directory WITHOUT extra field (mismatch!) + f.write( + struct.pack( + "<4sHHHHHHIIIHHHHHII", + b"PK\x01\x02", + 20, + 20, + 0, + 0, + 0, + 0, + crc, + size, + size, + len(fname), + 0, + 0, + 0, + 0, + 0, + offset, + ) + ) + f.write(fname) + + # End of central directory + f.write( + struct.pack( + "<4sHHHHIIH", + b"PK\x05\x06", + 0, + 0, + 3, + 3, + f.tell() - cd_start, + cd_start, + 0, + ) + ) + + trx = tmm.load_from_zip(trx_path) + np.testing.assert_array_almost_equal(trx.streamlines._data, positions) + assert trx.header["NB_VERTICES"] == 2 + assert trx.header["NB_STREAMLINES"] == 1 + + trx.close() + + def test_endianness_roundtrip(): """Test that data survives write/read cycle with correct endianness.""" with get_trx_tmp_dir() as dirname: diff --git a/trx/trx_file_memmap.py b/trx/trx_file_memmap.py index 2bfd0bc..37cc65c 100644 --- a/trx/trx_file_memmap.py +++ b/trx/trx_file_memmap.py @@ -6,6 +6,7 @@ import logging import os import shutil +import struct from typing import Any, List, Optional, Tuple, Type, Union import zipfile @@ -407,13 +408,30 @@ def load_from_zip(filename: str) -> Type["TrxFile"]: if ext == ".bit": ext = ".bool" - mem_adress = zip_info.header_offset + len(zip_info.FileHeader()) + # Read actual local file header to get correct data offset. + # We can't use zip_info.FileHeader() because ZIP spec allows local + # headers to differ from central directory entries. + # See: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + _ZIP_LOCAL_HEADER_SIZE = 30 + _ZIP_LOCAL_HEADER_SIGNATURE = b"PK\x03\x04" + + zf.fp.seek(zip_info.header_offset) + local_header = zf.fp.read(_ZIP_LOCAL_HEADER_SIZE) + if len(local_header) < _ZIP_LOCAL_HEADER_SIZE: + raise ValueError(f"Truncated local file header for {elem_filename}") + if local_header[:4] != _ZIP_LOCAL_HEADER_SIGNATURE: + raise ValueError( + f"Invalid local file header signature for {elem_filename}" + ) + fname_len, extra_len = struct.unpack("