From 5a13c01c556a93cf58fb252149060e5f625b9766 Mon Sep 17 00:00:00 2001 From: Noah Pendleton <2538614+noahp@users.noreply.github.com> Date: Wed, 26 Nov 2025 11:19:55 -0500 Subject: [PATCH 1/2] Magic test for archives with zero-length entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Noticed when attempting to unpack a python `.whl` wheel file; they're just renamed .zip's, and usually work OK, but there are cases where the first file in the `.whl` archive is zero bytes, and that causes `file -z` to error: ```bash ❯ file -zL tests/test-1.23.zip tests/test-1.23.zip: ERROR:[gzip: ] (data) ❯ file tests/test-1.23.zip tests/test-1.23.zip: Zip archive data, made by v2.3 UNIX, extract using at least v1.0, last modified Oct 28 2006 14:38:44, uncompressed size 0, method=store ❯ unzip -l tests/test-1.23.zip Archive: tests/test-1.23.zip Length Date Time Name --------- ---------- ----- ---- 0 2006-10-28 14:38 1/2/3 0 2006-10-28 14:38 a/b 0 2006-10-28 14:38 foobar --------- ------- 0 3 files ``` This means we were not using `file` magic for any archive that started with a zero-length file (or directory) entry. Add a fallback when the magic test fails in this case, and add a test to cover it. --- dtrx/dtrx.py | 15 +++++++++++++-- tests/test-1.23.whl | Bin 0 -> 380 bytes tests/tests.yml | 7 +++++++ 3 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 tests/test-1.23.whl diff --git a/dtrx/dtrx.py b/dtrx/dtrx.py index bf4bf05..6306836 100755 --- a/dtrx/dtrx.py +++ b/dtrx/dtrx.py @@ -1357,11 +1357,22 @@ def try_by_magic(self, filename): status = process.wait() if status != 0: return [] + # if output contains 'ERROR:[', there was an error unzipping the + # first archive entry. re-run without -z. 
+ output = process.stdout.readline().decode("ascii") + process.stdout.close() + if "ERROR:[" in output: + process.stdout.close() + process = subprocess.Popen(["file", "-L", filename], stdout=subprocess.PIPE) + status = process.wait() + if status != 0: + return [] + output = process.stdout.readline().decode("ascii") + process.stdout.close() + except FileNotFoundError: logger.error("'file' command not found, skipping magic test") return [] - output = process.stdout.readline().decode("ascii") - process.stdout.close() if output.startswith("%s: " % filename): output = output[len(filename) + 2 :] mimes = self.magic_map_matches(output, self.magic_mime_map) diff --git a/tests/test-1.23.whl b/tests/test-1.23.whl new file mode 100644 index 0000000000000000000000000000000000000000..aa0ff55b0a2f2618b8d5621cc23c33b70f3ea438 GIT binary patch literal 380 zcmWIWW@h1H0D)^IF{WS!lwf5LWiZq?(l-tb;bdUmve?;G`>3;PXax(y3+5Nh0dRdc z(eyC`^(E>jfpzV~sp}S+E;gX9wEX;}#3Hc9-544fnZ%iKIg}Uf?j?;NCe+!i5NC&0 zFfasoBlKfB7+JpsP(RrDK;0ncqwB_WII`|Mpl+BqfChlP0WpA;4dgIpAlwh6mxDMA E0GE Date: Mon, 1 Dec 2025 11:27:31 -0500 Subject: [PATCH 2/2] clean up --- dtrx/dtrx.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/dtrx/dtrx.py b/dtrx/dtrx.py index 6306836..4fb1297 100755 --- a/dtrx/dtrx.py +++ b/dtrx/dtrx.py @@ -1353,22 +1353,24 @@ def magic_map_matches(self, output, magic_map): def try_by_magic(self, filename): try: - process = subprocess.Popen(["file", "-zL", filename], stdout=subprocess.PIPE) - status = process.wait() - if status != 0: + result = subprocess.run( + ["file", "-zL", filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + if result.returncode != 0: return [] # if output contains 'ERROR:[', there was an error unzipping the # first archive entry. re-run without -z. 
- output = process.stdout.readline().decode("ascii") - process.stdout.close() + output = result.stdout.split("\n")[0] if "ERROR:[" in output: - process.stdout.close() - process = subprocess.Popen(["file", "-L", filename], stdout=subprocess.PIPE) - status = process.wait() - if status != 0: + result = subprocess.run( + ["file", "-L", filename], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if result.returncode != 0: return [] - output = process.stdout.readline().decode("ascii") - process.stdout.close() + output = result.stdout.split("\n")[0] except FileNotFoundError: logger.error("'file' command not found, skipping magic test")