From 5aa334919eb2e2d7a880372026651003ef066a50 Mon Sep 17 00:00:00 2001
From: arkptz
Date: Tue, 26 May 2026 22:11:08 +0300
Subject: [PATCH] fix(mitmproxy): decompress response bodies based on content-encoding header

---
 Cargo.lock              |  79 +++++++++++++++++++++++++++++++
 Cargo.toml              |   2 +
 src/mitmproxy_reader.rs | 111 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 190 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 3298635..025d7f9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,6 +2,12 @@
 # It is not intended for manual editing.
 version = 4
 
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
 [[package]]
 name = "aho-corasick"
 version = "1.1.4"
@@ -11,6 +17,21 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "alloc-no-stdlib"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
+
+[[package]]
+name = "alloc-stdlib"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
+dependencies = [
+ "alloc-no-stdlib",
+]
+
 [[package]]
 name = "android_system_properties"
 version = "0.1.5"
@@ -124,6 +145,27 @@ version = "2.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
 
+[[package]]
+name = "brotli"
+version = "7.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+ "brotli-decompressor",
+]
+
+[[package]]
+name = "brotli-decompressor"
+version = "4.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+]
+
 [[package]]
 name = "bstr"
 version = "1.12.1"
@@ -221,6 +263,15 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
 [[package]]
 name = "darling"
 version = "0.23.0"
@@ -311,6 +362,16 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
 
+[[package]]
+name = "flate2"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
 [[package]]
 name = "float-cmp"
 version = "0.10.0"
@@ -532,6 +593,16 @@ version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
 
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+ "simd-adler32",
+]
+
 [[package]]
 name = "mitm2openapi"
 version = "0.5.2"
@@ -539,7 +610,9 @@ dependencies = [
  "anyhow",
  "assert_cmd",
  "base64",
+ "brotli",
  "clap",
+ "flate2",
  "globset",
  "har",
  "indexmap 2.14.0",
@@ -1013,6 +1086,12 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
+[[package]]
+name = "simd-adler32"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
+
 [[package]]
 name = "smallvec"
 version = "1.15.1"
diff --git a/Cargo.toml b/Cargo.toml
index 13efe52..3bad8e3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -42,6 +42,8 @@ uuid = { version = "1", features = ["v4"] }
 rmp-serde = "1"
 globset = "0.4"
 tempfile = "3"
+brotli = "7"
+flate2 = "1"
 
 [dev-dependencies]
 proptest = "1"
diff --git a/src/mitmproxy_reader.rs b/src/mitmproxy_reader.rs
index c0cc7d6..96f94b8 100644
--- a/src/mitmproxy_reader.rs
+++ b/src/mitmproxy_reader.rs
@@ -1,3 +1,4 @@
+use std::io::Read;
 use std::path::Path;
 
 use tracing::{debug, warn};
@@ -153,6 +154,51 @@ fn find_header<'a>(headers: &'a [(String, String)], name: &str) -> Option<&'a st
         .map(|(_, v)| v.as_str())
 }
 
+/// Undo the content codings named in a `Content-Encoding` header value.
+///
+/// Coding names are trimmed and compared case-insensitively. A comma-separated
+/// list is undone last-applied-first, so `"gzip, br"` is brotli-decoded and then
+/// gunzipped. Returns a copy of `body` unchanged when `encoding` is `None`, when
+/// any listed coding is unsupported, or when decoding fails (a warning is logged).
+// NOTE(review): the decoded size is not bounded here; callers cap it afterwards.
+fn decompress_body(body: &[u8], encoding: Option<&str>) -> Vec<u8> {
+    let mut decoded: Option<Vec<u8>> = None;
+    for coding in encoding.unwrap_or_default().rsplit(',') {
+        let coding = coding.trim().to_ascii_lowercase();
+        if coding.is_empty() || coding == "identity" {
+            continue;
+        }
+        match decode_coding(decoded.as_deref().unwrap_or(body), &coding) {
+            Ok(Some(bytes)) => decoded = Some(bytes),
+            Ok(None) => return body.to_vec(),
+            Err(error) => {
+                warn!(event = "decompress_failed", encoding = ?encoding, error = %error, "decompression failed, using raw bytes");
+                return body.to_vec();
+            }
+        }
+    }
+    decoded.unwrap_or_else(|| body.to_vec())
+}
+
+/// Decode one lowercase content coding; `Ok(None)` means it is unsupported.
+fn decode_coding(data: &[u8], coding: &str) -> std::io::Result<Option<Vec<u8>>> {
+    let mut out = Vec::new();
+    match coding {
+        "br" => brotli::Decompressor::new(data, 4096).read_to_end(&mut out)?,
+        "gzip" | "x-gzip" => flate2::read::GzDecoder::new(data).read_to_end(&mut out)?,
+        // HTTP `deflate` is zlib-wrapped (RFC 9110); fall back to raw deflate streams.
+        "deflate" => match flate2::read::ZlibDecoder::new(data).read_to_end(&mut out) {
+            Ok(read) => read,
+            Err(_) => {
+                out.clear();
+                flate2::read::DeflateDecoder::new(data).read_to_end(&mut out)?
+            }
+        },
+        _ => return Ok(None),
+    };
+    Ok(Some(out))
+}
+
 /// Resolve hostname: host field → Host header → authority field.
fn resolve_host(request: &TNetValue, headers: &[(String, String)]) -> Option { if let Some(host) = request.get("host").and_then(value_to_string_strict) { @@ -269,11 +306,12 @@ fn parse_flow(flow: &TNetValue) -> Result { let url = build_url_with_fallback(request, &request_headers)?; + let request_encoding = find_header(&request_headers, "content-encoding").map(|v| v.to_string()); let request_body = request .get("content") .and_then(|v| if v.is_null() { None } else { v.as_bytes() }) .filter(|b| !b.is_empty()) - .map(|b| cap_body(b, &url)); + .map(|b| cap_body(&decompress_body(b, request_encoding.as_deref()), &url)); let response = flow.get("response"); @@ -301,11 +339,14 @@ fn parse_flow(flow: &TNetValue) -> Result { ); let reason = resp.get("reason").and_then(value_to_string); let headers = resp.get("headers").map(parse_headers); + let response_encoding = headers + .as_ref() + .and_then(|h| find_header(h, "content-encoding").map(|v| v.to_string())); let body = resp .get("content") .and_then(|v| if v.is_null() { None } else { v.as_bytes() }) .filter(|b| !b.is_empty()) - .map(|b| cap_body(b, &url)); + .map(|b| cap_body(&decompress_body(b, response_encoding.as_deref()), &url)); let content_type = headers .as_ref() .and_then(|h| find_header(h, "content-type").map(|v| v.to_string())); @@ -961,4 +1002,61 @@ mod tests { "null byte should not be in URL" ); } + + #[test] + fn decompress_body_brotli() { + use brotli::enc::BrotliCompress; + let original = b"hello brotli world"; + let mut compressed = Vec::new(); + BrotliCompress( + &mut &original[..], + &mut compressed, + &brotli::enc::BrotliEncoderParams::default(), + ) + .unwrap(); + let decoded = decompress_body(&compressed, Some("br")); + assert_eq!(decoded, original); + } + + #[test] + fn decompress_body_gzip() { + use flate2::write::GzEncoder; + use std::io::Write; + let original = b"hello gzip world"; + let mut encoder = GzEncoder::new(Vec::new(), flate2::Compression::default()); + encoder.write_all(original).unwrap(); 
+ let compressed = encoder.finish().unwrap(); + let decoded = decompress_body(&compressed, Some("gzip")); + assert_eq!(decoded, original); + } + + #[test] + fn decompress_body_deflate() { + use flate2::write::DeflateEncoder; + use std::io::Write; + let original = b"hello deflate world"; + let mut encoder = DeflateEncoder::new(Vec::new(), flate2::Compression::default()); + encoder.write_all(original).unwrap(); + let compressed = encoder.finish().unwrap(); + let decoded = decompress_body(&compressed, Some("deflate")); + assert_eq!(decoded, original); + } + + #[test] + fn decompress_body_invalid_data_fallback() { + let garbage = b"not valid compressed data at all"; + let decoded = decompress_body(garbage, Some("gzip")); + assert_eq!( + decoded, garbage, + "invalid data should fall back to raw bytes" + ); + } + + #[test] + fn decompress_body_no_encoding_passthrough() { + let raw = b"plain text body"; + assert_eq!(decompress_body(raw, None), raw); + assert_eq!(decompress_body(raw, Some("identity")), raw); + assert_eq!(decompress_body(raw, Some("unknown")), raw); + } }