diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b6806a..6c73cfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- *(cli)* add `--skip-options` flag to filter OPTIONS requests (discover + generate) +- *(cli)* add `--param-regex` flag for custom path parameter detection (discover) +- *(cli)* add `--max-examples` flag to cap examples per endpoint (generate, default 5) +- *(cli)* add `--redact-patterns` flag for regex-based value redaction (generate) +- *(cli)* add `--redact-fields` flag for field-name-based redaction (generate) +- *(discover)* enhanced path parameterization: UPPER_CASE slugs, hex strings, base58, cross-request variability +- *(generate)* response/request examples stored as named OpenAPI examples +- *(generate)* request body schema merging via oneOf for multiple captures + ## [0.5.2](https://github.com/Arkptz/mitm2openapi/compare/v0.5.1...v0.5.2) - 2026-04-24 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 3298635..025d7f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "aho-corasick" version = "1.1.4" @@ -11,6 +17,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -124,6 +145,27 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "brotli" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "4.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bstr" version = "1.12.1" @@ -221,6 +263,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + 
"cfg-if", +] + [[package]] name = "darling" version = "0.23.0" @@ -311,6 +362,16 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "float-cmp" version = "0.10.0" @@ -532,6 +593,16 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "mitm2openapi" version = "0.5.2" @@ -539,7 +610,9 @@ dependencies = [ "anyhow", "assert_cmd", "base64", + "brotli", "clap", + "flate2", "globset", "har", "indexmap 2.14.0", @@ -1013,6 +1086,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "smallvec" version = "1.15.1" diff --git a/Cargo.toml b/Cargo.toml index 13efe52..3bad8e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,8 @@ uuid = { version = "1", features = ["v4"] } rmp-serde = "1" globset = "0.4" tempfile = "3" +brotli = "7" +flate2 = "1" [dev-dependencies] proptest = "1" diff --git a/README.md b/README.md index 1ac5ef1..56ad164 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,9 @@ Credit 
to [@alufers](https://github.com/alufers) for the original tool that pion - **Two-format support** — mitmproxy flow dumps (v19/v20/v21) and HAR 1.2 - **Two-step workflow** — `discover` finds endpoints, you curate, `generate` emits clean OpenAPI 3.0 - **Glob filters** — `--exclude-patterns` and `--include-patterns` for automated pipelines +- **Response examples** — captured bodies stored as named OpenAPI examples with `--max-examples` cap +- **Smart parameterization** — detects UUIDs, hex, base58, UPPER_CASE slugs, and cross-request variability +- **Redaction** — `--redact-patterns` and `--redact-fields` scrub sensitive data from examples - **Error recovery** — skips corrupt flows, continues processing - **Auto-detection** — heuristic format detection from file content - **Battle-tested** — integration tests against Swagger Petstore and OWASP crAPI with `oasdiff` verification diff --git a/book/src/introduction.md b/book/src/introduction.md index eb62e18..aaacfc4 100644 --- a/book/src/introduction.md +++ b/book/src/introduction.md @@ -30,6 +30,9 @@ unattended pipelines. 
- **Resource limits** — configurable caps prevent denial-of-service on untrusted input - **Strict mode** — treat warnings as errors for CI gates - **Structured reports** — `--report` outputs machine-readable JSON processing summaries +- **Response examples** — captured request/response bodies stored as named OpenAPI examples +- **Smart parameterization** — detects UUIDs, hex strings, base58, UPPER_CASE slugs, and cross-request variability +- **Redaction** — `--redact-patterns` and `--redact-fields` scrub sensitive data from examples - **Battle-tested** — integration tests against Swagger Petstore and OWASP crAPI - **Cross-platform** — Linux, macOS, Windows pre-built binaries diff --git a/book/src/usage/cli-reference.md b/book/src/usage/cli-reference.md index 5b1ca06..4e8aa58 100644 --- a/book/src/usage/cli-reference.md +++ b/book/src/usage/cli-reference.md @@ -3,7 +3,7 @@ ```admonish warning -This reference was last synced with `mitm2openapi --help` at version 0.5.1. +This reference was last synced with `mitm2openapi --help` at version 0.6.0. If you notice a flag missing from your local `--help` output, the tool may be ahead of these docs. [Open an issue](https://github.com/Arkptz/mitm2openapi/issues/new) to prompt an update. 
``` @@ -35,6 +35,8 @@ mitm2openapi discover [OPTIONS] -i -o -p | `--allow-symlinks` | off | Allow symlinked input files | | `--strict` | off | Treat warnings as errors (exit code 2) | | `--report ` | | Write structured JSON processing report | +| `--skip-options` | off | Filter out OPTIONS requests from output | +| `--param-regex ` | | Custom regex for path parameter detection | ## `mitm2openapi generate` @@ -73,6 +75,10 @@ mitm2openapi generate [OPTIONS] -i -t -o -p ` | | Write structured JSON processing report | +| `--skip-options` | off | Filter out OPTIONS requests from output | +| `--max-examples ` | `5` | Maximum examples per endpoint per status code | +| `--redact-patterns ` | | Comma-separated regex patterns to redact from examples | +| `--redact-fields ` | | Comma-separated field names to redact from examples | ## Common flag details diff --git a/book/src/usage/pipeline.md b/book/src/usage/pipeline.md index a50b0bd..5236f0f 100644 --- a/book/src/usage/pipeline.md +++ b/book/src/usage/pipeline.md @@ -66,8 +66,24 @@ named parameters: | `/api/users/42`, `/api/users/99` | `/api/users/{id}` | | `/api/orders/abc-def-123` | `/api/orders/{id}` | -UUID-like and numeric segments are detected automatically. More complex patterns require -manual editing of the templates file. +UUID-like and numeric segments are detected automatically. The following patterns are also +recognized: + +- **UPPER_CASE slugs** — e.g. `BTC_USDT`, `ETH_BTC` +- **Hex strings** — segments starting with `0x` +- **Base58 identifiers** — alphanumeric segments 20+ characters long +- **Cross-request variability** — segments with 3 or more distinct values across requests + +For patterns not covered by the built-in heuristics, use `--param-regex` to supply a custom +regex. 
Any path segment matching the regex is treated as a parameter: + +```bash +mitm2openapi discover \ + -i capture.flow \ + -o templates.yaml \ + -p "https://api.example.com" \ + --param-regex '[A-Z]{2,}_[A-Z]{2,}' +``` ## Step 2: Curate @@ -165,6 +181,43 @@ mitm2openapi generate \ See the [CLI reference](./cli-reference.md) for all available options. +### Response and request examples + +The `generate` step captures actual request and response bodies as named examples in the +OpenAPI spec. Each unique response per endpoint and status code is stored as a separate +example, up to the limit set by `--max-examples` (default: 5). + +When multiple requests hit the same endpoint with different request bodies, the schemas are +merged using `oneOf` to represent all observed variants. + +### Redacting sensitive data + +Production captures often contain tokens, passwords, or PII. Use `--redact-patterns` and +`--redact-fields` to scrub sensitive values from examples before they land in the spec: + +```bash +mitm2openapi generate \ + -i capture.flow \ + -t templates.yaml \ + -o openapi.yaml \ + -p "https://api.example.com" \ + --redact-patterns 'eyJ[\w-]+,sk-[a-zA-Z0-9]+' \ + --redact-fields 'password,token,secret,authorization' +``` + +`--redact-patterns` accepts comma-separated regexes matched against string values. +`--redact-fields` accepts comma-separated field names whose values are replaced with +`"[REDACTED]"`. 
/// Returns `true` when `ct` names a payload that should be treated as opaque
/// binary: any `image/*` media type, or `application/octet-stream`.
///
/// Only the media type itself is inspected. Parameters after `;` and
/// surrounding whitespace are ignored, and the comparison is ASCII
/// case-insensitive, so `Application/Octet-Stream; charset=binary` counts as
/// binary. `None` is never binary.
fn is_binary_content_type(ct: Option<&str>) -> bool {
    const IMAGE_PREFIX: &str = "image/";
    const OCTET_STREAM: &str = "application/octet-stream";

    ct.is_some_and(|raw| {
        // `split` always yields at least one item, so the fallback is never hit.
        let essence = raw.split(';').next().unwrap_or(raw).trim();
        // `get` returns `None` for short strings or a non-char-boundary cut,
        // both of which simply mean "not an image type".
        let is_image = essence
            .get(..IMAGE_PREFIX.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(IMAGE_PREFIX));
        is_image || essence.eq_ignore_ascii_case(OCTET_STREAM)
    })
}
return format!("response_{n}"); + } + }; + + if !existing.contains(&base) { + return base; + } + let mut i = 2; + loop { + let candidate = format!("{base}_{i}"); + if !existing.contains(&candidate) { + return candidate; + } + i += 1; + } +} + fn host_from_prefix(prefix: &str) -> String { prefix .strip_prefix("https://") @@ -300,6 +346,24 @@ fn merge_response_content(existing: &mut Response, incoming: &Response) { } } +fn merge_request_body_content(existing: &mut RequestBody, incoming: &RequestBody) { + for (media_type, incoming_mt) in &incoming.content { + if let Some(existing_mt) = existing.content.get_mut(media_type) { + let existing_schema = existing_mt.schema.take(); + let incoming_schema = incoming_mt.schema.clone(); + existing_mt.schema = match (existing_schema, incoming_schema) { + (Some(a), Some(b)) => Some(merge_schemas_one_of(a, b)), + (Some(a), None) => Some(a), + (None, b) => b, + }; + } else { + existing + .content + .insert(media_type.clone(), incoming_mt.clone()); + } + } +} + fn merge_schemas_one_of( a: ReferenceOr, b: ReferenceOr, @@ -399,12 +463,28 @@ impl OpenApiBuilder { path_matching::CompiledTemplates::new(&[]).unwrap() }); + let redactor = if !config.redact_patterns.is_empty() || !config.redact_fields.is_empty() { + match crate::redact::Redactor::new(&config.redact_patterns, &config.redact_fields) { + Ok(r) => Some(r), + Err(e) => { + tracing::warn!(error = %e, "Failed to compile redact patterns, skipping redaction"); + None + } + } + } else { + None + }; + Self { prefix: prefix.to_string(), config: config.clone(), tags_overrides, compiled_templates, spec, + examples_store: HashMap::new(), + req_examples_store: HashMap::new(), + max_examples: config.max_examples, + redactor, } } @@ -412,6 +492,10 @@ impl OpenApiBuilder { let url = request.get_url(); let method = request.get_method().to_uppercase(); + if self.config.skip_options && method == "OPTIONS" { + return; + } + if !matches!( method.as_str(), "GET" | "PUT" | "POST" | "DELETE" | 
"OPTIONS" | "HEAD" | "PATCH" | "TRACE" @@ -462,17 +546,28 @@ impl OpenApiBuilder { if let Some(resp_body) = request.get_response_body() { let resp_ct = request.get_response_content_type(); - if let Some((media_type_str, val)) = parse_body(resp_body, resp_ct) { - let resp_schema = schema::value_to_schema(&val); - let mut content = IndexMap::new(); - content.insert( - media_type_str, - MediaType { - schema: Some(ReferenceOr::Item(resp_schema)), - ..MediaType::default() - }, - ); - new_response.content = content; + if !is_binary_content_type(resp_ct) { + if let Some((media_type_str, val)) = parse_body(resp_body, resp_ct) { + let resp_schema = schema::value_to_schema(&val); + let mut content = IndexMap::new(); + content.insert( + media_type_str, + MediaType { + schema: Some(ReferenceOr::Item(resp_schema)), + ..MediaType::default() + }, + ); + new_response.content = content; + + let key = (template_path.clone(), method.clone(), status_code); + let entries = self.examples_store.entry(key).or_default(); + if self.max_examples > 0 && entries.len() < self.max_examples { + let existing_names: Vec = + entries.iter().map(|(n, _)| n.clone()).collect(); + let name = make_example_name(&val, &existing_names); + entries.push((name, val)); + } + } } } @@ -492,6 +587,47 @@ impl OpenApiBuilder { .responses .insert(sc, ReferenceOr::Item(new_response)); } + + if let Some(req_body) = request.get_request_body() { + let req_ct = request + .get_request_headers() + .iter() + .find(|(k, _)| k.to_lowercase() == "content-type") + .map(|(_, v)| v.as_str()); + + if let Some((media_type_str, val)) = parse_body(req_body, req_ct) { + let schema = schema::value_to_schema(&val); + let mut incoming_content = IndexMap::new(); + incoming_content.insert( + media_type_str.clone(), + MediaType { + schema: Some(ReferenceOr::Item(schema)), + ..MediaType::default() + }, + ); + let incoming_rb = RequestBody { + content: incoming_content, + required: true, + ..RequestBody::default() + }; + + match &mut 
op.request_body { + Some(ReferenceOr::Item(existing_rb)) => { + merge_request_body_content(existing_rb, &incoming_rb); + } + _ => { + op.request_body = Some(ReferenceOr::Item(incoming_rb)); + } + } + + let req_key = (template_path.clone(), method.clone(), media_type_str); + let req_entries = self.req_examples_store.entry(req_key).or_default(); + let existing_names: Vec = + req_entries.iter().map(|(n, _)| n.clone()).collect(); + let name = make_example_name(&val, &existing_names); + req_entries.push((name, val)); + } + } } return; } @@ -540,7 +676,7 @@ impl OpenApiBuilder { let schema = schema::value_to_schema(&val); let mut content = IndexMap::new(); content.insert( - media_type_str, + media_type_str.clone(), MediaType { schema: Some(ReferenceOr::Item(schema)), ..MediaType::default() @@ -551,6 +687,13 @@ impl OpenApiBuilder { required: true, ..RequestBody::default() })); + + let req_key = (template_path.clone(), method.clone(), media_type_str); + let req_entries = self.req_examples_store.entry(req_key).or_default(); + let existing_names: Vec = + req_entries.iter().map(|(n, _)| n.clone()).collect(); + let name = make_example_name(&val, &existing_names); + req_entries.push((name, val)); } } @@ -586,7 +729,85 @@ impl OpenApiBuilder { } /// Get the assembled OpenAPI spec. 
- pub fn build(self) -> OpenAPI { + pub fn build(mut self) -> OpenAPI { + for ((path, method, status), examples) in self.examples_store.drain() { + let Some(ReferenceOr::Item(path_item)) = self.spec.paths.paths.get_mut(&path) else { + continue; + }; + let Some(Some(op)) = get_operation_mut(path_item, &method).map(|s| s.as_mut()) else { + continue; + }; + let Some(ReferenceOr::Item(resp)) = + op.responses.responses.get_mut(&StatusCode::Code(status)) + else { + continue; + }; + let Some(media_type) = resp.content.values_mut().next() else { + continue; + }; + let mut ex_map: IndexMap> = IndexMap::new(); + for (name, mut value) in examples { + if let Some(r) = &self.redactor { + r.redact(&mut value); + let existing: Vec = ex_map.keys().cloned().collect(); + let new_name = make_example_name(&value, &existing); + ex_map.insert( + new_name, + ReferenceOr::Item(Example { + value: Some(value), + ..Example::default() + }), + ); + } else { + ex_map.insert( + name, + ReferenceOr::Item(Example { + value: Some(value), + ..Example::default() + }), + ); + } + } + media_type.examples = ex_map; + } + for ((path, method, content_type), examples) in self.req_examples_store.drain() { + let Some(ReferenceOr::Item(path_item)) = self.spec.paths.paths.get_mut(&path) else { + continue; + }; + let Some(Some(op)) = get_operation_mut(path_item, &method).map(|s| s.as_mut()) else { + continue; + }; + let Some(ReferenceOr::Item(rb)) = op.request_body.as_mut() else { + continue; + }; + let Some(media_type) = rb.content.get_mut(&content_type) else { + continue; + }; + let mut ex_map: IndexMap> = IndexMap::new(); + for (name, mut value) in examples { + if let Some(r) = &self.redactor { + r.redact(&mut value); + let existing: Vec = ex_map.keys().cloned().collect(); + let new_name = make_example_name(&value, &existing); + ex_map.insert( + new_name, + ReferenceOr::Item(Example { + value: Some(value), + ..Example::default() + }), + ); + } else { + ex_map.insert( + name, + ReferenceOr::Item(Example { + 
value: Some(value), + ..Example::default() + }), + ); + } + } + media_type.examples = ex_map; + } self.spec } } @@ -697,6 +918,10 @@ mod tests { ignore_images: false, suppress_params: false, tags_overrides: None, + skip_options: false, + max_examples: 5, + redact_patterns: vec![], + redact_fields: vec![], } } @@ -1563,4 +1788,506 @@ mod tests { "lowercase 'patch' should be normalized to PATCH" ); } + + // ── request body merge ─────────────────────────────────────────── + + #[test] + fn request_body_merge_different_schemas() { + let config = test_config(); + let mut builder = OpenApiBuilder::new("https://api.example.com", &config, vec![]); + + let req1 = MockRequest::post("https://api.example.com/items") + .with_json_request_body(&serde_json::json!({"name": "Alice"})); + let req2 = MockRequest::post("https://api.example.com/items") + .with_json_request_body(&serde_json::json!({"age": 30})); + + builder.add_request(&req1); + builder.add_request(&req2); + + let spec = builder.build(); + let path_item = spec.paths.paths["/items"].as_item().unwrap(); + let op = path_item.post.as_ref().unwrap(); + let rb = op.request_body.as_ref().unwrap().as_item().unwrap(); + let mt = rb.content.get("application/json").unwrap(); + let schema = mt.schema.as_ref().unwrap().as_item().unwrap(); + match &schema.schema_kind { + openapiv3::SchemaKind::OneOf { one_of } => { + assert_eq!(one_of.len(), 2); + } + _ => panic!("expected oneOf schema"), + } + } + + #[test] + fn request_body_merge_identical_schemas() { + let config = test_config(); + let mut builder = OpenApiBuilder::new("https://api.example.com", &config, vec![]); + + let req1 = MockRequest::post("https://api.example.com/items") + .with_json_request_body(&serde_json::json!({"name": "Alice"})); + let req2 = MockRequest::post("https://api.example.com/items") + .with_json_request_body(&serde_json::json!({"name": "Bob"})); + + builder.add_request(&req1); + builder.add_request(&req2); + + let spec = builder.build(); + let path_item = 
/// A body-less POST followed by a POST with a JSON body must still leave the
/// operation with a JSON request-body schema.
#[test]
fn request_body_first_no_body_second_has_body() {
    const URL: &str = "https://api.example.com/items";

    let config = test_config();
    let mut builder = OpenApiBuilder::new("https://api.example.com", &config, vec![]);

    // Order matters: the capture without a body is seen first.
    let bodiless = MockRequest::post(URL);
    let with_body =
        MockRequest::post(URL).with_json_request_body(&serde_json::json!({"name": "Alice"}));
    builder.add_request(&bodiless);
    builder.add_request(&with_body);

    let spec = builder.build();
    let media = spec.paths.paths["/items"]
        .as_item()
        .and_then(|item| item.post.as_ref())
        .and_then(|op| op.request_body.as_ref())
        .and_then(|body| body.as_item())
        .and_then(|body| body.content.get("application/json"))
        .unwrap();
    assert!(media.schema.is_some());
}
/// A response with a binary content type (`image/png`) must not contribute
/// any examples.
///
/// The path and its GET operation are required to exist so the test cannot
/// pass merely because the request was never recorded.
#[test]
fn examples_binary_skipped() {
    let config = test_config();
    let templates = vec!["/files/{id}".to_string()];
    let mut builder = OpenApiBuilder::new("https://api.example.com", &config, templates);

    let req = MockRequest {
        url: "https://api.example.com/files/1".to_string(),
        method: "GET".to_string(),
        request_headers: vec![],
        request_body: None,
        response_status: Some(200),
        response_reason: Some("OK".to_string()),
        response_headers: None,
        // First four bytes of the PNG signature.
        response_body: Some(vec![0x89, 0x50, 0x4E, 0x47]),
        response_content_type: Some("image/png".to_string()),
    };
    builder.add_request(&req);

    let spec = builder.build();
    let Some(ReferenceOr::Item(item)) = spec.paths.paths.get("/files/{id}") else {
        panic!("expected /files/{{id}} to be present in the spec");
    };
    let op = item
        .get
        .as_ref()
        .expect("expected a GET operation for the binary download");

    let media_types = op
        .responses
        .responses
        .values()
        .filter_map(|resp_ref| resp_ref.as_item())
        .flat_map(|resp| resp.content.values());
    for mt in media_types {
        assert!(mt.examples.is_empty(), "binary should have no examples");
    }
}
/// With `max_examples = 2`, ten distinct JSON responses for one endpoint must
/// leave exactly two response examples in the spec.
#[test]
fn max_examples_cap_enforced() {
    let mut config = test_config();
    config.max_examples = 2;
    let mut builder = OpenApiBuilder::new(
        "https://api.example.com",
        &config,
        vec!["/users/{id}".to_string()],
    );

    (1..=10).for_each(|i| {
        let url = format!("https://api.example.com/users/{i}");
        let body = serde_json::json!({"id": i, "name": format!("User{i}")});
        builder.add_request(&MockRequest::get(&url).with_json_response(&body));
    });

    let spec = builder.build();
    let ok_response = spec
        .paths
        .paths
        .get("/users/{id}")
        .and_then(|entry| entry.as_item())
        .and_then(|item| item.get.as_ref())
        .and_then(|op| op.responses.responses.get(&StatusCode::Code(200)))
        .and_then(|resp| resp.as_item())
        .unwrap();
    let media = ok_response
        .content
        .get("application/json")
        .expect("expected json");
    assert_eq!(media.examples.len(), 2, "cap of 2 should be enforced");
}
/// With the default cap (`test_config` sets `max_examples` to 5), eight
/// distinct JSON responses must leave exactly five response examples.
///
/// The count is pinned with `assert_eq!` rather than an upper bound so the
/// test also fails when examples stop being recorded at all.
#[test]
fn max_examples_default_five() {
    let config = test_config();
    let templates = vec!["/users/{id}".to_string()];
    let mut builder = OpenApiBuilder::new("https://api.example.com", &config, templates);

    for i in 1..=8 {
        let req = MockRequest::get(&format!("https://api.example.com/users/{i}"))
            .with_json_response(&serde_json::json!({"id": i, "name": format!("User{i}")}));
        builder.add_request(&req);
    }

    let spec = builder.build();
    let path_item = spec
        .paths
        .paths
        .get("/users/{id}")
        .unwrap()
        .as_item()
        .unwrap();
    let op = path_item.get.as_ref().unwrap();
    let resp = op
        .responses
        .responses
        .get(&StatusCode::Code(200))
        .unwrap()
        .as_item()
        .unwrap();
    let mt = resp.content.get("application/json").expect("expected json");
    assert_eq!(
        mt.examples.len(),
        5,
        "8 captures under the default cap of 5 should yield exactly 5 examples"
    );
}
/// GET captures without a request body must not produce a `requestBody` on
/// any GET operation.
///
/// The test first checks that at least one GET operation was generated, so
/// it cannot pass on an empty spec.
#[test]
fn request_examples_get_no_body() {
    let config = test_config();
    let mut builder = OpenApiBuilder::new("https://api.example.com", &config, vec![]);

    for i in 1..=3u32 {
        let req = MockRequest::get(&format!("https://api.example.com/users/{i}"))
            .with_json_response(&serde_json::json!({"id": i}));
        builder.add_request(&req);
    }

    let spec = builder.build();
    let get_ops: Vec<_> = spec
        .paths
        .paths
        .values()
        .filter_map(|path_ref| path_ref.as_item())
        .filter_map(|item| item.get.as_ref())
        .collect();

    assert!(
        !get_ops.is_empty(),
        "expected at least one GET operation in the generated spec"
    );
    for op in get_ops {
        assert!(op.request_body.is_none(), "GET should have no requestBody");
    }
}
vec!["token".to_string()]; + let mut builder = OpenApiBuilder::new("https://api.example.com", &config, vec![]); + + let req = MockRequest::get("https://api.example.com/auth") + .with_json_response(&serde_json::json!({"token": "secret123", "user": "alice"})); + builder.add_request(&req); + + let spec = builder.build(); + let path_item = spec.paths.paths["/auth"].as_item().unwrap(); + let op = path_item.get.as_ref().unwrap(); + let resp = op + .responses + .responses + .get(&StatusCode::Code(200)) + .unwrap() + .as_item() + .unwrap(); + let mt = resp.content.get("application/json").unwrap(); + let ex = mt.examples.values().next().unwrap().as_item().unwrap(); + let val = ex.value.as_ref().unwrap(); + assert_eq!(val["token"], "[REDACTED]"); + assert_eq!(val["user"], "alice"); + } + + #[test] + fn redact_integration_pattern() { + let mut config = test_config(); + config.redact_patterns = vec!["[0-9a-f]{32,}".to_string()]; + let mut builder = OpenApiBuilder::new("https://api.example.com", &config, vec![]); + + let req = MockRequest::post("https://api.example.com/login") + .with_json_request_body( + &serde_json::json!({"session": "abcdef1234567890abcdef1234567890"}), + ) + .with_json_response(&serde_json::json!({"ok": true})) + .with_status(200, "OK"); + builder.add_request(&req); + + let spec = builder.build(); + let path_item = spec.paths.paths["/login"].as_item().unwrap(); + let op = path_item.post.as_ref().unwrap(); + let rb = op.request_body.as_ref().unwrap().as_item().unwrap(); + let mt = rb.content.get("application/json").unwrap(); + let ex = mt.examples.values().next().unwrap().as_item().unwrap(); + let val = ex.value.as_ref().unwrap(); + assert_eq!(val["session"], "[REDACTED]"); + } + + #[test] + fn redact_schema_unaffected() { + let mut config = test_config(); + config.redact_fields = vec!["token".to_string()]; + let mut builder = OpenApiBuilder::new("https://api.example.com", &config, vec![]); + + let req = MockRequest::get("https://api.example.com/auth") + 
.with_json_response(&serde_json::json!({"token": "secret123", "user": "alice"})); + builder.add_request(&req); + + let spec = builder.build(); + let path_item = spec.paths.paths["/auth"].as_item().unwrap(); + let op = path_item.get.as_ref().unwrap(); + let resp = op + .responses + .responses + .get(&StatusCode::Code(200)) + .unwrap() + .as_item() + .unwrap(); + let mt = resp.content.get("application/json").unwrap(); + let schema = mt.schema.as_ref().unwrap().as_item().unwrap(); + match &schema.schema_kind { + openapiv3::SchemaKind::Type(openapiv3::Type::Object(obj)) => { + assert!(obj.properties.contains_key("token")); + let token_schema = obj.properties["token"].as_item().unwrap(); + match &token_schema.schema_kind { + openapiv3::SchemaKind::Type(openapiv3::Type::String(_)) => {} + other => panic!("expected string schema for token, got {:?}", other), + } + } + other => panic!("expected Object schema, got {:?}", other), + } + } } diff --git a/src/cli.rs b/src/cli.rs index 2c06fde..0e08f70 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -80,6 +80,9 @@ pub struct DiscoverArgs { #[arg(long, value_parser = parse_byte_size, default_value = "2GiB")] pub max_input_size: u64, + #[arg(long, value_parser = parse_byte_size, default_value = "256MiB")] + pub max_payload_size: u64, + #[arg(long, default_value_t = false)] pub allow_symlinks: bool, @@ -91,6 +94,12 @@ pub struct DiscoverArgs { /// Write a structured JSON processing report to the given path #[arg(long)] pub report: Option, + + #[arg(long, default_value_t = false)] + pub skip_options: bool, + + #[arg(long)] + pub param_regex: Option, } #[derive(Parser, Debug)] @@ -170,4 +179,16 @@ pub struct GenerateArgs { /// Write a structured JSON processing report to the given path #[arg(long)] pub report: Option, + + #[arg(long, default_value_t = false)] + pub skip_options: bool, + + #[arg(long, default_value_t = 5)] + pub max_examples: usize, + + #[arg(long, value_delimiter = ',')] + pub redact_patterns: Vec, + + #[arg(long, 
value_delimiter = ',')] + pub redact_fields: Vec, } diff --git a/src/har_reader.rs b/src/har_reader.rs index 980209f..f162a9a 100644 --- a/src/har_reader.rs +++ b/src/har_reader.rs @@ -661,7 +661,7 @@ mod tests { .join("testdata") .join("har"); let requests = read_har_file(&dir).unwrap(); - assert_eq!(requests.len(), 5); + assert_eq!(requests.len(), 9); } #[test] diff --git a/src/lib.rs b/src/lib.rs index 6e77568..ef2d0bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,7 @@ pub mod mitmproxy_reader; pub mod output; pub mod params; pub mod path_matching; +pub mod redact; pub mod report; pub mod schema; pub mod tnetstring; @@ -44,6 +45,9 @@ pub const MAX_DEPTH: usize = 256; /// Maximum recursion depth for JSON-to-schema conversion. pub const MAX_SCHEMA_DEPTH: usize = 64; +/// Minimum distinct values at a path position to trigger variability-based parameterization. +pub const MIN_VARIABILITY_CARDINALITY: usize = 3; + /// Maximum body size for response/request bodies (64 MiB). pub const MAX_BODY_SIZE: usize = 64 * 1024 * 1024; diff --git a/src/main.rs b/src/main.rs index a607332..c0a810f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -45,15 +45,37 @@ fn run(cli: Cli) -> Result { &args.format, args.max_input_size, args.allow_symlinks, + args.max_payload_size as usize, )?; let counting_iter = CountingIterator::new(req_iter); let error_count = counting_iter.error_count.clone(); + let filtered_iter: Box< + dyn Iterator>>, + > = if args.skip_options { + Box::new(counting_iter.filter(|r| { + r.as_ref() + .map(|req| req.get_method().to_uppercase() != "OPTIONS") + .unwrap_or(true) + })) + } else { + Box::new(counting_iter) + }; + + let custom_re = args + .param_regex + .as_deref() + .map(|pat| { + regex::Regex::new(pat) + .with_context(|| format!("invalid --param-regex pattern: {pat}")) + }) + .transpose()?; + let templates = builder::discover_paths_streaming( - counting_iter, + filtered_iter, &args.prefix, - None, + custom_re.as_ref(), &args.exclude_patterns, 
&args.include_patterns, ); @@ -120,6 +142,7 @@ fn run(cli: Cli) -> Result { &args.format, args.max_input_size, args.allow_symlinks, + args.max_payload_size as usize, )?; let all_templates = load_templates(&args.templates).with_context(|| { @@ -149,6 +172,10 @@ fn run(cli: Cli) -> Result { ignore_images: args.ignore_images, suppress_params: args.suppress_params, tags_overrides: args.tags_overrides.clone(), + skip_options: args.skip_options, + max_examples: args.max_examples, + redact_patterns: args.redact_patterns.clone(), + redact_fields: args.redact_fields.clone(), }; let mut builder = OpenApiBuilder::new(&args.prefix, &config, active_templates); @@ -273,6 +300,7 @@ fn stream_input( format: &InputFormat, max_input_size: u64, allow_symlinks: bool, + max_payload_size: usize, ) -> Result { // Check symlink-ness before is_dir(), since is_dir() follows symlinks. if !allow_symlinks { @@ -295,13 +323,13 @@ fn stream_input( debug!(path = %path.display(), "Streaming as mitmproxy format"); if path.is_dir() { if reject_symlinks { - mitmproxy_reader::stream_mitmproxy_dir_no_symlinks(path) + mitmproxy_reader::stream_mitmproxy_dir_no_symlinks(path, max_payload_size) } else { - mitmproxy_reader::stream_mitmproxy_dir(path) + mitmproxy_reader::stream_mitmproxy_dir(path, max_payload_size) } .context("failed to stream mitmproxy directory") } else { - let iter = mitmproxy_reader::stream_mitmproxy_file(path) + let iter = mitmproxy_reader::stream_mitmproxy_file(path, max_payload_size) .context("failed to stream mitmproxy file")?; Ok(Box::new(iter)) } @@ -315,9 +343,9 @@ fn stream_input( if path.is_dir() { debug!(path = %path.display(), "Auto-detecting format for directory"); let mitmproxy_result = if reject_symlinks { - mitmproxy_reader::stream_mitmproxy_dir_no_symlinks(path) + mitmproxy_reader::stream_mitmproxy_dir_no_symlinks(path, max_payload_size) } else { - mitmproxy_reader::stream_mitmproxy_dir(path) + mitmproxy_reader::stream_mitmproxy_dir(path, max_payload_size) }; let 
har_result = if reject_symlinks { har_reader::stream_har_dir_no_symlinks(path) @@ -347,7 +375,7 @@ fn stream_input( if ms > hs { info!(path = %path.display(), "Auto-detected as mitmproxy format"); - let iter = mitmproxy_reader::stream_mitmproxy_file(path) + let iter = mitmproxy_reader::stream_mitmproxy_file(path, max_payload_size) .context("detected as mitmproxy format but failed to parse")?; Ok(Box::new(iter)) } else if hs > ms { @@ -357,7 +385,7 @@ fn stream_input( Ok(Box::new(iter)) } else if ms > 0 { warn!(path = %path.display(), "Ambiguous format detection, trying mitmproxy first"); - match mitmproxy_reader::stream_mitmproxy_file(path) { + match mitmproxy_reader::stream_mitmproxy_file(path, max_payload_size) { Ok(iter) => Ok(Box::new(iter)), Err(_) => { let iter = har_reader::stream_har_file(path) diff --git a/src/mitmproxy_reader.rs b/src/mitmproxy_reader.rs index c0cc7d6..7a8b781 100644 --- a/src/mitmproxy_reader.rs +++ b/src/mitmproxy_reader.rs @@ -1,3 +1,4 @@ +use std::io::Read; use std::path::Path; use tracing::{debug, warn}; @@ -153,6 +154,42 @@ fn find_header<'a>(headers: &'a [(String, String)], name: &str) -> Option<&'a st .map(|(_, v)| v.as_str()) } +fn decompress_body(body: &[u8], encoding: Option<&str>) -> Vec { + match encoding { + Some("br") => { + let mut decoded = Vec::new(); + match brotli::Decompressor::new(body, 4096).read_to_end(&mut decoded) { + Ok(_) => decoded, + Err(_) => { + warn!(event = "decompress_failed", encoding = ?encoding, "decompression failed, using raw bytes"); + body.to_vec() + } + } + } + Some("gzip") => { + let mut decoded = Vec::new(); + match flate2::read::GzDecoder::new(body).read_to_end(&mut decoded) { + Ok(_) => decoded, + Err(_) => { + warn!(event = "decompress_failed", encoding = ?encoding, "decompression failed, using raw bytes"); + body.to_vec() + } + } + } + Some("deflate") => { + let mut decoded = Vec::new(); + match flate2::read::DeflateDecoder::new(body).read_to_end(&mut decoded) { + Ok(_) => decoded, + Err(_) 
=> { + warn!(event = "decompress_failed", encoding = ?encoding, "decompression failed, using raw bytes"); + body.to_vec() + } + } + } + _ => body.to_vec(), + } +} + /// Resolve hostname: host field → Host header → authority field. fn resolve_host(request: &TNetValue, headers: &[(String, String)]) -> Option { if let Some(host) = request.get("host").and_then(value_to_string_strict) { @@ -269,11 +306,12 @@ fn parse_flow(flow: &TNetValue) -> Result { let url = build_url_with_fallback(request, &request_headers)?; + let request_encoding = find_header(&request_headers, "content-encoding").map(|v| v.to_string()); let request_body = request .get("content") .and_then(|v| if v.is_null() { None } else { v.as_bytes() }) .filter(|b| !b.is_empty()) - .map(|b| cap_body(b, &url)); + .map(|b| cap_body(&decompress_body(b, request_encoding.as_deref()), &url)); let response = flow.get("response"); @@ -301,11 +339,14 @@ fn parse_flow(flow: &TNetValue) -> Result { ); let reason = resp.get("reason").and_then(value_to_string); let headers = resp.get("headers").map(parse_headers); + let response_encoding = headers + .as_ref() + .and_then(|h| find_header(h, "content-encoding").map(|v| v.to_string())); let body = resp .get("content") .and_then(|v| if v.is_null() { None } else { v.as_bytes() }) .filter(|b| !b.is_empty()) - .map(|b| cap_body(b, &url)); + .map(|b| cap_body(&decompress_body(b, response_encoding.as_deref()), &url)); let content_type = headers .as_ref() .and_then(|h| find_header(h, "content-type").map(|v| v.to_string())); @@ -329,14 +370,15 @@ fn parse_flow(flow: &TNetValue) -> Result { pub fn stream_mitmproxy_file( path: &Path, + max_payload_size: usize, ) -> Result>>> { let file = std::fs::File::open(path)?; let reader = std::io::BufReader::with_capacity(64 * 1024, file); let display_path = path.display().to_string(); Ok( - tnetstring::TNetStringIter::new(reader).filter_map( - move |value_result| match value_result { + tnetstring::TNetStringIter::with_limits(reader, 
max_payload_size, crate::MAX_DEPTH) + .filter_map(move |value_result| match value_result { Ok(flow) => { let flow_type = flow.get("type").and_then(value_to_string); if flow_type.as_deref() != Some("http") { @@ -355,20 +397,26 @@ pub fn stream_mitmproxy_file( warn!(path = %display_path, error = %e, "Skipping unparseable flow entry"); Some(Err(e)) } - }, - ), + }), ) } -pub fn stream_mitmproxy_dir(path: &Path) -> Result { - stream_mitmproxy_dir_inner(path, false) +pub fn stream_mitmproxy_dir(path: &Path, max_payload_size: usize) -> Result { + stream_mitmproxy_dir_inner(path, false, max_payload_size) } -pub fn stream_mitmproxy_dir_no_symlinks(path: &Path) -> Result { - stream_mitmproxy_dir_inner(path, true) +pub fn stream_mitmproxy_dir_no_symlinks( + path: &Path, + max_payload_size: usize, +) -> Result { + stream_mitmproxy_dir_inner(path, true, max_payload_size) } -fn stream_mitmproxy_dir_inner(path: &Path, reject_symlinks: bool) -> Result { +fn stream_mitmproxy_dir_inner( + path: &Path, + reject_symlinks: bool, + max_payload_size: usize, +) -> Result { let mut entries: Vec<_> = std::fs::read_dir(path)? .filter_map(|e| match e { Ok(entry) => Some(entry), @@ -408,7 +456,7 @@ fn stream_mitmproxy_dir_inner(path: &Path, reject_symlinks: bool) -> Result = Vec::new(); for entry in entries { - match stream_mitmproxy_file(&entry.path()) { + match stream_mitmproxy_file(&entry.path(), max_payload_size) { Ok(iter) => iters.push(Box::new(iter)), Err(e) => { warn!(path = %entry.path().display(), error = %e, "Skipping unreadable flow file"); @@ -420,13 +468,15 @@ fn stream_mitmproxy_dir_inner(path: &Path, reject_symlinks: bool) -> Result Result>> { - Ok(stream_mitmproxy_file(path)? + Ok(stream_mitmproxy_file(path, crate::MAX_PAYLOAD_SIZE)? .filter_map(|r| r.ok()) .collect()) } pub fn read_mitmproxy_dir(path: &Path) -> Result>> { - Ok(stream_mitmproxy_dir(path)?.filter_map(|r| r.ok()).collect()) + Ok(stream_mitmproxy_dir(path, crate::MAX_PAYLOAD_SIZE)? 
+ .filter_map(|r| r.ok()) + .collect()) } /// Heuristic: does this file look like a mitmproxy flow dump? @@ -961,4 +1011,61 @@ mod tests { "null byte should not be in URL" ); } + + #[test] + fn decompress_body_brotli() { + use brotli::enc::BrotliCompress; + let original = b"hello brotli world"; + let mut compressed = Vec::new(); + BrotliCompress( + &mut &original[..], + &mut compressed, + &brotli::enc::BrotliEncoderParams::default(), + ) + .unwrap(); + let decoded = decompress_body(&compressed, Some("br")); + assert_eq!(decoded, original); + } + + #[test] + fn decompress_body_gzip() { + use flate2::write::GzEncoder; + use std::io::Write; + let original = b"hello gzip world"; + let mut encoder = GzEncoder::new(Vec::new(), flate2::Compression::default()); + encoder.write_all(original).unwrap(); + let compressed = encoder.finish().unwrap(); + let decoded = decompress_body(&compressed, Some("gzip")); + assert_eq!(decoded, original); + } + + #[test] + fn decompress_body_deflate() { + use flate2::write::DeflateEncoder; + use std::io::Write; + let original = b"hello deflate world"; + let mut encoder = DeflateEncoder::new(Vec::new(), flate2::Compression::default()); + encoder.write_all(original).unwrap(); + let compressed = encoder.finish().unwrap(); + let decoded = decompress_body(&compressed, Some("deflate")); + assert_eq!(decoded, original); + } + + #[test] + fn decompress_body_invalid_data_fallback() { + let garbage = b"not valid compressed data at all"; + let decoded = decompress_body(garbage, Some("gzip")); + assert_eq!( + decoded, garbage, + "invalid data should fall back to raw bytes" + ); + } + + #[test] + fn decompress_body_no_encoding_passthrough() { + let raw = b"plain text body"; + assert_eq!(decompress_body(raw, None), raw); + assert_eq!(decompress_body(raw, Some("identity")), raw); + assert_eq!(decompress_body(raw, Some("unknown")), raw); + } } diff --git a/src/path_matching.rs b/src/path_matching.rs index d1788de..00428f5 100644 --- a/src/path_matching.rs 
+++ b/src/path_matching.rs @@ -2,7 +2,7 @@ //! detect parameter segments, and suggest parameterized templates from observed paths. use regex::Regex; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; /// Convert a path template like "/api/v1/users/{id}" to a regex pattern. /// @@ -94,7 +94,7 @@ fn path_to_regex_pattern(template: &str) -> std::result::Result) -> bool { if segment.is_empty() { return false; } + if is_version_prefix(segment) { + return false; + } if is_numeric_string(segment) || is_uuid(segment) { return true; } + if is_upper_case_slug(segment) || is_hex_string(segment) || is_base58(segment) { + return true; + } if let Some(re) = custom_regex { return re.is_match(segment); } @@ -125,20 +131,46 @@ pub fn is_param_segment(segment: &str, custom_regex: Option<&Regex>) -> bool { /// let templates = suggest_param_templates(&paths, None); /// assert_eq!(templates, vec!["/users/{id}"]); /// ``` +fn is_version_prefix(s: &str) -> bool { + let mut chars = s.chars(); + matches!(chars.next(), Some('v')) && chars.all(|c| c.is_ascii_digit()) && s.len() >= 2 +} + pub fn suggest_param_templates(paths: &[String], custom_regex: Option<&Regex>) -> Vec { + let mut position_values: HashMap<(usize, usize), HashSet<&str>> = HashMap::new(); + for path in paths { + let segs: Vec<&str> = path.split('/').collect(); + let n = segs.len(); + for (i, seg) in segs.iter().enumerate() { + if seg.is_empty() { + continue; + } + position_values.entry((n, i)).or_default().insert(seg); + } + } + let variability_params: HashSet<(usize, usize)> = position_values + .into_iter() + .filter(|(_, vals)| { + vals.len() >= crate::MIN_VARIABILITY_CARDINALITY + && !vals.iter().any(|v| is_version_prefix(v)) + }) + .map(|(k, _)| k) + .collect(); + let mut templates: HashSet = HashSet::new(); for path in paths { let segments: Vec<&str> = path.split('/').collect(); + let n = segments.len(); let mut param_count = 0u32; let mut template_segments: Vec = Vec::new(); - for segment in 
&segments { + for (i, segment) in segments.iter().enumerate() { if segment.is_empty() { template_segments.push(String::new()); continue; } - if is_param_segment(segment, custom_regex) { + if is_param_segment(segment, custom_regex) || variability_params.contains(&(n, i)) { param_count += 1; template_segments.push(format!("{{__P{}}}", param_count)); } else { @@ -428,4 +460,85 @@ mod tests { assert!(re.is_match("/api/(v1)/data")); assert!(!re.is_match("/api/v1/data")); } + + // ── new heuristics: UPPER_CASE, hex, base58 ──────────────────── + + #[test] + fn upper_case_slug_is_param() { + assert!(is_param_segment("BTC_USDT", None)); + assert!(is_param_segment("ETH_BTC", None)); + } + + #[test] + fn hex_string_is_param() { + assert!(is_param_segment("0xabcdef12345678", None)); + } + + #[test] + fn base58_is_param() { + assert!(is_param_segment("5KJvsngHeMpm88xU9Fcd", None)); + } + + #[test] + fn version_prefix_not_param() { + assert!(!is_param_segment("v1", None)); + } + + #[test] + fn common_words_not_param() { + assert!(!is_param_segment("api", None)); + assert!(!is_param_segment("users", None)); + } + + #[test] + fn short_uppercase_not_param() { + assert!(!is_param_segment("ID", None)); + } + + // ── variability detection ────────────────────────────────────── + + #[test] + fn variability_three_values_parameterized() { + // lowercase slugs not caught by format heuristics — only variability detects these + let paths = vec![ + "/api/v1/pairs/btc-usdt".to_string(), + "/api/v1/pairs/eth-btc".to_string(), + "/api/v1/pairs/sol-usdt".to_string(), + ]; + let templates = suggest_param_templates(&paths, None); + assert_eq!(templates, vec!["/api/v1/pairs/{id}"]); + } + + #[test] + fn variability_two_values_not_parameterized() { + let paths = vec![ + "/api/v1/status/active".to_string(), + "/api/v1/status/inactive".to_string(), + ]; + let templates = suggest_param_templates(&paths, None); + assert_eq!(templates.len(), 2); + 
assert!(templates.contains(&"/api/v1/status/active".to_string())); + assert!(templates.contains(&"/api/v1/status/inactive".to_string())); + } + + #[test] + fn variability_version_not_parameterized() { + let paths = vec![ + "/api/v1/users".to_string(), + "/api/v2/users".to_string(), + "/api/v3/users".to_string(), + ]; + let templates = suggest_param_templates(&paths, None); + assert_eq!(templates.len(), 3); + assert!(templates.contains(&"/api/v1/users".to_string())); + assert!(templates.contains(&"/api/v2/users".to_string())); + assert!(templates.contains(&"/api/v3/users".to_string())); + } + + #[test] + fn variability_combined_with_heuristic() { + let paths = vec!["/api/v1/users/12345".to_string()]; + let templates = suggest_param_templates(&paths, None); + assert_eq!(templates, vec!["/api/v1/users/{id}"]); + } } diff --git a/src/redact.rs b/src/redact.rs new file mode 100644 index 0000000..abb7dbf --- /dev/null +++ b/src/redact.rs @@ -0,0 +1,88 @@ +use regex::Regex; +use serde_json::Value; + +pub struct Redactor { + patterns: Vec, + fields: Vec, +} + +impl Redactor { + pub fn new(patterns: &[String], fields: &[String]) -> Result { + let patterns = patterns + .iter() + .map(|p| Regex::new(p)) + .collect::, _>>()?; + Ok(Self { + patterns, + fields: fields.to_vec(), + }) + } + + pub fn redact(&self, value: &mut Value) { + match value { + Value::Object(map) => { + for (key, val) in map.iter_mut() { + if self.fields.contains(key) { + *val = Value::String("[REDACTED]".to_string()); + } else { + self.redact(val); + } + } + } + Value::Array(arr) => { + for item in arr.iter_mut() { + self.redact(item); + } + } + Value::String(s) if self.patterns.iter().any(|p| p.is_match(s)) => { + *value = Value::String("[REDACTED]".to_string()); + } + _ => {} + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn redact_nested_field() { + let r = Redactor::new(&[], &["token".to_string()]).unwrap(); + let mut v = json!({"user": {"token": 
"secret123", "name": "Alice"}}); + r.redact(&mut v); + assert_eq!(v, json!({"user": {"token": "[REDACTED]", "name": "Alice"}})); + } + + #[test] + fn redact_hex_pattern() { + let r = Redactor::new(&["[0-9a-f]{32,}".to_string()], &[]).unwrap(); + let mut v = json!({"session": "abcdef1234567890abcdef1234567890", "count": 5}); + r.redact(&mut v); + assert_eq!(v, json!({"session": "[REDACTED]", "count": 5})); + } + + #[test] + fn redact_invalid_regex() { + let result = Redactor::new(&["[invalid".to_string()], &[]); + assert!(result.is_err()); + } + + #[test] + fn redact_empty_noop() { + let r = Redactor::new(&[], &[]).unwrap(); + let mut v = json!({"hello": "world", "num": 42}); + let expected = v.clone(); + r.redact(&mut v); + assert_eq!(v, expected); + } + + #[test] + fn redact_array_items() { + let r = Redactor::new(&[], &["token".to_string()]).unwrap(); + let mut v = json!([{"token": "secret"}, {"token": "other"}]); + r.redact(&mut v); + assert_eq!(v, json!([{"token": "[REDACTED]"}, {"token": "[REDACTED]"}])); + } +} diff --git a/src/tnetstring.rs b/src/tnetstring.rs index 5ac500b..91147d5 100644 --- a/src/tnetstring.rs +++ b/src/tnetstring.rs @@ -136,6 +136,23 @@ impl TrackingReader { } } + #[allow(clippy::indexing_slicing)] // to_read = remaining.min(buf.len()) guarantees in-bounds + fn skip_bytes(&mut self, count: usize) -> Result<(), Error> { + let mut remaining = count; + let mut buf = [0u8; 8192]; + while remaining > 0 { + let to_read = remaining.min(buf.len()); + // SAFETY: to_read <= buf.len() guaranteed by min() + let slice = &mut buf[..to_read]; + self.inner.read_exact(slice).map_err(|e| { + self.make_error(format!("unexpected end of input during skip: {e}")) + })?; + remaining -= to_read; + } + self.offset += count; + Ok(()) + } + fn make_error(&self, message: String) -> Error { Error::TNetParse { offset: self.offset, @@ -469,6 +486,22 @@ impl Iterator for TNetStringIter { self.done = true; None } + Err(Error::TNetStringPayloadTooLarge { len, max }) => { 
+ warn!( + event = "tnetstring_entry_skipped", + byte_offset = self.reader.offset, + payload_len = len, + max_payload = max, + "skipping oversized tnetstring entry and continuing" + ); + // Reader is positioned after the colon, before the data. + // Skip data (len bytes) + tag (1 byte) to reach the next entry. + if let Err(skip_err) = self.reader.skip_bytes(len + 1) { + self.done = true; + return Some(Err(skip_err)); + } + Some(Err(Error::TNetStringPayloadTooLarge { len, max })) + } Err(e) => { let error_kind = classify_error_kind(&e); warn!( diff --git a/src/type_hints.rs b/src/type_hints.rs index f547bab..cb45097 100644 --- a/src/type_hints.rs +++ b/src/type_hints.rs @@ -12,6 +12,38 @@ pub(crate) fn is_numeric_string(s: &str) -> bool { !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()) } +pub(crate) fn is_upper_case_slug(s: &str) -> bool { + if s.is_empty() { + return false; + } + let all_upper_digit_underscore = s + .chars() + .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_'); + if !all_upper_digit_underscore { + return false; + } + let has_underscore = s.contains('_'); + let all_alpha = s.chars().all(|c| c.is_ascii_uppercase()); + if has_underscore { + s.len() >= 3 + } else { + all_alpha && s.len() >= 4 + } +} + +pub(crate) fn is_hex_string(s: &str) -> bool { + if let Some(rest) = s.strip_prefix("0x") { + rest.len() >= 8 && rest.chars().all(|c| c.is_ascii_hexdigit()) + } else { + s.len() >= 16 && s.chars().all(|c| matches!(c, '0'..='9' | 'a'..='f')) + } +} + +pub(crate) fn is_base58(s: &str) -> bool { + const ALPHABET: &[u8] = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; + s.len() >= 20 && s.bytes().all(|b| ALPHABET.contains(&b)) +} + /// Check if a string looks like a UUID (8-4-4-4-12 hex pattern). 
pub(crate) fn is_uuid(s: &str) -> bool { let parts: Vec<&str> = s.split('-').collect(); diff --git a/src/types.rs b/src/types.rs index 12d0012..6ca62a8 100644 --- a/src/types.rs +++ b/src/types.rs @@ -23,4 +23,8 @@ pub struct Config { pub ignore_images: bool, pub suppress_params: bool, pub tags_overrides: Option, + pub skip_options: bool, + pub max_examples: usize, + pub redact_patterns: Vec, + pub redact_fields: Vec, } diff --git a/testdata/har/crypto_pairs.har b/testdata/har/crypto_pairs.har new file mode 100644 index 0000000..6cbfa97 --- /dev/null +++ b/testdata/har/crypto_pairs.har @@ -0,0 +1 @@ +{"log":{"version":"1.2","creator":{"name":"test","version":"1.0"},"entries":[{"startedDateTime":"2025-01-15T10:30:00.000Z","time":100,"request":{"method":"GET","url":"https://api.example.com/api/pairs/BTC_USDT","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":2,"mimeType":"application/json","text":"{}"},"redirectURL":"","headersSize":-1,"bodySize":2},"cache":{},"timings":{"send":1,"wait":90,"receive":9}},{"startedDateTime":"2025-01-15T10:30:01.000Z","time":100,"request":{"method":"GET","url":"https://api.example.com/api/pairs/ETH_USDT","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":2,"mimeType":"application/json","text":"{}"},"redirectURL":"","headersSize":-1,"bodySize":2},"cache":{},"timings":{"send":1,"wait":90,"receive":9}}]}} diff --git a/testdata/har/with_options.har b/testdata/har/with_options.har new file mode 100644 index 0000000..b9bd722 --- /dev/null +++ b/testdata/har/with_options.har @@ -0,0 +1,73 @@ +{ + "log": { + "version": "1.2", + 
"creator": { + "name": "test", + "version": "1.0" + }, + "entries": [ + { + "startedDateTime": "2025-01-15T10:30:00.000Z", + "time": 100, + "request": { + "method": "GET", + "url": "https://api.example.com/api/users", + "httpVersion": "HTTP/1.1", + "cookies": [], + "headers": [], + "queryString": [], + "headersSize": -1, + "bodySize": 0 + }, + "response": { + "status": 200, + "statusText": "OK", + "httpVersion": "HTTP/1.1", + "cookies": [], + "headers": [{"name": "Content-Type", "value": "application/json"}], + "content": { + "size": 2, + "mimeType": "application/json", + "text": "[]" + }, + "redirectURL": "", + "headersSize": -1, + "bodySize": 2 + }, + "cache": {}, + "timings": {"send": 1, "wait": 90, "receive": 9} + }, + { + "startedDateTime": "2025-01-15T10:30:01.000Z", + "time": 10, + "request": { + "method": "OPTIONS", + "url": "https://api.example.com/api/preflight", + "httpVersion": "HTTP/1.1", + "cookies": [], + "headers": [], + "queryString": [], + "headersSize": -1, + "bodySize": 0 + }, + "response": { + "status": 204, + "statusText": "No Content", + "httpVersion": "HTTP/1.1", + "cookies": [], + "headers": [{"name": "Allow", "value": "GET, OPTIONS"}], + "content": { + "size": 0, + "mimeType": "text/plain", + "text": "" + }, + "redirectURL": "", + "headersSize": -1, + "bodySize": 0 + }, + "cache": {}, + "timings": {"send": 1, "wait": 8, "receive": 1} + } + ] + } +} diff --git a/tests/cli_param_regex_removed.rs b/tests/cli_param_regex_removed.rs deleted file mode 100644 index dc81e97..0000000 --- a/tests/cli_param_regex_removed.rs +++ /dev/null @@ -1,26 +0,0 @@ -use assert_cmd::Command; - -#[test] -fn param_regex_flag_rejected_as_unknown() { - let mut cmd = Command::cargo_bin("mitm2openapi").unwrap(); - cmd.args([ - "generate", - "-i", - "nonexistent.flow", - "-t", - "nonexistent.yaml", - "-o", - "out.yaml", - "-p", - "https://example.com", - "--param-regex", - "foo", - ]); - cmd.assert().failure(); - let output = cmd.output().unwrap(); - let stderr = 
String::from_utf8_lossy(&output.stderr); - assert!( - stderr.contains("unexpected argument") || stderr.contains("unknown"), - "expected 'unexpected argument' in stderr, got: {stderr}" - ); -} diff --git a/tests/fixtures/full_pipeline.har b/tests/fixtures/full_pipeline.har new file mode 100644 index 0000000..36366df --- /dev/null +++ b/tests/fixtures/full_pipeline.har @@ -0,0 +1,9 @@ +{"log":{"version":"1.2","creator":{"name":"test","version":"1.0"},"entries":[ + {"startedDateTime":"2025-01-15T10:00:00.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/pairs/BTC_USDT","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":100,"mimeType":"application/json","text":"{\"token\":\"secret123\",\"session\":\"abcdef1234567890abcdef1234567890\",\"data\":{\"symbol\":\"BTC_USDT\",\"price\":\"67000.50\"}}"},"redirectURL":"","headersSize":-1,"bodySize":100},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + {"startedDateTime":"2025-01-15T10:00:01.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/pairs/ETH_BTC","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":100,"mimeType":"application/json","text":"{\"token\":\"secret456\",\"session\":\"1234567890abcdef1234567890abcdef\",\"data\":{\"symbol\":\"ETH_BTC\",\"price\":\"0.0521\"}}"},"redirectURL":"","headersSize":-1,"bodySize":100},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + 
{"startedDateTime":"2025-01-15T10:00:02.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/pairs/SOL_USDT","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":100,"mimeType":"application/json","text":"{\"token\":\"secret789\",\"session\":\"deadbeef1234567890abcdef12345678\",\"data\":{\"symbol\":\"SOL_USDT\",\"price\":\"145.20\"}}"},"redirectURL":"","headersSize":-1,"bodySize":100},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + {"startedDateTime":"2025-01-15T10:00:03.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/pairs/DOT_USDT","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":100,"mimeType":"application/json","text":"{\"token\":\"secretabc\",\"session\":\"aabbccdd1234567890abcdef12345678\",\"data\":{\"symbol\":\"DOT_USDT\",\"price\":\"7.85\"}}"},"redirectURL":"","headersSize":-1,"bodySize":100},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + {"startedDateTime":"2025-01-15T10:00:04.000Z","time":10,"request":{"method":"OPTIONS","url":"https://api.example.com/api/v1/pairs/BTC_USDT","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":204,"statusText":"No Content","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Allow","value":"GET, OPTIONS"}],"content":{"size":0,"mimeType":"text/plain","text":""},"redirectURL":"","headersSize":-1,"bodySize":0},"cache":{},"timings":{"send":1,"wait":8,"receive":1}}, + 
{"startedDateTime":"2025-01-15T10:00:05.000Z","time":80,"request":{"method":"POST","url":"https://api.example.com/api/v1/orders","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"queryString":[],"headersSize":-1,"bodySize":50,"postData":{"mimeType":"application/json","text":"{\"symbol\":\"BTC_USDT\",\"side\":\"buy\",\"amount\":0.5}"}},"response":{"status":201,"statusText":"Created","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":50,"mimeType":"application/json","text":"{\"order_id\":\"ord_001\",\"status\":\"pending\"}"},"redirectURL":"","headersSize":-1,"bodySize":50},"cache":{},"timings":{"send":1,"wait":70,"receive":9}}, + {"startedDateTime":"2025-01-15T10:00:06.000Z","time":80,"request":{"method":"POST","url":"https://api.example.com/api/v1/orders","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"queryString":[],"headersSize":-1,"bodySize":55,"postData":{"mimeType":"application/json","text":"{\"symbol\":\"ETH_BTC\",\"side\":\"sell\",\"amount\":2.0}"}},"response":{"status":201,"statusText":"Created","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":50,"mimeType":"application/json","text":"{\"order_id\":\"ord_002\",\"status\":\"pending\"}"},"redirectURL":"","headersSize":-1,"bodySize":50},"cache":{},"timings":{"send":1,"wait":70,"receive":9}} +]}} diff --git a/tests/full_pipeline.rs b/tests/full_pipeline.rs new file mode 100644 index 0000000..3dd7edd --- /dev/null +++ b/tests/full_pipeline.rs @@ -0,0 +1,104 @@ +use assert_cmd::Command; +use tempfile::TempDir; + +fn har_fixture(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(name) +} + +const PREFIX: &str = "https://api.example.com"; + +#[test] +fn full_pipeline_all_features() { + let dir 
= TempDir::new().unwrap(); + let templates = dir.path().join("templates.yaml"); + let output = dir.path().join("openapi.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "discover", + "-i", + har_fixture("full_pipeline.har").to_str().unwrap(), + "-o", + templates.to_str().unwrap(), + "-p", + PREFIX, + "--skip-options", + "--param-regex", + "PERP_[A-Z_0-9]+", + ]) + .assert() + .success(); + + let tmpl_content = std::fs::read_to_string(&templates).unwrap(); + assert!( + tmpl_content.contains("{id}"), + "4 distinct pair symbols should trigger parameterization, got:\n{tmpl_content}" + ); + assert!( + !tmpl_content.contains("OPTIONS"), + "OPTIONS entries should not appear in templates with --skip-options" + ); + + let activated = tmpl_content + .lines() + .map(|line| { + if line.contains("{id}") || line.contains("/orders") { + line.replace("ignore:", "") + } else { + line.to_string() + } + }) + .collect::>() + .join("\n"); + std::fs::write(&templates, &activated).unwrap(); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + har_fixture("full_pipeline.har").to_str().unwrap(), + "-t", + templates.to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + "--skip-options", + "--max-examples", + "3", + "--redact-fields", + "token", + "--redact-patterns", + "[0-9a-f]{32}", + ]) + .assert() + .success(); + + let spec = std::fs::read_to_string(&output).unwrap(); + assert!( + spec.contains("examples:"), + "spec should contain examples section" + ); + assert!( + spec.contains("[REDACTED]"), + "token values should be redacted" + ); + assert!( + !spec.contains("options:"), + "OPTIONS operations should not appear in spec with --skip-options" + ); + assert!(spec.contains("get:"), "spec should contain GET operations"); + assert!( + spec.contains("post:"), + "spec should contain POST operations" + ); + assert!( + spec.contains("{id}"), + "parameterized path should appear in spec" + ); +} diff --git 
a/tests/merge_responses.rs b/tests/merge_responses.rs index cd4762e..31418f8 100644 --- a/tests/merge_responses.rs +++ b/tests/merge_responses.rs @@ -88,6 +88,10 @@ fn test_config() -> Config { ignore_images: false, suppress_params: false, tags_overrides: None, + skip_options: false, + max_examples: 5, + redact_patterns: vec![], + redact_fields: vec![], } } diff --git a/tests/param_regex.rs b/tests/param_regex.rs new file mode 100644 index 0000000..facb434 --- /dev/null +++ b/tests/param_regex.rs @@ -0,0 +1,61 @@ +use assert_cmd::Command; +use tempfile::TempDir; + +fn har_fixture(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("testdata") + .join("har") + .join(name) +} + +const PREFIX: &str = "https://api.example.com"; + +#[test] +fn discover_param_regex_detects_crypto_pair() { + let dir = TempDir::new().unwrap(); + let templates = dir.path().join("templates.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "discover", + "-i", + har_fixture("crypto_pairs.har").to_str().unwrap(), + "-o", + templates.to_str().unwrap(), + "-p", + PREFIX, + "--param-regex", + "[A-Z]{3}_[A-Z]{3,}", + ]) + .assert() + .success(); + + let content = std::fs::read_to_string(&templates).unwrap(); + assert!( + content.contains("{id}"), + "BTC_USDT/ETH_USDT should be collapsed into {{id}} template, got:\n{content}" + ); +} + +#[test] +fn discover_invalid_param_regex_exits_nonzero() { + let dir = TempDir::new().unwrap(); + let templates = dir.path().join("templates.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "discover", + "-i", + har_fixture("crypto_pairs.har").to_str().unwrap(), + "-o", + templates.to_str().unwrap(), + "-p", + PREFIX, + "--param-regex", + "[invalid(regex", + ]) + .assert() + .failure(); +} diff --git a/tests/security.rs b/tests/security.rs index a897b19..92ffde5 100644 --- a/tests/security.rs +++ b/tests/security.rs @@ -118,7 +118,10 @@ fn 
symlink_dir_entry_rejected_in_mitmproxy() { let link_file = dir.path().join("linked.flow"); unix_fs::symlink(&real_file, &link_file).unwrap(); - let iter = mitm2openapi::mitmproxy_reader::stream_mitmproxy_dir_no_symlinks(dir.path()); + let iter = mitm2openapi::mitmproxy_reader::stream_mitmproxy_dir_no_symlinks( + dir.path(), + mitm2openapi::MAX_PAYLOAD_SIZE, + ); assert!(iter.is_ok(), "should open directory"); let results: Vec<_> = iter.unwrap().filter_map(|r| r.ok()).collect(); @@ -127,10 +130,13 @@ fn symlink_dir_entry_rejected_in_mitmproxy() { "real file should produce at least one flow" ); - let all_results: Vec<_> = mitm2openapi::mitmproxy_reader::stream_mitmproxy_dir(dir.path()) - .unwrap() - .filter_map(|r| r.ok()) - .collect(); + let all_results: Vec<_> = mitm2openapi::mitmproxy_reader::stream_mitmproxy_dir( + dir.path(), + mitm2openapi::MAX_PAYLOAD_SIZE, + ) + .unwrap() + .filter_map(|r| r.ok()) + .collect(); assert!( all_results.len() > results.len(), "without symlink rejection, both files should be processed" diff --git a/tests/skip_options.rs b/tests/skip_options.rs new file mode 100644 index 0000000..d961fd0 --- /dev/null +++ b/tests/skip_options.rs @@ -0,0 +1,171 @@ +use assert_cmd::Command; +use tempfile::TempDir; + +fn har_fixture(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("testdata") + .join("har") + .join(name) +} + +const PREFIX: &str = "https://api.example.com"; + +/// Discover with --skip-options: OPTIONS path should NOT appear in templates. 
+#[test] +fn discover_skip_options_excludes_options_method() { + let dir = TempDir::new().unwrap(); + let templates = dir.path().join("templates.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "discover", + "-i", + har_fixture("with_options.har").to_str().unwrap(), + "-o", + templates.to_str().unwrap(), + "-p", + PREFIX, + "--skip-options", + ]) + .assert() + .success(); + + let content = std::fs::read_to_string(&templates).unwrap(); + assert!( + content.contains("/api/users"), + "GET /api/users should be discovered" + ); + assert!( + !content.contains("/api/preflight"), + "OPTIONS-only path /api/preflight should not appear when --skip-options is set" + ); +} + +/// Discover WITHOUT --skip-options: OPTIONS path SHOULD appear in templates. +#[test] +fn discover_without_skip_options_includes_options_method() { + let dir = TempDir::new().unwrap(); + let templates = dir.path().join("templates.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "discover", + "-i", + har_fixture("with_options.har").to_str().unwrap(), + "-o", + templates.to_str().unwrap(), + "-p", + PREFIX, + ]) + .assert() + .success(); + + let content = std::fs::read_to_string(&templates).unwrap(); + assert!( + content.contains("/api/preflight"), + "OPTIONS path /api/preflight should appear when --skip-options is NOT set" + ); +} + +/// Generate with --skip-options: OPTIONS operation should NOT appear in OpenAPI spec. 
+#[test] +fn generate_skip_options_excludes_options_operation() { + let dir = TempDir::new().unwrap(); + let templates = dir.path().join("templates.yaml"); + let output = dir.path().join("openapi.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "discover", + "-i", + har_fixture("with_options.har").to_str().unwrap(), + "-o", + templates.to_str().unwrap(), + "-p", + PREFIX, + ]) + .assert() + .success(); + + let tmpl_content = std::fs::read_to_string(&templates).unwrap(); + let activated = tmpl_content.replace("ignore:", ""); + std::fs::write(&templates, activated).unwrap(); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + har_fixture("with_options.har").to_str().unwrap(), + "-t", + templates.to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + "--skip-options", + ]) + .assert() + .success(); + + let spec = std::fs::read_to_string(&output).unwrap(); + assert!( + !spec.contains("options:"), + "OPTIONS operation should not appear in spec when --skip-options is set" + ); + assert!( + spec.contains("get:"), + "GET operation should still be present" + ); +} + +/// Generate WITHOUT --skip-options: OPTIONS operation SHOULD appear in OpenAPI spec. 
+#[test] +fn generate_without_skip_options_includes_options_operation() { + let dir = TempDir::new().unwrap(); + let templates = dir.path().join("templates.yaml"); + let output = dir.path().join("openapi.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "discover", + "-i", + har_fixture("with_options.har").to_str().unwrap(), + "-o", + templates.to_str().unwrap(), + "-p", + PREFIX, + ]) + .assert() + .success(); + + let tmpl_content = std::fs::read_to_string(&templates).unwrap(); + let activated = tmpl_content.replace("ignore:", ""); + std::fs::write(&templates, activated).unwrap(); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + har_fixture("with_options.har").to_str().unwrap(), + "-t", + templates.to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + ]) + .assert() + .success(); + + let spec = std::fs::read_to_string(&output).unwrap(); + assert!( + spec.contains("options:"), + "OPTIONS operation should appear in spec when --skip-options is NOT set" + ); +}