From 8cc4f3800e3a89576dd2864e55f0231f38431102 Mon Sep 17 00:00:00 2001 From: Frank Bryden Date: Mon, 30 Mar 2026 16:51:19 +0200 Subject: [PATCH 1/6] Support indexing on arrays in body matching --- sds/src/match_validation/config_v2.rs | 9 +- sds/src/scanner/test/match_validation.rs | 133 ++++++++++++++++++++++- 2 files changed, 138 insertions(+), 4 deletions(-) diff --git a/sds/src/match_validation/config_v2.rs b/sds/src/match_validation/config_v2.rs index 8e879ae9..19f24cda 100644 --- a/sds/src/match_validation/config_v2.rs +++ b/sds/src/match_validation/config_v2.rs @@ -162,7 +162,14 @@ fn matches_body(body_matcher: &BTreeMap<String, BodyMatcher>, body: &str) -> boo let parts = path.split('.'); let mut value = &parsed_body; for part in parts { - value = match value.get(part) { + let next = if let Ok(index) = part.parse::<usize>() { + // Numeric segment: try array index first, fall back to string key + // (handles both arrays and objects with numeric string keys like {"0": ...}) + value.get(index).or_else(|| value.get(part)) + } else { + value.get(part) + }; + value = match next { Some(value) => value, None => return false, }; diff --git a/sds/src/scanner/test/match_validation.rs b/sds/src/scanner/test/match_validation.rs index 04c7ec96..0cce300f 100644 --- a/sds/src/scanner/test/match_validation.rs +++ b/sds/src/scanner/test/match_validation.rs @@ -3,9 +3,9 @@ use crate::match_validation::config_v2::TemplatedMatchString; use crate::match_validation::validator_utils::generate_aws_headers_and_body; use crate::scanner::RootRuleConfig; use crate::{ - AwsConfig, AwsType, CustomHttpConfig, CustomHttpConfigV2, HttpCallConfig, HttpErrorInfo, - HttpMethod, HttpRequestConfig, HttpResponseConfig, InternalMatchValidationType, MatchAction, - MatchPairingConfig, MatchStatus, MatchValidationType, PairedValidatorConfig, + AwsConfig, AwsType, BodyMatcher, CustomHttpConfig, CustomHttpConfigV2, HttpCallConfig, + HttpErrorInfo, HttpMethod, HttpRequestConfig, HttpResponseConfig, InternalMatchValidationType, +
MatchAction, MatchPairingConfig, MatchStatus, MatchValidationType, PairedValidatorConfig, ProximityKeywordsConfig, RegexRuleConfig, ResponseCondition, ResponseConditionType, RuleMatch, Scanner, ScannerBuilder, StatusCodeMatcher, UnknownResponseTypeInfo, ValidationError, }; @@ -1088,3 +1088,130 @@ fn test_match_pairing_rule_can_consume_and_provide() { mock_app_key_valid.assert_hits(1); } } + +// A numeric path segment (e.g. `details.0.@type`) must index into a JSON array. +// The response body has `details` as an array, so `details.0.@type` navigates to +// the first element's `@type` field and the condition correctly fires as Valid. +#[test] +fn test_body_path_numeric_segment_indexes_into_array() { + let server = MockServer::start(); + + let response_body = r#"{"error":{"code":400,"message":"Invalid JSON payload received. Unknown name \"{}\": Cannot bind query parameter. Field '{}' could not be found in request message.","status":"INVALID_ARGUMENT","details":[{"@type":"type.googleapis.com/google.rpc.BadRequest","fieldViolations":[{"description":"Invalid JSON payload received. Unknown name \"{}\": Cannot bind query parameter. 
Field '{}' could not be found in request message."}]}]}}"#; + + let mock = server.mock(|when, then| { + when.method(POST) + .path("/v1beta/models/gemini:countTokens") + .query_param("key", "valid_gemini_key") + .header("User-Agent", "Datadog Match Validator") + .header("api-key", "valid_gemini_key"); + then.status(400) + .header("content-type", "application/json") + .body(response_body); + }); + + let http_config_v2 = CustomHttpConfigV2 { + match_pairing: None, + provides: None, + calls: vec![HttpCallConfig { + request: HttpRequestConfig { + endpoint: TemplatedMatchString(format!( + "{}/v1beta/models/gemini:countTokens?key=$MATCH", + server.base_url() + )), + method: HttpMethod::Post, + hosts: vec![], + headers: BTreeMap::from([ + ( + "User-Agent".to_string(), + TemplatedMatchString("Datadog Match Validator".to_string()), + ), + ( + "api-key".to_string(), + TemplatedMatchString("$MATCH".to_string()), + ), + ]), + body: Some(TemplatedMatchString("{}".to_string())), + timeout: Duration::from_secs(3), + }, + response: HttpResponseConfig { + conditions: vec![ + ResponseCondition { + condition_type: ResponseConditionType::Valid, + status_code: Some(StatusCodeMatcher::Single(400)), + raw_body: None, + body: Some(BTreeMap::from([( + "error.details.0.@type".to_string(), + BodyMatcher::ExactMatch( + "type.googleapis.com/google.rpc.BadRequest".to_string(), + ), + )])), + }, + ResponseCondition { + condition_type: ResponseConditionType::Valid, + status_code: Some(StatusCodeMatcher::Single(403)), + raw_body: None, + body: Some(BTreeMap::from([( + "error.details.0.reason".to_string(), + BodyMatcher::ExactMatch("API_KEY_SERVICE_BLOCKED".to_string()), + )])), + }, + ResponseCondition { + condition_type: ResponseConditionType::Invalid, + status_code: Some(StatusCodeMatcher::Single(400)), + raw_body: None, + body: Some(BTreeMap::from([( + "error.details.0.reason".to_string(), + BodyMatcher::ExactMatch("API_KEY_INVALID".to_string()), + )])), + }, + ], + }, + }], + }; + + let rule = 
RootRuleConfig::new(RegexRuleConfig::new("\\bvalid_gemini_key\\b").build()) + .match_action(MatchAction::Redact { + replacement: "[REDACTED]".to_string(), + }) + .third_party_active_checker(MatchValidationType::CustomHttpV2(http_config_v2)); + + let scanner = ScannerBuilder::new(&[rule]) + .with_return_matches(true) + .build() + .unwrap(); + + let mut content = "key: valid_gemini_key".to_string(); + let mut matches = scanner.scan(&mut content).unwrap(); + assert_eq!(matches.len(), 1); + + scanner.validate_matches(&mut matches); + mock.assert(); + + assert_eq!(matches[0].match_status, MatchStatus::Valid); +} + +// When a path segment is numeric but the current JSON value is an object with a +// matching string key (not an array), the string key fallback must succeed. +#[test] +fn test_body_path_numeric_segment_falls_back_to_string_key_on_object() { + let body = r#"{"a":{"b":{"0":{"c":"value"}}}}"#; + + let mut body_map = BTreeMap::new(); + body_map.insert( + "a.b.0.c".to_string(), + BodyMatcher::ExactMatch("value".to_string()), + ); + + // Use the ResponseCondition directly to exercise matches_body without a full scanner. 
+ let condition = ResponseCondition { + condition_type: ResponseConditionType::Valid, + status_code: None, + raw_body: None, + body: Some(body_map), + }; + + assert_eq!( + condition.matches(200, body), + crate::match_validation::config_v2::ResponseConditionResult::Valid + ); +} From 5bb9a55f6a39779cc33647865560865b13bfff0d Mon Sep 17 00:00:00 2001 From: Frank Bryden Date: Mon, 30 Mar 2026 17:05:14 +0200 Subject: [PATCH 2/6] Add unit test for matches_body --- sds/src/match_validation/config_v2.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sds/src/match_validation/config_v2.rs b/sds/src/match_validation/config_v2.rs index 19f24cda..3c3bd2c3 100644 --- a/sds/src/match_validation/config_v2.rs +++ b/sds/src/match_validation/config_v2.rs @@ -534,4 +534,29 @@ calls: assert_eq!(provided.kind, "vendor_xyz"); assert_eq!(provided.name, "client_subdomain"); } + + fn make_exact_body_matcher(path: &str, value: &str) -> BTreeMap<String, BodyMatcher> { + BTreeMap::from([(path.to_string(), BodyMatcher::ExactMatch(value.to_string()))]) + } + + // Path a.b.0.c where b is a JSON array: the numeric segment indexes into the array. + #[test] + fn test_matches_body_numeric_segment_indexes_into_array() { + let body = r#"{"a":{"b":[{"c":"value"}]}}"#; + assert!(matches_body( + &make_exact_body_matcher("a.b.0.c", "value"), + body + )); + } + + // Path a.b.0.c where b is a JSON object with the string key "0": the numeric + // segment falls back to string-key lookup when array access returns nothing.
+ #[test] + fn test_matches_body_numeric_segment_falls_back_to_string_key() { + let body = r#"{"a":{"b":{"0":{"c":"value"}}}}"#; + assert!(matches_body( + &make_exact_body_matcher("a.b.0.c", "value"), + body + )); + } } From 4393fa1ebaccc86fa23712b41ccbf200cac69d13 Mon Sep 17 00:00:00 2001 From: Frank Bryden Date: Tue, 31 Mar 2026 13:56:31 +0200 Subject: [PATCH 3/6] jsonpath-like path fetching of structure body --- sds/src/match_validation/config_v2.rs | 161 +++++++++++++++--- sds/src/match_validation/http_validator_v2.rs | 4 +- sds/src/scanner/test/match_validation.rs | 13 +- 3 files changed, 146 insertions(+), 32 deletions(-) diff --git a/sds/src/match_validation/config_v2.rs b/sds/src/match_validation/config_v2.rs index 3c3bd2c3..3dc0597f 100644 --- a/sds/src/match_validation/config_v2.rs +++ b/sds/src/match_validation/config_v2.rs @@ -114,7 +114,7 @@ pub struct ResponseCondition { /// Optional parsed body matchers (after JSON parsing) /// Maps JSON paths to matchers - /// Example: {"message.stack[2].success.status": BodyMatcher} + /// Example: {"$.message.stack[2].success.status": BodyMatcher} #[serde(skip_serializing_if = "Option::is_none")] pub body: Option<BTreeMap<String, BodyMatcher>>, } @@ -159,21 +159,9 @@ fn matches_body(body_matcher: &BTreeMap<String, BodyMatcher>, body: &str) -> boo Err(_) => return false, }; for (path, matcher) in body_matcher.iter() { - let parts = path.split('.'); - let mut value = &parsed_body; - for part in parts { - let next = if let Ok(index) = part.parse::<usize>() { - // Numeric segment: try array index first, fall back to string key - // (handles both arrays and objects with numeric string keys like {"0": ...}) - value.get(index).or_else(|| value.get(part)) - } else { - value.get(part) - }; - value = match next { - Some(value) => value, - None => return false, - }; - } + let Some(value) = get_json_path_value(&parsed_body, path) else { + continue; + }; let value_str = match value { serde_json::Value::String(s) => s.clone(), other => other.to_string(), @@ -185,6 +173,65 @@ fn
matches_body(body_matcher: &BTreeMap<String, BodyMatcher>, body: &str) -> boo false } +fn get_json_path_value<'a>( + root: &'a serde_json::Value, + path: &str, +) -> Option<&'a serde_json::Value> { + let mut cursor = path; + let mut value = root; + + if let Some(remaining) = cursor.strip_prefix('$') { + cursor = remaining; + } + + if cursor.is_empty() { + return Some(value); + } + + while !cursor.is_empty() { + if let Some(remaining) = cursor.strip_prefix('.') { + let segment_end = remaining.find(['.', '[']).unwrap_or(remaining.len()); + if segment_end == 0 { + return None; + } + let key = &remaining[..segment_end]; + value = value.get(key)?; + cursor = &remaining[segment_end..]; + continue; + } + + if let Some(remaining) = cursor.strip_prefix('[') { + let closing_bracket = remaining.find(']')?; + let segment = &remaining[..closing_bracket]; + value = if let Ok(index) = segment.parse::<usize>() { + value.get(index)? + } else { + let quoted_key = segment + .strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .or_else(|| { + segment + .strip_prefix('\'') + .and_then(|s| s.strip_suffix('\'')) + })?; + value.get(quoted_key)? + }; + cursor = &remaining[closing_bracket + 1..]; + continue; + } + + let segment_end = cursor.find(['.', '[']).unwrap_or(cursor.len()); + if segment_end == 0 { + return None; + } + let key = &cursor[..segment_end]; + value = value.get(key)?; + cursor = &cursor[segment_end..]; + } + + Some(value) +} + /// Type of response condition #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, Copy)] #[serde(rename_all = "lowercase")] @@ -539,23 +586,91 @@ calls: BTreeMap::from([(path.to_string(), BodyMatcher::ExactMatch(value.to_string()))]) } - // Path a.b.0.c where b is a JSON array: the numeric segment indexes into the array.
#[test] - fn test_matches_body_numeric_segment_indexes_into_array() { + fn test_get_json_path_value_with_root_prefix() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":{"b":[{"c":"value"}]}}"#).unwrap(); + + assert_eq!( + get_json_path_value(&body, "$.a.b[0].c"), + Some(&serde_json::Value::String("value".to_string())) + ); + } + + #[test] + fn test_get_json_path_value_without_root_prefix() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":{"b":[{"c":"value"}]}}"#).unwrap(); + + assert_eq!( + get_json_path_value(&body, "a.b[0].c"), + Some(&serde_json::Value::String("value".to_string())) + ); + } + + #[test] + fn test_get_json_path_value_with_quoted_numeric_key() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":{"b":{"0":{"c":"value"}}}}"#).unwrap(); + + assert_eq!( + get_json_path_value(&body, "$.a.b['0'].c"), + Some(&serde_json::Value::String("value".to_string())) + ); + assert_eq!( + get_json_path_value(&body, "$.a.b.0.c"), + Some(&serde_json::Value::String("value".to_string())) + ); + } + + #[test] + fn test_get_json_path_value_returns_root_for_dollar() { + let body: serde_json::Value = serde_json::from_str(r#"{"a":1}"#).unwrap(); + + assert_eq!(get_json_path_value(&body, "$"), Some(&body)); + } + + #[test] + fn test_get_json_path_value_returns_none_for_missing_path() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":{"b":[{"c":"value"}]}}"#).unwrap(); + + assert_eq!(get_json_path_value(&body, "$.a.b[1].c"), None); + } + + #[test] + fn test_get_json_path_value_returns_none_for_invalid_quoted_key() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":{"b":{"0":{"c":"value"}}}}"#).unwrap(); + + assert_eq!(get_json_path_value(&body, "$.a.b[0.c"), None); + } + + // JSONPath $.a.b[0].c selects the first element from array b. 
+ #[test] + fn test_matches_body_jsonpath_array_index() { let body = r#"{"a":{"b":[{"c":"value"}]}}"#; assert!(matches_body( - &make_exact_body_matcher("a.b.0.c", "value"), + &make_exact_body_matcher("$.a.b[0].c", "value"), body )); } - // Path a.b.0.c where b is a JSON object with the string key "0": the numeric - // segment falls back to string-key lookup when array access returns nothing. + // JSONPath $.a.b['0'].c makes object-key access explicit when the key is numeric. #[test] - fn test_matches_body_numeric_segment_falls_back_to_string_key() { + fn test_matches_body_jsonpath_quoted_numeric_key() { let body = r#"{"a":{"b":{"0":{"c":"value"}}}}"#; assert!(matches_body( - &make_exact_body_matcher("a.b.0.c", "value"), + &make_exact_body_matcher("$.a.b['0'].c", "value"), + body + )); + } + + #[test] + fn test_matches_body_jsonpath_without_root_prefix() { + let body = r#"{"a":{"b":[{"c":"value"}]}}"#; + assert!(matches_body( + &make_exact_body_matcher("a.b[0].c", "value"), body )); } diff --git a/sds/src/match_validation/http_validator_v2.rs b/sds/src/match_validation/http_validator_v2.rs index a721b721..519add8f 100644 --- a/sds/src/match_validation/http_validator_v2.rs +++ b/sds/src/match_validation/http_validator_v2.rs @@ -1220,7 +1220,7 @@ calls: status_code: [400, 420] - type: invalid body: - message.stack[2].success.status: + $.message.stack[2].success.status: type: ExactMatch config: success "#; @@ -1245,7 +1245,7 @@ calls: assert_eq!( config.calls[0].response.conditions[2].body, Some(BTreeMap::from([( - "message.stack[2].success.status".to_string(), + "$.message.stack[2].success.status".to_string(), BodyMatcher::ExactMatch("success".to_string()) )])), ); diff --git a/sds/src/scanner/test/match_validation.rs b/sds/src/scanner/test/match_validation.rs index 0cce300f..c1b6ccd5 100644 --- a/sds/src/scanner/test/match_validation.rs +++ b/sds/src/scanner/test/match_validation.rs @@ -1140,7 +1140,7 @@ fn test_body_path_numeric_segment_indexes_into_array() { 
status_code: Some(StatusCodeMatcher::Single(400)), raw_body: None, body: Some(BTreeMap::from([( - "error.details.0.@type".to_string(), + "$.error.details[0].@type".to_string(), BodyMatcher::ExactMatch( "type.googleapis.com/google.rpc.BadRequest".to_string(), ), @@ -1151,7 +1151,7 @@ fn test_body_path_numeric_segment_indexes_into_array() { status_code: Some(StatusCodeMatcher::Single(403)), raw_body: None, body: Some(BTreeMap::from([( - "error.details.0.reason".to_string(), + "$.error.details[0].reason".to_string(), BodyMatcher::ExactMatch("API_KEY_SERVICE_BLOCKED".to_string()), )])), }, @@ -1160,7 +1160,7 @@ fn test_body_path_numeric_segment_indexes_into_array() { status_code: Some(StatusCodeMatcher::Single(400)), raw_body: None, body: Some(BTreeMap::from([( - "error.details.0.reason".to_string(), + "$.error.details[0].reason".to_string(), BodyMatcher::ExactMatch("API_KEY_INVALID".to_string()), )])), }, @@ -1190,15 +1190,14 @@ fn test_body_path_numeric_segment_indexes_into_array() { assert_eq!(matches[0].match_status, MatchStatus::Valid); } -// When a path segment is numeric but the current JSON value is an object with a -// matching string key (not an array), the string key fallback must succeed. +// Numeric object keys should be expressed explicitly with JSONPath quoting. 
#[test] -fn test_body_path_numeric_segment_falls_back_to_string_key_on_object() { +fn test_body_path_jsonpath_quoted_numeric_key_on_object() { let body = r#"{"a":{"b":{"0":{"c":"value"}}}}"#; let mut body_map = BTreeMap::new(); body_map.insert( - "a.b.0.c".to_string(), + "$.a.b['0'].c".to_string(), BodyMatcher::ExactMatch("value".to_string()), ); From 25db5105afa53ab6600ba0c3c80525fb9404f033 Mon Sep 17 00:00:00 2001 From: Frank Bryden Date: Tue, 31 Mar 2026 14:06:48 +0200 Subject: [PATCH 4/6] Add comment justifying custom jsonpath parser --- sds/src/match_validation/config_v2.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sds/src/match_validation/config_v2.rs b/sds/src/match_validation/config_v2.rs index 3dc0597f..91f616e3 100644 --- a/sds/src/match_validation/config_v2.rs +++ b/sds/src/match_validation/config_v2.rs @@ -173,6 +173,10 @@ fn matches_body(body_matcher: &BTreeMap<String, BodyMatcher>, body: &str) -> boo false } +/// Get the value at a given JSONPath +/// +/// Simple parser as we don't need extensive JSONPath support and can thus avoid +/// pulling in a heavy JSONPath library.
fn get_json_path_value<'a>( root: &'a serde_json::Value, path: &str, From 5efbed3093a1d2b3427a0cfe3088d95c763a5068 Mon Sep 17 00:00:00 2001 From: Frank Bryden Date: Wed, 1 Apr 2026 11:42:37 +0200 Subject: [PATCH 5/6] Add nested array and array at root tests --- sds/src/match_validation/config_v2.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sds/src/match_validation/config_v2.rs b/sds/src/match_validation/config_v2.rs index 91f616e3..45c53616 100644 --- a/sds/src/match_validation/config_v2.rs +++ b/sds/src/match_validation/config_v2.rs @@ -634,6 +634,28 @@ calls: assert_eq!(get_json_path_value(&body, "$"), Some(&body)); } + #[test] + fn test_get_json_path_value_with_root_array() { + let body: serde_json::Value = + serde_json::from_str(r#"[{"name":"first"},{"name":"second"}]"#).unwrap(); + + assert_eq!( + get_json_path_value(&body, "$[1].name"), + Some(&serde_json::Value::String("second".to_string())) + ); + } + + #[test] + fn test_get_json_path_value_with_nested_arrays() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":[{"b":[{"c":"value"}]}]}"#).unwrap(); + + assert_eq!( + get_json_path_value(&body, "$.a[0].b[0].c"), + Some(&serde_json::Value::String("value".to_string())) + ); + } + #[test] fn test_get_json_path_value_returns_none_for_missing_path() { let body: serde_json::Value = From 6fb2913d938558cda5bd4c94feb48cf63ade3cfe Mon Sep 17 00:00:00 2001 From: Frank Bryden Date: Wed, 1 Apr 2026 11:44:10 +0200 Subject: [PATCH 6/6] Expose a validation function for body matcher syntax --- sds/src/lib.rs | 1 + sds/src/match_validation/config_v2.rs | 53 +++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/sds/src/lib.rs b/sds/src/lib.rs index cacd5823..bd74b444 100644 --- a/sds/src/lib.rs +++ b/sds/src/lib.rs @@ -45,6 +45,7 @@ pub use match_validation::{ BodyMatcher, CustomHttpConfigV2, HttpCallConfig, HttpRequestConfig, HttpResponseConfig, MatchPairingConfig, PairedValidatorConfig, ResponseCondition, 
ResponseConditionResult, ResponseConditionType, StatusCodeMatcher, TemplateVariable, TemplatedMatchString, + is_valid_body_matcher_path, }, match_status::{HttpErrorInfo, MatchStatus, UnknownResponseTypeInfo, ValidationError}, }; diff --git a/sds/src/match_validation/config_v2.rs b/sds/src/match_validation/config_v2.rs index 45c53616..2cbb3c7e 100644 --- a/sds/src/match_validation/config_v2.rs +++ b/sds/src/match_validation/config_v2.rs @@ -236,6 +236,59 @@ fn get_json_path_value<'a>( Some(value) } +/// Used for validating the body matcher path syntax +pub fn is_valid_body_matcher_path(path: &str) -> bool { + let mut cursor = path; + + if let Some(remaining) = cursor.strip_prefix('$') { + cursor = remaining; + } + + if cursor.is_empty() { + return true; + } + + while !cursor.is_empty() { + if let Some(remaining) = cursor.strip_prefix('.') { + let segment_end = remaining.find(['.', '[']).unwrap_or(remaining.len()); + if segment_end == 0 { + return false; + } + cursor = &remaining[segment_end..]; + continue; + } + + if let Some(remaining) = cursor.strip_prefix('[') { + let Some(closing_bracket) = remaining.find(']') else { + return false; + }; + let segment = &remaining[..closing_bracket]; + let is_valid_segment = segment.parse::<usize>().is_ok() + || segment + .strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .is_some() + || segment + .strip_prefix('\'') + .and_then(|s| s.strip_suffix('\'')) + .is_some(); + if !is_valid_segment { + return false; + } + cursor = &remaining[closing_bracket + 1..]; + continue; + } + + let segment_end = cursor.find(['.', '[']).unwrap_or(cursor.len()); + if segment_end == 0 { + return false; + } + cursor = &cursor[segment_end..]; + } + + true +} + /// Type of response condition #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, Copy)] #[serde(rename_all = "lowercase")]