diff --git a/sds/src/lib.rs b/sds/src/lib.rs index cacd5823..bd74b444 100644 --- a/sds/src/lib.rs +++ b/sds/src/lib.rs @@ -45,6 +45,7 @@ pub use match_validation::{ BodyMatcher, CustomHttpConfigV2, HttpCallConfig, HttpRequestConfig, HttpResponseConfig, MatchPairingConfig, PairedValidatorConfig, ResponseCondition, ResponseConditionResult, ResponseConditionType, StatusCodeMatcher, TemplateVariable, TemplatedMatchString, + is_valid_body_matcher_path, }, match_status::{HttpErrorInfo, MatchStatus, UnknownResponseTypeInfo, ValidationError}, }; diff --git a/sds/src/match_validation/config_v2.rs b/sds/src/match_validation/config_v2.rs index 8e879ae9..2cbb3c7e 100644 --- a/sds/src/match_validation/config_v2.rs +++ b/sds/src/match_validation/config_v2.rs @@ -114,7 +114,7 @@ pub struct ResponseCondition { /// Optional parsed body matchers (after JSON parsing) /// Maps JSON paths to matchers - /// Example: {"message.stack[2].success.status": BodyMatcher} + /// Example: {"$.message.stack[2].success.status": BodyMatcher} #[serde(skip_serializing_if = "Option::is_none")] pub body: Option>, } @@ -159,14 +159,9 @@ fn matches_body(body_matcher: &BTreeMap, body: &str) -> boo Err(_) => return false, }; for (path, matcher) in body_matcher.iter() { - let parts = path.split('.'); - let mut value = &parsed_body; - for part in parts { - value = match value.get(part) { - Some(value) => value, - None => return false, - }; - } + let Some(value) = get_json_path_value(&parsed_body, path) else { + continue; + }; let value_str = match value { serde_json::Value::String(s) => s.clone(), other => other.to_string(), @@ -178,6 +173,122 @@ fn matches_body(body_matcher: &BTreeMap, body: &str) -> boo false } +/// Resolve a JSONPath-like `path` against `root`; `None` if it is malformed or does not resolve. +/// +/// Hand-rolled subset (optional `$`, `.key`, `[index]`, `['key']`/`["key"]`) so we can avoid +/// pulling in a heavy JSONPath library. 
+fn get_json_path_value<'a>( + root: &'a serde_json::Value, + path: &str, +) -> Option<&'a serde_json::Value> { + let mut cursor = path; + let mut value = root; + + if let Some(remaining) = cursor.strip_prefix('$') { + cursor = remaining; + } + + if cursor.is_empty() { + return Some(value); + } + + while !cursor.is_empty() { + if let Some(remaining) = cursor.strip_prefix('.') { + let segment_end = remaining.find(['.', '[']).unwrap_or(remaining.len()); + if segment_end == 0 { + return None; + } + let key = &remaining[..segment_end]; + value = value.get(key)?; + cursor = &remaining[segment_end..]; + continue; + } + + if let Some(remaining) = cursor.strip_prefix('[') { + let closing_bracket = remaining.find(']')?; + let segment = &remaining[..closing_bracket]; + value = if let Ok(index) = segment.parse::() { + value.get(index)? + } else { + let quoted_key = segment + .strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .or_else(|| { + segment + .strip_prefix('\'') + .and_then(|s| s.strip_suffix('\'')) + })?; + value.get(quoted_key)? 
+ }; + cursor = &remaining[closing_bracket + 1..]; + continue; + } + + let segment_end = cursor.find(['.', '[']).unwrap_or(cursor.len()); + if segment_end == 0 { + return None; + } + let key = &cursor[..segment_end]; + value = value.get(key)?; + cursor = &cursor[segment_end..]; + } + + Some(value) +} + +/// Checks that `path` follows the syntax accepted by `get_json_path_value`, without needing a body +pub fn is_valid_body_matcher_path(path: &str) -> bool { + let mut cursor = path; + + if let Some(remaining) = cursor.strip_prefix('$') { + cursor = remaining; + } + + if cursor.is_empty() { + return true; + } + + while !cursor.is_empty() { + if let Some(remaining) = cursor.strip_prefix('.') { + let segment_end = remaining.find(['.', '[']).unwrap_or(remaining.len()); + if segment_end == 0 { + return false; + } + cursor = &remaining[segment_end..]; + continue; + } + + if let Some(remaining) = cursor.strip_prefix('[') { + let Some(closing_bracket) = remaining.find(']') else { + return false; + }; + let segment = &remaining[..closing_bracket]; + let is_valid_segment = segment.parse::().is_ok() + || segment + .strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .is_some() + || segment + .strip_prefix('\'') + .and_then(|s| s.strip_suffix('\'')) + .is_some(); + if !is_valid_segment { + return false; + } + cursor = &remaining[closing_bracket + 1..]; + continue; + } + + let segment_end = cursor.find(['.', '[']).unwrap_or(cursor.len()); + if segment_end == 0 { + return false; + } + cursor = &cursor[segment_end..]; + } + + true +} + /// Type of response condition #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, Copy)] #[serde(rename_all = "lowercase")] @@ -527,4 +638,119 @@ calls: assert_eq!(provided.kind, "vendor_xyz"); assert_eq!(provided.name, "client_subdomain"); } + + fn make_exact_body_matcher(path: &str, value: &str) -> BTreeMap { + BTreeMap::from([(path.to_string(), BodyMatcher::ExactMatch(value.to_string()))]) + } + + #[test] + fn test_get_json_path_value_with_root_prefix() { + let body: 
serde_json::Value = + serde_json::from_str(r#"{"a":{"b":[{"c":"value"}]}}"#).unwrap(); + + assert_eq!( + get_json_path_value(&body, "$.a.b[0].c"), + Some(&serde_json::Value::String("value".to_string())) + ); + } + + #[test] + fn test_get_json_path_value_without_root_prefix() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":{"b":[{"c":"value"}]}}"#).unwrap(); + + assert_eq!( + get_json_path_value(&body, "a.b[0].c"), + Some(&serde_json::Value::String("value".to_string())) + ); + } + + #[test] + fn test_get_json_path_value_with_quoted_numeric_key() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":{"b":{"0":{"c":"value"}}}}"#).unwrap(); + + assert_eq!( + get_json_path_value(&body, "$.a.b['0'].c"), + Some(&serde_json::Value::String("value".to_string())) + ); + assert_eq!( + get_json_path_value(&body, "$.a.b.0.c"), + Some(&serde_json::Value::String("value".to_string())) + ); + } + + #[test] + fn test_get_json_path_value_returns_root_for_dollar() { + let body: serde_json::Value = serde_json::from_str(r#"{"a":1}"#).unwrap(); + + assert_eq!(get_json_path_value(&body, "$"), Some(&body)); + } + + #[test] + fn test_get_json_path_value_with_root_array() { + let body: serde_json::Value = + serde_json::from_str(r#"[{"name":"first"},{"name":"second"}]"#).unwrap(); + + assert_eq!( + get_json_path_value(&body, "$[1].name"), + Some(&serde_json::Value::String("second".to_string())) + ); + } + + #[test] + fn test_get_json_path_value_with_nested_arrays() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":[{"b":[{"c":"value"}]}]}"#).unwrap(); + + assert_eq!( + get_json_path_value(&body, "$.a[0].b[0].c"), + Some(&serde_json::Value::String("value".to_string())) + ); + } + + #[test] + fn test_get_json_path_value_returns_none_for_missing_path() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":{"b":[{"c":"value"}]}}"#).unwrap(); + + assert_eq!(get_json_path_value(&body, "$.a.b[1].c"), None); + } + + #[test] + fn 
test_get_json_path_value_returns_none_for_invalid_quoted_key() { + let body: serde_json::Value = + serde_json::from_str(r#"{"a":{"b":{"0":{"c":"value"}}}}"#).unwrap(); + + // `[0.c` never closes its bracket, so the path is rejected as malformed. + assert_eq!(get_json_path_value(&body, "$.a.b[0.c"), None); + } + + // JSONPath $.a.b[0].c selects the first element from array b. + #[test] + fn test_matches_body_jsonpath_array_index() { + let body = r#"{"a":{"b":[{"c":"value"}]}}"#; + assert!(matches_body( + &make_exact_body_matcher("$.a.b[0].c", "value"), + body + )); + } + + // JSONPath $.a.b['0'].c makes object-key access explicit when the key is numeric. + #[test] + fn test_matches_body_jsonpath_quoted_numeric_key() { + let body = r#"{"a":{"b":{"0":{"c":"value"}}}}"#; + assert!(matches_body( + &make_exact_body_matcher("$.a.b['0'].c", "value"), + body + )); + } + + #[test] + fn test_matches_body_jsonpath_without_root_prefix() { + let body = r#"{"a":{"b":[{"c":"value"}]}}"#; + assert!(matches_body( + &make_exact_body_matcher("a.b[0].c", "value"), + body + )); + } } diff --git a/sds/src/match_validation/http_validator_v2.rs b/sds/src/match_validation/http_validator_v2.rs index a721b721..519add8f 100644 --- a/sds/src/match_validation/http_validator_v2.rs +++ b/sds/src/match_validation/http_validator_v2.rs @@ -1220,7 +1220,7 @@ calls: status_code: [400, 420] - type: invalid body: - message.stack[2].success.status: + $.message.stack[2].success.status: type: ExactMatch config: success "#; @@ -1245,7 +1245,7 @@ calls: assert_eq!( config.calls[0].response.conditions[2].body, Some(BTreeMap::from([( - "message.stack[2].success.status".to_string(), + "$.message.stack[2].success.status".to_string(), BodyMatcher::ExactMatch("success".to_string()) )])), ); diff --git a/sds/src/scanner/test/match_validation.rs b/sds/src/scanner/test/match_validation.rs index 04c7ec96..c1b6ccd5 100644 --- a/sds/src/scanner/test/match_validation.rs +++ b/sds/src/scanner/test/match_validation.rs @@ -3,9 +3,9 @@ use crate::match_validation::config_v2::TemplatedMatchString; use 
crate::match_validation::validator_utils::generate_aws_headers_and_body; use crate::scanner::RootRuleConfig; use crate::{ - AwsConfig, AwsType, CustomHttpConfig, CustomHttpConfigV2, HttpCallConfig, HttpErrorInfo, - HttpMethod, HttpRequestConfig, HttpResponseConfig, InternalMatchValidationType, MatchAction, - MatchPairingConfig, MatchStatus, MatchValidationType, PairedValidatorConfig, + AwsConfig, AwsType, BodyMatcher, CustomHttpConfig, CustomHttpConfigV2, HttpCallConfig, + HttpErrorInfo, HttpMethod, HttpRequestConfig, HttpResponseConfig, InternalMatchValidationType, + MatchAction, MatchPairingConfig, MatchStatus, MatchValidationType, PairedValidatorConfig, ProximityKeywordsConfig, RegexRuleConfig, ResponseCondition, ResponseConditionType, RuleMatch, Scanner, ScannerBuilder, StatusCodeMatcher, UnknownResponseTypeInfo, ValidationError, }; @@ -1088,3 +1088,129 @@ fn test_match_pairing_rule_can_consume_and_provide() { mock_app_key_valid.assert_hits(1); } } + +// A bracketed index segment (e.g. `details[0].@type`) must index into a JSON array. +// The response body has `error.details` as an array, so `$.error.details[0].@type` navigates to +// the first element's `@type` field and the condition correctly fires as Valid. +#[test] +fn test_body_path_numeric_segment_indexes_into_array() { + let server = MockServer::start(); + + let response_body = r#"{"error":{"code":400,"message":"Invalid JSON payload received. Unknown name \"{}\": Cannot bind query parameter. Field '{}' could not be found in request message.","status":"INVALID_ARGUMENT","details":[{"@type":"type.googleapis.com/google.rpc.BadRequest","fieldViolations":[{"description":"Invalid JSON payload received. Unknown name \"{}\": Cannot bind query parameter. 
Field '{}' could not be found in request message."}]}]}}"#; + + let mock = server.mock(|when, then| { + when.method(POST) + .path("/v1beta/models/gemini:countTokens") + .query_param("key", "valid_gemini_key") + .header("User-Agent", "Datadog Match Validator") + .header("api-key", "valid_gemini_key"); + then.status(400) + .header("content-type", "application/json") + .body(response_body); + }); + + let http_config_v2 = CustomHttpConfigV2 { + match_pairing: None, + provides: None, + calls: vec![HttpCallConfig { + request: HttpRequestConfig { + endpoint: TemplatedMatchString(format!( + "{}/v1beta/models/gemini:countTokens?key=$MATCH", + server.base_url() + )), + method: HttpMethod::Post, + hosts: vec![], + headers: BTreeMap::from([ + ( + "User-Agent".to_string(), + TemplatedMatchString("Datadog Match Validator".to_string()), + ), + ( + "api-key".to_string(), + TemplatedMatchString("$MATCH".to_string()), + ), + ]), + body: Some(TemplatedMatchString("{}".to_string())), + timeout: Duration::from_secs(3), + }, + response: HttpResponseConfig { + conditions: vec![ + ResponseCondition { + condition_type: ResponseConditionType::Valid, + status_code: Some(StatusCodeMatcher::Single(400)), + raw_body: None, + body: Some(BTreeMap::from([( + "$.error.details[0].@type".to_string(), + BodyMatcher::ExactMatch( + "type.googleapis.com/google.rpc.BadRequest".to_string(), + ), + )])), + }, + ResponseCondition { + condition_type: ResponseConditionType::Valid, + status_code: Some(StatusCodeMatcher::Single(403)), + raw_body: None, + body: Some(BTreeMap::from([( + "$.error.details[0].reason".to_string(), + BodyMatcher::ExactMatch("API_KEY_SERVICE_BLOCKED".to_string()), + )])), + }, + ResponseCondition { + condition_type: ResponseConditionType::Invalid, + status_code: Some(StatusCodeMatcher::Single(400)), + raw_body: None, + body: Some(BTreeMap::from([( + "$.error.details[0].reason".to_string(), + BodyMatcher::ExactMatch("API_KEY_INVALID".to_string()), + )])), + }, + ], + }, + }], + }; + + 
let rule = RootRuleConfig::new(RegexRuleConfig::new("\\bvalid_gemini_key\\b").build()) + .match_action(MatchAction::Redact { + replacement: "[REDACTED]".to_string(), + }) + .third_party_active_checker(MatchValidationType::CustomHttpV2(http_config_v2)); + + let scanner = ScannerBuilder::new(&[rule]) + .with_return_matches(true) + .build() + .unwrap(); + + let mut content = "key: valid_gemini_key".to_string(); + let mut matches = scanner.scan(&mut content).unwrap(); + assert_eq!(matches.len(), 1); + + scanner.validate_matches(&mut matches); + mock.assert(); + + assert_eq!(matches[0].match_status, MatchStatus::Valid); +} + +// Numeric object keys should be expressed explicitly with JSONPath quoting. +#[test] +fn test_body_path_jsonpath_quoted_numeric_key_on_object() { + let body = r#"{"a":{"b":{"0":{"c":"value"}}}}"#; + + let mut body_map = BTreeMap::new(); + body_map.insert( + "$.a.b['0'].c".to_string(), + BodyMatcher::ExactMatch("value".to_string()), + ); + + // Use the ResponseCondition directly to exercise matches_body without a full scanner. + let condition = ResponseCondition { + condition_type: ResponseConditionType::Valid, + status_code: None, + raw_body: None, + body: Some(body_map), + }; + + assert_eq!( + condition.matches(200, body), + crate::match_validation::config_v2::ResponseConditionResult::Valid + ); +}