From 9cefdb1ff0d8f0da838df794a028dffe2973d364 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Thu, 5 Feb 2026 12:04:15 +0100 Subject: [PATCH] Handle async join errors and skip empty capture matches --- sds/src/scanner/mod.rs | 5 ++- sds/src/scanner/regex_rule/compiled.rs | 9 +++++ sds/src/scanner/test/async_rule.rs | 46 ++++++++++++++++++++++++++ sds/src/scanner/test/mod.rs | 24 ++++++++++++++ 4 files changed, 83 insertions(+), 1 deletion(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index bb73f20b..34bb1cb1 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -654,7 +654,10 @@ impl Scanner { // results just need to be collected let mut total_io_duration = Duration::ZERO; for job in async_jobs { - let rule_info = job.fut.await.unwrap()?; + let rule_info = job + .fut + .await + .map_err(|err| ScannerError::Transient(format!("Async rule join error: {err}")))??; total_io_duration += rule_info.io_duration; rule_matches.push_async_matches( &job.path, diff --git a/sds/src/scanner/regex_rule/compiled.rs b/sds/src/scanner/regex_rule/compiled.rs index af996692..e7efad31 100644 --- a/sds/src/scanner/regex_rule/compiled.rs +++ b/sds/src/scanner/regex_rule/compiled.rs @@ -251,6 +251,15 @@ impl Iterator for TruePositiveSearch<'_> { self.rule.regex.search_half_with(self.cache, &input)?; let regex_match_range = self.perform_regex_scan(&input)?; + if regex_match_range.0 == regex_match_range.1 { + // Avoid zero-length matches (possible with capture groups) to prevent infinite loops. + if let Some(next) = get_next_regex_start(self.content, regex_match_range) { + self.start = next; + continue; + } else { + return None; + } + } // this is only checking extra validators (e.g. 
checksums) let is_false_positive_match = is_false_positive_match( regex_match_range, diff --git a/sds/src/scanner/test/async_rule.rs b/sds/src/scanner/test/async_rule.rs index cca7d491..0f5be035 100644 --- a/sds/src/scanner/test/async_rule.rs +++ b/sds/src/scanner/test/async_rule.rs @@ -34,6 +34,21 @@ impl CompiledRule for AsyncCompiledRule { } } +pub struct PanickingAsyncRuleConfig; + +pub struct PanickingAsyncCompiledRule; + +impl CompiledRule for PanickingAsyncCompiledRule { + fn get_string_matches( + &self, + _content: &str, + _path: &Path, + ctx: &mut StringMatchesCtx, + ) -> RuleResult { + ctx.process_async(|_ctx| Box::pin(async move { panic!("boom") })) + } +} + impl RuleConfig for AsyncRuleConfig { fn convert_to_compiled_rule( &self, @@ -44,6 +59,16 @@ impl RuleConfig for AsyncRuleConfig { } } +impl RuleConfig for PanickingAsyncRuleConfig { + fn convert_to_compiled_rule( + &self, + _content: usize, + _: Labels, + ) -> Result<Box<dyn CompiledRule>, CreateScannerError> { + Ok(Box::new(PanickingAsyncCompiledRule)) + } +} + #[tokio::test(flavor = "multi_thread")] async fn run_async_rule() { let scanner = ScannerBuilder::new(&[RootRuleConfig::new(Arc::new(AsyncRuleConfig { @@ -91,6 +116,27 @@ async fn async_scan_timeout() { ); } +#[tokio::test(flavor = "multi_thread")] +async fn async_scan_join_error() { + let scanner = ScannerBuilder::new(&[RootRuleConfig::new(Arc::new( + PanickingAsyncRuleConfig, + ) as Arc<dyn RuleConfig>) + .match_action(MatchAction::Redact { + replacement: "[REDACTED]".to_string(), + })]) + .build() + .unwrap(); + + let mut input = "this is a secret with random data".to_owned(); + let err = scanner.scan_async(&mut input).await.unwrap_err(); + match err { + ScannerError::Transient(msg) => { + assert!(msg.contains("Async rule join error")); + assert!(msg.contains("boom")); + } + } +} + #[test] fn async_scan_outside_of_tokio() { // Make sure scanning works without requiring users to explicitly enter a Tokio runtime. 
diff --git a/sds/src/scanner/test/mod.rs b/sds/src/scanner/test/mod.rs index fbb3b355..44c98b84 100644 --- a/sds/src/scanner/test/mod.rs +++ b/sds/src/scanner/test/mod.rs @@ -939,6 +939,30 @@ fn test_capture_group() { assert_eq!(matches.len(), 1); } +#[test] +fn test_capture_group_empty_match_is_ignored() { + let rule = RootRuleConfig::new( + RegexRuleConfig::new(r"(?<sds_match>.*)hello") + .with_pattern_capture_groups(vec!["sds_match".to_string()]) + .build(), + ) + .match_action(MatchAction::Redact { + replacement: "[REDACTED]".to_string(), + }); + + let scanner = ScannerBuilder::new(&[rule]).build().unwrap(); + + let mut content = "hello".to_string(); + let matches = scanner.scan(&mut content).unwrap(); + assert_eq!(matches.len(), 0); + assert_eq!(content, "hello"); + + let mut content = "foohello".to_string(); + let matches = scanner.scan(&mut content).unwrap(); + assert_eq!(matches.len(), 1); + assert_eq!(content, "[REDACTED]hello"); +} + #[test] fn test_precedence_ordering() { assert!(Precedence::Specific > Precedence::Generic);