From cc42589c33209951a55ebede4915ee2a30fa1390 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Thu, 22 Aug 2024 18:08:11 +0200 Subject: [PATCH 01/23] Add get_included_keywords method to CompiledRule trait --- sds/src/scanner/mod.rs | 16 ++++++++++++++++ sds/src/scanner/regex_rule/compiled.rs | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index d32f2135..fb32d068 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -14,9 +14,13 @@ use crate::scoped_ruleset::{ContentVisitor, ExclusionCheck, ScopedRuleSet}; pub use crate::secondary_validation::Validator; use crate::{CreateScannerError, EncodeIndices, MatchAction, Path}; use std::any::{Any, TypeId}; +use std::borrow::Cow; use std::sync::Arc; use self::metrics::ScannerMetrics; +use crate::proximity_keywords::{ + contains_keyword_in_path, CompiledIncludedProximityKeywords, UNIFIED_LINK_STR, +}; use crate::scanner::config::RuleConfig; use crate::scanner::regex_rule::compiled::RegexCompiledRule; use crate::scanner::regex_rule::{access_regex_caches, RegexCaches}; @@ -59,6 +63,7 @@ where pub trait CompiledRuleDyn: Send + Sync { fn get_match_action(&self) -> &MatchAction; fn get_scope(&self) -> &Scope; + fn get_included_keywords(&self) -> Option<&CompiledIncludedProximityKeywords>; #[allow(clippy::too_many_arguments)] fn get_string_matches( @@ -102,6 +107,10 @@ impl CompiledRuleDyn for T { self.get_scope() } + fn get_included_keywords(&self) -> Option<&CompiledIncludedProximityKeywords> { + self.get_included_keywords() + } + fn get_string_matches( &self, content: &str, @@ -158,6 +167,7 @@ pub trait CompiledRule: Send + Sync { fn get_match_action(&self) -> &MatchAction; fn get_scope(&self) -> &Scope; + fn get_included_keywords(&self) -> Option<&CompiledIncludedProximityKeywords>; #[allow(clippy::too_many_arguments)] fn get_string_matches( @@ -801,7 +811,13 @@ mod test { fn get_scope(&self) -> &Scope { &self.scope } + fn create_group_data(_: &Labels) {} + + fn get_included_keywords(&self) -> Option<&CompiledIncludedProximityKeywords> { + None + } + fn get_string_matches( &self, _content: &str, diff --git a/sds/src/scanner/regex_rule/compiled.rs b/sds/src/scanner/regex_rule/compiled.rs index 276ad40a..c5db0a0a 100644 --- a/sds/src/scanner/regex_rule/compiled.rs +++ b/sds/src/scanner/regex_rule/compiled.rs @@ -40,6 +40,10 @@ impl CompiledRule for RegexCompiledRule { &self.scope } fn create_group_data(_: &Labels) {} + fn get_included_keywords(&self) -> Option<&CompiledIncludedProximityKeywords> { + self.included_keywords.as_ref() + } + fn get_string_matches( &self, content: &str, From 2303433fb17a268db9d418156f8ad3f22694a3f2 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Thu, 22 Aug 2024 18:09:02 +0200 Subject: [PATCH 02/23] Add should_keywords_match_events_path feature to ScopedRuleSet struct --- sds/src/scanner/mod.rs | 5 ++++- sds/src/scoped_ruleset/mod.rs | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index fb32d068..e41e94c2 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -625,7 +625,10 @@ impl ScannerBuilder<'_> { .map(|rule| rule.get_scope().clone()) .collect::>(), ) - .with_implicit_index_wildcards(self.scanner_features.add_implicit_index_wildcards); + .with_implicit_index_wildcards(self.scanner_features.add_implicit_index_wildcards) + .with_keywords_should_match_event_paths( + self.scanner_features.should_keywords_match_event_paths, + ); { let stats = &*GLOBAL_STATS; diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index 8ee3d0e4..520aa87b 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -15,6 +15,7 @@ pub struct ScopedRuleSet { // The number of rules stored in this set num_rules: usize, add_implicit_index_wildcards: bool, + should_keywords_match_event_paths: bool, } impl ScopedRuleSet { @@ -45,6 +46,7 @@ impl ScopedRuleSet { tree, num_rules: rules_scopes.len(), add_implicit_index_wildcards: false, + should_keywords_match_event_paths: false, } } @@ -53,6 +55,11 @@ impl ScopedRuleSet { self } + pub fn with_keywords_should_match_event_paths(mut self, value: bool) -> Self { + self.should_keywords_match_event_paths = value; + self + } + pub fn visit_string_rule_combinations<'path, 'c: 'path>( &'c self, event: &'path mut impl Event, @@ -78,6 +85,7 @@ impl ScopedRuleSet { path: Path::root(), bool_set, add_implicit_index_wildcards: self.add_implicit_index_wildcards, + should_keywords_match_event_paths: self.should_keywords_match_event_paths, }; event.visit_event(&mut visitor) @@ -162,6 +170,7 @@ struct ScopedRuledSetEventVisitor<'a, C> { bool_set: Option, add_implicit_index_wildcards: bool, + should_keywords_match_event_paths: bool, } impl<'path, C> EventVisitor<'path> for ScopedRuledSetEventVisitor<'path, C> From 08544e3cebb4ac90a467fd606a7c087afd1aa072 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Thu, 22 Aug 2024 18:09:35 +0200 Subject: [PATCH 03/23] Add find_true_positive_rules_from_current_path method to ContentVisitor trait --- sds/benches/bench.rs | 8 ++++++++ sds/src/scanner/mod.rs | 21 +++++++++++++++++++++ sds/src/scoped_ruleset/mod.rs | 14 ++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/sds/benches/bench.rs b/sds/benches/bench.rs index 8061a846..dafe7f40 100644 --- a/sds/benches/bench.rs +++ b/sds/benches/bench.rs @@ -59,6 +59,14 @@ pub fn scoped_ruleset(c: &mut Criterion) { }); false } + + fn find_true_positive_rules_from_current_path( + &self, + sanitized_path: &str, + current_true_positive_rule_idx: &mut Vec, + ) -> usize { + 0 + } } fast_rule_set.visit_string_rule_combinations( diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index e41e94c2..15be4131 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -731,6 +731,27 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> { has_match } + + fn find_true_positive_rules_from_current_path( + &self, + sanitized_segments: &[Cow], + current_true_positive_rule_idx: &mut Vec, + ) -> usize { + let mut times_pushed = 0; + for (idx, rule) in self.scanner.rules.iter().enumerate() { + if !current_true_positive_rule_idx.contains(&idx) { + if let Some(keywords) = rule.get_included_keywords() { + let sanitized_path = sanitized_segments.join(UNIFIED_LINK_STR); + if contains_keyword_in_path(&sanitized_path, &keywords.keywords_pattern) { + // The rule is found has a true positive for this path, push it + current_true_positive_rule_idx.push(idx); + times_pushed += 1 + } + } + } + } + times_pushed + } } // Calculates the next starting position for a regex match if a the previous match is a false positive diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index 520aa87b..178ef8fa 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -122,6 +122,12 @@ pub trait ContentVisitor<'path> { rules: RuleIndexVisitor, is_excluded: ExclusionCheck<'content_visitor>, ) -> bool; + + fn find_true_positive_rules_from_current_path( + &self, + sanitized_segments: &[Cow], + current_true_positive_rule_idx: &mut Vec, + ) -> usize; } // This is just a reference to a RuleTree with some additional information @@ -392,6 +398,14 @@ mod test { }); true } + + fn find_true_positive_rules_from_current_path( + &self, + sanitized_segments: &[Cow], + current_true_positive_rule_idx: &mut Vec, + ) -> usize { + 0 + } } ruleset.visit_string_rule_combinations( From 1e3e98af697ba6cab2f3622ee29bb19ebf7836a3 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Thu, 22 Aug 2024 18:09:57 +0200 Subject: [PATCH 04/23] Use it in the push_segment method --- sds/src/scoped_ruleset/mod.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index 178ef8fa..f6f009a4 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -214,21 +214,35 @@ where // Sanitize the segment and push it self.sanitized_segments_until_node.push(segment.sanitize()); + let true_positive_rules_count = if self.should_keywords_match_event_paths { + self.content_visitor + .find_true_positive_rules_from_current_path( + self.sanitized_segments_until_node.as_slice(), + &mut self.true_positive_rule_idx, + ) + } else { + 0 + }; + // The new number of active trees is the number of new trees pushed self.active_node_counter.push(NodeCounter { active_tree_count: self.tree_nodes.len() - tree_nodes_len, - true_positive_rules_count: 0, + true_positive_rules_count, }); self.path.segments.push(segment); } fn pop_segment(&mut self) { - let num_active_trees = self.active_node_counter.pop().unwrap().active_tree_count; - for _ in 0..num_active_trees { + let node_counter = self.active_node_counter.pop().unwrap(); + for _ in 0..node_counter.active_tree_count { // The rules from the last node are no longer active, so remove them. let _popped = self.tree_nodes.pop(); } + for _ in 0..node_counter.true_positive_rules_count { + // The true positive rule indices from the last node are no longer active, remove them. + let _popped = self.true_positive_rule_idx.pop(); + } self.sanitized_segments_until_node.pop(); self.path.segments.pop(); } From 82191bd4fb29ccea2cb748a1c6941047f9030abd Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 10:43:30 +0200 Subject: [PATCH 05/23] Fix sanitize_segments --- sds/src/scoped_ruleset/mod.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index f6f009a4..36596d4b 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -81,6 +81,7 @@ impl ScopedRuleSet { active_node_counter: vec![NodeCounter { active_tree_count: 1, true_positive_rules_count: 0, + sanitized_segment_count: 0, }], path: Path::root(), bool_set, @@ -121,6 +122,7 @@ pub trait ContentVisitor<'path> { content: &str, rules: RuleIndexVisitor, is_excluded: ExclusionCheck<'content_visitor>, + true_positive_rule_idx: &Vec, ) -> bool; fn find_true_positive_rules_from_current_path( @@ -148,6 +150,9 @@ struct NodeCounter { // This counts how many rule indices we have pushed at the given node. // This helps remove the right number of elements when popping the segment. true_positive_rules_count: usize, + + // Keeps track of whether we pushed a sanitized segment in the sanitized_segments_until_node or not + sanitized_segment_count: usize, } struct ScopedRuledSetEventVisitor<'a, C> { @@ -211,8 +216,14 @@ where } } - // Sanitize the segment and push it - self.sanitized_segments_until_node.push(segment.sanitize()); + // Sanitize the segment and push it if it's a field + let sanitized_segment_count = if !segment.is_index() { + self.sanitized_segments_until_node.push(segment.sanitize()); + // println!("pushing segment: {}", segment.sanitize()); + 1 + } else { + 0 + }; let true_positive_rules_count = if self.should_keywords_match_event_paths { self.content_visitor @@ -228,6 +239,7 @@ where self.active_node_counter.push(NodeCounter { active_tree_count: self.tree_nodes.len() - tree_nodes_len, true_positive_rules_count, + sanitized_segment_count, }); self.path.segments.push(segment); @@ -243,7 +255,10 @@ where // The true positive rule indices from the last node are no longer active, remove them. let _popped = self.true_positive_rule_idx.pop(); } - self.sanitized_segments_until_node.pop(); + + for _ in 0..node_counter.sanitized_segment_count { + let _popped = self.sanitized_segments_until_node.pop(); + } self.path.segments.pop(); } From 0e85e6e4e9b089e91a2aacec50890c29444dd13f Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 11:59:59 +0200 Subject: [PATCH 06/23] Modify method signature --- sds/src/scanner/mod.rs | 3 +-- sds/src/scoped_ruleset/mod.rs | 37 ++++++++++++++++------------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index 15be4131..72b8e22d 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -734,14 +734,13 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> { fn find_true_positive_rules_from_current_path( &self, - sanitized_segments: &[Cow], + sanitized_path: &str, current_true_positive_rule_idx: &mut Vec, ) -> usize { let mut times_pushed = 0; for (idx, rule) in self.scanner.rules.iter().enumerate() { if !current_true_positive_rule_idx.contains(&idx) { if let Some(keywords) = rule.get_included_keywords() { - let sanitized_path = sanitized_segments.join(UNIFIED_LINK_STR); if contains_keyword_in_path(&sanitized_path, &keywords.keywords_pattern) { // The rule is found has a true positive for this path, push it current_true_positive_rule_idx.push(idx); diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index 36596d4b..a2f0ee11 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -1,10 +1,12 @@ mod bool_set; use crate::event::{EventVisitor, VisitStringResult}; +use crate::proximity_keywords::UNIFIED_LINK_STR; use crate::scanner::scope::Scope; use crate::scoped_ruleset::bool_set::BoolSet; use crate::{Event, Path, PathSegment}; use ahash::AHashMap; +use serde::Serialize; use std::borrow::Cow; /// A `ScopedRuleSet` determines which rules will be used to scan each field of an event, and which @@ -81,7 +83,6 @@ impl ScopedRuleSet { active_node_counter: vec![NodeCounter { active_tree_count: 1, true_positive_rules_count: 0, - sanitized_segment_count: 0, }], path: Path::root(), bool_set, @@ -122,12 +123,11 @@ pub trait ContentVisitor<'path> { content: &str, rules: RuleIndexVisitor, is_excluded: ExclusionCheck<'content_visitor>, - true_positive_rule_idx: &Vec, ) -> bool; fn find_true_positive_rules_from_current_path( &self, - sanitized_segments: &[Cow], + sanitized_path: &str, current_true_positive_rule_idx: &mut Vec, ) -> usize; } @@ -150,9 +150,6 @@ struct NodeCounter { // This counts how many rule indices we have pushed at the given node. // This helps remove the right number of elements when popping the segment. true_positive_rules_count: usize, - - // Keeps track of whether we pushed a sanitized segment in the sanitized_segments_until_node or not - sanitized_segment_count: usize, } struct ScopedRuledSetEventVisitor<'a, C> { @@ -216,19 +213,23 @@ where } } - // Sanitize the segment and push it if it's a field - let sanitized_segment_count = if !segment.is_index() { - self.sanitized_segments_until_node.push(segment.sanitize()); - // println!("pushing segment: {}", segment.sanitize()); - 1 - } else { - 0 - }; + // Sanitize the segment and push it + self.sanitized_segments_until_node.push(segment.sanitize()); let true_positive_rules_count = if self.should_keywords_match_event_paths { + let current_sanitized_path = self + .sanitized_segments_until_node + .iter() + .filter_map(|sanitized_segment| { + sanitized_segment + .as_ref() + .map_or(None::>, |x| Some(x.clone())) + }) + .collect::>() + .join(UNIFIED_LINK_STR); self.content_visitor .find_true_positive_rules_from_current_path( - self.sanitized_segments_until_node.as_slice(), + current_sanitized_path.as_str(), &mut self.true_positive_rule_idx, ) } else { @@ -239,7 +240,6 @@ where self.active_node_counter.push(NodeCounter { active_tree_count: self.tree_nodes.len() - tree_nodes_len, true_positive_rules_count, - sanitized_segment_count, }); self.path.segments.push(segment); @@ -256,9 +256,6 @@ where let _popped = self.true_positive_rule_idx.pop(); } - for _ in 0..node_counter.sanitized_segment_count { - let _popped = self.sanitized_segments_until_node.pop(); - } self.path.segments.pop(); } @@ -430,7 +427,7 @@ mod test { fn find_true_positive_rules_from_current_path( &self, - sanitized_segments: &[Cow], + sanitized_path: &str, current_true_positive_rule_idx: &mut Vec, ) -> usize { 0 From c1712f96138118c745f5c1509ca5913b4a17f56a Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 12:02:54 +0200 Subject: [PATCH 07/23] Remove dead code --- sds/src/proximity_keywords/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/sds/src/proximity_keywords/mod.rs b/sds/src/proximity_keywords/mod.rs index 2e8176b1..dc20331a 100644 --- a/sds/src/proximity_keywords/mod.rs +++ b/sds/src/proximity_keywords/mod.rs @@ -37,7 +37,6 @@ pub struct ProximityKeywordsRegex { pub const MULTI_WORD_KEYWORDS_LINK_CHARS: &[char] = &['-', '_', '.', ' ', '/']; pub const UNIFIED_LINK_CHAR: char = '.'; -#[allow(dead_code)] pub const UNIFIED_LINK_STR: &str = "."; pub fn compile_keywords_proximity_config( From 310e49a4441b7ea39a1f2c4a6da543aa6d04a93d Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 12:05:31 +0200 Subject: [PATCH 08/23] Remove unused imports --- sds/src/scanner/mod.rs | 3 +-- sds/src/scoped_ruleset/mod.rs | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index 72b8e22d..d4fa6824 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -14,12 +14,11 @@ use crate::scoped_ruleset::{ContentVisitor, ExclusionCheck, ScopedRuleSet}; pub use crate::secondary_validation::Validator; use crate::{CreateScannerError, EncodeIndices, MatchAction, Path}; use std::any::{Any, TypeId}; -use std::borrow::Cow; use std::sync::Arc; use self::metrics::ScannerMetrics; use crate::proximity_keywords::{ - contains_keyword_in_path, CompiledIncludedProximityKeywords, UNIFIED_LINK_STR, + contains_keyword_in_path, CompiledIncludedProximityKeywords, }; use crate::scanner::config::RuleConfig; use crate::scanner::regex_rule::compiled::RegexCompiledRule; diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index a2f0ee11..cfe7cde4 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -6,7 +6,6 @@ use crate::scanner::scope::Scope; use crate::scoped_ruleset::bool_set::BoolSet; use crate::{Event, Path, PathSegment}; use ahash::AHashMap; -use serde::Serialize; use std::borrow::Cow; /// A `ScopedRuleSet` determines which rules will be used to scan each field of an event, and which From 832b586d446c7f91e466b2d35a0001983897441e Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 12:07:23 +0200 Subject: [PATCH 09/23] Cargo fmt --- sds/src/scanner/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index d4fa6824..047e6696 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -17,9 +17,7 @@ use std::any::{Any, TypeId}; use std::sync::Arc; use self::metrics::ScannerMetrics; -use crate::proximity_keywords::{ - contains_keyword_in_path, CompiledIncludedProximityKeywords, -}; +use crate::proximity_keywords::{contains_keyword_in_path, CompiledIncludedProximityKeywords}; use crate::scanner::config::RuleConfig; use crate::scanner::regex_rule::compiled::RegexCompiledRule; use crate::scanner::regex_rule::{access_regex_caches, RegexCaches}; From 93c4b1f05c1eeef0f83f610f2ed82ea4f6b25865 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 13:52:23 +0200 Subject: [PATCH 10/23] Optimise sanitize_path creation --- sds/src/scoped_ruleset/mod.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index cfe7cde4..8d148fe8 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -1,7 +1,7 @@ mod bool_set; use crate::event::{EventVisitor, VisitStringResult}; -use crate::proximity_keywords::UNIFIED_LINK_STR; +use crate::proximity_keywords::UNIFIED_LINK_CHAR; use crate::scanner::scope::Scope; use crate::scoped_ruleset::bool_set::BoolSet; use crate::{Event, Path, PathSegment}; @@ -216,7 +216,7 @@ where self.sanitized_segments_until_node.push(segment.sanitize()); let true_positive_rules_count = if self.should_keywords_match_event_paths { - let current_sanitized_path = self + let mut current_sanitized_path = self .sanitized_segments_until_node .iter() .filter_map(|sanitized_segment| { @@ -224,8 +224,14 @@ where .as_ref() .map_or(None::>, |x| Some(x.clone())) }) - .collect::>() - .join(UNIFIED_LINK_STR); + .fold(String::new(), |mut a, b| { + a.reserve(b.len() + 1); + a.push_str(&*b); + a.push(UNIFIED_LINK_CHAR); + a + }); + // Remove the last `UNIFIED_LINK_CHAR` that has been put + current_sanitized_path.pop(); self.content_visitor .find_true_positive_rules_from_current_path( current_sanitized_path.as_str(), From 3c66fa7f6a66178325ede513fca7801ad869903c Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 15:34:57 +0200 Subject: [PATCH 11/23] Allow dead_code --- sds/src/proximity_keywords/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/sds/src/proximity_keywords/mod.rs b/sds/src/proximity_keywords/mod.rs index dc20331a..2e8176b1 100644 --- a/sds/src/proximity_keywords/mod.rs +++ b/sds/src/proximity_keywords/mod.rs @@ -37,6 +37,7 @@ pub struct ProximityKeywordsRegex { pub const MULTI_WORD_KEYWORDS_LINK_CHARS: &[char] = &['-', '_', '.', ' ', '/']; pub const UNIFIED_LINK_CHAR: char = '.'; +#[allow(dead_code)] pub const UNIFIED_LINK_STR: &str = "."; pub fn compile_keywords_proximity_config( From 234b30ef053e89a5d2d1fbef78b077b670c2a4da Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 15:40:04 +0200 Subject: [PATCH 12/23] Put _ in front of unused arguments --- sds/src/scoped_ruleset/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index 8d148fe8..0a9de1e1 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -432,8 +432,8 @@ mod test { fn find_true_positive_rules_from_current_path( &self, - sanitized_path: &str, - current_true_positive_rule_idx: &mut Vec, + _sanitized_path: &str, + _current_true_positive_rule_idx: &mut Vec, ) -> usize { 0 } From a9ee282418a0ca0441ae6b6a0b51eec820f8ab7b Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 17:04:35 +0200 Subject: [PATCH 13/23] Address clippy warnings --- sds/src/scanner/mod.rs | 2 +- sds/src/scoped_ruleset/mod.rs | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index 047e6696..f70c7975 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -738,7 +738,7 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> { for (idx, rule) in self.scanner.rules.iter().enumerate() { if !current_true_positive_rule_idx.contains(&idx) { if let Some(keywords) = rule.get_included_keywords() { - if contains_keyword_in_path(&sanitized_path, &keywords.keywords_pattern) { + if contains_keyword_in_path(sanitized_path, &keywords.keywords_pattern) { // The rule is found has a true positive for this path, push it current_true_positive_rule_idx.push(idx); times_pushed += 1 diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index 0a9de1e1..230e4967 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -220,13 +220,14 @@ where .sanitized_segments_until_node .iter() .filter_map(|sanitized_segment| { - sanitized_segment - .as_ref() - .map_or(None::>, |x| Some(x.clone())) + sanitized_segment.as_ref().map(|seg| Some(seg.clone())) }) .fold(String::new(), |mut a, b| { - a.reserve(b.len() + 1); - a.push_str(&*b); + let b_str = b.expect( + "In the filter_map above, we make sure to filter out the None variants", + ); + a.reserve(b_str.len() + 1); + a.push_str(&b_str); a.push(UNIFIED_LINK_CHAR); a }); From afe0aa7b5c899d814438ceaf3d82c6f917c23970 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Mon, 14 Oct 2024 17:40:06 +0200 Subject: [PATCH 14/23] Pop sanitized_segments_until_node when the segment is popped --- sds/src/scoped_ruleset/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index 230e4967..75028968 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -261,7 +261,8 @@ where // The true positive rule indices from the last node are no longer active, remove them. let _popped = self.true_positive_rule_idx.pop(); } - + // Pop the sanitized segment + self.sanitized_segments_until_node.pop(); self.path.segments.pop(); } From 3b7e4b4400a7139de1a5d6289649bcfed26f9176 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Thu, 22 Aug 2024 18:08:11 +0200 Subject: [PATCH 15/23] Add get_included_keywords method to CompiledRule trait --- sds/src/scanner/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index f70c7975..563a327c 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -14,10 +14,13 @@ use crate::scoped_ruleset::{ContentVisitor, ExclusionCheck, ScopedRuleSet}; pub use crate::secondary_validation::Validator; use crate::{CreateScannerError, EncodeIndices, MatchAction, Path}; use std::any::{Any, TypeId}; +use std::borrow::Cow; use std::sync::Arc; use self::metrics::ScannerMetrics; -use crate::proximity_keywords::{contains_keyword_in_path, CompiledIncludedProximityKeywords}; +use crate::proximity_keywords::{ + contains_keyword_in_path, CompiledIncludedProximityKeywords, UNIFIED_LINK_STR, +}; use crate::scanner::config::RuleConfig; use crate::scanner::regex_rule::compiled::RegexCompiledRule; use crate::scanner::regex_rule::{access_regex_caches, RegexCaches}; From 630745bbce26f797eab141dedf6288fc6cbe3605 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Thu, 22 Aug 2024 18:09:02 +0200 Subject: [PATCH 16/23] Add should_keywords_match_events_path feature to ScopedRuleSet struct --- sds/src/scanner/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index 563a327c..6ed1a79a 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -14,7 +14,6 @@ use crate::scoped_ruleset::{ContentVisitor, ExclusionCheck, ScopedRuleSet}; pub use crate::secondary_validation::Validator; use crate::{CreateScannerError, EncodeIndices, MatchAction, Path}; use std::any::{Any, TypeId}; -use std::borrow::Cow; use std::sync::Arc; use self::metrics::ScannerMetrics; From d3c321a2daac43791ac0931971ba318f8e6a7f79 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Thu, 22 Aug 2024 18:09:35 +0200 Subject: [PATCH 17/23] Add find_true_positive_rules_from_current_path method to ContentVisitor trait --- sds/src/scanner/mod.rs | 5 +++-- sds/src/scoped_ruleset/mod.rs | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index 6ed1a79a..94b9572a 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -733,14 +733,15 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> { fn find_true_positive_rules_from_current_path( &self, - sanitized_path: &str, + sanitized_segments: &[Cow], current_true_positive_rule_idx: &mut Vec, ) -> usize { let mut times_pushed = 0; for (idx, rule) in self.scanner.rules.iter().enumerate() { if !current_true_positive_rule_idx.contains(&idx) { if let Some(keywords) = rule.get_included_keywords() { - if contains_keyword_in_path(sanitized_path, &keywords.keywords_pattern) { + let sanitized_path = sanitized_segments.join(UNIFIED_LINK_STR); + if contains_keyword_in_path(&sanitized_path, &keywords.keywords_pattern) { // The rule is found has a true positive for this path, push it current_true_positive_rule_idx.push(idx); times_pushed += 1 diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index 75028968..57f09c6c 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -126,7 +126,11 @@ pub trait ContentVisitor<'path> { fn find_true_positive_rules_from_current_path( &self, +<<<<<<< HEAD sanitized_path: &str, +======= + sanitized_segments: &[Cow], +>>>>>>> a86af49 (Add find_true_positive_rules_from_current_path method to ContentVisitor trait) current_true_positive_rule_idx: &mut Vec, ) -> usize; } @@ -434,8 +438,13 @@ mod test { fn find_true_positive_rules_from_current_path( &self, +<<<<<<< HEAD _sanitized_path: &str, _current_true_positive_rule_idx: &mut Vec, +======= + sanitized_segments: &[Cow], + current_true_positive_rule_idx: &mut Vec, +>>>>>>> a86af49 (Add find_true_positive_rules_from_current_path method to ContentVisitor trait) ) -> usize { 0 } From 09d2060966aa3eff9ff2f6df6f05b8472338b867 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 11:59:59 +0200 Subject: [PATCH 18/23] Modify method signature --- sds/src/scanner/mod.rs | 3 +-- sds/src/scoped_ruleset/mod.rs | 31 ++++++++----------------------- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index 94b9572a..0a380bc7 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -733,14 +733,13 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> { fn find_true_positive_rules_from_current_path( &self, - sanitized_segments: &[Cow], + sanitized_path: &str, current_true_positive_rule_idx: &mut Vec, ) -> usize { let mut times_pushed = 0; for (idx, rule) in self.scanner.rules.iter().enumerate() { if !current_true_positive_rule_idx.contains(&idx) { if let Some(keywords) = rule.get_included_keywords() { - let sanitized_path = sanitized_segments.join(UNIFIED_LINK_STR); if contains_keyword_in_path(&sanitized_path, &keywords.keywords_pattern) { // The rule is found has a true positive for this path, push it current_true_positive_rule_idx.push(idx); diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index 57f09c6c..225d6b96 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -1,11 +1,12 @@ mod bool_set; use crate::event::{EventVisitor, VisitStringResult}; -use crate::proximity_keywords::UNIFIED_LINK_CHAR; +use crate::proximity_keywords::UNIFIED_LINK_STR; use crate::scanner::scope::Scope; use crate::scoped_ruleset::bool_set::BoolSet; use crate::{Event, Path, PathSegment}; use ahash::AHashMap; +use serde::Serialize; use std::borrow::Cow; /// A `ScopedRuleSet` determines which rules will be used to scan each field of an event, and which @@ -126,11 +127,7 @@ pub trait ContentVisitor<'path> { fn find_true_positive_rules_from_current_path( &self, -<<<<<<< HEAD - sanitized_path: &str, -======= sanitized_segments: &[Cow], ->>>>>>> a86af49 (Add find_true_positive_rules_from_current_path method to ContentVisitor trait) current_true_positive_rule_idx: &mut Vec, ) -> usize; } @@ -220,23 +217,16 @@ where self.sanitized_segments_until_node.push(segment.sanitize()); let true_positive_rules_count = if self.should_keywords_match_event_paths { - let mut current_sanitized_path = self + let current_sanitized_path = self .sanitized_segments_until_node .iter() .filter_map(|sanitized_segment| { - sanitized_segment.as_ref().map(|seg| Some(seg.clone())) + sanitized_segment + .as_ref() + .map_or(None::>, |x| Some(x.clone())) }) - .fold(String::new(), |mut a, b| { - let b_str = b.expect( - "In the filter_map above, we make sure to filter out the None variants", - ); - a.reserve(b_str.len() + 1); - a.push_str(&b_str); - a.push(UNIFIED_LINK_CHAR); - a - }); - // Remove the last `UNIFIED_LINK_CHAR` that has been put - current_sanitized_path.pop(); + .collect::>() + .join(UNIFIED_LINK_STR); self.content_visitor .find_true_positive_rules_from_current_path( current_sanitized_path.as_str(), @@ -438,13 +428,8 @@ mod test { fn find_true_positive_rules_from_current_path( &self, -<<<<<<< HEAD - _sanitized_path: &str, - _current_true_positive_rule_idx: &mut Vec, -======= sanitized_segments: &[Cow], current_true_positive_rule_idx: &mut Vec, ->>>>>>> a86af49 (Add find_true_positive_rules_from_current_path method to ContentVisitor trait) ) -> usize { 0 } From 5ce57796ab6c82f009752372d9ac68c82816a204 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 12:05:31 +0200 Subject: [PATCH 19/23] Remove unused imports --- sds/src/scanner/mod.rs | 2 +- sds/src/scoped_ruleset/mod.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index 0a380bc7..d4fa6824 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use self::metrics::ScannerMetrics; use crate::proximity_keywords::{ - contains_keyword_in_path, CompiledIncludedProximityKeywords, UNIFIED_LINK_STR, + contains_keyword_in_path, CompiledIncludedProximityKeywords, }; use crate::scanner::config::RuleConfig; use crate::scanner::regex_rule::compiled::RegexCompiledRule; diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index 225d6b96..c732beb5 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -6,7 +6,6 @@ use crate::scanner::scope::Scope; use crate::scoped_ruleset::bool_set::BoolSet; use crate::{Event, Path, PathSegment}; use ahash::AHashMap; -use serde::Serialize; use std::borrow::Cow; /// A `ScopedRuleSet` determines which rules will be used to scan each field of an event, and which From e3dc1d78f1bbe56d7f668d36c6ef259672b85421 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 12:07:23 +0200 Subject: [PATCH 20/23] Cargo fmt --- sds/src/scanner/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index d4fa6824..047e6696 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -17,9 +17,7 @@ use std::any::{Any, TypeId}; use std::sync::Arc; use self::metrics::ScannerMetrics; -use crate::proximity_keywords::{ - contains_keyword_in_path, CompiledIncludedProximityKeywords, -}; +use crate::proximity_keywords::{contains_keyword_in_path, CompiledIncludedProximityKeywords}; use crate::scanner::config::RuleConfig; use crate::scanner::regex_rule::compiled::RegexCompiledRule; use crate::scanner::regex_rule::{access_regex_caches, RegexCaches}; From ebec67cca4fde0066dc5ffa24117a9763d8f04a8 Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 10:44:53 +0200 Subject: [PATCH 21/23] Use true_positive_rule_idx to decide if string is true positive or not from path --- sds/src/scanner/mod.rs | 26 ++++++------------ sds/src/scanner/regex_rule/compiled.rs | 38 +++++++++++--------------- sds/src/scoped_ruleset/mod.rs | 7 +++-- 3 files changed, 29 insertions(+), 42 deletions(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index 047e6696..4472eecd 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -66,14 +66,12 @@ pub trait CompiledRuleDyn: Send + Sync { fn get_string_matches( &self, content: &str, - path: &Path, regex_caches: &mut RegexCaches, group_data: &mut AHashMap>, exclusion_check: &ExclusionCheck<'_>, excluded_matches: &mut AHashSet, match_emitter: &mut dyn MatchEmitter, - should_keywords_match_event_paths: bool, - scanner_labels: &Labels, + true_positive_rule_idx: &Vec, ); // Whether a match from this rule should be excluded (marked as a false-positive) @@ -111,14 +109,12 @@ impl CompiledRuleDyn for T { fn get_string_matches( &self, content: &str, - path: &Path, regex_caches: &mut RegexCaches, group_data: &mut AHashMap>, exclusion_check: &ExclusionCheck<'_>, excluded_matches: &mut AHashSet, match_emitter: &mut dyn MatchEmitter, - should_keywords_match_event_paths: bool, - scanner_labels: &Labels, + true_positive_rule_idx: &Vec, ) { let group_data_any = group_data .entry(TypeId::of::()) @@ -126,13 +122,12 @@ impl CompiledRuleDyn for T { let group_data: &mut T::GroupData = group_data_any.downcast_mut().unwrap(); self.get_string_matches( content, - path, regex_caches, group_data, exclusion_check, excluded_matches, match_emitter, - should_keywords_match_event_paths, + true_positive_rule_idx, ) } @@ -170,13 +165,12 @@ pub trait CompiledRule: Send + Sync { fn get_string_matches( &self, content: &str, - path: &Path, regex_caches: &mut RegexCaches, group_data: &mut Self::GroupData, exclusion_check: &ExclusionCheck<'_>, excluded_matches: &mut AHashSet, match_emitter: &mut dyn MatchEmitter, - should_keywords_match_event_paths: bool, + true_positive_rule_idx: &Vec, ); // Whether a match from this rule should be excluded (marked as a false-positive) @@ -662,6 +656,7 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> { content: &str, mut rule_visitor: crate::scoped_ruleset::RuleIndexVisitor, exclusion_check: ExclusionCheck<'b>, + true_positive_rule_idx: &Vec, ) -> bool { // matches for a single path let mut path_rules_matches = vec![]; @@ -688,16 +683,12 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> { rule.get_string_matches( content, - path, - self.regex_caches, + &mut self.regex_caches, &mut group_data, &exclusion_check, self.excluded_matches, &mut emitter, - self.scanner - .scanner_features - .should_keywords_match_event_paths, - &self.scanner.labels, + true_positive_rule_idx, ); } }); @@ -841,13 +832,12 @@ mod test { fn get_string_matches( &self, _content: &str, - _path: &Path, _regex_caches: &mut RegexCaches, _group_data: &mut Self::GroupData, _exclusion_check: &ExclusionCheck<'_>, _excluded_matches: &mut AHashSet, match_emitter: &mut dyn MatchEmitter, - _should_keywords_match_event_paths: bool, + _true_positive_rule_idx: &Vec, ) { match_emitter.emit(StringMatch { start: 10, end: 16 }); } diff --git a/sds/src/scanner/regex_rule/compiled.rs b/sds/src/scanner/regex_rule/compiled.rs index c5db0a0a..f197e28c 100644 --- a/sds/src/scanner/regex_rule/compiled.rs +++ b/sds/src/scanner/regex_rule/compiled.rs @@ -47,24 +47,22 @@ impl CompiledRule for RegexCompiledRule { fn get_string_matches( &self, content: &str, - path: &Path, regex_caches: &mut RegexCaches, _group_data: &mut (), exclusion_check: &ExclusionCheck<'_>, excluded_matches: &mut AHashSet, match_emitter: &mut dyn MatchEmitter, - should_keywords_match_event_paths: bool, + true_positive_rule_idx: &Vec, ) { match self.included_keywords { Some(ref included_keywords) => { self.get_string_matches_with_included_keywords( content, - path, regex_caches, exclusion_check, excluded_matches, match_emitter, - should_keywords_match_event_paths, + true_positive_rule_idx, included_keywords, ); } @@ -112,31 +110,27 @@ impl RegexCompiledRule { fn get_string_matches_with_included_keywords( &self, content: &str, - path: &Path, regex_caches: &mut RegexCaches, exclusion_check: &ExclusionCheck<'_>, excluded_matches: &mut AHashSet, match_emitter: &mut dyn MatchEmitter, - should_keywords_match_event_paths: bool, + true_positive_rule_idx: &Vec, included_keywords: &CompiledIncludedProximityKeywords, ) { - if should_keywords_match_event_paths { - let sanitized_path = path.sanitize(); - if contains_keyword_in_path(&sanitized_path, &included_keywords.keywords_pattern) { - // since the path contains a match, we can skip future included keyword checks - let true_positive_search = self.true_positive_matches( - content, - 0, - regex_caches.get(&self.regex), - false, - exclusion_check, - excluded_matches, - ); - for string_match in true_positive_search { - match_emitter.emit(string_match); - } - return; + if !true_positive_rule_idx.is_empty() && true_positive_rule_idx.contains(&self.rule_index) { + // since the path contains a match, we can skip future included keyword checks + let true_positive_search = self.true_positive_matches( + content, + 0, + regex_caches.get(&self.regex), + false, + exclusion_check, + excluded_matches, + ); + for string_match in true_positive_search { + match_emitter.emit(string_match); } + return; } let mut included_keyword_matches = included_keywords.keyword_matches(content); diff --git a/sds/src/scoped_ruleset/mod.rs b/sds/src/scoped_ruleset/mod.rs index c732beb5..f6d926ac 100644 --- a/sds/src/scoped_ruleset/mod.rs +++ b/sds/src/scoped_ruleset/mod.rs @@ -122,11 +122,12 @@ pub trait ContentVisitor<'path> { content: &str, rules: RuleIndexVisitor, is_excluded: ExclusionCheck<'content_visitor>, + true_positive_rule_idx: &Vec, ) -> bool; fn find_true_positive_rules_from_current_path( &self, - sanitized_segments: &[Cow], + sanitized_path: &str, current_true_positive_rule_idx: &mut Vec, ) -> usize; } @@ -270,6 +271,7 @@ where ExclusionCheck { tree_nodes: &self.tree_nodes, }, + &self.true_positive_rule_idx, ); if let Some(bool_set) = &mut self.bool_set { bool_set.reset(); @@ -408,6 +410,7 @@ mod test { content: &str, mut rule_iter: RuleIndexVisitor, exclusion_check: ExclusionCheck<'content_visitor>, + true_positive_rule_idx: &Vec, ) -> bool { let mut rules = vec![]; rule_iter.visit_rule_indices(|rule_index| { @@ -427,7 +430,7 @@ mod test { fn find_true_positive_rules_from_current_path( &self, - sanitized_segments: &[Cow], + sanitized_segments: &str, current_true_positive_rule_idx: &mut Vec, ) -> usize { 0 From b4ec30640882ccb6e3428b51c29a06deecd7ae9f Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Fri, 23 Aug 2024 13:58:59 +0200 Subject: [PATCH 22/23] Cargo fmt and re-add segment.pop() --- sds/src/scanner/mod.rs | 3 +++ sds/src/scanner/regex_rule/compiled.rs | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index 4472eecd..6755a0fc 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -72,6 +72,7 @@ pub trait CompiledRuleDyn: Send + Sync { excluded_matches: &mut AHashSet, match_emitter: &mut dyn MatchEmitter, true_positive_rule_idx: &Vec, + scanner_labels: &Labels, ); // Whether a match from this rule should be excluded (marked as a false-positive) @@ -115,6 +116,7 @@ impl CompiledRuleDyn for T { excluded_matches: &mut AHashSet, match_emitter: &mut dyn MatchEmitter, true_positive_rule_idx: &Vec, + scanner_labels: &Labels, ) { let group_data_any = group_data .entry(TypeId::of::()) @@ -689,6 +691,7 @@ impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> { self.excluded_matches, &mut emitter, true_positive_rule_idx, + &self.scanner.labels, ); } }); diff --git a/sds/src/scanner/regex_rule/compiled.rs b/sds/src/scanner/regex_rule/compiled.rs index f197e28c..1a7ab808 100644 --- a/sds/src/scanner/regex_rule/compiled.rs +++ b/sds/src/scanner/regex_rule/compiled.rs @@ -1,7 +1,7 @@ use crate::match_validation::config::{InternalMatchValidationType, MatchValidationType}; use crate::proximity_keywords::{ - contains_keyword_in_path, get_prefix_start, is_index_within_prefix, - CompiledExcludedProximityKeywords, CompiledIncludedProximityKeywords, + get_prefix_start, is_index_within_prefix, CompiledExcludedProximityKeywords, + CompiledIncludedProximityKeywords, }; use crate::scanner::metrics::RuleMetrics; use crate::scanner::regex_rule::regex_store::SharedRegex; @@ -9,7 +9,7 @@ use crate::scanner::regex_rule::RegexCaches; use crate::scanner::scope::Scope; use crate::scanner::{get_next_regex_start, is_false_positive_match}; use crate::secondary_validation::Validator; -use crate::{CompiledRule, ExclusionCheck, Labels, MatchAction, MatchEmitter, Path, StringMatch}; +use crate::{CompiledRule, ExclusionCheck, Labels, MatchAction, MatchEmitter, StringMatch}; use ahash::AHashSet; use regex_automata::meta::Cache; use regex_automata::Input; From 08e9c77f793c254027492ebe26b65f394636a74e Mon Sep 17 00:00:00 2001 From: Arthur Belleville Date: Mon, 14 Oct 2024 18:03:50 +0200 Subject: [PATCH 23/23] Do not clone the Cow and instead return a Borrowed Cow Thanks Frank for the eyes! --- sds/src/path.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sds/src/path.rs b/sds/src/path.rs index 61a963e6..79c11e55 100644 --- a/sds/src/path.rs +++ b/sds/src/path.rs @@ -110,10 +110,10 @@ impl<'a> PathSegment<'a> { } } - pub fn sanitize(&self) -> Option> { + pub fn sanitize(&'a self) -> Option> { if let PathSegment::Field(field) = self { match should_bypass_standardize_path(field) { - BypassStandardizePathResult::BypassAndAllLowercase => Some(field.clone()), + BypassStandardizePathResult::BypassAndAllLowercase => Some(Cow::Borrowed(field)), BypassStandardizePathResult::BypassAndAllUppercase => { Some(Cow::Owned(field.to_ascii_lowercase())) }