diff --git a/sds-go/go/dd_sds.h b/sds-go/go/dd_sds.h index ae37e93a..e15337a4 100644 --- a/sds-go/go/dd_sds.h +++ b/sds-go/go/dd_sds.h @@ -5,7 +5,7 @@ long create_regex_rule(const char* json_config); -long create_scanner(long rule_list, const char* encoded_labels, const char** error); +long create_scanner(long rule_list, const char* encoded_labels, int enable_debug_observability, const char** error); void delete_scanner(long scanner_id); // event is a non-null terminated TODO diff --git a/sds-go/go/scanner.go b/sds-go/go/scanner.go index b1d6bc9c..9a3be7fa 100644 --- a/sds-go/go/scanner.go +++ b/sds-go/go/scanner.go @@ -50,12 +50,25 @@ type ScanResult struct { Matches []RuleMatch } +// ScannerOptions contains optional configuration for scanner creation. +type ScannerOptions struct { + // EnableDebugObservability adds extra tags to metrics to help debugging. + // Disabled by default to avoid high-cardinality metric series in production. + EnableDebugObservability bool +} + // CreateScanner creates a scanner in the underlying SDS shared library. The library // only returns an ID to then address what scanner to use on Scan calls. This ID is // stored in the Scanner Go object for convenience. See `Scan` to process events. // The rules used to create the Scanner are stored as a read-only information in the // returned Scanner. func CreateScanner(ruleConfigs []RuleConfig) (*Scanner, error) { + return CreateScannerWithOptions(ruleConfigs, ScannerOptions{}) +} + +// CreateScannerWithOptions creates a scanner with additional configuration options. +// See CreateScanner for general usage. +func CreateScannerWithOptions(ruleConfigs []RuleConfig, options ScannerOptions) (*Scanner, error) { ruleList := CreateRuleList() defer ruleList.Delete() @@ -76,8 +89,12 @@ func CreateScanner(ruleConfigs []RuleConfig) (*Scanner, error) { encodedLabelsJson := C.CString(string(labelsMarshalled)) defer C.free(unsafe.Pointer(encodedLabelsJson)) + var cEnableDebugObservability C.int + if options.EnableDebugObservability { + cEnableDebugObservability = 1 + } var errorString *C.char - id := C.create_scanner(C.long(ruleList.nativePtr), encodedLabelsJson, &errorString) + id := C.create_scanner(C.long(ruleList.nativePtr), encodedLabelsJson, cEnableDebugObservability, &errorString) if id < 0 { switch id { diff --git a/sds-go/rust/src/native/create_scanner.rs b/sds-go/rust/src/native/create_scanner.rs index adac649f..b26168c4 100644 --- a/sds-go/rust/src/native/create_scanner.rs +++ b/sds-go/rust/src/native/create_scanner.rs @@ -13,6 +13,7 @@ use dd_sds::Scanner; pub unsafe extern "C" fn create_scanner( rules: i64, encoded_labels: *const c_char, + enable_debug_observability: i32, error_out: *mut *const c_char, ) -> i64 { handle_panic_ptr_return(Some(error_out), || { @@ -23,7 +24,11 @@ pub unsafe extern "C" fn create_scanner( let labels = unsafe { read_json(encoded_labels).unwrap() }; // create the scanner - let scanner = match Scanner::builder(&rules).labels(labels).build() { + let scanner = match Scanner::builder(&rules) + .labels(labels) + .with_debug_observability(enable_debug_observability != 0) + .build() + { Ok(s) => s, Err(err) => return err.into(), }; diff --git a/sds/src/scanner/metrics.rs b/sds/src/scanner/metrics.rs index 7ac280b8..a50d1528 100644 --- a/sds/src/scanner/metrics.rs +++ b/sds/src/scanner/metrics.rs @@ -3,7 +3,11 @@ use metrics::{Counter, counter}; #[derive(Clone)] pub struct RuleMetrics { + /// Pre-initialized counter for the fast path (debug observability disabled). pub false_positive_excluded_attributes: Counter, + /// Base labels when debug observability enabled, + /// used to attach the `sds_namespace` tag dynamically. + pub base_labels: Labels, } impl RuleMetrics { @@ -13,6 +17,7 @@ impl RuleMetrics { "false_positive.multipass.excluded_match", labels.clone() ), + base_labels: labels.clone(), } } } diff --git a/sds/src/scanner/mod.rs b/sds/src/scanner/mod.rs index c84c0e4f..7a6780dc 100644 --- a/sds/src/scanner/mod.rs +++ b/sds/src/scanner/mod.rs @@ -332,7 +332,7 @@ pub trait CompiledRule: Send + Sync { false } - fn on_excluded_match_multipass_v0(&self) { + fn on_excluded_match_multipass_v0(&self, _path: &Path, _enable_debug_observability: bool) { // default is to do nothing } @@ -367,6 +367,7 @@ struct ScannerFeatures { pub add_implicit_index_wildcards: bool, pub multipass_v0_enabled: bool, pub return_matches: bool, + pub enable_debug_observability: bool, } impl Default for ScannerFeatures { @@ -375,6 +376,7 @@ impl Default for ScannerFeatures { add_implicit_index_wildcards: false, multipass_v0_enabled: true, return_matches: false, + enable_debug_observability: false, } } } @@ -591,7 +593,10 @@ impl Scanner { .contains(&content[rule_match.utf8_start..rule_match.utf8_end]); if is_false_positive && self.scanner_features.multipass_v0_enabled { self.rules[rule_match.rule_index] - .on_excluded_match_multipass_v0(); + .on_excluded_match_multipass_v0( + &path, + self.scanner_features.enable_debug_observability, + ); } !is_false_positive } else { @@ -986,6 +991,14 @@ impl ScannerBuilder<'_> { self } + /// Enables/Disables debug observability features. This defaults to FALSE. + /// When enabled, metrics will include additional tags (such as `sds_namespace`) + /// to help debug the source of matches. + pub fn with_debug_observability(mut self, value: bool) -> Self { + self.scanner_features.enable_debug_observability = value; + self + } + pub fn build(self) -> Result { let mut match_validators_per_type = AHashMap::new(); diff --git a/sds/src/scanner/regex_rule/compiled.rs b/sds/src/scanner/regex_rule/compiled.rs index a690147a..5bd4a614 100644 --- a/sds/src/scanner/regex_rule/compiled.rs +++ b/sds/src/scanner/regex_rule/compiled.rs @@ -9,8 +9,9 @@ use crate::scanner::{ RuleResult, RuleStatus, StringMatchesCtx, get_next_regex_start, is_false_positive_match, }; use crate::secondary_validation::Validator; -use crate::{CompiledRule, ExclusionCheck, Path, StringMatch}; +use crate::{CompiledRule, ExclusionCheck, Labels, Path, StringMatch}; use ahash::AHashSet; +use metrics::counter; use regex_automata::Input; use regex_automata::meta::Cache; use regex_automata::util::captures::Captures; @@ -70,8 +71,16 @@ impl CompiledRule for RegexCompiledRule { true } - fn on_excluded_match_multipass_v0(&self) { - self.metrics.false_positive_excluded_attributes.increment(1); + fn on_excluded_match_multipass_v0(&self, path: &Path, enable_debug_observability: bool) { + if enable_debug_observability { + let labels = self + .metrics + .base_labels + .clone_with_labels(Labels::new(&[("sds_namespace", path.to_string())])); + counter!("false_positive.multipass.excluded_match", labels).increment(1); + } else { + self.metrics.false_positive_excluded_attributes.increment(1); + } } fn as_regex_rule(&self) -> Option<&RegexCompiledRule> { diff --git a/sds/src/scanner/test/metrics.rs b/sds/src/scanner/test/metrics.rs index 11048597..b53463e0 100644 --- a/sds/src/scanner/test/metrics.rs +++ b/sds/src/scanner/test/metrics.rs @@ -96,6 +96,47 @@ fn should_submit_excluded_match_metric() { assert_eq!(metric_value, &(None, None, DebugValue::Counter(1))); } +#[test] +fn should_submit_excluded_match_metric_with_debug_observability() { + let recorder = DebuggingRecorder::new(); + let snapshotter = recorder.snapshotter(); + + metrics::with_local_recorder(&recorder, || { + let rule_0 = RootRuleConfig::new(RegexRuleConfig::new("bcdef").build()) + .scope(Scope::exclude(vec![Path::from(vec![PathSegment::Field( + "test".into(), + )])])) + .match_action(MatchAction::None); + + let scanner = ScannerBuilder::new(&[rule_0]) + .with_debug_observability(true) + .build() + .unwrap(); + let mut content = SimpleEvent::Map(BTreeMap::from([ + ( + "z-match".to_string(), + SimpleEvent::String("bcdef".to_string()), + ), + ("test".to_string(), SimpleEvent::String("bcdef".to_string())), + ])); + + scanner.scan(&mut content).unwrap(); + }); + + let snapshot = snapshotter.snapshot().into_hashmap(); + + let metric_name = "false_positive.multipass.excluded_match"; + let labels = vec![Label::new("sds_namespace", "z-match")]; + let metric_value = snapshot + .get(&CompositeKey::new( + Counter, + Key::from_parts(metric_name, labels), + )) + .expect("metric not found"); + + assert_eq!(metric_value, &(None, None, DebugValue::Counter(1))); +} + #[test] fn should_submit_excluded_keywords_metric() { let recorder = DebuggingRecorder::new();