Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sds/src/scanner/regex_rule/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ pub enum SecondaryValidator {
MoneroAddress,
NhsCheckDigit,
NirChecksum,
NonHexChecker,
PolishNationalIdChecksum,
PolishNipChecksum,
PortugueseTaxIdChecksum,
Expand Down
30 changes: 30 additions & 0 deletions sds/src/scanner/test/validators.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::SecondaryValidator::{
ChineseIdChecksum, GithubTokenChecksum, IbanChecker, JwtExpirationChecker, NhsCheckDigit,
NonHexChecker,
};
use crate::scanner::RootRuleConfig;
use crate::{MatchAction, RegexRuleConfig, ScannerBuilder, SecondaryValidator};
Expand Down Expand Up @@ -170,3 +171,32 @@ fn test_nhs_checksum() {
assert_eq!(matches.len(), 1);
assert_eq!(content, "[NHS]");
}

/// Scanner-level check for `NonHexChecker`: the same regex rule is run once without
/// and once with the validator, and only the validated scanner drops a match that
/// consists solely of hexadecimal digits.
#[test]
fn test_non_hex_checker_filters_pure_hex() {
    let redact = MatchAction::Redact {
        replacement: "[token]".to_string(),
    };
    let base_rule = RegexRuleConfig::new("[a-zA-Z0-9_]{16,}");

    // Identical regex and action; the only difference is the attached validator.
    let validated_rule = RootRuleConfig::new(
        base_rule
            .clone()
            .with_validator(Some(NonHexChecker))
            .build(),
    )
    .match_action(redact.clone());
    let plain_rule = RootRuleConfig::new(base_rule.build()).match_action(redact);

    // Baseline: with no validator, a 16-character hex string is reported.
    let scanner_without = ScannerBuilder::new(&[plain_rule]).build().unwrap();
    let mut hex_input = "0123456789abcdef".to_string();
    let baseline_matches = scanner_without.scan(&mut hex_input).unwrap();
    assert_eq!(baseline_matches.len(), 1);

    // With the validator, the same input yields no match and is left untouched.
    let scanner_with = ScannerBuilder::new(&[validated_rule]).build().unwrap();
    let mut hex_input_validated = "0123456789abcdef".to_string();
    let filtered_matches = scanner_with.scan(&mut hex_input_validated).unwrap();
    assert_eq!(filtered_matches.len(), 0);
    assert_eq!(hex_input_validated, "0123456789abcdef");

    // A token containing non-hex characters (`s`, `k`, `_`, ...) is still matched and redacted.
    let mut prefixed_token = "sk_live_0123456789abcd".to_string();
    let token_matches = scanner_with.scan(&mut prefixed_token).unwrap();
    assert_eq!(token_matches.len(), 1);
    assert_eq!(prefixed_token, "[token]");
}
3 changes: 3 additions & 0 deletions sds/src/secondary_validation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ mod luxembourg_individual_nin_checksum;
mod monero_address;
mod nhs_check_digit;
mod nir_checksum;
mod non_hex_checker;
mod polish_national_id_checksum;
mod polish_nip_checksum;
mod portuguese_tax_id_checksum;
Expand Down Expand Up @@ -100,6 +101,7 @@ pub use crate::secondary_validation::luxembourg_individual_nin_checksum::Luxembo
pub use crate::secondary_validation::monero_address::MoneroAddress;
pub use crate::secondary_validation::nhs_check_digit::NhsCheckDigit;
pub use crate::secondary_validation::nir_checksum::NirChecksum;
pub use crate::secondary_validation::non_hex_checker::NonHexChecker;
pub use crate::secondary_validation::polish_national_id_checksum::PolishNationalIdChecksum;
pub use crate::secondary_validation::polish_nip_checksum::PolishNipChecksum;
pub use crate::secondary_validation::portuguese_tax_id_checksum::PortugueseTaxIdChecksum;
Expand Down Expand Up @@ -253,6 +255,7 @@ impl SecondaryValidator {
SecondaryValidator::MoneroAddress => Arc::new(MoneroAddress),
SecondaryValidator::NhsCheckDigit => Arc::new(NhsCheckDigit),
SecondaryValidator::NirChecksum => Arc::new(NirChecksum),
SecondaryValidator::NonHexChecker => Arc::new(NonHexChecker),
SecondaryValidator::PolishNationalIdChecksum => Arc::new(PolishNationalIdChecksum),
SecondaryValidator::PolishNipChecksum => Arc::new(PolishNipChecksum),
SecondaryValidator::PortugueseTaxIdChecksum => Arc::new(PortugueseTaxIdChecksum),
Expand Down
53 changes: 53 additions & 0 deletions sds/src/secondary_validation/non_hex_checker.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
use crate::secondary_validation::Validator;

/// Validator that keeps a match only if it has at least one character that is
/// not an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
///
/// Matches made up entirely of hex digits (e.g. hashes, or UUIDs written without
/// separators) are rejected, and so is the empty string. Tokens drawn from a wider
/// alphabet (base64, prefixed keys, punctuation, non-ASCII text) are accepted.
pub struct NonHexChecker;

impl Validator for NonHexChecker {
    /// Returns `true` when `regex_match` contains any non-hex character.
    fn is_valid_match(&self, regex_match: &str) -> bool {
        // Working on bytes is equivalent to working on chars here: every byte of a
        // multi-byte UTF-8 sequence is >= 0x80 and therefore never a hex digit.
        let only_hex_digits = regex_match.bytes().all(|byte| byte.is_ascii_hexdigit());
        !only_hex_digits
    }
}

#[cfg(test)]
mod tests {
    use crate::secondary_validation::Validator;
    use crate::secondary_validation::non_hex_checker::NonHexChecker;

    /// Strings made only of hex digits, as well as the empty string, must be rejected.
    #[test]
    fn rejects_pure_hex() {
        let hex_only_inputs = [
            "",
            "a",
            "deadbeef",
            "DEADBEEF0123456789",
            "0123456789abcdef",
            "AbCdEf0123456789",
        ];

        hex_only_inputs.into_iter().for_each(|input| {
            let accepted = NonHexChecker.is_valid_match(input);
            assert!(
                !accepted,
                "expected pure hex or empty to be rejected: {input:?}"
            );
        });
    }

    /// A single character outside the hex alphabet is enough for the match to be accepted.
    #[test]
    fn accepts_when_any_non_hex_present() {
        let non_hex_inputs = [
            "g",
            "0g",
            "sk_live_abc",
            "abc-def",
            "ff_FF", // the `_` is the only non-hex character here
            "日本",
        ];

        non_hex_inputs.into_iter().for_each(|input| {
            let accepted = NonHexChecker.is_valid_match(input);
            assert!(
                accepted,
                "expected non-hex character to accept: {input:?}"
            );
        });
    }
}
Loading