From 998037c62519770279cd419312b9e471a6b86843 Mon Sep 17 00:00:00 2001 From: francose <13445813+francose@users.noreply.github.com> Date: Sun, 10 May 2026 12:06:44 -0400 Subject: [PATCH 1/2] Fix corrupted regex patterns in keys_extractor Several secret detection patterns in keys_extractor() have been corrupted since the initial commit - curly braces {32} rendered as f32g, escaped dots \. rendered as "n.", and escaped dollars \$ rendered as "n$". This caused Google YouTube OAuth, Amazon MWS, and PayPal/Braintree token patterns to never match any real secrets. Also fixed: the "PayPal" label was actually describing the Amazon MWS token format (amzn.mws.*), and "Amazon MWS" had the PayPal/Braintree access_token$production$ format. Labels are now correct. Fixed the "Slack Webook" typo to "Slack Webhook". Converted affected patterns to raw strings to prevent future escape corruption. --- xgitguard/common/data_format.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xgitguard/common/data_format.py b/xgitguard/common/data_format.py index f3449c8..a2b488c 100644 --- a/xgitguard/common/data_format.py +++ b/xgitguard/common/data_format.py @@ -129,22 +129,22 @@ def keys_extractor(code_content): regexes = { "AWS Tokens": "(?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}", "AWS Access Key ID": "[0-9a-zA-Z/+=]{40}", - "Google OAuth Secret": "[0-9a-zA-Zn\-_]{24}", - "Google OAuth Auth Code": "4/[0-9A-Za-zn\-_]+", - "Google OAuth Refresh Token": "1/[0-9A-Za-zn\-_]{43}|1/[0-9A-Za-zn\-_]{64}", - "Google OAuth Access Token": "ya29n.[0-9A-Za-zn\-_]+", - "Google API Key": "AIza[0-9A-Za-zn\-_]{35}", + "Google OAuth Secret": r"[0-9a-zA-Z\-_]{24}", + "Google OAuth Auth Code": r"4/[0-9A-Za-z\-_]+", + "Google OAuth Refresh Token": r"1/[0-9A-Za-z\-_]{43}|1/[0-9A-Za-z\-_]{64}", + "Google OAuth Access Token": r"ya29\.[0-9A-Za-z\-_]+", + "Google API Key": r"AIza[0-9A-Za-z\-_]{35}", "RSA Private Key": "BEGIN RSA PRIVATE KEY", "EC 
PRIVATE KEY", "PGP Private Key": "BEGIN PGP PRIVATE KEY BLOCK", "General Private Key": "BEGIN PRIVATE KEY", - "Google YouTube OAuth ID Gmail, GCloud": "[0-9]+-[0-9A-Za-z_]f32gn.appsn.googleusercontentn.com", - "Amazon MWS": "access_tokenn$productionn$[0-9a-z]f16gn$[0-9a-f]f32g", - "PayPal": "amznn.mwsn.[0-9a-f]f8g-[0-9a-f]f4g-[0-9a-f]f4g-[0-9a-f]f4g-[0-9a-f]f12g", + "Google YouTube OAuth ID Gmail, GCloud": r"[0-9]+-[0-9A-Za-z_]{32}\.apps\.googleusercontent\.com", + "Amazon MWS": r"amzn\.mws\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", + "PayPal Braintree": r"access_token\$production\$[0-9a-z]{16}\$[0-9a-f]{32}", "Slack Token": "(xox[pbaor]-[0-9]{12}-[0-9]{12}-[0-9]{12}-[a-z0-9]{32})", "AWS": "(?:.*awsSecretKey|.*aws_secret|.*api-key|.*aws_account_secret).*" "(?=.*[A-Z])(? Date: Sun, 10 May 2026 12:30:34 -0400 Subject: [PATCH 2/2] Use raw string for Slack Webhook pattern for consistency --- xgitguard/common/data_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xgitguard/common/data_format.py b/xgitguard/common/data_format.py index a2b488c..d354729 100644 --- a/xgitguard/common/data_format.py +++ b/xgitguard/common/data_format.py @@ -144,7 +144,7 @@ def keys_extractor(code_content): "Slack Token": "(xox[pbaor]-[0-9]{12}-[0-9]{12}-[0-9]{12}-[a-z0-9]{32})", "AWS": "(?:.*awsSecretKey|.*aws_secret|.*api-key|.*aws_account_secret).*" "(?=.*[A-Z])(?