From 04070d7c91bbd3089ab46474cc78ce1561729d9e Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Tue, 3 Feb 2026 10:58:22 +0000 Subject: [PATCH] fix: redact sensitive query params in logs Co-authored-by: abhimehro <84992105+abhimehro@users.noreply.github.com> --- main.py | 13 +++++++++ tests/test_security_log.py | 56 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 tests/test_security_log.py diff --git a/main.py b/main.py index 86792da4..fccb6d60 100644 --- a/main.py +++ b/main.py @@ -149,8 +149,21 @@ def check_env_permissions(env_path: str = ".env") -> None: def sanitize_for_log(text: Any) -> str: """Sanitize text for logging, ensuring TOKEN is redacted and control chars are escaped.""" s = str(text) + + # 1. Redact common sensitive query parameters in URLs (Defense in Depth) + # Matches ?param=value or &param=value + # Stops at &, whitespace, or quotes + s = re.sub( + r"([?&](?:token|key|secret|password|auth|access_token|api_key)=)([^&\s\"']+)", + r"\1[REDACTED]", + s, + flags=re.IGNORECASE, + ) + + # 2. Redact the specific global TOKEN if known if TOKEN and TOKEN in s: s = s.replace(TOKEN, "[REDACTED]") + # repr() safely escapes control characters (e.g., \n -> \\n, \x1b -> \\x1b) # This prevents log injection and terminal hijacking. 
safe = repr(s) diff --git a/tests/test_security_log.py b/tests/test_security_log.py new file mode 100644 index 00000000..6ffb6913 --- /dev/null +++ b/tests/test_security_log.py @@ -0,0 +1,56 @@ +import unittest +from main import sanitize_for_log + +class TestSecurityLog(unittest.TestCase): + def test_redact_query_params(self): + # Test cases for URL query parameter redaction + test_cases = [ + ( + "https://example.com?token=secret123", + "https://example.com?token=[REDACTED]" + ), + ( + "https://example.com?key=my_key&foo=bar", + "https://example.com?key=[REDACTED]&foo=bar" + ), + ( + "Error fetching https://api.com?auth=xyz failed", + "Error fetching https://api.com?auth=[REDACTED] failed" + ), + ( + "https://site.com?access_token=token&api_key=key", + "https://site.com?access_token=[REDACTED]&api_key=[REDACTED]" + ), + ( + "https://safe.com?public=data", + "https://safe.com?public=data" + ), + ( + "'https://quoted.com?password=pass'", + "https://quoted.com?password=[REDACTED]" + ) + ] + + for input_str, expected in test_cases: + # sanitize_for_log uses repr() which adds quotes and escapes. + # We need to handle that in our expectation or strip it. + # The current implementation of sanitize_for_log returns a repr() string (quoted). + # If our expected string is the *content* inside the quotes, we should match that. + + result = sanitize_for_log(input_str) + + # Remove surrounding quotes for easier comparison if present + if len(result) >= 2 and result[0] == result[-1] and result[0] in ("'", '"'): + result_content = result[1:-1] + else: + result_content = result + + # Also repr() escapes things. + # Our expected strings don't have special chars that repr escapes (except maybe quotes). + # But the proposed implementation applies redaction BEFORE repr. 
+ # So sanitizing "url?token=s" -> "url?token=[REDACTED]" -> repr() -> "'url?token=[REDACTED]'" + + self.assertEqual(result_content, expected, f"Failed for input: {input_str}") + +if __name__ == "__main__": + unittest.main()