From 04070d7c91bbd3089ab46474cc78ce1561729d9e Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 3 Feb 2026 10:58:22 +0000
Subject: [PATCH] fix: redact sensitive query params in logs

Co-authored-by: abhimehro <84992105+abhimehro@users.noreply.github.com>
---
 main.py                    | 13 +++++++++
 tests/test_security_log.py | 56 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 tests/test_security_log.py

diff --git a/main.py b/main.py
index 86792da4..fccb6d60 100644
--- a/main.py
+++ b/main.py
@@ -149,8 +149,21 @@ def check_env_permissions(env_path: str = ".env") -> None:
 def sanitize_for_log(text: Any) -> str:
     """Sanitize text for logging, ensuring TOKEN is redacted and control chars are escaped."""
     s = str(text)
+
+    # 1. Redact common sensitive query parameters in URLs (Defense in Depth)
+    # Matches ?param=value or &param=value
+    # Stops at &, whitespace, or quotes
+    s = re.sub(
+        r"([?&](?:token|key|secret|password|auth|access_token|api_key)=)([^&\s\"']+)",
+        r"\1[REDACTED]",
+        s,
+        flags=re.IGNORECASE,
+    )
+
+    # 2. Redact the specific global TOKEN if known
     if TOKEN and TOKEN in s:
         s = s.replace(TOKEN, "[REDACTED]")
+
     # repr() safely escapes control characters (e.g., \n -> \\n, \x1b -> \\x1b)
     # This prevents log injection and terminal hijacking.
     safe = repr(s)
diff --git a/tests/test_security_log.py b/tests/test_security_log.py
new file mode 100644
index 00000000..6ffb6913
--- /dev/null
+++ b/tests/test_security_log.py
@@ -0,0 +1,56 @@
+import unittest
+from main import sanitize_for_log
+
+class TestSecurityLog(unittest.TestCase):
+    def test_redact_query_params(self):
+        # Test cases for URL query parameter redaction
+        test_cases = [
+            (
+                "https://example.com?token=secret123",
+                "https://example.com?token=[REDACTED]"
+            ),
+            (
+                "https://example.com?key=my_key&foo=bar",
+                "https://example.com?key=[REDACTED]&foo=bar"
+            ),
+            (
+                "Error fetching https://api.com?auth=xyz failed",
+                "Error fetching https://api.com?auth=[REDACTED] failed"
+            ),
+            (
+                "https://site.com?access_token=token&api_key=key",
+                "https://site.com?access_token=[REDACTED]&api_key=[REDACTED]"
+            ),
+            (
+                "https://safe.com?public=data",
+                "https://safe.com?public=data"
+            ),
+            (
+                "'https://quoted.com?password=pass'",
+                "https://quoted.com?password=[REDACTED]"
+            )
+        ]
+
+        for input_str, expected in test_cases:
+            # sanitize_for_log builds its output with repr(), so the result may
+            # come back wrapped in a pair of matching quotes. Each expected
+            # string above is the content *inside* those quotes, so any such
+            # wrapper is stripped below before comparing.
+
+            result = sanitize_for_log(input_str)
+
+            # Remove surrounding quotes for easier comparison if present
+            if len(result) >= 2 and result[0] == result[-1] and result[0] in ("'", '"'):
+                result_content = result[1:-1]
+            else:
+                result_content = result
+
+            # repr() also escapes control characters, but none of the inputs
+            # above contain any, so only the surrounding quotes need handling.
+            # Redaction is applied BEFORE repr(), so the pipeline is:
+            # "url?token=s" -> "url?token=[REDACTED]" -> repr() -> "'url?token=[REDACTED]'"
+
+            self.assertEqual(result_content, expected, f"Failed for input: {input_str}")
+
+if __name__ == "__main__":
+    unittest.main()