From 17923d56f80b5d8ebde40c6aa1d19c2f1b8b7bdc Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 4 Feb 2026 10:54:56 +0000
Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[HIGH]=20Re?=
 =?UTF-8?q?dact=20credentials=20from=20URLs=20in=20logs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: abhimehro <84992105+abhimehro@users.noreply.github.com>
---
 .jules/sentinel.md | 4 ++++
 main.py            | 5 +++++
 2 files changed, 9 insertions(+)
 create mode 100644 .jules/sentinel.md

diff --git a/.jules/sentinel.md b/.jules/sentinel.md
new file mode 100644
index 00000000..138ae124
--- /dev/null
+++ b/.jules/sentinel.md
@@ -0,0 +1,4 @@
+## 2025-05-23 - URL Credential Leakage in Logs
+**Vulnerability:** `sanitize_for_log` only redacted the API token but allowed URLs containing Basic Auth credentials (e.g. `https://user:pass@host`) to be logged in plain text.
+**Learning:** Sanitization functions often focus on known secrets (like specific tokens) but miss pattern-based leaks like standard URI credentials.
+**Prevention:** Always scrub user:password combinations from any URL before logging. Use regex or URL parsing libraries to identify and redact the userinfo portion of the authority section.
diff --git a/main.py b/main.py
index 86792da4..4d2adc09 100644
--- a/main.py
+++ b/main.py
@@ -151,6 +151,11 @@ def sanitize_for_log(text: Any) -> str:
     s = str(text)
     if TOKEN and TOKEN in s:
         s = s.replace(TOKEN, "[REDACTED]")
+
+    # Redact credentials in URLs (e.g. https://user:pass@host)
+    # Pattern: scheme://user:pass@host -> scheme://[REDACTED]@host
+    s = re.sub(r"(https?://)[^/\s@]+@([^/\s]+)", r"\1[REDACTED]@\2", s)
+
     # repr() safely escapes control characters (e.g., \n -> \\n, \x1b -> \\x1b)
     # This prevents log injection and terminal hijacking.
     safe = repr(s)