From 5e9a248012a5eab57cbcffeb9ad93b7c3dd87f6d Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 18 Feb 2026 10:59:24 +0000 Subject: [PATCH 1/6] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20Enforce?= =?UTF-8?q?=20strict=20Content-Type=20validation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added Content-Type validation to `_gh_get` in `main.py`. - Updated existing tests to comply with new validation. - Added new test file `tests/test_content_type.py`. Co-authored-by: abhimehro <84992105+abhimehro@users.noreply.github.com> --- main.py | 23 +++++++- tests/test_cache_optimization.py | 3 +- tests/test_content_type.py | 99 ++++++++++++++++++++++++++++++++ tests/test_disk_cache.py | 3 +- 4 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 tests/test_content_type.py diff --git a/main.py b/main.py index 1f4f0476..16276351 100644 --- a/main.py +++ b/main.py @@ -937,7 +937,16 @@ def _gh_get(url: str) -> Dict: headers = {} with _gh.stream("GET", url, headers=headers) as r_retry: r_retry.raise_for_status() - + + # Security: Enforce Content-Type validation on retry + ct = r_retry.headers.get("content-type", "").lower() + allowed_types = ("application/json", "text/json", "text/plain") + if not any(t in ct for t in allowed_types): + raise ValueError( + f"Invalid Content-Type from {sanitize_for_log(url)}: {ct}. " + f"Expected one of: {', '.join(allowed_types)}" + ) + # 1. Check Content-Length header if present cl = r_retry.headers.get("Content-Length") if cl: @@ -993,7 +1002,17 @@ def _gh_get(url: str) -> Dict: return data r.raise_for_status() - + + # Security: Enforce Content-Type to be JSON or text + # This prevents processing of unexpected content (e.g., HTML from captive portals) + ct = r.headers.get("content-type", "").lower() + allowed_types = ("application/json", "text/json", "text/plain") + if not any(t in ct for t in allowed_types): + raise ValueError( + f"Invalid Content-Type from {sanitize_for_log(url)}: {ct}. " + f"Expected one of: {', '.join(allowed_types)}" + ) + # 1. Check Content-Length header if present cl = r.headers.get("Content-Length") if cl: diff --git a/tests/test_cache_optimization.py b/tests/test_cache_optimization.py index 02426061..06ad5f10 100644 --- a/tests/test_cache_optimization.py +++ b/tests/test_cache_optimization.py @@ -11,6 +11,7 @@ from unittest.mock import patch, MagicMock import sys import os +import httpx # Add root to path to import main sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -221,7 +222,7 @@ def mock_stream_get(method, url, headers=None): mock_response = MagicMock() mock_response.status_code = 200 mock_response.raise_for_status = MagicMock() - mock_response.headers = {"Content-Length": "100"} + mock_response.headers = httpx.Headers({"Content-Length": "100", "Content-Type": "application/json"}) # Return JSON bytes properly json_bytes = b'{"group": {"group": "Test Folder"}, "domains": ["example.com"]}' mock_response.iter_bytes = MagicMock(return_value=[json_bytes]) diff --git a/tests/test_content_type.py b/tests/test_content_type.py new file mode 100644 index 00000000..30cddf89 --- /dev/null +++ b/tests/test_content_type.py @@ -0,0 +1,99 @@ + +import unittest +from unittest.mock import patch, MagicMock +import sys +import os +import json + +# Add root to path to import main +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import main +import httpx + +class TestContentTypeValidation(unittest.TestCase): + def setUp(self): + # Clear cache before each test + main._cache.clear() + main._disk_cache.clear() + + @patch('main._gh.stream') + def test_allow_application_json(self, mock_stream): + """Test that application/json is allowed.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = httpx.Headers({'Content-Type': 'application/json'}) + mock_response.iter_bytes.return_value = [b'{"group": {"group": "test"}}'] + mock_response.__enter__.return_value = mock_response + mock_response.__exit__.return_value = None + + mock_stream.return_value = mock_response + + # Should not raise exception + result = main._gh_get("https://example.com/valid.json") + self.assertEqual(result, {"group": {"group": "test"}}) + + @patch('main._gh.stream') + def test_allow_text_plain(self, mock_stream): + """Test that text/plain (used by GitHub raw) is allowed.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = httpx.Headers({'Content-Type': 'text/plain; charset=utf-8'}) + mock_response.iter_bytes.return_value = [b'{"group": {"group": "test"}}'] + mock_response.__enter__.return_value = mock_response + mock_response.__exit__.return_value = None + + mock_stream.return_value = mock_response + + # Should not raise exception + result = main._gh_get("https://example.com/raw.json") + self.assertEqual(result, {"group": {"group": "test"}}) + + @patch('main._gh.stream') + def test_reject_text_html(self, mock_stream): + """Test that text/html is rejected even if content is valid JSON.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = httpx.Headers({'Content-Type': 'text/html'}) + # Even if the body is valid JSON, the Content-Type is wrong + mock_response.iter_bytes.return_value = [b'{"group": {"group": "test"}}'] + mock_response.__enter__.return_value = mock_response + mock_response.__exit__.return_value = None + + mock_stream.return_value = mock_response + + # This should fail after we implement the fix. + # Currently it might pass because we only check JSON validity. + try: + main._gh_get("https://example.com/malicious.html") + # If it doesn't raise, we fail the test (once fixed) + # But for TDD, we expect this to fail AFTER the fix. + # For now, let's assert that it *should* raise ValueError + except ValueError as e: + self.assertIn("Invalid Content-Type", str(e)) + return + + # If we are here, no exception was raised. + # This confirms the vulnerability (or lack of validation). + # We can mark this as "expected failure" or just print it. + # For now, I'll fail the test so I can see it pass later. + self.fail("Should have raised ValueError for text/html Content-Type") + + @patch('main._gh.stream') + def test_reject_xml(self, mock_stream): + """Test that application/xml is rejected.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = httpx.Headers({'Content-Type': 'application/xml'}) + mock_response.iter_bytes.return_value = [b'{"group": {"group": "test"}}'] + mock_response.__enter__.return_value = mock_response + mock_response.__exit__.return_value = None + + mock_stream.return_value = mock_response + + with self.assertRaises(ValueError) as cm: + main._gh_get("https://example.com/data.xml") + self.assertIn("Invalid Content-Type", str(cm.exception)) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_disk_cache.py b/tests/test_disk_cache.py index fa916cdf..1bf0fa1c 100644 --- a/tests/test_disk_cache.py +++ b/tests/test_disk_cache.py @@ -17,6 +17,7 @@ from pathlib import Path from unittest.mock import MagicMock, patch import sys +import httpx # Add root to path to import main sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -209,7 +210,7 @@ def mock_stream(method, url, headers=None): mock_response = MagicMock() mock_response.status_code = 200 mock_response.raise_for_status = MagicMock() - mock_response.headers = {"Content-Length": "100", "ETag": "test123"} + mock_response.headers = httpx.Headers({"Content-Length": "100", "ETag": "test123", "Content-Type": "application/json"}) json_bytes = json.dumps(test_data).encode() mock_response.iter_bytes = MagicMock(return_value=[json_bytes]) mock_response.__enter__ = MagicMock(return_value=mock_response) From b4b98a8b730081462e9a7eeff811f7ccffc10994 Mon Sep 17 00:00:00 2001 From: Abhi Mehrotra Date: Wed, 18 Feb 2026 13:10:49 -0600 Subject: [PATCH 2/6] Update main.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- main.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index 16276351..736a4bad 100644 --- a/main.py +++ b/main.py @@ -938,14 +938,33 @@ def _gh_get(url: str) -> Dict: with _gh.stream("GET", url, headers=headers) as r_retry: r_retry.raise_for_status() + # Security helper: centralize Content-Type validation so that + # all call sites use identical rules and error handling. + def _validate_content_type( + headers: httpx.Headers, + url: str, + allowed_types: Sequence[str] = ( + "application/json", + "text/json", + "text/plain", + ), + ) -> None: + """ + Validate that the response Content-Type is one of the expected types. + + This helper exists to keep Content-Type checks consistent across + code paths. If we ever need to adjust the allowed types or + error messaging, we only change it here. + """ + ct = headers.get("content-type", "").lower() + if not any(t in ct for t in allowed_types): + raise ValueError( + f"Invalid Content-Type from {sanitize_for_log(url)}: {ct}. " + f"Expected one of: {', '.join(allowed_types)}" + ) + # Security: Enforce Content-Type validation on retry - ct = r_retry.headers.get("content-type", "").lower() - allowed_types = ("application/json", "text/json", "text/plain") - if not any(t in ct for t in allowed_types): - raise ValueError( - f"Invalid Content-Type from {sanitize_for_log(url)}: {ct}. " - f"Expected one of: {', '.join(allowed_types)}" - ) + _validate_content_type(r_retry.headers, url) # 1. Check Content-Length header if present cl = r_retry.headers.get("Content-Length") From 8a4b0f44b7acf7e2fd118bb12b931f9436a1a2d2 Mon Sep 17 00:00:00 2001 From: Abhi Mehrotra Date: Wed, 18 Feb 2026 13:10:56 -0600 Subject: [PATCH 3/6] Update test_content_type.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_content_type.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_content_type.py b/tests/test_content_type.py index 30cddf89..867aac12 100644 --- a/tests/test_content_type.py +++ b/tests/test_content_type.py @@ -95,5 +95,20 @@ def test_reject_xml(self, mock_stream): main._gh_get("https://example.com/data.xml") self.assertIn("Invalid Content-Type", str(cm.exception)) + @patch('main._gh.stream') + def test_allow_text_json(self, mock_stream): + """Test that text/json is allowed and parsed as JSON.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = httpx.Headers({'Content-Type': 'text/json; charset=utf-8'}) + mock_response.iter_bytes.return_value = [b'{"group": {"group": "test"}}'] + mock_response.__enter__.return_value = mock_response + mock_response.__exit__.return_value = None + + mock_stream.return_value = mock_response + + # Should not raise exception and should parse JSON correctly + result = main._gh_get("https://example.com/data.json") + self.assertEqual(result, {"group": {"group": "test"}}) if __name__ == '__main__': unittest.main() From c2cd1440336d191f2ce2c69c04878f01a1edcfaf Mon Sep 17 00:00:00 2001 From: Abhi Mehrotra Date: Wed, 18 Feb 2026 13:11:05 -0600 Subject: [PATCH 4/6] Update test_content_type.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_content_type.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_content_type.py b/tests/test_content_type.py index 867aac12..7f3c0dda 100644 --- a/tests/test_content_type.py +++ b/tests/test_content_type.py @@ -96,6 +96,35 @@ def test_reject_xml(self, mock_stream): self.assertIn("Invalid Content-Type", str(cm.exception)) @patch('main._gh.stream') + def test_304_retry_with_invalid_content_type(self, mock_stream): + """Ensure Content-Type validation also applies after a 304 retry path.""" + # First response: 304 Not Modified with no cached body. This should + # force _gh_get to enter its retry logic and perform a second request. + mock_304 = MagicMock() + mock_304.status_code = 304 + mock_304.headers = httpx.Headers() + mock_304.iter_bytes.return_value = [b''] + mock_304.__enter__.return_value = mock_304 + mock_304.__exit__.return_value = None + + # Second response: 200 OK but with an invalid Content-Type that should + # be rejected even though the body contains valid JSON. + mock_invalid_ct = MagicMock() + mock_invalid_ct.status_code = 200 + mock_invalid_ct.headers = httpx.Headers({'Content-Type': 'text/html'}) + mock_invalid_ct.iter_bytes.return_value = [b'{"group": {"group": "test"}}'] + mock_invalid_ct.__enter__.return_value = mock_invalid_ct + mock_invalid_ct.__exit__.return_value = None + + # Simulate the retry sequence: first a 304, then the invalid 200. + mock_stream.side_effect = [mock_304, mock_invalid_ct] + + # The final 200 response should still be subject to Content-Type + # validation, causing _gh_get to raise a ValueError. + with self.assertRaises(ValueError) as cm: + main._gh_get("https://example.com/retry.json") + self.assertIn("Invalid Content-Type", str(cm.exception)) + @patch('main._gh.stream') def test_allow_text_json(self, mock_stream): """Test that text/json is allowed and parsed as JSON.""" mock_response = MagicMock() From edc91779b6868d366c566702b70d3cce0d945b03 Mon Sep 17 00:00:00 2001 From: Abhi Mehrotra Date: Wed, 18 Feb 2026 13:11:14 -0600 Subject: [PATCH 5/6] Update test_content_type.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_content_type.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/tests/test_content_type.py b/tests/test_content_type.py index 7f3c0dda..1853c3c4 100644 --- a/tests/test_content_type.py +++ b/tests/test_content_type.py @@ -62,24 +62,15 @@ def test_reject_text_html(self, mock_stream): mock_stream.return_value = mock_response - # This should fail after we implement the fix. - # Currently it might pass because we only check JSON validity. - try: + with self.assertRaises(ValueError) as cm: main._gh_get("https://example.com/malicious.html") - # If it doesn't raise, we fail the test (once fixed) - # But for TDD, we expect this to fail AFTER the fix. - # For now, let's assert that it *should* raise ValueError - except ValueError as e: - self.assertIn("Invalid Content-Type", str(e)) - return - - # If we are here, no exception was raised. - # This confirms the vulnerability (or lack of validation). - # We can mark this as "expected failure" or just print it. - # For now, I'll fail the test so I can see it pass later. - self.fail("Should have raised ValueError for text/html Content-Type") + self.assertIn("Invalid Content-Type", str(cm.exception)) @patch('main._gh.stream') + def test_reject_xml(self, mock_stream): + """Test that application/xml is rejected.""" + mock_response = MagicMock() + mock_response.status_code = 200 def test_reject_xml(self, mock_stream): """Test that application/xml is rejected.""" mock_response = MagicMock() From 795ff6f67176c76edfed0d3ab863765d66fe67c4 Mon Sep 17 00:00:00 2001 From: Abhi Mehrotra Date: Wed, 18 Feb 2026 13:11:19 -0600 Subject: [PATCH 6/6] Update test_content_type.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_content_type.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_content_type.py b/tests/test_content_type.py index 1853c3c4..a814aec4 100644 --- a/tests/test_content_type.py +++ b/tests/test_content_type.py @@ -87,6 +87,23 @@ def test_reject_xml(self, mock_stream): self.assertIn("Invalid Content-Type", str(cm.exception)) @patch('main._gh.stream') + def test_reject_missing_content_type(self, mock_stream): + """Test that responses without a Content-Type header are rejected.""" + mock_response = MagicMock() + mock_response.status_code = 200 + # Simulate a response with no Content-Type header at all + mock_response.headers = httpx.Headers({}) + # Body is valid JSON so failure should be due to missing header, not parsing + mock_response.iter_bytes.return_value = [b'{"group": {"group": "test"}}'] + mock_response.__enter__.return_value = mock_response + mock_response.__exit__.return_value = None + + mock_stream.return_value = mock_response + + with self.assertRaises(ValueError) as cm: + main._gh_get("https://example.com/no-header") + self.assertIn("Invalid Content-Type", str(cm.exception)) + @patch('main._gh.stream') def test_304_retry_with_invalid_content_type(self, mock_stream): """Ensure Content-Type validation also applies after a 304 retry path.""" # First response: 304 Not Modified with no cached body. This should