From dba017a42c918bbda280010820f75f0ab4d0811b Mon Sep 17 00:00:00 2001 From: mesutoezdil Date: Mon, 15 Jun 2026 21:51:24 +0200 Subject: [PATCH 1/6] fix(python): add encoding=utf-8 to all file reads and writes in sandbox.py read_text(), fdopen() without encoding use the system locale, which can differ on non-UTF-8 systems. All config files (metadata.json, oidc_token.json, active_gateway) are UTF-8 text. --- python/openshell/sandbox.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/openshell/sandbox.py b/python/openshell/sandbox.py index 4c98a4155..ea4234323 100644 --- a/python/openshell/sandbox.py +++ b/python/openshell/sandbox.py @@ -318,7 +318,7 @@ def from_active_cluster( gateway_dir = _xdg_config_home() / "openshell" / "gateways" / cluster_name metadata_path = gateway_dir / "metadata.json" try: - metadata = json.loads(metadata_path.read_text()) + metadata = json.loads(metadata_path.read_text(encoding="utf-8")) except FileNotFoundError: raise SandboxError(f"gateway '{cluster_name}' not found") from None if "gateway_endpoint" not in metadata: @@ -846,7 +846,7 @@ def _read_oidc_token_bundle(gateway_dir: pathlib.Path) -> dict | None: """ token_path = gateway_dir / "oidc_token.json" try: - return json.loads(token_path.read_text()) + return json.loads(token_path.read_text(encoding="utf-8")) except FileNotFoundError: return None except (OSError, json.JSONDecodeError): @@ -1299,7 +1299,7 @@ def _write_to_disk(self, bundle: dict) -> None: ) tmp_path = pathlib.Path(tmp_name) try: - with os.fdopen(fd, "w") as f: + with os.fdopen(fd, "w", encoding="utf-8") as f: f.write(payload) with contextlib.suppress(OSError): tmp_path.chmod(0o600) @@ -1374,7 +1374,7 @@ def _resolve_active_cluster() -> str: return env_gateway active_file = _xdg_config_home() / "openshell" / "active_gateway" try: - value = active_file.read_text().strip() + value = active_file.read_text(encoding="utf-8").strip() except FileNotFoundError: raise SandboxError("no active gateway configured") from None if value == "": From c7b876bc2a4288aa2c71f76f99387d8080ee9b03 Mon Sep 17 00:00:00 2001 From: mesutoezdil Date: Thu, 18 Jun 2026 16:51:37 +0200 Subject: [PATCH 2/6] fix(python): catch UnicodeDecodeError in _read_oidc_token_bundle, add encoding tests Corrupt or non-UTF-8 oidc_token.json now returns None consistently. Add regression tests for non-ASCII UTF-8 paths in metadata, oidc token, and active_gateway files. --- python/openshell/sandbox.py | 2 +- python/openshell/sandbox_test.py | 39 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/python/openshell/sandbox.py b/python/openshell/sandbox.py index ea4234323..3b64089de 100644 --- a/python/openshell/sandbox.py +++ b/python/openshell/sandbox.py @@ -849,7 +849,7 @@ def _read_oidc_token_bundle(gateway_dir: pathlib.Path) -> dict | None: return json.loads(token_path.read_text(encoding="utf-8")) except FileNotFoundError: return None - except (OSError, json.JSONDecodeError): + except (OSError, UnicodeDecodeError, json.JSONDecodeError): return None diff --git a/python/openshell/sandbox_test.py b/python/openshell/sandbox_test.py index 175472ca7..743b9b0cf 100644 --- a/python/openshell/sandbox_test.py +++ b/python/openshell/sandbox_test.py @@ -24,6 +24,7 @@ _make_cluster_bearer_provider, _normalize_bearer, _OidcRefresher, + _read_oidc_token_bundle, ) @@ -1345,3 +1346,41 @@ def test_inference_set_cluster_forwards_no_verify_flag() -> None: assert stub.request is not None assert stub.request.no_verify is True + + +# --------------------------------------------------------------------------- +# Encoding regression tests (utf-8 explicit on all config file reads/writes) +# --------------------------------------------------------------------------- + +def test_read_oidc_token_bundle_parses_non_ascii_utf8(tmp_path: Path) -> None: + gateway_dir = tmp_path / "gw" + gateway_dir.mkdir() + payload = {"refresh_token": "tok", "issuer": "https://example.com/é"} + (gateway_dir / "oidc_token.json").write_bytes( + json.dumps(payload).encode("utf-8") + ) + result = _read_oidc_token_bundle(gateway_dir) + assert result == payload + + +def test_read_oidc_token_bundle_returns_none_on_corrupt_bytes(tmp_path: Path) -> None: + gateway_dir = tmp_path / "gw" + gateway_dir.mkdir() + (gateway_dir / "oidc_token.json").write_bytes(b"\xff\xfe not utf-8") + assert _read_oidc_token_bundle(gateway_dir) is None + + +def test_load_cluster_bearer_token_handles_non_ascii_utf8_oidc(tmp_path: Path) -> None: + gateway_dir = tmp_path / "gw" + gateway_dir.mkdir() + bundle = { + "access_token": "accéss", + "refresh_token": "ref", + "expiry": "2099-01-01T00:00:00Z", + "issuer": "https://example.com", + "client_id": "c", + "client_secret": "s", + } + (gateway_dir / "oidc_token.json").write_bytes(json.dumps(bundle).encode("utf-8")) + token = _load_cluster_bearer_token(gateway_dir) + assert token == "accéss" From 7ac0153a542edcab30efb2d6ee338001700c0e06 Mon Sep 17 00:00:00 2001 From: mesutoezdil Date: Thu, 18 Jun 2026 17:01:21 +0200 Subject: [PATCH 3/6] fix(python): use ensure_ascii=False in encoding tests, add from_active_cluster UTF-8 bytes test --- python/openshell/sandbox_test.py | 33 ++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/python/openshell/sandbox_test.py b/python/openshell/sandbox_test.py index 743b9b0cf..f4389b1bb 100644 --- a/python/openshell/sandbox_test.py +++ b/python/openshell/sandbox_test.py @@ -1357,7 +1357,7 @@ def test_read_oidc_token_bundle_parses_non_ascii_utf8(tmp_path: Path) -> None: gateway_dir.mkdir() payload = {"refresh_token": "tok", "issuer": "https://example.com/é"} (gateway_dir / "oidc_token.json").write_bytes( - json.dumps(payload).encode("utf-8") + json.dumps(payload, ensure_ascii=False).encode("utf-8") ) result = _read_oidc_token_bundle(gateway_dir) assert result == payload @@ -1381,6 +1381,35 @@ def test_load_cluster_bearer_token_handles_non_ascii_utf8_oidc(tmp_path: Path) - "client_id": "c", "client_secret": "s", } - (gateway_dir / "oidc_token.json").write_bytes(json.dumps(bundle).encode("utf-8")) + (gateway_dir / "oidc_token.json").write_bytes(json.dumps(bundle, ensure_ascii=False).encode("utf-8")) token = _load_cluster_bearer_token(gateway_dir) assert token == "accéss" + + +def test_from_active_cluster_reads_utf8_bytes_from_active_gateway_and_metadata( + tmp_path: Path, + monkeypatch: Any, +) -> None: + gateway_name = "gw-utf8" + gateway_dir = tmp_path / "openshell" / "gateways" / gateway_name + mtls_dir = gateway_dir / "mtls" + mtls_dir.mkdir(parents=True) + (tmp_path / "openshell" / "active_gateway").write_bytes( + gateway_name.encode("utf-8") + ) + meta = {"gateway_endpoint": "https://127.0.0.1:8443", "note": "café"} + (gateway_dir / "metadata.json").write_bytes( + json.dumps(meta, ensure_ascii=False).encode("utf-8") + ) + (mtls_dir / "ca.crt").write_bytes(b"ca") + (mtls_dir / "tls.crt").write_bytes(b"cert") + (mtls_dir / "tls.key").write_bytes(b"key") + + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path)) + monkeypatch.delenv("OPENSHELL_GATEWAY", raising=False) + + client = SandboxClient.from_active_cluster() + try: + assert client._cluster_name == gateway_name + finally: + client.close() From 0cf3fe8e77d044c7d1f55351e81847a273c5fc6d Mon Sep 17 00:00:00 2001 From: mesutoezdil Date: Thu, 18 Jun 2026 17:17:39 +0200 Subject: [PATCH 4/6] fix(python): assert non-ASCII cluster name and endpoint in encoding test --- python/openshell/sandbox_test.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/python/openshell/sandbox_test.py b/python/openshell/sandbox_test.py index f4389b1bb..0145136db 100644 --- a/python/openshell/sandbox_test.py +++ b/python/openshell/sandbox_test.py @@ -1390,20 +1390,16 @@ def test_from_active_cluster_reads_utf8_bytes_from_active_gateway_and_metadata( tmp_path: Path, monkeypatch: Any, ) -> None: - gateway_name = "gw-utf8" + gateway_name = "gw-é" gateway_dir = tmp_path / "openshell" / "gateways" / gateway_name - mtls_dir = gateway_dir / "mtls" - mtls_dir.mkdir(parents=True) + gateway_dir.mkdir(parents=True) (tmp_path / "openshell" / "active_gateway").write_bytes( gateway_name.encode("utf-8") ) - meta = {"gateway_endpoint": "https://127.0.0.1:8443", "note": "café"} + meta = {"gateway_endpoint": "http://tést.example:8080"} (gateway_dir / "metadata.json").write_bytes( json.dumps(meta, ensure_ascii=False).encode("utf-8") ) - (mtls_dir / "ca.crt").write_bytes(b"ca") - (mtls_dir / "tls.crt").write_bytes(b"cert") - (mtls_dir / "tls.key").write_bytes(b"key") monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path)) monkeypatch.delenv("OPENSHELL_GATEWAY", raising=False) @@ -1411,5 +1407,6 @@ def test_from_active_cluster_reads_utf8_bytes_from_active_gateway_and_metadata( client = SandboxClient.from_active_cluster() try: assert client._cluster_name == gateway_name + assert client._endpoint == "tést.example:8080" finally: client.close() From eff441b5303fc2d5b65b4f1b58be598fdb5b47ac Mon Sep 17 00:00:00 2001 From: mesutoezdil Date: Thu, 18 Jun 2026 17:39:59 +0200 Subject: [PATCH 5/6] fix(python): wrap long write_bytes line for ruff format --- python/openshell/sandbox_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/openshell/sandbox_test.py b/python/openshell/sandbox_test.py index 0145136db..499a323c5 100644 --- a/python/openshell/sandbox_test.py +++ b/python/openshell/sandbox_test.py @@ -1381,7 +1381,9 @@ def test_load_cluster_bearer_token_handles_non_ascii_utf8_oidc(tmp_path: Path) - "client_id": "c", "client_secret": "s", } - (gateway_dir / "oidc_token.json").write_bytes(json.dumps(bundle, ensure_ascii=False).encode("utf-8")) + (gateway_dir / "oidc_token.json").write_bytes( + json.dumps(bundle, ensure_ascii=False).encode("utf-8") + ) token = _load_cluster_bearer_token(gateway_dir) assert token == "accéss" From a6bb8095f4906251f1376b12c16213b9d5c352e0 Mon Sep 17 00:00:00 2001 From: mesutoezdil Date: Thu, 18 Jun 2026 18:22:11 +0200 Subject: [PATCH 6/6] fix(python): apply ruff format to sandbox_test.py --- python/openshell/sandbox_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/openshell/sandbox_test.py b/python/openshell/sandbox_test.py index 499a323c5..82e86eefc 100644 --- a/python/openshell/sandbox_test.py +++ b/python/openshell/sandbox_test.py @@ -1352,6 +1352,7 @@ def test_inference_set_cluster_forwards_no_verify_flag() -> None: # Encoding regression tests (utf-8 explicit on all config file reads/writes) # --------------------------------------------------------------------------- + def test_read_oidc_token_bundle_parses_non_ascii_utf8(tmp_path: Path) -> None: gateway_dir = tmp_path / "gw" gateway_dir.mkdir()