From 5fb18504c1db76873f5980311ff3f39a526c0b5e Mon Sep 17 00:00:00 2001
From: anvil
Date: Wed, 22 Apr 2026 16:07:41 +0000
Subject: [PATCH] fix(context): reject app shells for binary downloads

---
 ax_cli/commands/context.py     | 34 +++++++++++++++++++++++
 tests/test_context_commands.py | 51 ++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)

diff --git a/ax_cli/commands/context.py b/ax_cli/commands/context.py
index 30a60d7..a8b6ffa 100644
--- a/ax_cli/commands/context.py
+++ b/ax_cli/commands/context.py
@@ -110,6 +110,39 @@ def _context_file_payload(data: dict, key: str) -> dict:
     }
 
 
+def _looks_like_html(content: bytes) -> bool:
+    prefix = content[:512].lstrip().lower()
+    return prefix.startswith(b"<!doctype html") or prefix.startswith(b"<html")
+
+
+def _validate_context_file_response(payload: dict, response, download_url: str) -> None:
+    expected_content_type = str(payload.get("content_type") or "").split(";", 1)[0].strip().lower()
+    if _is_text_like(payload):
+        return
+
+    headers = getattr(response, "headers", {}) or {}
+    actual_content_type = str(headers.get("content-type") or "").split(";", 1)[0].strip().lower()
+    content = getattr(response, "content", b"") or b""
+    suspicious_text_response = (
+        actual_content_type.startswith("text/")
+        or actual_content_type in TEXT_CONTENT_TYPES
+        or actual_content_type == "application/json"
+        or _looks_like_html(content)
+    )
+    if not suspicious_text_response:
+        return
+
+    preview = content[:160].decode("utf-8", errors="replace").strip().replace("\n", " ")
+    filename = payload.get("filename") or "context artifact"
+    expected_label = expected_content_type or "binary file"
+    actual_label = actual_content_type or "unknown content-type"
+    raise ValueError(
+        f"Expected {filename} to download as {expected_label}, but {download_url} returned "
+        f"{actual_label} instead. This usually means the upload URL resolved to an app shell "
+        f"or error page instead of file bytes. Response preview: {preview}"
+    )
+
+
 def _fetch_context_file(client, sid: str | None, payload: dict) -> bytes:
     url = payload.get("url", "")
     if not url:
@@ -123,6 +156,7 @@ def _fetch_context_file(client, sid: str | None, payload: dict) -> bytes:
         # into a 404 after the user switches spaces.
         response = http.get(download_url)
         response.raise_for_status()
+        _validate_context_file_response(payload, response, download_url)
         return response.content
 
 
diff --git a/tests/test_context_commands.py b/tests/test_context_commands.py
index 6a1fb58..03da96b 100644
--- a/tests/test_context_commands.py
+++ b/tests/test_context_commands.py
@@ -77,6 +77,57 @@ def get(self, url, params=None):
     assert calls["follow_redirects"] is True
 
 
+def test_context_download_rejects_html_shell_for_binary_payload(monkeypatch, tmp_path):
+    class FakeClient:
+        base_url = "https://paxai.app"
+
+        def get_context(self, key, *, space_id=None):
+            assert key == "image.png"
+            return {
+                "value": {
+                    "type": "file_upload",
+                    "filename": "image.png",
+                    "content_type": "image/png",
+                    "url": "/api/v1/uploads/files/image.png",
+                }
+            }
+
+        def _auth_headers(self):
+            return {"Authorization": "Bearer exchanged.jwt"}
+
+    class FakeResponse:
+        headers = {"content-type": "text/html; charset=utf-8"}
+        content = b"app shell"
+
+        def raise_for_status(self):
+            return None
+
+    class FakeHttpClient:
+        def __init__(self, *, headers, timeout, follow_redirects):
+            pass
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc, tb):
+            return None
+
+        def get(self, url, params=None):
+            return FakeResponse()
+
+    monkeypatch.setattr(context, "get_client", lambda: FakeClient())
+    monkeypatch.setattr(context, "resolve_space_id", lambda client, explicit=None: "space-1")
+    monkeypatch.setattr(context.httpx, "Client", FakeHttpClient)
+
+    output = tmp_path / "downloaded.png"
+    result = runner.invoke(app, ["context", "download", "image.png", "--output", str(output)])
+
+    assert result.exit_code == 1
+    assert "returned text/html instead" in result.output
+    assert "app shell" in result.output
+    assert not output.exists()
+
+
 def test_context_load_fetches_to_preview_cache(monkeypatch, tmp_path):
     calls = {}