From bbc88d9585acbb3ed42dbd78fbc87b2b9a877ac4 Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Thu, 16 Apr 2026 16:52:56 -0700 Subject: [PATCH 01/13] Add Python agent run trace Add structured tracing for custom Python agents so their execution surfaces on the Narada observability dashboard alongside GUI-built custom agents. narada-core: - New PythonAgentRunTrace step type + PythonTraceEvent discriminated union covering stdout, stderr, sub-agent calls, extension actions, and side effects. Added to the ApaStepTrace union; parse_action_trace handles it transparently. narada-pyodide: - New private _trace.py module with bounded-size summarisation of extension action requests/responses and per-event emitters (emit_sub_agent_call, emit_extension_action, emit_side_effect). - Instrument dispatch_request() to emit one subAgentCall event per invocation, covering success/error/timeout paths. - Instrument _run_extension_action() to emit one extensionAction event per call, with action_name keyed off the request discriminator. - Instrument download_file / render_html in utils.py to emit sideEffect events. - 38 unit tests exercise summarisation, truncation, emitter shapes, and Pydantic round-trip via parse_action_trace. 
Version bumps (coupled to avoid parse_action_trace ValidationError for external narada users whose traces may contain pythonAgentRun nodes): - narada-core: 0.0.17 -> 0.0.18 - narada-pyodide: 0.0.43 -> 0.0.44 - narada: 0.1.42 -> 0.1.43 (repin narada-core==0.0.18 only) Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/narada-core/pyproject.toml | 2 +- .../src/narada_core/actions/models.py | 76 ++- packages/narada-pyodide/pyproject.toml | 6 +- packages/narada-pyodide/src/narada/_trace.py | 236 ++++++++++ packages/narada-pyodide/src/narada/utils.py | 10 + packages/narada-pyodide/src/narada/window.py | 155 +++++-- packages/narada-pyodide/tests/README.md | 20 + packages/narada-pyodide/tests/__init__.py | 0 packages/narada-pyodide/tests/conftest.py | 56 +++ packages/narada-pyodide/tests/test_trace.py | 432 ++++++++++++++++++ packages/narada/pyproject.toml | 4 +- uv.lock | 14 +- 12 files changed, 961 insertions(+), 50 deletions(-) create mode 100644 packages/narada-pyodide/src/narada/_trace.py create mode 100644 packages/narada-pyodide/tests/README.md create mode 100644 packages/narada-pyodide/tests/__init__.py create mode 100644 packages/narada-pyodide/tests/conftest.py create mode 100644 packages/narada-pyodide/tests/test_trace.py diff --git a/packages/narada-core/pyproject.toml b/packages/narada-core/pyproject.toml index be162df..e47b7c8 100644 --- a/packages/narada-core/pyproject.toml +++ b/packages/narada-core/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "narada-core" -version = "0.0.17" +version = "0.0.18" description = "Code shared by the `narada` and `narada-pyodide` packages." 
license = "Apache-2.0" readme = "README.md" diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index 6e68db6..c566d75 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -214,6 +214,79 @@ class OutputTrace(BaseModel): description: str +# --------------------------------------------------------------------------- +# Python agent run trace: emitted by CustomPythonAgentRunnable for custom +# Python agents executed in the browser Pyodide runtime. A single +# PythonAgentRunTrace wraps the full agent's execution; its `events` list is +# a chronologically sorted timeline of stdout / stderr / SDK call events. +# --------------------------------------------------------------------------- + + +class PythonStdoutEvent(BaseModel): + kind: Literal["stdout"] = "stdout" + ts: int + text: str + + +class PythonStderrEvent(BaseModel): + kind: Literal["stderr"] = "stderr" + ts: int + text: str + + +class PythonSubAgentCallEvent(BaseModel): + kind: Literal["subAgentCall"] = "subAgentCall" + ts_start: int + ts_end: int + agent_type: str + prompt: str + status: Literal["success", "error", "timeout"] + request_id: str | None = None + error_message: str | None = None + action_trace: ActionTrace | None = None + + +class PythonExtensionActionEvent(BaseModel): + kind: Literal["extensionAction"] = "extensionAction" + ts_start: int + ts_end: int + # Matches the snake_case `name` discriminator on ExtensionActionRequest + # (e.g. "go_to_url", "get_screenshot"). Carried as a plain string rather + # than a Literal so adding a new extension action in the future does not + # require a parse-time migration of historical trace data. 
+ action_name: str + request_summary: dict[str, Any] + result_summary: dict[str, Any] | None = None + status: Literal["success", "error", "timeout"] + error_message: str | None = None + + +class PythonSideEffectEvent(BaseModel): + kind: Literal["sideEffect"] = "sideEffect" + ts: int + effect_type: Literal["download_file", "render_html"] + description: str + + +PythonTraceEvent = Annotated[ + PythonStdoutEvent + | PythonStderrEvent + | PythonSubAgentCallEvent + | PythonExtensionActionEvent + | PythonSideEffectEvent, + Field(discriminator="kind"), +] + + +class PythonAgentRunTrace(BaseModel): + step_type: Literal["pythonAgentRun"] = "pythonAgentRun" + url: str + status: Literal["success", "error", "aborted"] + duration_ms: int + events: list[PythonTraceEvent] + error_message: str | None = None + + ApaStepTrace = Annotated[ GoToUrlTrace | GetUrlTrace @@ -243,7 +316,8 @@ class OutputTrace(BaseModel): | DataTableInsertRowTrace | DataTableUpdateCellValueTrace | ObjectSetPropertiesTrace - | OutputTrace, + | OutputTrace + | PythonAgentRunTrace, Field(discriminator="step_type"), ] diff --git a/packages/narada-pyodide/pyproject.toml b/packages/narada-pyodide/pyproject.toml index 655d588..bf33ccb 100644 --- a/packages/narada-pyodide/pyproject.toml +++ b/packages/narada-pyodide/pyproject.toml @@ -1,14 +1,14 @@ [project] name = "narada-pyodide" -version = "0.0.43" +version = "0.0.44" description = "Pyodide-compatible Python client SDK for Narada" license = "Apache-2.0" readme = "README.md" authors = [{ name = "Narada", email = "support@narada.ai" }] requires-python = ">=3.12" dependencies = [ - "narada-core==0.0.17", + "narada-core==0.0.18", # Must be a supported version in https://pyodide.org/en/stable/usage/packages-in-pyodide.html "packaging==24.2", ] @@ -23,7 +23,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [dependency-groups] -dev = ["pyodide-py>=0.27.7"] +dev = ["pyodide-py>=0.27.7", "pytest>=8.4.1"] [tool.hatch.build.targets.wheel] packages = 
["src/narada"]
diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py
new file mode 100644
index 0000000..037d10f
--- /dev/null
+++ b/packages/narada-pyodide/src/narada/_trace.py
@@ -0,0 +1,265 @@
+"""Private trace-emission helpers for narada-pyodide.
+
+This module is used internally by narada-pyodide to forward structured
+telemetry (sub-agent invocations, extension actions, side effects) from
+Python code running inside the Pyodide worker to the JavaScript harness,
+which assembles a ``PythonAgentRunTrace`` that surfaces on the Narada
+observability dashboard.
+
+The module is private: user code should not import from here. The public
+surface lives in ``window.py`` and ``utils.py``; instrumentation is applied
+at those module boundaries by calling into this module.
+"""
+
+from __future__ import annotations
+
+import json
+import time
+from typing import TYPE_CHECKING, Any, Literal
+
+from narada_core.actions.models import (
+    AgenticMouseActionRequest,
+    AgenticSelectorRequest,
+    CloseWindowRequest,
+    ExtensionActionRequest,
+    GetFullHtmlRequest,
+    GetScreenshotRequest,
+    GetSimplifiedHtmlRequest,
+    GetUrlRequest,
+    GetUrlResponse,
+    GoToUrlRequest,
+    PrintMessageRequest,
+    ReadGoogleSheetRequest,
+    ReadGoogleSheetResponse,
+    WriteGoogleSheetRequest,
+)
+from pydantic import BaseModel
+
+if TYPE_CHECKING:
+    # Injected by the JavaScript harness at worker startup (see
+    # `frontend/src/lib/apa/python/python.worker.ts`). narada-pyodide is
+    # only ever imported under a Pyodide worker that has registered this
+    # builtin; there is no non-Pyodide code path.
+    def _narada_emit_trace_event(event_json: str) -> None: ...
+
+
+# Hard caps on payload sizes carried in trace events. Values are large enough
+# that typical prompts and error messages survive intact but small enough to
+# bound worst-case persisted actionTrace JSON.
+_MAX_PROMPT_CHARS = 500
+_MAX_MESSAGE_CHARS = 500
+_MAX_ERROR_CHARS = 1000
+_MAX_QUERY_CHARS = 200
+
+_ELLIPSIS = "\u2026"
+
+
+def now_ms() -> int:
+    """Current wall-clock time in integer milliseconds."""
+    return int(time.time() * 1000)
+
+
+def truncate(value: str | None, max_chars: int) -> str | None:
+    """Return ``value`` shortened to at most ``max_chars`` characters.
+
+    When truncation occurs the first ``max_chars - 1`` characters are kept and
+    an ellipsis is appended, so the result is exactly ``max_chars`` long.
+    ``None`` is returned unchanged. A non-positive ``max_chars`` yields an
+    empty string; without that guard the slice bound would go negative and
+    the result could exceed the cap.
+    """
+    if value is None:
+        return None
+    if len(value) <= max_chars:
+        return value
+    if max_chars <= 0:
+        return ""
+    return value[: max_chars - 1] + _ELLIPSIS
+
+
+def truncate_prompt(prompt: str) -> str:
+    """Cap a sub-agent prompt at ``_MAX_PROMPT_CHARS`` characters."""
+    return truncate(prompt, _MAX_PROMPT_CHARS) or ""
+
+
+def truncate_error(error: str) -> str:
+    """Cap an error message at ``_MAX_ERROR_CHARS`` characters."""
+    return truncate(error, _MAX_ERROR_CHARS) or ""
+
+
+def emit_trace_event(event: dict[str, Any]) -> None:
+    """Forward a single trace event to the JavaScript harness.
+
+    The event must be JSON-serialisable and shaped as one of the
+    ``PythonTraceEvent`` variants defined in ``narada_core.actions.models``.
+    No validation is performed here; callers construct events directly and
+    are responsible for matching the schema.
+    """
+    _narada_emit_trace_event(json.dumps(event))  # noqa: F821
+
+
+def summarize_request(request: ExtensionActionRequest) -> dict[str, Any]:
+    """Produce a bounded-size summary of an extension action request for
+    display in the observability dashboard. Large payloads (sheet row values,
+    selector graphs) are reduced to row counts or action types; free-form
+    strings are truncated.
+
+    The returned dict is always JSON-serialisable and fits the
+    ``PythonExtensionActionEvent.request_summary`` field.
+    """
+    if isinstance(request, GoToUrlRequest):
+        return {"url": request.url, "new_tab": request.new_tab}
+    if isinstance(
+        request,
+        (
+            GetUrlRequest,
+            GetScreenshotRequest,
+            GetFullHtmlRequest,
+            GetSimplifiedHtmlRequest,
+            CloseWindowRequest,
+        ),
+    ):
+        return {}
+    if isinstance(request, ReadGoogleSheetRequest):
+        return {"spreadsheet_id": request.spreadsheet_id, "range": request.range}
+    if isinstance(request, WriteGoogleSheetRequest):
+        return {
+            "spreadsheet_id": request.spreadsheet_id,
+            "range": request.range,
+            "row_count": len(request.values),
+        }
+    if isinstance(request, PrintMessageRequest):
+        return {"message": truncate(request.message, _MAX_MESSAGE_CHARS)}
+    if isinstance(request, (AgenticSelectorRequest, AgenticMouseActionRequest)):
+        return {
+            "action_type": request.action["type"],
+            "fallback_operator_query": truncate(
+                request.fallback_operator_query, _MAX_QUERY_CHARS
+            ),
+        }
+    # ExtensionActionRequest is a closed union today. If a new variant is
+    # added without updating this function, we degrade gracefully to an empty
+    # summary rather than crashing the user's agent mid-run.
+    return {}
+
+
+def summarize_response(
+    request: ExtensionActionRequest,
+    response: BaseModel | None,
+) -> dict[str, Any] | None:
+    """Produce a bounded-size summary of an extension action response, keyed
+    on the originating request type. Returns ``None`` for actions that have
+    no observable result (writes, navigations, close) so the dashboard can
+    omit an empty row rather than rendering a hollow card.
+
+    The screenshot and HTML branches key on the request alone and do not
+    inspect ``response``, so the summary is only meaningful for an action
+    that actually succeeded.
+    """
+    if isinstance(request, GetUrlRequest) and isinstance(response, GetUrlResponse):
+        return {"url": response.url}
+    if isinstance(request, GetScreenshotRequest):
+        return {"description": "Took screenshot of the page"}
+    if isinstance(request, GetFullHtmlRequest):
+        return {"description": "Got the full HTML of the page"}
+    if isinstance(request, GetSimplifiedHtmlRequest):
+        return {"description": "Got the simplified HTML of the page"}
+    if isinstance(request, ReadGoogleSheetRequest) and isinstance(
+        response, ReadGoogleSheetResponse
+    ):
+        rows = response.values
+        column_count = max((len(row) for row in rows), default=0)
+        return {"row_count": len(rows), "column_count": column_count}
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Event emitters
+#
+# Each emitter builds a JSON-serialisable event shaped to match one of the
+# ``PythonTraceEvent`` Pydantic variants in ``narada_core.actions.models``
+# and forwards it to the JavaScript harness. Optional fields are included
+# only when non-None so the JSON stays compact.
+# ---------------------------------------------------------------------------
+
+
+SubAgentCallStatus = Literal["success", "error", "timeout"]
+ExtensionActionStatus = Literal["success", "error", "timeout"]
+SideEffectType = Literal["download_file", "render_html"]
+
+
+def emit_sub_agent_call(
+    *,
+    ts_start: int,
+    agent_type: str,
+    prompt: str,
+    status: SubAgentCallStatus,
+    request_id: str | None = None,
+    error_message: str | None = None,
+    action_trace_raw: list[dict[str, Any]] | None = None,
+) -> None:
+    """Emit one ``subAgentCall`` event for a finished sub-agent invocation.
+
+    ``ts_end`` is stamped at emission time. The prompt and error message are
+    truncated to the module caps; ``action_trace_raw`` is forwarded as-is.
+    """
+    event: dict[str, Any] = {
+        "kind": "subAgentCall",
+        "ts_start": ts_start,
+        "ts_end": now_ms(),
+        "agent_type": agent_type,
+        "prompt": truncate_prompt(prompt),
+        "status": status,
+    }
+    if request_id is not None:
+        event["request_id"] = request_id
+    if error_message is not None:
+        event["error_message"] = truncate_error(error_message)
+    if action_trace_raw is not None:
+        event["action_trace"] = action_trace_raw
+    emit_trace_event(event)
+
+
+def emit_extension_action(
+    *,
+    ts_start: int,
+    request: ExtensionActionRequest,
+    status: ExtensionActionStatus,
+    response: BaseModel | None = None,
+    error_message: str | None = None,
+) -> None:
+    """Emit one ``extensionAction`` event for a finished extension action.
+
+    ``ts_end`` is stamped at emission time. ``result_summary`` is attached
+    only for successful actions, ``error_message`` only when provided.
+    """
+    event: dict[str, Any] = {
+        "kind": "extensionAction",
+        "ts_start": ts_start,
+        "ts_end": now_ms(),
+        "action_name": request.name,
+        "request_summary": summarize_request(request),
+        "status": status,
+    }
+    # Summarise the result only on success: several summaries (screenshot,
+    # HTML) are fixed descriptions that do not depend on ``response`` and
+    # would otherwise claim a failed or timed-out action produced a result.
+    if status == "success":
+        result_summary = summarize_response(request, response)
+        if result_summary is not None:
+            event["result_summary"] = result_summary
+    if error_message is not None:
+        event["error_message"] = truncate_error(error_message)
+    emit_trace_event(event)
+
+
+def emit_side_effect(*, effect_type: SideEffectType, description: str) -> None:
+    """Emit one ``sideEffect`` event stamped with the current time."""
+    emit_trace_event(
+        {
+            "kind": "sideEffect",
+            "ts": now_ms(),
+            "effect_type": effect_type,
+            "description": description,
+        }
+    )
diff --git a/packages/narada-pyodide/src/narada/utils.py b/packages/narada-pyodide/src/narada/utils.py
index 68778c4..1107ad9 100644
---
a/packages/narada-pyodide/src/narada/utils.py +++ b/packages/narada-pyodide/src/narada/utils.py @@ -1,5 +1,7 @@ from typing import TYPE_CHECKING +from . import _trace + if TYPE_CHECKING: # Magic functions injected by the JavaScript harness. def _narada_render_html(html: str) -> None: ... @@ -18,6 +20,10 @@ def download_file(filename: str, content: str | bytes) -> None: If bytes, writes in binary mode. """ _narada_download_file(filename, content) + _trace.emit_side_effect( + effect_type="download_file", + description=f"Downloaded file: {filename}", + ) def render_html(html: str) -> None: @@ -28,3 +34,7 @@ def render_html(html: str) -> None: html: The HTML content to render. """ _narada_render_html(html) + _trace.emit_side_effect( + effect_type="render_html", + description="Rendered HTML in a new tab", + ) diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py index df6bfa1..509499c 100644 --- a/packages/narada-pyodide/src/narada/window.py +++ b/packages/narada-pyodide/src/narada/window.py @@ -52,6 +52,8 @@ from pyodide.ffi import JsProxy, create_once_callable from pyodide.http import pyfetch +from . import _trace + # Magic variable injected by the JavaScript harness that stores the IDs of the current runnables # in the stack on the frontend. @@ -192,6 +194,12 @@ async def dispatch_request( The higher-level `agent` method should be preferred for most use cases. """ + # Trace instrumentation: the entire method body is wrapped so that any + # exit (successful return, timeout, or non-timeout failure) produces a + # ``subAgentCall`` trace event with matching status. See `_trace.py`. 
+ trace_start_ms = _trace.now_ms() + agent_type_str = agent.value if isinstance(agent, Agent) else str(agent) + deadline = time.monotonic() + timeout headers = {"Content-Type": "application/json"} @@ -305,6 +313,18 @@ async def dispatch_request( else: response_content["structuredOutput"] = None + _trace.emit_sub_agent_call( + ts_start=trace_start_ms, + agent_type=agent_type_str, + prompt=prompt, + status="success", + request_id=request_id, + action_trace_raw=( + response_content.get("actionTrace") + if response_content is not None + else None + ), + ) return response # Poll every 3 seconds. @@ -313,7 +333,32 @@ async def dispatch_request( raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout) except asyncio.TimeoutError: + _trace.emit_sub_agent_call( + ts_start=trace_start_ms, + agent_type=agent_type_str, + prompt=prompt, + status="timeout", + error_message=f"Timed out after {timeout}s", + ) raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout) + except NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE: + _trace.emit_sub_agent_call( + ts_start=trace_start_ms, + agent_type=agent_type_str, + prompt=prompt, + status="timeout", + error_message=f"Timed out after {timeout}s", + ) + raise + except Exception as err: + _trace.emit_sub_agent_call( + ts_start=trace_start_ms, + agent_type=agent_type_str, + prompt=prompt, + status="error", + error_message=str(err), + ) + raise @overload async def agent( @@ -562,51 +607,85 @@ async def _run_extension_action( *, timeout: int | None = None, ) -> _ResponseModel | None: - headers = {"Content-Type": "application/json"} - if self._api_key is not None: - headers["x-api-key"] = self._api_key - else: - assert self._user_id is not None - assert self._env is not None + # Trace instrumentation: every exit path emits an ``extensionAction`` + # trace event with a status matching the outcome. See `_trace.py`. 
+ trace_start_ms = _trace.now_ms() - headers["Authorization"] = f"Bearer {await _narada_get_id_token()}" - headers["X-Narada-User-ID"] = self._user_id - headers["X-Narada-Env"] = self._env + try: + headers = {"Content-Type": "application/json"} + if self._api_key is not None: + headers["x-api-key"] = self._api_key + else: + assert self._user_id is not None + assert self._env is not None - body = { - "action": request.model_dump(), - "browserWindowId": self.browser_window_id, - "parentRunIds": _parent_run_ids(), - } - if timeout is not None: - body["timeout"] = timeout - - fetch_response = await pyfetch( - f"{self._base_url}/extension-actions", - method="POST", - headers=headers, - body=json.dumps(body), - # Don't specify `timeout` here as the (soft) timeout is handled by the server. - ) + headers["Authorization"] = f"Bearer {await _narada_get_id_token()}" + headers["X-Narada-User-ID"] = self._user_id + headers["X-Narada-Env"] = self._env + + body = { + "action": request.model_dump(), + "browserWindowId": self.browser_window_id, + "parentRunIds": _parent_run_ids(), + } + if timeout is not None: + body["timeout"] = timeout - if fetch_response.status == HTTPStatus.GATEWAY_TIMEOUT: - raise NaradaTimeoutError - elif not fetch_response.ok: - status = fetch_response.status - text = await fetch_response.text() - raise NaradaError(f"Failed to run extension action: {status} {text}") + fetch_response = await pyfetch( + f"{self._base_url}/extension-actions", + method="POST", + headers=headers, + body=json.dumps(body), + # Don't specify `timeout` here as the (soft) timeout is handled by the server. 
+ ) - resp_json = await fetch_response.json() + if fetch_response.status == HTTPStatus.GATEWAY_TIMEOUT: + raise NaradaTimeoutError + elif not fetch_response.ok: + status = fetch_response.status + text = await fetch_response.text() + raise NaradaError(f"Failed to run extension action: {status} {text}") - response = ExtensionActionResponse.model_validate(resp_json) - if response.status == "error": - raise NaradaError(response.error) + resp_json = await fetch_response.json() - if response_model is None: - return None + response = ExtensionActionResponse.model_validate(resp_json) + if response.status == "error": + raise NaradaError(response.error) - assert response.data is not None - return response_model.model_validate_json(response.data) + if response_model is None: + _trace.emit_extension_action( + ts_start=trace_start_ms, + request=request, + status="success", + ) + return None + + assert response.data is not None + parsed_response = response_model.model_validate_json(response.data) + _trace.emit_extension_action( + ts_start=trace_start_ms, + request=request, + status="success", + response=parsed_response, + ) + return parsed_response + + except NaradaTimeoutError: + _trace.emit_extension_action( + ts_start=trace_start_ms, + request=request, + status="timeout", + error_message="Extension action timed out", + ) + raise + except Exception as err: + _trace.emit_extension_action( + ts_start=trace_start_ms, + request=request, + status="error", + error_message=str(err), + ) + raise class LocalBrowserWindow(BaseBrowserWindow): diff --git a/packages/narada-pyodide/tests/README.md b/packages/narada-pyodide/tests/README.md new file mode 100644 index 0000000..5ba6499 --- /dev/null +++ b/packages/narada-pyodide/tests/README.md @@ -0,0 +1,20 @@ +# narada-pyodide tests + +narada-pyodide and narada both publish under the top-level `narada` Python +package namespace. 
When both are installed in the same environment, the +workspace-installed `narada` package shadows narada-pyodide's source. This +is fine at runtime (Pyodide only installs narada-pyodide) but breaks +local unit testing. + +To run the unit tests locally from the workspace root: + +```bash +uv pip uninstall narada +uv run --package narada-pyodide pytest packages/narada-pyodide/tests/ +``` + +Re-running `uv sync` will reinstall the `narada` package and require the +uninstall step again. + +The `conftest.py` stubs the Pyodide-only `js` and `pyodide.*` imports so +the non-HTTP helpers in narada-pyodide can be exercised on host CPython. diff --git a/packages/narada-pyodide/tests/__init__.py b/packages/narada-pyodide/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/packages/narada-pyodide/tests/conftest.py b/packages/narada-pyodide/tests/conftest.py new file mode 100644 index 0000000..47abb32 --- /dev/null +++ b/packages/narada-pyodide/tests/conftest.py @@ -0,0 +1,56 @@ +"""Pytest fixtures shared across narada-pyodide tests. + +narada-pyodide is designed to run inside a Pyodide web worker; several of its +transitive imports (``js``, ``pyodide.ffi``, ``pyodide.http``) are only +available in that environment. To make the pure-Python unit tests runnable on +a host CPython interpreter we stub those modules before any narada-pyodide +code is imported. The real Pyodide runtime will obviously provide them. +""" + +from __future__ import annotations + +import json +import sys +from collections.abc import Iterator +from typing import Any +from unittest.mock import MagicMock + +# Stub Pyodide-only modules. Must happen before `from narada import _trace`. 
+for _mod in ("js", "pyodide", "pyodide.ffi", "pyodide.http"): + if _mod not in sys.modules: + sys.modules[_mod] = MagicMock() + +import pytest # noqa: E402 + +from narada import _trace # noqa: E402 + + +class RecordingEmitter: + """Captures every event forwarded by ``_trace.emit_trace_event`` during a + test so assertions can inspect the JSON that would reach the JS harness. + """ + + def __init__(self) -> None: + self.events: list[dict[str, Any]] = [] + + def __call__(self, event_json: str) -> None: + # Round-trip through json to catch non-serialisable payloads early. + self.events.append(json.loads(event_json)) + + +@pytest.fixture +def recorded_events() -> Iterator[RecordingEmitter]: + """Replace the JS-harness-injected ``_narada_emit_trace_event`` with a + recorder for the duration of a test, restoring the original binding + afterwards. + """ + emitter = RecordingEmitter() + previous = getattr(_trace, "_narada_emit_trace_event", None) + _trace._narada_emit_trace_event = emitter # type: ignore[attr-defined] + try: + yield emitter + finally: + if previous is None: + delattr(_trace, "_narada_emit_trace_event") + else: + _trace._narada_emit_trace_event = previous # type: ignore[attr-defined] diff --git a/packages/narada-pyodide/tests/test_trace.py b/packages/narada-pyodide/tests/test_trace.py new file mode 100644 index 0000000..2a60f54 --- /dev/null +++ b/packages/narada-pyodide/tests/test_trace.py @@ -0,0 +1,432 @@ +"""Tests for the private ``narada._trace`` module. + +Covers the pure helpers (truncation, request/response summarisation) plus the +``emit_*`` functions, asserting that the JSON payloads emitted to the JS +harness match the ``PythonTraceEvent`` Pydantic schema defined in +``narada_core.actions.models``. 
+""" + +from __future__ import annotations + +import pytest +from narada_core.actions.models import ( + AgenticMouseActionRequest, + AgenticSelectorRequest, + CloseWindowRequest, + GetFullHtmlRequest, + GetFullHtmlResponse, + GetScreenshotRequest, + GetScreenshotResponse, + GetSimplifiedHtmlRequest, + GetSimplifiedHtmlResponse, + GetUrlRequest, + GetUrlResponse, + GoToUrlRequest, + PrintMessageRequest, + PythonAgentRunTrace, + ReadGoogleSheetRequest, + ReadGoogleSheetResponse, + WriteGoogleSheetRequest, + parse_action_trace, +) + +from narada import _trace + + +# --------------------------------------------------------------------------- +# Truncation +# --------------------------------------------------------------------------- + + +class TestTruncate: + def test_returns_none_for_none(self) -> None: + assert _trace.truncate(None, 10) is None + + def test_preserves_short_strings(self) -> None: + assert _trace.truncate("hello", 10) == "hello" + + def test_preserves_exact_length(self) -> None: + assert _trace.truncate("1234567890", 10) == "1234567890" + + def test_truncates_long_strings_with_ellipsis(self) -> None: + result = _trace.truncate("abcdefghij", 5) + assert result is not None + assert len(result) == 5 + assert result.endswith("\u2026") + assert result.startswith("abcd") + + def test_truncate_prompt_falls_back_to_empty(self) -> None: + assert _trace.truncate_prompt("") == "" + + def test_truncate_error_bounded(self) -> None: + long = "x" * 5000 + result = _trace.truncate_error(long) + assert len(result) == 1000 + assert result.endswith("\u2026") + + +# --------------------------------------------------------------------------- +# summarize_request / summarize_response +# --------------------------------------------------------------------------- + + +class TestSummarizeRequest: + def test_go_to_url(self) -> None: + req = GoToUrlRequest(url="https://example.com", new_tab=True) + assert _trace.summarize_request(req) == { + "url": "https://example.com", + 
"new_tab": True, + } + + @pytest.mark.parametrize( + "request_instance", + [ + GetUrlRequest(), + GetScreenshotRequest(), + GetFullHtmlRequest(), + GetSimplifiedHtmlRequest(), + CloseWindowRequest(), + ], + ) + def test_parameterless_requests_return_empty( + self, request_instance: object + ) -> None: + assert _trace.summarize_request(request_instance) == {} # type: ignore[arg-type] + + def test_read_google_sheet(self) -> None: + req = ReadGoogleSheetRequest(spreadsheet_id="abc123", range="Sheet1!A1:B10") + assert _trace.summarize_request(req) == { + "spreadsheet_id": "abc123", + "range": "Sheet1!A1:B10", + } + + def test_write_google_sheet_reports_row_count_not_values(self) -> None: + big_values = [["r"] * 5 for _ in range(847)] + req = WriteGoogleSheetRequest( + spreadsheet_id="abc123", range="Sheet1!A1:E847", values=big_values + ) + summary = _trace.summarize_request(req) + assert summary == { + "spreadsheet_id": "abc123", + "range": "Sheet1!A1:E847", + "row_count": 847, + } + # Explicitly guard against regressions that leak row payloads. + assert "values" not in summary + + def test_print_message_truncates_long_messages(self) -> None: + long_msg = "x" * 2000 + summary = _trace.summarize_request(PrintMessageRequest(message=long_msg)) + truncated = summary["message"] + assert isinstance(truncated, str) + assert len(truncated) == 500 + assert truncated.endswith("\u2026") + + def test_agentic_selector_reports_action_type_and_truncates_query(self) -> None: + req = AgenticSelectorRequest( + action={"type": "click"}, + selectors={"id": "submit-btn"}, + fallback_operator_query="y" * 1000, + ) + summary = _trace.summarize_request(req) + assert summary["action_type"] == "click" + assert len(summary["fallback_operator_query"]) == 200 + # Selectors are intentionally omitted (not user-useful in trace view). 
+ assert "selectors" not in summary + + def test_agentic_mouse_action(self) -> None: + req = AgenticMouseActionRequest( + action={"type": "click"}, + recorded_click={"x": 1, "y": 2, "viewport": {"width": 10, "height": 20}}, + fallback_operator_query="click the button", + resize_window=False, + ) + summary = _trace.summarize_request(req) + assert summary == { + "action_type": "click", + "fallback_operator_query": "click the button", + } + + +class TestSummarizeResponse: + def test_get_url_returns_url(self) -> None: + req = GetUrlRequest() + resp = GetUrlResponse(url="https://example.com/page") + assert _trace.summarize_response(req, resp) == { + "url": "https://example.com/page" + } + + def test_get_screenshot_returns_fixed_description(self) -> None: + req = GetScreenshotRequest() + resp = GetScreenshotResponse( + base64_content="...huge blob...", + name="page.png", + mime_type="image/png", + timestamp="2025-01-01T00:00:00Z", + ) + summary = _trace.summarize_response(req, resp) + assert summary == {"description": "Took screenshot of the page"} + + def test_full_html_returns_fixed_description(self) -> None: + summary = _trace.summarize_response( + GetFullHtmlRequest(), GetFullHtmlResponse(html="...massive...") + ) + assert summary == {"description": "Got the full HTML of the page"} + + def test_simplified_html_returns_fixed_description(self) -> None: + summary = _trace.summarize_response( + GetSimplifiedHtmlRequest(), + GetSimplifiedHtmlResponse(html="short"), + ) + assert summary == {"description": "Got the simplified HTML of the page"} + + def test_read_google_sheet_reports_dimensions(self) -> None: + req = ReadGoogleSheetRequest(spreadsheet_id="x", range="A1:C5") + resp = ReadGoogleSheetResponse(values=[["a", "b", "c"], ["d", "e", "f"], ["g"]]) + assert _trace.summarize_response(req, resp) == { + "row_count": 3, + "column_count": 3, + } + + def test_read_google_sheet_empty_values(self) -> None: + req = ReadGoogleSheetRequest(spreadsheet_id="x", range="A1:C5") + 
resp = ReadGoogleSheetResponse(values=[]) + assert _trace.summarize_response(req, resp) == { + "row_count": 0, + "column_count": 0, + } + + def test_write_google_sheet_returns_none(self) -> None: + req = WriteGoogleSheetRequest(spreadsheet_id="x", range="A1", values=[["v"]]) + assert _trace.summarize_response(req, None) is None + + def test_close_window_returns_none(self) -> None: + assert _trace.summarize_response(CloseWindowRequest(), None) is None + + +# --------------------------------------------------------------------------- +# Event emitters +# --------------------------------------------------------------------------- + + +class TestEmitSubAgentCall: + def test_success_with_action_trace(self, recorded_events) -> None: + _trace.emit_sub_agent_call( + ts_start=1000, + agent_type="operator", + prompt="Find leads", + status="success", + request_id="req_abc", + action_trace_raw=[{"url": "https://sf.com", "action": "click Leads"}], + ) + (event,) = recorded_events.events + assert event["kind"] == "subAgentCall" + assert event["ts_start"] == 1000 + assert event["ts_end"] >= 1000 + assert event["agent_type"] == "operator" + assert event["prompt"] == "Find leads" + assert event["status"] == "success" + assert event["request_id"] == "req_abc" + assert event["action_trace"] == [ + {"url": "https://sf.com", "action": "click Leads"} + ] + assert "error_message" not in event + + def test_success_without_action_trace_omits_field(self, recorded_events) -> None: + _trace.emit_sub_agent_call( + ts_start=1000, agent_type="operator", prompt="hi", status="success" + ) + (event,) = recorded_events.events + assert "action_trace" not in event + assert "request_id" not in event + + def test_timeout_includes_error_message(self, recorded_events) -> None: + _trace.emit_sub_agent_call( + ts_start=1000, + agent_type="operator", + prompt="hi", + status="timeout", + error_message="Timed out after 60s", + ) + (event,) = recorded_events.events + assert event["status"] == "timeout" + assert 
event["error_message"] == "Timed out after 60s" + + def test_error_truncates_error_message(self, recorded_events) -> None: + _trace.emit_sub_agent_call( + ts_start=1000, + agent_type="operator", + prompt="hi", + status="error", + error_message="x" * 5000, + ) + (event,) = recorded_events.events + assert len(event["error_message"]) == 1000 + + def test_prompt_is_truncated(self, recorded_events) -> None: + _trace.emit_sub_agent_call( + ts_start=1000, + agent_type="operator", + prompt="y" * 1000, + status="success", + ) + (event,) = recorded_events.events + assert len(event["prompt"]) == 500 + + +class TestEmitExtensionAction: + def test_success_with_result_summary(self, recorded_events) -> None: + req = GetUrlRequest() + resp = GetUrlResponse(url="https://x.com") + _trace.emit_extension_action( + ts_start=2000, request=req, status="success", response=resp + ) + (event,) = recorded_events.events + assert event["kind"] == "extensionAction" + assert event["action_name"] == "get_url" + assert event["request_summary"] == {} + assert event["result_summary"] == {"url": "https://x.com"} + assert event["status"] == "success" + + def test_success_without_result_summary_omits_field(self, recorded_events) -> None: + req = WriteGoogleSheetRequest( + spreadsheet_id="abc", range="A1:B2", values=[["1", "2"], ["3", "4"]] + ) + _trace.emit_extension_action(ts_start=2000, request=req, status="success") + (event,) = recorded_events.events + assert event["request_summary"] == { + "spreadsheet_id": "abc", + "range": "A1:B2", + "row_count": 2, + } + assert "result_summary" not in event + + def test_timeout(self, recorded_events) -> None: + _trace.emit_extension_action( + ts_start=0, + request=GoToUrlRequest(url="https://a.b", new_tab=False), + status="timeout", + error_message="Timed out", + ) + (event,) = recorded_events.events + assert event["status"] == "timeout" + assert event["action_name"] == "go_to_url" + + def test_error(self, recorded_events) -> None: + 
_trace.emit_extension_action( + ts_start=0, + request=CloseWindowRequest(), + status="error", + error_message="permission denied", + ) + (event,) = recorded_events.events + assert event["status"] == "error" + assert event["error_message"] == "permission denied" + + +class TestEmitSideEffect: + def test_download_file(self, recorded_events) -> None: + _trace.emit_side_effect( + effect_type="download_file", description="Downloaded file: report.pdf" + ) + (event,) = recorded_events.events + assert event["kind"] == "sideEffect" + assert event["effect_type"] == "download_file" + assert event["description"] == "Downloaded file: report.pdf" + assert "ts" in event + + def test_render_html(self, recorded_events) -> None: + _trace.emit_side_effect( + effect_type="render_html", description="Rendered HTML in a new tab" + ) + (event,) = recorded_events.events + assert event["effect_type"] == "render_html" + + +# --------------------------------------------------------------------------- +# End-to-end schema validation: every event kind produced by the emitters +# round-trips cleanly through the ``PythonAgentRunTrace`` Pydantic model and +# the ``parse_action_trace`` entry point used by downstream consumers. 
+# --------------------------------------------------------------------------- + + +class TestPythonAgentRunTraceRoundtrip: + def test_every_event_kind_parses(self, recorded_events) -> None: + _trace.emit_sub_agent_call( + ts_start=1000, + agent_type="operator", + prompt="Find leads", + status="success", + request_id="req_abc", + action_trace_raw=[{"url": "https://sf.com", "action": "click Leads"}], + ) + _trace.emit_extension_action( + ts_start=2000, + request=GetScreenshotRequest(), + status="success", + response=GetScreenshotResponse( + base64_content="ignored", + name="page.png", + mime_type="image/png", + timestamp="now", + ), + ) + _trace.emit_side_effect( + effect_type="download_file", description="Downloaded file: leads.csv" + ) + + # Assemble a representative PythonAgentRunTrace containing the emitted + # events alongside stdout / stderr events (which are synthesised by + # the JS-side runnable, not the SDK). + stdout_stderr_events = [ + {"kind": "stdout", "ts": 500, "text": "starting"}, + {"kind": "stderr", "ts": 2500, "text": "deprecation warning"}, + ] + events = stdout_stderr_events + recorded_events.events + events.sort(key=lambda e: e.get("ts", e.get("ts_start", 0))) + + raw = [ + { + "step_type": "pythonAgentRun", + "url": "https://app.narada.ai/agent", + "status": "success", + "duration_ms": 3000, + "events": events, + } + ] + trace = parse_action_trace(raw) + assert len(trace) == 1 + (node,) = trace + assert isinstance(node, PythonAgentRunTrace) + # Order reflects the real wall-clock timestamps: the emitters stamp + # events with ``now_ms()`` at emit time, which in this test runs much + # later than the synthetic stdout/stderr timestamps above. The side + # effect therefore sorts after ``stderr`` (ts=2500). + assert [e.kind for e in node.events] == [ + "stdout", + "subAgentCall", + "extensionAction", + "stderr", + "sideEffect", + ] + # Nested action_trace rehydrates correctly as an OperatorActionTrace. 
+ sub_call = node.events[1] + assert sub_call.kind == "subAgentCall" + assert sub_call.action_trace is not None + assert sub_call.action_trace[0].url == "https://sf.com" + + def test_error_status_parses(self) -> None: + raw = [ + { + "step_type": "pythonAgentRun", + "url": "https://x", + "status": "error", + "duration_ms": 120, + "error_message": "ZeroDivisionError", + "events": [], + } + ] + trace = parse_action_trace(raw) + assert isinstance(trace[0], PythonAgentRunTrace) + assert trace[0].status == "error" + assert trace[0].error_message == "ZeroDivisionError" diff --git a/packages/narada/pyproject.toml b/packages/narada/pyproject.toml index cab7376..59e1179 100644 --- a/packages/narada/pyproject.toml +++ b/packages/narada/pyproject.toml @@ -1,13 +1,13 @@ [project] name = "narada" -version = "0.1.42" +version = "0.1.43" description = "Python client SDK for Narada" license = "Apache-2.0" readme = "README.md" authors = [{ name = "Narada", email = "support@narada.ai" }] requires-python = ">=3.12" dependencies = [ - "narada-core==0.0.17", + "narada-core==0.0.18", "aiohttp>=3.12.13", "playwright>=1.53.0", "rich>=14.0.0", diff --git a/uv.lock b/uv.lock index 5fd8861..310eca5 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "python_full_version >= '3.13'", @@ -312,7 +312,7 @@ wheels = [ [[package]] name = "narada" -version = "0.1.42" +version = "0.1.43" source = { editable = "packages/narada" } dependencies = [ { name = "aiohttp" }, @@ -345,7 +345,7 @@ dev = [ [[package]] name = "narada-core" -version = "0.0.17" +version = "0.0.18" source = { editable = "packages/narada-core" } dependencies = [ { name = "pydantic" }, @@ -356,7 +356,7 @@ requires-dist = [{ name = "pydantic", specifier = "==2.12.5" }] [[package]] name = "narada-pyodide" -version = "0.0.43" +version = "0.0.44" source = { editable = "packages/narada-pyodide" } dependencies = [ { name = "narada-core" }, @@ -367,6 
+367,7 @@ dependencies = [ dev = [ { name = "pyodide-py", version = "0.27.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, { name = "pyodide-py", version = "0.28.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "pytest" }, ] [package.metadata] @@ -376,7 +377,10 @@ requires-dist = [ ] [package.metadata.requires-dev] -dev = [{ name = "pyodide-py", specifier = ">=0.27.7" }] +dev = [ + { name = "pyodide-py", specifier = ">=0.27.7" }, + { name = "pytest", specifier = ">=8.4.1" }, +] [[package]] name = "packaging" From ae3a1115bd7181b12b193a5396385d3afbacb3c3 Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Thu, 16 Apr 2026 17:17:24 -0700 Subject: [PATCH 02/13] Apply review ship-blockers to Python agent trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the four ship-blocker findings from the cross-dimensional review: Robustness — trace emission must not break user code (_trace.py): - `emit_trace_event` now wraps the serialise + forward in try/except and logs the failure instead of propagating it. Previously a stray non- serialisable value in a summary (a datetime, a Pydantic model leak) would raise TypeError out of `_run_extension_action` and abort the user's agent mid-run. - `json.dumps(event, default=str)` stringifies unknown types defensively. Scalability — bound recursive trace size (_trace.py): - `emit_sub_agent_call` now strips the `events` list from any nested `pythonAgentRun` node in the forwarded action trace, replacing it with a `truncated_event_count` marker. Previously a custom Python agent that delegated to another custom Python agent embedded the sub-run's full event timeline in the parent's persisted JSON, producing O(breadth^depth) growth. 
Robustness — code-quality cleanup (window.py): - Collapsed the duplicated `except asyncio.TimeoutError` / `except NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE` blocks in `dispatch_request` into a single `except (A, B):` branch. Removes ~12 lines and the divergence risk. Robustness — side-effect tracing on failure (utils.py): - `download_file` and `render_html` now emit a "failed" side-effect trace when the underlying JS call raises, then re-raise. Previously a failed download produced no trace at all — users saw silence rather than the actual error. Type safety — schema invariants (narada-core/actions/models.py): - `PythonAgentRunTrace.duration_ms` and `truncated_event_count` now use `NonNegativeInt` — Pydantic rejects negative values at parse time rather than letting `-42ms` reach the dashboard formatter. - New `@model_validator` on `PythonSubAgentCallEvent` and `PythonExtensionActionEvent` rejects `ts_end < ts_start`; clock skew on the Pyodide clock can no longer produce negative-duration events that the renderer would display as `-5ms`. - `parse_action_trace` now dispatches deterministically based on the first item's discriminator (`step_type` vs `action`+`url`) rather than try/except-falling-through two adapters. Eliminates the risk of silently misrouting a homogeneity-violated trace. Tests: - 13 new unit tests across `TestEmitDefensive`, `TestStripNestedPythonEvents`, `TestPythonEventInvariants`, and `TestParseActionTraceDispatch`. Full suite is now 51 tests, all passing under `uv run --package narada-pyodide pytest`. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/narada_core/actions/models.py | 52 +++++- packages/narada-pyodide/src/narada/_trace.py | 50 +++++- packages/narada-pyodide/src/narada/utils.py | 20 ++- packages/narada-pyodide/src/narada/window.py | 11 +- packages/narada-pyodide/tests/test_trace.py | 165 ++++++++++++++++++ 5 files changed, 281 insertions(+), 17 deletions(-) diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index c566d75..025ea51 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -12,7 +12,14 @@ override, ) -from pydantic import BaseModel, Field, TypeAdapter, ValidationError +from pydantic import ( + BaseModel, + Field, + NonNegativeInt, + TypeAdapter, + ValidationError, + model_validator, +) # There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method # under the hood. @@ -245,6 +252,14 @@ class PythonSubAgentCallEvent(BaseModel): error_message: str | None = None action_trace: ActionTrace | None = None + @model_validator(mode="after") + def _check_ts_ordering(self) -> PythonSubAgentCallEvent: + if self.ts_end < self.ts_start: + raise ValueError( + f"PythonSubAgentCallEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})" + ) + return self + class PythonExtensionActionEvent(BaseModel): kind: Literal["extensionAction"] = "extensionAction" @@ -260,6 +275,14 @@ class PythonExtensionActionEvent(BaseModel): status: Literal["success", "error", "timeout"] error_message: str | None = None + @model_validator(mode="after") + def _check_ts_ordering(self) -> PythonExtensionActionEvent: + if self.ts_end < self.ts_start: + raise ValueError( + f"PythonExtensionActionEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})" + ) + return self + class PythonSideEffectEvent(BaseModel): kind: Literal["sideEffect"] = "sideEffect" @@ -282,9 +305,13 
@@ class PythonAgentRunTrace(BaseModel): step_type: Literal["pythonAgentRun"] = "pythonAgentRun" url: str status: Literal["success", "error", "aborted"] - duration_ms: int + duration_ms: NonNegativeInt events: list[PythonTraceEvent] error_message: str | None = None + # Set by the runtime when it caps the number of buffered events (see + # `python.worker.ts`). Informational only; the dashboard surfaces it so + # users know their trace is partial. + truncated_event_count: NonNegativeInt | None = None ApaStepTrace = Annotated[ @@ -332,7 +359,26 @@ class PythonAgentRunTrace(BaseModel): def parse_action_trace(trace_data: list[dict[str, Any] | Any]) -> ActionTrace: - """Parse the action trace, it will either be a list of operator action trace items or a list of APA action trace items.""" + """Parse the action trace. + + Dispatches deterministically based on the shape of the first item rather + than try/except-falling-through two adapters: operator items carry + ``action`` + ``url`` fields, APA steps carry ``step_type``. On an empty + list (no discriminator available) we default to APA, which is the + superset shape used by all custom agents. + """ + if not trace_data: + return _ApaActionTraceAdapter.validate_python(trace_data) + + first = trace_data[0] + if isinstance(first, dict) and "step_type" in first: + return _ApaActionTraceAdapter.validate_python(trace_data) + if isinstance(first, dict) and "action" in first and "url" in first: + return _OperatorActionTraceAdapter.validate_python(trace_data) + + # Ambiguous shape — fall back to the previous try/except pattern so we + # do not regress existing callers passing Pydantic instances or other + # shapes the adapters already know how to coerce. 
try: return _OperatorActionTraceAdapter.validate_python(trace_data) except ValidationError: diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py index 037d10f..06be667 100644 --- a/packages/narada-pyodide/src/narada/_trace.py +++ b/packages/narada-pyodide/src/narada/_trace.py @@ -14,6 +14,7 @@ from __future__ import annotations import json +import logging import time from typing import TYPE_CHECKING, Any, Literal @@ -51,8 +52,17 @@ def _narada_emit_trace_event(event_json: str) -> None: ... _MAX_ERROR_CHARS = 1000 _MAX_QUERY_CHARS = 200 +# When a sub-agent's response includes its own action trace (for example, the +# operator's step-by-step actions), we forward that trace one level deep so +# the dashboard can expand it. We do not forward deeper nesting — Python +# agents that delegate into other Python agents would otherwise produce +# exponentially-sized persisted traces. +_MAX_NESTED_ACTION_TRACE_DEPTH = 1 + _ELLIPSIS = "\u2026" +_logger = logging.getLogger(__name__) + def now_ms() -> int: """Current wall-clock time in integer milliseconds.""" @@ -84,8 +94,44 @@ def emit_trace_event(event: dict[str, Any]) -> None: ``PythonTraceEvent`` variants defined in ``narada_core.actions.models``. No validation is performed here; callers construct events directly and are responsible for matching the schema. + + Observability must not break the thing it observes: any failure + serialising or forwarding the event is logged and swallowed rather than + propagated to user code. ``default=str`` catches stray non-serialisable + values (timestamps, Pydantic models, numpy scalars) by stringifying them. 
+ """ + try: + _narada_emit_trace_event(json.dumps(event, default=str)) # noqa: F821 + except Exception: # noqa: BLE001 — broad by design; see docstring + _logger.warning("trace event emission failed", exc_info=True) + + +def _strip_nested_python_events( + raw: list[dict[str, Any]] | None, +) -> list[dict[str, Any]] | None: + """Forward a nested action trace one level deep. Any ``pythonAgentRun`` + node inside retains its outer status/duration metadata but its ``events`` + list is dropped, preventing deep recursion from blowing up persisted + JSON size. A ``truncated_event_count`` field is left behind so the + dashboard can show that events were elided. """ - _narada_emit_trace_event(json.dumps(event)) # noqa: F821 + if raw is None: + return None + + def strip(item: dict[str, Any]) -> dict[str, Any]: + if not isinstance(item, dict): + return item + if item.get("step_type") != "pythonAgentRun": + return item + events = item.get("events", []) + stripped = dict(item) + stripped["events"] = [] + stripped["truncated_event_count"] = ( + len(events) if isinstance(events, list) else 0 + ) + return stripped + + return [strip(item) for item in raw] def summarize_request(request: ExtensionActionRequest) -> dict[str, Any]: @@ -197,7 +243,7 @@ def emit_sub_agent_call( if error_message is not None: event["error_message"] = truncate_error(error_message) if action_trace_raw is not None: - event["action_trace"] = action_trace_raw + event["action_trace"] = _strip_nested_python_events(action_trace_raw) emit_trace_event(event) diff --git a/packages/narada-pyodide/src/narada/utils.py b/packages/narada-pyodide/src/narada/utils.py index 1107ad9..dbd5d19 100644 --- a/packages/narada-pyodide/src/narada/utils.py +++ b/packages/narada-pyodide/src/narada/utils.py @@ -19,7 +19,16 @@ def download_file(filename: str, content: str | bytes) -> None: content: The content to write. If str, writes in text mode (UTF-8). If bytes, writes in binary mode. 
""" - _narada_download_file(filename, content) + try: + _narada_download_file(filename, content) + except Exception as err: + # Record that the attempt happened and failed, then re-raise so user + # code still sees the exception. + _trace.emit_side_effect( + effect_type="download_file", + description=f"Failed to download file {filename}: {err}", + ) + raise _trace.emit_side_effect( effect_type="download_file", description=f"Downloaded file: {filename}", @@ -33,7 +42,14 @@ def render_html(html: str) -> None: Args: html: The HTML content to render. """ - _narada_render_html(html) + try: + _narada_render_html(html) + except Exception as err: + _trace.emit_side_effect( + effect_type="render_html", + description=f"Failed to render HTML: {err}", + ) + raise _trace.emit_side_effect( effect_type="render_html", description="Rendered HTML in a new tab", diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py index 509499c..5912f00 100644 --- a/packages/narada-pyodide/src/narada/window.py +++ b/packages/narada-pyodide/src/narada/window.py @@ -332,7 +332,7 @@ async def dispatch_request( else: raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout) - except asyncio.TimeoutError: + except (asyncio.TimeoutError, NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE): _trace.emit_sub_agent_call( ts_start=trace_start_ms, agent_type=agent_type_str, @@ -341,15 +341,6 @@ async def dispatch_request( error_message=f"Timed out after {timeout}s", ) raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout) - except NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE: - _trace.emit_sub_agent_call( - ts_start=trace_start_ms, - agent_type=agent_type_str, - prompt=prompt, - status="timeout", - error_message=f"Timed out after {timeout}s", - ) - raise except Exception as err: _trace.emit_sub_agent_call( ts_start=trace_start_ms, diff --git a/packages/narada-pyodide/tests/test_trace.py b/packages/narada-pyodide/tests/test_trace.py index 2a60f54..4d5e34b 100644 
--- a/packages/narada-pyodide/tests/test_trace.py +++ b/packages/narada-pyodide/tests/test_trace.py @@ -430,3 +430,168 @@ def test_error_status_parses(self) -> None: assert isinstance(trace[0], PythonAgentRunTrace) assert trace[0].status == "error" assert trace[0].error_message == "ZeroDivisionError" + + +# --------------------------------------------------------------------------- +# Defensive emit: observability must never break the user's agent run +# --------------------------------------------------------------------------- + + +class TestEmitDefensive: + def test_non_serialisable_payload_is_stringified_not_raised( + self, recorded_events + ) -> None: + """A stray datetime / set / custom object in a summary should not crash + user code mid-run. ``default=str`` stringifies and the event still + reaches the harness.""" + import datetime as _dt + + _trace.emit_trace_event( + { + "kind": "stdout", + "ts": _dt.datetime(2026, 1, 1), # non-serialisable in std json + "text": "hello", + } + ) + # Event was recorded (ts got stringified by default=str). + assert len(recorded_events.events) == 1 + assert isinstance(recorded_events.events[0]["ts"], str) + + def test_harness_raising_does_not_propagate(self, monkeypatch) -> None: + """If the JS-injected emitter raises, we swallow and log rather than + propagate — tracing failures must not break the agent run.""" + + def _boom(_json: str) -> None: + raise RuntimeError("bridge down") + + # `_narada_emit_trace_event` is injected by the JS harness at runtime + # (TYPE_CHECKING stub only in source); set without `raising` so the + # assignment succeeds even when the attribute isn't yet bound. + monkeypatch.setattr(_trace, "_narada_emit_trace_event", _boom, raising=False) + # Must not raise. 
+ _trace.emit_trace_event({"kind": "stdout", "ts": 1, "text": "hi"}) + + +# --------------------------------------------------------------------------- +# Nested action_trace stripping: cap recursion depth to one level +# --------------------------------------------------------------------------- + + +class TestStripNestedPythonEvents: + def test_passes_through_operator_items_unchanged(self) -> None: + raw = [{"url": "https://x", "action": "click Foo"}] + assert _trace._strip_nested_python_events(raw) == raw + + def test_passes_through_non_python_apa_items_unchanged(self) -> None: + raw = [{"step_type": "goToUrl", "url": "https://x", "description": "..."}] + assert _trace._strip_nested_python_events(raw) == raw + + def test_strips_events_from_nested_python_agent_run(self) -> None: + raw = [ + { + "step_type": "pythonAgentRun", + "url": "", + "status": "success", + "duration_ms": 10, + "events": [{"kind": "stdout", "ts": 1, "text": "a"}], + } + ] + stripped = _trace._strip_nested_python_events(raw) + assert stripped is not None + assert stripped[0]["events"] == [] + assert stripped[0]["truncated_event_count"] == 1 + + def test_none_passes_through(self) -> None: + assert _trace._strip_nested_python_events(None) is None + + def test_integrates_with_emit_sub_agent_call(self, recorded_events) -> None: + _trace.emit_sub_agent_call( + ts_start=1, + agent_type="custom_python", + prompt="nested", + status="success", + action_trace_raw=[ + { + "step_type": "pythonAgentRun", + "url": "", + "status": "success", + "duration_ms": 10, + "events": [ + {"kind": "stdout", "ts": 1, "text": "a"}, + {"kind": "stdout", "ts": 2, "text": "b"}, + ], + } + ], + ) + event = recorded_events.events[0] + inner = event["action_trace"][0] + assert inner["events"] == [] + assert inner["truncated_event_count"] == 2 + + +# --------------------------------------------------------------------------- +# Pydantic invariants on new event models +# 
--------------------------------------------------------------------------- + + +class TestPythonEventInvariants: + def test_sub_agent_call_rejects_ts_end_before_ts_start(self) -> None: + from narada_core.actions.models import PythonSubAgentCallEvent + from pydantic import ValidationError + + with pytest.raises(ValidationError, match="ts_end"): + PythonSubAgentCallEvent( + ts_start=1000, + ts_end=999, + agent_type="operator", + prompt="p", + status="success", + ) + + def test_extension_action_rejects_ts_end_before_ts_start(self) -> None: + from narada_core.actions.models import PythonExtensionActionEvent + from pydantic import ValidationError + + with pytest.raises(ValidationError, match="ts_end"): + PythonExtensionActionEvent( + ts_start=1000, + ts_end=999, + action_name="get_url", + request_summary={}, + status="success", + ) + + def test_python_agent_run_rejects_negative_duration(self) -> None: + from pydantic import ValidationError + + with pytest.raises(ValidationError): + PythonAgentRunTrace( + url="", + status="success", + duration_ms=-1, + events=[], + ) + + +# --------------------------------------------------------------------------- +# Deterministic parse_action_trace selection +# --------------------------------------------------------------------------- + + +class TestParseActionTraceDispatch: + def test_empty_list_parses_as_apa(self) -> None: + result = parse_action_trace([]) + assert result == [] + + def test_step_type_routes_to_apa_adapter(self) -> None: + result = parse_action_trace( + [{"step_type": "goToUrl", "url": "https://x", "description": "..."}] + ) + assert result[0].step_type == "goToUrl" + + def test_action_plus_url_routes_to_operator_adapter(self) -> None: + from narada_core.actions.models import OperatorActionTraceItem + + result = parse_action_trace([{"url": "https://x", "action": "click Foo"}]) + assert isinstance(result[0], OperatorActionTraceItem) + assert result[0].action == "click Foo" From 
662d10decb917132a188d699dc1febdd625fac9d Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Fri, 24 Apr 2026 16:01:15 -0700 Subject: [PATCH 03/13] feat(sdk): add reasoning effort to the Core Agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets SDK callers opt into GPT-5.2's reasoning levels on the Core Agent, matching the picker the web UI added in v2 of the agent step. The wire field stays the existing `reasoningMode: "none"|"low"|"medium"|"high"` on `NaradaGenerationRequest`; only `CoreAgent` reads it server-side. What changes: - `narada-core` — new `ReasoningEffort` `StrEnum` (NONE/LOW/MEDIUM/HIGH). Re-exported from both `narada` and `narada-pyodide` package roots so callers can `from narada import ReasoningEffort`. - `narada` and `narada-pyodide` — `dispatch_request()` and `agent()` gain a `reasoning: ReasoningEffort | None = None` parameter that serializes to `body["reasoningMode"]` only when set (absent on the wire when `None`, preserving backward-compat with older backends). - Type-level enforcement that `reasoning` is only valid with `agent=Agent.CORE_AGENT`: paired `@overload` signatures use `Literal[Agent.CORE_AGENT]` to give Pyright/mypy a hard error on misuse. A runtime `ValueError` covers the string-form path (`agent="..."`) where overload narrowing doesn't help. - 8 new unit tests in `narada-pyodide/tests/test_reasoning.py` cover body wiring (each effort level, omission when None), runtime validation (enum and string agent forms, both `agent()` and `dispatch_request()`), and enum-value alignment with the backend Literal. 
Version bumps (coupled — see PR description for rationale): - narada-core 0.0.18 → 0.0.19 - narada 0.1.43 → 0.1.44 - narada-pyodide 0.0.45a2 → 0.0.46a1 --- packages/narada-core/pyproject.toml | 2 +- .../narada-core/src/narada_core/models.py | 14 + packages/narada-pyodide/pyproject.toml | 4 +- .../narada-pyodide/src/narada/__init__.py | 3 +- packages/narada-pyodide/src/narada/window.py | 155 +++++++++- .../narada-pyodide/tests/test_reasoning.py | 277 ++++++++++++++++++ packages/narada/pyproject.toml | 4 +- packages/narada/src/narada/__init__.py | 3 +- packages/narada/src/narada/window.py | 164 ++++++++++- uv.lock | 6 +- 10 files changed, 596 insertions(+), 36 deletions(-) create mode 100644 packages/narada-pyodide/tests/test_reasoning.py diff --git a/packages/narada-core/pyproject.toml b/packages/narada-core/pyproject.toml index e47b7c8..2b29207 100644 --- a/packages/narada-core/pyproject.toml +++ b/packages/narada-core/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "narada-core" -version = "0.0.18" +version = "0.0.19" description = "Code shared by the `narada` and `narada-pyodide` packages." license = "Apache-2.0" readme = "README.md" diff --git a/packages/narada-core/src/narada_core/models.py b/packages/narada-core/src/narada_core/models.py index 8e03292..8124545 100644 --- a/packages/narada-core/src/narada_core/models.py +++ b/packages/narada-core/src/narada_core/models.py @@ -21,6 +21,20 @@ def prompt_prefix(self) -> str: return "/coreAgent " +class ReasoningEffort(StrEnum): + """Amount of reasoning the Core Agent applies before responding. + + Maps 1:1 to OpenAI's ``reasoning.effort`` parameter. Only honored when the + invoked agent is :py:attr:`Agent.CORE_AGENT`; the SDK enforces this both at + type-check time (via ``@overload``) and at runtime (with a ``ValueError``). 
+ """ + + NONE = "none" + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + + class UserResourceCredentials(TypedDict, total=False): salesforce: dict[str, str] jira: dict[str, str] diff --git a/packages/narada-pyodide/pyproject.toml b/packages/narada-pyodide/pyproject.toml index 38b5088..172cbf1 100644 --- a/packages/narada-pyodide/pyproject.toml +++ b/packages/narada-pyodide/pyproject.toml @@ -1,14 +1,14 @@ [project] name = "narada-pyodide" -version = "0.0.45a2" +version = "0.0.46a1" description = "Pyodide-compatible Python client SDK for Narada" license = "Apache-2.0" readme = "README.md" authors = [{ name = "Narada", email = "support@narada.ai" }] requires-python = ">=3.12" dependencies = [ - "narada-core==0.0.18", + "narada-core==0.0.19", # Must be a supported version in https://pyodide.org/en/stable/usage/packages-in-pyodide.html "packaging==24.2", ] diff --git a/packages/narada-pyodide/src/narada/__init__.py b/packages/narada-pyodide/src/narada/__init__.py index 544d452..386ed83 100644 --- a/packages/narada-pyodide/src/narada/__init__.py +++ b/packages/narada-pyodide/src/narada/__init__.py @@ -2,7 +2,7 @@ NaradaError, NaradaTimeoutError, ) -from narada_core.models import Agent, File, Response, ResponseContent +from narada_core.models import Agent, File, ReasoningEffort, Response, ResponseContent from narada.client import Narada from narada.utils import download_file, render_html @@ -23,6 +23,7 @@ "Narada", "NaradaError", "NaradaTimeoutError", + "ReasoningEffort", "RemoteBrowserWindow", "render_html", "Response", diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py index 791503c..9395466 100644 --- a/packages/narada-pyodide/src/narada/window.py +++ b/packages/narada-pyodide/src/narada/window.py @@ -63,6 +63,7 @@ Agent, File, McpServer, + ReasoningEffort, RemoteDispatchChatHistoryItem, Response, UserResourceCredentials, @@ -196,6 +197,57 @@ async def upload_file(self, *, file: IO) -> File: "Uploading files is 
not supported in the browser environment" ) + # `reasoning` is only valid with the Core Agent; these two overloads make + # that constraint type-checkable. Generic-agent calls fall through to the + # general overloads below, which do not accept a `reasoning` argument. + @overload + async def dispatch_request( + self, + *, + prompt: str, + agent: Literal[Agent.CORE_AGENT], + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: None = None, + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: dict[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: dict[str, Any] | None = None, + timeout: int = 1000, + ) -> Response[None]: ... + + @overload + async def dispatch_request( + self, + *, + prompt: str, + agent: Literal[Agent.CORE_AGENT], + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[_StructuredOutput], + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: dict[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: dict[str, Any] | None = None, + timeout: int = 1000, + ) -> Response[_StructuredOutput]: ... 
+ @overload async def dispatch_request( self, @@ -247,6 +299,7 @@ async def dispatch_request( *, prompt: str, agent: Agent | str = Agent.OPERATOR, + reasoning: ReasoningEffort | None = None, clear_chat: bool | None = None, generate_gif: bool | None = None, output_schema: type[BaseModel] | None = None, @@ -267,6 +320,14 @@ async def dispatch_request( The higher-level `agent` method should be preferred for most use cases. """ + # The overloads enforce this at type-check time when callers use + # ``Agent.CORE_AGENT``; the runtime check covers string-form agents + # (``agent="..."``) and callers without a type checker. + if reasoning is not None and agent is not Agent.CORE_AGENT: + raise ValueError( + "`reasoning` is only supported with `agent=Agent.CORE_AGENT` " + f"(got agent={agent!r})" + ) # Trace instrumentation: the entire method body is wrapped so that any # exit (successful return, timeout, or non-timeout failure) produces a # ``subAgentCall`` trace event with matching status. See `_trace.py`. @@ -319,6 +380,8 @@ async def dispatch_request( body["callbackSecret"] = callback_secret if callback_headers is not None: body["callbackHeaders"] = callback_headers + if reasoning is not None: + body["reasoningMode"] = reasoning.value try: controller = AbortController.new() @@ -436,6 +499,42 @@ async def dispatch_request( ) raise + # `reasoning` is only valid with the Core Agent. See `dispatch_request` + # above for the rationale; the same overload pattern is mirrored here. + @overload + async def agent( + self, + *, + prompt: str, + agent: Literal[Agent.CORE_AGENT], + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: None = None, + time_zone: str = "America/Los_Angeles", + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: dict[str, Any] | None = None, + timeout: int = 1000, + ) -> AgentResponse[dict[str, Any]]: ... 
+ + @overload + async def agent( + self, + *, + prompt: str, + agent: Literal[Agent.CORE_AGENT], + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[_StructuredOutput], + time_zone: str = "America/Los_Angeles", + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: dict[str, Any] | None = None, + timeout: int = 1000, + ) -> AgentResponse[_StructuredOutput]: ... + @overload async def agent( self, @@ -473,6 +572,7 @@ async def agent( *, prompt: str, agent: Agent | str = Agent.OPERATOR, + reasoning: ReasoningEffort | None = None, clear_chat: bool | None = None, generate_gif: bool | None = None, output_schema: type[BaseModel] | None = None, @@ -483,18 +583,49 @@ async def agent( timeout: int = 1000, ) -> AgentResponse: """Invokes an agent in the Narada extension side panel chat.""" - remote_dispatch_response = await self.dispatch_request( - prompt=prompt, - agent=agent, - clear_chat=clear_chat, - generate_gif=generate_gif, - output_schema=output_schema, - time_zone=time_zone, - mcp_servers=mcp_servers, - secret_variables=secret_variables, - input_variables=input_variables, - timeout=timeout, - ) + # Branch on `reasoning` so each call site binds a single, typed overload + # of `dispatch_request`. The validation also lives in `dispatch_request` + # itself (defense in depth + reachable when callers go straight to the + # low-level API), so the redundancy here is intentional. 
+ if reasoning is None: + remote_dispatch_response = await self.dispatch_request( + prompt=prompt, + agent=agent, + clear_chat=clear_chat, + generate_gif=generate_gif, + output_schema=output_schema, + time_zone=time_zone, + mcp_servers=mcp_servers, + secret_variables=secret_variables, + input_variables=input_variables, + timeout=timeout, + ) + else: + if agent is not Agent.CORE_AGENT: + raise ValueError( + "`reasoning` is only supported with `agent=Agent.CORE_AGENT` " + f"(got agent={agent!r})" + ) + # The CORE_AGENT-specific overloads of `dispatch_request` split on + # a narrower `output_schema` discriminator (None vs `type[T]`), + # which the impl's `type[BaseModel] | None` union doesn't cleanly + # narrow into without further branching. The public `agent()` + # overloads above already give callers correct return-type + # narrowing, so the internal forward call bypasses overload + # disambiguation on this single dimension. + remote_dispatch_response = await self.dispatch_request( # pyright: ignore[reportCallIssue] + prompt=prompt, + agent=agent, + reasoning=reasoning, + clear_chat=clear_chat, + generate_gif=generate_gif, + output_schema=output_schema, # pyright: ignore[reportArgumentType] + time_zone=time_zone, + mcp_servers=mcp_servers, + secret_variables=secret_variables, + input_variables=input_variables, + timeout=timeout, + ) response_content = remote_dispatch_response["response"] assert response_content is not None diff --git a/packages/narada-pyodide/tests/test_reasoning.py b/packages/narada-pyodide/tests/test_reasoning.py new file mode 100644 index 0000000..5d7d53e --- /dev/null +++ b/packages/narada-pyodide/tests/test_reasoning.py @@ -0,0 +1,277 @@ +"""Tests for the `reasoning` parameter on the Core Agent. 
+ +These exercise the `narada-pyodide` window because it is the only package with +a runnable test harness today; the impl in the sibling `narada` package shares +the same request-body wiring and runtime check, so coverage here stands in for +the behavior of both code paths. + +We mirror `test_cloud_browser.py`'s module-clearing pattern: each test gets a +fresh import of `narada.window` with a freshly stubbed `pyodide.http.pyfetch`, +because cached module references from earlier tests would otherwise leak into +this file when the suite runs in alphabetical order. +""" + +from __future__ import annotations + +import importlib +import json +import sys +from collections.abc import Iterator +from types import ModuleType, SimpleNamespace +from typing import Any +from unittest.mock import AsyncMock + +import pytest + + +def _clear_modules() -> None: + for name in list(sys.modules): + if name == "narada" or name.startswith("narada."): + sys.modules.pop(name, None) + for name in ("js", "pyodide", "pyodide.http", "pyodide.ffi"): + sys.modules.pop(name, None) + + +class _FakeResponse: + def __init__(self, *, ok: bool = True, json_data: object = None) -> None: + self.ok = ok + self.status = 200 + self._json_data = json_data + + async def json(self) -> object: + return self._json_data + + async def text(self) -> str: + return "" + + +def _make_pyfetch_recorder() -> tuple[AsyncMock, list[dict[str, Any]]]: + """Build an `AsyncMock` for `pyfetch` that captures every JSON body posted + (not only to /remote-dispatch) and returns a canned success response on the poll.""" + posted_bodies: list[dict[str, Any]] = [] + + async def _impl(url: str, **kwargs: Any) -> _FakeResponse: + if "body" in kwargs: + posted_bodies.append(json.loads(kwargs["body"])) + if url.endswith("/remote-dispatch"): + return _FakeResponse(json_data={"requestId": "req-test"}) + return _FakeResponse( + json_data={ + "status": "success", + "response": { + "text": "ok", + "output": {"type": "text", "content": "ok"}, + }, +
"createdAt": "now", + "completedAt": "now", + "usage": {"actions": 0, "credits": 0.0}, + } + ) + + pyfetch = AsyncMock(side_effect=_impl) + return pyfetch, posted_bodies + + +@pytest.fixture +def reimported_window( + monkeypatch: pytest.MonkeyPatch, +) -> Iterator[tuple[ModuleType, AsyncMock, list[dict[str, Any]]]]: + """Force a fresh import of `narada.window` after planting freshly-mocked + Pyodide-bridge modules. Yields the window module, the captured `pyfetch` + mock, and the list that records every posted JSON body. + """ + _clear_modules() + + js_module = ModuleType("js") + js_module.AbortController = SimpleNamespace( # type: ignore[attr-defined] + new=lambda: SimpleNamespace(signal=object(), abort=lambda: None) + ) + js_module.setTimeout = lambda callback, timeout: None # type: ignore[attr-defined] + + pyodide_module = ModuleType("pyodide") + pyodide_module.__path__ = [] # type: ignore[attr-defined] + + pyfetch, posted_bodies = _make_pyfetch_recorder() + pyodide_http_module = ModuleType("pyodide.http") + pyodide_http_module.pyfetch = pyfetch # type: ignore[attr-defined] + + pyodide_ffi_module = ModuleType("pyodide.ffi") + + class _FakeJsProxy: + def __init__(self, value: object) -> None: + self._value = value + + def to_py(self) -> object: + return self._value + + pyodide_ffi_module.JsProxy = _FakeJsProxy # type: ignore[attr-defined] + pyodide_ffi_module.create_once_callable = lambda fn: fn # type: ignore[attr-defined] + + monkeypatch.setitem(sys.modules, "js", js_module) + monkeypatch.setitem(sys.modules, "pyodide", pyodide_module) + monkeypatch.setitem(sys.modules, "pyodide.http", pyodide_http_module) + monkeypatch.setitem(sys.modules, "pyodide.ffi", pyodide_ffi_module) + + window_module = importlib.import_module("narada.window") + window_module._narada_parent_run_ids = _FakeJsProxy([]) # type: ignore[attr-defined] + yield window_module, pyfetch, posted_bodies + _clear_modules() + + +def _make_window(window_module: ModuleType) -> Any: + window = 
window_module.LocalBrowserWindow.__new__(window_module.LocalBrowserWindow) + window._auth_headers = {"x-narada-test": "1"} + window._base_url = "https://example.invalid/api" + window._browser_window_id = "test-window" + + async def _stub_auth_headers() -> dict[str, str]: + return {"x-narada-test": "1"} + + window._get_auth_headers = _stub_auth_headers + window._current_parent_run_ids = lambda: [] + return window + + +class TestReasoningBodyWiring: + """The `reasoning` arg flows through to the JSON body as `reasoningMode`.""" + + @pytest.mark.asyncio + async def test_present_when_reasoning_is_set( + self, + reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], + ) -> None: + window_module, _pyfetch, posted_bodies = reimported_window + from narada_core.models import Agent, ReasoningEffort + + window = _make_window(window_module) + await window.dispatch_request( + prompt="solve this", + agent=Agent.CORE_AGENT, + reasoning=ReasoningEffort.MEDIUM, + ) + + assert posted_bodies[0]["reasoningMode"] == "medium" + + @pytest.mark.asyncio + async def test_absent_when_reasoning_is_none( + self, + reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], + ) -> None: + window_module, _pyfetch, posted_bodies = reimported_window + from narada_core.models import Agent + + window = _make_window(window_module) + await window.dispatch_request( + prompt="solve this", + agent=Agent.CORE_AGENT, + ) + + # Absent (not null) — wire-compatible with backends predating the field. 
+ assert "reasoningMode" not in posted_bodies[0] + + @pytest.mark.asyncio + async def test_each_effort_level_serializes_to_string( + self, + reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], + ) -> None: + window_module, _pyfetch, posted_bodies = reimported_window + from narada_core.models import Agent, ReasoningEffort + + window = _make_window(window_module) + + for level in ( + ReasoningEffort.NONE, + ReasoningEffort.LOW, + ReasoningEffort.MEDIUM, + ReasoningEffort.HIGH, + ): + await window.dispatch_request( + prompt="x", + agent=Agent.CORE_AGENT, + reasoning=level, + ) + + seen = [b["reasoningMode"] for b in posted_bodies if "reasoningMode" in b] + assert seen == ["none", "low", "medium", "high"] + + +class TestReasoningRuntimeValidation: + """Misuse — `reasoning` paired with a non-Core agent — fails fast at runtime + with a clear message. The overload contract on the public `agent()` method + catches this at type-check time when callers use the enum, but the runtime + check covers the string-form (`agent="..."`) and untyped paths.""" + + @pytest.mark.asyncio + async def test_dispatch_request_rejects_non_core_agent_enum( + self, + reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], + ) -> None: + window_module, _pyfetch, _posted = reimported_window + from narada_core.models import Agent, ReasoningEffort + + window = _make_window(window_module) + with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"): + await window.dispatch_request( + prompt="x", + agent=Agent.OPERATOR, + reasoning=ReasoningEffort.MEDIUM, # pyright: ignore[reportCallIssue] + ) + + @pytest.mark.asyncio + async def test_dispatch_request_rejects_string_agent( + self, + reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], + ) -> None: + # String-form bypasses the type-checker overload, so the runtime check + # is the only safety net here. 
+ window_module, _pyfetch, _posted = reimported_window + from narada_core.models import ReasoningEffort + + window = _make_window(window_module) + with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"): + await window.dispatch_request( + prompt="x", + agent="some-custom-agent", + reasoning=ReasoningEffort.HIGH, # pyright: ignore[reportCallIssue] + ) + + @pytest.mark.asyncio + async def test_agent_rejects_non_core_agent_enum( + self, + reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], + ) -> None: + # The same constraint must hold on the higher-level `agent()` method. + window_module, _pyfetch, _posted = reimported_window + from narada_core.models import Agent, ReasoningEffort + + window = _make_window(window_module) + with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"): + await window.agent( + prompt="x", + agent=Agent.OPERATOR, + reasoning=ReasoningEffort.LOW, # pyright: ignore[reportCallIssue] + ) + + +class TestReasoningEffortEnum: + """The enum values are exactly what the backend expects.""" + + def test_values_match_backend_literal(self) -> None: + # The backend declares `reasoningMode: Literal["none", "low", + # "medium", "high"] | None`. If we drift, requests will start failing + # validation server-side. + from narada_core.models import ReasoningEffort + + assert ReasoningEffort.NONE.value == "none" + assert ReasoningEffort.LOW.value == "low" + assert ReasoningEffort.MEDIUM.value == "medium" + assert ReasoningEffort.HIGH.value == "high" + + def test_str_enum_serializes_inline(self) -> None: + # `StrEnum` values double as `str`, which is what `json.dumps` writes + # without any custom encoder. 
+ from narada_core.models import ReasoningEffort + + assert json.dumps({"reasoningMode": ReasoningEffort.MEDIUM.value}) == ( + '{"reasoningMode": "medium"}' + ) diff --git a/packages/narada/pyproject.toml b/packages/narada/pyproject.toml index 59e1179..f220a03 100644 --- a/packages/narada/pyproject.toml +++ b/packages/narada/pyproject.toml @@ -1,13 +1,13 @@ [project] name = "narada" -version = "0.1.43" +version = "0.1.44" description = "Python client SDK for Narada" license = "Apache-2.0" readme = "README.md" authors = [{ name = "Narada", email = "support@narada.ai" }] requires-python = ">=3.12" dependencies = [ - "narada-core==0.0.18", + "narada-core==0.0.19", "aiohttp>=3.12.13", "playwright>=1.53.0", "rich>=14.0.0", diff --git a/packages/narada/src/narada/__init__.py b/packages/narada/src/narada/__init__.py index 3f6e9fe..1434183 100644 --- a/packages/narada/src/narada/__init__.py +++ b/packages/narada/src/narada/__init__.py @@ -7,7 +7,7 @@ NaradaUnsupportedBrowserError, UserAbortedError, ) -from narada_core.models import Agent, File, Response, ResponseContent +from narada_core.models import Agent, File, ReasoningEffort, Response, ResponseContent from narada.client import Narada from narada.config import BrowserConfig, ProxyConfig @@ -31,6 +31,7 @@ "NaradaTimeoutError", "NaradaUnsupportedBrowserError", "ProxyConfig", + "ReasoningEffort", "RemoteBrowserWindow", "render_html", "Response", diff --git a/packages/narada/src/narada/window.py b/packages/narada/src/narada/window.py index 29c359b..aa30065 100644 --- a/packages/narada/src/narada/window.py +++ b/packages/narada/src/narada/window.py @@ -7,7 +7,7 @@ from http import HTTPStatus from io import IOBase from pathlib import Path -from typing import IO, Any, Mapping, TypeGuard, TypeVar, overload, override +from typing import IO, Any, Literal, Mapping, TypeGuard, TypeVar, overload, override import aiohttp from narada_core.actions.models import ( @@ -53,6 +53,7 @@ Agent, File, McpServer, + ReasoningEffort, 
RemoteDispatchChatHistoryItem, Response, UserResourceCredentials, @@ -214,6 +215,59 @@ async def _upload_input_variable_file( uploaded_file = await self._upload_file_impl(file=input_variable_value) return _InputVariableFileReference(key=uploaded_file["key"], name=filename) + # `reasoning` is only valid with the Core Agent; these two overloads make + # that constraint type-checkable. Generic-agent calls fall through to the + # general overloads below, which do not accept a `reasoning` argument. + @overload + async def dispatch_request( + self, + *, + prompt: str, + agent: Literal[Agent.CORE_AGENT], + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: None = None, + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + timeout: int = 1000, + ) -> Response[None]: ... 
+ + @overload + async def dispatch_request( + self, + *, + prompt: str, + agent: Literal[Agent.CORE_AGENT], + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[_StructuredOutput], + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + timeout: int = 1000, + ) -> Response[_StructuredOutput]: ... + @overload async def dispatch_request( self, @@ -267,6 +321,7 @@ async def dispatch_request( *, prompt: str, agent: Agent | str = Agent.OPERATOR, + reasoning: ReasoningEffort | None = None, clear_chat: bool | None = None, generate_gif: bool | None = None, output_schema: type[BaseModel] | None = None, @@ -288,6 +343,14 @@ async def dispatch_request( The higher-level `agent` method should be preferred for most use cases. """ + # The overloads enforce this at type-check time when callers use + # ``Agent.CORE_AGENT``; the runtime check covers string-form agents + # (``agent="..."``) and callers without a type checker. 
+ if reasoning is not None and agent is not Agent.CORE_AGENT: + raise ValueError( + "`reasoning` is only supported with `agent=Agent.CORE_AGENT` " + f"(got agent={agent!r})" + ) deadline = time.monotonic() + timeout agent_prefix = ( @@ -333,6 +396,8 @@ async def dispatch_request( body["callbackSecret"] = callback_secret if callback_headers is not None: body["callbackHeaders"] = callback_headers + if reasoning is not None: + body["reasoningMode"] = reasoning.value try: async with aiohttp.ClientSession() as session: @@ -383,6 +448,44 @@ async def dispatch_request( except asyncio.TimeoutError: raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout) + # `reasoning` is only valid with the Core Agent. See `dispatch_request` + # above for the rationale; the same overload pattern is mirrored here. + @overload + async def agent( + self, + *, + prompt: str, + agent: Literal[Agent.CORE_AGENT], + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: None = None, + attachment: File | None = None, + time_zone: str = "America/Los_Angeles", + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + timeout: int = 1000, + ) -> AgentResponse[dict[str, Any]]: ... + + @overload + async def agent( + self, + *, + prompt: str, + agent: Literal[Agent.CORE_AGENT], + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[_StructuredOutput], + attachment: File | None = None, + time_zone: str = "America/Los_Angeles", + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + timeout: int = 1000, + ) -> AgentResponse[_StructuredOutput]: ... 
+ @overload async def agent( self, @@ -422,6 +525,7 @@ async def agent( *, prompt: str, agent: Agent | str = Agent.OPERATOR, + reasoning: ReasoningEffort | None = None, clear_chat: bool | None = None, generate_gif: bool | None = None, output_schema: type[BaseModel] | None = None, @@ -433,19 +537,51 @@ async def agent( timeout: int = 1000, ) -> AgentResponse: """Invokes an agent in the Narada extension side panel chat.""" - remote_dispatch_response = await self.dispatch_request( - prompt=prompt, - agent=agent, - clear_chat=clear_chat, - generate_gif=generate_gif, - output_schema=output_schema, - attachment=attachment, - time_zone=time_zone, - mcp_servers=mcp_servers, - secret_variables=secret_variables, - input_variables=input_variables, - timeout=timeout, - ) + # Branch on `reasoning` so each call site binds a single, typed overload + # of `dispatch_request`. The validation also lives in `dispatch_request` + # itself (defense in depth + reachable when callers go straight to the + # low-level API), so the redundancy here is intentional. + if reasoning is None: + remote_dispatch_response = await self.dispatch_request( + prompt=prompt, + agent=agent, + clear_chat=clear_chat, + generate_gif=generate_gif, + output_schema=output_schema, + attachment=attachment, + time_zone=time_zone, + mcp_servers=mcp_servers, + secret_variables=secret_variables, + input_variables=input_variables, + timeout=timeout, + ) + else: + if agent is not Agent.CORE_AGENT: + raise ValueError( + "`reasoning` is only supported with `agent=Agent.CORE_AGENT` " + f"(got agent={agent!r})" + ) + # The CORE_AGENT-specific overloads of `dispatch_request` split on + # a narrower `output_schema` discriminator (None vs `type[T]`), + # which the impl's `type[BaseModel] | None` union doesn't cleanly + # narrow into without further branching. 
The public `agent()` + # overloads above already give callers correct return-type + # narrowing, so the internal forward call bypasses overload + # disambiguation on this single dimension. + remote_dispatch_response = await self.dispatch_request( # pyright: ignore[reportCallIssue] + prompt=prompt, + agent=agent, + reasoning=reasoning, + clear_chat=clear_chat, + generate_gif=generate_gif, + output_schema=output_schema, # pyright: ignore[reportArgumentType] + attachment=attachment, + time_zone=time_zone, + mcp_servers=mcp_servers, + secret_variables=secret_variables, + input_variables=input_variables, + timeout=timeout, + ) response_content = remote_dispatch_response["response"] assert response_content is not None diff --git a/uv.lock b/uv.lock index 0ee3814..b8493dc 100644 --- a/uv.lock +++ b/uv.lock @@ -312,7 +312,7 @@ wheels = [ [[package]] name = "narada" -version = "0.1.43" +version = "0.1.44" source = { editable = "packages/narada" } dependencies = [ { name = "aiohttp" }, @@ -345,7 +345,7 @@ dev = [ [[package]] name = "narada-core" -version = "0.0.18" +version = "0.0.19" source = { editable = "packages/narada-core" } dependencies = [ { name = "pydantic" }, @@ -356,7 +356,7 @@ requires-dist = [{ name = "pydantic", specifier = "==2.12.5" }] [[package]] name = "narada-pyodide" -version = "0.0.45a2" +version = "0.0.46a1" source = { editable = "packages/narada-pyodide" } dependencies = [ { name = "narada-core" }, From ba8044df6c44fcbce344f80d934291f29df3834e Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Fri, 24 Apr 2026 16:05:54 -0700 Subject: [PATCH 04/13] test(sdk): hoist test imports to module top `narada_core.models` is not affected by the `_clear_modules()` reset (only `narada` and `pyodide.*` get popped), so the per-test `from narada_core.models import Agent, ReasoningEffort` repeats were unnecessary. Move them up. 
--- packages/narada-pyodide/tests/test_reasoning.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/packages/narada-pyodide/tests/test_reasoning.py b/packages/narada-pyodide/tests/test_reasoning.py index 5d7d53e..2844015 100644 --- a/packages/narada-pyodide/tests/test_reasoning.py +++ b/packages/narada-pyodide/tests/test_reasoning.py @@ -22,6 +22,7 @@ from unittest.mock import AsyncMock import pytest +from narada_core.models import Agent, ReasoningEffort def _clear_modules() -> None: @@ -141,8 +142,6 @@ async def test_present_when_reasoning_is_set( reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], ) -> None: window_module, _pyfetch, posted_bodies = reimported_window - from narada_core.models import Agent, ReasoningEffort - window = _make_window(window_module) await window.dispatch_request( prompt="solve this", @@ -158,8 +157,6 @@ async def test_absent_when_reasoning_is_none( reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], ) -> None: window_module, _pyfetch, posted_bodies = reimported_window - from narada_core.models import Agent - window = _make_window(window_module) await window.dispatch_request( prompt="solve this", @@ -175,8 +172,6 @@ async def test_each_effort_level_serializes_to_string( reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], ) -> None: window_module, _pyfetch, posted_bodies = reimported_window - from narada_core.models import Agent, ReasoningEffort - window = _make_window(window_module) for level in ( @@ -207,8 +202,6 @@ async def test_dispatch_request_rejects_non_core_agent_enum( reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], ) -> None: window_module, _pyfetch, _posted = reimported_window - from narada_core.models import Agent, ReasoningEffort - window = _make_window(window_module) with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"): await window.dispatch_request( @@ -225,8 +218,6 @@ async def 
test_dispatch_request_rejects_string_agent( # String-form bypasses the type-checker overload, so the runtime check # is the only safety net here. window_module, _pyfetch, _posted = reimported_window - from narada_core.models import ReasoningEffort - window = _make_window(window_module) with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"): await window.dispatch_request( @@ -242,8 +233,6 @@ async def test_agent_rejects_non_core_agent_enum( ) -> None: # The same constraint must hold on the higher-level `agent()` method. window_module, _pyfetch, _posted = reimported_window - from narada_core.models import Agent, ReasoningEffort - window = _make_window(window_module) with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"): await window.agent( @@ -260,8 +249,6 @@ def test_values_match_backend_literal(self) -> None: # The backend declares `reasoningMode: Literal["none", "low", # "medium", "high"] | None`. If we drift, requests will start failing # validation server-side. - from narada_core.models import ReasoningEffort - assert ReasoningEffort.NONE.value == "none" assert ReasoningEffort.LOW.value == "low" assert ReasoningEffort.MEDIUM.value == "medium" @@ -270,8 +257,6 @@ def test_values_match_backend_literal(self) -> None: def test_str_enum_serializes_inline(self) -> None: # `StrEnum` values double as `str`, which is what `json.dumps` writes # without any custom encoder. - from narada_core.models import ReasoningEffort - assert json.dumps({"reasoningMode": ReasoningEffort.MEDIUM.value}) == ( '{"reasoningMode": "medium"}' ) From 40848daf934f67bf772a65bd3adbee4ebae374c7 Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Sun, 26 Apr 2026 09:51:19 -0700 Subject: [PATCH 05/13] Forward nested action_trace events unmodified MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes _strip_nested_python_events. 
The function dropped events from any nested pythonAgentRun node and stamped truncated_event_count on it, citing "deep recursion blowing up persisted JSON size" as the reason. In practice the policy was always-on and uniform — a 1-event nested trace got stripped just as readily as a 10K-event one — and the frontend already owns size enforcement via MAX_NESTED_ACTION_TRACE_BYTES in python.worker.ts plus the workflow-run-detail consumer caps. Two layers of stripping is strictly worse: small nested traces lose their events for no benefit, and the dashboard's CollapsibleNestedTrace can't recover them (it does not lazy-fetch by request_id). Now: emit_sub_agent_call forwards action_trace_raw as-is. The frontend caps when actually over budget. Tests updated to assert events flow through unmodified. --- packages/narada-pyodide/src/narada/_trace.py | 35 +++----------- packages/narada-pyodide/tests/test_trace.py | 48 ++++++-------------- 2 files changed, 19 insertions(+), 64 deletions(-) diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py index 06be667..22117a5 100644 --- a/packages/narada-pyodide/src/narada/_trace.py +++ b/packages/narada-pyodide/src/narada/_trace.py @@ -106,34 +106,6 @@ def emit_trace_event(event: dict[str, Any]) -> None: _logger.warning("trace event emission failed", exc_info=True) -def _strip_nested_python_events( - raw: list[dict[str, Any]] | None, -) -> list[dict[str, Any]] | None: - """Forward a nested action trace one level deep. Any ``pythonAgentRun`` - node inside retains its outer status/duration metadata but its ``events`` - list is dropped, preventing deep recursion from blowing up persisted - JSON size. A ``truncated_event_count`` field is left behind so the - dashboard can show that events were elided. 
- """ - if raw is None: - return None - - def strip(item: dict[str, Any]) -> dict[str, Any]: - if not isinstance(item, dict): - return item - if item.get("step_type") != "pythonAgentRun": - return item - events = item.get("events", []) - stripped = dict(item) - stripped["events"] = [] - stripped["truncated_event_count"] = ( - len(events) if isinstance(events, list) else 0 - ) - return stripped - - return [strip(item) for item in raw] - - def summarize_request(request: ExtensionActionRequest) -> dict[str, Any]: """Produce a bounded-size summary of an extension action request for display in the observability dashboard. Large payloads (sheet row values, @@ -243,7 +215,12 @@ def emit_sub_agent_call( if error_message is not None: event["error_message"] = truncate_error(error_message) if action_trace_raw is not None: - event["action_trace"] = _strip_nested_python_events(action_trace_raw) + # Forward the nested action trace as-is. Size/depth enforcement is the + # frontend's responsibility (`MAX_NESTED_ACTION_TRACE_BYTES` in + # python.worker.ts, plus the workflow-run-detail consumer caps). + # Stripping events here is redundant and prevents the dashboard from + # rendering small nested traces inline in CollapsibleNestedTrace. + event["action_trace"] = action_trace_raw emit_trace_event(event) diff --git a/packages/narada-pyodide/tests/test_trace.py b/packages/narada-pyodide/tests/test_trace.py index 4d5e34b..9c192d7 100644 --- a/packages/narada-pyodide/tests/test_trace.py +++ b/packages/narada-pyodide/tests/test_trace.py @@ -473,60 +473,38 @@ def _boom(_json: str) -> None: # --------------------------------------------------------------------------- -# Nested action_trace stripping: cap recursion depth to one level +# Nested action_trace forwarding: SDK forwards events as-is; size enforcement +# is the frontend's responsibility (MAX_NESTED_ACTION_TRACE_BYTES in +# python.worker.ts and the workflow-run-detail consumer caps).
# --------------------------------------------------------------------------- -class TestStripNestedPythonEvents: - def test_passes_through_operator_items_unchanged(self) -> None: - raw = [{"url": "https://x", "action": "click Foo"}] - assert _trace._strip_nested_python_events(raw) == raw - - def test_passes_through_non_python_apa_items_unchanged(self) -> None: - raw = [{"step_type": "goToUrl", "url": "https://x", "description": "..."}] - assert _trace._strip_nested_python_events(raw) == raw - - def test_strips_events_from_nested_python_agent_run(self) -> None: +class TestNestedActionTraceForwarding: + def test_forwards_nested_python_events_unchanged(self, recorded_events) -> None: raw = [ { "step_type": "pythonAgentRun", "url": "", "status": "success", "duration_ms": 10, - "events": [{"kind": "stdout", "ts": 1, "text": "a"}], + "events": [ + {"kind": "stdout", "ts": 1, "text": "a"}, + {"kind": "stdout", "ts": 2, "text": "b"}, + ], } ] - stripped = _trace._strip_nested_python_events(raw) - assert stripped is not None - assert stripped[0]["events"] == [] - assert stripped[0]["truncated_event_count"] == 1 - - def test_none_passes_through(self) -> None: - assert _trace._strip_nested_python_events(None) is None - - def test_integrates_with_emit_sub_agent_call(self, recorded_events) -> None: _trace.emit_sub_agent_call( ts_start=1, agent_type="custom_python", prompt="nested", status="success", - action_trace_raw=[ - { - "step_type": "pythonAgentRun", - "url": "", - "status": "success", - "duration_ms": 10, - "events": [ - {"kind": "stdout", "ts": 1, "text": "a"}, - {"kind": "stdout", "ts": 2, "text": "b"}, - ], - } - ], + action_trace_raw=raw, ) event = recorded_events.events[0] inner = event["action_trace"][0] - assert inner["events"] == [] - assert inner["truncated_event_count"] == 2 + # Events are forwarded as-is; the SDK no longer strips them. 
+ assert inner["events"] == raw[0]["events"] + assert "truncated_event_count" not in inner # --------------------------------------------------------------------------- From 7ef25b1c678f3fe7b1aa6c246097dd98942d8c2b Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Tue, 28 Apr 2026 11:20:24 -0700 Subject: [PATCH 06/13] Remove pyodide trace tests --- packages/narada-pyodide/tests/README.md | 20 - packages/narada-pyodide/tests/__init__.py | 0 packages/narada-pyodide/tests/conftest.py | 56 -- packages/narada-pyodide/tests/test_trace.py | 575 -------------------- 4 files changed, 651 deletions(-) delete mode 100644 packages/narada-pyodide/tests/README.md delete mode 100644 packages/narada-pyodide/tests/__init__.py delete mode 100644 packages/narada-pyodide/tests/conftest.py delete mode 100644 packages/narada-pyodide/tests/test_trace.py diff --git a/packages/narada-pyodide/tests/README.md b/packages/narada-pyodide/tests/README.md deleted file mode 100644 index 5ba6499..0000000 --- a/packages/narada-pyodide/tests/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# narada-pyodide tests - -narada-pyodide and narada both publish under the top-level `narada` Python -package namespace. When both are installed in the same environment, the -workspace-installed `narada` package shadows narada-pyodide's source. This -is fine at runtime (Pyodide only installs narada-pyodide) but breaks -local unit testing. - -To run the unit tests locally from the workspace root: - -```bash -uv pip uninstall narada -uv run --package narada-pyodide pytest packages/narada-pyodide/tests/ -``` - -Re-running `uv sync` will reinstall the `narada` package and require the -uninstall step again. - -The `conftest.py` stubs the Pyodide-only `js` and `pyodide.*` imports so -the non-HTTP helpers in narada-pyodide can be exercised on host CPython. 
diff --git a/packages/narada-pyodide/tests/__init__.py b/packages/narada-pyodide/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/packages/narada-pyodide/tests/conftest.py b/packages/narada-pyodide/tests/conftest.py deleted file mode 100644 index 47abb32..0000000 --- a/packages/narada-pyodide/tests/conftest.py +++ /dev/null @@ -1,56 +0,0 @@ -"""Pytest fixtures shared across narada-pyodide tests. - -narada-pyodide is designed to run inside a Pyodide web worker; several of its -transitive imports (``js``, ``pyodide.ffi``, ``pyodide.http``) are only -available in that environment. To make the pure-Python unit tests runnable on -a host CPython interpreter we stub those modules before any narada-pyodide -code is imported. The real Pyodide runtime will obviously provide them. -""" - -from __future__ import annotations - -import json -import sys -from collections.abc import Iterator -from typing import Any -from unittest.mock import MagicMock - -# Stub Pyodide-only modules. Must happen before `from narada import _trace`. -for _mod in ("js", "pyodide", "pyodide.ffi", "pyodide.http"): - if _mod not in sys.modules: - sys.modules[_mod] = MagicMock() - -import pytest # noqa: E402 - -from narada import _trace # noqa: E402 - - -class RecordingEmitter: - """Captures every event forwarded by ``_trace.emit_trace_event`` during a - test so assertions can inspect the JSON that would reach the JS harness. - """ - - def __init__(self) -> None: - self.events: list[dict[str, Any]] = [] - - def __call__(self, event_json: str) -> None: - # Round-trip through json to catch non-serialisable payloads early. - self.events.append(json.loads(event_json)) - - -@pytest.fixture -def recorded_events() -> Iterator[RecordingEmitter]: - """Replace the JS-harness-injected ``_narada_emit_trace_event`` with a - recorder for the duration of a test, restoring the original binding - afterwards. 
- """ - emitter = RecordingEmitter() - previous = getattr(_trace, "_narada_emit_trace_event", None) - _trace._narada_emit_trace_event = emitter # type: ignore[attr-defined] - try: - yield emitter - finally: - if previous is None: - delattr(_trace, "_narada_emit_trace_event") - else: - _trace._narada_emit_trace_event = previous # type: ignore[attr-defined] diff --git a/packages/narada-pyodide/tests/test_trace.py b/packages/narada-pyodide/tests/test_trace.py deleted file mode 100644 index 9c192d7..0000000 --- a/packages/narada-pyodide/tests/test_trace.py +++ /dev/null @@ -1,575 +0,0 @@ -"""Tests for the private ``narada._trace`` module. - -Covers the pure helpers (truncation, request/response summarisation) plus the -``emit_*`` functions, asserting that the JSON payloads emitted to the JS -harness match the ``PythonTraceEvent`` Pydantic schema defined in -``narada_core.actions.models``. -""" - -from __future__ import annotations - -import pytest -from narada_core.actions.models import ( - AgenticMouseActionRequest, - AgenticSelectorRequest, - CloseWindowRequest, - GetFullHtmlRequest, - GetFullHtmlResponse, - GetScreenshotRequest, - GetScreenshotResponse, - GetSimplifiedHtmlRequest, - GetSimplifiedHtmlResponse, - GetUrlRequest, - GetUrlResponse, - GoToUrlRequest, - PrintMessageRequest, - PythonAgentRunTrace, - ReadGoogleSheetRequest, - ReadGoogleSheetResponse, - WriteGoogleSheetRequest, - parse_action_trace, -) - -from narada import _trace - - -# --------------------------------------------------------------------------- -# Truncation -# --------------------------------------------------------------------------- - - -class TestTruncate: - def test_returns_none_for_none(self) -> None: - assert _trace.truncate(None, 10) is None - - def test_preserves_short_strings(self) -> None: - assert _trace.truncate("hello", 10) == "hello" - - def test_preserves_exact_length(self) -> None: - assert _trace.truncate("1234567890", 10) == "1234567890" - - def 
test_truncates_long_strings_with_ellipsis(self) -> None: - result = _trace.truncate("abcdefghij", 5) - assert result is not None - assert len(result) == 5 - assert result.endswith("\u2026") - assert result.startswith("abcd") - - def test_truncate_prompt_falls_back_to_empty(self) -> None: - assert _trace.truncate_prompt("") == "" - - def test_truncate_error_bounded(self) -> None: - long = "x" * 5000 - result = _trace.truncate_error(long) - assert len(result) == 1000 - assert result.endswith("\u2026") - - -# --------------------------------------------------------------------------- -# summarize_request / summarize_response -# --------------------------------------------------------------------------- - - -class TestSummarizeRequest: - def test_go_to_url(self) -> None: - req = GoToUrlRequest(url="https://example.com", new_tab=True) - assert _trace.summarize_request(req) == { - "url": "https://example.com", - "new_tab": True, - } - - @pytest.mark.parametrize( - "request_instance", - [ - GetUrlRequest(), - GetScreenshotRequest(), - GetFullHtmlRequest(), - GetSimplifiedHtmlRequest(), - CloseWindowRequest(), - ], - ) - def test_parameterless_requests_return_empty( - self, request_instance: object - ) -> None: - assert _trace.summarize_request(request_instance) == {} # type: ignore[arg-type] - - def test_read_google_sheet(self) -> None: - req = ReadGoogleSheetRequest(spreadsheet_id="abc123", range="Sheet1!A1:B10") - assert _trace.summarize_request(req) == { - "spreadsheet_id": "abc123", - "range": "Sheet1!A1:B10", - } - - def test_write_google_sheet_reports_row_count_not_values(self) -> None: - big_values = [["r"] * 5 for _ in range(847)] - req = WriteGoogleSheetRequest( - spreadsheet_id="abc123", range="Sheet1!A1:E847", values=big_values - ) - summary = _trace.summarize_request(req) - assert summary == { - "spreadsheet_id": "abc123", - "range": "Sheet1!A1:E847", - "row_count": 847, - } - # Explicitly guard against regressions that leak row payloads. 
- assert "values" not in summary - - def test_print_message_truncates_long_messages(self) -> None: - long_msg = "x" * 2000 - summary = _trace.summarize_request(PrintMessageRequest(message=long_msg)) - truncated = summary["message"] - assert isinstance(truncated, str) - assert len(truncated) == 500 - assert truncated.endswith("\u2026") - - def test_agentic_selector_reports_action_type_and_truncates_query(self) -> None: - req = AgenticSelectorRequest( - action={"type": "click"}, - selectors={"id": "submit-btn"}, - fallback_operator_query="y" * 1000, - ) - summary = _trace.summarize_request(req) - assert summary["action_type"] == "click" - assert len(summary["fallback_operator_query"]) == 200 - # Selectors are intentionally omitted (not user-useful in trace view). - assert "selectors" not in summary - - def test_agentic_mouse_action(self) -> None: - req = AgenticMouseActionRequest( - action={"type": "click"}, - recorded_click={"x": 1, "y": 2, "viewport": {"width": 10, "height": 20}}, - fallback_operator_query="click the button", - resize_window=False, - ) - summary = _trace.summarize_request(req) - assert summary == { - "action_type": "click", - "fallback_operator_query": "click the button", - } - - -class TestSummarizeResponse: - def test_get_url_returns_url(self) -> None: - req = GetUrlRequest() - resp = GetUrlResponse(url="https://example.com/page") - assert _trace.summarize_response(req, resp) == { - "url": "https://example.com/page" - } - - def test_get_screenshot_returns_fixed_description(self) -> None: - req = GetScreenshotRequest() - resp = GetScreenshotResponse( - base64_content="...huge blob...", - name="page.png", - mime_type="image/png", - timestamp="2025-01-01T00:00:00Z", - ) - summary = _trace.summarize_response(req, resp) - assert summary == {"description": "Took screenshot of the page"} - - def test_full_html_returns_fixed_description(self) -> None: - summary = _trace.summarize_response( - GetFullHtmlRequest(), GetFullHtmlResponse(html="...massive...") 
- ) - assert summary == {"description": "Got the full HTML of the page"} - - def test_simplified_html_returns_fixed_description(self) -> None: - summary = _trace.summarize_response( - GetSimplifiedHtmlRequest(), - GetSimplifiedHtmlResponse(html="short"), - ) - assert summary == {"description": "Got the simplified HTML of the page"} - - def test_read_google_sheet_reports_dimensions(self) -> None: - req = ReadGoogleSheetRequest(spreadsheet_id="x", range="A1:C5") - resp = ReadGoogleSheetResponse(values=[["a", "b", "c"], ["d", "e", "f"], ["g"]]) - assert _trace.summarize_response(req, resp) == { - "row_count": 3, - "column_count": 3, - } - - def test_read_google_sheet_empty_values(self) -> None: - req = ReadGoogleSheetRequest(spreadsheet_id="x", range="A1:C5") - resp = ReadGoogleSheetResponse(values=[]) - assert _trace.summarize_response(req, resp) == { - "row_count": 0, - "column_count": 0, - } - - def test_write_google_sheet_returns_none(self) -> None: - req = WriteGoogleSheetRequest(spreadsheet_id="x", range="A1", values=[["v"]]) - assert _trace.summarize_response(req, None) is None - - def test_close_window_returns_none(self) -> None: - assert _trace.summarize_response(CloseWindowRequest(), None) is None - - -# --------------------------------------------------------------------------- -# Event emitters -# --------------------------------------------------------------------------- - - -class TestEmitSubAgentCall: - def test_success_with_action_trace(self, recorded_events) -> None: - _trace.emit_sub_agent_call( - ts_start=1000, - agent_type="operator", - prompt="Find leads", - status="success", - request_id="req_abc", - action_trace_raw=[{"url": "https://sf.com", "action": "click Leads"}], - ) - (event,) = recorded_events.events - assert event["kind"] == "subAgentCall" - assert event["ts_start"] == 1000 - assert event["ts_end"] >= 1000 - assert event["agent_type"] == "operator" - assert event["prompt"] == "Find leads" - assert event["status"] == "success" - assert 
event["request_id"] == "req_abc" - assert event["action_trace"] == [ - {"url": "https://sf.com", "action": "click Leads"} - ] - assert "error_message" not in event - - def test_success_without_action_trace_omits_field(self, recorded_events) -> None: - _trace.emit_sub_agent_call( - ts_start=1000, agent_type="operator", prompt="hi", status="success" - ) - (event,) = recorded_events.events - assert "action_trace" not in event - assert "request_id" not in event - - def test_timeout_includes_error_message(self, recorded_events) -> None: - _trace.emit_sub_agent_call( - ts_start=1000, - agent_type="operator", - prompt="hi", - status="timeout", - error_message="Timed out after 60s", - ) - (event,) = recorded_events.events - assert event["status"] == "timeout" - assert event["error_message"] == "Timed out after 60s" - - def test_error_truncates_error_message(self, recorded_events) -> None: - _trace.emit_sub_agent_call( - ts_start=1000, - agent_type="operator", - prompt="hi", - status="error", - error_message="x" * 5000, - ) - (event,) = recorded_events.events - assert len(event["error_message"]) == 1000 - - def test_prompt_is_truncated(self, recorded_events) -> None: - _trace.emit_sub_agent_call( - ts_start=1000, - agent_type="operator", - prompt="y" * 1000, - status="success", - ) - (event,) = recorded_events.events - assert len(event["prompt"]) == 500 - - -class TestEmitExtensionAction: - def test_success_with_result_summary(self, recorded_events) -> None: - req = GetUrlRequest() - resp = GetUrlResponse(url="https://x.com") - _trace.emit_extension_action( - ts_start=2000, request=req, status="success", response=resp - ) - (event,) = recorded_events.events - assert event["kind"] == "extensionAction" - assert event["action_name"] == "get_url" - assert event["request_summary"] == {} - assert event["result_summary"] == {"url": "https://x.com"} - assert event["status"] == "success" - - def test_success_without_result_summary_omits_field(self, recorded_events) -> None: - req = 
WriteGoogleSheetRequest( - spreadsheet_id="abc", range="A1:B2", values=[["1", "2"], ["3", "4"]] - ) - _trace.emit_extension_action(ts_start=2000, request=req, status="success") - (event,) = recorded_events.events - assert event["request_summary"] == { - "spreadsheet_id": "abc", - "range": "A1:B2", - "row_count": 2, - } - assert "result_summary" not in event - - def test_timeout(self, recorded_events) -> None: - _trace.emit_extension_action( - ts_start=0, - request=GoToUrlRequest(url="https://a.b", new_tab=False), - status="timeout", - error_message="Timed out", - ) - (event,) = recorded_events.events - assert event["status"] == "timeout" - assert event["action_name"] == "go_to_url" - - def test_error(self, recorded_events) -> None: - _trace.emit_extension_action( - ts_start=0, - request=CloseWindowRequest(), - status="error", - error_message="permission denied", - ) - (event,) = recorded_events.events - assert event["status"] == "error" - assert event["error_message"] == "permission denied" - - -class TestEmitSideEffect: - def test_download_file(self, recorded_events) -> None: - _trace.emit_side_effect( - effect_type="download_file", description="Downloaded file: report.pdf" - ) - (event,) = recorded_events.events - assert event["kind"] == "sideEffect" - assert event["effect_type"] == "download_file" - assert event["description"] == "Downloaded file: report.pdf" - assert "ts" in event - - def test_render_html(self, recorded_events) -> None: - _trace.emit_side_effect( - effect_type="render_html", description="Rendered HTML in a new tab" - ) - (event,) = recorded_events.events - assert event["effect_type"] == "render_html" - - -# --------------------------------------------------------------------------- -# End-to-end schema validation: every event kind produced by the emitters -# round-trips cleanly through the ``PythonAgentRunTrace`` Pydantic model and -# the ``parse_action_trace`` entry point used by downstream consumers. 
-# --------------------------------------------------------------------------- - - -class TestPythonAgentRunTraceRoundtrip: - def test_every_event_kind_parses(self, recorded_events) -> None: - _trace.emit_sub_agent_call( - ts_start=1000, - agent_type="operator", - prompt="Find leads", - status="success", - request_id="req_abc", - action_trace_raw=[{"url": "https://sf.com", "action": "click Leads"}], - ) - _trace.emit_extension_action( - ts_start=2000, - request=GetScreenshotRequest(), - status="success", - response=GetScreenshotResponse( - base64_content="ignored", - name="page.png", - mime_type="image/png", - timestamp="now", - ), - ) - _trace.emit_side_effect( - effect_type="download_file", description="Downloaded file: leads.csv" - ) - - # Assemble a representative PythonAgentRunTrace containing the emitted - # events alongside stdout / stderr events (which are synthesised by - # the JS-side runnable, not the SDK). - stdout_stderr_events = [ - {"kind": "stdout", "ts": 500, "text": "starting"}, - {"kind": "stderr", "ts": 2500, "text": "deprecation warning"}, - ] - events = stdout_stderr_events + recorded_events.events - events.sort(key=lambda e: e.get("ts", e.get("ts_start", 0))) - - raw = [ - { - "step_type": "pythonAgentRun", - "url": "https://app.narada.ai/agent", - "status": "success", - "duration_ms": 3000, - "events": events, - } - ] - trace = parse_action_trace(raw) - assert len(trace) == 1 - (node,) = trace - assert isinstance(node, PythonAgentRunTrace) - # Order reflects the real wall-clock timestamps: the emitters stamp - # events with ``now_ms()`` at emit time, which in this test runs much - # later than the synthetic stdout/stderr timestamps below. The side - # effect therefore sorts after ``stderr`` (ts=2500). - assert [e.kind for e in node.events] == [ - "stdout", - "subAgentCall", - "extensionAction", - "stderr", - "sideEffect", - ] - # Nested action_trace rehydrates correctly as an OperatorActionTrace. 
- sub_call = node.events[1] - assert sub_call.kind == "subAgentCall" - assert sub_call.action_trace is not None - assert sub_call.action_trace[0].url == "https://sf.com" - - def test_error_status_parses(self) -> None: - raw = [ - { - "step_type": "pythonAgentRun", - "url": "https://x", - "status": "error", - "duration_ms": 120, - "error_message": "ZeroDivisionError", - "events": [], - } - ] - trace = parse_action_trace(raw) - assert isinstance(trace[0], PythonAgentRunTrace) - assert trace[0].status == "error" - assert trace[0].error_message == "ZeroDivisionError" - - -# --------------------------------------------------------------------------- -# Defensive emit: observability must never break the user's agent run -# --------------------------------------------------------------------------- - - -class TestEmitDefensive: - def test_non_serialisable_payload_is_stringified_not_raised( - self, recorded_events - ) -> None: - """A stray datetime / set / custom object in a summary should not crash - user code mid-run. ``default=str`` stringifies and the event still - reaches the harness.""" - import datetime as _dt - - _trace.emit_trace_event( - { - "kind": "stdout", - "ts": _dt.datetime(2026, 1, 1), # non-serialisable in std json - "text": "hello", - } - ) - # Event was recorded (ts got stringified by default=str). - assert len(recorded_events.events) == 1 - assert isinstance(recorded_events.events[0]["ts"], str) - - def test_harness_raising_does_not_propagate(self, monkeypatch) -> None: - """If the JS-injected emitter raises, we swallow and log rather than - propagate — tracing failures must not break the agent run.""" - - def _boom(_json: str) -> None: - raise RuntimeError("bridge down") - - # `_narada_emit_trace_event` is injected by the JS harness at runtime - # (TYPE_CHECKING stub only in source); set without `raising` so the - # assignment succeeds even when the attribute isn't yet bound. 
- monkeypatch.setattr(_trace, "_narada_emit_trace_event", _boom, raising=False) - # Must not raise. - _trace.emit_trace_event({"kind": "stdout", "ts": 1, "text": "hi"}) - - -# --------------------------------------------------------------------------- -# Nested action_trace forwarding: SDK forwards events as-is; size enforcement -# is the frontend's responsibility (MAX_NESTED_ACTION_TRACE_BYTES in -# python.worker.ts and the workflow-run-detail consumer caps). -# --------------------------------------------------------------------------- - - -class TestNestedActionTraceForwarding: - def test_forwards_nested_python_events_unchanged(self, recorded_events) -> None: - raw = [ - { - "step_type": "pythonAgentRun", - "url": "", - "status": "success", - "duration_ms": 10, - "events": [ - {"kind": "stdout", "ts": 1, "text": "a"}, - {"kind": "stdout", "ts": 2, "text": "b"}, - ], - } - ] - _trace.emit_sub_agent_call( - ts_start=1, - agent_type="custom_python", - prompt="nested", - status="success", - action_trace_raw=raw, - ) - event = recorded_events.events[0] - inner = event["action_trace"][0] - # Events are forwarded as-is; the SDK no longer strips them. 
- assert inner["events"] == raw[0]["events"] - assert "truncated_event_count" not in inner - - -# --------------------------------------------------------------------------- -# Pydantic invariants on new event models -# --------------------------------------------------------------------------- - - -class TestPythonEventInvariants: - def test_sub_agent_call_rejects_ts_end_before_ts_start(self) -> None: - from narada_core.actions.models import PythonSubAgentCallEvent - from pydantic import ValidationError - - with pytest.raises(ValidationError, match="ts_end"): - PythonSubAgentCallEvent( - ts_start=1000, - ts_end=999, - agent_type="operator", - prompt="p", - status="success", - ) - - def test_extension_action_rejects_ts_end_before_ts_start(self) -> None: - from narada_core.actions.models import PythonExtensionActionEvent - from pydantic import ValidationError - - with pytest.raises(ValidationError, match="ts_end"): - PythonExtensionActionEvent( - ts_start=1000, - ts_end=999, - action_name="get_url", - request_summary={}, - status="success", - ) - - def test_python_agent_run_rejects_negative_duration(self) -> None: - from pydantic import ValidationError - - with pytest.raises(ValidationError): - PythonAgentRunTrace( - url="", - status="success", - duration_ms=-1, - events=[], - ) - - -# --------------------------------------------------------------------------- -# Deterministic parse_action_trace selection -# --------------------------------------------------------------------------- - - -class TestParseActionTraceDispatch: - def test_empty_list_parses_as_apa(self) -> None: - result = parse_action_trace([]) - assert result == [] - - def test_step_type_routes_to_apa_adapter(self) -> None: - result = parse_action_trace( - [{"step_type": "goToUrl", "url": "https://x", "description": "..."}] - ) - assert result[0].step_type == "goToUrl" - - def test_action_plus_url_routes_to_operator_adapter(self) -> None: - from narada_core.actions.models import OperatorActionTraceItem 
- - result = parse_action_trace([{"url": "https://x", "action": "click Foo"}]) - assert isinstance(result[0], OperatorActionTraceItem) - assert result[0].action == "click Foo" From cd05fb7b7b44058f3ad6d95b6d731ff080373b88 Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Tue, 28 Apr 2026 18:01:35 -0700 Subject: [PATCH 07/13] Remove pyodide reasoning test --- .../narada-pyodide/tests/test_reasoning.py | 262 ------------------ 1 file changed, 262 deletions(-) delete mode 100644 packages/narada-pyodide/tests/test_reasoning.py diff --git a/packages/narada-pyodide/tests/test_reasoning.py b/packages/narada-pyodide/tests/test_reasoning.py deleted file mode 100644 index 2844015..0000000 --- a/packages/narada-pyodide/tests/test_reasoning.py +++ /dev/null @@ -1,262 +0,0 @@ -"""Tests for the `reasoning` parameter on the Core Agent. - -These exercise the `narada-pyodide` window because it is the only package with -a runnable test harness today; the impl in the sibling `narada` package shares -the same request-body wiring and runtime check, so coverage here verifies the -behavior across both code paths. - -We mirror `test_cloud_browser.py`'s module-clearing pattern: each test gets a -fresh import of `narada.window` with a freshly stubbed `pyodide.http.pyfetch`, -because cached module references from earlier tests would otherwise leak into -this file when the suite runs in alphabetical order. 
-""" - -from __future__ import annotations - -import importlib -import json -import sys -from collections.abc import Iterator -from types import ModuleType, SimpleNamespace -from typing import Any -from unittest.mock import AsyncMock - -import pytest -from narada_core.models import Agent, ReasoningEffort - - -def _clear_modules() -> None: - for name in list(sys.modules): - if name == "narada" or name.startswith("narada."): - sys.modules.pop(name, None) - for name in ("js", "pyodide", "pyodide.http", "pyodide.ffi"): - sys.modules.pop(name, None) - - -class _FakeResponse: - def __init__(self, *, ok: bool = True, json_data: object = None) -> None: - self.ok = ok - self.status = 200 - self._json_data = json_data - - async def json(self) -> object: - return self._json_data - - async def text(self) -> str: - return "" - - -def _make_pyfetch_recorder() -> tuple[AsyncMock, list[dict[str, Any]]]: - """Build an `AsyncMock` for `pyfetch` that captures every JSON body posted - to /remote-dispatch and returns a canned success response on the poll.""" - posted_bodies: list[dict[str, Any]] = [] - - async def _impl(url: str, **kwargs: Any) -> _FakeResponse: - if "body" in kwargs: - posted_bodies.append(json.loads(kwargs["body"])) - if url.endswith("/remote-dispatch"): - return _FakeResponse(json_data={"requestId": "req-test"}) - return _FakeResponse( - json_data={ - "status": "success", - "response": { - "text": "ok", - "output": {"type": "text", "content": "ok"}, - }, - "createdAt": "now", - "completedAt": "now", - "usage": {"actions": 0, "credits": 0.0}, - } - ) - - pyfetch = AsyncMock(side_effect=_impl) - return pyfetch, posted_bodies - - -@pytest.fixture -def reimported_window( - monkeypatch: pytest.MonkeyPatch, -) -> Iterator[tuple[ModuleType, AsyncMock, list[dict[str, Any]]]]: - """Force a fresh import of `narada.window` after planting freshly-mocked - Pyodide-bridge modules. 
Yields the window module, the captured `pyfetch` - mock, and the list that records every posted JSON body. - """ - _clear_modules() - - js_module = ModuleType("js") - js_module.AbortController = SimpleNamespace( # type: ignore[attr-defined] - new=lambda: SimpleNamespace(signal=object(), abort=lambda: None) - ) - js_module.setTimeout = lambda callback, timeout: None # type: ignore[attr-defined] - - pyodide_module = ModuleType("pyodide") - pyodide_module.__path__ = [] # type: ignore[attr-defined] - - pyfetch, posted_bodies = _make_pyfetch_recorder() - pyodide_http_module = ModuleType("pyodide.http") - pyodide_http_module.pyfetch = pyfetch # type: ignore[attr-defined] - - pyodide_ffi_module = ModuleType("pyodide.ffi") - - class _FakeJsProxy: - def __init__(self, value: object) -> None: - self._value = value - - def to_py(self) -> object: - return self._value - - pyodide_ffi_module.JsProxy = _FakeJsProxy # type: ignore[attr-defined] - pyodide_ffi_module.create_once_callable = lambda fn: fn # type: ignore[attr-defined] - - monkeypatch.setitem(sys.modules, "js", js_module) - monkeypatch.setitem(sys.modules, "pyodide", pyodide_module) - monkeypatch.setitem(sys.modules, "pyodide.http", pyodide_http_module) - monkeypatch.setitem(sys.modules, "pyodide.ffi", pyodide_ffi_module) - - window_module = importlib.import_module("narada.window") - window_module._narada_parent_run_ids = _FakeJsProxy([]) # type: ignore[attr-defined] - yield window_module, pyfetch, posted_bodies - _clear_modules() - - -def _make_window(window_module: ModuleType) -> Any: - window = window_module.LocalBrowserWindow.__new__(window_module.LocalBrowserWindow) - window._auth_headers = {"x-narada-test": "1"} - window._base_url = "https://example.invalid/api" - window._browser_window_id = "test-window" - - async def _stub_auth_headers() -> dict[str, str]: - return {"x-narada-test": "1"} - - window._get_auth_headers = _stub_auth_headers - window._current_parent_run_ids = lambda: [] - return window - - -class 
TestReasoningBodyWiring: - """The `reasoning` arg flows through to the JSON body as `reasoningMode`.""" - - @pytest.mark.asyncio - async def test_present_when_reasoning_is_set( - self, - reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], - ) -> None: - window_module, _pyfetch, posted_bodies = reimported_window - window = _make_window(window_module) - await window.dispatch_request( - prompt="solve this", - agent=Agent.CORE_AGENT, - reasoning=ReasoningEffort.MEDIUM, - ) - - assert posted_bodies[0]["reasoningMode"] == "medium" - - @pytest.mark.asyncio - async def test_absent_when_reasoning_is_none( - self, - reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], - ) -> None: - window_module, _pyfetch, posted_bodies = reimported_window - window = _make_window(window_module) - await window.dispatch_request( - prompt="solve this", - agent=Agent.CORE_AGENT, - ) - - # Absent (not null) — wire-compatible with backends predating the field. - assert "reasoningMode" not in posted_bodies[0] - - @pytest.mark.asyncio - async def test_each_effort_level_serializes_to_string( - self, - reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], - ) -> None: - window_module, _pyfetch, posted_bodies = reimported_window - window = _make_window(window_module) - - for level in ( - ReasoningEffort.NONE, - ReasoningEffort.LOW, - ReasoningEffort.MEDIUM, - ReasoningEffort.HIGH, - ): - await window.dispatch_request( - prompt="x", - agent=Agent.CORE_AGENT, - reasoning=level, - ) - - seen = [b["reasoningMode"] for b in posted_bodies if "reasoningMode" in b] - assert seen == ["none", "low", "medium", "high"] - - -class TestReasoningRuntimeValidation: - """Misuse — `reasoning` paired with a non-Core agent — fails fast at runtime - with a clear message. 
The overload contract on the public `agent()` method - catches this at type-check time when callers use the enum, but the runtime - check covers the string-form (`agent="..."`) and untyped paths.""" - - @pytest.mark.asyncio - async def test_dispatch_request_rejects_non_core_agent_enum( - self, - reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], - ) -> None: - window_module, _pyfetch, _posted = reimported_window - window = _make_window(window_module) - with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"): - await window.dispatch_request( - prompt="x", - agent=Agent.OPERATOR, - reasoning=ReasoningEffort.MEDIUM, # pyright: ignore[reportCallIssue] - ) - - @pytest.mark.asyncio - async def test_dispatch_request_rejects_string_agent( - self, - reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], - ) -> None: - # String-form bypasses the type-checker overload, so the runtime check - # is the only safety net here. - window_module, _pyfetch, _posted = reimported_window - window = _make_window(window_module) - with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"): - await window.dispatch_request( - prompt="x", - agent="some-custom-agent", - reasoning=ReasoningEffort.HIGH, # pyright: ignore[reportCallIssue] - ) - - @pytest.mark.asyncio - async def test_agent_rejects_non_core_agent_enum( - self, - reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]], - ) -> None: - # The same constraint must hold on the higher-level `agent()` method. 
- window_module, _pyfetch, _posted = reimported_window - window = _make_window(window_module) - with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"): - await window.agent( - prompt="x", - agent=Agent.OPERATOR, - reasoning=ReasoningEffort.LOW, # pyright: ignore[reportCallIssue] - ) - - -class TestReasoningEffortEnum: - """The enum values are exactly what the backend expects.""" - - def test_values_match_backend_literal(self) -> None: - # The backend declares `reasoningMode: Literal["none", "low", - # "medium", "high"] | None`. If we drift, requests will start failing - # validation server-side. - assert ReasoningEffort.NONE.value == "none" - assert ReasoningEffort.LOW.value == "low" - assert ReasoningEffort.MEDIUM.value == "medium" - assert ReasoningEffort.HIGH.value == "high" - - def test_str_enum_serializes_inline(self) -> None: - # `StrEnum` values double as `str`, which is what `json.dumps` writes - # without any custom encoder. - assert json.dumps({"reasoningMode": ReasoningEffort.MEDIUM.value}) == ( - '{"reasoningMode": "medium"}' - ) From 66e9fb3c89d17e743819d69a4cec779f83f668eb Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Wed, 29 Apr 2026 19:03:02 -0700 Subject: [PATCH 08/13] Address Python trace review comments --- .../src/narada_core/actions/models.py | 4 - packages/narada-pyodide/src/narada/_trace.py | 150 ++---------------- 2 files changed, 16 insertions(+), 138 deletions(-) diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index 4e46ee2..24e03ec 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -308,10 +308,6 @@ class PythonAgentRunTrace(BaseModel): duration_ms: NonNegativeInt events: list[PythonTraceEvent] error_message: str | None = None - # Set by the runtime when it caps the number of buffered events (see - # `python.worker.ts`). 
Informational only; the dashboard surfaces it so - # users know their trace is partial. - truncated_event_count: NonNegativeInt | None = None ApaStepTrace = Annotated[ diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py index 22117a5..0586a48 100644 --- a/packages/narada-pyodide/src/narada/_trace.py +++ b/packages/narada-pyodide/src/narada/_trace.py @@ -18,49 +18,16 @@ import time from typing import TYPE_CHECKING, Any, Literal -from narada_core.actions.models import ( - AgenticMouseActionRequest, - AgenticSelectorRequest, - CloseWindowRequest, - ExtensionActionRequest, - GetFullHtmlRequest, - GetScreenshotRequest, - GetSimplifiedHtmlRequest, - GetUrlRequest, - GetUrlResponse, - GoToUrlRequest, - PrintMessageRequest, - ReadGoogleSheetRequest, - ReadGoogleSheetResponse, - WriteGoogleSheetRequest, -) +from narada_core.actions.models import ExtensionActionRequest from pydantic import BaseModel if TYPE_CHECKING: - # Injected by the JavaScript harness at worker startup (see - # `frontend/src/lib/apa/python/python.worker.ts`). narada-pyodide is + # Injected by the JavaScript harness at worker startup. narada-pyodide is # only ever imported under a Pyodide worker that has registered this # builtin; there is no non-Pyodide code path. def _narada_emit_trace_event(event_json: str) -> None: ... -# Hard caps on payload sizes carried in trace events. Values are large enough -# that typical prompts and error messages survive intact but small enough to -# bound worst-case persisted actionTrace JSON. -_MAX_PROMPT_CHARS = 500 -_MAX_MESSAGE_CHARS = 500 -_MAX_ERROR_CHARS = 1000 -_MAX_QUERY_CHARS = 200 - -# When a sub-agent's response includes its own action trace (for example, the -# operator's step-by-step actions), we forward that trace one level deep so -# the dashboard can expand it. 
We do not forward deeper nesting — Python -# agents that delegate into other Python agents would otherwise produce -# exponentially-sized persisted traces. -_MAX_NESTED_ACTION_TRACE_DEPTH = 1 - -_ELLIPSIS = "\u2026" - _logger = logging.getLogger(__name__) @@ -69,24 +36,6 @@ def now_ms() -> int: return int(time.time() * 1000) -def truncate(value: str | None, max_chars: int) -> str | None: - """Return ``value`` shortened to at most ``max_chars`` characters, suffixed - with an ellipsis when truncation occurred. Returns ``None`` unchanged.""" - if value is None: - return None - if len(value) <= max_chars: - return value - return value[: max_chars - 1] + _ELLIPSIS - - -def truncate_prompt(prompt: str) -> str: - return truncate(prompt, _MAX_PROMPT_CHARS) or "" - - -def truncate_error(error: str) -> str: - return truncate(error, _MAX_ERROR_CHARS) or "" - - def emit_trace_event(event: dict[str, Any]) -> None: """Forward a single trace event to the JavaScript harness. @@ -106,75 +55,14 @@ def emit_trace_event(event: dict[str, Any]) -> None: _logger.warning("trace event emission failed", exc_info=True) -def summarize_request(request: ExtensionActionRequest) -> dict[str, Any]: - """Produce a bounded-size summary of an extension action request for - display in the observability dashboard. Large payloads (sheet row values, - selector graphs) are reduced to row counts or action types; free-form - strings are truncated. - - The returned dict is always JSON-serialisable and fits the - ``PythonExtensionActionEvent.request_summary`` field. 
- """ - if isinstance(request, GoToUrlRequest): - return {"url": request.url, "new_tab": request.new_tab} - if isinstance( - request, - ( - GetUrlRequest, - GetScreenshotRequest, - GetFullHtmlRequest, - GetSimplifiedHtmlRequest, - CloseWindowRequest, - ), - ): - return {} - if isinstance(request, ReadGoogleSheetRequest): - return {"spreadsheet_id": request.spreadsheet_id, "range": request.range} - if isinstance(request, WriteGoogleSheetRequest): - return { - "spreadsheet_id": request.spreadsheet_id, - "range": request.range, - "row_count": len(request.values), - } - if isinstance(request, PrintMessageRequest): - return {"message": truncate(request.message, _MAX_MESSAGE_CHARS)} - if isinstance(request, (AgenticSelectorRequest, AgenticMouseActionRequest)): - return { - "action_type": request.action["type"], - "fallback_operator_query": truncate( - request.fallback_operator_query, _MAX_QUERY_CHARS - ), - } - # ExtensionActionRequest is a closed union today. If a new variant is - # added without updating this function, we degrade gracefully to an empty - # summary rather than crashing the user's agent mid-run. - return {} - - -def summarize_response( - request: ExtensionActionRequest, - response: BaseModel | None, -) -> dict[str, Any] | None: - """Produce a bounded-size summary of an extension action response, keyed - on the originating request type. Returns ``None`` for actions that have - no observable result (writes, navigations, close) so the dashboard can - omit an empty row rather than rendering a hollow card. 
- """ - if isinstance(request, GetUrlRequest) and isinstance(response, GetUrlResponse): - return {"url": response.url} - if isinstance(request, GetScreenshotRequest): - return {"description": "Took screenshot of the page"} - if isinstance(request, GetFullHtmlRequest): - return {"description": "Got the full HTML of the page"} - if isinstance(request, GetSimplifiedHtmlRequest): - return {"description": "Got the simplified HTML of the page"} - if isinstance(request, ReadGoogleSheetRequest) and isinstance( - response, ReadGoogleSheetResponse - ): - rows = response.values - column_count = max((len(row) for row in rows), default=0) - return {"row_count": len(rows), "column_count": column_count} - return None +def dump_model(model: BaseModel) -> dict[str, Any]: + """Return the model's JSON-ready representation for trace persistence.""" + try: + return model.model_dump(mode="json") + except TypeError: + # Some narada-core request models override model_dump without accepting + # Pydantic's keyword arguments. + return model.model_dump() # --------------------------------------------------------------------------- @@ -207,19 +95,14 @@ def emit_sub_agent_call( "ts_start": ts_start, "ts_end": now_ms(), "agent_type": agent_type, - "prompt": truncate_prompt(prompt), + "prompt": prompt, "status": status, } if request_id is not None: event["request_id"] = request_id if error_message is not None: - event["error_message"] = truncate_error(error_message) + event["error_message"] = error_message if action_trace_raw is not None: - # Forward the nested action trace as-is. Size/depth enforcement is the - # frontend's responsibility (`MAX_NESTED_ACTION_TRACE_BYTES` in - # python.worker.ts, plus the workflow-run-detail consumer caps). - # Stripping events here is redundant and prevents the dashboard from - # rendering small inline nested traces inline in CollapsibleNestedTrace. 
event["action_trace"] = action_trace_raw emit_trace_event(event) @@ -237,14 +120,13 @@ def emit_extension_action( "ts_start": ts_start, "ts_end": now_ms(), "action_name": request.name, - "request_summary": summarize_request(request), + "request_summary": dump_model(request), "status": status, } - result_summary = summarize_response(request, response) - if result_summary is not None: - event["result_summary"] = result_summary + if response is not None: + event["result_summary"] = dump_model(response) if error_message is not None: - event["error_message"] = truncate_error(error_message) + event["error_message"] = error_message emit_trace_event(event) From 68150bc2b8905f22d22126d3c20bed4437380077 Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Thu, 30 Apr 2026 10:20:25 -0700 Subject: [PATCH 09/13] Move trace models into tracing package --- .../src/narada_core/actions/models.py | 398 ++---------------- .../src/narada_core/tracing/__init__.py | 2 + .../src/narada_core/tracing/model.py | 361 ++++++++++++++++ packages/narada-pyodide/src/narada/_trace.py | 4 +- packages/narada-pyodide/src/narada/window.py | 2 +- packages/narada/src/narada/window.py | 2 +- 6 files changed, 412 insertions(+), 357 deletions(-) create mode 100644 packages/narada-core/src/narada_core/tracing/__init__.py create mode 100644 packages/narada-core/src/narada_core/tracing/model.py diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index 24e03ec..db49d64 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -15,12 +15,53 @@ from pydantic import ( BaseModel, Field, - NonNegativeInt, - TypeAdapter, - ValidationError, - model_validator, ) +from narada_core.tracing import model as tracing_model + +ActionTrace = tracing_model.ActionTrace +AgentTrace = tracing_model.AgentTrace +AgenticMouseActionTrace = tracing_model.AgenticMouseActionTrace +AgenticSelectorTrace 
= tracing_model.AgenticSelectorTrace +ApaActionTrace = tracing_model.ApaActionTrace +ApaStepTrace = tracing_model.ApaStepTrace +DataTableExportAsCsvTrace = tracing_model.DataTableExportAsCsvTrace +DataTableInsertRowTrace = tracing_model.DataTableInsertRowTrace +DataTableUpdateCellValueTrace = tracing_model.DataTableUpdateCellValueTrace +EndTrace = tracing_model.EndTrace +ForLoopTrace = tracing_model.ForLoopTrace +GetFullHtmlTrace = tracing_model.GetFullHtmlTrace +GetScreenshotTrace = tracing_model.GetScreenshotTrace +GetSimplifiedHtmlTrace = tracing_model.GetSimplifiedHtmlTrace +GetUrlTrace = tracing_model.GetUrlTrace +GoToUrlTrace = tracing_model.GoToUrlTrace +IfTrace = tracing_model.IfTrace +ObjectExportAsJsonTrace = tracing_model.ObjectExportAsJsonTrace +ObjectSetPropertiesTrace = tracing_model.ObjectSetPropertiesTrace +OperatorActionTrace = tracing_model.OperatorActionTrace +OperatorActionTraceItem = tracing_model.OperatorActionTraceItem +OutputTrace = tracing_model.OutputTrace +PressKeysTrace = tracing_model.PressKeysTrace +PrintTrace = tracing_model.PrintTrace +PythonAgentRunTrace = tracing_model.PythonAgentRunTrace +PythonExtensionActionEvent = tracing_model.PythonExtensionActionEvent +PythonSideEffectEvent = tracing_model.PythonSideEffectEvent +PythonStderrEvent = tracing_model.PythonStderrEvent +PythonStdoutEvent = tracing_model.PythonStdoutEvent +PythonSubAgentCallEvent = tracing_model.PythonSubAgentCallEvent +PythonTrace = tracing_model.PythonTrace +PythonTraceEvent = tracing_model.PythonTraceEvent +ReadCsvTrace = tracing_model.ReadCsvTrace +ReadGoogleSheetTrace = tracing_model.ReadGoogleSheetTrace +RunCustomAgentTrace = tracing_model.RunCustomAgentTrace +SetVariableTrace = tracing_model.SetVariableTrace +StartTrace = tracing_model.StartTrace +WaitForElementTrace = tracing_model.WaitForElementTrace +WaitTrace = tracing_model.WaitTrace +WhileLoopTrace = tracing_model.WhileLoopTrace +WriteGoogleSheetTrace = tracing_model.WriteGoogleSheetTrace 
+parse_action_trace = tracing_model.parse_action_trace + # There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method # under the hood. @@ -32,355 +73,6 @@ class AgentUsage(BaseModel): credits: float -class OperatorActionTraceItem(BaseModel): - url: str - action: str - - -class GoToUrlTrace(BaseModel): - step_type: Literal["goToUrl"] - url: str - description: str - - -class GetUrlTrace(BaseModel): - step_type: Literal["getUrl"] - url: str - description: str - - -class PrintTrace(BaseModel): - step_type: Literal["print"] - url: str - message: str - - -class AgentTrace(BaseModel): - step_type: Literal["agent"] - url: str - agent_type: str - action_trace: ActionTrace | None = None - text: str | None = None - - -class ForLoopTrace(BaseModel): - step_type: Literal["for"] - url: str - loop_type: Literal["nTimes", "forEachRowInDataTable", "forEachItemsInArray"] - description: str - iterations: list[ApaActionTrace] # Recursive reference - - -class WhileLoopTrace(BaseModel): - step_type: Literal["while"] - url: str - condition: str - iterations: list[ApaActionTrace] # Recursive reference - total_iterations: int - - -class AgenticSelectorTrace(BaseModel): - step_type: Literal["agenticSelector"] - url: str - description: str - action_trace: ActionTrace | None = None - - -class AgenticMouseActionTrace(BaseModel): - step_type: Literal["agenticMouseAction"] - url: str - description: str - action_trace: ActionTrace | None = None - - -class WaitForElementTrace(BaseModel): - step_type: Literal["waitForElement"] - url: str - description: str - - -class PressKeysTrace(BaseModel): - step_type: Literal["pressKeys"] - url: str - description: str - - -class ReadGoogleSheetTrace(BaseModel): - step_type: Literal["readGoogleSheet"] - url: str - description: str - - -class WriteGoogleSheetTrace(BaseModel): - step_type: Literal["writeGoogleSheet"] - url: str - description: str - - -class DataTableExportAsCsvTrace(BaseModel): - step_type: 
Literal["dataTableExportAsCsv"] - url: str - description: str - - -class PythonTrace(BaseModel): - step_type: Literal["python"] - url: str - description: str - - -class ReadCsvTrace(BaseModel): - step_type: Literal["readCsv"] - url: str - description: str - - -class StartTrace(BaseModel): - step_type: Literal["start"] - url: str - description: str - - -class EndTrace(BaseModel): - step_type: Literal["end"] - url: str - description: str - - -class GetFullHtmlTrace(BaseModel): - step_type: Literal["getFullHtml"] - url: str - description: str - - -class GetSimplifiedHtmlTrace(BaseModel): - step_type: Literal["getSimplifiedHtml"] - url: str - description: str - - -class GetScreenshotTrace(BaseModel): - step_type: Literal["getScreenshot"] - url: str - description: str - - -class ObjectExportAsJsonTrace(BaseModel): - step_type: Literal["objectExportAsJson"] - url: str - description: str - - -class RunCustomAgentTrace(BaseModel): - step_type: Literal["runCustomAgent"] - url: str - workflow_id: str - workflow_name: str - status: Literal["success", "error"] - error_message: str | None = None - - -class IfTrace(BaseModel): - step_type: Literal["if"] - url: str - description: str - - -class SetVariableTrace(BaseModel): - step_type: Literal["setVariable"] - url: str - description: str - - -class WaitTrace(BaseModel): - step_type: Literal["wait"] - url: str - description: str - - -class DataTableInsertRowTrace(BaseModel): - step_type: Literal["dataTableInsertRow"] - url: str - description: str - - -class DataTableUpdateCellValueTrace(BaseModel): - step_type: Literal["dataTableUpdateCellValue"] - url: str - description: str - - -class ObjectSetPropertiesTrace(BaseModel): - step_type: Literal["objectSetProperties"] - url: str - description: str - - -class OutputTrace(BaseModel): - step_type: Literal["output"] - description: str - - -# --------------------------------------------------------------------------- -# Python agent run trace: emitted by CustomPythonAgentRunnable for 
custom -# Python agents executed in the browser Pyodide runtime. A single -# PythonAgentRunTrace wraps the full agent's execution; its `events` list is -# a chronologically sorted timeline of stdout / stderr / SDK call events. -# --------------------------------------------------------------------------- - - -class PythonStdoutEvent(BaseModel): - kind: Literal["stdout"] = "stdout" - ts: int - text: str - - -class PythonStderrEvent(BaseModel): - kind: Literal["stderr"] = "stderr" - ts: int - text: str - - -class PythonSubAgentCallEvent(BaseModel): - kind: Literal["subAgentCall"] = "subAgentCall" - ts_start: int - ts_end: int - agent_type: str - prompt: str - status: Literal["success", "error", "timeout"] - request_id: str | None = None - error_message: str | None = None - action_trace: ActionTrace | None = None - - @model_validator(mode="after") - def _check_ts_ordering(self) -> PythonSubAgentCallEvent: - if self.ts_end < self.ts_start: - raise ValueError( - f"PythonSubAgentCallEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})" - ) - return self - - -class PythonExtensionActionEvent(BaseModel): - kind: Literal["extensionAction"] = "extensionAction" - ts_start: int - ts_end: int - # Matches the snake_case `name` discriminator on ExtensionActionRequest - # (e.g. "go_to_url", "get_screenshot"). Carried as a plain string rather - # than a Literal so adding a new extension action in the future does not - # require a parse-time migration of historical trace data. 
- action_name: str - request_summary: dict[str, Any] - result_summary: dict[str, Any] | None = None - status: Literal["success", "error", "timeout"] - error_message: str | None = None - - @model_validator(mode="after") - def _check_ts_ordering(self) -> PythonExtensionActionEvent: - if self.ts_end < self.ts_start: - raise ValueError( - f"PythonExtensionActionEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})" - ) - return self - - -class PythonSideEffectEvent(BaseModel): - kind: Literal["sideEffect"] = "sideEffect" - ts: int - effect_type: Literal["download_file", "render_html"] - description: str - - -PythonTraceEvent = Annotated[ - PythonStdoutEvent - | PythonStderrEvent - | PythonSubAgentCallEvent - | PythonExtensionActionEvent - | PythonSideEffectEvent, - Field(discriminator="kind"), -] - - -class PythonAgentRunTrace(BaseModel): - step_type: Literal["pythonAgentRun"] = "pythonAgentRun" - url: str - status: Literal["success", "error", "aborted"] - duration_ms: NonNegativeInt - events: list[PythonTraceEvent] - error_message: str | None = None - - -ApaStepTrace = Annotated[ - GoToUrlTrace - | GetUrlTrace - | PrintTrace - | AgentTrace - | ForLoopTrace - | WhileLoopTrace - | AgenticSelectorTrace - | AgenticMouseActionTrace - | WaitForElementTrace - | PressKeysTrace - | ReadCsvTrace - | ReadGoogleSheetTrace - | WriteGoogleSheetTrace - | DataTableExportAsCsvTrace - | ObjectExportAsJsonTrace - | PythonTrace - | StartTrace - | EndTrace - | GetFullHtmlTrace - | GetSimplifiedHtmlTrace - | GetScreenshotTrace - | RunCustomAgentTrace - | IfTrace - | SetVariableTrace - | WaitTrace - | DataTableInsertRowTrace - | DataTableUpdateCellValueTrace - | ObjectSetPropertiesTrace - | OutputTrace - | PythonAgentRunTrace, - Field(discriminator="step_type"), -] - -type OperatorActionTrace = list[OperatorActionTraceItem] -type ApaActionTrace = list[ApaStepTrace] -type ActionTrace = OperatorActionTrace | ApaActionTrace - - -# TypeAdapter for parsing discriminated union 
-_OperatorActionTraceAdapter = TypeAdapter(OperatorActionTrace) -_ApaActionTraceAdapter = TypeAdapter(ApaActionTrace) - - -def parse_action_trace(trace_data: list[dict[str, Any] | Any]) -> ActionTrace: - """Parse the action trace. - - Dispatches deterministically based on the shape of the first item rather - than try/except-falling-through two adapters: operator items carry - ``action`` + ``url`` fields, APA steps carry ``step_type``. On an empty - list (no discriminator available) we default to APA, which is the - superset shape used by all custom agents. - """ - if not trace_data: - return _ApaActionTraceAdapter.validate_python(trace_data) - - first = trace_data[0] - if isinstance(first, dict) and "step_type" in first: - return _ApaActionTraceAdapter.validate_python(trace_data) - if isinstance(first, dict) and "action" in first and "url" in first: - return _OperatorActionTraceAdapter.validate_python(trace_data) - - # Ambiguous shape — fall back to the previous try/except pattern so we - # do not regress existing callers passing Pydantic instances or other - # shapes the adapters already know how to coerce. 
- try: - return _OperatorActionTraceAdapter.validate_python(trace_data) - except ValidationError: - return _ApaActionTraceAdapter.validate_python(trace_data) - - class TextOutput(BaseModel): type: Literal["text"] content: str diff --git a/packages/narada-core/src/narada_core/tracing/__init__.py b/packages/narada-core/src/narada_core/tracing/__init__.py new file mode 100644 index 0000000..3237a27 --- /dev/null +++ b/packages/narada-core/src/narada_core/tracing/__init__.py @@ -0,0 +1,2 @@ +"""Tracing models for narada-core.""" + diff --git a/packages/narada-core/src/narada_core/tracing/model.py b/packages/narada-core/src/narada_core/tracing/model.py new file mode 100644 index 0000000..0f9125c --- /dev/null +++ b/packages/narada-core/src/narada_core/tracing/model.py @@ -0,0 +1,361 @@ +from __future__ import annotations + +from typing import Annotated, Any, Literal + +from pydantic import ( + BaseModel, + Field, + NonNegativeInt, + TypeAdapter, + ValidationError, + model_validator, +) + + +class OperatorActionTraceItem(BaseModel): + url: str + action: str + + +class GoToUrlTrace(BaseModel): + step_type: Literal["goToUrl"] + url: str + description: str + + +class GetUrlTrace(BaseModel): + step_type: Literal["getUrl"] + url: str + description: str + + +class PrintTrace(BaseModel): + step_type: Literal["print"] + url: str + message: str + + +class AgentTrace(BaseModel): + step_type: Literal["agent"] + url: str + agent_type: str + action_trace: ActionTrace | None = None + text: str | None = None + + +class ForLoopTrace(BaseModel): + step_type: Literal["for"] + url: str + loop_type: Literal["nTimes", "forEachRowInDataTable", "forEachItemsInArray"] + description: str + iterations: list[ApaActionTrace] # Recursive reference + + +class WhileLoopTrace(BaseModel): + step_type: Literal["while"] + url: str + condition: str + iterations: list[ApaActionTrace] # Recursive reference + total_iterations: int + + +class AgenticSelectorTrace(BaseModel): + step_type: 
Literal["agenticSelector"] + url: str + description: str + action_trace: ActionTrace | None = None + + +class AgenticMouseActionTrace(BaseModel): + step_type: Literal["agenticMouseAction"] + url: str + description: str + action_trace: ActionTrace | None = None + + +class WaitForElementTrace(BaseModel): + step_type: Literal["waitForElement"] + url: str + description: str + + +class PressKeysTrace(BaseModel): + step_type: Literal["pressKeys"] + url: str + description: str + + +class ReadGoogleSheetTrace(BaseModel): + step_type: Literal["readGoogleSheet"] + url: str + description: str + + +class WriteGoogleSheetTrace(BaseModel): + step_type: Literal["writeGoogleSheet"] + url: str + description: str + + +class DataTableExportAsCsvTrace(BaseModel): + step_type: Literal["dataTableExportAsCsv"] + url: str + description: str + + +class PythonTrace(BaseModel): + step_type: Literal["python"] + url: str + description: str + + +class ReadCsvTrace(BaseModel): + step_type: Literal["readCsv"] + url: str + description: str + + +class StartTrace(BaseModel): + step_type: Literal["start"] + url: str + description: str + + +class EndTrace(BaseModel): + step_type: Literal["end"] + url: str + description: str + + +class GetFullHtmlTrace(BaseModel): + step_type: Literal["getFullHtml"] + url: str + description: str + + +class GetSimplifiedHtmlTrace(BaseModel): + step_type: Literal["getSimplifiedHtml"] + url: str + description: str + + +class GetScreenshotTrace(BaseModel): + step_type: Literal["getScreenshot"] + url: str + description: str + + +class ObjectExportAsJsonTrace(BaseModel): + step_type: Literal["objectExportAsJson"] + url: str + description: str + + +class RunCustomAgentTrace(BaseModel): + step_type: Literal["runCustomAgent"] + url: str + workflow_id: str + workflow_name: str + status: Literal["success", "error"] + error_message: str | None = None + + +class IfTrace(BaseModel): + step_type: Literal["if"] + url: str + description: str + + +class SetVariableTrace(BaseModel): + 
step_type: Literal["setVariable"] + url: str + description: str + + +class WaitTrace(BaseModel): + step_type: Literal["wait"] + url: str + description: str + + +class DataTableInsertRowTrace(BaseModel): + step_type: Literal["dataTableInsertRow"] + url: str + description: str + + +class DataTableUpdateCellValueTrace(BaseModel): + step_type: Literal["dataTableUpdateCellValue"] + url: str + description: str + + +class ObjectSetPropertiesTrace(BaseModel): + step_type: Literal["objectSetProperties"] + url: str + description: str + + +class OutputTrace(BaseModel): + step_type: Literal["output"] + description: str + + +# --------------------------------------------------------------------------- +# Python agent run trace: emitted by CustomPythonAgentRunnable for custom +# Python agents executed in the browser Pyodide runtime. A single +# PythonAgentRunTrace wraps the full agent's execution; its `events` list is +# a chronologically sorted timeline of stdout / stderr / SDK call events. +# --------------------------------------------------------------------------- + + +class PythonStdoutEvent(BaseModel): + kind: Literal["stdout"] = "stdout" + ts: int + text: str + + +class PythonStderrEvent(BaseModel): + kind: Literal["stderr"] = "stderr" + ts: int + text: str + + +class PythonSubAgentCallEvent(BaseModel): + kind: Literal["subAgentCall"] = "subAgentCall" + ts_start: int + ts_end: int + agent_type: str + prompt: str + status: Literal["success", "error", "timeout"] + request_id: str | None = None + error_message: str | None = None + action_trace: ActionTrace | None = None + + @model_validator(mode="after") + def _check_ts_ordering(self) -> PythonSubAgentCallEvent: + if self.ts_end < self.ts_start: + raise ValueError( + f"PythonSubAgentCallEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})" + ) + return self + + +class PythonExtensionActionEvent(BaseModel): + kind: Literal["extensionAction"] = "extensionAction" + ts_start: int + ts_end: int + # Matches the 
snake_case `name` discriminator on ExtensionActionRequest + # (e.g. "go_to_url", "get_screenshot"). Carried as a plain string rather + # than a Literal so adding a new extension action in the future does not + # require a parse-time migration of historical trace data. + action_name: str + request_summary: dict[str, Any] + result_summary: dict[str, Any] | None = None + status: Literal["success", "error", "timeout"] + error_message: str | None = None + + @model_validator(mode="after") + def _check_ts_ordering(self) -> PythonExtensionActionEvent: + if self.ts_end < self.ts_start: + raise ValueError( + f"PythonExtensionActionEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})" + ) + return self + + +class PythonSideEffectEvent(BaseModel): + kind: Literal["sideEffect"] = "sideEffect" + ts: int + effect_type: Literal["download_file", "render_html"] + description: str + + +PythonTraceEvent = Annotated[ + PythonStdoutEvent + | PythonStderrEvent + | PythonSubAgentCallEvent + | PythonExtensionActionEvent + | PythonSideEffectEvent, + Field(discriminator="kind"), +] + + +class PythonAgentRunTrace(BaseModel): + step_type: Literal["pythonAgentRun"] = "pythonAgentRun" + url: str + status: Literal["success", "error", "aborted"] + duration_ms: NonNegativeInt + events: list[PythonTraceEvent] + error_message: str | None = None + + +ApaStepTrace = Annotated[ + GoToUrlTrace + | GetUrlTrace + | PrintTrace + | AgentTrace + | ForLoopTrace + | WhileLoopTrace + | AgenticSelectorTrace + | AgenticMouseActionTrace + | WaitForElementTrace + | PressKeysTrace + | ReadCsvTrace + | ReadGoogleSheetTrace + | WriteGoogleSheetTrace + | DataTableExportAsCsvTrace + | ObjectExportAsJsonTrace + | PythonTrace + | StartTrace + | EndTrace + | GetFullHtmlTrace + | GetSimplifiedHtmlTrace + | GetScreenshotTrace + | RunCustomAgentTrace + | IfTrace + | SetVariableTrace + | WaitTrace + | DataTableInsertRowTrace + | DataTableUpdateCellValueTrace + | ObjectSetPropertiesTrace + | OutputTrace + | 
PythonAgentRunTrace, + Field(discriminator="step_type"), +] + +type OperatorActionTrace = list[OperatorActionTraceItem] +type ApaActionTrace = list[ApaStepTrace] +type ActionTrace = OperatorActionTrace | ApaActionTrace + + +_OperatorActionTraceAdapter = TypeAdapter(OperatorActionTrace) +_ApaActionTraceAdapter = TypeAdapter(ApaActionTrace) + + +def parse_action_trace(trace_data: list[dict[str, Any] | Any]) -> ActionTrace: + """Parse the action trace. + + Dispatches deterministically based on the shape of the first item rather + than try/except-falling-through two adapters: operator items carry + ``action`` + ``url`` fields, APA steps carry ``step_type``. On an empty + list (no discriminator available) we default to APA, which is the + superset shape used by all custom agents. + """ + if not trace_data: + return _ApaActionTraceAdapter.validate_python(trace_data) + + first = trace_data[0] + if isinstance(first, dict) and "step_type" in first: + return _ApaActionTraceAdapter.validate_python(trace_data) + if isinstance(first, dict) and "action" in first and "url" in first: + return _OperatorActionTraceAdapter.validate_python(trace_data) + + # Ambiguous shape — fall back to the previous try/except pattern so we + # do not regress existing callers passing Pydantic instances or other + # shapes the adapters already know how to coerce. + try: + return _OperatorActionTraceAdapter.validate_python(trace_data) + except ValidationError: + return _ApaActionTraceAdapter.validate_python(trace_data) + diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py index 0586a48..659ac88 100644 --- a/packages/narada-pyodide/src/narada/_trace.py +++ b/packages/narada-pyodide/src/narada/_trace.py @@ -40,7 +40,7 @@ def emit_trace_event(event: dict[str, Any]) -> None: """Forward a single trace event to the JavaScript harness. 
The event must be JSON-serialisable and shaped as one of the - ``PythonTraceEvent`` variants defined in ``narada_core.actions.models``. + ``PythonTraceEvent`` variants defined in ``narada_core.tracing.model``. No validation is performed here; callers construct events directly and are responsible for matching the schema. @@ -69,7 +69,7 @@ def dump_model(model: BaseModel) -> dict[str, Any]: # Event emitters # # Each emitter builds a JSON-serialisable event shaped to match one of the -# ``PythonTraceEvent`` Pydantic variants in ``narada_core.actions.models`` +# ``PythonTraceEvent`` Pydantic variants in ``narada_core.tracing.model`` # and forwards it to the JavaScript harness. Optional fields are included # only when non-None so the JSON stays compact. # --------------------------------------------------------------------------- diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py index 791503c..85b8238 100644 --- a/packages/narada-pyodide/src/narada/window.py +++ b/packages/narada-pyodide/src/narada/window.py @@ -51,7 +51,6 @@ UserApprovalRequest, UserApprovalResponse, WriteGoogleSheetRequest, - parse_action_trace, ) from narada_core.errors import ( NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE, @@ -67,6 +66,7 @@ Response, UserResourceCredentials, ) +from narada_core.tracing.model import parse_action_trace from pydantic import BaseModel from pyodide.ffi import JsProxy, create_once_callable from pyodide.http import pyfetch diff --git a/packages/narada/src/narada/window.py b/packages/narada/src/narada/window.py index 29c359b..9a93ece 100644 --- a/packages/narada/src/narada/window.py +++ b/packages/narada/src/narada/window.py @@ -41,7 +41,6 @@ UserApprovalRequest, UserApprovalResponse, WriteGoogleSheetRequest, - parse_action_trace, ) from narada_core.errors import ( NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE, @@ -57,6 +56,7 @@ Response, UserResourceCredentials, ) +from narada_core.tracing.model import parse_action_trace 
from playwright.async_api import ( BrowserContext, ) From a33e02a1bc99ece1d1f46fa233525d0486d4895d Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Thu, 30 Apr 2026 10:28:53 -0700 Subject: [PATCH 10/13] Remove trace model re-exports --- .../src/narada_core/actions/models.py | 45 +------------------ 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index db49d64..1b33b83 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -19,49 +19,6 @@ from narada_core.tracing import model as tracing_model -ActionTrace = tracing_model.ActionTrace -AgentTrace = tracing_model.AgentTrace -AgenticMouseActionTrace = tracing_model.AgenticMouseActionTrace -AgenticSelectorTrace = tracing_model.AgenticSelectorTrace -ApaActionTrace = tracing_model.ApaActionTrace -ApaStepTrace = tracing_model.ApaStepTrace -DataTableExportAsCsvTrace = tracing_model.DataTableExportAsCsvTrace -DataTableInsertRowTrace = tracing_model.DataTableInsertRowTrace -DataTableUpdateCellValueTrace = tracing_model.DataTableUpdateCellValueTrace -EndTrace = tracing_model.EndTrace -ForLoopTrace = tracing_model.ForLoopTrace -GetFullHtmlTrace = tracing_model.GetFullHtmlTrace -GetScreenshotTrace = tracing_model.GetScreenshotTrace -GetSimplifiedHtmlTrace = tracing_model.GetSimplifiedHtmlTrace -GetUrlTrace = tracing_model.GetUrlTrace -GoToUrlTrace = tracing_model.GoToUrlTrace -IfTrace = tracing_model.IfTrace -ObjectExportAsJsonTrace = tracing_model.ObjectExportAsJsonTrace -ObjectSetPropertiesTrace = tracing_model.ObjectSetPropertiesTrace -OperatorActionTrace = tracing_model.OperatorActionTrace -OperatorActionTraceItem = tracing_model.OperatorActionTraceItem -OutputTrace = tracing_model.OutputTrace -PressKeysTrace = tracing_model.PressKeysTrace -PrintTrace = tracing_model.PrintTrace -PythonAgentRunTrace = 
tracing_model.PythonAgentRunTrace -PythonExtensionActionEvent = tracing_model.PythonExtensionActionEvent -PythonSideEffectEvent = tracing_model.PythonSideEffectEvent -PythonStderrEvent = tracing_model.PythonStderrEvent -PythonStdoutEvent = tracing_model.PythonStdoutEvent -PythonSubAgentCallEvent = tracing_model.PythonSubAgentCallEvent -PythonTrace = tracing_model.PythonTrace -PythonTraceEvent = tracing_model.PythonTraceEvent -ReadCsvTrace = tracing_model.ReadCsvTrace -ReadGoogleSheetTrace = tracing_model.ReadGoogleSheetTrace -RunCustomAgentTrace = tracing_model.RunCustomAgentTrace -SetVariableTrace = tracing_model.SetVariableTrace -StartTrace = tracing_model.StartTrace -WaitForElementTrace = tracing_model.WaitForElementTrace -WaitTrace = tracing_model.WaitTrace -WhileLoopTrace = tracing_model.WhileLoopTrace -WriteGoogleSheetTrace = tracing_model.WriteGoogleSheetTrace -parse_action_trace = tracing_model.parse_action_trace - # There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method # under the hood. 
@@ -93,7 +50,7 @@ class AgentResponse(BaseModel, Generic[_StructuredOutputT]): Field(discriminator="type"), ] usage: AgentUsage - action_trace: ActionTrace | None = None + action_trace: tracing_model.ActionTrace | None = None class AgenticSelectorClickAction(TypedDict): From c6636dad827cacfb62f58c129fc0a24754e50027 Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Thu, 30 Apr 2026 10:47:31 -0700 Subject: [PATCH 11/13] Fix tracing package formatting --- packages/narada-core/src/narada_core/tracing/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/narada-core/src/narada_core/tracing/__init__.py b/packages/narada-core/src/narada_core/tracing/__init__.py index 3237a27..cb6fd54 100644 --- a/packages/narada-core/src/narada_core/tracing/__init__.py +++ b/packages/narada-core/src/narada_core/tracing/__init__.py @@ -1,2 +1 @@ """Tracing models for narada-core.""" - From 8d01131f2fef7b5fcc6e5cd4f456be284b5c984b Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Thu, 30 Apr 2026 12:38:16 -0700 Subject: [PATCH 12/13] Bump package versions for reasoning support --- packages/narada-core/pyproject.toml | 2 +- packages/narada-pyodide/pyproject.toml | 4 ++-- packages/narada/pyproject.toml | 4 ++-- uv.lock | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/narada-core/pyproject.toml b/packages/narada-core/pyproject.toml index ca75dc0..edadabf 100644 --- a/packages/narada-core/pyproject.toml +++ b/packages/narada-core/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "narada-core" -version = "0.0.20" +version = "0.0.21" description = "Code shared by the `narada` and `narada-pyodide` packages." 
license = "Apache-2.0" readme = "README.md" diff --git a/packages/narada-pyodide/pyproject.toml b/packages/narada-pyodide/pyproject.toml index 1443d3e..15bcbdd 100644 --- a/packages/narada-pyodide/pyproject.toml +++ b/packages/narada-pyodide/pyproject.toml @@ -1,14 +1,14 @@ [project] name = "narada-pyodide" -version = "0.0.47" +version = "0.0.48" description = "Pyodide-compatible Python client SDK for Narada" license = "Apache-2.0" readme = "README.md" authors = [{ name = "Narada", email = "support@narada.ai" }] requires-python = ">=3.12" dependencies = [ - "narada-core==0.0.20", + "narada-core==0.0.21", # Must be a supported version in https://pyodide.org/en/stable/usage/packages-in-pyodide.html "packaging==24.2", ] diff --git a/packages/narada/pyproject.toml b/packages/narada/pyproject.toml index f5be009..6575246 100644 --- a/packages/narada/pyproject.toml +++ b/packages/narada/pyproject.toml @@ -1,13 +1,13 @@ [project] name = "narada" -version = "0.1.47" +version = "0.1.48" description = "Python client SDK for Narada" license = "Apache-2.0" readme = "README.md" authors = [{ name = "Narada", email = "support@narada.ai" }] requires-python = ">=3.12" dependencies = [ - "narada-core==0.0.20", + "narada-core==0.0.21", "aiohttp>=3.12.13", "playwright>=1.53.0", "rich>=14.0.0", diff --git a/uv.lock b/uv.lock index dc19511..2c454fd 100644 --- a/uv.lock +++ b/uv.lock @@ -312,7 +312,7 @@ wheels = [ [[package]] name = "narada" -version = "0.1.47" +version = "0.1.48" source = { editable = "packages/narada" } dependencies = [ { name = "aiohttp" }, @@ -345,7 +345,7 @@ dev = [ [[package]] name = "narada-core" -version = "0.0.20" +version = "0.0.21" source = { editable = "packages/narada-core" } dependencies = [ { name = "pydantic" }, @@ -356,7 +356,7 @@ requires-dist = [{ name = "pydantic", specifier = "==2.12.5" }] [[package]] name = "narada-pyodide" -version = "0.0.47" +version = "0.0.48" source = { editable = "packages/narada-pyodide" } dependencies = [ { name = 
"narada-core" }, From d007cd45de48d7fc355d3327370b6f916c3772c4 Mon Sep 17 00:00:00 2001 From: xTRam1 Date: Thu, 30 Apr 2026 13:08:10 -0700 Subject: [PATCH 13/13] Simplify reasoning effort docstring --- packages/narada-core/src/narada_core/models.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packages/narada-core/src/narada_core/models.py b/packages/narada-core/src/narada_core/models.py index aba8ffa..d7d075f 100644 --- a/packages/narada-core/src/narada_core/models.py +++ b/packages/narada-core/src/narada_core/models.py @@ -22,11 +22,9 @@ def prompt_prefix(self) -> str: class ReasoningEffort(StrEnum): - """Amount of reasoning the Core Agent applies before responding. + """Controls how much reasoning the Core Agent uses before responding. - Maps 1:1 to OpenAI's ``reasoning.effort`` parameter. Only honored when the - invoked agent is :py:attr:`Agent.CORE_AGENT`; the SDK enforces this both at - type-check time (via ``@overload``) and at runtime (with a ``ValueError``). + Only `Agent.CORE_AGENT` supports this option; other agents raise `ValueError`. """ NONE = "none"