From bbc88d9585acbb3ed42dbd78fbc87b2b9a877ac4 Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Thu, 16 Apr 2026 16:52:56 -0700
Subject: [PATCH 01/13] Add Python agent run trace

Add structured tracing for custom Python agents so their execution surfaces
on the Narada observability dashboard alongside GUI-built custom agents.

narada-core:
  - New PythonAgentRunTrace step type + PythonTraceEvent discriminated union
    covering stdout, stderr, sub-agent calls, extension actions, and side
    effects. Added to the ApaStepTrace union; parse_action_trace handles it
    transparently.

narada-pyodide:
  - New private _trace.py module with bounded-size summarisation of
    extension action requests/responses and per-event emitters
    (emit_sub_agent_call, emit_extension_action, emit_side_effect).
  - Instrument dispatch_request() to emit one subAgentCall event per
    invocation, covering success/error/timeout paths.
  - Instrument _run_extension_action() to emit one extensionAction event
    per call, with action_name keyed off the request discriminator.
  - Instrument download_file / render_html in utils.py to emit sideEffect
    events.
  - 38 unit tests exercise summarisation, truncation, emitter shapes, and
    Pydantic round-trip via parse_action_trace.

Version bumps (coupled to avoid parse_action_trace ValidationError for
external narada users whose traces may contain pythonAgentRun nodes):
  - narada-core:    0.0.17 -> 0.0.18
  - narada-pyodide: 0.0.43 -> 0.0.44
  - narada:         0.1.42 -> 0.1.43 (repin narada-core==0.0.18 only)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/narada-core/pyproject.toml           |   2 +-
 .../src/narada_core/actions/models.py         |  76 ++-
 packages/narada-pyodide/pyproject.toml        |   6 +-
 packages/narada-pyodide/src/narada/_trace.py  | 236 ++++++++++
 packages/narada-pyodide/src/narada/utils.py   |  10 +
 packages/narada-pyodide/src/narada/window.py  | 155 +++++--
 packages/narada-pyodide/tests/README.md       |  20 +
 packages/narada-pyodide/tests/__init__.py     |   0
 packages/narada-pyodide/tests/conftest.py     |  56 +++
 packages/narada-pyodide/tests/test_trace.py   | 432 ++++++++++++++++++
 packages/narada/pyproject.toml                |   4 +-
 uv.lock                                       |  14 +-
 12 files changed, 961 insertions(+), 50 deletions(-)
 create mode 100644 packages/narada-pyodide/src/narada/_trace.py
 create mode 100644 packages/narada-pyodide/tests/README.md
 create mode 100644 packages/narada-pyodide/tests/__init__.py
 create mode 100644 packages/narada-pyodide/tests/conftest.py
 create mode 100644 packages/narada-pyodide/tests/test_trace.py

diff --git a/packages/narada-core/pyproject.toml b/packages/narada-core/pyproject.toml
index be162df..e47b7c8 100644
--- a/packages/narada-core/pyproject.toml
+++ b/packages/narada-core/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "narada-core"
-version = "0.0.17"
+version = "0.0.18"
 description = "Code shared by the `narada` and `narada-pyodide` packages."
 license = "Apache-2.0"
 readme = "README.md"
diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py
index 6e68db6..c566d75 100644
--- a/packages/narada-core/src/narada_core/actions/models.py
+++ b/packages/narada-core/src/narada_core/actions/models.py
@@ -214,6 +214,79 @@ class OutputTrace(BaseModel):
     description: str
 
 
+# ---------------------------------------------------------------------------
+# Python agent run trace: emitted by CustomPythonAgentRunnable for custom
+# Python agents executed in the browser Pyodide runtime. A single
+# PythonAgentRunTrace wraps the full agent's execution; its `events` list is
+# a chronologically sorted timeline of stdout / stderr / SDK call events.
+# ---------------------------------------------------------------------------
+
+
+class PythonStdoutEvent(BaseModel):
+    kind: Literal["stdout"] = "stdout"
+    ts: int
+    text: str
+
+
+class PythonStderrEvent(BaseModel):
+    kind: Literal["stderr"] = "stderr"
+    ts: int
+    text: str
+
+
+class PythonSubAgentCallEvent(BaseModel):
+    kind: Literal["subAgentCall"] = "subAgentCall"
+    ts_start: int
+    ts_end: int
+    agent_type: str
+    prompt: str
+    status: Literal["success", "error", "timeout"]
+    request_id: str | None = None
+    error_message: str | None = None
+    action_trace: ActionTrace | None = None
+
+
+class PythonExtensionActionEvent(BaseModel):
+    kind: Literal["extensionAction"] = "extensionAction"
+    ts_start: int
+    ts_end: int
+    # Matches the snake_case `name` discriminator on ExtensionActionRequest
+    # (e.g. "go_to_url", "get_screenshot"). Carried as a plain string rather
+    # than a Literal so adding a new extension action in the future does not
+    # require a parse-time migration of historical trace data.
+    action_name: str
+    request_summary: dict[str, Any]
+    result_summary: dict[str, Any] | None = None
+    status: Literal["success", "error", "timeout"]
+    error_message: str | None = None
+
+
+class PythonSideEffectEvent(BaseModel):
+    kind: Literal["sideEffect"] = "sideEffect"
+    ts: int
+    effect_type: Literal["download_file", "render_html"]
+    description: str
+
+
+PythonTraceEvent = Annotated[
+    PythonStdoutEvent
+    | PythonStderrEvent
+    | PythonSubAgentCallEvent
+    | PythonExtensionActionEvent
+    | PythonSideEffectEvent,
+    Field(discriminator="kind"),
+]
+
+
+class PythonAgentRunTrace(BaseModel):
+    step_type: Literal["pythonAgentRun"] = "pythonAgentRun"
+    url: str
+    status: Literal["success", "error", "aborted"]
+    duration_ms: int
+    events: list[PythonTraceEvent]
+    error_message: str | None = None
+
+
 ApaStepTrace = Annotated[
     GoToUrlTrace
     | GetUrlTrace
@@ -243,7 +316,8 @@ class OutputTrace(BaseModel):
     | DataTableInsertRowTrace
     | DataTableUpdateCellValueTrace
     | ObjectSetPropertiesTrace
-    | OutputTrace,
+    | OutputTrace
+    | PythonAgentRunTrace,
     Field(discriminator="step_type"),
 ]
 
diff --git a/packages/narada-pyodide/pyproject.toml b/packages/narada-pyodide/pyproject.toml
index 655d588..bf33ccb 100644
--- a/packages/narada-pyodide/pyproject.toml
+++ b/packages/narada-pyodide/pyproject.toml
@@ -1,14 +1,14 @@
 
 [project]
 name = "narada-pyodide"
-version = "0.0.43"
+version = "0.0.44"
 description = "Pyodide-compatible Python client SDK for Narada"
 license = "Apache-2.0"
 readme = "README.md"
 authors = [{ name = "Narada", email = "support@narada.ai" }]
 requires-python = ">=3.12"
 dependencies = [
-    "narada-core==0.0.17",
+    "narada-core==0.0.18",
     # Must be a supported version in https://pyodide.org/en/stable/usage/packages-in-pyodide.html
     "packaging==24.2",
 ]
@@ -23,7 +23,7 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [dependency-groups]
-dev = ["pyodide-py>=0.27.7"]
+dev = ["pyodide-py>=0.27.7", "pytest>=8.4.1"]
 
 [tool.hatch.build.targets.wheel]
 packages = ["src/narada"]
diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py
new file mode 100644
index 0000000..037d10f
--- /dev/null
+++ b/packages/narada-pyodide/src/narada/_trace.py
@@ -0,0 +1,236 @@
+"""Private trace-emission helpers for narada-pyodide.
+
+This module is used internally by narada-pyodide to forward structured
+telemetry (sub-agent invocations, extension actions, side effects) from
+Python code running inside the Pyodide worker to the JavaScript harness,
+which assembles a ``PythonAgentRunTrace`` that surfaces on the Narada
+observability dashboard.
+
+The module is private: user code should not import from here. The public
+surface lives in ``window.py`` and ``utils.py``; instrumentation is applied
+at those module boundaries by calling into this module.
+"""
+
+from __future__ import annotations
+
+import json
+import time
+from typing import TYPE_CHECKING, Any, Literal
+
+from narada_core.actions.models import (
+    AgenticMouseActionRequest,
+    AgenticSelectorRequest,
+    CloseWindowRequest,
+    ExtensionActionRequest,
+    GetFullHtmlRequest,
+    GetScreenshotRequest,
+    GetSimplifiedHtmlRequest,
+    GetUrlRequest,
+    GetUrlResponse,
+    GoToUrlRequest,
+    PrintMessageRequest,
+    ReadGoogleSheetRequest,
+    ReadGoogleSheetResponse,
+    WriteGoogleSheetRequest,
+)
+from pydantic import BaseModel
+
+if TYPE_CHECKING:
+    # Injected by the JavaScript harness at worker startup (see
+    # `frontend/src/lib/apa/python/python.worker.ts`), so at runtime it is a
+    # builtin and this stub exists for type checkers only. Host-side unit
+    # tests bind a recorder to this module attribute (`tests/conftest.py`).
+    def _narada_emit_trace_event(event_json: str) -> None: ...
+
+
+# Hard caps on payload sizes carried in trace events. Values are large enough
+# that typical prompts and error messages survive intact but small enough to
+# bound worst-case persisted actionTrace JSON.
+_MAX_PROMPT_CHARS = 500
+_MAX_MESSAGE_CHARS = 500
+_MAX_ERROR_CHARS = 1000
+_MAX_QUERY_CHARS = 200
+
+_ELLIPSIS = "\u2026"
+
+
+def now_ms() -> int:
+    """Current wall-clock time in integer milliseconds."""
+    return int(time.time() * 1000)
+
+
+def truncate(value: str | None, max_chars: int) -> str | None:
+    """Shorten ``value`` to at most ``max_chars`` characters, ending with an
+    ellipsis when cut. ``None`` passes through; a cap below 1 yields ``""``."""
+    if value is None or len(value) <= max_chars:
+        return value
+    if max_chars < 1:
+        return ""
+    return value[: max_chars - 1] + _ELLIPSIS
+
+
+def truncate_prompt(prompt: str) -> str:
+    return truncate(prompt, _MAX_PROMPT_CHARS) or ""
+
+
+def truncate_error(error: str) -> str:
+    return truncate(error, _MAX_ERROR_CHARS) or ""
+
+
+def emit_trace_event(event: dict[str, Any]) -> None:
+    """Forward a single trace event to the JavaScript harness.
+
+    ``event`` is serialised with ``json.dumps`` and handed to the
+    harness-injected ``_narada_emit_trace_event``. It should match a
+    ``PythonTraceEvent`` variant from ``narada_core.actions.models``, but no
+    validation happens here; ``json.dumps`` errors propagate to the caller.
+    """
+    _narada_emit_trace_event(json.dumps(event))  # noqa: F821
+
+
+def summarize_request(request: ExtensionActionRequest) -> dict[str, Any]:
+    """Condense an extension action request into a small, JSON-serialisable
+    dict for the observability dashboard.
+
+    Bulky payloads are reduced to a row count or an action type and
+    free-form strings are truncated; the result is meant for
+    ``PythonExtensionActionEvent.request_summary``.
+    """
+    match request:
+        case GoToUrlRequest():
+            return {"url": request.url, "new_tab": request.new_tab}
+        case (
+            GetUrlRequest()
+            | GetScreenshotRequest()
+            | GetFullHtmlRequest()
+            | GetSimplifiedHtmlRequest()
+            | CloseWindowRequest()
+        ):
+            # Nothing from these requests is surfaced in the summary.
+            return {}
+        case ReadGoogleSheetRequest():
+            return {
+                "spreadsheet_id": request.spreadsheet_id,
+                "range": request.range,
+            }
+        case WriteGoogleSheetRequest():
+            return {
+                "spreadsheet_id": request.spreadsheet_id,
+                "range": request.range,
+                "row_count": len(request.values),
+            }
+        case PrintMessageRequest():
+            shortened = truncate(request.message, _MAX_MESSAGE_CHARS)
+            return {"message": shortened}
+        case AgenticSelectorRequest() | AgenticMouseActionRequest():
+            action_type = request.action["type"]
+            query = truncate(request.fallback_operator_query, _MAX_QUERY_CHARS)
+            return {"action_type": action_type, "fallback_operator_query": query}
+        case _:
+            # ExtensionActionRequest is a closed union today, but a variant
+            # added without a branch here should yield an empty summary
+            # rather than crash the user's agent mid-run.
+            return {}
+
+
+def summarize_response(
+    request: ExtensionActionRequest,
+    response: BaseModel | None,
+) -> dict[str, Any] | None:
+    """Summarise an extension action response, keyed on the request type.
+    Returns ``None`` when there is nothing to show (writes, navigations,
+    close) so the dashboard can omit the row. NOTE: the screenshot/HTML
+    branches ignore ``response`` and describe it even when it is ``None``.
+    """
+    if isinstance(request, GetUrlRequest) and isinstance(response, GetUrlResponse):
+        return {"url": response.url}
+    if isinstance(request, GetScreenshotRequest):
+        return {"description": "Took screenshot of the page"}
+    if isinstance(request, GetFullHtmlRequest):
+        return {"description": "Got the full HTML of the page"}
+    if isinstance(request, GetSimplifiedHtmlRequest):
+        return {"description": "Got the simplified HTML of the page"}
+    if isinstance(request, ReadGoogleSheetRequest) and isinstance(
+        response, ReadGoogleSheetResponse
+    ):
+        rows = response.values
+        column_count = max((len(row) for row in rows), default=0)
+        return {"row_count": len(rows), "column_count": column_count}
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Event emitters
+#
+# Each emitter builds a JSON-serialisable event shaped to match one of the
+# ``PythonTraceEvent`` Pydantic variants in ``narada_core.actions.models``
+# and forwards it to the JavaScript harness. Optional fields are included
+# only when non-None so the JSON stays compact.
+# ---------------------------------------------------------------------------
+
+
+SubAgentCallStatus = Literal["success", "error", "timeout"]
+ExtensionActionStatus = Literal["success", "error", "timeout"]
+SideEffectType = Literal["download_file", "render_html"]
+
+
+def emit_sub_agent_call(
+    *,
+    ts_start: int,
+    agent_type: str,
+    prompt: str,
+    status: SubAgentCallStatus,
+    request_id: str | None = None,
+    error_message: str | None = None,
+    action_trace_raw: list[dict[str, Any]] | None = None,
+) -> None:
+    event: dict[str, Any] = {
+        "kind": "subAgentCall",
+        "ts_start": ts_start,
+        "ts_end": now_ms(),
+        "agent_type": agent_type,
+        "prompt": truncate_prompt(prompt),
+        "status": status,
+    }
+    if request_id is not None:
+        event["request_id"] = request_id
+    if error_message is not None:
+        event["error_message"] = truncate_error(error_message)
+    if action_trace_raw is not None:
+        event["action_trace"] = action_trace_raw
+    emit_trace_event(event)
+
+
+def emit_extension_action(
+    *,
+    ts_start: int,
+    request: ExtensionActionRequest,
+    status: ExtensionActionStatus,
+    response: BaseModel | None = None,
+    error_message: str | None = None,
+) -> None:
+    event: dict[str, Any] = {
+        "kind": "extensionAction",
+        "ts_start": ts_start,
+        "ts_end": now_ms(),
+        "action_name": request.name,
+        "request_summary": summarize_request(request),
+        "status": status,
+    }
+    result_summary = summarize_response(request, response)
+    if status == "success" and result_summary is not None:  # no result on failure
+        event["result_summary"] = result_summary
+    if error_message is not None:
+        event["error_message"] = truncate_error(error_message)
+    emit_trace_event(event)
+
+
+def emit_side_effect(*, effect_type: SideEffectType, description: str) -> None:
+    emit_trace_event(
+        {
+            "kind": "sideEffect",
+            "ts": now_ms(),
+            "effect_type": effect_type,
+            "description": description,
+        }
+    )
diff --git a/packages/narada-pyodide/src/narada/utils.py b/packages/narada-pyodide/src/narada/utils.py
index 68778c4..1107ad9 100644
--- a/packages/narada-pyodide/src/narada/utils.py
+++ b/packages/narada-pyodide/src/narada/utils.py
@@ -1,5 +1,7 @@
 from typing import TYPE_CHECKING
 
+from . import _trace
+
 if TYPE_CHECKING:
     # Magic functions injected by the JavaScript harness.
     def _narada_render_html(html: str) -> None: ...
@@ -18,6 +20,10 @@ def download_file(filename: str, content: str | bytes) -> None:
                  If bytes, writes in binary mode.
     """
     _narada_download_file(filename, content)
+    _trace.emit_side_effect(
+        effect_type="download_file",
+        description=f"Downloaded file: {filename}",
+    )
 
 
 def render_html(html: str) -> None:
@@ -28,3 +34,7 @@ def render_html(html: str) -> None:
         html: The HTML content to render.
     """
     _narada_render_html(html)
+    _trace.emit_side_effect(
+        effect_type="render_html",
+        description="Rendered HTML in a new tab",
+    )
diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py
index df6bfa1..509499c 100644
--- a/packages/narada-pyodide/src/narada/window.py
+++ b/packages/narada-pyodide/src/narada/window.py
@@ -52,6 +52,8 @@
 from pyodide.ffi import JsProxy, create_once_callable
 from pyodide.http import pyfetch
 
+from . import _trace
+
 # Magic variable injected by the JavaScript harness that stores the IDs of the current runnables
 # in the stack on the frontend.
 
@@ -192,6 +194,12 @@ async def dispatch_request(
 
         The higher-level `agent` method should be preferred for most use cases.
         """
+        # Trace instrumentation: the entire method body is wrapped so that any
+        # exit (successful return, timeout, or non-timeout failure) produces a
+        # ``subAgentCall`` trace event with matching status. See `_trace.py`.
+        trace_start_ms = _trace.now_ms()
+        agent_type_str = agent.value if isinstance(agent, Agent) else str(agent)
+
         deadline = time.monotonic() + timeout
 
         headers = {"Content-Type": "application/json"}
@@ -305,6 +313,18 @@ async def dispatch_request(
                         else:
                             response_content["structuredOutput"] = None
 
+                    _trace.emit_sub_agent_call(
+                        ts_start=trace_start_ms,
+                        agent_type=agent_type_str,
+                        prompt=prompt,
+                        status="success",
+                        request_id=request_id,
+                        action_trace_raw=(
+                            response_content.get("actionTrace")
+                            if response_content is not None
+                            else None
+                        ),
+                    )
                     return response
 
                 # Poll every 3 seconds.
@@ -313,7 +333,32 @@ async def dispatch_request(
                 raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout)
 
         except asyncio.TimeoutError:
+            _trace.emit_sub_agent_call(
+                ts_start=trace_start_ms,
+                agent_type=agent_type_str,
+                prompt=prompt,
+                status="timeout",
+                error_message=f"Timed out after {timeout}s",
+            )
             raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout)
+        except NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE:
+            _trace.emit_sub_agent_call(
+                ts_start=trace_start_ms,
+                agent_type=agent_type_str,
+                prompt=prompt,
+                status="timeout",
+                error_message=f"Timed out after {timeout}s",
+            )
+            raise
+        except Exception as err:
+            _trace.emit_sub_agent_call(
+                ts_start=trace_start_ms,
+                agent_type=agent_type_str,
+                prompt=prompt,
+                status="error",
+                error_message=str(err),
+            )
+            raise
 
     @overload
     async def agent(
@@ -562,51 +607,85 @@ async def _run_extension_action(
         *,
         timeout: int | None = None,
     ) -> _ResponseModel | None:
-        headers = {"Content-Type": "application/json"}
-        if self._api_key is not None:
-            headers["x-api-key"] = self._api_key
-        else:
-            assert self._user_id is not None
-            assert self._env is not None
+        # Trace instrumentation: every exit path emits an ``extensionAction``
+        # trace event with a status matching the outcome. See `_trace.py`.
+        trace_start_ms = _trace.now_ms()
 
-            headers["Authorization"] = f"Bearer {await _narada_get_id_token()}"
-            headers["X-Narada-User-ID"] = self._user_id
-            headers["X-Narada-Env"] = self._env
+        try:
+            headers = {"Content-Type": "application/json"}
+            if self._api_key is not None:
+                headers["x-api-key"] = self._api_key
+            else:
+                assert self._user_id is not None
+                assert self._env is not None
 
-        body = {
-            "action": request.model_dump(),
-            "browserWindowId": self.browser_window_id,
-            "parentRunIds": _parent_run_ids(),
-        }
-        if timeout is not None:
-            body["timeout"] = timeout
-
-        fetch_response = await pyfetch(
-            f"{self._base_url}/extension-actions",
-            method="POST",
-            headers=headers,
-            body=json.dumps(body),
-            # Don't specify `timeout` here as the (soft) timeout is handled by the server.
-        )
+                headers["Authorization"] = f"Bearer {await _narada_get_id_token()}"
+                headers["X-Narada-User-ID"] = self._user_id
+                headers["X-Narada-Env"] = self._env
+
+            body = {
+                "action": request.model_dump(),
+                "browserWindowId": self.browser_window_id,
+                "parentRunIds": _parent_run_ids(),
+            }
+            if timeout is not None:
+                body["timeout"] = timeout
 
-        if fetch_response.status == HTTPStatus.GATEWAY_TIMEOUT:
-            raise NaradaTimeoutError
-        elif not fetch_response.ok:
-            status = fetch_response.status
-            text = await fetch_response.text()
-            raise NaradaError(f"Failed to run extension action: {status} {text}")
+            fetch_response = await pyfetch(
+                f"{self._base_url}/extension-actions",
+                method="POST",
+                headers=headers,
+                body=json.dumps(body),
+                # Don't specify `timeout` here as the (soft) timeout is handled by the server.
+            )
 
-        resp_json = await fetch_response.json()
+            if fetch_response.status == HTTPStatus.GATEWAY_TIMEOUT:
+                raise NaradaTimeoutError
+            elif not fetch_response.ok:
+                status = fetch_response.status
+                text = await fetch_response.text()
+                raise NaradaError(f"Failed to run extension action: {status} {text}")
 
-        response = ExtensionActionResponse.model_validate(resp_json)
-        if response.status == "error":
-            raise NaradaError(response.error)
+            resp_json = await fetch_response.json()
 
-        if response_model is None:
-            return None
+            response = ExtensionActionResponse.model_validate(resp_json)
+            if response.status == "error":
+                raise NaradaError(response.error)
 
-        assert response.data is not None
-        return response_model.model_validate_json(response.data)
+            if response_model is None:
+                _trace.emit_extension_action(
+                    ts_start=trace_start_ms,
+                    request=request,
+                    status="success",
+                )
+                return None
+
+            assert response.data is not None
+            parsed_response = response_model.model_validate_json(response.data)
+            _trace.emit_extension_action(
+                ts_start=trace_start_ms,
+                request=request,
+                status="success",
+                response=parsed_response,
+            )
+            return parsed_response
+
+        except NaradaTimeoutError:
+            _trace.emit_extension_action(
+                ts_start=trace_start_ms,
+                request=request,
+                status="timeout",
+                error_message="Extension action timed out",
+            )
+            raise
+        except Exception as err:
+            _trace.emit_extension_action(
+                ts_start=trace_start_ms,
+                request=request,
+                status="error",
+                error_message=str(err),
+            )
+            raise
 
 
 class LocalBrowserWindow(BaseBrowserWindow):
diff --git a/packages/narada-pyodide/tests/README.md b/packages/narada-pyodide/tests/README.md
new file mode 100644
index 0000000..5ba6499
--- /dev/null
+++ b/packages/narada-pyodide/tests/README.md
@@ -0,0 +1,20 @@
+# narada-pyodide tests
+
+narada-pyodide and narada both publish under the top-level `narada` Python
+package namespace. When both are installed in the same environment, the
+workspace-installed `narada` package shadows narada-pyodide's source. This
+is fine at runtime (Pyodide only installs narada-pyodide) but breaks
+local unit testing.
+
+To run the unit tests locally from the workspace root:
+
+```bash
+uv pip uninstall narada
+uv run --package narada-pyodide pytest packages/narada-pyodide/tests/
+```
+
+Re-running `uv sync` will reinstall the `narada` package and require the
+uninstall step again.
+
+The `conftest.py` stubs the Pyodide-only `js` and `pyodide.*` imports so
+the non-HTTP helpers in narada-pyodide can be exercised on host CPython.
diff --git a/packages/narada-pyodide/tests/__init__.py b/packages/narada-pyodide/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/packages/narada-pyodide/tests/conftest.py b/packages/narada-pyodide/tests/conftest.py
new file mode 100644
index 0000000..47abb32
--- /dev/null
+++ b/packages/narada-pyodide/tests/conftest.py
@@ -0,0 +1,56 @@
+"""Pytest fixtures shared across narada-pyodide tests.
+
+narada-pyodide is designed to run inside a Pyodide web worker; several of its
+transitive imports (``js``, ``pyodide.ffi``, ``pyodide.http``) are only
+available in that environment. To make the pure-Python unit tests runnable on
+a host CPython interpreter we stub those modules before any narada-pyodide
+code is imported. The real Pyodide runtime will obviously provide them.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+from collections.abc import Iterator
+from typing import Any
+from unittest.mock import MagicMock
+
+# Stub Pyodide-only modules. Must happen before `from narada import _trace`.
+for _mod in ("js", "pyodide", "pyodide.ffi", "pyodide.http"):
+    if _mod not in sys.modules:
+        sys.modules[_mod] = MagicMock()
+
+import pytest  # noqa: E402
+
+from narada import _trace  # noqa: E402
+
+
+class RecordingEmitter:
+    """Captures every event forwarded by ``_trace.emit_trace_event`` during a
+    test so assertions can inspect the JSON that would reach the JS harness.
+    """
+
+    def __init__(self) -> None:
+        self.events: list[dict[str, Any]] = []
+
+    def __call__(self, event_json: str) -> None:
+        # Round-trip through json to catch non-serialisable payloads early.
+        self.events.append(json.loads(event_json))
+
+
+@pytest.fixture
+def recorded_events() -> Iterator[RecordingEmitter]:
+    """Replace the JS-harness-injected ``_narada_emit_trace_event`` with a
+    recorder for the duration of a test, restoring the original binding
+    afterwards.
+    """
+    emitter = RecordingEmitter()
+    previous = getattr(_trace, "_narada_emit_trace_event", None)
+    _trace._narada_emit_trace_event = emitter  # type: ignore[attr-defined]
+    try:
+        yield emitter
+    finally:
+        if previous is None:
+            delattr(_trace, "_narada_emit_trace_event")
+        else:
+            _trace._narada_emit_trace_event = previous  # type: ignore[attr-defined]
diff --git a/packages/narada-pyodide/tests/test_trace.py b/packages/narada-pyodide/tests/test_trace.py
new file mode 100644
index 0000000..2a60f54
--- /dev/null
+++ b/packages/narada-pyodide/tests/test_trace.py
@@ -0,0 +1,432 @@
+"""Tests for the private ``narada._trace`` module.
+
+Covers the pure helpers (truncation, request/response summarisation) plus the
+``emit_*`` functions, asserting that the JSON payloads emitted to the JS
+harness match the ``PythonTraceEvent`` Pydantic schema defined in
+``narada_core.actions.models``.
+"""
+
+from __future__ import annotations
+
+import pytest
+from narada_core.actions.models import (
+    AgenticMouseActionRequest,
+    AgenticSelectorRequest,
+    CloseWindowRequest,
+    GetFullHtmlRequest,
+    GetFullHtmlResponse,
+    GetScreenshotRequest,
+    GetScreenshotResponse,
+    GetSimplifiedHtmlRequest,
+    GetSimplifiedHtmlResponse,
+    GetUrlRequest,
+    GetUrlResponse,
+    GoToUrlRequest,
+    PrintMessageRequest,
+    PythonAgentRunTrace,
+    ReadGoogleSheetRequest,
+    ReadGoogleSheetResponse,
+    WriteGoogleSheetRequest,
+    parse_action_trace,
+)
+
+from narada import _trace
+
+
+# ---------------------------------------------------------------------------
+# Truncation
+# ---------------------------------------------------------------------------
+
+
+class TestTruncate:
+    def test_returns_none_for_none(self) -> None:
+        assert _trace.truncate(None, 10) is None
+
+    def test_preserves_short_strings(self) -> None:
+        assert _trace.truncate("hello", 10) == "hello"
+
+    def test_preserves_exact_length(self) -> None:
+        assert _trace.truncate("1234567890", 10) == "1234567890"
+
+    def test_truncates_long_strings_with_ellipsis(self) -> None:
+        result = _trace.truncate("abcdefghij", 5)
+        assert result is not None
+        assert len(result) == 5
+        assert result.endswith("\u2026")
+        assert result.startswith("abcd")
+
+    def test_truncate_prompt_falls_back_to_empty(self) -> None:
+        assert _trace.truncate_prompt("") == ""
+
+    def test_truncate_error_bounded(self) -> None:
+        long = "x" * 5000
+        result = _trace.truncate_error(long)
+        assert len(result) == 1000
+        assert result.endswith("\u2026")
+
+
+# ---------------------------------------------------------------------------
+# summarize_request / summarize_response
+# ---------------------------------------------------------------------------
+
+
+class TestSummarizeRequest:
+    def test_go_to_url(self) -> None:
+        req = GoToUrlRequest(url="https://example.com", new_tab=True)
+        assert _trace.summarize_request(req) == {
+            "url": "https://example.com",
+            "new_tab": True,
+        }
+
+    @pytest.mark.parametrize(
+        "request_instance",
+        [
+            GetUrlRequest(),
+            GetScreenshotRequest(),
+            GetFullHtmlRequest(),
+            GetSimplifiedHtmlRequest(),
+            CloseWindowRequest(),
+        ],
+    )
+    def test_parameterless_requests_return_empty(
+        self, request_instance: object
+    ) -> None:
+        assert _trace.summarize_request(request_instance) == {}  # type: ignore[arg-type]
+
+    def test_read_google_sheet(self) -> None:
+        req = ReadGoogleSheetRequest(spreadsheet_id="abc123", range="Sheet1!A1:B10")
+        assert _trace.summarize_request(req) == {
+            "spreadsheet_id": "abc123",
+            "range": "Sheet1!A1:B10",
+        }
+
+    def test_write_google_sheet_reports_row_count_not_values(self) -> None:
+        big_values = [["r"] * 5 for _ in range(847)]
+        req = WriteGoogleSheetRequest(
+            spreadsheet_id="abc123", range="Sheet1!A1:E847", values=big_values
+        )
+        summary = _trace.summarize_request(req)
+        assert summary == {
+            "spreadsheet_id": "abc123",
+            "range": "Sheet1!A1:E847",
+            "row_count": 847,
+        }
+        # Explicitly guard against regressions that leak row payloads.
+        assert "values" not in summary
+
+    def test_print_message_truncates_long_messages(self) -> None:
+        long_msg = "x" * 2000
+        summary = _trace.summarize_request(PrintMessageRequest(message=long_msg))
+        truncated = summary["message"]
+        assert isinstance(truncated, str)
+        assert len(truncated) == 500
+        assert truncated.endswith("\u2026")
+
+    def test_agentic_selector_reports_action_type_and_truncates_query(self) -> None:
+        req = AgenticSelectorRequest(
+            action={"type": "click"},
+            selectors={"id": "submit-btn"},
+            fallback_operator_query="y" * 1000,
+        )
+        summary = _trace.summarize_request(req)
+        assert summary["action_type"] == "click"
+        assert len(summary["fallback_operator_query"]) == 200
+        # Selectors are intentionally omitted (not user-useful in trace view).
+        assert "selectors" not in summary
+
+    def test_agentic_mouse_action(self) -> None:
+        req = AgenticMouseActionRequest(
+            action={"type": "click"},
+            recorded_click={"x": 1, "y": 2, "viewport": {"width": 10, "height": 20}},
+            fallback_operator_query="click the button",
+            resize_window=False,
+        )
+        summary = _trace.summarize_request(req)
+        assert summary == {
+            "action_type": "click",
+            "fallback_operator_query": "click the button",
+        }
+
+
+class TestSummarizeResponse:
+    def test_get_url_returns_url(self) -> None:
+        req = GetUrlRequest()
+        resp = GetUrlResponse(url="https://example.com/page")
+        assert _trace.summarize_response(req, resp) == {
+            "url": "https://example.com/page"
+        }
+
+    def test_get_screenshot_returns_fixed_description(self) -> None:
+        req = GetScreenshotRequest()
+        resp = GetScreenshotResponse(
+            base64_content="...huge blob...",
+            name="page.png",
+            mime_type="image/png",
+            timestamp="2025-01-01T00:00:00Z",
+        )
+        summary = _trace.summarize_response(req, resp)
+        assert summary == {"description": "Took screenshot of the page"}
+
+    def test_full_html_returns_fixed_description(self) -> None:
+        summary = _trace.summarize_response(
+            GetFullHtmlRequest(), GetFullHtmlResponse(html="<html>...massive...</html>")
+        )
+        assert summary == {"description": "Got the full HTML of the page"}
+
+    def test_simplified_html_returns_fixed_description(self) -> None:
+        summary = _trace.summarize_response(
+            GetSimplifiedHtmlRequest(),
+            GetSimplifiedHtmlResponse(html="<html>short</html>"),
+        )
+        assert summary == {"description": "Got the simplified HTML of the page"}
+
+    def test_read_google_sheet_reports_dimensions(self) -> None:
+        req = ReadGoogleSheetRequest(spreadsheet_id="x", range="A1:C5")
+        resp = ReadGoogleSheetResponse(values=[["a", "b", "c"], ["d", "e", "f"], ["g"]])
+        assert _trace.summarize_response(req, resp) == {
+            "row_count": 3,
+            "column_count": 3,
+        }
+
+    def test_read_google_sheet_empty_values(self) -> None:
+        req = ReadGoogleSheetRequest(spreadsheet_id="x", range="A1:C5")
+        resp = ReadGoogleSheetResponse(values=[])
+        assert _trace.summarize_response(req, resp) == {
+            "row_count": 0,
+            "column_count": 0,
+        }
+
+    def test_write_google_sheet_returns_none(self) -> None:
+        req = WriteGoogleSheetRequest(spreadsheet_id="x", range="A1", values=[["v"]])
+        assert _trace.summarize_response(req, None) is None
+
+    def test_close_window_returns_none(self) -> None:
+        assert _trace.summarize_response(CloseWindowRequest(), None) is None
+
+
+# ---------------------------------------------------------------------------
+# Event emitters
+# ---------------------------------------------------------------------------
+
+
+class TestEmitSubAgentCall:
+    def test_success_with_action_trace(self, recorded_events) -> None:
+        _trace.emit_sub_agent_call(
+            ts_start=1000,
+            agent_type="operator",
+            prompt="Find leads",
+            status="success",
+            request_id="req_abc",
+            action_trace_raw=[{"url": "https://sf.com", "action": "click Leads"}],
+        )
+        (event,) = recorded_events.events
+        assert event["kind"] == "subAgentCall"
+        assert event["ts_start"] == 1000
+        assert event["ts_end"] >= 1000
+        assert event["agent_type"] == "operator"
+        assert event["prompt"] == "Find leads"
+        assert event["status"] == "success"
+        assert event["request_id"] == "req_abc"
+        assert event["action_trace"] == [
+            {"url": "https://sf.com", "action": "click Leads"}
+        ]
+        assert "error_message" not in event
+
+    def test_success_without_action_trace_omits_field(self, recorded_events) -> None:
+        _trace.emit_sub_agent_call(
+            ts_start=1000, agent_type="operator", prompt="hi", status="success"
+        )
+        (event,) = recorded_events.events
+        assert "action_trace" not in event
+        assert "request_id" not in event
+
+    def test_timeout_includes_error_message(self, recorded_events) -> None:
+        _trace.emit_sub_agent_call(
+            ts_start=1000,
+            agent_type="operator",
+            prompt="hi",
+            status="timeout",
+            error_message="Timed out after 60s",
+        )
+        (event,) = recorded_events.events
+        assert event["status"] == "timeout"
+        assert event["error_message"] == "Timed out after 60s"
+
+    def test_error_truncates_error_message(self, recorded_events) -> None:
+        _trace.emit_sub_agent_call(
+            ts_start=1000,
+            agent_type="operator",
+            prompt="hi",
+            status="error",
+            error_message="x" * 5000,
+        )
+        (event,) = recorded_events.events
+        assert len(event["error_message"]) == 1000
+
+    def test_prompt_is_truncated(self, recorded_events) -> None:
+        _trace.emit_sub_agent_call(
+            ts_start=1000,
+            agent_type="operator",
+            prompt="y" * 1000,
+            status="success",
+        )
+        (event,) = recorded_events.events
+        assert len(event["prompt"]) == 500
+
+
+class TestEmitExtensionAction:
+    def test_success_with_result_summary(self, recorded_events) -> None:
+        req = GetUrlRequest()
+        resp = GetUrlResponse(url="https://x.com")
+        _trace.emit_extension_action(
+            ts_start=2000, request=req, status="success", response=resp
+        )
+        (event,) = recorded_events.events
+        assert event["kind"] == "extensionAction"
+        assert event["action_name"] == "get_url"
+        assert event["request_summary"] == {}
+        assert event["result_summary"] == {"url": "https://x.com"}
+        assert event["status"] == "success"
+
+    def test_success_without_result_summary_omits_field(self, recorded_events) -> None:
+        req = WriteGoogleSheetRequest(
+            spreadsheet_id="abc", range="A1:B2", values=[["1", "2"], ["3", "4"]]
+        )
+        _trace.emit_extension_action(ts_start=2000, request=req, status="success")
+        (event,) = recorded_events.events
+        assert event["request_summary"] == {
+            "spreadsheet_id": "abc",
+            "range": "A1:B2",
+            "row_count": 2,
+        }
+        assert "result_summary" not in event
+
+    def test_timeout(self, recorded_events) -> None:
+        _trace.emit_extension_action(
+            ts_start=0,
+            request=GoToUrlRequest(url="https://a.b", new_tab=False),
+            status="timeout",
+            error_message="Timed out",
+        )
+        (event,) = recorded_events.events
+        assert event["status"] == "timeout"
+        assert event["action_name"] == "go_to_url"
+
+    def test_error(self, recorded_events) -> None:
+        _trace.emit_extension_action(
+            ts_start=0,
+            request=CloseWindowRequest(),
+            status="error",
+            error_message="permission denied",
+        )
+        (event,) = recorded_events.events
+        assert event["status"] == "error"
+        assert event["error_message"] == "permission denied"
+
+
+class TestEmitSideEffect:
+    def test_download_file(self, recorded_events) -> None:
+        _trace.emit_side_effect(
+            effect_type="download_file", description="Downloaded file: report.pdf"
+        )
+        (event,) = recorded_events.events
+        assert event["kind"] == "sideEffect"
+        assert event["effect_type"] == "download_file"
+        assert event["description"] == "Downloaded file: report.pdf"
+        assert "ts" in event
+
+    def test_render_html(self, recorded_events) -> None:
+        _trace.emit_side_effect(
+            effect_type="render_html", description="Rendered HTML in a new tab"
+        )
+        (event,) = recorded_events.events
+        assert event["effect_type"] == "render_html"
+
+
+# ---------------------------------------------------------------------------
+# End-to-end schema validation: every event kind produced by the emitters
+# round-trips cleanly through the ``PythonAgentRunTrace`` Pydantic model and
+# the ``parse_action_trace`` entry point used by downstream consumers.
+# ---------------------------------------------------------------------------
+
+
+class TestPythonAgentRunTraceRoundtrip:
+    def test_every_event_kind_parses(self, recorded_events) -> None:
+        _trace.emit_sub_agent_call(
+            ts_start=1000,
+            agent_type="operator",
+            prompt="Find leads",
+            status="success",
+            request_id="req_abc",
+            action_trace_raw=[{"url": "https://sf.com", "action": "click Leads"}],
+        )
+        _trace.emit_extension_action(
+            ts_start=2000,
+            request=GetScreenshotRequest(),
+            status="success",
+            response=GetScreenshotResponse(
+                base64_content="ignored",
+                name="page.png",
+                mime_type="image/png",
+                timestamp="now",
+            ),
+        )
+        _trace.emit_side_effect(
+            effect_type="download_file", description="Downloaded file: leads.csv"
+        )
+
+        # Assemble a representative PythonAgentRunTrace containing the emitted
+        # events alongside stdout / stderr events (which are synthesised by
+        # the JS-side runnable, not the SDK).
+        stdout_stderr_events = [
+            {"kind": "stdout", "ts": 500, "text": "starting"},
+            {"kind": "stderr", "ts": 2500, "text": "deprecation warning"},
+        ]
+        events = stdout_stderr_events + recorded_events.events
+        events.sort(key=lambda e: e.get("ts", e.get("ts_start", 0)))
+
+        raw = [
+            {
+                "step_type": "pythonAgentRun",
+                "url": "https://app.narada.ai/agent",
+                "status": "success",
+                "duration_ms": 3000,
+                "events": events,
+            }
+        ]
+        trace = parse_action_trace(raw)
+        assert len(trace) == 1
+        (node,) = trace
+        assert isinstance(node, PythonAgentRunTrace)
+        # Events sort by ``ts`` (falling back to ``ts_start``): the sub-agent
+        # call (1000) and extension action (2000) keep the ``ts_start`` passed
+        # above, while the side effect is stamped with ``now_ms()`` at emit
+        # time, far later than the synthetic stderr (ts=2500), so it sorts last.
+        assert [e.kind for e in node.events] == [
+            "stdout",
+            "subAgentCall",
+            "extensionAction",
+            "stderr",
+            "sideEffect",
+        ]
+        # Nested action_trace rehydrates correctly as an OperatorActionTrace.
+        sub_call = node.events[1]
+        assert sub_call.kind == "subAgentCall"
+        assert sub_call.action_trace is not None
+        assert sub_call.action_trace[0].url == "https://sf.com"
+
+    def test_error_status_parses(self) -> None:
+        raw = [
+            {
+                "step_type": "pythonAgentRun",
+                "url": "https://x",
+                "status": "error",
+                "duration_ms": 120,
+                "error_message": "ZeroDivisionError",
+                "events": [],
+            }
+        ]
+        trace = parse_action_trace(raw)
+        assert isinstance(trace[0], PythonAgentRunTrace)
+        assert trace[0].status == "error"
+        assert trace[0].error_message == "ZeroDivisionError"
diff --git a/packages/narada/pyproject.toml b/packages/narada/pyproject.toml
index cab7376..59e1179 100644
--- a/packages/narada/pyproject.toml
+++ b/packages/narada/pyproject.toml
@@ -1,13 +1,13 @@
 [project]
 name = "narada"
-version = "0.1.42"
+version = "0.1.43"
 description = "Python client SDK for Narada"
 license = "Apache-2.0"
 readme = "README.md"
 authors = [{ name = "Narada", email = "support@narada.ai" }]
 requires-python = ">=3.12"
 dependencies = [
-    "narada-core==0.0.17",
+    "narada-core==0.0.18",
     "aiohttp>=3.12.13",
     "playwright>=1.53.0",
     "rich>=14.0.0",
diff --git a/uv.lock b/uv.lock
index 5fd8861..310eca5 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.12"
 resolution-markers = [
     "python_full_version >= '3.13'",
@@ -312,7 +312,7 @@ wheels = [
 
 [[package]]
 name = "narada"
-version = "0.1.42"
+version = "0.1.43"
 source = { editable = "packages/narada" }
 dependencies = [
     { name = "aiohttp" },
@@ -345,7 +345,7 @@ dev = [
 
 [[package]]
 name = "narada-core"
-version = "0.0.17"
+version = "0.0.18"
 source = { editable = "packages/narada-core" }
 dependencies = [
     { name = "pydantic" },
@@ -356,7 +356,7 @@ requires-dist = [{ name = "pydantic", specifier = "==2.12.5" }]
 
 [[package]]
 name = "narada-pyodide"
-version = "0.0.43"
+version = "0.0.44"
 source = { editable = "packages/narada-pyodide" }
 dependencies = [
     { name = "narada-core" },
@@ -367,6 +367,7 @@ dependencies = [
 dev = [
     { name = "pyodide-py", version = "0.27.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" },
     { name = "pyodide-py", version = "0.28.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" },
+    { name = "pytest" },
 ]
 
 [package.metadata]
@@ -376,7 +377,10 @@ requires-dist = [
 ]
 
 [package.metadata.requires-dev]
-dev = [{ name = "pyodide-py", specifier = ">=0.27.7" }]
+dev = [
+    { name = "pyodide-py", specifier = ">=0.27.7" },
+    { name = "pytest", specifier = ">=8.4.1" },
+]
 
 [[package]]
 name = "packaging"

From ae3a1115bd7181b12b193a5396385d3afbacb3c3 Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Thu, 16 Apr 2026 17:17:24 -0700
Subject: [PATCH 02/13] Apply review ship-blockers to Python agent trace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses the four ship-blocker findings from the cross-dimensional review:

Robustness — trace emission must not break user code (_trace.py):
  - `emit_trace_event` now wraps the serialise + forward in try/except and
    logs the failure instead of propagating it. Previously a stray non-
    serialisable value in a summary (a datetime, a Pydantic model leak)
    would raise TypeError out of `_run_extension_action` and abort the
    user's agent mid-run.
  - `json.dumps(event, default=str)` stringifies unknown types defensively.

Scalability — bound recursive trace size (_trace.py):
  - `emit_sub_agent_call` now strips the `events` list from any nested
    `pythonAgentRun` node in the forwarded action trace, replacing it with
    a `truncated_event_count` marker. Previously a custom Python agent
    that delegated to another custom Python agent embedded the sub-run's
    full event timeline in the parent's persisted JSON, producing
    O(breadth^depth) growth.

Robustness — code-quality cleanup (window.py):
  - Collapsed the duplicated `except asyncio.TimeoutError` / `except
    NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE` blocks in
    `dispatch_request` into a single `except (A, B):` branch. Removes
    ~12 lines and the divergence risk.

Robustness — side-effect tracing on failure (utils.py):
  - `download_file` and `render_html` now emit a "failed" side-effect
    trace when the underlying JS call raises, then re-raise. Previously
    a failed download produced no trace at all — users saw silence
    rather than the actual error.

Type safety — schema invariants (narada-core/actions/models.py):
  - `PythonAgentRunTrace.duration_ms` and `truncated_event_count` now
    use `NonNegativeInt` — Pydantic rejects negative values at parse
    time rather than letting `-42ms` reach the dashboard formatter.
  - New `@model_validator` on `PythonSubAgentCallEvent` and
    `PythonExtensionActionEvent` rejects `ts_end < ts_start`; clock
    skew on the Pyodide clock can no longer produce negative-duration
    events that the renderer would display as `-5ms`.
  - `parse_action_trace` now dispatches deterministically based on the
    first item's discriminator (`step_type` vs `action`+`url`) rather
    than try/except-falling-through two adapters. Eliminates the risk
    of silently misrouting a homogeneity-violated trace.

Tests:
  - 13 new unit tests across `TestEmitDefensive`,
    `TestStripNestedPythonEvents`, `TestPythonEventInvariants`, and
    `TestParseActionTraceDispatch`. Full suite is now 51 tests, all
    passing under `uv run --package narada-pyodide pytest`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../src/narada_core/actions/models.py         |  52 +++++-
 packages/narada-pyodide/src/narada/_trace.py  |  50 +++++-
 packages/narada-pyodide/src/narada/utils.py   |  20 ++-
 packages/narada-pyodide/src/narada/window.py  |  11 +-
 packages/narada-pyodide/tests/test_trace.py   | 165 ++++++++++++++++++
 5 files changed, 281 insertions(+), 17 deletions(-)

diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py
index c566d75..025ea51 100644
--- a/packages/narada-core/src/narada_core/actions/models.py
+++ b/packages/narada-core/src/narada_core/actions/models.py
@@ -12,7 +12,14 @@
     override,
 )
 
-from pydantic import BaseModel, Field, TypeAdapter, ValidationError
+from pydantic import (
+    BaseModel,
+    Field,
+    NonNegativeInt,
+    TypeAdapter,
+    ValidationError,
+    model_validator,
+)
 
 # There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method
 # under the hood.
@@ -245,6 +252,14 @@ class PythonSubAgentCallEvent(BaseModel):
     error_message: str | None = None
     action_trace: ActionTrace | None = None
 
+    @model_validator(mode="after")
+    def _check_ts_ordering(self) -> PythonSubAgentCallEvent:
+        if self.ts_end < self.ts_start:
+            raise ValueError(
+                f"PythonSubAgentCallEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})"
+            )
+        return self
+
 
 class PythonExtensionActionEvent(BaseModel):
     kind: Literal["extensionAction"] = "extensionAction"
@@ -260,6 +275,14 @@ class PythonExtensionActionEvent(BaseModel):
     status: Literal["success", "error", "timeout"]
     error_message: str | None = None
 
+    @model_validator(mode="after")
+    def _check_ts_ordering(self) -> PythonExtensionActionEvent:
+        if self.ts_end < self.ts_start:
+            raise ValueError(
+                f"PythonExtensionActionEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})"
+            )
+        return self
+
 
 class PythonSideEffectEvent(BaseModel):
     kind: Literal["sideEffect"] = "sideEffect"
@@ -282,9 +305,13 @@ class PythonAgentRunTrace(BaseModel):
     step_type: Literal["pythonAgentRun"] = "pythonAgentRun"
     url: str
     status: Literal["success", "error", "aborted"]
-    duration_ms: int
+    duration_ms: NonNegativeInt
     events: list[PythonTraceEvent]
     error_message: str | None = None
+    # Set by the runtime when it caps the number of buffered events (see
+    # `python.worker.ts`). Informational only; the dashboard surfaces it so
+    # users know their trace is partial.
+    truncated_event_count: NonNegativeInt | None = None
 
 
 ApaStepTrace = Annotated[
@@ -332,7 +359,26 @@ class PythonAgentRunTrace(BaseModel):
 
 
 def parse_action_trace(trace_data: list[dict[str, Any] | Any]) -> ActionTrace:
-    """Parse the action trace, it will either be a list of operator action trace items or a list of APA action trace items."""
+    """Parse the action trace.
+
+    Dispatches on the shape of the first item: APA steps carry ``step_type``
+    and operator items carry ``action`` + ``url``. An empty list (no
+    discriminator available) defaults to APA, which is the superset shape
+    used by all custom agents. Any other first-item shape falls back to the
+    previous try/except adapter probing so existing callers do not regress.
+    """
+    if not trace_data:
+        return _ApaActionTraceAdapter.validate_python(trace_data)
+
+    first = trace_data[0]
+    if isinstance(first, dict) and "step_type" in first:
+        return _ApaActionTraceAdapter.validate_python(trace_data)
+    if isinstance(first, dict) and "action" in first and "url" in first:
+        return _OperatorActionTraceAdapter.validate_python(trace_data)
+
+    # Ambiguous shape — fall back to the previous try/except pattern so we
+    # do not regress existing callers passing Pydantic instances or other
+    # shapes the adapters already know how to coerce.
     try:
         return _OperatorActionTraceAdapter.validate_python(trace_data)
     except ValidationError:
diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py
index 037d10f..06be667 100644
--- a/packages/narada-pyodide/src/narada/_trace.py
+++ b/packages/narada-pyodide/src/narada/_trace.py
@@ -14,6 +14,7 @@
 from __future__ import annotations
 
 import json
+import logging
 import time
 from typing import TYPE_CHECKING, Any, Literal
 
@@ -51,8 +52,17 @@ def _narada_emit_trace_event(event_json: str) -> None: ...
 _MAX_ERROR_CHARS = 1000
 _MAX_QUERY_CHARS = 200
 
+# When a sub-agent's response includes its own action trace (for example, the
+# operator's step-by-step actions), we forward that trace one level deep so
+# the dashboard can expand it. We do not forward deeper nesting — Python
+# agents that delegate into other Python agents would otherwise produce
+# exponentially-sized persisted traces.
+_MAX_NESTED_ACTION_TRACE_DEPTH = 1
+
 _ELLIPSIS = "\u2026"
 
+_logger = logging.getLogger(__name__)
+
 
 def now_ms() -> int:
     """Current wall-clock time in integer milliseconds."""
@@ -84,8 +94,44 @@ def emit_trace_event(event: dict[str, Any]) -> None:
     ``PythonTraceEvent`` variants defined in ``narada_core.actions.models``.
     No validation is performed here; callers construct events directly and
     are responsible for matching the schema.
+
+    Observability must not break the thing it observes: any failure
+    serialising or forwarding the event is logged and swallowed rather than
+    propagated to user code. ``default=str`` catches stray non-serialisable
+    values (timestamps, Pydantic models, numpy scalars) by stringifying them.
+    """
+    try:
+        _narada_emit_trace_event(json.dumps(event, default=str))  # noqa: F821
+    except Exception:  # noqa: BLE001 — broad by design; see docstring
+        _logger.warning("trace event emission failed", exc_info=True)
+
+
+def _strip_nested_python_events(
+    raw: list[dict[str, Any]] | None,
+) -> list[dict[str, Any]] | None:
+    """Forward a nested action trace one level deep. Any ``pythonAgentRun``
+    node inside retains its outer status/duration metadata but its ``events``
+    list is dropped, preventing deep recursion from blowing up persisted
+    JSON size. A ``truncated_event_count`` field is left behind so the
+    dashboard can show that events were elided.
     """
-    _narada_emit_trace_event(json.dumps(event))  # noqa: F821
+    if raw is None:
+        return None
+
+    def strip(item: dict[str, Any]) -> dict[str, Any]:
+        if not isinstance(item, dict):
+            return item
+        if item.get("step_type") != "pythonAgentRun":
+            return item
+        events = item.get("events", [])
+        stripped = dict(item)
+        stripped["events"] = []
+        stripped["truncated_event_count"] = (
+            len(events) if isinstance(events, list) else 0
+        )
+        return stripped
+
+    return [strip(item) for item in raw]
 
 
 def summarize_request(request: ExtensionActionRequest) -> dict[str, Any]:
@@ -197,7 +243,7 @@ def emit_sub_agent_call(
     if error_message is not None:
         event["error_message"] = truncate_error(error_message)
     if action_trace_raw is not None:
-        event["action_trace"] = action_trace_raw
+        event["action_trace"] = _strip_nested_python_events(action_trace_raw)
     emit_trace_event(event)
 
 
diff --git a/packages/narada-pyodide/src/narada/utils.py b/packages/narada-pyodide/src/narada/utils.py
index 1107ad9..dbd5d19 100644
--- a/packages/narada-pyodide/src/narada/utils.py
+++ b/packages/narada-pyodide/src/narada/utils.py
@@ -19,7 +19,16 @@ def download_file(filename: str, content: str | bytes) -> None:
         content: The content to write. If str, writes in text mode (UTF-8).
                  If bytes, writes in binary mode.
     """
-    _narada_download_file(filename, content)
+    try:
+        _narada_download_file(filename, content)
+    except Exception as err:
+        # Record that the attempt happened and failed, then re-raise so user
+        # code still sees the exception.
+        _trace.emit_side_effect(
+            effect_type="download_file",
+            description=f"Failed to download file {filename}: {err}",
+        )
+        raise
     _trace.emit_side_effect(
         effect_type="download_file",
         description=f"Downloaded file: {filename}",
@@ -33,7 +42,14 @@ def render_html(html: str) -> None:
     Args:
         html: The HTML content to render.
     """
-    _narada_render_html(html)
+    try:
+        _narada_render_html(html)
+    except Exception as err:
+        _trace.emit_side_effect(
+            effect_type="render_html",
+            description=f"Failed to render HTML: {err}",
+        )
+        raise
     _trace.emit_side_effect(
         effect_type="render_html",
         description="Rendered HTML in a new tab",
diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py
index 509499c..5912f00 100644
--- a/packages/narada-pyodide/src/narada/window.py
+++ b/packages/narada-pyodide/src/narada/window.py
@@ -332,7 +332,7 @@ async def dispatch_request(
             else:
                 raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout)
 
-        except asyncio.TimeoutError:
+        except (asyncio.TimeoutError, NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE):
             _trace.emit_sub_agent_call(
                 ts_start=trace_start_ms,
                 agent_type=agent_type_str,
@@ -341,15 +341,6 @@ async def dispatch_request(
                 error_message=f"Timed out after {timeout}s",
             )
             raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout)
-        except NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE:
-            _trace.emit_sub_agent_call(
-                ts_start=trace_start_ms,
-                agent_type=agent_type_str,
-                prompt=prompt,
-                status="timeout",
-                error_message=f"Timed out after {timeout}s",
-            )
-            raise
         except Exception as err:
             _trace.emit_sub_agent_call(
                 ts_start=trace_start_ms,
diff --git a/packages/narada-pyodide/tests/test_trace.py b/packages/narada-pyodide/tests/test_trace.py
index 2a60f54..4d5e34b 100644
--- a/packages/narada-pyodide/tests/test_trace.py
+++ b/packages/narada-pyodide/tests/test_trace.py
@@ -430,3 +430,168 @@ def test_error_status_parses(self) -> None:
         assert isinstance(trace[0], PythonAgentRunTrace)
         assert trace[0].status == "error"
         assert trace[0].error_message == "ZeroDivisionError"
+
+
+# ---------------------------------------------------------------------------
+# Defensive emit: observability must never break the user's agent run
+# ---------------------------------------------------------------------------
+
+
+class TestEmitDefensive:
+    def test_non_serialisable_payload_is_stringified_not_raised(
+        self, recorded_events
+    ) -> None:
+        """A stray datetime / set / custom object in a summary should not crash
+        user code mid-run. ``default=str`` stringifies and the event still
+        reaches the harness."""
+        import datetime as _dt
+
+        _trace.emit_trace_event(
+            {
+                "kind": "stdout",
+                "ts": _dt.datetime(2026, 1, 1),  # non-serialisable in std json
+                "text": "hello",
+            }
+        )
+        # Event was recorded (ts got stringified by default=str).
+        assert len(recorded_events.events) == 1
+        assert isinstance(recorded_events.events[0]["ts"], str)
+
+    def test_harness_raising_does_not_propagate(self, monkeypatch) -> None:
+        """If the JS-injected emitter raises, we swallow and log rather than
+        propagate — tracing failures must not break the agent run."""
+
+        def _boom(_json: str) -> None:
+            raise RuntimeError("bridge down")
+
+        # `_narada_emit_trace_event` is injected by the JS harness at runtime
+        # (TYPE_CHECKING stub only in source); pass `raising=False` so the
+        # assignment succeeds even when the attribute isn't yet bound.
+        monkeypatch.setattr(_trace, "_narada_emit_trace_event", _boom, raising=False)
+        # Must not raise.
+        _trace.emit_trace_event({"kind": "stdout", "ts": 1, "text": "hi"})
+
+
+# ---------------------------------------------------------------------------
+# Nested action_trace stripping: cap recursion depth to one level
+# ---------------------------------------------------------------------------
+
+
+class TestStripNestedPythonEvents:
+    def test_passes_through_operator_items_unchanged(self) -> None:
+        raw = [{"url": "https://x", "action": "click Foo"}]
+        assert _trace._strip_nested_python_events(raw) == raw
+
+    def test_passes_through_non_python_apa_items_unchanged(self) -> None:
+        raw = [{"step_type": "goToUrl", "url": "https://x", "description": "..."}]
+        assert _trace._strip_nested_python_events(raw) == raw
+
+    def test_strips_events_from_nested_python_agent_run(self) -> None:
+        raw = [
+            {
+                "step_type": "pythonAgentRun",
+                "url": "",
+                "status": "success",
+                "duration_ms": 10,
+                "events": [{"kind": "stdout", "ts": 1, "text": "a"}],
+            }
+        ]
+        stripped = _trace._strip_nested_python_events(raw)
+        assert stripped is not None
+        assert stripped[0]["events"] == []
+        assert stripped[0]["truncated_event_count"] == 1
+
+    def test_none_passes_through(self) -> None:
+        assert _trace._strip_nested_python_events(None) is None
+
+    def test_integrates_with_emit_sub_agent_call(self, recorded_events) -> None:
+        _trace.emit_sub_agent_call(
+            ts_start=1,
+            agent_type="custom_python",
+            prompt="nested",
+            status="success",
+            action_trace_raw=[
+                {
+                    "step_type": "pythonAgentRun",
+                    "url": "",
+                    "status": "success",
+                    "duration_ms": 10,
+                    "events": [
+                        {"kind": "stdout", "ts": 1, "text": "a"},
+                        {"kind": "stdout", "ts": 2, "text": "b"},
+                    ],
+                }
+            ],
+        )
+        event = recorded_events.events[0]
+        inner = event["action_trace"][0]
+        assert inner["events"] == []
+        assert inner["truncated_event_count"] == 2
+
+
+# ---------------------------------------------------------------------------
+# Pydantic invariants on new event models
+# ---------------------------------------------------------------------------
+
+
+class TestPythonEventInvariants:
+    def test_sub_agent_call_rejects_ts_end_before_ts_start(self) -> None:
+        from narada_core.actions.models import PythonSubAgentCallEvent
+        from pydantic import ValidationError
+
+        with pytest.raises(ValidationError, match="ts_end"):
+            PythonSubAgentCallEvent(
+                ts_start=1000,
+                ts_end=999,
+                agent_type="operator",
+                prompt="p",
+                status="success",
+            )
+
+    def test_extension_action_rejects_ts_end_before_ts_start(self) -> None:
+        from narada_core.actions.models import PythonExtensionActionEvent
+        from pydantic import ValidationError
+
+        with pytest.raises(ValidationError, match="ts_end"):
+            PythonExtensionActionEvent(
+                ts_start=1000,
+                ts_end=999,
+                action_name="get_url",
+                request_summary={},
+                status="success",
+            )
+
+    def test_python_agent_run_rejects_negative_duration(self) -> None:
+        from pydantic import ValidationError
+
+        with pytest.raises(ValidationError):
+            PythonAgentRunTrace(
+                url="",
+                status="success",
+                duration_ms=-1,
+                events=[],
+            )
+
+
+# ---------------------------------------------------------------------------
+# Deterministic parse_action_trace selection
+# ---------------------------------------------------------------------------
+
+
+class TestParseActionTraceDispatch:
+    def test_empty_list_parses_as_apa(self) -> None:
+        result = parse_action_trace([])
+        assert result == []
+
+    def test_step_type_routes_to_apa_adapter(self) -> None:
+        result = parse_action_trace(
+            [{"step_type": "goToUrl", "url": "https://x", "description": "..."}]
+        )
+        assert result[0].step_type == "goToUrl"
+
+    def test_action_plus_url_routes_to_operator_adapter(self) -> None:
+        from narada_core.actions.models import OperatorActionTraceItem
+
+        result = parse_action_trace([{"url": "https://x", "action": "click Foo"}])
+        assert isinstance(result[0], OperatorActionTraceItem)
+        assert result[0].action == "click Foo"

From 662d10decb917132a188d699dc1febdd625fac9d Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Fri, 24 Apr 2026 16:01:15 -0700
Subject: [PATCH 03/13] feat(sdk): add reasoning effort to the Core Agent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lets SDK callers opt into GPT-5.2's reasoning levels on the Core Agent,
matching the picker the web UI added in v2 of the agent step. The wire
field stays the existing `reasoningMode: "none"|"low"|"medium"|"high"`
on `NaradaGenerationRequest`; only `CoreAgent` reads it server-side.

What changes:

- `narada-core` — new `ReasoningEffort` `StrEnum` (NONE/LOW/MEDIUM/HIGH).
  Re-exported from both `narada` and `narada-pyodide` package roots so
  callers can `from narada import ReasoningEffort`.

- `narada` and `narada-pyodide` — `dispatch_request()` and `agent()`
  gain a `reasoning: ReasoningEffort | None = None` parameter that
  serializes to `body["reasoningMode"]` only when set (absent on the
  wire when `None`, preserving backward-compat with older backends).

- Type-level enforcement that `reasoning` is only valid with
  `agent=Agent.CORE_AGENT`: paired `@overload` signatures use
  `Literal[Agent.CORE_AGENT]` to give Pyright/mypy a hard error on
  misuse. A runtime `ValueError` covers the string-form path
  (`agent="..."`) where overload narrowing doesn't help.

- 8 new unit tests in `narada-pyodide/tests/test_reasoning.py` cover
  body wiring (each effort level, omission when None), runtime
  validation (enum and string agent forms, both `agent()` and
  `dispatch_request()`), and enum-value alignment with the backend
  Literal.

Version bumps (coupled — see PR description for rationale):
- narada-core    0.0.18 → 0.0.19
- narada         0.1.43 → 0.1.44
- narada-pyodide 0.0.45a2 → 0.0.46a1
---
 packages/narada-core/pyproject.toml           |   2 +-
 .../narada-core/src/narada_core/models.py     |  14 +
 packages/narada-pyodide/pyproject.toml        |   4 +-
 .../narada-pyodide/src/narada/__init__.py     |   3 +-
 packages/narada-pyodide/src/narada/window.py  | 155 +++++++++-
 .../narada-pyodide/tests/test_reasoning.py    | 277 ++++++++++++++++++
 packages/narada/pyproject.toml                |   4 +-
 packages/narada/src/narada/__init__.py        |   3 +-
 packages/narada/src/narada/window.py          | 164 ++++++++++-
 uv.lock                                       |   6 +-
 10 files changed, 596 insertions(+), 36 deletions(-)
 create mode 100644 packages/narada-pyodide/tests/test_reasoning.py

diff --git a/packages/narada-core/pyproject.toml b/packages/narada-core/pyproject.toml
index e47b7c8..2b29207 100644
--- a/packages/narada-core/pyproject.toml
+++ b/packages/narada-core/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "narada-core"
-version = "0.0.18"
+version = "0.0.19"
 description = "Code shared by the `narada` and `narada-pyodide` packages."
 license = "Apache-2.0"
 readme = "README.md"
diff --git a/packages/narada-core/src/narada_core/models.py b/packages/narada-core/src/narada_core/models.py
index 8e03292..8124545 100644
--- a/packages/narada-core/src/narada_core/models.py
+++ b/packages/narada-core/src/narada_core/models.py
@@ -21,6 +21,20 @@ def prompt_prefix(self) -> str:
                 return "/coreAgent "
 
 
+class ReasoningEffort(StrEnum):
+    """Amount of reasoning the Core Agent applies before responding.
+
+    Maps 1:1 to OpenAI's ``reasoning.effort`` parameter. Only honored when the
+    invoked agent is :py:attr:`Agent.CORE_AGENT`; the SDK enforces this both at
+    type-check time (via ``@overload``) and at runtime (with a ``ValueError``).
+    """
+
+    NONE = "none"
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+
+
 class UserResourceCredentials(TypedDict, total=False):
     salesforce: dict[str, str]
     jira: dict[str, str]
diff --git a/packages/narada-pyodide/pyproject.toml b/packages/narada-pyodide/pyproject.toml
index 38b5088..172cbf1 100644
--- a/packages/narada-pyodide/pyproject.toml
+++ b/packages/narada-pyodide/pyproject.toml
@@ -1,14 +1,14 @@
 
 [project]
 name = "narada-pyodide"
-version = "0.0.45a2"
+version = "0.0.46a1"
 description = "Pyodide-compatible Python client SDK for Narada"
 license = "Apache-2.0"
 readme = "README.md"
 authors = [{ name = "Narada", email = "support@narada.ai" }]
 requires-python = ">=3.12"
 dependencies = [
-    "narada-core==0.0.18",
+    "narada-core==0.0.19",
     # Must be a supported version in https://pyodide.org/en/stable/usage/packages-in-pyodide.html
     "packaging==24.2",
 ]
diff --git a/packages/narada-pyodide/src/narada/__init__.py b/packages/narada-pyodide/src/narada/__init__.py
index 544d452..386ed83 100644
--- a/packages/narada-pyodide/src/narada/__init__.py
+++ b/packages/narada-pyodide/src/narada/__init__.py
@@ -2,7 +2,7 @@
     NaradaError,
     NaradaTimeoutError,
 )
-from narada_core.models import Agent, File, Response, ResponseContent
+from narada_core.models import Agent, File, ReasoningEffort, Response, ResponseContent
 
 from narada.client import Narada
 from narada.utils import download_file, render_html
@@ -23,6 +23,7 @@
     "Narada",
     "NaradaError",
     "NaradaTimeoutError",
+    "ReasoningEffort",
     "RemoteBrowserWindow",
     "render_html",
     "Response",
diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py
index 791503c..9395466 100644
--- a/packages/narada-pyodide/src/narada/window.py
+++ b/packages/narada-pyodide/src/narada/window.py
@@ -63,6 +63,7 @@
     Agent,
     File,
     McpServer,
+    ReasoningEffort,
     RemoteDispatchChatHistoryItem,
     Response,
     UserResourceCredentials,
@@ -196,6 +197,57 @@ async def upload_file(self, *, file: IO) -> File:
             "Uploading files is not supported in the browser environment"
         )
 
+    # `reasoning` is only valid with the Core Agent; these two overloads make
+    # that constraint type-checkable. Generic-agent calls fall through to the
+    # general overloads below, which do not accept a `reasoning` argument.
+    @overload
+    async def dispatch_request(
+        self,
+        *,
+        prompt: str,
+        agent: Literal[Agent.CORE_AGENT],
+        reasoning: ReasoningEffort | None = None,
+        clear_chat: bool | None = None,
+        generate_gif: bool | None = None,
+        output_schema: None = None,
+        previous_request_id: str | None = None,
+        chat_history: list[RemoteDispatchChatHistoryItem] | None = None,
+        additional_context: dict[str, str] | None = None,
+        time_zone: str = "America/Los_Angeles",
+        user_resource_credentials: UserResourceCredentials | None = None,
+        mcp_servers: list[McpServer] | None = None,
+        secret_variables: dict[str, str] | None = None,
+        input_variables: dict[str, Any] | None = None,
+        callback_url: str | None = None,
+        callback_secret: str | None = None,
+        callback_headers: dict[str, Any] | None = None,
+        timeout: int = 1000,
+    ) -> Response[None]: ...
+
+    @overload
+    async def dispatch_request(
+        self,
+        *,
+        prompt: str,
+        agent: Literal[Agent.CORE_AGENT],
+        reasoning: ReasoningEffort | None = None,
+        clear_chat: bool | None = None,
+        generate_gif: bool | None = None,
+        output_schema: type[_StructuredOutput],
+        previous_request_id: str | None = None,
+        chat_history: list[RemoteDispatchChatHistoryItem] | None = None,
+        additional_context: dict[str, str] | None = None,
+        time_zone: str = "America/Los_Angeles",
+        user_resource_credentials: UserResourceCredentials | None = None,
+        mcp_servers: list[McpServer] | None = None,
+        secret_variables: dict[str, str] | None = None,
+        input_variables: dict[str, Any] | None = None,
+        callback_url: str | None = None,
+        callback_secret: str | None = None,
+        callback_headers: dict[str, Any] | None = None,
+        timeout: int = 1000,
+    ) -> Response[_StructuredOutput]: ...
+
     @overload
     async def dispatch_request(
         self,
@@ -247,6 +299,7 @@ async def dispatch_request(
         *,
         prompt: str,
         agent: Agent | str = Agent.OPERATOR,
+        reasoning: ReasoningEffort | None = None,
         clear_chat: bool | None = None,
         generate_gif: bool | None = None,
         output_schema: type[BaseModel] | None = None,
@@ -267,6 +320,14 @@ async def dispatch_request(
 
         The higher-level `agent` method should be preferred for most use cases.
         """
+        # The overloads enforce this at type-check time when callers use
+        # ``Agent.CORE_AGENT``; the runtime check covers string-form agents
+        # (``agent="..."``) and callers without a type checker.
+        if reasoning is not None and agent is not Agent.CORE_AGENT:
+            raise ValueError(
+                "`reasoning` is only supported with `agent=Agent.CORE_AGENT` "
+                f"(got agent={agent!r})"
+            )
         # Trace instrumentation: the entire method body is wrapped so that any
         # exit (successful return, timeout, or non-timeout failure) produces a
         # ``subAgentCall`` trace event with matching status. See `_trace.py`.
@@ -319,6 +380,8 @@ async def dispatch_request(
             body["callbackSecret"] = callback_secret
         if callback_headers is not None:
             body["callbackHeaders"] = callback_headers
+        if reasoning is not None:
+            body["reasoningMode"] = reasoning.value
 
         try:
             controller = AbortController.new()
@@ -436,6 +499,42 @@ async def dispatch_request(
             )
             raise
 
+    # `reasoning` is only valid with the Core Agent. See `dispatch_request`
+    # above for the rationale; the same overload pattern is mirrored here.
+    @overload
+    async def agent(
+        self,
+        *,
+        prompt: str,
+        agent: Literal[Agent.CORE_AGENT],
+        reasoning: ReasoningEffort | None = None,
+        clear_chat: bool | None = None,
+        generate_gif: bool | None = None,
+        output_schema: None = None,
+        time_zone: str = "America/Los_Angeles",
+        mcp_servers: list[McpServer] | None = None,
+        secret_variables: dict[str, str] | None = None,
+        input_variables: dict[str, Any] | None = None,
+        timeout: int = 1000,
+    ) -> AgentResponse[dict[str, Any]]: ...
+
+    @overload
+    async def agent(
+        self,
+        *,
+        prompt: str,
+        agent: Literal[Agent.CORE_AGENT],
+        reasoning: ReasoningEffort | None = None,
+        clear_chat: bool | None = None,
+        generate_gif: bool | None = None,
+        output_schema: type[_StructuredOutput],
+        time_zone: str = "America/Los_Angeles",
+        mcp_servers: list[McpServer] | None = None,
+        secret_variables: dict[str, str] | None = None,
+        input_variables: dict[str, Any] | None = None,
+        timeout: int = 1000,
+    ) -> AgentResponse[_StructuredOutput]: ...
+
     @overload
     async def agent(
         self,
@@ -473,6 +572,7 @@ async def agent(
         *,
         prompt: str,
         agent: Agent | str = Agent.OPERATOR,
+        reasoning: ReasoningEffort | None = None,
         clear_chat: bool | None = None,
         generate_gif: bool | None = None,
         output_schema: type[BaseModel] | None = None,
@@ -483,18 +583,49 @@ async def agent(
         timeout: int = 1000,
     ) -> AgentResponse:
         """Invokes an agent in the Narada extension side panel chat."""
-        remote_dispatch_response = await self.dispatch_request(
-            prompt=prompt,
-            agent=agent,
-            clear_chat=clear_chat,
-            generate_gif=generate_gif,
-            output_schema=output_schema,
-            time_zone=time_zone,
-            mcp_servers=mcp_servers,
-            secret_variables=secret_variables,
-            input_variables=input_variables,
-            timeout=timeout,
-        )
+        # Branch on `reasoning` so each call site binds a single, typed overload
+        # of `dispatch_request`. The validation also lives in `dispatch_request`
+        # itself (defense in depth + reachable when callers go straight to the
+        # low-level API), so the redundancy here is intentional.
+        if reasoning is None:
+            remote_dispatch_response = await self.dispatch_request(
+                prompt=prompt,
+                agent=agent,
+                clear_chat=clear_chat,
+                generate_gif=generate_gif,
+                output_schema=output_schema,
+                time_zone=time_zone,
+                mcp_servers=mcp_servers,
+                secret_variables=secret_variables,
+                input_variables=input_variables,
+                timeout=timeout,
+            )
+        else:
+            if agent is not Agent.CORE_AGENT:
+                raise ValueError(
+                    "`reasoning` is only supported with `agent=Agent.CORE_AGENT` "
+                    f"(got agent={agent!r})"
+                )
+            # The CORE_AGENT-specific overloads of `dispatch_request` split on
+            # a narrower `output_schema` discriminator (None vs `type[T]`),
+            # which the impl's `type[BaseModel] | None` union doesn't cleanly
+            # narrow into without further branching. The public `agent()`
+            # overloads above already give callers correct return-type
+            # narrowing, so the internal forward call bypasses overload
+            # disambiguation on this single dimension.
+            remote_dispatch_response = await self.dispatch_request(  # pyright: ignore[reportCallIssue]
+                prompt=prompt,
+                agent=agent,
+                reasoning=reasoning,
+                clear_chat=clear_chat,
+                generate_gif=generate_gif,
+                output_schema=output_schema,  # pyright: ignore[reportArgumentType]
+                time_zone=time_zone,
+                mcp_servers=mcp_servers,
+                secret_variables=secret_variables,
+                input_variables=input_variables,
+                timeout=timeout,
+            )
         response_content = remote_dispatch_response["response"]
         assert response_content is not None
 
diff --git a/packages/narada-pyodide/tests/test_reasoning.py b/packages/narada-pyodide/tests/test_reasoning.py
new file mode 100644
index 0000000..5d7d53e
--- /dev/null
+++ b/packages/narada-pyodide/tests/test_reasoning.py
@@ -0,0 +1,277 @@
+"""Tests for the `reasoning` parameter on the Core Agent.
+
+These exercise the `narada-pyodide` window because it is the only package with
+a runnable test harness today. The sibling `narada` package mirrors the same
+request-body wiring and runtime check, so these tests act as a proxy for it;
+they do not execute the `narada` implementation itself.
+
+We mirror `test_cloud_browser.py`'s module-clearing pattern: each test gets a
+fresh import of `narada.window` with a freshly stubbed `pyodide.http.pyfetch`,
+because cached module references from earlier tests would otherwise leak into
+this file when the suite runs in alphabetical order.
+"""
+
+from __future__ import annotations
+
+import importlib
+import json
+import sys
+from collections.abc import Iterator
+from types import ModuleType, SimpleNamespace
+from typing import Any
+from unittest.mock import AsyncMock
+
+import pytest
+
+
+def _clear_modules() -> None:
+    for name in list(sys.modules):
+        if name == "narada" or name.startswith("narada."):
+            sys.modules.pop(name, None)
+    for name in ("js", "pyodide", "pyodide.http", "pyodide.ffi"):
+        sys.modules.pop(name, None)
+
+
+class _FakeResponse:
+    def __init__(self, *, ok: bool = True, json_data: object = None) -> None:
+        self.ok = ok
+        self.status = 200
+        self._json_data = json_data
+
+    async def json(self) -> object:
+        return self._json_data
+
+    async def text(self) -> str:
+        return ""
+
+
+def _make_pyfetch_recorder() -> tuple[AsyncMock, list[dict[str, Any]]]:
+    """Build an `AsyncMock` for `pyfetch` that captures every JSON body posted
+    to /remote-dispatch and returns a canned success response on the poll."""
+    posted_bodies: list[dict[str, Any]] = []
+
+    async def _impl(url: str, **kwargs: Any) -> _FakeResponse:
+        if "body" in kwargs:
+            posted_bodies.append(json.loads(kwargs["body"]))
+        if url.endswith("/remote-dispatch"):
+            return _FakeResponse(json_data={"requestId": "req-test"})
+        return _FakeResponse(
+            json_data={
+                "status": "success",
+                "response": {
+                    "text": "ok",
+                    "output": {"type": "text", "content": "ok"},
+                },
+                "createdAt": "now",
+                "completedAt": "now",
+                "usage": {"actions": 0, "credits": 0.0},
+            }
+        )
+
+    pyfetch = AsyncMock(side_effect=_impl)
+    return pyfetch, posted_bodies
+
+
+@pytest.fixture
+def reimported_window(
+    monkeypatch: pytest.MonkeyPatch,
+) -> Iterator[tuple[ModuleType, AsyncMock, list[dict[str, Any]]]]:
+    """Force a fresh import of `narada.window` after planting freshly-mocked
+    Pyodide-bridge modules. Yields the window module, the captured `pyfetch`
+    mock, and the list that records every posted JSON body.
+    """
+    _clear_modules()
+
+    js_module = ModuleType("js")
+    js_module.AbortController = SimpleNamespace(  # type: ignore[attr-defined]
+        new=lambda: SimpleNamespace(signal=object(), abort=lambda: None)
+    )
+    js_module.setTimeout = lambda callback, timeout: None  # type: ignore[attr-defined]
+
+    pyodide_module = ModuleType("pyodide")
+    pyodide_module.__path__ = []  # type: ignore[attr-defined]
+
+    pyfetch, posted_bodies = _make_pyfetch_recorder()
+    pyodide_http_module = ModuleType("pyodide.http")
+    pyodide_http_module.pyfetch = pyfetch  # type: ignore[attr-defined]
+
+    pyodide_ffi_module = ModuleType("pyodide.ffi")
+
+    class _FakeJsProxy:
+        def __init__(self, value: object) -> None:
+            self._value = value
+
+        def to_py(self) -> object:
+            return self._value
+
+    pyodide_ffi_module.JsProxy = _FakeJsProxy  # type: ignore[attr-defined]
+    pyodide_ffi_module.create_once_callable = lambda fn: fn  # type: ignore[attr-defined]
+
+    monkeypatch.setitem(sys.modules, "js", js_module)
+    monkeypatch.setitem(sys.modules, "pyodide", pyodide_module)
+    monkeypatch.setitem(sys.modules, "pyodide.http", pyodide_http_module)
+    monkeypatch.setitem(sys.modules, "pyodide.ffi", pyodide_ffi_module)
+
+    window_module = importlib.import_module("narada.window")
+    window_module._narada_parent_run_ids = _FakeJsProxy([])  # type: ignore[attr-defined]
+    yield window_module, pyfetch, posted_bodies
+    _clear_modules()
+
+
+def _make_window(window_module: ModuleType) -> Any:
+    window = window_module.LocalBrowserWindow.__new__(window_module.LocalBrowserWindow)
+    window._auth_headers = {"x-narada-test": "1"}
+    window._base_url = "https://example.invalid/api"
+    window._browser_window_id = "test-window"
+
+    async def _stub_auth_headers() -> dict[str, str]:
+        return {"x-narada-test": "1"}
+
+    window._get_auth_headers = _stub_auth_headers
+    window._current_parent_run_ids = lambda: []
+    return window
+
+
+class TestReasoningBodyWiring:
+    """The `reasoning` arg flows through to the JSON body as `reasoningMode`."""
+
+    @pytest.mark.asyncio
+    async def test_present_when_reasoning_is_set(
+        self,
+        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
+    ) -> None:
+        window_module, _pyfetch, posted_bodies = reimported_window
+        from narada_core.models import Agent, ReasoningEffort
+
+        window = _make_window(window_module)
+        await window.dispatch_request(
+            prompt="solve this",
+            agent=Agent.CORE_AGENT,
+            reasoning=ReasoningEffort.MEDIUM,
+        )
+
+        assert posted_bodies[0]["reasoningMode"] == "medium"
+
+    @pytest.mark.asyncio
+    async def test_absent_when_reasoning_is_none(
+        self,
+        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
+    ) -> None:
+        window_module, _pyfetch, posted_bodies = reimported_window
+        from narada_core.models import Agent
+
+        window = _make_window(window_module)
+        await window.dispatch_request(
+            prompt="solve this",
+            agent=Agent.CORE_AGENT,
+        )
+
+        # Absent (not null) — wire-compatible with backends predating the field.
+        assert "reasoningMode" not in posted_bodies[0]
+
+    @pytest.mark.asyncio
+    async def test_each_effort_level_serializes_to_string(
+        self,
+        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
+    ) -> None:
+        window_module, _pyfetch, posted_bodies = reimported_window
+        from narada_core.models import Agent, ReasoningEffort
+
+        window = _make_window(window_module)
+
+        for level in (
+            ReasoningEffort.NONE,
+            ReasoningEffort.LOW,
+            ReasoningEffort.MEDIUM,
+            ReasoningEffort.HIGH,
+        ):
+            await window.dispatch_request(
+                prompt="x",
+                agent=Agent.CORE_AGENT,
+                reasoning=level,
+            )
+
+        seen = [b["reasoningMode"] for b in posted_bodies if "reasoningMode" in b]
+        assert seen == ["none", "low", "medium", "high"]
+
+
+class TestReasoningRuntimeValidation:
+    """Misuse — `reasoning` paired with a non-Core agent — fails fast at runtime
+    with a clear message. The overload contract on the public `agent()` method
+    catches this at type-check time when callers use the enum, but the runtime
+    check covers the string-form (`agent="..."`) and untyped paths."""
+
+    @pytest.mark.asyncio
+    async def test_dispatch_request_rejects_non_core_agent_enum(
+        self,
+        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
+    ) -> None:
+        window_module, _pyfetch, _posted = reimported_window
+        from narada_core.models import Agent, ReasoningEffort
+
+        window = _make_window(window_module)
+        with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"):
+            await window.dispatch_request(
+                prompt="x",
+                agent=Agent.OPERATOR,
+                reasoning=ReasoningEffort.MEDIUM,  # pyright: ignore[reportCallIssue]
+            )
+
+    @pytest.mark.asyncio
+    async def test_dispatch_request_rejects_string_agent(
+        self,
+        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
+    ) -> None:
+        # String-form bypasses the type-checker overload, so the runtime check
+        # is the only safety net here.
+        window_module, _pyfetch, _posted = reimported_window
+        from narada_core.models import ReasoningEffort
+
+        window = _make_window(window_module)
+        with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"):
+            await window.dispatch_request(
+                prompt="x",
+                agent="some-custom-agent",
+                reasoning=ReasoningEffort.HIGH,  # pyright: ignore[reportCallIssue]
+            )
+
+    @pytest.mark.asyncio
+    async def test_agent_rejects_non_core_agent_enum(
+        self,
+        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
+    ) -> None:
+        # The same constraint must hold on the higher-level `agent()` method.
+        window_module, _pyfetch, _posted = reimported_window
+        from narada_core.models import Agent, ReasoningEffort
+
+        window = _make_window(window_module)
+        with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"):
+            await window.agent(
+                prompt="x",
+                agent=Agent.OPERATOR,
+                reasoning=ReasoningEffort.LOW,  # pyright: ignore[reportCallIssue]
+            )
+
+
+class TestReasoningEffortEnum:
+    """The enum values are exactly what the backend expects."""
+
+    def test_values_match_backend_literal(self) -> None:
+        # The backend declares `reasoningMode: Literal["none", "low",
+        # "medium", "high"] | None`. If we drift, requests will start failing
+        # validation server-side.
+        from narada_core.models import ReasoningEffort
+
+        assert ReasoningEffort.NONE.value == "none"
+        assert ReasoningEffort.LOW.value == "low"
+        assert ReasoningEffort.MEDIUM.value == "medium"
+        assert ReasoningEffort.HIGH.value == "high"
+
+    def test_str_enum_serializes_inline(self) -> None:
+        # `StrEnum` values double as `str`, which is what `json.dumps` writes
+        # without any custom encoder.
+        from narada_core.models import ReasoningEffort
+
+        assert json.dumps({"reasoningMode": ReasoningEffort.MEDIUM.value}) == (
+            '{"reasoningMode": "medium"}'
+        )
diff --git a/packages/narada/pyproject.toml b/packages/narada/pyproject.toml
index 59e1179..f220a03 100644
--- a/packages/narada/pyproject.toml
+++ b/packages/narada/pyproject.toml
@@ -1,13 +1,13 @@
 [project]
 name = "narada"
-version = "0.1.43"
+version = "0.1.44"
 description = "Python client SDK for Narada"
 license = "Apache-2.0"
 readme = "README.md"
 authors = [{ name = "Narada", email = "support@narada.ai" }]
 requires-python = ">=3.12"
 dependencies = [
-    "narada-core==0.0.18",
+    "narada-core==0.0.19",
     "aiohttp>=3.12.13",
     "playwright>=1.53.0",
     "rich>=14.0.0",
diff --git a/packages/narada/src/narada/__init__.py b/packages/narada/src/narada/__init__.py
index 3f6e9fe..1434183 100644
--- a/packages/narada/src/narada/__init__.py
+++ b/packages/narada/src/narada/__init__.py
@@ -7,7 +7,7 @@
     NaradaUnsupportedBrowserError,
     UserAbortedError,
 )
-from narada_core.models import Agent, File, Response, ResponseContent
+from narada_core.models import Agent, File, ReasoningEffort, Response, ResponseContent
 
 from narada.client import Narada
 from narada.config import BrowserConfig, ProxyConfig
@@ -31,6 +31,7 @@
     "NaradaTimeoutError",
     "NaradaUnsupportedBrowserError",
     "ProxyConfig",
+    "ReasoningEffort",
     "RemoteBrowserWindow",
     "render_html",
     "Response",
diff --git a/packages/narada/src/narada/window.py b/packages/narada/src/narada/window.py
index 29c359b..aa30065 100644
--- a/packages/narada/src/narada/window.py
+++ b/packages/narada/src/narada/window.py
@@ -7,7 +7,7 @@
 from http import HTTPStatus
 from io import IOBase
 from pathlib import Path
-from typing import IO, Any, Mapping, TypeGuard, TypeVar, overload, override
+from typing import IO, Any, Literal, Mapping, TypeGuard, TypeVar, overload, override
 
 import aiohttp
 from narada_core.actions.models import (
@@ -53,6 +53,7 @@
     Agent,
     File,
     McpServer,
+    ReasoningEffort,
     RemoteDispatchChatHistoryItem,
     Response,
     UserResourceCredentials,
@@ -214,6 +215,59 @@ async def _upload_input_variable_file(
         uploaded_file = await self._upload_file_impl(file=input_variable_value)
         return _InputVariableFileReference(key=uploaded_file["key"], name=filename)
 
+    # `reasoning` is only valid with the Core Agent; these two overloads make
+    # that constraint type-checkable. Generic-agent calls fall through to the
+    # general overloads below, which do not accept a `reasoning` argument.
+    @overload
+    async def dispatch_request(
+        self,
+        *,
+        prompt: str,
+        agent: Literal[Agent.CORE_AGENT],
+        reasoning: ReasoningEffort | None = None,
+        clear_chat: bool | None = None,
+        generate_gif: bool | None = None,
+        output_schema: None = None,
+        previous_request_id: str | None = None,
+        chat_history: list[RemoteDispatchChatHistoryItem] | None = None,
+        additional_context: dict[str, str] | None = None,
+        attachment: File | None = None,
+        time_zone: str = "America/Los_Angeles",
+        user_resource_credentials: UserResourceCredentials | None = None,
+        mcp_servers: list[McpServer] | None = None,
+        secret_variables: dict[str, str] | None = None,
+        input_variables: Mapping[str, Any] | None = None,
+        callback_url: str | None = None,
+        callback_secret: str | None = None,
+        callback_headers: Mapping[str, Any] | None = None,
+        timeout: int = 1000,
+    ) -> Response[None]: ...
+
+    @overload
+    async def dispatch_request(
+        self,
+        *,
+        prompt: str,
+        agent: Literal[Agent.CORE_AGENT],
+        reasoning: ReasoningEffort | None = None,
+        clear_chat: bool | None = None,
+        generate_gif: bool | None = None,
+        output_schema: type[_StructuredOutput],
+        previous_request_id: str | None = None,
+        chat_history: list[RemoteDispatchChatHistoryItem] | None = None,
+        additional_context: dict[str, str] | None = None,
+        attachment: File | None = None,
+        time_zone: str = "America/Los_Angeles",
+        user_resource_credentials: UserResourceCredentials | None = None,
+        mcp_servers: list[McpServer] | None = None,
+        secret_variables: dict[str, str] | None = None,
+        input_variables: Mapping[str, Any] | None = None,
+        callback_url: str | None = None,
+        callback_secret: str | None = None,
+        callback_headers: Mapping[str, Any] | None = None,
+        timeout: int = 1000,
+    ) -> Response[_StructuredOutput]: ...
+
     @overload
     async def dispatch_request(
         self,
@@ -267,6 +321,7 @@ async def dispatch_request(
         *,
         prompt: str,
         agent: Agent | str = Agent.OPERATOR,
+        reasoning: ReasoningEffort | None = None,
         clear_chat: bool | None = None,
         generate_gif: bool | None = None,
         output_schema: type[BaseModel] | None = None,
@@ -288,6 +343,14 @@ async def dispatch_request(
 
         The higher-level `agent` method should be preferred for most use cases.
         """
+        # The overloads enforce this at type-check time when callers use
+        # ``Agent.CORE_AGENT``; the runtime check covers string-form agents
+        # (``agent="..."``) and callers without a type checker.
+        if reasoning is not None and agent is not Agent.CORE_AGENT:
+            raise ValueError(
+                "`reasoning` is only supported with `agent=Agent.CORE_AGENT` "
+                f"(got agent={agent!r})"
+            )
         deadline = time.monotonic() + timeout
 
         agent_prefix = (
@@ -333,6 +396,8 @@ async def dispatch_request(
             body["callbackSecret"] = callback_secret
         if callback_headers is not None:
             body["callbackHeaders"] = callback_headers
+        if reasoning is not None:
+            body["reasoningMode"] = reasoning.value
 
         try:
             async with aiohttp.ClientSession() as session:
@@ -383,6 +448,44 @@ async def dispatch_request(
         except asyncio.TimeoutError:
             raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout)
 
+    # `reasoning` is only valid with the Core Agent. See `dispatch_request`
+    # above for the rationale; the same overload pattern is mirrored here.
+    @overload
+    async def agent(
+        self,
+        *,
+        prompt: str,
+        agent: Literal[Agent.CORE_AGENT],
+        reasoning: ReasoningEffort | None = None,
+        clear_chat: bool | None = None,
+        generate_gif: bool | None = None,
+        output_schema: None = None,
+        attachment: File | None = None,
+        time_zone: str = "America/Los_Angeles",
+        mcp_servers: list[McpServer] | None = None,
+        secret_variables: dict[str, str] | None = None,
+        input_variables: Mapping[str, Any] | None = None,
+        timeout: int = 1000,
+    ) -> AgentResponse[dict[str, Any]]: ...
+
+    @overload
+    async def agent(
+        self,
+        *,
+        prompt: str,
+        agent: Literal[Agent.CORE_AGENT],
+        reasoning: ReasoningEffort | None = None,
+        clear_chat: bool | None = None,
+        generate_gif: bool | None = None,
+        output_schema: type[_StructuredOutput],
+        attachment: File | None = None,
+        time_zone: str = "America/Los_Angeles",
+        mcp_servers: list[McpServer] | None = None,
+        secret_variables: dict[str, str] | None = None,
+        input_variables: Mapping[str, Any] | None = None,
+        timeout: int = 1000,
+    ) -> AgentResponse[_StructuredOutput]: ...
+
     @overload
     async def agent(
         self,
@@ -422,6 +525,7 @@ async def agent(
         *,
         prompt: str,
         agent: Agent | str = Agent.OPERATOR,
+        reasoning: ReasoningEffort | None = None,
         clear_chat: bool | None = None,
         generate_gif: bool | None = None,
         output_schema: type[BaseModel] | None = None,
@@ -433,19 +537,51 @@ async def agent(
         timeout: int = 1000,
     ) -> AgentResponse:
         """Invokes an agent in the Narada extension side panel chat."""
-        remote_dispatch_response = await self.dispatch_request(
-            prompt=prompt,
-            agent=agent,
-            clear_chat=clear_chat,
-            generate_gif=generate_gif,
-            output_schema=output_schema,
-            attachment=attachment,
-            time_zone=time_zone,
-            mcp_servers=mcp_servers,
-            secret_variables=secret_variables,
-            input_variables=input_variables,
-            timeout=timeout,
-        )
+        # Branch on `reasoning` so each call site binds a single, typed overload
+        # of `dispatch_request`. The validation also lives in `dispatch_request`
+        # itself (defense in depth + reachable when callers go straight to the
+        # low-level API), so the redundancy here is intentional.
+        if reasoning is None:
+            remote_dispatch_response = await self.dispatch_request(
+                prompt=prompt,
+                agent=agent,
+                clear_chat=clear_chat,
+                generate_gif=generate_gif,
+                output_schema=output_schema,
+                attachment=attachment,
+                time_zone=time_zone,
+                mcp_servers=mcp_servers,
+                secret_variables=secret_variables,
+                input_variables=input_variables,
+                timeout=timeout,
+            )
+        else:
+            if agent is not Agent.CORE_AGENT:
+                raise ValueError(
+                    "`reasoning` is only supported with `agent=Agent.CORE_AGENT` "
+                    f"(got agent={agent!r})"
+                )
+            # The CORE_AGENT-specific overloads of `dispatch_request` split on
+            # a narrower `output_schema` discriminator (None vs `type[T]`),
+            # which the impl's `type[BaseModel] | None` union doesn't cleanly
+            # narrow into without further branching. The public `agent()`
+            # overloads above already give callers correct return-type
+            # narrowing, so the internal forward call bypasses overload
+            # disambiguation on this single dimension.
+            remote_dispatch_response = await self.dispatch_request(  # pyright: ignore[reportCallIssue]
+                prompt=prompt,
+                agent=agent,
+                reasoning=reasoning,
+                clear_chat=clear_chat,
+                generate_gif=generate_gif,
+                output_schema=output_schema,  # pyright: ignore[reportArgumentType]
+                attachment=attachment,
+                time_zone=time_zone,
+                mcp_servers=mcp_servers,
+                secret_variables=secret_variables,
+                input_variables=input_variables,
+                timeout=timeout,
+            )
         response_content = remote_dispatch_response["response"]
         assert response_content is not None
 
diff --git a/uv.lock b/uv.lock
index 0ee3814..b8493dc 100644
--- a/uv.lock
+++ b/uv.lock
@@ -312,7 +312,7 @@ wheels = [
 
 [[package]]
 name = "narada"
-version = "0.1.43"
+version = "0.1.44"
 source = { editable = "packages/narada" }
 dependencies = [
     { name = "aiohttp" },
@@ -345,7 +345,7 @@ dev = [
 
 [[package]]
 name = "narada-core"
-version = "0.0.18"
+version = "0.0.19"
 source = { editable = "packages/narada-core" }
 dependencies = [
     { name = "pydantic" },
@@ -356,7 +356,7 @@ requires-dist = [{ name = "pydantic", specifier = "==2.12.5" }]
 
 [[package]]
 name = "narada-pyodide"
-version = "0.0.45a2"
+version = "0.0.46a1"
 source = { editable = "packages/narada-pyodide" }
 dependencies = [
     { name = "narada-core" },

From ba8044df6c44fcbce344f80d934291f29df3834e Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Fri, 24 Apr 2026 16:05:54 -0700
Subject: [PATCH 04/13] test(sdk): hoist test imports to module top

`narada_core.models` is not affected by the `_clear_modules()` reset
(only `narada` and `pyodide.*` get popped), so repeating
`from narada_core.models import ...` inside each test was unnecessary.
Hoist those imports into a single module-level import.
---
 packages/narada-pyodide/tests/test_reasoning.py | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/packages/narada-pyodide/tests/test_reasoning.py b/packages/narada-pyodide/tests/test_reasoning.py
index 5d7d53e..2844015 100644
--- a/packages/narada-pyodide/tests/test_reasoning.py
+++ b/packages/narada-pyodide/tests/test_reasoning.py
@@ -22,6 +22,7 @@
 from unittest.mock import AsyncMock
 
 import pytest
+from narada_core.models import Agent, ReasoningEffort
 
 
 def _clear_modules() -> None:
@@ -141,8 +142,6 @@ async def test_present_when_reasoning_is_set(
         reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
     ) -> None:
         window_module, _pyfetch, posted_bodies = reimported_window
-        from narada_core.models import Agent, ReasoningEffort
-
         window = _make_window(window_module)
         await window.dispatch_request(
             prompt="solve this",
@@ -158,8 +157,6 @@ async def test_absent_when_reasoning_is_none(
         reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
     ) -> None:
         window_module, _pyfetch, posted_bodies = reimported_window
-        from narada_core.models import Agent
-
         window = _make_window(window_module)
         await window.dispatch_request(
             prompt="solve this",
@@ -175,8 +172,6 @@ async def test_each_effort_level_serializes_to_string(
         reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
     ) -> None:
         window_module, _pyfetch, posted_bodies = reimported_window
-        from narada_core.models import Agent, ReasoningEffort
-
         window = _make_window(window_module)
 
         for level in (
@@ -207,8 +202,6 @@ async def test_dispatch_request_rejects_non_core_agent_enum(
         reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
     ) -> None:
         window_module, _pyfetch, _posted = reimported_window
-        from narada_core.models import Agent, ReasoningEffort
-
         window = _make_window(window_module)
         with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"):
             await window.dispatch_request(
@@ -225,8 +218,6 @@ async def test_dispatch_request_rejects_string_agent(
         # String-form bypasses the type-checker overload, so the runtime check
         # is the only safety net here.
         window_module, _pyfetch, _posted = reimported_window
-        from narada_core.models import ReasoningEffort
-
         window = _make_window(window_module)
         with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"):
             await window.dispatch_request(
@@ -242,8 +233,6 @@ async def test_agent_rejects_non_core_agent_enum(
     ) -> None:
         # The same constraint must hold on the higher-level `agent()` method.
         window_module, _pyfetch, _posted = reimported_window
-        from narada_core.models import Agent, ReasoningEffort
-
         window = _make_window(window_module)
         with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"):
             await window.agent(
@@ -260,8 +249,6 @@ def test_values_match_backend_literal(self) -> None:
         # The backend declares `reasoningMode: Literal["none", "low",
         # "medium", "high"] | None`. If we drift, requests will start failing
         # validation server-side.
-        from narada_core.models import ReasoningEffort
-
         assert ReasoningEffort.NONE.value == "none"
         assert ReasoningEffort.LOW.value == "low"
         assert ReasoningEffort.MEDIUM.value == "medium"
@@ -270,8 +257,6 @@ def test_values_match_backend_literal(self) -> None:
     def test_str_enum_serializes_inline(self) -> None:
         # `StrEnum` values double as `str`, which is what `json.dumps` writes
         # without any custom encoder.
-        from narada_core.models import ReasoningEffort
-
         assert json.dumps({"reasoningMode": ReasoningEffort.MEDIUM.value}) == (
             '{"reasoningMode": "medium"}'
         )

From 40848daf934f67bf772a65bd3adbee4ebae374c7 Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Sun, 26 Apr 2026 09:51:19 -0700
Subject: [PATCH 05/13] Forward nested action_trace events unmodified
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes _strip_nested_python_events. The function dropped events from any
nested pythonAgentRun node and stamped truncated_event_count on it, citing
"deep recursion blowing up persisted JSON size" as the reason.

In practice the policy was always-on and uniform — a 1-event nested trace
got stripped just as readily as a 10K-event one — and the frontend already
owns size enforcement via MAX_NESTED_ACTION_TRACE_BYTES in python.worker.ts
plus the workflow-run-detail consumer caps. Stripping in two layers is
strictly worse: small nested traces lose their events for no benefit, and
the dashboard's CollapsibleNestedTrace can't recover them (it does not
lazy-fetch by request_id).

Now emit_sub_agent_call forwards action_trace_raw as-is, and the frontend
caps a nested trace only when it is actually over budget. Tests are
updated to assert that events flow through unmodified.
---
 packages/narada-pyodide/src/narada/_trace.py | 35 +++-----------
 packages/narada-pyodide/tests/test_trace.py  | 48 ++++++--------------
 2 files changed, 19 insertions(+), 64 deletions(-)

diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py
index 06be667..22117a5 100644
--- a/packages/narada-pyodide/src/narada/_trace.py
+++ b/packages/narada-pyodide/src/narada/_trace.py
@@ -106,34 +106,6 @@ def emit_trace_event(event: dict[str, Any]) -> None:
         _logger.warning("trace event emission failed", exc_info=True)
 
 
-def _strip_nested_python_events(
-    raw: list[dict[str, Any]] | None,
-) -> list[dict[str, Any]] | None:
-    """Forward a nested action trace one level deep. Any ``pythonAgentRun``
-    node inside retains its outer status/duration metadata but its ``events``
-    list is dropped, preventing deep recursion from blowing up persisted
-    JSON size. A ``truncated_event_count`` field is left behind so the
-    dashboard can show that events were elided.
-    """
-    if raw is None:
-        return None
-
-    def strip(item: dict[str, Any]) -> dict[str, Any]:
-        if not isinstance(item, dict):
-            return item
-        if item.get("step_type") != "pythonAgentRun":
-            return item
-        events = item.get("events", [])
-        stripped = dict(item)
-        stripped["events"] = []
-        stripped["truncated_event_count"] = (
-            len(events) if isinstance(events, list) else 0
-        )
-        return stripped
-
-    return [strip(item) for item in raw]
-
-
 def summarize_request(request: ExtensionActionRequest) -> dict[str, Any]:
     """Produce a bounded-size summary of an extension action request for
     display in the observability dashboard. Large payloads (sheet row values,
@@ -243,7 +215,12 @@ def emit_sub_agent_call(
     if error_message is not None:
         event["error_message"] = truncate_error(error_message)
     if action_trace_raw is not None:
-        event["action_trace"] = _strip_nested_python_events(action_trace_raw)
+        # Forward the nested action trace as-is. Size/depth enforcement is the
+        # frontend's responsibility (`MAX_NESTED_ACTION_TRACE_BYTES` in
+        # python.worker.ts, plus the workflow-run-detail consumer caps).
+        # Stripping events here is redundant and prevents the dashboard from
+        # rendering small inline nested traces inline in CollapsibleNestedTrace.
+        event["action_trace"] = action_trace_raw
     emit_trace_event(event)
 
 
diff --git a/packages/narada-pyodide/tests/test_trace.py b/packages/narada-pyodide/tests/test_trace.py
index 4d5e34b..9c192d7 100644
--- a/packages/narada-pyodide/tests/test_trace.py
+++ b/packages/narada-pyodide/tests/test_trace.py
@@ -473,60 +473,38 @@ def _boom(_json: str) -> None:
 
 
 # ---------------------------------------------------------------------------
-# Nested action_trace stripping: cap recursion depth to one level
+# Nested action_trace forwarding: SDK forwards events as-is; size enforcement
+# is the frontend's responsibility (MAX_NESTED_ACTION_TRACE_BYTES in
+# python.worker.ts and the workflow-run-detail consumer caps).
 # ---------------------------------------------------------------------------
 
 
-class TestStripNestedPythonEvents:
-    def test_passes_through_operator_items_unchanged(self) -> None:
-        raw = [{"url": "https://x", "action": "click Foo"}]
-        assert _trace._strip_nested_python_events(raw) == raw
-
-    def test_passes_through_non_python_apa_items_unchanged(self) -> None:
-        raw = [{"step_type": "goToUrl", "url": "https://x", "description": "..."}]
-        assert _trace._strip_nested_python_events(raw) == raw
-
-    def test_strips_events_from_nested_python_agent_run(self) -> None:
+class TestNestedActionTraceForwarding:
+    def test_forwards_nested_python_events_unchanged(self, recorded_events) -> None:
         raw = [
             {
                 "step_type": "pythonAgentRun",
                 "url": "",
                 "status": "success",
                 "duration_ms": 10,
-                "events": [{"kind": "stdout", "ts": 1, "text": "a"}],
+                "events": [
+                    {"kind": "stdout", "ts": 1, "text": "a"},
+                    {"kind": "stdout", "ts": 2, "text": "b"},
+                ],
             }
         ]
-        stripped = _trace._strip_nested_python_events(raw)
-        assert stripped is not None
-        assert stripped[0]["events"] == []
-        assert stripped[0]["truncated_event_count"] == 1
-
-    def test_none_passes_through(self) -> None:
-        assert _trace._strip_nested_python_events(None) is None
-
-    def test_integrates_with_emit_sub_agent_call(self, recorded_events) -> None:
         _trace.emit_sub_agent_call(
             ts_start=1,
             agent_type="custom_python",
             prompt="nested",
             status="success",
-            action_trace_raw=[
-                {
-                    "step_type": "pythonAgentRun",
-                    "url": "",
-                    "status": "success",
-                    "duration_ms": 10,
-                    "events": [
-                        {"kind": "stdout", "ts": 1, "text": "a"},
-                        {"kind": "stdout", "ts": 2, "text": "b"},
-                    ],
-                }
-            ],
+            action_trace_raw=raw,
         )
         event = recorded_events.events[0]
         inner = event["action_trace"][0]
-        assert inner["events"] == []
-        assert inner["truncated_event_count"] == 2
+        # Events are forwarded as-is; the SDK no longer strips them.
+        assert inner["events"] == raw[0]["events"]
+        assert "truncated_event_count" not in inner
 
 
 # ---------------------------------------------------------------------------

From 7ef25b1c678f3fe7b1aa6c246097dd98942d8c2b Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Tue, 28 Apr 2026 11:20:24 -0700
Subject: [PATCH 06/13] Remove pyodide trace tests

---
 packages/narada-pyodide/tests/README.md     |  20 -
 packages/narada-pyodide/tests/__init__.py   |   0
 packages/narada-pyodide/tests/conftest.py   |  56 --
 packages/narada-pyodide/tests/test_trace.py | 575 --------------------
 4 files changed, 651 deletions(-)
 delete mode 100644 packages/narada-pyodide/tests/README.md
 delete mode 100644 packages/narada-pyodide/tests/__init__.py
 delete mode 100644 packages/narada-pyodide/tests/conftest.py
 delete mode 100644 packages/narada-pyodide/tests/test_trace.py

diff --git a/packages/narada-pyodide/tests/README.md b/packages/narada-pyodide/tests/README.md
deleted file mode 100644
index 5ba6499..0000000
--- a/packages/narada-pyodide/tests/README.md
+++ /dev/null
@@ -1,20 +0,0 @@
-# narada-pyodide tests
-
-narada-pyodide and narada both publish under the top-level `narada` Python
-package namespace. When both are installed in the same environment, the
-workspace-installed `narada` package shadows narada-pyodide's source. This
-is fine at runtime (Pyodide only installs narada-pyodide) but breaks
-local unit testing.
-
-To run the unit tests locally from the workspace root:
-
-```bash
-uv pip uninstall narada
-uv run --package narada-pyodide pytest packages/narada-pyodide/tests/
-```
-
-Re-running `uv sync` will reinstall the `narada` package and require the
-uninstall step again.
-
-The `conftest.py` stubs the Pyodide-only `js` and `pyodide.*` imports so
-the non-HTTP helpers in narada-pyodide can be exercised on host CPython.
diff --git a/packages/narada-pyodide/tests/__init__.py b/packages/narada-pyodide/tests/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/packages/narada-pyodide/tests/conftest.py b/packages/narada-pyodide/tests/conftest.py
deleted file mode 100644
index 47abb32..0000000
--- a/packages/narada-pyodide/tests/conftest.py
+++ /dev/null
@@ -1,56 +0,0 @@
-"""Pytest fixtures shared across narada-pyodide tests.
-
-narada-pyodide is designed to run inside a Pyodide web worker; several of its
-transitive imports (``js``, ``pyodide.ffi``, ``pyodide.http``) are only
-available in that environment. To make the pure-Python unit tests runnable on
-a host CPython interpreter we stub those modules before any narada-pyodide
-code is imported. The real Pyodide runtime will obviously provide them.
-"""
-
-from __future__ import annotations
-
-import json
-import sys
-from collections.abc import Iterator
-from typing import Any
-from unittest.mock import MagicMock
-
-# Stub Pyodide-only modules. Must happen before `from narada import _trace`.
-for _mod in ("js", "pyodide", "pyodide.ffi", "pyodide.http"):
-    if _mod not in sys.modules:
-        sys.modules[_mod] = MagicMock()
-
-import pytest  # noqa: E402
-
-from narada import _trace  # noqa: E402
-
-
-class RecordingEmitter:
-    """Captures every event forwarded by ``_trace.emit_trace_event`` during a
-    test so assertions can inspect the JSON that would reach the JS harness.
-    """
-
-    def __init__(self) -> None:
-        self.events: list[dict[str, Any]] = []
-
-    def __call__(self, event_json: str) -> None:
-        # Round-trip through json to catch non-serialisable payloads early.
-        self.events.append(json.loads(event_json))
-
-
-@pytest.fixture
-def recorded_events() -> Iterator[RecordingEmitter]:
-    """Replace the JS-harness-injected ``_narada_emit_trace_event`` with a
-    recorder for the duration of a test, restoring the original binding
-    afterwards.
-    """
-    emitter = RecordingEmitter()
-    previous = getattr(_trace, "_narada_emit_trace_event", None)
-    _trace._narada_emit_trace_event = emitter  # type: ignore[attr-defined]
-    try:
-        yield emitter
-    finally:
-        if previous is None:
-            delattr(_trace, "_narada_emit_trace_event")
-        else:
-            _trace._narada_emit_trace_event = previous  # type: ignore[attr-defined]
diff --git a/packages/narada-pyodide/tests/test_trace.py b/packages/narada-pyodide/tests/test_trace.py
deleted file mode 100644
index 9c192d7..0000000
--- a/packages/narada-pyodide/tests/test_trace.py
+++ /dev/null
@@ -1,575 +0,0 @@
-"""Tests for the private ``narada._trace`` module.
-
-Covers the pure helpers (truncation, request/response summarisation) plus the
-``emit_*`` functions, asserting that the JSON payloads emitted to the JS
-harness match the ``PythonTraceEvent`` Pydantic schema defined in
-``narada_core.actions.models``.
-"""
-
-from __future__ import annotations
-
-import pytest
-from narada_core.actions.models import (
-    AgenticMouseActionRequest,
-    AgenticSelectorRequest,
-    CloseWindowRequest,
-    GetFullHtmlRequest,
-    GetFullHtmlResponse,
-    GetScreenshotRequest,
-    GetScreenshotResponse,
-    GetSimplifiedHtmlRequest,
-    GetSimplifiedHtmlResponse,
-    GetUrlRequest,
-    GetUrlResponse,
-    GoToUrlRequest,
-    PrintMessageRequest,
-    PythonAgentRunTrace,
-    ReadGoogleSheetRequest,
-    ReadGoogleSheetResponse,
-    WriteGoogleSheetRequest,
-    parse_action_trace,
-)
-
-from narada import _trace
-
-
-# ---------------------------------------------------------------------------
-# Truncation
-# ---------------------------------------------------------------------------
-
-
-class TestTruncate:
-    def test_returns_none_for_none(self) -> None:
-        assert _trace.truncate(None, 10) is None
-
-    def test_preserves_short_strings(self) -> None:
-        assert _trace.truncate("hello", 10) == "hello"
-
-    def test_preserves_exact_length(self) -> None:
-        assert _trace.truncate("1234567890", 10) == "1234567890"
-
-    def test_truncates_long_strings_with_ellipsis(self) -> None:
-        result = _trace.truncate("abcdefghij", 5)
-        assert result is not None
-        assert len(result) == 5
-        assert result.endswith("\u2026")
-        assert result.startswith("abcd")
-
-    def test_truncate_prompt_falls_back_to_empty(self) -> None:
-        assert _trace.truncate_prompt("") == ""
-
-    def test_truncate_error_bounded(self) -> None:
-        long = "x" * 5000
-        result = _trace.truncate_error(long)
-        assert len(result) == 1000
-        assert result.endswith("\u2026")
-
-
-# ---------------------------------------------------------------------------
-# summarize_request / summarize_response
-# ---------------------------------------------------------------------------
-
-
-class TestSummarizeRequest:
-    def test_go_to_url(self) -> None:
-        req = GoToUrlRequest(url="https://example.com", new_tab=True)
-        assert _trace.summarize_request(req) == {
-            "url": "https://example.com",
-            "new_tab": True,
-        }
-
-    @pytest.mark.parametrize(
-        "request_instance",
-        [
-            GetUrlRequest(),
-            GetScreenshotRequest(),
-            GetFullHtmlRequest(),
-            GetSimplifiedHtmlRequest(),
-            CloseWindowRequest(),
-        ],
-    )
-    def test_parameterless_requests_return_empty(
-        self, request_instance: object
-    ) -> None:
-        assert _trace.summarize_request(request_instance) == {}  # type: ignore[arg-type]
-
-    def test_read_google_sheet(self) -> None:
-        req = ReadGoogleSheetRequest(spreadsheet_id="abc123", range="Sheet1!A1:B10")
-        assert _trace.summarize_request(req) == {
-            "spreadsheet_id": "abc123",
-            "range": "Sheet1!A1:B10",
-        }
-
-    def test_write_google_sheet_reports_row_count_not_values(self) -> None:
-        big_values = [["r"] * 5 for _ in range(847)]
-        req = WriteGoogleSheetRequest(
-            spreadsheet_id="abc123", range="Sheet1!A1:E847", values=big_values
-        )
-        summary = _trace.summarize_request(req)
-        assert summary == {
-            "spreadsheet_id": "abc123",
-            "range": "Sheet1!A1:E847",
-            "row_count": 847,
-        }
-        # Explicitly guard against regressions that leak row payloads.
-        assert "values" not in summary
-
-    def test_print_message_truncates_long_messages(self) -> None:
-        long_msg = "x" * 2000
-        summary = _trace.summarize_request(PrintMessageRequest(message=long_msg))
-        truncated = summary["message"]
-        assert isinstance(truncated, str)
-        assert len(truncated) == 500
-        assert truncated.endswith("\u2026")
-
-    def test_agentic_selector_reports_action_type_and_truncates_query(self) -> None:
-        req = AgenticSelectorRequest(
-            action={"type": "click"},
-            selectors={"id": "submit-btn"},
-            fallback_operator_query="y" * 1000,
-        )
-        summary = _trace.summarize_request(req)
-        assert summary["action_type"] == "click"
-        assert len(summary["fallback_operator_query"]) == 200
-        # Selectors are intentionally omitted (not user-useful in trace view).
-        assert "selectors" not in summary
-
-    def test_agentic_mouse_action(self) -> None:
-        req = AgenticMouseActionRequest(
-            action={"type": "click"},
-            recorded_click={"x": 1, "y": 2, "viewport": {"width": 10, "height": 20}},
-            fallback_operator_query="click the button",
-            resize_window=False,
-        )
-        summary = _trace.summarize_request(req)
-        assert summary == {
-            "action_type": "click",
-            "fallback_operator_query": "click the button",
-        }
-
-
-class TestSummarizeResponse:
-    def test_get_url_returns_url(self) -> None:
-        req = GetUrlRequest()
-        resp = GetUrlResponse(url="https://example.com/page")
-        assert _trace.summarize_response(req, resp) == {
-            "url": "https://example.com/page"
-        }
-
-    def test_get_screenshot_returns_fixed_description(self) -> None:
-        req = GetScreenshotRequest()
-        resp = GetScreenshotResponse(
-            base64_content="...huge blob...",
-            name="page.png",
-            mime_type="image/png",
-            timestamp="2025-01-01T00:00:00Z",
-        )
-        summary = _trace.summarize_response(req, resp)
-        assert summary == {"description": "Took screenshot of the page"}
-
-    def test_full_html_returns_fixed_description(self) -> None:
-        summary = _trace.summarize_response(
-            GetFullHtmlRequest(), GetFullHtmlResponse(html="<html>...massive...</html>")
-        )
-        assert summary == {"description": "Got the full HTML of the page"}
-
-    def test_simplified_html_returns_fixed_description(self) -> None:
-        summary = _trace.summarize_response(
-            GetSimplifiedHtmlRequest(),
-            GetSimplifiedHtmlResponse(html="<html>short</html>"),
-        )
-        assert summary == {"description": "Got the simplified HTML of the page"}
-
-    def test_read_google_sheet_reports_dimensions(self) -> None:
-        req = ReadGoogleSheetRequest(spreadsheet_id="x", range="A1:C5")
-        resp = ReadGoogleSheetResponse(values=[["a", "b", "c"], ["d", "e", "f"], ["g"]])
-        assert _trace.summarize_response(req, resp) == {
-            "row_count": 3,
-            "column_count": 3,
-        }
-
-    def test_read_google_sheet_empty_values(self) -> None:
-        req = ReadGoogleSheetRequest(spreadsheet_id="x", range="A1:C5")
-        resp = ReadGoogleSheetResponse(values=[])
-        assert _trace.summarize_response(req, resp) == {
-            "row_count": 0,
-            "column_count": 0,
-        }
-
-    def test_write_google_sheet_returns_none(self) -> None:
-        req = WriteGoogleSheetRequest(spreadsheet_id="x", range="A1", values=[["v"]])
-        assert _trace.summarize_response(req, None) is None
-
-    def test_close_window_returns_none(self) -> None:
-        assert _trace.summarize_response(CloseWindowRequest(), None) is None
-
-
-# ---------------------------------------------------------------------------
-# Event emitters
-# ---------------------------------------------------------------------------
-
-
-class TestEmitSubAgentCall:
-    def test_success_with_action_trace(self, recorded_events) -> None:
-        _trace.emit_sub_agent_call(
-            ts_start=1000,
-            agent_type="operator",
-            prompt="Find leads",
-            status="success",
-            request_id="req_abc",
-            action_trace_raw=[{"url": "https://sf.com", "action": "click Leads"}],
-        )
-        (event,) = recorded_events.events
-        assert event["kind"] == "subAgentCall"
-        assert event["ts_start"] == 1000
-        assert event["ts_end"] >= 1000
-        assert event["agent_type"] == "operator"
-        assert event["prompt"] == "Find leads"
-        assert event["status"] == "success"
-        assert event["request_id"] == "req_abc"
-        assert event["action_trace"] == [
-            {"url": "https://sf.com", "action": "click Leads"}
-        ]
-        assert "error_message" not in event
-
-    def test_success_without_action_trace_omits_field(self, recorded_events) -> None:
-        _trace.emit_sub_agent_call(
-            ts_start=1000, agent_type="operator", prompt="hi", status="success"
-        )
-        (event,) = recorded_events.events
-        assert "action_trace" not in event
-        assert "request_id" not in event
-
-    def test_timeout_includes_error_message(self, recorded_events) -> None:
-        _trace.emit_sub_agent_call(
-            ts_start=1000,
-            agent_type="operator",
-            prompt="hi",
-            status="timeout",
-            error_message="Timed out after 60s",
-        )
-        (event,) = recorded_events.events
-        assert event["status"] == "timeout"
-        assert event["error_message"] == "Timed out after 60s"
-
-    def test_error_truncates_error_message(self, recorded_events) -> None:
-        _trace.emit_sub_agent_call(
-            ts_start=1000,
-            agent_type="operator",
-            prompt="hi",
-            status="error",
-            error_message="x" * 5000,
-        )
-        (event,) = recorded_events.events
-        assert len(event["error_message"]) == 1000
-
-    def test_prompt_is_truncated(self, recorded_events) -> None:
-        _trace.emit_sub_agent_call(
-            ts_start=1000,
-            agent_type="operator",
-            prompt="y" * 1000,
-            status="success",
-        )
-        (event,) = recorded_events.events
-        assert len(event["prompt"]) == 500
-
-
-class TestEmitExtensionAction:
-    def test_success_with_result_summary(self, recorded_events) -> None:
-        req = GetUrlRequest()
-        resp = GetUrlResponse(url="https://x.com")
-        _trace.emit_extension_action(
-            ts_start=2000, request=req, status="success", response=resp
-        )
-        (event,) = recorded_events.events
-        assert event["kind"] == "extensionAction"
-        assert event["action_name"] == "get_url"
-        assert event["request_summary"] == {}
-        assert event["result_summary"] == {"url": "https://x.com"}
-        assert event["status"] == "success"
-
-    def test_success_without_result_summary_omits_field(self, recorded_events) -> None:
-        req = WriteGoogleSheetRequest(
-            spreadsheet_id="abc", range="A1:B2", values=[["1", "2"], ["3", "4"]]
-        )
-        _trace.emit_extension_action(ts_start=2000, request=req, status="success")
-        (event,) = recorded_events.events
-        assert event["request_summary"] == {
-            "spreadsheet_id": "abc",
-            "range": "A1:B2",
-            "row_count": 2,
-        }
-        assert "result_summary" not in event
-
-    def test_timeout(self, recorded_events) -> None:
-        _trace.emit_extension_action(
-            ts_start=0,
-            request=GoToUrlRequest(url="https://a.b", new_tab=False),
-            status="timeout",
-            error_message="Timed out",
-        )
-        (event,) = recorded_events.events
-        assert event["status"] == "timeout"
-        assert event["action_name"] == "go_to_url"
-
-    def test_error(self, recorded_events) -> None:
-        _trace.emit_extension_action(
-            ts_start=0,
-            request=CloseWindowRequest(),
-            status="error",
-            error_message="permission denied",
-        )
-        (event,) = recorded_events.events
-        assert event["status"] == "error"
-        assert event["error_message"] == "permission denied"
-
-
-class TestEmitSideEffect:
-    def test_download_file(self, recorded_events) -> None:
-        _trace.emit_side_effect(
-            effect_type="download_file", description="Downloaded file: report.pdf"
-        )
-        (event,) = recorded_events.events
-        assert event["kind"] == "sideEffect"
-        assert event["effect_type"] == "download_file"
-        assert event["description"] == "Downloaded file: report.pdf"
-        assert "ts" in event
-
-    def test_render_html(self, recorded_events) -> None:
-        _trace.emit_side_effect(
-            effect_type="render_html", description="Rendered HTML in a new tab"
-        )
-        (event,) = recorded_events.events
-        assert event["effect_type"] == "render_html"
-
-
-# ---------------------------------------------------------------------------
-# End-to-end schema validation: every event kind produced by the emitters
-# round-trips cleanly through the ``PythonAgentRunTrace`` Pydantic model and
-# the ``parse_action_trace`` entry point used by downstream consumers.
-# ---------------------------------------------------------------------------
-
-
-class TestPythonAgentRunTraceRoundtrip:
-    def test_every_event_kind_parses(self, recorded_events) -> None:
-        _trace.emit_sub_agent_call(
-            ts_start=1000,
-            agent_type="operator",
-            prompt="Find leads",
-            status="success",
-            request_id="req_abc",
-            action_trace_raw=[{"url": "https://sf.com", "action": "click Leads"}],
-        )
-        _trace.emit_extension_action(
-            ts_start=2000,
-            request=GetScreenshotRequest(),
-            status="success",
-            response=GetScreenshotResponse(
-                base64_content="ignored",
-                name="page.png",
-                mime_type="image/png",
-                timestamp="now",
-            ),
-        )
-        _trace.emit_side_effect(
-            effect_type="download_file", description="Downloaded file: leads.csv"
-        )
-
-        # Assemble a representative PythonAgentRunTrace containing the emitted
-        # events alongside stdout / stderr events (which are synthesised by
-        # the JS-side runnable, not the SDK).
-        stdout_stderr_events = [
-            {"kind": "stdout", "ts": 500, "text": "starting"},
-            {"kind": "stderr", "ts": 2500, "text": "deprecation warning"},
-        ]
-        events = stdout_stderr_events + recorded_events.events
-        events.sort(key=lambda e: e.get("ts", e.get("ts_start", 0)))
-
-        raw = [
-            {
-                "step_type": "pythonAgentRun",
-                "url": "https://app.narada.ai/agent",
-                "status": "success",
-                "duration_ms": 3000,
-                "events": events,
-            }
-        ]
-        trace = parse_action_trace(raw)
-        assert len(trace) == 1
-        (node,) = trace
-        assert isinstance(node, PythonAgentRunTrace)
-        # Order reflects the real wall-clock timestamps: the emitters stamp
-        # events with ``now_ms()`` at emit time, which in this test runs much
-        # later than the synthetic stdout/stderr timestamps below. The side
-        # effect therefore sorts after ``stderr`` (ts=2500).
-        assert [e.kind for e in node.events] == [
-            "stdout",
-            "subAgentCall",
-            "extensionAction",
-            "stderr",
-            "sideEffect",
-        ]
-        # Nested action_trace rehydrates correctly as an OperatorActionTrace.
-        sub_call = node.events[1]
-        assert sub_call.kind == "subAgentCall"
-        assert sub_call.action_trace is not None
-        assert sub_call.action_trace[0].url == "https://sf.com"
-
-    def test_error_status_parses(self) -> None:
-        raw = [
-            {
-                "step_type": "pythonAgentRun",
-                "url": "https://x",
-                "status": "error",
-                "duration_ms": 120,
-                "error_message": "ZeroDivisionError",
-                "events": [],
-            }
-        ]
-        trace = parse_action_trace(raw)
-        assert isinstance(trace[0], PythonAgentRunTrace)
-        assert trace[0].status == "error"
-        assert trace[0].error_message == "ZeroDivisionError"
-
-
-# ---------------------------------------------------------------------------
-# Defensive emit: observability must never break the user's agent run
-# ---------------------------------------------------------------------------
-
-
-class TestEmitDefensive:
-    def test_non_serialisable_payload_is_stringified_not_raised(
-        self, recorded_events
-    ) -> None:
-        """A stray datetime / set / custom object in a summary should not crash
-        user code mid-run. ``default=str`` stringifies and the event still
-        reaches the harness."""
-        import datetime as _dt
-
-        _trace.emit_trace_event(
-            {
-                "kind": "stdout",
-                "ts": _dt.datetime(2026, 1, 1),  # non-serialisable in std json
-                "text": "hello",
-            }
-        )
-        # Event was recorded (ts got stringified by default=str).
-        assert len(recorded_events.events) == 1
-        assert isinstance(recorded_events.events[0]["ts"], str)
-
-    def test_harness_raising_does_not_propagate(self, monkeypatch) -> None:
-        """If the JS-injected emitter raises, we swallow and log rather than
-        propagate — tracing failures must not break the agent run."""
-
-        def _boom(_json: str) -> None:
-            raise RuntimeError("bridge down")
-
-        # `_narada_emit_trace_event` is injected by the JS harness at runtime
-        # (TYPE_CHECKING stub only in source); set without `raising` so the
-        # assignment succeeds even when the attribute isn't yet bound.
-        monkeypatch.setattr(_trace, "_narada_emit_trace_event", _boom, raising=False)
-        # Must not raise.
-        _trace.emit_trace_event({"kind": "stdout", "ts": 1, "text": "hi"})
-
-
-# ---------------------------------------------------------------------------
-# Nested action_trace forwarding: SDK forwards events as-is; size enforcement
-# is the frontend's responsibility (MAX_NESTED_ACTION_TRACE_BYTES in
-# python.worker.ts and the workflow-run-detail consumer caps).
-# ---------------------------------------------------------------------------
-
-
-class TestNestedActionTraceForwarding:
-    def test_forwards_nested_python_events_unchanged(self, recorded_events) -> None:
-        raw = [
-            {
-                "step_type": "pythonAgentRun",
-                "url": "",
-                "status": "success",
-                "duration_ms": 10,
-                "events": [
-                    {"kind": "stdout", "ts": 1, "text": "a"},
-                    {"kind": "stdout", "ts": 2, "text": "b"},
-                ],
-            }
-        ]
-        _trace.emit_sub_agent_call(
-            ts_start=1,
-            agent_type="custom_python",
-            prompt="nested",
-            status="success",
-            action_trace_raw=raw,
-        )
-        event = recorded_events.events[0]
-        inner = event["action_trace"][0]
-        # Events are forwarded as-is; the SDK no longer strips them.
-        assert inner["events"] == raw[0]["events"]
-        assert "truncated_event_count" not in inner
-
-
-# ---------------------------------------------------------------------------
-# Pydantic invariants on new event models
-# ---------------------------------------------------------------------------
-
-
-class TestPythonEventInvariants:
-    def test_sub_agent_call_rejects_ts_end_before_ts_start(self) -> None:
-        from narada_core.actions.models import PythonSubAgentCallEvent
-        from pydantic import ValidationError
-
-        with pytest.raises(ValidationError, match="ts_end"):
-            PythonSubAgentCallEvent(
-                ts_start=1000,
-                ts_end=999,
-                agent_type="operator",
-                prompt="p",
-                status="success",
-            )
-
-    def test_extension_action_rejects_ts_end_before_ts_start(self) -> None:
-        from narada_core.actions.models import PythonExtensionActionEvent
-        from pydantic import ValidationError
-
-        with pytest.raises(ValidationError, match="ts_end"):
-            PythonExtensionActionEvent(
-                ts_start=1000,
-                ts_end=999,
-                action_name="get_url",
-                request_summary={},
-                status="success",
-            )
-
-    def test_python_agent_run_rejects_negative_duration(self) -> None:
-        from pydantic import ValidationError
-
-        with pytest.raises(ValidationError):
-            PythonAgentRunTrace(
-                url="",
-                status="success",
-                duration_ms=-1,
-                events=[],
-            )
-
-
-# ---------------------------------------------------------------------------
-# Deterministic parse_action_trace selection
-# ---------------------------------------------------------------------------
-
-
-class TestParseActionTraceDispatch:
-    def test_empty_list_parses_as_apa(self) -> None:
-        result = parse_action_trace([])
-        assert result == []
-
-    def test_step_type_routes_to_apa_adapter(self) -> None:
-        result = parse_action_trace(
-            [{"step_type": "goToUrl", "url": "https://x", "description": "..."}]
-        )
-        assert result[0].step_type == "goToUrl"
-
-    def test_action_plus_url_routes_to_operator_adapter(self) -> None:
-        from narada_core.actions.models import OperatorActionTraceItem
-
-        result = parse_action_trace([{"url": "https://x", "action": "click Foo"}])
-        assert isinstance(result[0], OperatorActionTraceItem)
-        assert result[0].action == "click Foo"

From cd05fb7b7b44058f3ad6d95b6d731ff080373b88 Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Tue, 28 Apr 2026 18:01:35 -0700
Subject: [PATCH 07/13] Remove pyodide reasoning test

---
 .../narada-pyodide/tests/test_reasoning.py    | 262 ------------------
 1 file changed, 262 deletions(-)
 delete mode 100644 packages/narada-pyodide/tests/test_reasoning.py

diff --git a/packages/narada-pyodide/tests/test_reasoning.py b/packages/narada-pyodide/tests/test_reasoning.py
deleted file mode 100644
index 2844015..0000000
--- a/packages/narada-pyodide/tests/test_reasoning.py
+++ /dev/null
@@ -1,262 +0,0 @@
-"""Tests for the `reasoning` parameter on the Core Agent.
-
-These exercise the `narada-pyodide` window because it is the only package with
-a runnable test harness today; the impl in the sibling `narada` package shares
-the same request-body wiring and runtime check, so coverage here verifies the
-behavior across both code paths.
-
-We mirror `test_cloud_browser.py`'s module-clearing pattern: each test gets a
-fresh import of `narada.window` with a freshly stubbed `pyodide.http.pyfetch`,
-because cached module references from earlier tests would otherwise leak into
-this file when the suite runs in alphabetical order.
-"""
-
-from __future__ import annotations
-
-import importlib
-import json
-import sys
-from collections.abc import Iterator
-from types import ModuleType, SimpleNamespace
-from typing import Any
-from unittest.mock import AsyncMock
-
-import pytest
-from narada_core.models import Agent, ReasoningEffort
-
-
-def _clear_modules() -> None:
-    for name in list(sys.modules):
-        if name == "narada" or name.startswith("narada."):
-            sys.modules.pop(name, None)
-    for name in ("js", "pyodide", "pyodide.http", "pyodide.ffi"):
-        sys.modules.pop(name, None)
-
-
-class _FakeResponse:
-    def __init__(self, *, ok: bool = True, json_data: object = None) -> None:
-        self.ok = ok
-        self.status = 200
-        self._json_data = json_data
-
-    async def json(self) -> object:
-        return self._json_data
-
-    async def text(self) -> str:
-        return ""
-
-
-def _make_pyfetch_recorder() -> tuple[AsyncMock, list[dict[str, Any]]]:
-    """Build an `AsyncMock` for `pyfetch` that captures every JSON body posted
-    to /remote-dispatch and returns a canned success response on the poll."""
-    posted_bodies: list[dict[str, Any]] = []
-
-    async def _impl(url: str, **kwargs: Any) -> _FakeResponse:
-        if "body" in kwargs:
-            posted_bodies.append(json.loads(kwargs["body"]))
-        if url.endswith("/remote-dispatch"):
-            return _FakeResponse(json_data={"requestId": "req-test"})
-        return _FakeResponse(
-            json_data={
-                "status": "success",
-                "response": {
-                    "text": "ok",
-                    "output": {"type": "text", "content": "ok"},
-                },
-                "createdAt": "now",
-                "completedAt": "now",
-                "usage": {"actions": 0, "credits": 0.0},
-            }
-        )
-
-    pyfetch = AsyncMock(side_effect=_impl)
-    return pyfetch, posted_bodies
-
-
-@pytest.fixture
-def reimported_window(
-    monkeypatch: pytest.MonkeyPatch,
-) -> Iterator[tuple[ModuleType, AsyncMock, list[dict[str, Any]]]]:
-    """Force a fresh import of `narada.window` after planting freshly-mocked
-    Pyodide-bridge modules. Yields the window module, the captured `pyfetch`
-    mock, and the list that records every posted JSON body.
-    """
-    _clear_modules()
-
-    js_module = ModuleType("js")
-    js_module.AbortController = SimpleNamespace(  # type: ignore[attr-defined]
-        new=lambda: SimpleNamespace(signal=object(), abort=lambda: None)
-    )
-    js_module.setTimeout = lambda callback, timeout: None  # type: ignore[attr-defined]
-
-    pyodide_module = ModuleType("pyodide")
-    pyodide_module.__path__ = []  # type: ignore[attr-defined]
-
-    pyfetch, posted_bodies = _make_pyfetch_recorder()
-    pyodide_http_module = ModuleType("pyodide.http")
-    pyodide_http_module.pyfetch = pyfetch  # type: ignore[attr-defined]
-
-    pyodide_ffi_module = ModuleType("pyodide.ffi")
-
-    class _FakeJsProxy:
-        def __init__(self, value: object) -> None:
-            self._value = value
-
-        def to_py(self) -> object:
-            return self._value
-
-    pyodide_ffi_module.JsProxy = _FakeJsProxy  # type: ignore[attr-defined]
-    pyodide_ffi_module.create_once_callable = lambda fn: fn  # type: ignore[attr-defined]
-
-    monkeypatch.setitem(sys.modules, "js", js_module)
-    monkeypatch.setitem(sys.modules, "pyodide", pyodide_module)
-    monkeypatch.setitem(sys.modules, "pyodide.http", pyodide_http_module)
-    monkeypatch.setitem(sys.modules, "pyodide.ffi", pyodide_ffi_module)
-
-    window_module = importlib.import_module("narada.window")
-    window_module._narada_parent_run_ids = _FakeJsProxy([])  # type: ignore[attr-defined]
-    yield window_module, pyfetch, posted_bodies
-    _clear_modules()
-
-
-def _make_window(window_module: ModuleType) -> Any:
-    window = window_module.LocalBrowserWindow.__new__(window_module.LocalBrowserWindow)
-    window._auth_headers = {"x-narada-test": "1"}
-    window._base_url = "https://example.invalid/api"
-    window._browser_window_id = "test-window"
-
-    async def _stub_auth_headers() -> dict[str, str]:
-        return {"x-narada-test": "1"}
-
-    window._get_auth_headers = _stub_auth_headers
-    window._current_parent_run_ids = lambda: []
-    return window
-
-
-class TestReasoningBodyWiring:
-    """The `reasoning` arg flows through to the JSON body as `reasoningMode`."""
-
-    @pytest.mark.asyncio
-    async def test_present_when_reasoning_is_set(
-        self,
-        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
-    ) -> None:
-        window_module, _pyfetch, posted_bodies = reimported_window
-        window = _make_window(window_module)
-        await window.dispatch_request(
-            prompt="solve this",
-            agent=Agent.CORE_AGENT,
-            reasoning=ReasoningEffort.MEDIUM,
-        )
-
-        assert posted_bodies[0]["reasoningMode"] == "medium"
-
-    @pytest.mark.asyncio
-    async def test_absent_when_reasoning_is_none(
-        self,
-        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
-    ) -> None:
-        window_module, _pyfetch, posted_bodies = reimported_window
-        window = _make_window(window_module)
-        await window.dispatch_request(
-            prompt="solve this",
-            agent=Agent.CORE_AGENT,
-        )
-
-        # Absent (not null) — wire-compatible with backends predating the field.
-        assert "reasoningMode" not in posted_bodies[0]
-
-    @pytest.mark.asyncio
-    async def test_each_effort_level_serializes_to_string(
-        self,
-        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
-    ) -> None:
-        window_module, _pyfetch, posted_bodies = reimported_window
-        window = _make_window(window_module)
-
-        for level in (
-            ReasoningEffort.NONE,
-            ReasoningEffort.LOW,
-            ReasoningEffort.MEDIUM,
-            ReasoningEffort.HIGH,
-        ):
-            await window.dispatch_request(
-                prompt="x",
-                agent=Agent.CORE_AGENT,
-                reasoning=level,
-            )
-
-        seen = [b["reasoningMode"] for b in posted_bodies if "reasoningMode" in b]
-        assert seen == ["none", "low", "medium", "high"]
-
-
-class TestReasoningRuntimeValidation:
-    """Misuse — `reasoning` paired with a non-Core agent — fails fast at runtime
-    with a clear message. The overload contract on the public `agent()` method
-    catches this at type-check time when callers use the enum, but the runtime
-    check covers the string-form (`agent="..."`) and untyped paths."""
-
-    @pytest.mark.asyncio
-    async def test_dispatch_request_rejects_non_core_agent_enum(
-        self,
-        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
-    ) -> None:
-        window_module, _pyfetch, _posted = reimported_window
-        window = _make_window(window_module)
-        with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"):
-            await window.dispatch_request(
-                prompt="x",
-                agent=Agent.OPERATOR,
-                reasoning=ReasoningEffort.MEDIUM,  # pyright: ignore[reportCallIssue]
-            )
-
-    @pytest.mark.asyncio
-    async def test_dispatch_request_rejects_string_agent(
-        self,
-        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
-    ) -> None:
-        # String-form bypasses the type-checker overload, so the runtime check
-        # is the only safety net here.
-        window_module, _pyfetch, _posted = reimported_window
-        window = _make_window(window_module)
-        with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"):
-            await window.dispatch_request(
-                prompt="x",
-                agent="some-custom-agent",
-                reasoning=ReasoningEffort.HIGH,  # pyright: ignore[reportCallIssue]
-            )
-
-    @pytest.mark.asyncio
-    async def test_agent_rejects_non_core_agent_enum(
-        self,
-        reimported_window: tuple[ModuleType, AsyncMock, list[dict[str, Any]]],
-    ) -> None:
-        # The same constraint must hold on the higher-level `agent()` method.
-        window_module, _pyfetch, _posted = reimported_window
-        window = _make_window(window_module)
-        with pytest.raises(ValueError, match="agent=Agent.CORE_AGENT"):
-            await window.agent(
-                prompt="x",
-                agent=Agent.OPERATOR,
-                reasoning=ReasoningEffort.LOW,  # pyright: ignore[reportCallIssue]
-            )
-
-
-class TestReasoningEffortEnum:
-    """The enum values are exactly what the backend expects."""
-
-    def test_values_match_backend_literal(self) -> None:
-        # The backend declares `reasoningMode: Literal["none", "low",
-        # "medium", "high"] | None`. If we drift, requests will start failing
-        # validation server-side.
-        assert ReasoningEffort.NONE.value == "none"
-        assert ReasoningEffort.LOW.value == "low"
-        assert ReasoningEffort.MEDIUM.value == "medium"
-        assert ReasoningEffort.HIGH.value == "high"
-
-    def test_str_enum_serializes_inline(self) -> None:
-        # `StrEnum` values double as `str`, which is what `json.dumps` writes
-        # without any custom encoder.
-        assert json.dumps({"reasoningMode": ReasoningEffort.MEDIUM.value}) == (
-            '{"reasoningMode": "medium"}'
-        )

From 66e9fb3c89d17e743819d69a4cec779f83f668eb Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Wed, 29 Apr 2026 19:03:02 -0700
Subject: [PATCH 08/13] Address Python trace review comments

---
 .../src/narada_core/actions/models.py         |   4 -
 packages/narada-pyodide/src/narada/_trace.py  | 150 ++----------------
 2 files changed, 16 insertions(+), 138 deletions(-)

diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py
index 4e46ee2..24e03ec 100644
--- a/packages/narada-core/src/narada_core/actions/models.py
+++ b/packages/narada-core/src/narada_core/actions/models.py
@@ -308,10 +308,6 @@ class PythonAgentRunTrace(BaseModel):
     duration_ms: NonNegativeInt
     events: list[PythonTraceEvent]
     error_message: str | None = None
-    # Set by the runtime when it caps the number of buffered events (see
-    # `python.worker.ts`). Informational only; the dashboard surfaces it so
-    # users know their trace is partial.
-    truncated_event_count: NonNegativeInt | None = None
 
 
 ApaStepTrace = Annotated[
diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py
index 22117a5..0586a48 100644
--- a/packages/narada-pyodide/src/narada/_trace.py
+++ b/packages/narada-pyodide/src/narada/_trace.py
@@ -18,49 +18,16 @@
 import time
 from typing import TYPE_CHECKING, Any, Literal
 
-from narada_core.actions.models import (
-    AgenticMouseActionRequest,
-    AgenticSelectorRequest,
-    CloseWindowRequest,
-    ExtensionActionRequest,
-    GetFullHtmlRequest,
-    GetScreenshotRequest,
-    GetSimplifiedHtmlRequest,
-    GetUrlRequest,
-    GetUrlResponse,
-    GoToUrlRequest,
-    PrintMessageRequest,
-    ReadGoogleSheetRequest,
-    ReadGoogleSheetResponse,
-    WriteGoogleSheetRequest,
-)
+from narada_core.actions.models import ExtensionActionRequest
 from pydantic import BaseModel
 
 if TYPE_CHECKING:
-    # Injected by the JavaScript harness at worker startup (see
-    # `frontend/src/lib/apa/python/python.worker.ts`). narada-pyodide is
+    # Injected by the JavaScript harness at worker startup. narada-pyodide is
     # only ever imported under a Pyodide worker that has registered this
     # builtin; there is no non-Pyodide code path.
     def _narada_emit_trace_event(event_json: str) -> None: ...
 
 
-# Hard caps on payload sizes carried in trace events. Values are large enough
-# that typical prompts and error messages survive intact but small enough to
-# bound worst-case persisted actionTrace JSON.
-_MAX_PROMPT_CHARS = 500
-_MAX_MESSAGE_CHARS = 500
-_MAX_ERROR_CHARS = 1000
-_MAX_QUERY_CHARS = 200
-
-# When a sub-agent's response includes its own action trace (for example, the
-# operator's step-by-step actions), we forward that trace one level deep so
-# the dashboard can expand it. We do not forward deeper nesting — Python
-# agents that delegate into other Python agents would otherwise produce
-# exponentially-sized persisted traces.
-_MAX_NESTED_ACTION_TRACE_DEPTH = 1
-
-_ELLIPSIS = "\u2026"
-
 _logger = logging.getLogger(__name__)
 
 
@@ -69,24 +36,6 @@ def now_ms() -> int:
     return int(time.time() * 1000)
 
 
-def truncate(value: str | None, max_chars: int) -> str | None:
-    """Return ``value`` shortened to at most ``max_chars`` characters, suffixed
-    with an ellipsis when truncation occurred. Returns ``None`` unchanged."""
-    if value is None:
-        return None
-    if len(value) <= max_chars:
-        return value
-    return value[: max_chars - 1] + _ELLIPSIS
-
-
-def truncate_prompt(prompt: str) -> str:
-    return truncate(prompt, _MAX_PROMPT_CHARS) or ""
-
-
-def truncate_error(error: str) -> str:
-    return truncate(error, _MAX_ERROR_CHARS) or ""
-
-
 def emit_trace_event(event: dict[str, Any]) -> None:
     """Forward a single trace event to the JavaScript harness.
 
@@ -106,75 +55,14 @@ def emit_trace_event(event: dict[str, Any]) -> None:
         _logger.warning("trace event emission failed", exc_info=True)
 
 
-def summarize_request(request: ExtensionActionRequest) -> dict[str, Any]:
-    """Produce a bounded-size summary of an extension action request for
-    display in the observability dashboard. Large payloads (sheet row values,
-    selector graphs) are reduced to row counts or action types; free-form
-    strings are truncated.
-
-    The returned dict is always JSON-serialisable and fits the
-    ``PythonExtensionActionEvent.request_summary`` field.
-    """
-    if isinstance(request, GoToUrlRequest):
-        return {"url": request.url, "new_tab": request.new_tab}
-    if isinstance(
-        request,
-        (
-            GetUrlRequest,
-            GetScreenshotRequest,
-            GetFullHtmlRequest,
-            GetSimplifiedHtmlRequest,
-            CloseWindowRequest,
-        ),
-    ):
-        return {}
-    if isinstance(request, ReadGoogleSheetRequest):
-        return {"spreadsheet_id": request.spreadsheet_id, "range": request.range}
-    if isinstance(request, WriteGoogleSheetRequest):
-        return {
-            "spreadsheet_id": request.spreadsheet_id,
-            "range": request.range,
-            "row_count": len(request.values),
-        }
-    if isinstance(request, PrintMessageRequest):
-        return {"message": truncate(request.message, _MAX_MESSAGE_CHARS)}
-    if isinstance(request, (AgenticSelectorRequest, AgenticMouseActionRequest)):
-        return {
-            "action_type": request.action["type"],
-            "fallback_operator_query": truncate(
-                request.fallback_operator_query, _MAX_QUERY_CHARS
-            ),
-        }
-    # ExtensionActionRequest is a closed union today. If a new variant is
-    # added without updating this function, we degrade gracefully to an empty
-    # summary rather than crashing the user's agent mid-run.
-    return {}
-
-
-def summarize_response(
-    request: ExtensionActionRequest,
-    response: BaseModel | None,
-) -> dict[str, Any] | None:
-    """Produce a bounded-size summary of an extension action response, keyed
-    on the originating request type. Returns ``None`` for actions that have
-    no observable result (writes, navigations, close) so the dashboard can
-    omit an empty row rather than rendering a hollow card.
-    """
-    if isinstance(request, GetUrlRequest) and isinstance(response, GetUrlResponse):
-        return {"url": response.url}
-    if isinstance(request, GetScreenshotRequest):
-        return {"description": "Took screenshot of the page"}
-    if isinstance(request, GetFullHtmlRequest):
-        return {"description": "Got the full HTML of the page"}
-    if isinstance(request, GetSimplifiedHtmlRequest):
-        return {"description": "Got the simplified HTML of the page"}
-    if isinstance(request, ReadGoogleSheetRequest) and isinstance(
-        response, ReadGoogleSheetResponse
-    ):
-        rows = response.values
-        column_count = max((len(row) for row in rows), default=0)
-        return {"row_count": len(rows), "column_count": column_count}
-    return None
+def dump_model(model: BaseModel) -> dict[str, Any]:
+    """Return the model's JSON-ready representation for trace persistence."""
+    try:
+        return model.model_dump(mode="json")
+    except TypeError:
+        # Some narada-core request models override model_dump without accepting
+        # Pydantic's keyword arguments; NOTE(review): this path skips mode="json".
+        return model.model_dump()
 
 
 # ---------------------------------------------------------------------------
@@ -207,19 +95,14 @@ def emit_sub_agent_call(
         "ts_start": ts_start,
         "ts_end": now_ms(),
         "agent_type": agent_type,
-        "prompt": truncate_prompt(prompt),
+        "prompt": prompt,
         "status": status,
     }
     if request_id is not None:
         event["request_id"] = request_id
     if error_message is not None:
-        event["error_message"] = truncate_error(error_message)
+        event["error_message"] = error_message
     if action_trace_raw is not None:
-        # Forward the nested action trace as-is. Size/depth enforcement is the
-        # frontend's responsibility (`MAX_NESTED_ACTION_TRACE_BYTES` in
-        # python.worker.ts, plus the workflow-run-detail consumer caps).
-        # Stripping events here is redundant and prevents the dashboard from
-        # rendering small inline nested traces inline in CollapsibleNestedTrace.
         event["action_trace"] = action_trace_raw
     emit_trace_event(event)
 
@@ -237,14 +120,13 @@ def emit_extension_action(
         "ts_start": ts_start,
         "ts_end": now_ms(),
         "action_name": request.name,
-        "request_summary": summarize_request(request),
+        "request_summary": dump_model(request),
         "status": status,
     }
-    result_summary = summarize_response(request, response)
-    if result_summary is not None:
-        event["result_summary"] = result_summary
+    if response is not None:
+        event["result_summary"] = dump_model(response)
     if error_message is not None:
-        event["error_message"] = truncate_error(error_message)
+        event["error_message"] = error_message
     emit_trace_event(event)
 
 

From 68150bc2b8905f22d22126d3c20bed4437380077 Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Thu, 30 Apr 2026 10:20:25 -0700
Subject: [PATCH 09/13] Move trace models into tracing package

---
 .../src/narada_core/actions/models.py         | 398 ++----------------
 .../src/narada_core/tracing/__init__.py       |   2 +
 .../src/narada_core/tracing/model.py          | 361 ++++++++++++++++
 packages/narada-pyodide/src/narada/_trace.py  |   4 +-
 packages/narada-pyodide/src/narada/window.py  |   2 +-
 packages/narada/src/narada/window.py          |   2 +-
 6 files changed, 412 insertions(+), 357 deletions(-)
 create mode 100644 packages/narada-core/src/narada_core/tracing/__init__.py
 create mode 100644 packages/narada-core/src/narada_core/tracing/model.py

diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py
index 24e03ec..db49d64 100644
--- a/packages/narada-core/src/narada_core/actions/models.py
+++ b/packages/narada-core/src/narada_core/actions/models.py
@@ -15,12 +15,53 @@
 from pydantic import (
     BaseModel,
     Field,
-    NonNegativeInt,
-    TypeAdapter,
-    ValidationError,
-    model_validator,
 )
 
+from narada_core.tracing import model as tracing_model
+
+ActionTrace = tracing_model.ActionTrace
+AgentTrace = tracing_model.AgentTrace
+AgenticMouseActionTrace = tracing_model.AgenticMouseActionTrace
+AgenticSelectorTrace = tracing_model.AgenticSelectorTrace
+ApaActionTrace = tracing_model.ApaActionTrace
+ApaStepTrace = tracing_model.ApaStepTrace
+DataTableExportAsCsvTrace = tracing_model.DataTableExportAsCsvTrace
+DataTableInsertRowTrace = tracing_model.DataTableInsertRowTrace
+DataTableUpdateCellValueTrace = tracing_model.DataTableUpdateCellValueTrace
+EndTrace = tracing_model.EndTrace
+ForLoopTrace = tracing_model.ForLoopTrace
+GetFullHtmlTrace = tracing_model.GetFullHtmlTrace
+GetScreenshotTrace = tracing_model.GetScreenshotTrace
+GetSimplifiedHtmlTrace = tracing_model.GetSimplifiedHtmlTrace
+GetUrlTrace = tracing_model.GetUrlTrace
+GoToUrlTrace = tracing_model.GoToUrlTrace
+IfTrace = tracing_model.IfTrace
+ObjectExportAsJsonTrace = tracing_model.ObjectExportAsJsonTrace
+ObjectSetPropertiesTrace = tracing_model.ObjectSetPropertiesTrace
+OperatorActionTrace = tracing_model.OperatorActionTrace
+OperatorActionTraceItem = tracing_model.OperatorActionTraceItem
+OutputTrace = tracing_model.OutputTrace
+PressKeysTrace = tracing_model.PressKeysTrace
+PrintTrace = tracing_model.PrintTrace
+PythonAgentRunTrace = tracing_model.PythonAgentRunTrace
+PythonExtensionActionEvent = tracing_model.PythonExtensionActionEvent
+PythonSideEffectEvent = tracing_model.PythonSideEffectEvent
+PythonStderrEvent = tracing_model.PythonStderrEvent
+PythonStdoutEvent = tracing_model.PythonStdoutEvent
+PythonSubAgentCallEvent = tracing_model.PythonSubAgentCallEvent
+PythonTrace = tracing_model.PythonTrace
+PythonTraceEvent = tracing_model.PythonTraceEvent
+ReadCsvTrace = tracing_model.ReadCsvTrace
+ReadGoogleSheetTrace = tracing_model.ReadGoogleSheetTrace
+RunCustomAgentTrace = tracing_model.RunCustomAgentTrace
+SetVariableTrace = tracing_model.SetVariableTrace
+StartTrace = tracing_model.StartTrace
+WaitForElementTrace = tracing_model.WaitForElementTrace
+WaitTrace = tracing_model.WaitTrace
+WhileLoopTrace = tracing_model.WhileLoopTrace
+WriteGoogleSheetTrace = tracing_model.WriteGoogleSheetTrace
+parse_action_trace = tracing_model.parse_action_trace
+
 # There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method
 # under the hood.
 
@@ -32,355 +73,6 @@ class AgentUsage(BaseModel):
     credits: float
 
 
-class OperatorActionTraceItem(BaseModel):
-    url: str
-    action: str
-
-
-class GoToUrlTrace(BaseModel):
-    step_type: Literal["goToUrl"]
-    url: str
-    description: str
-
-
-class GetUrlTrace(BaseModel):
-    step_type: Literal["getUrl"]
-    url: str
-    description: str
-
-
-class PrintTrace(BaseModel):
-    step_type: Literal["print"]
-    url: str
-    message: str
-
-
-class AgentTrace(BaseModel):
-    step_type: Literal["agent"]
-    url: str
-    agent_type: str
-    action_trace: ActionTrace | None = None
-    text: str | None = None
-
-
-class ForLoopTrace(BaseModel):
-    step_type: Literal["for"]
-    url: str
-    loop_type: Literal["nTimes", "forEachRowInDataTable", "forEachItemsInArray"]
-    description: str
-    iterations: list[ApaActionTrace]  # Recursive reference
-
-
-class WhileLoopTrace(BaseModel):
-    step_type: Literal["while"]
-    url: str
-    condition: str
-    iterations: list[ApaActionTrace]  # Recursive reference
-    total_iterations: int
-
-
-class AgenticSelectorTrace(BaseModel):
-    step_type: Literal["agenticSelector"]
-    url: str
-    description: str
-    action_trace: ActionTrace | None = None
-
-
-class AgenticMouseActionTrace(BaseModel):
-    step_type: Literal["agenticMouseAction"]
-    url: str
-    description: str
-    action_trace: ActionTrace | None = None
-
-
-class WaitForElementTrace(BaseModel):
-    step_type: Literal["waitForElement"]
-    url: str
-    description: str
-
-
-class PressKeysTrace(BaseModel):
-    step_type: Literal["pressKeys"]
-    url: str
-    description: str
-
-
-class ReadGoogleSheetTrace(BaseModel):
-    step_type: Literal["readGoogleSheet"]
-    url: str
-    description: str
-
-
-class WriteGoogleSheetTrace(BaseModel):
-    step_type: Literal["writeGoogleSheet"]
-    url: str
-    description: str
-
-
-class DataTableExportAsCsvTrace(BaseModel):
-    step_type: Literal["dataTableExportAsCsv"]
-    url: str
-    description: str
-
-
-class PythonTrace(BaseModel):
-    step_type: Literal["python"]
-    url: str
-    description: str
-
-
-class ReadCsvTrace(BaseModel):
-    step_type: Literal["readCsv"]
-    url: str
-    description: str
-
-
-class StartTrace(BaseModel):
-    step_type: Literal["start"]
-    url: str
-    description: str
-
-
-class EndTrace(BaseModel):
-    step_type: Literal["end"]
-    url: str
-    description: str
-
-
-class GetFullHtmlTrace(BaseModel):
-    step_type: Literal["getFullHtml"]
-    url: str
-    description: str
-
-
-class GetSimplifiedHtmlTrace(BaseModel):
-    step_type: Literal["getSimplifiedHtml"]
-    url: str
-    description: str
-
-
-class GetScreenshotTrace(BaseModel):
-    step_type: Literal["getScreenshot"]
-    url: str
-    description: str
-
-
-class ObjectExportAsJsonTrace(BaseModel):
-    step_type: Literal["objectExportAsJson"]
-    url: str
-    description: str
-
-
-class RunCustomAgentTrace(BaseModel):
-    step_type: Literal["runCustomAgent"]
-    url: str
-    workflow_id: str
-    workflow_name: str
-    status: Literal["success", "error"]
-    error_message: str | None = None
-
-
-class IfTrace(BaseModel):
-    step_type: Literal["if"]
-    url: str
-    description: str
-
-
-class SetVariableTrace(BaseModel):
-    step_type: Literal["setVariable"]
-    url: str
-    description: str
-
-
-class WaitTrace(BaseModel):
-    step_type: Literal["wait"]
-    url: str
-    description: str
-
-
-class DataTableInsertRowTrace(BaseModel):
-    step_type: Literal["dataTableInsertRow"]
-    url: str
-    description: str
-
-
-class DataTableUpdateCellValueTrace(BaseModel):
-    step_type: Literal["dataTableUpdateCellValue"]
-    url: str
-    description: str
-
-
-class ObjectSetPropertiesTrace(BaseModel):
-    step_type: Literal["objectSetProperties"]
-    url: str
-    description: str
-
-
-class OutputTrace(BaseModel):
-    step_type: Literal["output"]
-    description: str
-
-
-# ---------------------------------------------------------------------------
-# Python agent run trace: emitted by CustomPythonAgentRunnable for custom
-# Python agents executed in the browser Pyodide runtime. A single
-# PythonAgentRunTrace wraps the full agent's execution; its `events` list is
-# a chronologically sorted timeline of stdout / stderr / SDK call events.
-# ---------------------------------------------------------------------------
-
-
-class PythonStdoutEvent(BaseModel):
-    kind: Literal["stdout"] = "stdout"
-    ts: int
-    text: str
-
-
-class PythonStderrEvent(BaseModel):
-    kind: Literal["stderr"] = "stderr"
-    ts: int
-    text: str
-
-
-class PythonSubAgentCallEvent(BaseModel):
-    kind: Literal["subAgentCall"] = "subAgentCall"
-    ts_start: int
-    ts_end: int
-    agent_type: str
-    prompt: str
-    status: Literal["success", "error", "timeout"]
-    request_id: str | None = None
-    error_message: str | None = None
-    action_trace: ActionTrace | None = None
-
-    @model_validator(mode="after")
-    def _check_ts_ordering(self) -> PythonSubAgentCallEvent:
-        if self.ts_end < self.ts_start:
-            raise ValueError(
-                f"PythonSubAgentCallEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})"
-            )
-        return self
-
-
-class PythonExtensionActionEvent(BaseModel):
-    kind: Literal["extensionAction"] = "extensionAction"
-    ts_start: int
-    ts_end: int
-    # Matches the snake_case `name` discriminator on ExtensionActionRequest
-    # (e.g. "go_to_url", "get_screenshot"). Carried as a plain string rather
-    # than a Literal so adding a new extension action in the future does not
-    # require a parse-time migration of historical trace data.
-    action_name: str
-    request_summary: dict[str, Any]
-    result_summary: dict[str, Any] | None = None
-    status: Literal["success", "error", "timeout"]
-    error_message: str | None = None
-
-    @model_validator(mode="after")
-    def _check_ts_ordering(self) -> PythonExtensionActionEvent:
-        if self.ts_end < self.ts_start:
-            raise ValueError(
-                f"PythonExtensionActionEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})"
-            )
-        return self
-
-
-class PythonSideEffectEvent(BaseModel):
-    kind: Literal["sideEffect"] = "sideEffect"
-    ts: int
-    effect_type: Literal["download_file", "render_html"]
-    description: str
-
-
-PythonTraceEvent = Annotated[
-    PythonStdoutEvent
-    | PythonStderrEvent
-    | PythonSubAgentCallEvent
-    | PythonExtensionActionEvent
-    | PythonSideEffectEvent,
-    Field(discriminator="kind"),
-]
-
-
-class PythonAgentRunTrace(BaseModel):
-    step_type: Literal["pythonAgentRun"] = "pythonAgentRun"
-    url: str
-    status: Literal["success", "error", "aborted"]
-    duration_ms: NonNegativeInt
-    events: list[PythonTraceEvent]
-    error_message: str | None = None
-
-
-ApaStepTrace = Annotated[
-    GoToUrlTrace
-    | GetUrlTrace
-    | PrintTrace
-    | AgentTrace
-    | ForLoopTrace
-    | WhileLoopTrace
-    | AgenticSelectorTrace
-    | AgenticMouseActionTrace
-    | WaitForElementTrace
-    | PressKeysTrace
-    | ReadCsvTrace
-    | ReadGoogleSheetTrace
-    | WriteGoogleSheetTrace
-    | DataTableExportAsCsvTrace
-    | ObjectExportAsJsonTrace
-    | PythonTrace
-    | StartTrace
-    | EndTrace
-    | GetFullHtmlTrace
-    | GetSimplifiedHtmlTrace
-    | GetScreenshotTrace
-    | RunCustomAgentTrace
-    | IfTrace
-    | SetVariableTrace
-    | WaitTrace
-    | DataTableInsertRowTrace
-    | DataTableUpdateCellValueTrace
-    | ObjectSetPropertiesTrace
-    | OutputTrace
-    | PythonAgentRunTrace,
-    Field(discriminator="step_type"),
-]
-
-type OperatorActionTrace = list[OperatorActionTraceItem]
-type ApaActionTrace = list[ApaStepTrace]
-type ActionTrace = OperatorActionTrace | ApaActionTrace
-
-
-# TypeAdapter for parsing discriminated union
-_OperatorActionTraceAdapter = TypeAdapter(OperatorActionTrace)
-_ApaActionTraceAdapter = TypeAdapter(ApaActionTrace)
-
-
-def parse_action_trace(trace_data: list[dict[str, Any] | Any]) -> ActionTrace:
-    """Parse the action trace.
-
-    Dispatches deterministically based on the shape of the first item rather
-    than try/except-falling-through two adapters: operator items carry
-    ``action`` + ``url`` fields, APA steps carry ``step_type``. On an empty
-    list (no discriminator available) we default to APA, which is the
-    superset shape used by all custom agents.
-    """
-    if not trace_data:
-        return _ApaActionTraceAdapter.validate_python(trace_data)
-
-    first = trace_data[0]
-    if isinstance(first, dict) and "step_type" in first:
-        return _ApaActionTraceAdapter.validate_python(trace_data)
-    if isinstance(first, dict) and "action" in first and "url" in first:
-        return _OperatorActionTraceAdapter.validate_python(trace_data)
-
-    # Ambiguous shape — fall back to the previous try/except pattern so we
-    # do not regress existing callers passing Pydantic instances or other
-    # shapes the adapters already know how to coerce.
-    try:
-        return _OperatorActionTraceAdapter.validate_python(trace_data)
-    except ValidationError:
-        return _ApaActionTraceAdapter.validate_python(trace_data)
-
-
 class TextOutput(BaseModel):
     type: Literal["text"]
     content: str
diff --git a/packages/narada-core/src/narada_core/tracing/__init__.py b/packages/narada-core/src/narada_core/tracing/__init__.py
new file mode 100644
index 0000000..3237a27
--- /dev/null
+++ b/packages/narada-core/src/narada_core/tracing/__init__.py
@@ -0,0 +1,2 @@
+"""Tracing models for narada-core."""
+
diff --git a/packages/narada-core/src/narada_core/tracing/model.py b/packages/narada-core/src/narada_core/tracing/model.py
new file mode 100644
index 0000000..0f9125c
--- /dev/null
+++ b/packages/narada-core/src/narada_core/tracing/model.py
@@ -0,0 +1,361 @@
+from __future__ import annotations
+
+from typing import Annotated, Any, Literal
+
+from pydantic import (
+    BaseModel,
+    Field,
+    NonNegativeInt,
+    TypeAdapter,
+    ValidationError,
+    model_validator,
+)
+
+
+class OperatorActionTraceItem(BaseModel):
+    url: str
+    action: str
+
+
+class GoToUrlTrace(BaseModel):
+    step_type: Literal["goToUrl"]
+    url: str
+    description: str
+
+
+class GetUrlTrace(BaseModel):
+    step_type: Literal["getUrl"]
+    url: str
+    description: str
+
+
+class PrintTrace(BaseModel):
+    step_type: Literal["print"]
+    url: str
+    message: str
+
+
+class AgentTrace(BaseModel):
+    step_type: Literal["agent"]
+    url: str
+    agent_type: str
+    action_trace: ActionTrace | None = None
+    text: str | None = None
+
+
+class ForLoopTrace(BaseModel):
+    step_type: Literal["for"]
+    url: str
+    loop_type: Literal["nTimes", "forEachRowInDataTable", "forEachItemsInArray"]
+    description: str
+    iterations: list[ApaActionTrace]  # Recursive reference
+
+
+class WhileLoopTrace(BaseModel):
+    step_type: Literal["while"]
+    url: str
+    condition: str
+    iterations: list[ApaActionTrace]  # Recursive reference
+    total_iterations: int
+
+
+class AgenticSelectorTrace(BaseModel):
+    step_type: Literal["agenticSelector"]
+    url: str
+    description: str
+    action_trace: ActionTrace | None = None
+
+
+class AgenticMouseActionTrace(BaseModel):
+    step_type: Literal["agenticMouseAction"]
+    url: str
+    description: str
+    action_trace: ActionTrace | None = None
+
+
+class WaitForElementTrace(BaseModel):
+    step_type: Literal["waitForElement"]
+    url: str
+    description: str
+
+
+class PressKeysTrace(BaseModel):
+    step_type: Literal["pressKeys"]
+    url: str
+    description: str
+
+
+class ReadGoogleSheetTrace(BaseModel):
+    step_type: Literal["readGoogleSheet"]
+    url: str
+    description: str
+
+
+class WriteGoogleSheetTrace(BaseModel):
+    step_type: Literal["writeGoogleSheet"]
+    url: str
+    description: str
+
+
+class DataTableExportAsCsvTrace(BaseModel):
+    step_type: Literal["dataTableExportAsCsv"]
+    url: str
+    description: str
+
+
+class PythonTrace(BaseModel):
+    step_type: Literal["python"]
+    url: str
+    description: str
+
+
+class ReadCsvTrace(BaseModel):
+    step_type: Literal["readCsv"]
+    url: str
+    description: str
+
+
+class StartTrace(BaseModel):
+    step_type: Literal["start"]
+    url: str
+    description: str
+
+
+class EndTrace(BaseModel):
+    step_type: Literal["end"]
+    url: str
+    description: str
+
+
+class GetFullHtmlTrace(BaseModel):
+    step_type: Literal["getFullHtml"]
+    url: str
+    description: str
+
+
+class GetSimplifiedHtmlTrace(BaseModel):
+    step_type: Literal["getSimplifiedHtml"]
+    url: str
+    description: str
+
+
+class GetScreenshotTrace(BaseModel):
+    step_type: Literal["getScreenshot"]
+    url: str
+    description: str
+
+
+class ObjectExportAsJsonTrace(BaseModel):
+    step_type: Literal["objectExportAsJson"]
+    url: str
+    description: str
+
+
+class RunCustomAgentTrace(BaseModel):
+    step_type: Literal["runCustomAgent"]
+    url: str
+    workflow_id: str
+    workflow_name: str
+    status: Literal["success", "error"]
+    error_message: str | None = None
+
+
+class IfTrace(BaseModel):
+    step_type: Literal["if"]
+    url: str
+    description: str
+
+
+class SetVariableTrace(BaseModel):
+    step_type: Literal["setVariable"]
+    url: str
+    description: str
+
+
+class WaitTrace(BaseModel):
+    step_type: Literal["wait"]
+    url: str
+    description: str
+
+
+class DataTableInsertRowTrace(BaseModel):
+    step_type: Literal["dataTableInsertRow"]
+    url: str
+    description: str
+
+
+class DataTableUpdateCellValueTrace(BaseModel):
+    step_type: Literal["dataTableUpdateCellValue"]
+    url: str
+    description: str
+
+
+class ObjectSetPropertiesTrace(BaseModel):
+    step_type: Literal["objectSetProperties"]
+    url: str
+    description: str
+
+
+class OutputTrace(BaseModel):
+    step_type: Literal["output"]
+    description: str
+
+
+# ---------------------------------------------------------------------------
+# Python agent run trace: emitted by CustomPythonAgentRunnable for custom
+# Python agents executed in the browser Pyodide runtime. A single
+# PythonAgentRunTrace wraps the full agent's execution; its `events` list is
+# a chronologically sorted timeline of stdout / stderr / SDK call events.
+# ---------------------------------------------------------------------------
+
+
+class PythonStdoutEvent(BaseModel):
+    kind: Literal["stdout"] = "stdout"
+    ts: int
+    text: str
+
+
+class PythonStderrEvent(BaseModel):
+    kind: Literal["stderr"] = "stderr"
+    ts: int
+    text: str
+
+
+class PythonSubAgentCallEvent(BaseModel):
+    kind: Literal["subAgentCall"] = "subAgentCall"
+    ts_start: int
+    ts_end: int
+    agent_type: str
+    prompt: str
+    status: Literal["success", "error", "timeout"]
+    request_id: str | None = None
+    error_message: str | None = None
+    action_trace: ActionTrace | None = None
+
+    @model_validator(mode="after")
+    def _check_ts_ordering(self) -> PythonSubAgentCallEvent:
+        if self.ts_end < self.ts_start:
+            raise ValueError(
+                f"PythonSubAgentCallEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})"
+            )
+        return self
+
+
+class PythonExtensionActionEvent(BaseModel):
+    kind: Literal["extensionAction"] = "extensionAction"
+    ts_start: int
+    ts_end: int
+    # Matches the snake_case `name` discriminator on ExtensionActionRequest
+    # (e.g. "go_to_url", "get_screenshot"). Carried as a plain string rather
+    # than a Literal so adding a new extension action in the future does not
+    # require a parse-time migration of historical trace data.
+    action_name: str
+    request_summary: dict[str, Any]
+    result_summary: dict[str, Any] | None = None
+    status: Literal["success", "error", "timeout"]
+    error_message: str | None = None
+
+    @model_validator(mode="after")
+    def _check_ts_ordering(self) -> PythonExtensionActionEvent:
+        if self.ts_end < self.ts_start:
+            raise ValueError(
+                f"PythonExtensionActionEvent: ts_end ({self.ts_end}) must be >= ts_start ({self.ts_start})"
+            )
+        return self
+
+
+class PythonSideEffectEvent(BaseModel):
+    kind: Literal["sideEffect"] = "sideEffect"
+    ts: int
+    effect_type: Literal["download_file", "render_html"]
+    description: str
+
+
+PythonTraceEvent = Annotated[
+    PythonStdoutEvent
+    | PythonStderrEvent
+    | PythonSubAgentCallEvent
+    | PythonExtensionActionEvent
+    | PythonSideEffectEvent,
+    Field(discriminator="kind"),
+]
+
+
+class PythonAgentRunTrace(BaseModel):
+    step_type: Literal["pythonAgentRun"] = "pythonAgentRun"
+    url: str
+    status: Literal["success", "error", "aborted"]
+    duration_ms: NonNegativeInt
+    events: list[PythonTraceEvent]
+    error_message: str | None = None
+
+
+ApaStepTrace = Annotated[
+    GoToUrlTrace
+    | GetUrlTrace
+    | PrintTrace
+    | AgentTrace
+    | ForLoopTrace
+    | WhileLoopTrace
+    | AgenticSelectorTrace
+    | AgenticMouseActionTrace
+    | WaitForElementTrace
+    | PressKeysTrace
+    | ReadCsvTrace
+    | ReadGoogleSheetTrace
+    | WriteGoogleSheetTrace
+    | DataTableExportAsCsvTrace
+    | ObjectExportAsJsonTrace
+    | PythonTrace
+    | StartTrace
+    | EndTrace
+    | GetFullHtmlTrace
+    | GetSimplifiedHtmlTrace
+    | GetScreenshotTrace
+    | RunCustomAgentTrace
+    | IfTrace
+    | SetVariableTrace
+    | WaitTrace
+    | DataTableInsertRowTrace
+    | DataTableUpdateCellValueTrace
+    | ObjectSetPropertiesTrace
+    | OutputTrace
+    | PythonAgentRunTrace,
+    Field(discriminator="step_type"),
+]
+
+type OperatorActionTrace = list[OperatorActionTraceItem]
+type ApaActionTrace = list[ApaStepTrace]
+type ActionTrace = OperatorActionTrace | ApaActionTrace
+
+
+_OperatorActionTraceAdapter = TypeAdapter(OperatorActionTrace)
+_ApaActionTraceAdapter = TypeAdapter(ApaActionTrace)
+
+
+def parse_action_trace(trace_data: list[dict[str, Any] | Any]) -> ActionTrace:
+    """Parse the action trace.
+
+    Dispatches on the shape of the first item rather than always trying both
+    adapters: APA steps carry ``step_type``, operator items carry ``action`` +
+    ``url``. An empty list (no discriminator available) defaults to APA, the
+    superset shape used by all custom agents. Any other first item falls back
+    to trying the operator adapter, then the APA adapter.
+    """
+    if not trace_data:
+        return _ApaActionTraceAdapter.validate_python(trace_data)
+
+    first = trace_data[0]
+    if isinstance(first, dict) and "step_type" in first:
+        return _ApaActionTraceAdapter.validate_python(trace_data)
+    if isinstance(first, dict) and "action" in first and "url" in first:
+        return _OperatorActionTraceAdapter.validate_python(trace_data)
+
+    # Ambiguous shape — fall back to the previous try/except pattern so we
+    # do not regress existing callers passing Pydantic instances or other
+    # shapes the adapters already know how to coerce.
+    try:
+        return _OperatorActionTraceAdapter.validate_python(trace_data)
+    except ValidationError:
+        return _ApaActionTraceAdapter.validate_python(trace_data)
+
diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py
index 0586a48..659ac88 100644
--- a/packages/narada-pyodide/src/narada/_trace.py
+++ b/packages/narada-pyodide/src/narada/_trace.py
@@ -40,7 +40,7 @@ def emit_trace_event(event: dict[str, Any]) -> None:
     """Forward a single trace event to the JavaScript harness.
 
     The event must be JSON-serialisable and shaped as one of the
-    ``PythonTraceEvent`` variants defined in ``narada_core.actions.models``.
+    ``PythonTraceEvent`` variants defined in ``narada_core.tracing.model``.
     No validation is performed here; callers construct events directly and
     are responsible for matching the schema.
 
@@ -69,7 +69,7 @@ def dump_model(model: BaseModel) -> dict[str, Any]:
 # Event emitters
 #
 # Each emitter builds a JSON-serialisable event shaped to match one of the
-# ``PythonTraceEvent`` Pydantic variants in ``narada_core.actions.models``
+# ``PythonTraceEvent`` Pydantic variants in ``narada_core.tracing.model``
 # and forwards it to the JavaScript harness. Optional fields are included
 # only when non-None so the JSON stays compact.
 # ---------------------------------------------------------------------------
diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py
index 791503c..85b8238 100644
--- a/packages/narada-pyodide/src/narada/window.py
+++ b/packages/narada-pyodide/src/narada/window.py
@@ -51,7 +51,6 @@
     UserApprovalRequest,
     UserApprovalResponse,
     WriteGoogleSheetRequest,
-    parse_action_trace,
 )
 from narada_core.errors import (
     NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE,
@@ -67,6 +66,7 @@
     Response,
     UserResourceCredentials,
 )
+from narada_core.tracing.model import parse_action_trace
 from pydantic import BaseModel
 from pyodide.ffi import JsProxy, create_once_callable
 from pyodide.http import pyfetch
diff --git a/packages/narada/src/narada/window.py b/packages/narada/src/narada/window.py
index 29c359b..9a93ece 100644
--- a/packages/narada/src/narada/window.py
+++ b/packages/narada/src/narada/window.py
@@ -41,7 +41,6 @@
     UserApprovalRequest,
     UserApprovalResponse,
     WriteGoogleSheetRequest,
-    parse_action_trace,
 )
 from narada_core.errors import (
     NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE,
@@ -57,6 +56,7 @@
     Response,
     UserResourceCredentials,
 )
+from narada_core.tracing.model import parse_action_trace
 from playwright.async_api import (
     BrowserContext,
 )

From a33e02a1bc99ece1d1f46fa233525d0486d4895d Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Thu, 30 Apr 2026 10:28:53 -0700
Subject: [PATCH 10/13] Remove trace model re-exports

---
 .../src/narada_core/actions/models.py         | 45 +------------------
 1 file changed, 1 insertion(+), 44 deletions(-)

diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py
index db49d64..1b33b83 100644
--- a/packages/narada-core/src/narada_core/actions/models.py
+++ b/packages/narada-core/src/narada_core/actions/models.py
@@ -19,49 +19,6 @@
 
 from narada_core.tracing import model as tracing_model
 
-ActionTrace = tracing_model.ActionTrace
-AgentTrace = tracing_model.AgentTrace
-AgenticMouseActionTrace = tracing_model.AgenticMouseActionTrace
-AgenticSelectorTrace = tracing_model.AgenticSelectorTrace
-ApaActionTrace = tracing_model.ApaActionTrace
-ApaStepTrace = tracing_model.ApaStepTrace
-DataTableExportAsCsvTrace = tracing_model.DataTableExportAsCsvTrace
-DataTableInsertRowTrace = tracing_model.DataTableInsertRowTrace
-DataTableUpdateCellValueTrace = tracing_model.DataTableUpdateCellValueTrace
-EndTrace = tracing_model.EndTrace
-ForLoopTrace = tracing_model.ForLoopTrace
-GetFullHtmlTrace = tracing_model.GetFullHtmlTrace
-GetScreenshotTrace = tracing_model.GetScreenshotTrace
-GetSimplifiedHtmlTrace = tracing_model.GetSimplifiedHtmlTrace
-GetUrlTrace = tracing_model.GetUrlTrace
-GoToUrlTrace = tracing_model.GoToUrlTrace
-IfTrace = tracing_model.IfTrace
-ObjectExportAsJsonTrace = tracing_model.ObjectExportAsJsonTrace
-ObjectSetPropertiesTrace = tracing_model.ObjectSetPropertiesTrace
-OperatorActionTrace = tracing_model.OperatorActionTrace
-OperatorActionTraceItem = tracing_model.OperatorActionTraceItem
-OutputTrace = tracing_model.OutputTrace
-PressKeysTrace = tracing_model.PressKeysTrace
-PrintTrace = tracing_model.PrintTrace
-PythonAgentRunTrace = tracing_model.PythonAgentRunTrace
-PythonExtensionActionEvent = tracing_model.PythonExtensionActionEvent
-PythonSideEffectEvent = tracing_model.PythonSideEffectEvent
-PythonStderrEvent = tracing_model.PythonStderrEvent
-PythonStdoutEvent = tracing_model.PythonStdoutEvent
-PythonSubAgentCallEvent = tracing_model.PythonSubAgentCallEvent
-PythonTrace = tracing_model.PythonTrace
-PythonTraceEvent = tracing_model.PythonTraceEvent
-ReadCsvTrace = tracing_model.ReadCsvTrace
-ReadGoogleSheetTrace = tracing_model.ReadGoogleSheetTrace
-RunCustomAgentTrace = tracing_model.RunCustomAgentTrace
-SetVariableTrace = tracing_model.SetVariableTrace
-StartTrace = tracing_model.StartTrace
-WaitForElementTrace = tracing_model.WaitForElementTrace
-WaitTrace = tracing_model.WaitTrace
-WhileLoopTrace = tracing_model.WhileLoopTrace
-WriteGoogleSheetTrace = tracing_model.WriteGoogleSheetTrace
-parse_action_trace = tracing_model.parse_action_trace
-
 # There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method
 # under the hood.
 
@@ -93,7 +50,7 @@ class AgentResponse(BaseModel, Generic[_StructuredOutputT]):
         Field(discriminator="type"),
     ]
     usage: AgentUsage
-    action_trace: ActionTrace | None = None
+    action_trace: tracing_model.ActionTrace | None = None
 
 
 class AgenticSelectorClickAction(TypedDict):

From c6636dad827cacfb62f58c129fc0a24754e50027 Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Thu, 30 Apr 2026 10:47:31 -0700
Subject: [PATCH 11/13] Fix tracing package formatting

---
 packages/narada-core/src/narada_core/tracing/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/packages/narada-core/src/narada_core/tracing/__init__.py b/packages/narada-core/src/narada_core/tracing/__init__.py
index 3237a27..cb6fd54 100644
--- a/packages/narada-core/src/narada_core/tracing/__init__.py
+++ b/packages/narada-core/src/narada_core/tracing/__init__.py
@@ -1,2 +1 @@
 """Tracing models for narada-core."""
-

From 8d01131f2fef7b5fcc6e5cd4f456be284b5c984b Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Thu, 30 Apr 2026 12:38:16 -0700
Subject: [PATCH 12/13] Bump package versions for reasoning support

---
 packages/narada-core/pyproject.toml    | 2 +-
 packages/narada-pyodide/pyproject.toml | 4 ++--
 packages/narada/pyproject.toml         | 4 ++--
 uv.lock                                | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/packages/narada-core/pyproject.toml b/packages/narada-core/pyproject.toml
index ca75dc0..edadabf 100644
--- a/packages/narada-core/pyproject.toml
+++ b/packages/narada-core/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "narada-core"
-version = "0.0.20"
+version = "0.0.21"
 description = "Code shared by the `narada` and `narada-pyodide` packages."
 license = "Apache-2.0"
 readme = "README.md"
diff --git a/packages/narada-pyodide/pyproject.toml b/packages/narada-pyodide/pyproject.toml
index 1443d3e..15bcbdd 100644
--- a/packages/narada-pyodide/pyproject.toml
+++ b/packages/narada-pyodide/pyproject.toml
@@ -1,14 +1,14 @@
 
 [project]
 name = "narada-pyodide"
-version = "0.0.47"
+version = "0.0.48"
 description = "Pyodide-compatible Python client SDK for Narada"
 license = "Apache-2.0"
 readme = "README.md"
 authors = [{ name = "Narada", email = "support@narada.ai" }]
 requires-python = ">=3.12"
 dependencies = [
-    "narada-core==0.0.20",
+    "narada-core==0.0.21",
     # Must be a supported version in https://pyodide.org/en/stable/usage/packages-in-pyodide.html
     "packaging==24.2",
 ]
diff --git a/packages/narada/pyproject.toml b/packages/narada/pyproject.toml
index f5be009..6575246 100644
--- a/packages/narada/pyproject.toml
+++ b/packages/narada/pyproject.toml
@@ -1,13 +1,13 @@
 [project]
 name = "narada"
-version = "0.1.47"
+version = "0.1.48"
 description = "Python client SDK for Narada"
 license = "Apache-2.0"
 readme = "README.md"
 authors = [{ name = "Narada", email = "support@narada.ai" }]
 requires-python = ">=3.12"
 dependencies = [
-    "narada-core==0.0.20",
+    "narada-core==0.0.21",
     "aiohttp>=3.12.13",
     "playwright>=1.53.0",
     "rich>=14.0.0",
diff --git a/uv.lock b/uv.lock
index dc19511..2c454fd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -312,7 +312,7 @@ wheels = [
 
 [[package]]
 name = "narada"
-version = "0.1.47"
+version = "0.1.48"
 source = { editable = "packages/narada" }
 dependencies = [
     { name = "aiohttp" },
@@ -345,7 +345,7 @@ dev = [
 
 [[package]]
 name = "narada-core"
-version = "0.0.20"
+version = "0.0.21"
 source = { editable = "packages/narada-core" }
 dependencies = [
     { name = "pydantic" },
@@ -356,7 +356,7 @@ requires-dist = [{ name = "pydantic", specifier = "==2.12.5" }]
 
 [[package]]
 name = "narada-pyodide"
-version = "0.0.47"
+version = "0.0.48"
 source = { editable = "packages/narada-pyodide" }
 dependencies = [
     { name = "narada-core" },

From d007cd45de48d7fc355d3327370b6f916c3772c4 Mon Sep 17 00:00:00 2001
From: xTRam1 <lerdogan@berkeley.edu>
Date: Thu, 30 Apr 2026 13:08:10 -0700
Subject: [PATCH 13/13] Simplify reasoning effort docstring

---
 packages/narada-core/src/narada_core/models.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/packages/narada-core/src/narada_core/models.py b/packages/narada-core/src/narada_core/models.py
index aba8ffa..d7d075f 100644
--- a/packages/narada-core/src/narada_core/models.py
+++ b/packages/narada-core/src/narada_core/models.py
@@ -22,11 +22,9 @@ def prompt_prefix(self) -> str:
 
 
 class ReasoningEffort(StrEnum):
-    """Amount of reasoning the Core Agent applies before responding.
+    """Controls how much reasoning the Core Agent uses before responding.
 
-    Maps 1:1 to OpenAI's ``reasoning.effort`` parameter. Only honored when the
-    invoked agent is :py:attr:`Agent.CORE_AGENT`; the SDK enforces this both at
-    type-check time (via ``@overload``) and at runtime (with a ``ValueError``).
+    Only `Agent.CORE_AGENT` supports this; the SDK raises `ValueError` otherwise.
     """
 
     NONE = "none"