From 8f0523f42e51765f1dbdeb6426defb392b792c96 Mon Sep 17 00:00:00 2001 From: sebzhao Date: Thu, 28 May 2026 14:54:32 -0700 Subject: [PATCH 1/2] feat: critic agent in observability --- .../src/narada_core/actions/critic.py | 29 +++ .../src/narada_core/actions/models.py | 3 + packages/narada-pyodide/src/narada/window.py | 29 +-- .../tests/test_cloud_browser.py | 201 ++++++++++++++++++ packages/narada/src/narada/window.py | 10 +- packages/narada/tests/test_cloud_browser.py | 103 +++++++++ 6 files changed, 358 insertions(+), 17 deletions(-) diff --git a/packages/narada-core/src/narada_core/actions/critic.py b/packages/narada-core/src/narada_core/actions/critic.py index 04f5850..fb5a02f 100644 --- a/packages/narada-core/src/narada_core/actions/critic.py +++ b/packages/narada-core/src/narada_core/actions/critic.py @@ -74,10 +74,39 @@ async def run_critic( if critic_action_trace_raw is not None else None ) + critic_workflow_trace = critic_content.get("workflowTrace") return CriticResult( validation_passed=validation_passed, structured_output=structured_output, usage=AgentUsage.model_validate(critic_dispatch_response["usage"]), action_trace=critic_action_trace, + workflow_trace=critic_workflow_trace, ) + + +def merge_critic_workflow_trace( + *, + workflow_trace: dict[str, Any] | None, + critic_result: CriticResult | None, +) -> dict[str, Any] | None: + critic_workflow_trace = ( + critic_result.workflow_trace if critic_result is not None else None + ) + if critic_workflow_trace is None: + return workflow_trace + + if workflow_trace is None: + return critic_workflow_trace + + children = workflow_trace.get("children") + if not isinstance(children, list): + return workflow_trace + + return { + **workflow_trace, + "children": [ + *children, + {"kind": "sub_workflow", "trace": critic_workflow_trace}, + ], + } diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index 07d7176..ade0457 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -43,10 +43,13 @@ class StructuredOutput(BaseModel, Generic[_StructuredOutputT]): class CriticResult(BaseModel): + model_config = ConfigDict(populate_by_name=True) + validation_passed: bool structured_output: Any usage: AgentUsage action_trace: tracing_model.ActionTrace | None = None + workflow_trace: dict[str, Any] | None = Field(default=None, alias="workflowTrace") class AgentResponse(BaseModel, Generic[_StructuredOutputT]): diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py index 6c2afb8..700e342 100644 --- a/packages/narada-pyodide/src/narada/window.py +++ b/packages/narada-pyodide/src/narada/window.py @@ -6,27 +6,17 @@ from abc import ABC from dataclasses import dataclass from http import HTTPStatus -from typing import ( - IO, - TYPE_CHECKING, - Any, - Literal, - Optional, - TypeVar, - cast, - overload, - override, -) +from typing import IO, TYPE_CHECKING, Any, Literal, Optional, TypeVar, cast, overload, override from urllib.parse import urlencode from js import AbortController, setTimeout # type: ignore -from narada_core.actions.critic import run_critic +from narada_core.actions.critic import merge_critic_workflow_trace, run_critic from narada_core.actions.models import ( DEFAULT_HITL_TIMEOUT_SECONDS, - AgenticMouseAction, - AgenticMouseActionRequest, AgenticMatchingSelectorsFinderRequest, AgenticMatchingSelectorsFinderResponse, + AgenticMouseAction, + AgenticMouseActionRequest, AgenticSelectorAction, AgenticSelectorRequest, AgenticSelectorResponse, @@ -689,6 +679,8 @@ async def agent( if action_trace_raw is not None else None ) + workflow_trace = response_content.get("workflowTrace") + parent_request_id = self._current_parent_request_id() critic_result: CriticResult | None = None if critic is not None: @@ -701,6 +693,15 @@ async def agent( time_zone=time_zone, timeout=timeout, ) + workflow_trace = merge_critic_workflow_trace( + workflow_trace=workflow_trace, + critic_result=critic_result, + ) + + # Preserve the response contract for direct callers, but avoid adding a second + # child node when the backend will stitch the child request into the parent row. + if workflow_trace is not None and parent_request_id is None: + _trace.emit_sub_workflow(workflow_trace=workflow_trace) return AgentResponse( request_id=remote_dispatch_response["requestId"], diff --git a/packages/narada-pyodide/tests/test_cloud_browser.py b/packages/narada-pyodide/tests/test_cloud_browser.py index 75561d3..1585b05 100644 --- a/packages/narada-pyodide/tests/test_cloud_browser.py +++ b/packages/narada-pyodide/tests/test_cloud_browser.py @@ -277,6 +277,207 @@ async def test_cloud_browser_window_dispatch_request_omits_parent_run_ids( assert "parentRunIds" not in payload +@pytest.mark.asyncio +async def test_cloud_browser_window_dispatch_request_keeps_parent_request_id( + monkeypatch: pytest.MonkeyPatch, +) -> None: + pyfetch = AsyncMock( + side_effect=[ + _FakeResponse(json_data={"requestId": "child-request-123"}), + _FakeResponse(json_data={"status": "success", "response": None}), + ] + ) + _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + window_module._narada_parent_run_ids = _FakeJsProxy(["outer-run", "inner-run"]) + monkeypatch.setattr( + builtins, "_narada_request_id", "parent-request-123", raising=False + ) + + window = window_module.CloudBrowserWindow( + browser_window_id="browser-window-123", + session_id="session-123", + api_key="test-api-key", + ) + response = await window.dispatch_request(prompt="hello from cloud browser") + + assert response["status"] == "success" + post_call = pyfetch.await_args_list[0] + payload = json.loads(post_call.kwargs["body"]) + assert payload["parentRequestId"] == "parent-request-123" + assert "parentRunIds" not in payload + + +@pytest.mark.asyncio +async def test_window_agent_keeps_parent_request_id_from_injected_builtins( + monkeypatch: pytest.MonkeyPatch, +) -> None: + pyfetch = AsyncMock( + side_effect=[ + _FakeResponse(json_data={"requestId": "child-request-123"}), + _FakeResponse( + json_data={ + "status": "success", + "response": { + "text": "done", + "output": {"type": "text", "content": "done"}, + }, + "usage": {"actions": 0, "credits": 0}, + } + ), + ] + ) + _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + window_module._narada_parent_run_ids = _FakeJsProxy(["outer-run", "inner-run"]) + monkeypatch.setattr( + builtins, "_narada_request_id", "parent-request-123", raising=False + ) + + window = window_module.CloudBrowserWindow( + browser_window_id="browser-window-123", + session_id="session-123", + api_key="test-api-key", + ) + response = await window.agent(prompt="run gui child", agent="/$USER/gui-child") + + assert response.status == "success" + post_call = pyfetch.await_args_list[0] + payload = json.loads(post_call.kwargs["body"]) + assert payload["parentRequestId"] == "parent-request-123" + + +@pytest.mark.asyncio +async def test_window_agent_exposes_workflow_trace_alias( + monkeypatch: pytest.MonkeyPatch, +) -> None: + workflow_trace = {"step_type": "workflow", "children": []} + pyfetch = AsyncMock( + side_effect=[ + _FakeResponse(json_data={"requestId": "child-request-123"}), + _FakeResponse( + json_data={ + "status": "success", + "response": { + "text": "done", + "output": {"type": "text", "content": "done"}, + "workflowTrace": workflow_trace, + }, + "usage": {"actions": 0, "credits": 0}, + } + ), + ] + ) + _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + emitted_events: list[str] = [] + monkeypatch.setattr( + sys.modules["narada._trace"], + "_narada_emit_trace_event", + emitted_events.append, + raising=False, + ) + + window = window_module.CloudBrowserWindow( + browser_window_id="browser-window-123", + session_id="session-123", + api_key="test-api-key", + ) + response = await window.agent(prompt="return a trace") + + assert response.workflow_trace == workflow_trace + assert response.model_dump(by_alias=True)["workflowTrace"] == workflow_trace + sub_workflow_events = [ + json.loads(event) + for event in emitted_events + if json.loads(event)["kind"] == "subWorkflow" + ] + assert sub_workflow_events == [ + {"kind": "subWorkflow", "workflowTrace": workflow_trace} + ] + + +@pytest.mark.asyncio +async def test_window_agent_emits_combined_critic_workflow_trace( + monkeypatch: pytest.MonkeyPatch, +) -> None: + workflow_trace = { + "workflowId": "main-workflow", + "workflowName": "Main Workflow", + "runtime": "gui", + "status": "success", + "startTs": 100, + "children": [], + } + critic_workflow_trace = { + "workflowId": "critic-workflow", + "workflowName": "Critic Workflow", + "runtime": "gui", + "status": "success", + "startTs": 200, + "children": [], + } + pyfetch = AsyncMock( + side_effect=[ + _FakeResponse(json_data={"requestId": "main-request-123"}), + _FakeResponse( + json_data={ + "status": "success", + "response": { + "text": "done", + "output": {"type": "text", "content": "done"}, + "workflowTrace": workflow_trace, + }, + "usage": {"actions": 0, "credits": 0}, + } + ), + _FakeResponse(json_data={"requestId": "critic-request-123"}), + _FakeResponse( + json_data={ + "status": "success", + "response": { + "text": '{"narada_validation_passed":true}', + "output": { + "type": "structured", + "content": {"narada_validation_passed": True}, + }, + "workflowTrace": critic_workflow_trace, + }, + "usage": {"actions": 0, "credits": 0}, + } + ), + ] + ) + _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + emitted_events: list[str] = [] + monkeypatch.setattr( + sys.modules["narada._trace"], + "_narada_emit_trace_event", + emitted_events.append, + raising=False, + ) + + window = window_module.CloudBrowserWindow( + browser_window_id="browser-window-123", + session_id="session-123", + api_key="test-api-key", + ) + response = await window.agent(prompt="return a trace", critic={}) + + combined_workflow_trace = { + **workflow_trace, + "children": [{"kind": "sub_workflow", "trace": critic_workflow_trace}], + } + assert response.critic_result is not None + assert response.critic_result.workflow_trace == critic_workflow_trace + assert response.workflow_trace == combined_workflow_trace + sub_workflow_events = [ + json.loads(event) + for event in emitted_events + if json.loads(event)["kind"] == "subWorkflow" + ] + assert sub_workflow_events == [ + {"kind": "subWorkflow", "workflowTrace": combined_workflow_trace} + ] + + @pytest.mark.asyncio async def test_cloud_browser_window_dispatch_request_retries_poll_fetch_failures( monkeypatch: pytest.MonkeyPatch, diff --git a/packages/narada/src/narada/window.py b/packages/narada/src/narada/window.py index 8df4bb3..c1e2c25 100644 --- a/packages/narada/src/narada/window.py +++ b/packages/narada/src/narada/window.py @@ -21,13 +21,13 @@ ) import aiohttp -from narada_core.actions.critic import run_critic +from narada_core.actions.critic import merge_critic_workflow_trace, run_critic from narada_core.actions.models import ( DEFAULT_HITL_TIMEOUT_SECONDS, - AgenticMouseAction, - AgenticMouseActionRequest, AgenticMatchingSelectorsFinderRequest, AgenticMatchingSelectorsFinderResponse, + AgenticMouseAction, + AgenticMouseActionRequest, AgenticSelectorAction, AgenticSelectorRequest, AgenticSelectorResponse, @@ -655,6 +655,10 @@ async def agent( time_zone=time_zone, timeout=timeout, ) + workflow_trace = merge_critic_workflow_trace( + workflow_trace=workflow_trace, + critic_result=critic_result, + ) return AgentResponse( request_id=remote_dispatch_response["requestId"], diff --git a/packages/narada/tests/test_cloud_browser.py b/packages/narada/tests/test_cloud_browser.py index db54062..37bee36 100644 --- a/packages/narada/tests/test_cloud_browser.py +++ b/packages/narada/tests/test_cloud_browser.py @@ -171,3 +171,106 @@ async def test_initialize_cloud_browser_window_uses_domcontentloaded_for_retry_n ] assert wait_for_browser_window_id.await_count == 2 assert window.browser_window_id == "browser-window-123" + + +@pytest.mark.asyncio +async def test_window_agent_exposes_workflow_trace_alias( + monkeypatch: pytest.MonkeyPatch, +) -> None: + workflow_trace = {"step_type": "workflow", "children": []} + window = CloudBrowserWindow( + browser_window_id="browser-window-123", + session_id="session-123", + auth_headers={"x-api-key": "test-key"}, + ) + monkeypatch.setattr( + window, + "dispatch_request", + AsyncMock( + return_value={ + "requestId": "request-123", + "status": "success", + "response": { + "text": "done", + "output": {"type": "text", "content": "done"}, + "workflowTrace": workflow_trace, + }, + "usage": {"actions": 0, "credits": 0}, + } + ), + ) + + response = await window.agent(prompt="return a trace") + + assert response.workflow_trace == workflow_trace + assert response.model_dump(by_alias=True)["workflowTrace"] == workflow_trace + + +@pytest.mark.asyncio +async def test_window_agent_appends_critic_workflow_trace( + monkeypatch: pytest.MonkeyPatch, +) -> None: + workflow_trace = { + "workflowId": "main-workflow", + "workflowName": "Main Workflow", + "runtime": "gui", + "status": "success", + "startTs": 100, + "children": [], + } + critic_workflow_trace = { + "workflowId": "critic-workflow", + "workflowName": "Critic Workflow", + "runtime": "gui", + "status": "success", + "startTs": 200, + "children": [], + } + window = CloudBrowserWindow( + browser_window_id="browser-window-123", + session_id="session-123", + auth_headers={"x-api-key": "test-key"}, + ) + monkeypatch.setattr( + window, + "dispatch_request", + AsyncMock( + side_effect=[ + { + "requestId": "request-123", + "status": "success", + "response": { + "text": "done", + "output": {"type": "text", "content": "done"}, + "workflowTrace": workflow_trace, + }, + "usage": {"actions": 0, "credits": 0}, + }, + { + "requestId": "critic-request-123", + "status": "success", + "response": { + "text": '{"narada_validation_passed":true}', + "output": { + "type": "structured", + "content": {"narada_validation_passed": True}, + }, + "structuredOutput": SimpleNamespace( + narada_validation_passed=True + ), + "workflowTrace": critic_workflow_trace, + }, + "usage": {"actions": 0, "credits": 0}, + }, + ] + ), + ) + + response = await window.agent(prompt="return a trace", critic={}) + + assert response.critic_result is not None + assert response.critic_result.workflow_trace == critic_workflow_trace + assert response.workflow_trace == { + **workflow_trace, + "children": [{"kind": "sub_workflow", "trace": critic_workflow_trace}], + } From f7422125457c14cbfde36991a13e98ef459e118f Mon Sep 17 00:00:00 2001 From: sebzhao Date: Thu, 28 May 2026 14:58:19 -0700 Subject: [PATCH 2/2] style: lint --- packages/narada-pyodide/src/narada/window.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py index 700e342..59a3efc 100644 --- a/packages/narada-pyodide/src/narada/window.py +++ b/packages/narada-pyodide/src/narada/window.py @@ -6,7 +6,17 @@ from abc import ABC from dataclasses import dataclass from http import HTTPStatus -from typing import IO, TYPE_CHECKING, Any, Literal, Optional, TypeVar, cast, overload, override +from typing import ( + IO, + TYPE_CHECKING, + Any, + Literal, + Optional, + TypeVar, + cast, + overload, + override, +) from urllib.parse import urlencode from js import AbortController, setTimeout # type: ignore