From e86c223aaf8fc1753b2e1419df650bb07e2d3996 Mon Sep 17 00:00:00 2001 From: Alessandro Pogliaghi Date: Tue, 17 Mar 2026 09:23:34 +0000 Subject: [PATCH] feat(cloud): research agent hardening --- products/tasks/backend/models.py | 2 ++ .../tasks/backend/services/docker_sandbox.py | 18 +++++++++++++++--- .../tasks/backend/services/modal_sandbox.py | 6 +++++- products/tasks/backend/services/sandbox.py | 1 + products/tasks/backend/temporal/client.py | 4 ++++ .../activities/start_agent_server.py | 2 ++ .../backend/temporal/process_task/workflow.py | 8 ++++++++ 7 files changed, 37 insertions(+), 4 deletions(-) diff --git a/products/tasks/backend/models.py b/products/tasks/backend/models.py index 3f1aad594c63..beded72d7f4a 100644 --- a/products/tasks/backend/models.py +++ b/products/tasks/backend/models.py @@ -165,6 +165,7 @@ def create_and_run( start_workflow: bool = True, posthog_mcp_scopes: PosthogMcpScopes = "full", branch: str | None = None, + tools_preset: str = "default", ) -> "Task": from products.tasks.backend.temporal.client import execute_task_processing_workflow @@ -204,6 +205,7 @@ def create_and_run( create_pr=create_pr, slack_thread_context=slack_thread_context, posthog_mcp_scopes=posthog_mcp_scopes, + tools_preset=tools_preset, ) return task diff --git a/products/tasks/backend/services/docker_sandbox.py b/products/tasks/backend/services/docker_sandbox.py index 9dec0d28d2dd..005da1f205ae 100644 --- a/products/tasks/backend/services/docker_sandbox.py +++ b/products/tasks/backend/services/docker_sandbox.py @@ -563,16 +563,20 @@ def _build_agent_server_command( interaction_origin: str | None = None, branch: str | None = None, mcp_servers_arg: str = "", + tools_preset: str | None = None, ) -> str: env_prefix = ( f"env POSTHOG_CODE_INTERACTION_ORIGIN={shlex.quote(interaction_origin)} " if interaction_origin else "" ) branch_flag = f" --baseBranch {shlex.quote(branch)}" if branch else "" + tools_preset_flag = ( + f" --toolsPreset {shlex.quote(tools_preset)}" if tools_preset and tools_preset != "default" else "" + ) return ( f"cd /scripts && " f"nohup {env_prefix}./node_modules/.bin/agent-server --port {AGENT_SERVER_PORT} --repositoryPath {shlex.quote(repo_path)} " f"--taskId {shlex.quote(task_id)} --runId {shlex.quote(run_id)} --mode {shlex.quote(mode)}" - f"{branch_flag}{mcp_servers_arg} " + f"{branch_flag}{mcp_servers_arg}{tools_preset_flag} " f"> /tmp/agent-server.log 2>&1 &" ) @@ -596,6 +600,7 @@ def start_agent_server( interaction_origin: str | None = None, branch: str | None = None, mcp_configs: list[McpServerConfig] | None = None, + tools_preset: str | None = None, ) -> None: """Start the agent-server HTTP server in the sandbox. @@ -617,7 +622,7 @@ def start_agent_server( mcp_servers_arg = f" --mcpServers {shlex.quote(mcp_json)}" command = self._build_agent_server_command( - repo_path, task_id, run_id, mode, interaction_origin, branch, mcp_servers_arg + repo_path, task_id, run_id, mode, interaction_origin, branch, mcp_servers_arg, tools_preset ) logger.info(f"Starting agent-server in sandbox {self.id} for {repository}") @@ -637,7 +642,14 @@ def start_agent_server( self.execute("pkill -f agent-server || true", timeout_seconds=5) command = self._build_agent_server_command( - repo_path, task_id, run_id, mode, interaction_origin, branch=None, mcp_servers_arg=mcp_servers_arg + repo_path, + task_id, + run_id, + mode, + interaction_origin, + branch=None, + mcp_servers_arg=mcp_servers_arg, + tools_preset=tools_preset, ) if self._launch_and_check(command): logger.info(f"Agent-server started on port {self._host_port} (without --baseBranch)") diff --git a/products/tasks/backend/services/modal_sandbox.py b/products/tasks/backend/services/modal_sandbox.py index 198f23b3acde..8f60c548252f 100644 --- a/products/tasks/backend/services/modal_sandbox.py +++ b/products/tasks/backend/services/modal_sandbox.py @@ -445,6 +445,7 @@ def start_agent_server( interaction_origin: str | None = None, branch: str | None = None, mcp_configs: list[McpServerConfig] | None = None, + tools_preset: str | None = None, ) -> None: """Start the agent-server HTTP server in the sandbox. @@ -467,11 +468,14 @@ def start_agent_server( f"env POSTHOG_CODE_INTERACTION_ORIGIN={shlex.quote(interaction_origin)} " if interaction_origin else "" ) branch_flag = f" --baseBranch {shlex.quote(branch)}" if branch else "" + tools_preset_flag = ( + f" --toolsPreset {shlex.quote(tools_preset)}" if tools_preset and tools_preset != "default" else "" + ) command = ( f"cd /scripts && " f"nohup {env_prefix}./node_modules/.bin/agent-server --port {AGENT_SERVER_PORT} --repositoryPath {shlex.quote(repo_path)} " f"--taskId {shlex.quote(task_id)} --runId {shlex.quote(run_id)} --mode {shlex.quote(mode)}" - f"{branch_flag}{mcp_servers_arg} " + f"{branch_flag}{mcp_servers_arg}{tools_preset_flag} " f"> /tmp/agent-server.log 2>&1 &" ) diff --git a/products/tasks/backend/services/sandbox.py b/products/tasks/backend/services/sandbox.py index 928dd3894bd4..6a472cb1fc78 100644 --- a/products/tasks/backend/services/sandbox.py +++ b/products/tasks/backend/services/sandbox.py @@ -125,6 +125,7 @@ def start_agent_server( interaction_origin: str | None = None, branch: str | None = None, mcp_configs: list[McpServerConfig] | None = None, + tools_preset: str | None = None, ) -> None: """Start the agent-server HTTP server in the sandbox. diff --git a/products/tasks/backend/temporal/client.py b/products/tasks/backend/temporal/client.py index 8d6917316f71..0746bcda466b 100644 --- a/products/tasks/backend/temporal/client.py +++ b/products/tasks/backend/temporal/client.py @@ -41,6 +41,7 @@ async def execute_task_processing_workflow_async( slack_thread_context: Optional[Any] = None, skip_user_check: bool = False, posthog_mcp_scopes: PosthogMcpScopes = "read_only", + tools_preset: str = "default", ) -> None: """ Start the task processing workflow asynchronously. Fire-and-forget. @@ -101,6 +102,7 @@ async def execute_task_processing_workflow_async( create_pr=create_pr, slack_thread_context=slack_context_dict, posthog_mcp_scopes=posthog_mcp_scopes, + tools_preset=tools_preset, ) logger.info( @@ -141,6 +143,7 @@ def execute_task_processing_workflow( slack_thread_context: Optional["SlackThreadContext"] = None, skip_user_check: bool = False, posthog_mcp_scopes: PosthogMcpScopes = "read_only", + tools_preset: str = "default", ) -> None: """ Start the task processing workflow synchronously. Fire-and-forget. @@ -200,6 +203,7 @@ def execute_task_processing_workflow( create_pr=create_pr, slack_thread_context=slack_context_dict, posthog_mcp_scopes=posthog_mcp_scopes, + tools_preset=tools_preset, ) logger.info( diff --git a/products/tasks/backend/temporal/process_task/activities/start_agent_server.py b/products/tasks/backend/temporal/process_task/activities/start_agent_server.py index 327afa8572b4..79fb48fdb4fb 100644 --- a/products/tasks/backend/temporal/process_task/activities/start_agent_server.py +++ b/products/tasks/backend/temporal/process_task/activities/start_agent_server.py @@ -25,6 +25,7 @@ class StartAgentServerInput: sandbox_url: str sandbox_connect_token: str | None = None posthog_mcp_scopes: PosthogMcpScopes = "read_only" + tools_preset: str = "default" @dataclass @@ -84,6 +85,7 @@ def start_agent_server(input: StartAgentServerInput) -> StartAgentServerOutput: interaction_origin=ctx.interaction_origin, branch=ctx.branch, mcp_configs=mcp_configs or None, + tools_preset=input.tools_preset if input.tools_preset != "default" else None, ) except Exception as e: raise SandboxExecutionError( diff --git a/products/tasks/backend/temporal/process_task/workflow.py b/products/tasks/backend/temporal/process_task/workflow.py index c5fa294fd0ad..5f18b3858a6b 100644 --- a/products/tasks/backend/temporal/process_task/workflow.py +++ b/products/tasks/backend/temporal/process_task/workflow.py @@ -45,6 +45,7 @@ class ProcessTaskInput: create_pr: bool = True slack_thread_context: Optional[dict[str, Any]] = None posthog_mcp_scopes: PosthogMcpScopes = "read_only" + tools_preset: str = "default" @dataclass @@ -65,6 +66,7 @@ def __init__(self) -> None: self._context: Optional[TaskProcessingContext] = None self._slack_thread_context: Optional[dict[str, Any]] = None self._posthog_mcp_scopes: PosthogMcpScopes = "read_only" + self._tools_preset: str = "default" self._task_completed: bool = False self._completion_status: str = "completed" self._completion_error: Optional[str] = None @@ -85,6 +87,7 @@ def parse_inputs(inputs: list[str]) -> ProcessTaskInput: create_pr=loaded.get("create_pr", True), slack_thread_context=loaded.get("slack_thread_context"), posthog_mcp_scopes=loaded.get("posthog_mcp_scopes", "read_only"), + tools_preset=loaded.get("tools_preset", "default"), ) @temporalio.workflow.run @@ -98,6 +101,10 @@ async def run(self, input: ProcessTaskInput) -> ProcessTaskOutput: try: self._context = await self._get_task_processing_context(input) self._posthog_mcp_scopes = input.posthog_mcp_scopes + self._tools_preset = input.tools_preset + # Force read-only MCP scopes for research_background_agent + if self._tools_preset == "research_background_agent": + self._posthog_mcp_scopes = "read_only" await self._update_task_run_status("in_progress") await self._track_workflow_event( @@ -280,6 +287,7 @@ async def _start_agent_server(self, sandbox_output: GetSandboxForRepositoryOutpu sandbox_url=sandbox_output.sandbox_url, sandbox_connect_token=sandbox_output.connect_token, posthog_mcp_scopes=self._posthog_mcp_scopes, + tools_preset=self._tools_preset, ), start_to_close_timeout=timedelta(minutes=5), retry_policy=RetryPolicy(maximum_attempts=3),