diff --git a/src/google/adk/code_executors/container_code_executor.py b/src/google/adk/code_executors/container_code_executor.py index d6a78d4d26..1c5a00569d 100644 --- a/src/google/adk/code_executors/container_code_executor.py +++ b/src/google/adk/code_executors/container_code_executor.py @@ -17,6 +17,7 @@ import atexit import logging import os +import threading from typing import Optional import docker @@ -125,24 +126,62 @@ def execute_code( invocation_context: InvocationContext, code_execution_input: CodeExecutionInput, ) -> CodeExecutionResult: - output = '' - error = '' - exec_result = self._container.exec_run( + logger.debug('Executing code:\n```\n%s\n```', code_execution_input.code) + + timeout = self.timeout_seconds + exec_id = self._client.api.exec_create( + self._container.id, ['python3', '-c', code_execution_input.code], - demux=True, + )['Id'] + + # exec_start blocks until the command finishes — run in a daemon + # thread so we can enforce timeout. + result_holder = {} + + def _run(): + try: + result_holder['data'] = self._client.api.exec_start(exec_id, demux=True) + except Exception as e: + result_holder['error'] = e + + t = threading.Thread(target=_run, daemon=True) + t.start() + + if timeout is not None: + t.join(timeout=timeout) + else: + t.join() + + if t.is_alive(): + # Timeout: best-effort kill of the exec process inside the + # container. NOTE: the blocked daemon thread may linger on the + # Docker socket until the container is cleaned up. Repeated + # timeouts can accumulate such threads. This is acceptable as + # a first-pass fix; a stronger approach would restart the + # container on timeout. + try: + inspect = self._client.api.exec_inspect(exec_id) + pid = inspect.get('Pid') + if pid: + self._container.exec_run(['kill', '-9', str(pid)]) + except Exception: + pass + return CodeExecutionResult( + stdout='', + stderr=f'Code execution timed out after {timeout} seconds.', + output_files=[], + ) + + # Propagate exceptions from exec_start (e.g. Docker API errors). + if 'error' in result_holder: + raise result_holder['error'] + + data = result_holder.get('data', (None, None)) + output = data[0].decode('utf-8') if data and data[0] else '' + error = ( + data[1].decode('utf-8') if data and len(data) > 1 and data[1] else '' ) - logger.debug('Executed code:\n```\n%s\n```', code_execution_input.code) - - if exec_result.output and exec_result.output[0]: - output = exec_result.output[0].decode('utf-8') - if ( - exec_result.output - and len(exec_result.output) > 1 - and exec_result.output[1] - ): - error = exec_result.output[1].decode('utf-8') - - # Collect the final result. + return CodeExecutionResult( stdout=output, stderr=error, diff --git a/src/google/adk/code_executors/unsafe_local_code_executor.py b/src/google/adk/code_executors/unsafe_local_code_executor.py index 64752fffd5..8559c43ad5 100644 --- a/src/google/adk/code_executors/unsafe_local_code_executor.py +++ b/src/google/adk/code_executors/unsafe_local_code_executor.py @@ -55,7 +55,27 @@ def _prepare_globals(code: str, globals_: dict[str, Any]) -> None: class UnsafeLocalCodeExecutor(BaseCodeExecutor): - """A code executor that unsafely execute code in the current local context.""" + """A code executor that runs LLM-generated code on the local machine. + + .. warning:: + **Security notice** -- This executor runs arbitrary code in a + spawned process with the *same* OS-level privileges as the host + application. It provides **no sandboxing, filesystem isolation, + or network restrictions**. + + Recommended only for: + - Local development and prototyping + - Trusted, pre-reviewed code + - Environments where the host is already disposable (CI runners, + ephemeral VMs) + + For production workloads, prefer ``ContainerCodeExecutor`` (Docker + isolation) or Vertex AI code execution for stronger boundaries. + + Timeout enforcement is handled via ``multiprocessing`` with + ``queue.get(timeout=self.timeout_seconds)``. The spawned process is + terminated if it exceeds the configured ``timeout_seconds``. + """ # Overrides the BaseCodeExecutor attribute: this executor cannot be stateful. stateful: bool = Field(default=False, frozen=True, exclude=True) diff --git a/src/google/adk/tools/skill_toolset.py b/src/google/adk/tools/skill_toolset.py index 3ea1f2bfe4..f91779d57b 100644 --- a/src/google/adk/tools/skill_toolset.py +++ b/src/google/adk/tools/skill_toolset.py @@ -493,6 +493,7 @@ def _build_wrapper_code( "import sys", "import json as _json", "import subprocess", + "import signal", "import runpy", f"_files = {files_dict!r}", "def _materialize_and_run():", @@ -512,13 +513,29 @@ def _build_wrapper_code( argv_list = [script_path] for k, v in script_args.items(): argv_list.extend([f"--{k}", str(v)]) + timeout = self._script_timeout code_lines.extend([ f" sys.argv = {argv_list!r}", + " if hasattr(signal, 'SIGALRM'):", + " def _timeout_handler(*_args):", + ( + " raise TimeoutError(" + f"'Python script timed out after {timeout}s')" + ), + ( + " _prev_handler = signal.signal(" + "signal.SIGALRM, _timeout_handler)" + ), + f" signal.alarm({timeout})", " try:", f" runpy.run_path({script_path!r}, run_name='__main__')", " except SystemExit as e:", " if e.code is not None and e.code != 0:", " raise e", + " finally:", + " if hasattr(signal, 'SIGALRM'):", + " signal.alarm(0)", + " signal.signal(signal.SIGALRM, _prev_handler)", ]) elif ext in ("sh", "bash"): arr = ["bash", script_path] @@ -684,9 +701,11 @@ def __init__( Args: skills: List of skills to register. code_executor: Optional code executor for script execution. - script_timeout: Timeout in seconds for shell script execution via - subprocess.run. Defaults to 300 seconds. Does not apply to Python - scripts executed via exec(). + script_timeout: Timeout in seconds for script execution. Applies to + shell scripts (via subprocess.run) and Python scripts (via + signal.SIGALRM on POSIX). On platforms without SIGALRM the Python + timeout is best-effort and depends on the underlying code executor's + own timeout. Defaults to 300 seconds. """ super().__init__() diff --git a/tests/unittests/code_executors/test_container_code_executor.py b/tests/unittests/code_executors/test_container_code_executor.py new file mode 100644 index 0000000000..4a92a5c024 --- /dev/null +++ b/tests/unittests/code_executors/test_container_code_executor.py @@ -0,0 +1,115 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for ContainerCodeExecutor timeout enforcement.""" + +import time +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest + +pytest.importorskip('docker', reason='docker SDK not installed') + +from google.adk.code_executors.code_execution_utils import CodeExecutionInput +from google.adk.code_executors.container_code_executor import ContainerCodeExecutor + + +@pytest.fixture +def mock_docker(): + with patch('google.adk.code_executors.container_code_executor.docker') as m: + mock_client = MagicMock() + mock_container = MagicMock() + mock_container.id = 'test-container-123' + mock_client.containers.run.return_value = mock_container + # verify_python_installation needs exit_code=0 + mock_container.exec_run.return_value = MagicMock(exit_code=0) + m.from_env.return_value = mock_client + yield mock_client, mock_container + + +@pytest.fixture +def mock_invocation_context(): + return MagicMock() + + +class TestContainerTimeout: + + def test_timeout_returns_error(self, mock_docker, mock_invocation_context): + """exec_start that blocks is killed after timeout_seconds.""" + client, container = mock_docker + + def slow_exec_start(exec_id, **kwargs): + time.sleep(10) + return (b'output', b'') + + client.api.exec_create.return_value = {'Id': 'exec-123'} + client.api.exec_start.side_effect = slow_exec_start + client.api.exec_inspect.return_value = {'Pid': 42} + + executor = ContainerCodeExecutor(image='test:latest', timeout_seconds=1) + code_input = CodeExecutionInput(code='time.sleep(999)') + result = executor.execute_code(mock_invocation_context, code_input) + + assert 'timed out' in result.stderr.lower() + assert result.stdout == '' + + # Verify the best-effort kill cleanup path was exercised + client.api.exec_inspect.assert_called_once_with('exec-123') + container.exec_run.assert_called_with(['kill', '-9', '42']) + + def test_no_timeout_waits_indefinitely( + self, mock_docker, mock_invocation_context + ): + """When timeout_seconds is None, wait for completion.""" + client, container = mock_docker + client.api.exec_create.return_value = {'Id': 'exec-456'} + client.api.exec_start.return_value = (b'hello\n', b'') + + executor = ContainerCodeExecutor(image='test:latest') + code_input = CodeExecutionInput(code='print("hello")') + result = executor.execute_code(mock_invocation_context, code_input) + + assert result.stdout == 'hello\n' + assert result.stderr == '' + + def test_normal_execution_within_timeout( + self, mock_docker, mock_invocation_context + ): + """Fast command completes within timeout.""" + client, container = mock_docker + client.api.exec_create.return_value = {'Id': 'exec-789'} + client.api.exec_start.return_value = (b'42\n', b'') + + executor = ContainerCodeExecutor(image='test:latest', timeout_seconds=30) + code_input = CodeExecutionInput(code='print(42)') + result = executor.execute_code(mock_invocation_context, code_input) + + assert result.stdout == '42\n' + assert result.stderr == '' + + def test_exec_start_exception_propagated( + self, mock_docker, mock_invocation_context + ): + """Docker API error in exec_start is re-raised, not swallowed.""" + client, container = mock_docker + client.api.exec_create.return_value = {'Id': 'exec-err'} + client.api.exec_start.side_effect = RuntimeError( + 'Docker API connection lost' + ) + + executor = ContainerCodeExecutor(image='test:latest', timeout_seconds=30) + code_input = CodeExecutionInput(code='print(1)') + with pytest.raises(RuntimeError, match='Docker API connection'): + executor.execute_code(mock_invocation_context, code_input) diff --git a/tests/unittests/tools/test_skill_toolset.py b/tests/unittests/tools/test_skill_toolset.py index 3e8ca66377..81b5687ddf 100644 --- a/tests/unittests/tools/test_skill_toolset.py +++ b/tests/unittests/tools/test_skill_toolset.py @@ -839,6 +839,68 @@ async def test_execute_script_shell_includes_timeout(mock_skill1): assert "timeout=60" in code_input.code +@pytest.mark.asyncio +async def test_execute_script_python_includes_timeout(mock_skill1): + """Python wrapper includes guarded signal.alarm timeout.""" + executor = _make_mock_executor(stdout="ok\n") + toolset = skill_toolset.SkillToolset( + [mock_skill1], code_executor=executor, script_timeout=120 + ) + tool = skill_toolset.RunSkillScriptTool(toolset) + ctx = _make_tool_context_with_agent() + result = await tool.run_async( + args={"skill_name": "skill1", "script_path": "run.py"}, + tool_context=ctx, + ) + assert result["status"] == "success" + call_args = executor.execute_code.call_args + code = call_args[0][1].code + # Verify timeout wiring + assert "signal.alarm(120)" in code + assert "hasattr(signal, 'SIGALRM')" in code + # Verify cleanup (alarm cancel + handler restore) in finally block + assert "signal.alarm(0)" in code + + +import signal as _signal_mod + + +@pytest.mark.asyncio +@pytest.mark.skipif( + not hasattr(_signal_mod, "SIGALRM"), + reason="Python wrapper timeout requires SIGALRM (POSIX only)", +) +async def test_python_script_timeout_fires_with_real_executor(): + """Python skill script that sleeps is killed by SIGALRM timeout.""" + from google.adk.code_executors.unsafe_local_code_executor import UnsafeLocalCodeExecutor + + # Build a skill with a Python script that sleeps forever + skill = _make_skill_with_script( + "sleeper", + "hang.py", + models.Script(src="import time\ntime.sleep(9999)"), + ) + executor = UnsafeLocalCodeExecutor(timeout_seconds=30) + toolset = skill_toolset.SkillToolset( + [skill], code_executor=executor, script_timeout=2 + ) + tool = skill_toolset.RunSkillScriptTool(toolset) + ctx = _make_tool_context_with_agent() + import time as _time + + t0 = _time.monotonic() + result = await tool.run_async( + args={"skill_name": "sleeper", "script_path": "hang.py"}, + tool_context=ctx, + ) + elapsed = _time.monotonic() - t0 + # Should complete well under 30s (the executor timeout) + # thanks to the 2s SIGALRM in the wrapper + assert elapsed < 10 + assert result["status"] == "error" + assert "timed out" in result["stderr"].lower() + + @pytest.mark.asyncio async def test_execute_script_extensionless_unsupported(mock_skill1): """Files without extensions should return UNSUPPORTED_SCRIPT_TYPE."""