Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 44 additions & 7 deletions astrbot/core/computer/booters/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,30 @@ def _is_safe_command(command: str) -> bool:
return not any(pat in cmd for pat in _BLOCKED_COMMAND_PATTERNS)


def _kill_process_tree(proc: subprocess.Popen) -> None:
"""Forcefully terminate a process and every child it spawned.

On Windows ``Popen.kill()`` only terminates the immediate (shell) process.
Any child it started keeps running and holds the captured stdout/stderr
pipes open, so ``communicate()`` blocks until those children exit and the
requested timeout is never actually enforced. ``taskkill /T`` tears down the
whole tree. On POSIX we keep the existing ``proc.kill()`` behavior.
"""
if os.name == "nt":
try:
subprocess.run(
["taskkill", "/F", "/T", "/PID", str(proc.pid)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
return
except OSError:
# taskkill missing/unavailable: fall back to a single-process kill.
pass
proc.kill()
Comment on lines +56 to +68

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

In _kill_process_tree, if os.name == "nt" is true, we attempt to run taskkill. If taskkill is missing or fails, we catch OSError and fall back to proc.kill(). However, proc.kill() itself can raise an OSError (such as PermissionError or ProcessLookupError) if the process has already exited or if access is denied. It is safer to wrap proc.kill() in a try...except OSError: block to prevent unexpected crashes during cleanup.

    if os.name == "nt":
        try:
            subprocess.run(
                ["taskkill", "/F", "/T", "/PID", str(proc.pid)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=False,
            )
            return
        except OSError:
            # taskkill missing/unavailable: fall back to a single-process kill.
            pass
    try:
        proc.kill()
    except OSError:
        pass



def _decode_bytes_with_fallback(
output: bytes | None,
*,
Expand Down Expand Up @@ -118,18 +142,31 @@ def _run() -> dict[str, Any]:
# `command` is intentionally executed through the current shell so
# local computer-use behavior matches existing tool semantics.
# Safety relies on `_is_safe_command()` and the allowed-root checks.
result = subprocess.run( # noqa: S602 # nosemgrep: python.lang.security.audit.dangerous-subprocess-use-audit
with subprocess.Popen( # noqa: S602 # nosemgrep: python.lang.security.audit.dangerous-subprocess-use-audit
command,
shell=shell,
cwd=working_dir,
env=run_env,
timeout=timeout or 300,
capture_output=True,
)
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
) as proc:
try:
stdout, stderr = proc.communicate(timeout=timeout or 300)
except subprocess.TimeoutExpired:
# subprocess.run only kills the immediate process here, which
# leaves the shell's children alive on Windows and keeps the
# pipes open. Kill the whole tree, then drain/reap mirroring
# CPython's subprocess.run, and re-raise the same timeout.
_kill_process_tree(proc)
if os.name == "nt":
proc.communicate()
else:
proc.wait()
Comment on lines +161 to +164

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Calling proc.communicate() without a timeout on Windows inside the TimeoutExpired exception handler can block indefinitely if _kill_process_tree fails to terminate all child processes (e.g., if taskkill is unavailable and we fall back to proc.kill(), which leaves children alive).

In CPython's standard library subprocess.run implementation, Windows does not call communicate() after a timeout precisely because it cannot reliably wait for the process to exit and close its pipes without blocking. Instead, it only calls proc.wait(). We should align with this behavior and call proc.wait() on Windows as well, or at least provide a short timeout to proc.communicate() to prevent an infinite hang.

                    _kill_process_tree(proc)
                    try:
                        proc.wait(timeout=5)
                    except subprocess.TimeoutExpired:
                        pass
                    raise

raise
return {
"stdout": _decode_shell_output(result.stdout),
"stderr": _decode_shell_output(result.stderr),
"exit_code": result.returncode,
"stdout": _decode_shell_output(stdout),
"stderr": _decode_shell_output(stderr),
"exit_code": proc.returncode,
}

return await asyncio.to_thread(_run)
Expand Down
64 changes: 49 additions & 15 deletions tests/test_local_shell_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,39 @@

import asyncio
import subprocess
import sys
import time

from astrbot.core.computer.booters import local as local_booter
from astrbot.core.computer.booters.local import LocalShellComponent


class _FakeCompletedProcess:
class _FakePopen:
"""Minimal stand-in for the subprocess.Popen context manager used in exec()."""

def __init__(self, stdout: bytes, stderr: bytes = b"", returncode: int = 0):
self.stdout = stdout
self.stderr = stderr
self._stdout = stdout
self._stderr = stderr
self.returncode = returncode
self.pid = 4321

def __enter__(self):
return self

def __exit__(self, *exc_info):
return False

def communicate(self, timeout=None):
_ = timeout
return self._stdout, self._stderr


def test_local_shell_component_decodes_utf8_output(monkeypatch):
def fake_run(*args, **kwargs):
def fake_popen(*args, **kwargs):
_ = args, kwargs
return _FakeCompletedProcess(stdout="技能内容".encode())
return _FakePopen(stdout="技能内容".encode())

monkeypatch.setattr(subprocess, "run", fake_run)
monkeypatch.setattr(subprocess, "Popen", fake_popen)

result = asyncio.run(LocalShellComponent().exec("dummy"))

Expand All @@ -31,11 +46,11 @@ def fake_run(*args, **kwargs):
def test_local_shell_component_prefers_utf8_before_windows_locale(
monkeypatch,
):
def fake_run(*args, **kwargs):
def fake_popen(*args, **kwargs):
_ = args, kwargs
return _FakeCompletedProcess(stdout="技能内容".encode())
return _FakePopen(stdout="技能内容".encode())

monkeypatch.setattr(subprocess, "run", fake_run)
monkeypatch.setattr(subprocess, "Popen", fake_popen)
monkeypatch.setattr(local_booter.os, "name", "nt", raising=False)
monkeypatch.setattr(
local_booter.locale,
Expand All @@ -51,11 +66,11 @@ def fake_run(*args, **kwargs):


def test_local_shell_component_falls_back_to_gbk_on_windows(monkeypatch):
def fake_run(*args, **kwargs):
def fake_popen(*args, **kwargs):
_ = args, kwargs
return _FakeCompletedProcess(stdout="微博热搜".encode("gbk"))
return _FakePopen(stdout="微博热搜".encode("gbk"))

monkeypatch.setattr(subprocess, "run", fake_run)
monkeypatch.setattr(subprocess, "Popen", fake_popen)
monkeypatch.setattr(local_booter.os, "name", "nt", raising=False)
monkeypatch.setattr(
local_booter.locale,
Expand All @@ -71,11 +86,11 @@ def fake_run(*args, **kwargs):


def test_local_shell_component_falls_back_to_utf8_replace(monkeypatch):
def fake_run(*args, **kwargs):
def fake_popen(*args, **kwargs):
_ = args, kwargs
return _FakeCompletedProcess(stdout=b"\xffabc")
return _FakePopen(stdout=b"\xffabc")

monkeypatch.setattr(subprocess, "run", fake_run)
monkeypatch.setattr(subprocess, "Popen", fake_popen)
monkeypatch.setattr(local_booter.os, "name", "posix", raising=False)
monkeypatch.setattr(
local_booter.locale,
Expand All @@ -86,3 +101,22 @@ def fake_run(*args, **kwargs):
result = asyncio.run(LocalShellComponent().exec("dummy"))

assert result["stdout"] == "\ufffdabc"


def test_local_shell_component_timeout_returns_promptly():
# Spawn a long-running child through the shell. On Windows the shell keeps
# running while the child holds the captured stdout/stderr pipes open, so a
# plain subprocess.run(timeout=...) blocks until the child exits and the
# timeout is never enforced. The component must kill the whole process tree
# and surface the timeout within the configured window.
sleeper = f'"{sys.executable}" -c "import time; time.sleep(30)"'
start = time.monotonic()
timed_out = False
try:
asyncio.run(LocalShellComponent().exec(sleeper, timeout=2))
except subprocess.TimeoutExpired:
timed_out = True
elapsed = time.monotonic() - start

assert timed_out, "expected the shell command to time out"
assert elapsed < 15, f"shell timeout was not enforced (took {elapsed:.1f}s)"
Loading