From b9783834fa9e9e973a36d5b9eae28270d99d4aa1 Mon Sep 17 00:00:00 2001 From: Debug Agent Date: Thu, 26 Mar 2026 16:33:00 -0300 Subject: [PATCH 1/3] fix: copy Python 3.13 runtime into eval images for venv compatibility SDK v1.15.0 changed the agent-server Dockerfile builder stage from --managed-python (self-contained venv) to --python-preference only-system (venv symlinks to system Python). This broke eval image builds because Dockerfile.agent-layer copies /agent-server from the builder into an eval-base image that doesn't have Python 3.13 at /usr/local/bin/, causing "no such file or directory" on every runtime pod. Fix: copy the Python 3.13 binary, stdlib, and shared library from the builder stage into the final image so the venv symlinks resolve. Co-Authored-By: Claude Opus 4.6 --- benchmarks/utils/Dockerfile.agent-layer | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/benchmarks/utils/Dockerfile.agent-layer b/benchmarks/utils/Dockerfile.agent-layer index eb526285d..97d75404e 100644 --- a/benchmarks/utils/Dockerfile.agent-layer +++ b/benchmarks/utils/Dockerfile.agent-layer @@ -21,5 +21,15 @@ ARG USERNAME=openhands ARG OPENHANDS_BUILD_GIT_SHA=unknown ENV OPENHANDS_BUILD_GIT_SHA=${OPENHANDS_BUILD_GIT_SHA} ENV UV_PYTHON_INSTALL_DIR=/agent-server/uv-managed-python + +# Copy the Python 3.13 runtime from the builder so the venv's symlinks resolve. +# Since SDK v1.15.0 the builder venv uses --python-preference only-system, +# meaning .venv/bin/python symlinks to /usr/local/bin/python3.13. The eval-base +# image doesn't have Python 3.13, so we bring the interpreter and stdlib along. 
+COPY --from=builder /usr/local/bin/python3.13 /usr/local/bin/python3.13 +COPY --from=builder /usr/local/lib/python3.13 /usr/local/lib/python3.13 +COPY --from=builder /usr/local/lib/libpython3.13* /usr/local/lib/ +RUN ldconfig + COPY --chown=${USERNAME}:${USERNAME} --from=builder /agent-server /agent-server ENTRYPOINT ["/agent-server/.venv/bin/python", "-m", "openhands.agent_server"] From dc5060f727813f7e8c90cff5739e84c3fc0d1d28 Mon Sep 17 00:00:00 2001 From: Debug Agent Date: Thu, 26 Mar 2026 16:51:43 -0300 Subject: [PATCH 2/3] fix: use LD_LIBRARY_PATH instead of ldconfig (non-root compatible) ldconfig fails in eval-base images that run as a non-root user. Use LD_LIBRARY_PATH=/usr/local/lib instead to make libpython3.13 discoverable without needing root privileges. Co-Authored-By: Claude Opus 4.6 --- benchmarks/utils/Dockerfile.agent-layer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/utils/Dockerfile.agent-layer b/benchmarks/utils/Dockerfile.agent-layer index 97d75404e..d4d81e266 100644 --- a/benchmarks/utils/Dockerfile.agent-layer +++ b/benchmarks/utils/Dockerfile.agent-layer @@ -29,7 +29,7 @@ ENV UV_PYTHON_INSTALL_DIR=/agent-server/uv-managed-python COPY --from=builder /usr/local/bin/python3.13 /usr/local/bin/python3.13 COPY --from=builder /usr/local/lib/python3.13 /usr/local/lib/python3.13 COPY --from=builder /usr/local/lib/libpython3.13* /usr/local/lib/ -RUN ldconfig +ENV LD_LIBRARY_PATH=/usr/local/lib COPY --chown=${USERNAME}:${USERNAME} --from=builder /agent-server /agent-server ENTRYPOINT ["/agent-server/.venv/bin/python", "-m", "openhands.agent_server"] From 5020fb3541654dd964d591a8c1b43f7e90c2e181 Mon Sep 17 00:00:00 2001 From: Debug Agent Date: Thu, 26 Mar 2026 18:35:07 -0300 Subject: [PATCH 3/3] Increase api_timeout for swebenchmultimodal to 300s The default api_timeout of 60s is too short for the ACP agent lazy initialization that happens on the first send_message() call. 
For large repo images (wp-calypso, p5.js, marked), the ACP subprocess startup consistently exceeds 60s, causing 100% failure rates even at resource_factor=8. Raising to 300s (configurable via REMOTE_API_TIMEOUT env var) gives the agent-server enough time to complete lazy init for large images. Co-Authored-By: Claude Opus 4.6 --- benchmarks/swebenchmultimodal/run_infer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/swebenchmultimodal/run_infer.py b/benchmarks/swebenchmultimodal/run_infer.py index 1485aa98a..62a94cf0f 100644 --- a/benchmarks/swebenchmultimodal/run_infer.py +++ b/benchmarks/swebenchmultimodal/run_infer.py @@ -207,6 +207,7 @@ def prepare_workspace( f"(tag prefix: {IMAGE_TAG_PREFIX}, resource_factor: {resource_factor})" ) startup_timeout = float(os.getenv("REMOTE_RUNTIME_STARTUP_TIMEOUT", "600")) + api_timeout = float(os.getenv("REMOTE_API_TIMEOUT", "300")) workspace = APIRemoteWorkspace( runtime_api_url=os.getenv( "RUNTIME_API_URL", "https://runtime.eval.all-hands.dev" @@ -215,6 +216,7 @@ def prepare_workspace( server_image=agent_server_image, init_timeout=startup_timeout, startup_wait_timeout=startup_timeout, + api_timeout=api_timeout, target_type="source" if "source" in build_target else "binary", forward_env=forward_env or [], resource_factor=resource_factor,