diff --git a/src/lib/adapters/docker/index.ts b/src/lib/adapters/docker/index.ts index 9e726a0577..fc6d4250ba 100644 --- a/src/lib/adapters/docker/index.ts +++ b/src/lib/adapters/docker/index.ts @@ -5,6 +5,7 @@ export * from "./run"; export * from "./exec"; export * from "./pull"; export * from "./info"; +export * from "./runtime"; export * from "./inspect"; export * from "./image"; export * from "./container"; diff --git a/src/lib/adapters/docker/runtime.test.ts b/src/lib/adapters/docker/runtime.test.ts new file mode 100644 index 0000000000..6356ea2124 --- /dev/null +++ b/src/lib/adapters/docker/runtime.test.ts @@ -0,0 +1,58 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; + +vi.mock("../../runner", () => ({ + ROOT: "/repo/root", + run: vi.fn(), + runCapture: vi.fn(), +})); + +import { + DOCKER_INFO_RUNTIME_PROBE_ATTEMPTS, + DOCKER_INFO_RUNTIME_PROBE_TIMEOUT_MS, + detectContainerRuntimeFromDockerInfo, +} from "./runtime"; + +describe("docker runtime detection", () => { + it("retries indeterminate docker info output before returning a runtime", () => { + const calls: unknown[] = []; + const outputs = ["", "", "Operating System: Docker Desktop"]; + + const runtime = detectContainerRuntimeFromDockerInfo({ + dockerInfoImpl: (opts) => { + calls.push(opts); + return outputs.shift() ?? ""; + }, + }); + + expect(runtime).toBe("docker-desktop"); + expect(calls).toHaveLength(DOCKER_INFO_RUNTIME_PROBE_ATTEMPTS); + expect(calls).toEqual( + Array.from({ length: DOCKER_INFO_RUNTIME_PROBE_ATTEMPTS }, () => ({ + ignoreError: true, + timeout: DOCKER_INFO_RUNTIME_PROBE_TIMEOUT_MS, + })), + ); + }); + + it("returns unknown after all attempts are indeterminate", () => { + const calls: unknown[] = []; + + const runtime = detectContainerRuntimeFromDockerInfo({ + attempts: 2, + dockerInfoImpl: (opts) => { + calls.push(opts); + return ""; + }, + timeoutMs: 1234, + }); + + expect(runtime).toBe("unknown"); + expect(calls).toEqual([ + { ignoreError: true, timeout: 1234 }, + { ignoreError: true, timeout: 1234 }, + ]); + }); +}); diff --git a/src/lib/adapters/docker/runtime.ts b/src/lib/adapters/docker/runtime.ts new file mode 100644 index 0000000000..eca9cb29e2 --- /dev/null +++ b/src/lib/adapters/docker/runtime.ts @@ -0,0 +1,33 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { ContainerRuntime } from "../../platform"; +import { inferContainerRuntime } from "../../platform"; +import { dockerInfo } from "./info"; +import type { DockerCaptureOptions } from "./run"; + +export const DOCKER_INFO_RUNTIME_PROBE_ATTEMPTS = 3; +export const DOCKER_INFO_RUNTIME_PROBE_TIMEOUT_MS = 5000; + +type DockerInfoProbe = (opts: DockerCaptureOptions) => string; + +export interface DetectContainerRuntimeOptions { + attempts?: number; + dockerInfoImpl?: DockerInfoProbe; + timeoutMs?: number; +} + +export function detectContainerRuntimeFromDockerInfo( + opts: DetectContainerRuntimeOptions = {}, +): ContainerRuntime { + const attempts = Math.max(1, Math.floor(opts.attempts ?? DOCKER_INFO_RUNTIME_PROBE_ATTEMPTS)); + const timeout = Math.max(1, Math.floor(opts.timeoutMs ?? DOCKER_INFO_RUNTIME_PROBE_TIMEOUT_MS)); + const probe = opts.dockerInfoImpl ?? dockerInfo; + + for (let attempt = 0; attempt < attempts; attempt++) { + const runtime = inferContainerRuntime(probe({ ignoreError: true, timeout })); + if (runtime !== "unknown") return runtime; + } + + return "unknown"; +} diff --git a/src/lib/adapters/http/probe.test.ts b/src/lib/adapters/http/probe.test.ts index 275e147952..5224b8cc6f 100644 --- a/src/lib/adapters/http/probe.test.ts +++ b/src/lib/adapters/http/probe.test.ts @@ -186,6 +186,30 @@ describe("http-probe helpers", () => { expect(result.message).toContain("curl failed"); expect(result.stderr).toContain("spawn ENOENT"); }); + + it("reports spawnSync ETIMEDOUT as a timeout status", () => { + const result = runCurlProbe(["-sS", "https://example.test/models"], { + spawnSyncImpl: () => { + const error = Object.assign(new Error("spawnSync curl ETIMEDOUT"), { + code: "ETIMEDOUT", + errno: -60, + }); + return { + pid: 1, + output: [], + stdout: "", + stderr: "", + status: null, + signal: null, + error, + }; + }, + }); + + expect(result.ok).toBe(false); + expect(result.curlStatus).toBe(-110); + expect(result.message).toContain("ETIMEDOUT"); + }); }); describe("runChatCompletionsStreamingProbe", () => { @@ -237,6 +261,33 @@ describe("runChatCompletionsStreamingProbe", () => { expect(result.curlStatus).toBe(28); }); + it("reports chat streaming spawnSync ETIMEDOUT as a timeout status", () => { + const result = runChatCompletionsStreamingProbe( + ["-sS", "--max-time", "120", "https://example.test/v1/chat/completions"], + { + spawnSyncImpl: () => { + const error = Object.assign(new Error("spawnSync curl ETIMEDOUT"), { + code: "ETIMEDOUT", + errno: -60, + }); + return { + pid: 1, + output: [], + stdout: "", + stderr: "", + status: null, + signal: null, + error, + }; + }, + }, + ); + + expect(result.ok).toBe(false); + expect(result.curlStatus).toBe(-110); + expect(result.message).toContain("ETIMEDOUT"); + }); + it("does not treat a lone DONE frame as successful streaming data", () => { const result = runChatCompletionsStreamingProbe( ["-sS", "--max-time", "120", "https://example.test/v1/chat/completions"], @@ -390,6 +441,39 @@ describe("runStreamingEventProbe", () => { expect(result.message).toContain("Streaming probe failed"); }); + it("records normalized timeout status for responses streaming spawnSync ETIMEDOUT", () => { + withTraceFile((traceFile) => { + const result = runStreamingEventProbe(["-sS", "https://example.test/v1/responses"], { + spawnSyncImpl: () => { + const error = Object.assign(new Error("spawnSync curl ETIMEDOUT"), { + code: "ETIMEDOUT", + errno: -60, + }); + return { + pid: 1, + output: [], + stdout: "", + stderr: "", + status: null, + signal: null, + error, + }; + }, + }); + + expect(result.ok).toBe(false); + flushTrace(); + const artifact = JSON.parse(fs.readFileSync(traceFile, "utf8")) as TraceArtifact; + const span = artifact.resource_spans[0].scope_spans[0].spans.find( + (entry) => entry.name === "nemoclaw.inference.curl_streaming_event_probe", + ); + expect(span?.events[0].attributes).toMatchObject({ + ok: false, + curl_status: -110, + }); + }); + }); + it("cleans up temp files after probe", () => { let outputPath = ""; runStreamingEventProbe(["-sS", "--max-time", "15", "https://example.test/v1/responses"], { diff --git a/src/lib/adapters/http/probe.ts b/src/lib/adapters/http/probe.ts index 8fbc224195..be5b7c2929 100644 --- a/src/lib/adapters/http/probe.ts +++ b/src/lib/adapters/http/probe.ts @@ -103,6 +103,14 @@ function resolveCurlProcessTimeoutMs(argv: string[], opts: CurlProbeOptions): nu ); } +function normalizeSpawnErrorCode(error: unknown): number { + if (isErrnoException(error) && error.code === "ETIMEDOUT") return -110; + const rawErrorCode = isErrnoException(error) + ? (error.errno ?? error.code) + : undefined; + return typeof rawErrorCode === "number" ? rawErrorCode : 1; +} + function sanitizeCurlUrl(value: string): string { try { const url = new URL(value); @@ -220,10 +228,7 @@ function runCurlProbeImpl(argv: string[], opts: CurlProbeOptions = {}): CurlProb ); const body = fs.existsSync(bodyFile) ? fs.readFileSync(bodyFile, "utf8") : ""; if (result.error) { - const rawErrorCode = isErrnoException(result.error) - ? (result.error.errno ?? result.error.code) - : undefined; - const errorCode = typeof rawErrorCode === "number" ? rawErrorCode : 1; + const errorCode = normalizeSpawnErrorCode(result.error); const errorMessage = compactText( `${result.error.message || String(result.error)} ${String(result.stderr || "")}`, ); @@ -335,10 +340,7 @@ function runChatCompletionsStreamingProbeImpl( const body = fs.existsSync(bodyFile) ? fs.readFileSync(bodyFile, "utf8") : ""; if (result.error) { - const rawErrorCode = isErrnoException(result.error) - ? (result.error.errno ?? result.error.code) - : undefined; - const errorCode = typeof rawErrorCode === "number" ? rawErrorCode : 1; + const errorCode = normalizeSpawnErrorCode(result.error); const errorMessage = compactText( `${result.error.message || String(result.error)} ${String(result.stderr || "")}`, ); @@ -457,13 +459,14 @@ function runStreamingEventProbeImpl( if (result.error || (result.status !== null && result.status !== 0 && result.status !== 28)) { // curl exit 28 = timeout, which is expected — we cap with --max-time // and may still have collected enough events before the timeout. + const curlStatus = result.error ? normalizeSpawnErrorCode(result.error) : (result.status ?? 1); const detail = result.error ? String(result.error.message || result.error) : String(result.stderr || ""); emitCurlResultTraceEvent({ ok: false, missing_events_count: REQUIRED_STREAMING_EVENTS.length, - curl_status: result.status ?? 1, + curl_status: curlStatus, }); return { ok: false, diff --git a/src/lib/inference/local.ts b/src/lib/inference/local.ts index e138e0ac81..26eaed18f6 100644 --- a/src/lib/inference/local.ts +++ b/src/lib/inference/local.ts @@ -11,7 +11,6 @@ import os from "node:os"; import nodePath from "node:path"; import type { CurlProbeResult } from "../adapters/http/probe"; import { runCurlProbe } from "../adapters/http/probe"; -import type { ContainerRuntime } from "../platform"; import type { CaptureResult } from "../runner"; import { buildSubprocessEnv } from "../subprocess-env"; import { @@ -39,12 +38,11 @@ import { SMALLEST_OLLAMA_MODEL_TAG, } from "./ollama-model-registry"; -const { containerCanReachHostLoopback, inferContainerRuntime, isWsl } = require("../platform"); -const { dockerInfo } = require("../adapters/docker/info"); +const { containerCanReachHostLoopback, isWsl } = require("../platform"); +const { detectContainerRuntimeFromDockerInfo } = + require("../adapters/docker/runtime") as typeof import("../adapters/docker/runtime"); const { detectNvidiaPlatform } = require("./nim"); -const DOCKER_INFO_RUNTIME_PROBE_TIMEOUT_MS = 1500; - /** * Port containers use to reach Ollama. Returns the raw Ollama port when the * container can reach the host's 127.0.0.1 directly (Docker Desktop on WSL), @@ -54,9 +52,7 @@ const DOCKER_INFO_RUNTIME_PROBE_TIMEOUT_MS = 1500; let _ollamaContainerPort: number | null = null; export function getOllamaContainerPort(): number { if (_ollamaContainerPort !== null) return _ollamaContainerPort; - const runtime = inferContainerRuntime( - dockerInfo({ ignoreError: true, timeout: DOCKER_INFO_RUNTIME_PROBE_TIMEOUT_MS }), - ) as ContainerRuntime; + const runtime = detectContainerRuntimeFromDockerInfo(); _ollamaContainerPort = containerCanReachHostLoopback(runtime) ? OLLAMA_PORT : OLLAMA_PROXY_PORT; return _ollamaContainerPort; } diff --git a/src/lib/inference/onboard-probes.test.ts b/src/lib/inference/onboard-probes.test.ts index 26ee173d6f..551f10fc46 100644 --- a/src/lib/inference/onboard-probes.test.ts +++ b/src/lib/inference/onboard-probes.test.ts @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +import { spawnSync } from "node:child_process"; import fs from "node:fs"; import os from "node:os"; import path from "node:path"; @@ -597,6 +598,70 @@ exit 0 } }); + it("preserves query-param auth on doubled-timeout chat-completions retry", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-query-retry-probe-")); + const fakeBin = path.join(tmpDir, "bin"); + const counter = path.join(tmpDir, "counter"); + fs.mkdirSync(fakeBin, { recursive: true }); + fs.writeFileSync(counter, "0"); + fs.writeFileSync( + path.join(fakeBin, "curl"), + `#!/usr/bin/env bash +outfile="" +n=$(cat "${counter}") +n=$((n + 1)) +echo "$n" > "${counter}" +printf '%s\\n' "$@" > "${tmpDir}/args-$n.txt" +while [ "$#" -gt 0 ]; do + case "$1" in + -o) outfile="$2"; shift 2 ;; + -w) shift 2 ;; + *) shift ;; + esac +done +if [ "$n" -eq 1 ]; then + if [ -n "$outfile" ]; then + : > "$outfile" + fi + printf '000' + exit 28 +fi +if [ -n "$outfile" ]; then + cat <<'JSON' > "$outfile" +{"choices":[{"message":{"content":"OK"}}]} +JSON +fi +printf '200' +exit 0 +`, + { mode: 0o755 }, + ); + + const originalPath = process.env.PATH; + process.env.PATH = `${fakeBin}:${originalPath || ""}`; + try { + const result = probeOpenAiLikeEndpoint( + "https://api.example.com/v1", + "test-model", + "secret key", + { skipResponsesProbe: true, authMode: "query-param" }, + ); + + expect(result).toMatchObject({ ok: true, api: "openai-completions" }); + expect(fs.readFileSync(counter, "utf8").trim()).toBe("2"); + for (const call of ["1", "2"]) { + const args = fs.readFileSync(path.join(tmpDir, `args-${call}.txt`), "utf8"); + expect(args).toContain( + "https://api.example.com/v1/chat/completions?key=secret%20key", + ); + expect(args).not.toContain("Authorization: Bearer"); + } + } finally { + process.env.PATH = originalPath; + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + it("keeps timeout retries strict when chat-completions tool calling is required", () => { const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-strict-retry-probe-")); const fakeBin = path.join(tmpDir, "bin"); @@ -660,6 +725,80 @@ exit 0 } }); + it("retries strict tool-call validation after the parent curl process times out", () => { + const repoRoot = path.join(import.meta.dirname, "../../.."); + const onboardProbePath = JSON.stringify( + path.join(repoRoot, "dist", "lib", "inference", "onboard-probes.js"), + ); + const httpProbePath = JSON.stringify( + path.join(repoRoot, "dist", "lib", "adapters", "http", "probe.js"), + ); + const script = ` +const httpProbe = require(${httpProbePath}); +let calls = 0; +const timeoutMs = []; +httpProbe.runCurlProbe = (_args, opts = {}) => { + calls += 1; + timeoutMs.push(opts.timeoutMs ?? null); + if (calls === 1) { + return { + ok: false, + httpStatus: 0, + curlStatus: -110, + body: "", + stderr: "spawnSync curl ETIMEDOUT", + message: "curl failed (exit -110): spawnSync curl ETIMEDOUT", + }; + } + return { + ok: true, + httpStatus: 200, + curlStatus: 0, + body: JSON.stringify({ + choices: [ + { + message: { + tool_calls: [ + { + type: "function", + function: { + name: "sessions_send", + arguments: { message: "hello" }, + }, + }, + ], + }, + }, + ], + }), + stderr: "", + message: "HTTP 200", + }; +}; +const probes = require(${onboardProbePath}); +const result = probes.probeOpenAiLikeEndpoint( + "https://api.example.com/v1", + "test-model", + null, + { skipResponsesProbe: true, requireChatCompletionsToolCalling: true }, +); +process.stdout.write(JSON.stringify({ result, calls, timeoutMs })); +`; + + const run = spawnSync(process.execPath, ["-e", script], { + cwd: repoRoot, + encoding: "utf8", + }); + + expect(run.status).toBe(0); + const payload = JSON.parse(run.stdout); + expect(payload.result).toMatchObject({ ok: true, api: "openai-completions" }); + expect(payload.calls).toBe(2); + expect(payload.timeoutMs).toHaveLength(2); + expect(payload.timeoutMs[0]).toBeGreaterThan(0); + expect(payload.timeoutMs[1]).toBeGreaterThan(payload.timeoutMs[0]); + }); + it("keeps retrying when initial timeout is followed by a transient 502", () => { const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-timeout-502-probe-")); const fakeBin = path.join(tmpDir, "bin"); diff --git a/src/lib/inference/onboard-probes.ts b/src/lib/inference/onboard-probes.ts index c11790bfa7..b77655ca8c 100644 --- a/src/lib/inference/onboard-probes.ts +++ b/src/lib/inference/onboard-probes.ts @@ -33,6 +33,8 @@ const EXTENDED_NVIDIA_ENDPOINT_VALIDATION_MODELS = new Set([ "qwen/qwen3.5-397b-a17b", "deepseek-ai/deepseek-v4-flash", ]); +const CURL_TIMEOUT_STATUS = 28; +const NODE_SPAWN_TIMEOUT_STATUS = -110; // Hostnames that are normally meant for the sandbox/container host boundary. // host.openshell.internal only resolves inside the OpenShell sandbox network, @@ -254,8 +256,22 @@ function shouldRetryHttpProbe(result) { ); } -function isCurlTimeout(result) { - return result && !result.ok && result.curlStatus === 28; +function isProbeTimeout(result) { + return ( + result && + !result.ok && + (result.curlStatus === CURL_TIMEOUT_STATUS || + result.curlStatus === NODE_SPAWN_TIMEOUT_STATUS) + ); +} + +function isTimeoutOrConnFailureStatus(curlStatus) { + return ( + curlStatus === CURL_TIMEOUT_STATUS || + curlStatus === NODE_SPAWN_TIMEOUT_STATUS || + curlStatus === 6 || + curlStatus === 7 + ); } function executeProbeWithHttpRetry(probe) { @@ -694,7 +710,7 @@ function probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, options = {}) { if ( probe.api === "openai-completions" && isDeepSeekV4ProModel(model) && - isCurlTimeout(result) + isProbeTimeout(result) ) { const warning = "DeepSeek V4 Pro validation timed out before the stream returned data; continuing with NVIDIA Endpoints because this model can take longer than the onboarding probe budget to emit its first token."; @@ -725,15 +741,14 @@ function probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, options = {}) { // stack can cause the initial probe to time out before the TLS handshake // completes (#987); hosted providers also occasionally drop connections for // tens of seconds during incidents (#3033). - const isTimeoutOrConnFailure = (cs) => cs === 28 || cs === 6 || cs === 7; const isRetriableProbeResult = (result) => - isTimeoutOrConnFailure(result.curlStatus) || + isTimeoutOrConnFailureStatus(result.curlStatus) || RETRIABLE_HTTP_PROBE_STATUSES.has(result.httpStatus); // Look across every failure entry rather than only failures[0] so a probe // ordering like /responses (HTTP error) followed by /chat/completions // (curl 28) still triggers the chat-completions retry path. let retriedAfterTimeout = false; - if (failures.some((failure) => isTimeoutOrConnFailure(failure.curlStatus))) { + if (failures.some((failure) => isTimeoutOrConnFailureStatus(failure.curlStatus))) { retriedAfterTimeout = true; const platformOptions = typeof options.isWsl === "boolean" ? { isWsl: options.isWsl } : undefined; @@ -744,10 +759,10 @@ function probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, options = {}) { ...doubledArgs, "-H", "Content-Type: application/json", - ...(apiKey ? ["-H", `Authorization: Bearer ${normalizeCredentialValue(apiKey)}`] : []), + ...authHeader, "-d", JSON.stringify(getChatCompletionsProbePayload(model)), - `${String(endpointUrl).replace(/\/+$/, "")}/chat/completions`, + appendKey("/chat/completions"), ]; const runRetryProbe = () => options.requireChatCompletionsToolCalling === true @@ -765,7 +780,7 @@ function probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, options = {}) { } for (const delayMs of HTTP_PROBE_RETRY_DELAYS_MS) { if (!isRetriableProbeResult(retryResult)) break; - const reason = isTimeoutOrConnFailure(retryResult.curlStatus) + const reason = isTimeoutOrConnFailureStatus(retryResult.curlStatus) ? "timed out" : `returned HTTP ${retryResult.httpStatus}`; console.log( diff --git a/src/lib/onboard/local-inference-topology.ts b/src/lib/onboard/local-inference-topology.ts index 0b9db9485a..1da344f28e 100644 --- a/src/lib/onboard/local-inference-topology.ts +++ b/src/lib/onboard/local-inference-topology.ts @@ -1,20 +1,12 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -import { dockerInfo } from "../adapters/docker/info"; -import { - type ContainerRuntime, - containerCanReachHostLoopback, - inferContainerRuntime, -} from "../platform"; +import { detectContainerRuntimeFromDockerInfo } from "../adapters/docker/runtime"; +import { type ContainerRuntime, containerCanReachHostLoopback } from "../platform"; import { ensureOllamaLoopbackSystemdOverride } from "./ollama-systemd"; -const DOCKER_INFO_RUNTIME_PROBE_TIMEOUT_MS = 1500; - export function getContainerRuntime(): ContainerRuntime { - return inferContainerRuntime( - dockerInfo({ ignoreError: true, timeout: DOCKER_INFO_RUNTIME_PROBE_TIMEOUT_MS }), - ); + return detectContainerRuntimeFromDockerInfo(); } function describeContainerRuntime(runtime: ContainerRuntime): string {