Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ repos:

- id: pyright-check
name: Pyright (nemoclaw-blueprint)
entry: bash -c 'cd nemoclaw-blueprint && uv run --with pyright pyright .'
entry: bash -c 'cd nemoclaw-blueprint && uv run --with pyright --with pytest pyright .'
language: system
pass_filenames: false
always_run: true
Expand Down
280 changes: 176 additions & 104 deletions bin/lib/nim.js
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,38 +1,92 @@
#!/usr/bin/env node
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// NIM container management — pull, start, stop, health-check NIM images.

const { run, runCapture, shellQuote } = require("./runner");
const nimImages = require("./nim-images.json");
import { spawnSync } from "node:child_process";
import { fileURLToPath } from "node:url";

function containerName(sandboxName) {
return `nemoclaw-nim-${sandboxName}`;
// Quote a value for safe interpolation into a POSIX shell command.
// Wraps the value in single quotes and escapes embedded single quotes
// with the standard '\'' idiom.
function shellQuote(s) {
  const escaped = String(s).split("'").join(`'\\''`);
  return `'` + escaped + `'`;
}

function getImageForModel(modelName) {
const entry = nimImages.models.find((m) => m.name === modelName);
return entry ? entry.image : null;
// Run a shell command with output streamed to the terminal.
// Terminates the process on a non-zero exit unless opts.ignoreError
// is set; returns the command's exit status (0 when status is null).
function run(cmd, opts = {}) {
  const result = spawnSync(cmd, {
    shell: true,
    stdio: "inherit",
    env: process.env,
    ...opts,
  });
  if (result.error) {
    throw result.error;
  }
  const failed = result.status !== 0;
  if (failed && !opts.ignoreError) {
    // Preserve the child's exit code; fall back to 1 for signal deaths.
    process.exit(result.status ?? 1);
  }
  return result.status ?? 0;
}

function listModels() {
return nimImages.models.map((m) => ({
name: m.name,
image: m.image,
minGpuMemoryMB: m.minGpuMemoryMB,
}));
// Run a shell command and return its trimmed stdout as a string.
// On a non-zero exit (unless opts.ignoreError is set), throws an Error
// whose message is the trimmed stderr, or a generic failure message
// when stderr is empty.
function runCapture(cmd, opts = {}) {
  const result = spawnSync(cmd, {
    shell: true,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"],
    env: process.env,
    ...opts,
  });
  if (result.error) {
    throw result.error;
  }
  if (!opts.ignoreError && result.status !== 0) {
    const stderrText = (result.stderr || "").trim();
    throw new Error(stderrText || `Command failed: ${cmd}`);
  }
  return (result.stdout || "").trim();
}

function detectGpu() {
// Try NVIDIA first — query VRAM
// Heuristically detect NVIDIA Jetson / Thor hardware by probing
// well-known platform files. Each probe is best-effort: command
// failures are ignored and the next probe is tried.
function isJetsonPlatform() {
  const probes = [
    // L4T releases ship this marker file.
    ["test -f /etc/nv_tegra_release && echo yes", (out) => out.trim() === "yes"],
    // Device-tree compatible string names the SoC family.
    ["tr '\\0' '\\n' < /proc/device-tree/compatible", (out) => /nvidia,tegra|nvidia,thor/i.test(out)],
    // Device-tree model string as a last resort.
    ["tr '\\0' '\\n' < /proc/device-tree/model", (out) => /jetson|thor|nvidia/i.test(out)],
  ];

  for (const [cmd, matches] of probes) {
    try {
      const out = runCapture(cmd, { ignoreError: true });
      if (out && matches(out)) {
        return true;
      }
    } catch {}
  }

  return false;
}

export function detectGpu() {
// Read total system RAM in MB from /proc/meminfo (MemTotal is in KiB).
// Returns 0 when the value cannot be determined.
function getSystemMemoryMB() {
  try {
    const raw = runCapture("awk '/MemTotal:/ {print $2}' /proc/meminfo", {
      ignoreError: true,
    });
    if (raw) {
      const kib = parseInt(raw.trim(), 10);
      if (!Number.isNaN(kib) && kib > 0) {
        return Math.floor(kib / 1024);
      }
    }
  } catch {}
  return 0;
}

// 1) Standard NVIDIA path — query VRAM with nvidia-smi
try {
const output = runCapture(
"nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits",
{ ignoreError: true }
);
if (output) {
const lines = output.split("\n").filter((l) => l.trim());
const perGpuMB = lines.map((l) => parseInt(l.trim(), 10)).filter((n) => !isNaN(n));
const perGpuMB = lines
.map((l) => parseInt(l.trim(), 10))
.filter((n) => !isNaN(n) && n > 0);

if (perGpuMB.length > 0) {
const totalMemoryMB = perGpuMB.reduce((a, b) => a + b, 0);
return {
Expand All @@ -46,37 +100,73 @@ function detectGpu() {
}
} catch {}

// Fallback: DGX Spark (GB10) — VRAM not queryable due to unified memory architecture
// 2) Jetson / Thor fallback
try {
if (process.platform === "linux" && isJetsonPlatform()) {
let gpuCount = 1;

try {
const listOutput = runCapture("nvidia-smi -L", { ignoreError: true });
if (listOutput) {
const lines = listOutput.split("\n").filter((l) => l.trim());
if (lines.length > 0) gpuCount = lines.length;
}
} catch {}

const totalMemoryMB = Math.floor(getSystemMemoryMB() / 2);

if (totalMemoryMB > 0) {
return {
type: "nvidia",
name: "NVIDIA Jetson",
count: gpuCount,
totalMemoryMB,
perGpuMB: Math.floor(totalMemoryMB / gpuCount),
nimCapable: true,
jetson: true,
unifiedMemory: true,
};
}

return {
type: "nvidia",
name: "NVIDIA Jetson",
count: gpuCount,
totalMemoryMB: 0,
perGpuMB: 0,
nimCapable: false,
jetson: true,
unifiedMemory: true,
};
}
} catch {}

// 3) Fallback: DGX Spark (GB10)
try {
const nameOutput = runCapture(
"nvidia-smi --query-gpu=name --format=csv,noheader,nounits",
{ ignoreError: true }
);
if (nameOutput && nameOutput.includes("GB10")) {
// GB10 has 128GB unified memory shared with Grace CPU — use system RAM
let totalMemoryMB = 0;
try {
const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true });
if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0;
} catch {}
const totalMemoryMB = getSystemMemoryMB();
return {
type: "nvidia",
count: 1,
totalMemoryMB,
perGpuMB: totalMemoryMB,
nimCapable: true,
spark: true,
unifiedMemory: true,
};
}
} catch {}

// macOS: detect Apple Silicon or discrete GPU
// 4) macOS: detect Apple Silicon or discrete GPU
if (process.platform === "darwin") {
try {
const spOutput = runCapture(
"system_profiler SPDisplaysDataType 2>/dev/null",
{ ignoreError: true }
);
const spOutput = runCapture("system_profiler SPDisplaysDataType", {
ignoreError: true,
});
if (spOutput) {
const chipMatch = spOutput.match(/Chipset Model:\s*(.+)/);
const vramMatch = spOutput.match(/VRAM.*?:\s*(\d+)\s*(MB|GB)/i);
Expand All @@ -90,10 +180,13 @@ function detectGpu() {
memoryMB = parseInt(vramMatch[1], 10);
if (vramMatch[2].toUpperCase() === "GB") memoryMB *= 1024;
} else {
// Apple Silicon shares system RAM — read total memory
try {
const memBytes = runCapture("sysctl -n hw.memsize", { ignoreError: true });
if (memBytes) memoryMB = Math.floor(parseInt(memBytes, 10) / 1024 / 1024);
const memBytes = runCapture("sysctl -n hw.memsize", {
ignoreError: true,
});
if (memBytes) {
memoryMB = Math.floor(parseInt(memBytes, 10) / 1024 / 1024);
}
} catch {}
}

Expand All @@ -114,97 +207,76 @@ function detectGpu() {
return null;
}

function pullNimImage(model) {
const image = getImageForModel(model);
if (!image) {
console.error(` Unknown model: ${model}`);
process.exit(1);
}
console.log(` Pulling NIM image: ${image}`);
run(`docker pull ${shellQuote(image)}`);
return image;
// Derive the Docker container name used for a sandbox's NIM instance.
function containerName(sandboxName) {
  return "nim-" + sandboxName;
}

// Resolve a model alias to its NIM container image reference.
//
// model - alias string (e.g. "llama", "mistral"), typically taken
//         straight from CLI arguments.
// Returns the image reference, or null for unknown aliases.
//
// Uses a Map rather than a plain object so that inherited property
// names ("constructor", "toString", ...) supplied as untrusted input
// cannot resolve to a truthy non-image value.
function getImageForModel(model) {
  const images = new Map([
    ["llama", "nvcr.io/nim/meta/llama-3.1-8b-instruct:latest"],
    ["mistral", "nvcr.io/nim/mistralai/mistral-7b-instruct-v0.3:latest"],
  ]);
  return images.get(model) ?? null;
}

function startNimContainer(sandboxName, model, port = 8000) {
// Start a NIM container for the given sandbox, replacing any existing
// container with the same name. Returns the container name.
//
// sandboxName - used to derive the container name (see containerName)
// model       - alias understood by getImageForModel; exits(1) if unknown
// port        - host port mapped to the NIM service port 8000; exits(1)
//               if it is not a valid TCP port (a NaN here would otherwise
//               produce a malformed `-p NaN:8000` docker flag)
export function startNimContainer(sandboxName, model, port = 8000) {
  const name = containerName(sandboxName);
  const image = getImageForModel(model);
  if (!image) {
    console.error(` Unknown model: ${model}`);
    process.exit(1);
  }

  const hostPort = Number(port);
  if (!Number.isInteger(hostPort) || hostPort <= 0 || hostPort > 65535) {
    console.error(` Invalid port: ${port}`);
    process.exit(1);
  }

  // Remove any stale container with the same name before starting.
  const qn = shellQuote(name);
  run(`docker rm -f ${qn} || true`, { ignoreError: true });

  console.log(` Starting NIM container: ${name}`);

  // Jetson/Thor hosts need the NVIDIA container runtime selected explicitly.
  const runtimeArgs = isJetsonPlatform() ? "--runtime nvidia " : "";
  run(
    `docker run -d --gpus all ${runtimeArgs}-p ${hostPort}:8000 --name ${qn} --shm-size 16g ${shellQuote(image)}`
  );

  return name;
}

function waitForNimHealth(port = 8000, timeout = 300) {
const start = Date.now();
const interval = 5000;
const safePort = Number(port);
console.log(` Waiting for NIM health on port ${safePort} (timeout: ${timeout}s)...`);

while ((Date.now() - start) / 1000 < timeout) {
try {
const result = runCapture(`curl -sf http://localhost:${safePort}/v1/models`, {
ignoreError: true,
});
if (result) {
console.log(" NIM is healthy.");
return true;
}
} catch {}
// Synchronous sleep via spawnSync
require("child_process").spawnSync("sleep", ["5"]);
}
console.error(` NIM did not become healthy within ${timeout}s.`);
return false;
// Print command-line usage for this script's supported subcommands.
function usage() {
  const text = `Usage:
  nim.js detect-gpu
  nim.js start <sandboxName> <model> [port]`;
  console.log(text);
}

// Stop and remove the NIM container associated with a sandbox.
// Both docker invocations are best-effort (errors suppressed) so
// teardown is idempotent even when the container no longer exists.
function stopNimContainer(sandboxName) {
  const name = containerName(sandboxName);
  const quoted = shellQuote(name);
  console.log(` Stopping NIM container: ${name}`);
  for (const verb of ["stop", "rm"]) {
    run(`docker ${verb} ${quoted} 2>/dev/null || true`, { ignoreError: true });
  }
}
// CLI dispatcher: parse process.argv and route to the matching
// subcommand. Unknown or incomplete commands print usage and exit(1).
// (Reconstructed: the original span was corrupted by interleaved
// deleted-side diff lines from the removed nimStatus helper.)
function main() {
  const [, , cmd, ...args] = process.argv;

  switch (cmd) {
    case "detect-gpu": {
      const gpu = detectGpu();
      console.log(JSON.stringify(gpu, null, 2));
      break;
    }

    case "start": {
      const [sandboxName, model, port] = args;
      if (!sandboxName || !model) {
        usage();
        process.exit(1);
      }
      startNimContainer(sandboxName, model, port ? Number(port) : 8000);
      break;
    }

    default:
      usage();
      process.exit(1);
  }
}

module.exports = {
containerName,
getImageForModel,
listModels,
detectGpu,
pullNimImage,
startNimContainer,
waitForNimHealth,
stopNimContainer,
nimStatus,
};
// Run the CLI only when this file is executed directly (not imported).
// NOTE(review): compares the resolved module path against argv[1]
// verbatim — assumes Node was invoked with an absolute, non-symlinked
// script path; a relative or symlinked invocation may not match.
// TODO confirm this is acceptable for this entry point.
const isMain =
  process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1];

if (isMain) {
  main();
}
Loading