diff --git a/docs/reference/commands.mdx b/docs/reference/commands.mdx index 58b0846d9e..0aa7b0ef66 100644 --- a/docs/reference/commands.mdx +++ b/docs/reference/commands.mdx @@ -45,6 +45,17 @@ Print the installed NemoClaw CLI version. $ nemoclaw --version ``` +### `nemoclaw resources` + +Display host hardware inventory and configured sandbox resource profiles. +Use `--json` for machine-readable CPU, memory, GPU, Kubernetes allocatable-capacity, and profile data. + +```console +$ nemoclaw resources [--json] +``` + +If the gateway is not running, Kubernetes allocatable fields are omitted and host CPU/RAM totals are still shown. + ### `nemoclaw onboard` Run the interactive setup wizard (recommended for new installs). @@ -1257,6 +1268,9 @@ These flags toggle optional behaviors during onboarding; set them before running | `NEMOCLAW_OVERLAY_SNAPSHOTTER` | snapshotter name | Selects the containerd overlay snapshotter for sandbox builds. Empty (default) preserves containerd's choice. | | `NEMOCLAW_SKIP_TELEGRAM_REACHABILITY` | `1` to enable | Skips the Telegram bot reachability probe during onboard (useful in restricted networks). | | `NEMOCLAW_CONFIG_ACCEPT_NEW_PATH` | `1` to enable | Accepts a new sandbox config path without an interactive prompt when the stored path differs from the discovered one. | +| `NEMOCLAW_RESOURCE_PROFILE` | profile name or `default` | Selects a sandbox CPU/RAM resource profile from the blueprint during onboarding. `default` means no resource preference, so NemoClaw passes no OpenShell CPU or memory flags. Unknown names fail fast. | +| `NEMOCLAW_CPU` | percentage or Kubernetes CPU quantity | Overrides the selected profile's CPU size passed to OpenShell `--cpu`. Percentages resolve against detected capacity. | +| `NEMOCLAW_RAM` | percentage or Kubernetes memory quantity | Overrides the selected profile's memory size passed to OpenShell `--memory`. Percentages resolve against detected capacity. 
| | `NEMOCLAW_SANDBOX_GPU` | `auto`, `1`, or `0` | Controls sandbox GPU passthrough during onboarding. `auto` enables GPU passthrough when an NVIDIA GPU is detected, `1` requires GPU passthrough, and `0` forces CPU-only sandbox creation. | | `NEMOCLAW_SANDBOX_GPU_DEVICE` | OpenShell GPU device selector | Selects the GPU device passed with `openshell sandbox create --gpu-device`. Requires explicit sandbox GPU enablement with `NEMOCLAW_SANDBOX_GPU=1` (or `--sandbox-gpu` for CLI-driven onboarding); otherwise onboarding rejects the selector instead of treating it as an implicit opt-in. | | `NEMOCLAW_DOCKER_GPU_PATCH` | `0` to disable, anything else to keep the default | Controls the Linux Docker-driver GPU sandbox compatibility patch. Set to `0` only as an escape hatch when the patch fails and you need onboarding to continue without patching the GPU sandbox container. | diff --git a/nemoclaw-blueprint/blueprint.yaml b/nemoclaw-blueprint/blueprint.yaml index 9faae92a98..8b8c91ef88 100644 --- a/nemoclaw-blueprint/blueprint.yaml +++ b/nemoclaw-blueprint/blueprint.yaml @@ -35,6 +35,19 @@ components: name: "openclaw" forward_ports: - 18789 + resource_profiles: + creator: + cpu: "50%" + memory: "50%" + gamer: + cpu: "25%" + memory: "25%" + game-developer: + cpu: "60%" + memory: "60%" + developer: + cpu: "75%" + memory: "75%" inference: profiles: diff --git a/schemas/blueprint.schema.json b/schemas/blueprint.schema.json index ad4f498b89..8627d11de5 100644 --- a/schemas/blueprint.schema.json +++ b/schemas/blueprint.schema.json @@ -62,6 +62,19 @@ "forward_ports": { "type": "array", "items": { "type": "integer", "minimum": 1, "maximum": 65535 } + }, + "resource_profiles": { + "type": "object", + "description": "Named resource profiles for sandbox CPU/memory sizing. Values can be absolute Kubernetes quantities (e.g. '4', '8Gi') or percentages of detected hardware (e.g. 
'25%').", + "additionalProperties": { + "type": "object", + "required": ["cpu", "memory"], + "properties": { + "cpu": { "type": "string", "minLength": 1, "pattern": "^([1-9]\\d?%|100%|\\d+(\\.\\d+)?[mKMGTPE]?i?)$" }, + "memory": { "type": "string", "minLength": 1, "pattern": "^([1-9]\\d?%|100%|\\d+(\\.\\d+)?[mKMGTPE]?i?)$" } + }, + "additionalProperties": false + } } } }, diff --git a/src/commands/resources.test.ts b/src/commands/resources.test.ts new file mode 100644 index 0000000000..a39a888dad --- /dev/null +++ b/src/commands/resources.test.ts @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { beforeEach, describe, expect, it, vi } from "vitest"; + +import ResourcesCommand from "../../dist/commands/resources.js"; + +const rootDir = process.cwd(); + +describe("ResourcesCommand", () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it("returns the hardware resource object in JSON mode", async () => { + const result = await ResourcesCommand.run(["--json"], rootDir); + expect(result).toEqual(expect.objectContaining({ + cpu: expect.objectContaining({ cores: expect.any(Number), model: expect.any(String) }), + memory: expect.objectContaining({ totalMB: expect.any(Number), swapMB: expect.any(Number) }), + })); + }); + + it("prints human-readable output without returning data in text mode", async () => { + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + try { + await expect(ResourcesCommand.run([], rootDir)).resolves.toBeUndefined(); + expect(logSpy).toHaveBeenCalledWith(" Hardware Resources"); + } finally { + logSpy.mockRestore(); + } + }); +}); diff --git a/src/commands/resources.ts b/src/commands/resources.ts new file mode 100644 index 0000000000..ac47783933 --- /dev/null +++ b/src/commands/resources.ts @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { NemoClawCommand } from "../lib/cli/nemoclaw-oclif-command"; +import { getHardwareResources, printHardwareResources } from "../lib/resources-cmd"; + +export default class ResourcesCommand extends NemoClawCommand { + static id = "resources"; + static strict = true; + static enableJsonFlag = true; + static summary = "Show hardware inventory (CPU cores, RAM, GPU VRAM)"; + static description = + "Display available hardware resources including CPU core count and model, " + + "total system RAM and swap, Kubernetes node allocatable capacity (when a " + + "gateway is running), and NVIDIA GPU name and VRAM. Supports --json for " + + "machine-readable output."; + static usage = ["resources [--json]"]; + static examples = ["<%= config.bin %> resources", "<%= config.bin %> resources --json"]; + static flags = {}; + + public async run(): Promise { + await this.parse(ResourcesCommand); + if (this.jsonEnabled()) return getHardwareResources(); + printHardwareResources(false); + } +} diff --git a/src/lib/cli/command-display.ts b/src/lib/cli/command-display.ts index 60bf5818b7..0e53ea6291 100644 --- a/src/lib/cli/command-display.ts +++ b/src/lib/cli/command-display.ts @@ -13,6 +13,7 @@ export type CommandGroup = | "Credentials" | "Backup" | "Upgrade" + | "Resources" | "Cleanup"; /** diff --git a/src/lib/cli/command-registry.test.ts b/src/lib/cli/command-registry.test.ts index 9a7d90ae74..275523be1a 100644 --- a/src/lib/cli/command-registry.test.ts +++ b/src/lib/cli/command-registry.test.ts @@ -17,10 +17,10 @@ import { getRegisteredOclifCommandsMetadata } from "./oclif-metadata"; describe("command-registry", () => { describe("COMMANDS array", () => { - it("should contain exactly 61 commands", () => { - // 27 global (21 visible + 6 hidden help/version aliases) + it("should contain exactly 62 commands", () => { + // 28 global (22 visible + 6 hidden help/version aliases) // 34 sandbox (28 visible + 6 hidden 
shields/config) - expect(COMMANDS).toHaveLength(61); + expect(COMMANDS).toHaveLength(62); }); it("should have no duplicate usage strings", () => { @@ -39,9 +39,9 @@ describe("command-registry", () => { }); describe("globalCommands()", () => { - it("should return exactly 27 entries", () => { - // 21 visible + 6 hidden (help, --help, -h, version, --version, -v) - expect(globalCommands()).toHaveLength(27); + it("should return exactly 28 entries", () => { + // 22 visible + 6 hidden (help, --help, -h, version, --version, -v) + expect(globalCommands()).toHaveLength(28); }); it("every entry has scope global", () => { @@ -65,10 +65,10 @@ describe("command-registry", () => { }); describe("visibleCommands()", () => { - it("should exclude 12 hidden commands (49 visible)", () => { + it("should exclude 12 hidden commands (50 visible)", () => { // 6 hidden global (help, --help, -h, version, --version, -v) + // 6 hidden sandbox (shields×3, config get/set/rotate-token) - expect(visibleCommands()).toHaveLength(49); + expect(visibleCommands()).toHaveLength(50); }); it("no visible command has hidden=true", () => { @@ -171,7 +171,7 @@ describe("command-registry", () => { }); describe("globalCommandTokens()", () => { - it("returns the exact set of 23 tokens matching the global dispatch commands", () => { + it("returns the exact set of 24 tokens matching the global dispatch commands", () => { const tokens = globalCommandTokens(); const expected = new Set([ "onboard", @@ -191,6 +191,7 @@ describe("command-registry", () => { "upgrade-sandboxes", "gc", "inference", + "resources", "help", "version", "--help", @@ -280,6 +281,7 @@ describe("command-registry", () => { "Credentials", "Backup", "Upgrade", + "Resources", "Cleanup", ]); }); diff --git a/src/lib/cli/command-registry.ts b/src/lib/cli/command-registry.ts index 6a4613192a..55f6585d18 100644 --- a/src/lib/cli/command-registry.ts +++ b/src/lib/cli/command-registry.ts @@ -50,6 +50,7 @@ export const GROUP_ORDER: readonly CommandGroup[] = 
[ "Credentials", "Backup", "Upgrade", + "Resources", "Cleanup", ] as const; diff --git a/src/lib/cli/public-display-defaults.ts b/src/lib/cli/public-display-defaults.ts index bc47165ff6..f1b77b50d2 100644 --- a/src/lib/cli/public-display-defaults.ts +++ b/src/lib/cli/public-display-defaults.ts @@ -110,6 +110,14 @@ const PUBLIC_DISPLAY_LAYOUT: Record = { "hidden": true } ], + "resources": [ + { + "group": "Resources", + "order": 900, + "description": "Show hardware inventory (CPU cores, RAM, GPU VRAM)", + "flags": "[--json]" + } + ], "root:version": [ { "group": "Getting Started", diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 70e36b13b5..17c180a611 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -48,6 +48,7 @@ const dockerGpuSandboxCreate: typeof import("./onboard/docker-gpu-sandbox-create const dockerDriverGatewayLaunch: typeof import("./onboard/docker-driver-gateway-launch") = require("./onboard/docker-driver-gateway-launch"); const { findReadableNvidiaCdiSpecFiles, parseDockerCdiSpecDirs }: typeof import("./onboard/docker-cdi") = require("./onboard/docker-cdi"); const { buildSandboxGpuCreateArgs, getSandboxReadyTimeoutSecs }: typeof import("./onboard/sandbox-gpu-create") = require("./onboard/sandbox-gpu-create"); +const { appendResourceFlagsForProfile, selectResourceProfileForSandbox }: typeof import("./onboard/resource-profile-selection") = require("./onboard/resource-profile-selection"); const { isValidProxyHost, isValidProxyPort, @@ -535,8 +536,6 @@ const OPENCLAW_LAUNCH_AGENT_PLIST = "~/Library/LaunchAgents/ai.openclaw.gateway. const BRAVE_SEARCH_HELP_URL = "https://brave.com/search/api/"; -// Re-export shared JSON types under the names used throughout this module. -// See src/lib/core/json-types.ts for the canonical definitions. 
import type { JsonObject as LooseObject, } from "./core/json-types"; @@ -2942,6 +2941,7 @@ async function createSandbox( agent: AgentDefinition | null = null, controlUiPort: number | null = null, sandboxGpuConfig: SandboxGpuConfig | null = null, + resourceProfile: import("./resources-cmd").ResourceProfile | null = null, hermesToolGateways: string[] = [], ) { step(6, 8, "Creating sandbox"); @@ -3531,8 +3531,6 @@ async function createSandbox( }; process.on("exit", cleanupBuildCtx); - // Create sandbox (use -- echo to avoid dropping into interactive shell) - // Pass the base policy so sandbox starts in proxy mode (required for policy updates later) const defaultPolicyPath = path.join( ROOT, "nemoclaw-blueprint", @@ -3606,6 +3604,7 @@ async function createSandbox( }), ]; + appendResourceFlagsForProfile(createArgs, resourceProfile, getOpenshellBinary(), { isNonInteractive, note, prompt, promptOrDefault }); // Create OpenShell providers for messaging credentials so they flow through // the provider/placeholder system instead of raw env vars. 
The L7 proxy // rewrites Authorization headers (Bearer/Bot) and URL-path segments @@ -7340,6 +7339,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { setupMessagingChannels, readMessagingChannelConfigFromEnv, promptValidatedSandboxName, + selectResourceProfileForSandbox: () => selectResourceProfileForSandbox({ isNonInteractive, note, prompt, promptOrDefault }), stopStaleDashboardListenersForSandbox, listRegistrySandboxes: registry.listSandboxes, createSandbox, diff --git a/src/lib/onboard/machine/handlers/sandbox.test.ts b/src/lib/onboard/machine/handlers/sandbox.test.ts index 01ab264521..52cf8a6db2 100644 --- a/src/lib/onboard/machine/handlers/sandbox.test.ts +++ b/src/lib/onboard/machine/handlers/sandbox.test.ts @@ -11,8 +11,9 @@ type Agent = { displayName?: string } | null; type WebSearchConfig = { fetchEnabled: true }; type MessagingChannelConfig = Record; type SandboxGpuConfig = { sandboxGpuEnabled: boolean; mode: string }; +type ResourceProfile = { cpu: string; memory: string }; -function createDeps(overrides: Partial["deps"]> = {}) { +function createDeps(overrides: Partial["deps"]> = {}) { let session = createSession(); const calls = { note: vi.fn(), @@ -30,6 +31,7 @@ function createDeps(overrides: Partial null), setupMessaging: vi.fn(async () => [] as string[]), promptName: vi.fn(async () => "my-assistant"), + selectResourceProfile: vi.fn(async () => null as ResourceProfile | null), stopStale: vi.fn(), createSandbox: vi.fn(async () => "my-assistant"), updateSandbox: vi.fn(), @@ -72,6 +74,7 @@ function createDeps(overrides: Partial null, promptValidatedSandboxName: calls.promptName, + selectResourceProfileForSandbox: calls.selectResourceProfile, stopStaleDashboardListenersForSandbox: calls.stopStale, listRegistrySandboxes: () => ({ sandboxes: [{ name: "old" }] }), createSandbox: calls.createSandbox, @@ -92,9 +95,9 @@ function createDeps(overrides: Partial["deps"], + deps: SandboxStateOptions["deps"], session: Session | null = createSession(), 
-): SandboxStateOptions { +): SandboxStateOptions { return { resume: false, fresh: false, @@ -143,6 +146,7 @@ describe("handleSandboxState", () => { null, null, { sandboxGpuEnabled: false, mode: "0" }, + null, [], ); expect(calls.updateSandbox).toHaveBeenCalledWith("my-assistant", expect.objectContaining({ model: "model", provider: "provider" })); @@ -286,6 +290,7 @@ describe("handleSandboxState", () => { null, null, { sandboxGpuEnabled: false, mode: "0" }, + null, [], ); expect(result.webSearchConfig).toBeNull(); diff --git a/src/lib/onboard/machine/handlers/sandbox.ts b/src/lib/onboard/machine/handlers/sandbox.ts index fa3e0eb08e..032fc74276 100644 --- a/src/lib/onboard/machine/handlers/sandbox.ts +++ b/src/lib/onboard/machine/handlers/sandbox.ts @@ -3,7 +3,7 @@ import type { Session, SessionUpdates } from "../../../state/onboard-session"; -export interface SandboxStateOptions { +export interface SandboxStateOptions { resume: boolean; fresh: boolean; resumeAgentChanged: boolean; @@ -57,6 +57,7 @@ export interface SandboxStateOptions; readMessagingChannelConfigFromEnv(): MessagingChannelConfig | null; promptValidatedSandboxName(agent: Agent): Promise; + selectResourceProfileForSandbox(): Promise; stopStaleDashboardListenersForSandbox(sandboxes: unknown[], sandboxName: string): void; listRegistrySandboxes(): { sandboxes: unknown[] }; createSandbox( @@ -71,6 +72,7 @@ export interface SandboxStateOptions; updateSandboxRegistry(sandboxName: string, updates: Record): void; @@ -101,7 +103,7 @@ function sameEffectiveTelegramRequireMention(left: boolean | null, right: boolea return (left ?? false) === (right ?? false); } -export async function handleSandboxState({ +export async function handleSandboxState({ resume, fresh, resumeAgentChanged, @@ -126,7 +128,8 @@ export async function handleSandboxState): Promise> { const webSearchSupportProbePath = fromDockerfile ? 
deps.resolvePath(fromDockerfile) : null; const webSearchSupported = deps.agentSupportsWebSearch(agent, webSearchSupportProbePath, rootDir); @@ -272,6 +275,7 @@ export async function handleSandboxState = {}): ResourceProfileSelectionDeps { + return { + isNonInteractive: vi.fn(() => false), + note: vi.fn(), + prompt: vi.fn(), + promptOrDefault: vi.fn(), + env: {}, + ...overrides, + }; +} + +describe("selectResourceProfileForSandbox", () => { + let exitSpy: ReturnType; + let errorSpy: ReturnType; + + beforeEach(() => { + vi.clearAllMocks(); + exitSpy = vi.spyOn(process, "exit").mockImplementation(((code?: number) => { + throw new Error(`process.exit(${code})`); + }) as never); + errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + }); + + afterEach(() => { + exitSpy.mockRestore(); + errorSpy.mockRestore(); + }); + + it("selects a named resource profile from the environment", async () => { + const deps = makeDeps({ env: { NEMOCLAW_RESOURCE_PROFILE: "developer" } as NodeJS.ProcessEnv }); + + await expect(selectResourceProfileForSandbox(deps)).resolves.toEqual({ + cpu: "75%", + memory: "75%", + }); + + expect(deps.note).toHaveBeenCalledWith(" Resource profile (env): developer"); + expect(deps.promptOrDefault).not.toHaveBeenCalled(); + }); + + it("treats the default environment profile as no resource preference", async () => { + const deps = makeDeps({ env: { NEMOCLAW_RESOURCE_PROFILE: "default" } as NodeJS.ProcessEnv }); + + await expect(selectResourceProfileForSandbox(deps)).resolves.toBeNull(); + + expect(deps.note).toHaveBeenCalledWith(" Resource profile (env): default (OpenShell defaults)"); + expect(deps.promptOrDefault).not.toHaveBeenCalled(); + }); + + it("rejects unknown environment-selected profiles", async () => { + const deps = makeDeps({ env: { NEMOCLAW_RESOURCE_PROFILE: "missing" } as NodeJS.ProcessEnv }); + + await expect(selectResourceProfileForSandbox(deps)).rejects.toThrow("process.exit(1)"); + + expect(errorSpy).toHaveBeenCalledWith(" 
Unknown resource profile: 'missing'"); + }); + + it("applies CPU and RAM env overrides without prompting", async () => { + const deps = makeDeps({ + env: { + NEMOCLAW_CPU: "4", + NEMOCLAW_RAM: "8Gi", + } as NodeJS.ProcessEnv, + isNonInteractive: vi.fn(() => true), + }); + + await expect(selectResourceProfileForSandbox(deps)).resolves.toEqual({ + cpu: "4", + memory: "8Gi", + }); + + expect(deps.note).toHaveBeenCalledWith(" Resource overrides (env): cpu=4, ram=8Gi"); + expect(deps.promptOrDefault).not.toHaveBeenCalled(); + }); + + it("returns a menu-selected profile", async () => { + const deps = makeDeps({ promptOrDefault: vi.fn().mockResolvedValue("2") }); + + await expect(selectResourceProfileForSandbox(deps)).resolves.toEqual({ + cpu: "25%", + memory: "25%", + }); + + expect(deps.promptOrDefault).toHaveBeenCalledWith(" Choose [6]: ", null, "6"); + }); + + it("fails fast for non-numeric or out-of-range menu choices", async () => { + const deps = makeDeps({ promptOrDefault: vi.fn().mockResolvedValue("99") }); + + await expect(selectResourceProfileForSandbox(deps)).rejects.toThrow("process.exit(1)"); + + expect(errorSpy).toHaveBeenCalledWith(" Invalid resource profile selection '99'. 
Choose a number from 1 to 6."); + }); + + it("collects a custom profile and validates CPU and RAM", async () => { + const deps = makeDeps({ + promptOrDefault: vi.fn().mockResolvedValue("5"), + prompt: vi + .fn() + .mockResolvedValueOnce("25%") + .mockResolvedValueOnce("25%"), + }); + + await expect(selectResourceProfileForSandbox(deps)).resolves.toEqual({ + cpu: "25%", + memory: "25%", + }); + + expect(deps.prompt).toHaveBeenCalledTimes(2); + }); + + it("exits when custom profile validation fails", async () => { + const deps = makeDeps({ + promptOrDefault: vi.fn().mockResolvedValue("5"), + prompt: vi + .fn() + .mockResolvedValueOnce("101%") + .mockResolvedValueOnce("25%"), + }); + + await expect(selectResourceProfileForSandbox(deps)).rejects.toThrow("process.exit(1)"); + + expect(errorSpy).toHaveBeenCalledWith(" Invalid percentage '101%': must be an integer between 1% and 100%"); + }); +}); diff --git a/src/lib/onboard/resource-profile-selection.ts b/src/lib/onboard/resource-profile-selection.ts new file mode 100644 index 0000000000..c60d0e6c26 --- /dev/null +++ b/src/lib/onboard/resource-profile-selection.ts @@ -0,0 +1,135 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0

import {
  appendResourceFlags,
  getHardwareResources,
  loadResourceProfiles,
  resolveResourceValue,
  type ResourceProfile,
} from "../resources-cmd";

/**
 * Collaborators injected by the onboarding flow so that resource-profile
 * selection can run without a real terminal (the unit tests pass mocks).
 *
 * - `isNonInteractive` — when it returns true the menu is never shown.
 * - `note` — sink for informational one-line messages.
 * - `prompt` / `promptOrDefault` — ask the user a question and resolve with
 *   the raw answer (callers trim it).
 * - `env` — environment to read; falls back to `process.env` when omitted.
 */
export type ResourceProfileSelectionDeps = {
  isNonInteractive: () => boolean;
  note: (message: string) => void;
  prompt: (question: string) => Promise<string>;
  promptOrDefault: (
    question: string,
    envVar: string | null,
    defaultValue: string,
  ) => Promise<string>;
  env?: NodeJS.ProcessEnv;
};

/** True when NEMOCLAW_CPU or NEMOCLAW_RAM is set to a non-empty value. */
function hasResourceEnvOverrides(env: NodeJS.ProcessEnv): boolean {
  return Boolean(env.NEMOCLAW_CPU || env.NEMOCLAW_RAM);
}

/**
 * Layer the NEMOCLAW_CPU / NEMOCLAW_RAM overrides on top of the selected
 * profile.
 *
 * @param selectedProfile Profile chosen so far, or null for "no preference".
 * @param deps Selection collaborators; only `env` and `note` are used.
 * @returns `selectedProfile` itself when neither variable is set, otherwise a
 *   new object (the input is never mutated) with the overrides applied.
 */
function applyResourceEnvOverrides(
  selectedProfile: ResourceProfile | null,
  deps: ResourceProfileSelectionDeps,
): ResourceProfile | null {
  const env = deps.env ?? process.env;
  if (!hasResourceEnvOverrides(env)) return selectedProfile;

  // NOTE(review): with no profile selected and only one of the two variables
  // set, the other field stays "" — confirm appendResourceFlags treats an
  // empty value as "do not pass this flag".
  const nextProfile: ResourceProfile = selectedProfile
    ? { ...selectedProfile }
    : { cpu: "", memory: "" };
  if (env.NEMOCLAW_CPU) nextProfile.cpu = env.NEMOCLAW_CPU;
  if (env.NEMOCLAW_RAM) nextProfile.memory = env.NEMOCLAW_RAM;
  deps.note(
    ` Resource overrides (env): cpu=${nextProfile.cpu}, ram=${nextProfile.memory}`,
  );
  return nextProfile;
}

/** Print `message` to stderr and terminate the process with exit code 1. */
function exitWithResourceProfileError(message: string): never {
  console.error(` ${message}`);
  process.exit(1);
}

/**
 * Resolve both fields of `profile` against the given totals and print the
 * result. Any error raised by `resolveResourceValue` propagates to the
 * caller; the custom-profile path relies on that to validate user input.
 */
function printResolvedResourceProfile(profile: ResourceProfile, cpuTotal: number, memTotal: number): void {
  const resolvedCpu = resolveResourceValue(profile.cpu, cpuTotal, "cpu");
  const resolvedMemory = resolveResourceValue(profile.memory, memTotal, "memory");
  console.log(
    ` Resolved: CPU=${resolvedCpu}, RAM=${resolvedMemory}`,
  );
}

/**
 * Decide which CPU/RAM resource profile the new sandbox should use.
 *
 * Order of precedence:
 * 1. `NEMOCLAW_RESOURCE_PROFILE` — `default` yields null, a known profile name
 *    yields a copy of that profile, an unknown name prints the valid names and
 *    exits with code 1.
 * 2. Interactive menu — shown only when profiles exist, the session is
 *    interactive, and neither NEMOCLAW_CPU nor NEMOCLAW_RAM is set. Entries
 *    are the named profiles, then "custom", then "No profile" (the default).
 * 3. Otherwise no profile (null).
 *
 * NEMOCLAW_CPU / NEMOCLAW_RAM are applied on top of whatever was selected.
 *
 * @param deps Prompting, notification and environment collaborators.
 * @returns The profile to apply, or null to leave OpenShell defaults in place.
 */
export async function selectResourceProfileForSandbox(
  deps: ResourceProfileSelectionDeps,
): Promise<ResourceProfile | null> {
  const env = deps.env ?? process.env;
  const availableProfiles = loadResourceProfiles();
  const profileNames = Object.keys(availableProfiles).filter((name) => name !== "default");
  let selectedProfile: ResourceProfile | null = null;

  const envProfile = env.NEMOCLAW_RESOURCE_PROFILE;
  if (envProfile) {
    if (envProfile === "default") {
      selectedProfile = null;
      deps.note(" Resource profile (env): default (OpenShell defaults)");
    } else if (Object.prototype.hasOwnProperty.call(availableProfiles, envProfile)) {
      selectedProfile = { ...availableProfiles[envProfile] };
      deps.note(` Resource profile (env): ${envProfile}`);
    } else {
      console.error(` Unknown resource profile: '${envProfile}'`);
      console.error(` Valid profiles: ${["default", ...profileNames].join(", ")}`);
      process.exit(1);
    }
  } else if (profileNames.length > 0 && !deps.isNonInteractive() && !hasResourceEnvOverrides(env)) {
    // Menu numbering is 1-based: named profiles first, then the two fixed entries.
    const customChoice = profileNames.length + 1;
    const noProfileChoice = profileNames.length + 2;

    console.log("");
    console.log(" Resource profiles:");
    profileNames.forEach((name: string, i: number) => {
      const p = availableProfiles[name];
      console.log(
        ` ${i + 1}) ${name} (cpu=${p.cpu}, ram=${p.memory})`,
      );
    });
    console.log(` ${customChoice}) custom (enter values manually)`);
    console.log(` ${noProfileChoice}) No profile (OpenShell defaults)`);

    const choice = await deps.promptOrDefault(
      ` Choose [${noProfileChoice}]: `,
      null,
      String(noProfileChoice),
    );
    const trimmedChoice = choice.trim();
    const idx = Number.parseInt(trimmedChoice, 10) - 1;
    // Digits-only check rejects inputs such as "2abc" that parseInt would accept.
    if (!/^\d+$/.test(trimmedChoice) || idx < 0 || idx > profileNames.length + 1) {
      exitWithResourceProfileError(
        `Invalid resource profile selection '${choice}'. Choose a number from 1 to ${noProfileChoice}.`,
      );
    }

    if (idx < profileNames.length) {
      selectedProfile = { ...availableProfiles[profileNames[idx]] };
      console.log(` Using profile: ${profileNames[idx]}`);
    } else if (idx === profileNames.length) {
      // Probe hardware only here: the probe shells out to docker/kubectl and
      // nvidia-smi, which is wasted latency when a named profile or the
      // default entry is chosen.
      const hw = getHardwareResources();
      console.log("");
      console.log(` Available: ${hw.cpu.cores} CPU cores, ${hw.memory.totalMB} MB RAM`);
      console.log(" Enter values as percentages (e.g. 25%) or absolutes (e.g. 4, 8Gi)");
      console.log("");
      const cpu = (await deps.prompt(` CPU [25%]: `)).trim() || "25%";
      const memory = (await deps.prompt(` RAM [25%]: `)).trim() || "25%";
      selectedProfile = { cpu, memory };
      try {
        // Doubles as validation: malformed values make resolution throw.
        printResolvedResourceProfile(selectedProfile, hw.cpu.cores, hw.memory.totalMB);
      } catch (e: unknown) {
        exitWithResourceProfileError(e instanceof Error ? e.message : String(e));
      }
    }
    // Remaining case (idx === profileNames.length + 1): "No profile" — keep null.
  }

  return applyResourceEnvOverrides(selectedProfile, deps);
}

/**
 * Append OpenShell resource flags for `profile` to `args` (mutated in place).
 *
 * Does nothing when `profile` is null. When `appendResourceFlags` reports that
 * it added nothing, the user is told the sandbox will run with default limits.
 *
 * NOTE(review): `appendResourceFlags` returns a bare boolean, so a profile
 * that failed to resolve produces the same "does not support" message as a
 * genuinely unsupported OpenShell binary — confirm whether that is intended.
 *
 * @param args Argument vector for `openshell sandbox create`.
 * @param profile Selected profile, or null for no resource preference.
 * @param openshellBinary Path of the OpenShell executable to probe.
 * @param deps Selection collaborators; only `note` is used.
 */
export function appendResourceFlagsForProfile(
  args: string[],
  profile: ResourceProfile | null,
  openshellBinary: string,
  deps: ResourceProfileSelectionDeps,
): void {
  if (!profile) return;
  if (!appendResourceFlags(args, profile, openshellBinary)) {
    deps.note(" OpenShell does not support resource flags — sandbox will use default limits.");
  }
}
diff --git a/src/lib/resources-cmd.test.ts b/src/lib/resources-cmd.test.ts new file mode 100644 index 0000000000..9900cff158 --- /dev/null +++ b/src/lib/resources-cmd.test.ts @@ -0,0 +1,158 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { + appendResourceFlags, + getHardwareResources, + loadResourceProfiles, + printHardwareResources, + resolveProfile, + resolveResourceValue, +} from "../../dist/lib/resources-cmd.js"; + +const tempDirs: string[] = []; + +function makeExecutable(contents: string): string { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-resources-test-")); + tempDirs.push(dir); + const file = path.join(dir, "openshell-fake"); + fs.writeFileSync(file, contents, { mode: 0o755 }); + return file; +} + +describe("resources-cmd", () => { + afterEach(() => { + vi.restoreAllMocks(); + for (const dir of tempDirs.splice(0)) { + fs.rmSync(dir, { recursive: true, force: true }); + } + }); + + it("resolves percentage and absolute resource values", () => { + expect(resolveResourceValue("25%", 16, "cpu")).toBe("4"); + expect(resolveResourceValue("25%", 3.5, "cpu")).toBe("875m"); + expect(resolveResourceValue("50%", 8192, "memory")).toBe("4Gi"); + expect(resolveResourceValue("10%", 1024, "memory")).toBe("128Mi"); + expect(resolveResourceValue("750m", 16, "cpu")).toBe("750m"); + expect(resolveResourceValue("8Gi", 8192, "memory")).toBe("8Gi"); + }); + + it("rejects malformed percentages before they reach OpenShell", () => { + expect(() => resolveResourceValue("0%", 16, "cpu")).toThrow("integer between 1% and 100%"); + expect(() => resolveResourceValue("101%", 16, "cpu")).toThrow("integer between 1% and 100%"); + expect(() => resolveResourceValue("12.5%", 16, "cpu")).toThrow("integer between 1% and 100%"); + }); + + it("resolves profiles against Kubernetes allocatable capacity when available", () => { + const resolved = resolveProfile( + { + cpu: "50%", + memory: "25%", + }, + { + cpu: { cores: 16, model: "test-cpu", allocatable: "7500m" }, + memory: { totalMB: 32768, swapMB: 0, 
allocatableMB: 16384 }, + gpu: null, + profiles: null, + }, + ); + + expect(resolved).toEqual({ + cpu: "3750m", + memory: "4Gi", + }); + }); + + it("loads resource profiles from the blueprint", () => { + const profiles = loadResourceProfiles(); + + expect(profiles.developer).toEqual({ + cpu: "75%", + memory: "75%", + }); + expect(profiles["game-developer"].cpu).toBe("60%"); + }); + + it("returns hardware resources and includes parsed blueprint profiles", () => { + const hw = getHardwareResources(); + + expect(hw.cpu.cores).toBeGreaterThan(0); + expect(hw.cpu.model).toEqual(expect.any(String)); + expect(hw.memory.totalMB).toBeGreaterThan(0); + expect(hw.profiles?.creator.cpu).toBe("50%"); + }); + + it("prints JSON and returns the hardware object in JSON mode", () => { + const writeSpy = vi.spyOn(process.stdout, "write").mockImplementation(() => true); + try { + const hw = printHardwareResources(true); + expect(hw.memory.totalMB).toBeGreaterThan(0); + expect(writeSpy).toHaveBeenCalledWith(expect.stringContaining('"memory"')); + } finally { + writeSpy.mockRestore(); + } + }); + + it("appends resolved OpenShell CPU and memory flags when supported", () => { + const openshell = makeExecutable("#!/usr/bin/env sh\necho '--cpu --memory'\n"); + const args = ["sandbox", "create"]; + + const applied = appendResourceFlags( + args, + { cpu: "4", memory: "2Gi" }, + openshell, + ); + + expect(applied).toBe(true); + expect(args).toEqual([ + "sandbox", + "create", + "--cpu", + "4", + "--memory", + "2Gi", + ]); + }); + + it("does not use old request/limit resource flags", () => { + const openshell = makeExecutable("#!/usr/bin/env sh\necho '--cpu-request --cpu-limit --memory-request --memory-limit'\n"); + const args = ["sandbox", "create"]; + + expect(appendResourceFlags(args, { cpu: "4", memory: "2Gi" }, openshell)).toBe(false); + expect(args).toEqual(["sandbox", "create"]); + }); + + it("gracefully skips resource flags when OpenShell does not support them", () => { + const openshell 
= makeExecutable("#!/usr/bin/env sh\necho 'usage: openshell sandbox create'\n"); + const args = ["sandbox", "create"]; + + expect( + appendResourceFlags( + args, + { cpu: "25%", memory: "25%" }, + openshell, + ), + ).toBe(false); + expect(args).toEqual(["sandbox", "create"]); + }); + + it("gracefully skips resource flags when profile resolution fails", () => { + const openshell = makeExecutable("#!/usr/bin/env sh\necho '--cpu --memory'\n"); + const args = ["sandbox", "create"]; + + expect( + appendResourceFlags( + args, + { cpu: "bogus%", memory: "25%" }, + openshell, + ), + ).toBe(false); + expect(args).toEqual(["sandbox", "create"]); + }); +}); diff --git a/src/lib/resources-cmd.ts b/src/lib/resources-cmd.ts new file mode 100644 index 0000000000..1d87d896a9 --- /dev/null +++ b/src/lib/resources-cmd.ts @@ -0,0 +1,322 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Hardware resource discovery for NemoClaw. + * + * Provides `nemoclaw resources` — a read-only inventory of CPU, RAM, GPU, + * and Kubernetes allocatable capacity. Used by the NemoClaw Installer to + * auto-select profiles and models based on available hardware. 
+ */ + +import * as os from "os"; +import * as fs from "fs"; +import * as path from "path"; +import { spawnSync, execSync } from "child_process"; +import * as YAML from "yaml"; + +import { dockerSpawnSync } from "./adapters/docker"; + +const GATEWAY_NAME = "nemoclaw"; + +function getGatewayContainer(): string { + return `openshell-cluster-${GATEWAY_NAME}`; +} + +function getBlueprintPath(): string { + return path.join(__dirname, "..", "..", "nemoclaw-blueprint", "blueprint.yaml"); +} + +function parseResourceProfilesFromBlueprint( + blueprintPath: string, +): Record { + if (!fs.existsSync(blueprintPath)) return {}; + const content = fs.readFileSync(blueprintPath, "utf-8"); + const blueprint = YAML.parse(content); + const raw = blueprint?.components?.sandbox?.resource_profiles; + if (!raw || typeof raw !== "object") return {}; + const profiles: Record = {}; + for (const [name, p] of Object.entries(raw)) { + const prof = p as Record; + const profile = normalizeResourceProfile(prof); + if (profile) profiles[name] = profile; + } + return profiles; +} + +// ── Types ──────────────────────────────────────────────────────── + +export interface ResourceProfile { + cpu: string; + memory: string; +} + +export interface HardwareResources { + cpu: { cores: number; model: string; allocatable?: string }; + memory: { totalMB: number; swapMB: number; allocatableMB?: number }; + gpu: { type: string; name: string; count: number; vramMB: number } | null; + profiles: Record | null; +} + +// ── Implementation ─────────────────────────────────────────────── + +/** + * Query system hardware resources. Returns CPU, memory, and GPU info. + * Also attempts to read Kubernetes node allocatable capacity from the + * gateway's k3s cluster (returns undefined fields if gateway is not running). 
/** Bytes per unit for the Kubernetes memory suffixes handled here ("" = bytes). */
const MEMORY_UNIT_BYTES: Record<string, number> = {
  "": 1,
  k: 1e3,
  M: 1e6,
  G: 1e9,
  T: 1e12,
  Ki: 2 ** 10,
  Mi: 2 ** 20,
  Gi: 2 ** 30,
  Ti: 2 ** 40,
};

/** Convert a Kubernetes memory quantity ("16384Ki", "8Gi", plain bytes) to whole MB. */
function parseMemoryQuantityToMB(value: string): number | undefined {
  const match = /^(\d+(?:\.\d+)?)(Ki|Mi|Gi|Ti|k|M|G|T)?$/.exec(value.trim());
  if (!match) return undefined;
  const amount = Number.parseFloat(match[1] ?? "");
  const unitBytes = MEMORY_UNIT_BYTES[match[2] ?? ""];
  if (Number.isNaN(amount) || unitBytes === undefined) return undefined;
  return Math.round((amount * unitBytes) / (1024 * 1024));
}

/** Extract one kB-valued field of /proc/meminfo text as whole MB. */
function meminfoFieldMB(meminfo: string, field: "MemTotal" | "SwapTotal"): number | undefined {
  const kib = new RegExp(`${field}:\\s+(\\d+)`).exec(meminfo)?.[1];
  return kib === undefined ? undefined : Math.round(Number.parseInt(kib, 10) / 1024);
}

/** Host RAM and swap in MB; uses os.totalmem() when /proc/meminfo is unusable. */
function readHostMemory(): { totalMB: number; swapMB: number } {
  let totalMB: number | undefined;
  let swapMB = 0;
  try {
    // Read the file directly; no shell or `cat` process is needed.
    const meminfo = fs.readFileSync("/proc/meminfo", "utf-8");
    totalMB = meminfoFieldMB(meminfo, "MemTotal");
    swapMB = meminfoFieldMB(meminfo, "SwapTotal") ?? 0;
  } catch {
    // Non-Linux or /proc unreadable — fall back to os.totalmem() below.
  }
  return { totalMB: totalMB ?? Math.round(os.totalmem() / 1024 / 1024), swapMB };
}

/** Allocatable CPU/memory of the first cluster node; empty when the gateway is unreachable. */
function readClusterAllocatable(): { cpu?: string; memoryMB?: number } {
  try {
    const result = dockerSpawnSync(
      ["exec", getGatewayContainer(), "kubectl", "get", "nodes", "-o", "json"],
      { encoding: "utf-8", timeout: 10000, stdio: ["ignore", "pipe", "ignore"] },
    );
    if (result.status !== 0 || !result.stdout) return {};
    const nodes = JSON.parse(String(result.stdout));
    const alloc = nodes?.items?.[0]?.status?.allocatable;
    if (alloc === null || typeof alloc !== "object") return {};
    return {
      cpu: typeof alloc.cpu === "string" && alloc.cpu !== "" ? alloc.cpu : undefined,
      memoryMB: typeof alloc.memory === "string" ? parseMemoryQuantityToMB(alloc.memory) : undefined,
    };
  } catch {
    // Gateway not running or output unparseable — skip k8s allocatable.
    return {};
  }
}

/** NVIDIA GPU summary from nvidia-smi, or null when the tool is missing or reports nothing. */
function detectNvidiaGpu(): HardwareResources["gpu"] {
  try {
    const nvOut = execSync(
      "nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits",
      { encoding: "utf-8", timeout: 5000, stdio: ["ignore", "pipe", "ignore"] },
    );
    const lines = nvOut.split("\n").map((line) => line.trim()).filter(Boolean);
    const first = lines[0];
    if (first === undefined) return null;
    const [name, vramStr] = first.split(",").map((s) => s.trim());
    return {
      type: "nvidia",
      name: name || "unknown",
      count: lines.length,
      vramMB: Number.parseInt(vramStr ?? "", 10) || 0,
    };
  } catch {
    // nvidia-smi not available
    return null;
  }
}

/**
 * Query system hardware resources: CPU, memory, GPU and blueprint profiles.
 *
 * Every probe is best-effort. Kubernetes allocatable fields stay undefined
 * when the gateway cluster cannot be queried, `gpu` is null without a working
 * nvidia-smi, and `profiles` is null when the blueprint is missing, unparseable
 * or defines no profiles. With several GPUs, `name` and `vramMB` describe the
 * first device and `count` is the number of devices.
 */
export function getHardwareResources(): HardwareResources {
  const cpus = os.cpus();
  const cpuModel = cpus[0]?.model?.trim() || "unknown";
  const { totalMB, swapMB } = readHostMemory();
  const allocatable = readClusterAllocatable();
  const gpu = detectNvidiaGpu();

  // Resource profiles from blueprint.yaml (CPU/RAM only)
  let profiles: Record<string, ResourceProfile> | null = null;
  try {
    const parsedProfiles = parseResourceProfilesFromBlueprint(getBlueprintPath());
    profiles = Object.keys(parsedProfiles).length > 0 ? parsedProfiles : null;
  } catch {
    // blueprint.yaml missing or unparseable — skip profiles
  }

  return {
    cpu: { cores: cpus.length, model: cpuModel, allocatable: allocatable.cpu },
    memory: { totalMB, swapMB, allocatableMB: allocatable.memoryMB },
    gpu,
    profiles,
  };
}

/**
 * Print hardware resources and return them.
 *
 * JSON mode writes one line of JSON to stdout for machine parsing. Human mode
 * prints a formatted table via console.log. A profile whose values cannot be
 * resolved is listed with the reason instead of aborting the whole listing.
 *
 * @param json - Emit machine-readable JSON instead of the table.
 */
export function printHardwareResources(json: boolean): HardwareResources {
  const hw = getHardwareResources();
  if (json) {
    process.stdout.write(JSON.stringify(hw) + "\n");
    return hw;
  }
  console.log("");
  console.log(" Hardware Resources");
  console.log(" " + "\u2500".repeat(44));
  console.log(` CPU: ${hw.cpu.cores} cores (${hw.cpu.model})`);
  if (hw.cpu.allocatable) {
    console.log(` k8s allocatable: ${hw.cpu.allocatable}`);
  }
  console.log(` RAM: ${hw.memory.totalMB} MB + ${hw.memory.swapMB} MB swap`);
  if (hw.memory.allocatableMB) {
    console.log(` k8s allocatable: ${hw.memory.allocatableMB} MB`);
  }
  if (hw.gpu) {
    console.log(` GPU: ${hw.gpu.name}`);
    console.log(` VRAM: ${hw.gpu.vramMB} MB (${hw.gpu.count} device${hw.gpu.count > 1 ? "s" : ""})`);
  } else {
    console.log(" GPU: not detected");
  }
  if (hw.profiles && Object.keys(hw.profiles).length > 0) {
    console.log("");
    console.log(" Resource Profiles:");
    for (const [name, p] of Object.entries(hw.profiles)) {
      try {
        const resolved = resolveProfile(p, hw);
        const cpuStr = p.cpu.endsWith("%")
          ? `${p.cpu} \u2192 ${resolved.cpu} cores`
          : `${p.cpu} cores`;
        const ramStr = p.memory.endsWith("%")
          ? `${p.memory} \u2192 ${resolved.memory}`
          : p.memory;
        console.log(` ${name}: cpu=${cpuStr}, ram=${ramStr}`);
      } catch (err: unknown) {
        // An invalid percentage in one profile must not hide the inventory.
        const reason = err instanceof Error ? err.message : String(err);
        console.log(` ${name}: cpu=${p.cpu}, ram=${p.memory} (unresolvable: ${reason})`);
      }
    }
  }
  console.log(" " + "\u2500".repeat(44));
  console.log("");
  return hw;
}

/**
 * Resolve a resource value that may be a percentage or an absolute quantity.
 *
 * @param value - Percentage ("25%") or absolute quantity ("4", "500m", "8Gi").
 * @param total - Capacity a percentage refers to: cores for "cpu", MB for "memory".
 * @param unit - Which kind of quantity to produce.
 * @returns "" for an empty value; the trimmed input for absolute quantities;
 *   otherwise whole cores or millicores (at least "1m") for CPU, and Mi (at
 *   least "128Mi") below 4096 MB or whole Gi (rounded down) above for memory.
 * @throws Error when a percentage is not an integer from 1% to 100%, or when
 *   a percentage is requested against a negative or non-finite total.
 */
export function resolveResourceValue(
  value: string,
  total: number,
  unit: "cpu" | "memory",
): string {
  if (!value) return "";
  const trimmed = value.trim();
  // Absolute value — pass through as-is
  if (!trimmed.endsWith("%")) return trimmed;

  // Strict validation: only accept integers 1-100 followed by %
  if (!/^(?:[1-9]\d?|100)%$/.test(trimmed)) {
    throw new Error(`Invalid percentage '${trimmed}': must be an integer between 1% and 100%`);
  }
  if (!Number.isFinite(total) || total < 0) {
    // Otherwise the result would be a flag value such as "NaNGi".
    throw new Error(`Cannot resolve '${trimmed}' against invalid ${unit} total '${total}'`);
  }
  const pct = Number.parseInt(trimmed.slice(0, -1), 10);

  if (unit === "cpu") {
    // Integer millicore arithmetic keeps float error out of the floor().
    const totalMilli = Math.round(total * 1000);
    const milliCores = Math.max(1, Math.floor((totalMilli * pct) / 100));
    return milliCores % 1000 === 0 ? String(milliCores / 1000) : `${milliCores}m`;
  }

  // Memory: use Mi for precision on smaller machines, Gi for larger
  const resultMB = Math.floor((total * pct) / 100);
  if (resultMB < 4096) {
    return `${Math.max(128, resultMB)}Mi`;
  }
  return `${Math.max(1, Math.floor(resultMB / 1024))}Gi`;
}

/**
 * Parse a Kubernetes CPU quantity into cores.
 * Handles plain quantities ("16", "3.5") and millicores ("7500m" -> 7.5).
 *
 * @returns The number of cores, or null when the text is not a plain decimal
 *   number optionally followed by "m" (trailing garbage is rejected).
 */
function parseCpuQuantity(value: string): number | null {
  const match = /^(\d+(?:\.\d+)?|\.\d+)(m?)$/.exec(value.trim());
  if (!match) return null;
  const amount = Number.parseFloat(match[1] ?? "");
  if (Number.isNaN(amount)) return null;
  return match[2] === "m" ? amount / 1000 : amount;
}
/**
 * Resolve profile percentages to absolutes. Prefers k8s allocatable capacity
 * when available (accounts for kubelet/system reservations); falls back to
 * host totals when the gateway is not running or reports an unusable
 * (unparseable or non-positive) capacity.
 *
 * @throws Error when the profile contains an invalid percentage.
 */
export function resolveProfile(profile: ResourceProfile, hw: HardwareResources): ResourceProfile {
  const allocatableCores = hw.cpu.allocatable ? parseCpuQuantity(hw.cpu.allocatable) : null;
  const cpuTotal =
    allocatableCores !== null && allocatableCores > 0 ? allocatableCores : hw.cpu.cores;
  const allocatableMB = hw.memory.allocatableMB;
  const memTotalMB =
    allocatableMB !== undefined && allocatableMB > 0 ? allocatableMB : hw.memory.totalMB;
  return {
    cpu: resolveResourceValue(profile.cpu, cpuTotal, "cpu"),
    memory: resolveResourceValue(profile.memory, memTotalMB, "memory"),
  };
}

/**
 * Append resource flags to an openshell sandbox create args array.
 * Resolves percentage values against detected hardware before passing.
 * Gracefully degrades: checks `openshell sandbox create --help` for flag
 * support and skips silently if the installed OpenShell doesn't have them.
 *
 * @param args - Argument array that is extended in place.
 * @param profile - CPU/memory sizing to apply.
 * @param openshellBinary - OpenShell executable used for the help probe.
 * @returns true when at least one flag was appended; false when the probe
 *   fails, the flags are unsupported, the profile cannot be resolved, or the
 *   profile resolves to nothing. `args` is untouched whenever false is returned.
 */
export function appendResourceFlags(
  args: string[],
  profile: ResourceProfile,
  openshellBinary = "openshell",
): boolean {
  let help: string;
  try {
    const probe = spawnSync(openshellBinary, ["sandbox", "create", "--help"], {
      encoding: "utf-8",
      timeout: 5000,
      stdio: ["ignore", "pipe", "ignore"],
    });
    // A missing binary yields status null rather than an exception.
    if (probe.status !== 0) return false;
    help = probe.stdout || "";
  } catch {
    return false;
  }

  // Match the exact flag in any help layout ("--cpu <N>", "--cpu=<N>",
  // "[--cpu]", "--cpu,") without matching longer names such as "--cpu-shares".
  const advertises = (name: string): boolean =>
    new RegExp(`(?<![\\w-])--${name}(?![\\w-])`).test(help);
  if (!advertises("cpu") || !advertises("memory")) return false;

  let resolved: ResourceProfile;
  try {
    // Resolve both values before touching args so a failure leaves it intact.
    resolved = resolveProfile(profile, getHardwareResources());
  } catch {
    return false;
  }

  const lengthBefore = args.length;
  if (resolved.cpu) args.push("--cpu", resolved.cpu);
  if (resolved.memory) args.push("--memory", resolved.memory);
  return args.length > lengthBefore;
}

/** Turn one raw YAML scalar into a quantity string, or null when unusable. */
function toQuantityString(value: unknown): string | null {
  if (typeof value === "number") {
    return Number.isFinite(value) && value > 0 ? String(value) : null;
  }
  if (typeof value === "string") {
    const trimmed = value.trim();
    return trimmed === "" ? null : trimmed;
  }
  // Objects, booleans, null: String() would fabricate a bogus quantity.
  return null;
}

/**
 * Validate one raw profile entry from the blueprint.
 *
 * @param prof - Parsed YAML mapping; null/undefined are tolerated because the
 *   value originates from untyped YAML.
 * @returns The profile with `cpu` and `memory` as trimmed strings, or null
 *   when either is missing, empty, or not a string / positive number.
 */
function normalizeResourceProfile(
  prof: Record<string, unknown> | null | undefined,
): ResourceProfile | null {
  if (prof === null || prof === undefined || typeof prof !== "object") return null;
  const cpu = toQuantityString(prof.cpu);
  const memory = toQuantityString(prof.memory);
  if (cpu === null || memory === null) return null;
  return { cpu, memory };
}
/**
 * Load the resource profiles declared in blueprint.yaml.
 *
 * Never throws: a missing file, a blueprint without profiles, or any error
 * raised while reading or parsing yields an empty object.
 */
export function loadResourceProfiles(): Record<string, ResourceProfile> {
  let profiles: Record<string, ResourceProfile> = {};
  try {
    profiles = parseResourceProfilesFromBlueprint(getBlueprintPath());
  } catch {
    // Keep the empty default.
  }
  return profiles;
}

/**
 * Entry point of the `nemoclaw resources` command.
 *
 * @param argv - Command arguments; `--json` anywhere selects JSON output.
 */
export function runResourcesCommand(argv: string[]): void {
  const wantsJson = argv.some((arg) => arg === "--json");
  printHardwareResources(wantsJson);
}