Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
node_modules
/dist
!nemoclaw/dist
.git
*.pyc
__pycache__
Expand Down
44 changes: 27 additions & 17 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,12 @@ RUN chmod +x /usr/local/bin/nemoclaw-start
# Build args for config that varies per deployment.
# nemoclaw onboard passes these at image build time.
ARG NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b
ARG NEMOCLAW_PROVIDER_KEY=nvidia
ARG NEMOCLAW_PRIMARY_MODEL_REF=nvidia/nemotron-3-super-120b-a12b
ARG CHAT_UI_URL=http://127.0.0.1:18789
ARG NEMOCLAW_INFERENCE_BASE_URL=https://inference.local/v1
ARG NEMOCLAW_INFERENCE_API=openai-completions
ARG NEMOCLAW_INFERENCE_COMPAT_B64=e30=
# Unique per build to ensure each image gets a fresh auth token.
# Pass --build-arg NEMOCLAW_BUILD_ID=$(date +%s) to bust the cache.
ARG NEMOCLAW_BUILD_ID=default
Expand All @@ -89,7 +94,12 @@ ARG NEMOCLAW_BUILD_ID=default
# via os.environ, never via string interpolation into Python source code.
# Direct ARG interpolation into python3 -c is a code injection vector (C-2).
ENV NEMOCLAW_MODEL=${NEMOCLAW_MODEL} \
CHAT_UI_URL=${CHAT_UI_URL}
NEMOCLAW_PROVIDER_KEY=${NEMOCLAW_PROVIDER_KEY} \
NEMOCLAW_PRIMARY_MODEL_REF=${NEMOCLAW_PRIMARY_MODEL_REF} \
CHAT_UI_URL=${CHAT_UI_URL} \
NEMOCLAW_INFERENCE_BASE_URL=${NEMOCLAW_INFERENCE_BASE_URL} \
NEMOCLAW_INFERENCE_API=${NEMOCLAW_INFERENCE_API} \
NEMOCLAW_INFERENCE_COMPAT_B64=${NEMOCLAW_INFERENCE_COMPAT_B64}

WORKDIR /sandbox
USER sandbox
Expand All @@ -100,30 +110,30 @@ USER sandbox
# Build args (NEMOCLAW_MODEL, CHAT_UI_URL) customize per deployment.
# Auth token is generated per build so each image has a unique token.
RUN python3 -c "\
import json, os, secrets; \
import base64, json, os, secrets; \
from urllib.parse import urlparse; \
model = os.environ['NEMOCLAW_MODEL']; \
chat_ui_url = os.environ['CHAT_UI_URL']; \
provider_key = os.environ['NEMOCLAW_PROVIDER_KEY']; \
primary_model_ref = os.environ['NEMOCLAW_PRIMARY_MODEL_REF']; \
inference_base_url = os.environ['NEMOCLAW_INFERENCE_BASE_URL']; \
inference_api = os.environ['NEMOCLAW_INFERENCE_API']; \
inference_compat = json.loads(base64.b64decode(os.environ['NEMOCLAW_INFERENCE_COMPAT_B64']).decode('utf-8')); \
parsed = urlparse(chat_ui_url); \
chat_origin = f'{parsed.scheme}://{parsed.netloc}' if parsed.scheme and parsed.netloc else 'http://127.0.0.1:18789'; \
origins = ['http://127.0.0.1:18789']; \
origins = list(dict.fromkeys(origins + [chat_origin])); \
providers = { \
provider_key: { \
'baseUrl': inference_base_url, \
'apiKey': 'unused', \
'api': inference_api, \
'models': [{**({'compat': inference_compat} if inference_compat else {}), 'id': model, 'name': primary_model_ref, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
} \
}; \
config = { \
'agents': {'defaults': {'model': {'primary': f'inference/{model}'}}}, \
'models': {'mode': 'merge', 'providers': { \
'nvidia': { \
'baseUrl': 'https://inference.local/v1', \
'apiKey': 'openshell-managed', \
'api': 'openai-completions', \
'models': [{'id': model.split('/')[-1], 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
}, \
'inference': { \
'baseUrl': 'https://inference.local/v1', \
'apiKey': 'unused', \
'api': 'openai-completions', \
'models': [{'id': model, 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
} \
}}, \
'agents': {'defaults': {'model': {'primary': primary_model_ref}}}, \
'models': {'mode': 'merge', 'providers': providers}, \
'channels': {'defaults': {'configWrites': False}}, \
'gateway': { \
'mode': 'local', \
Expand Down
33 changes: 23 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ When the install completes, a summary confirms the running environment:
```text
──────────────────────────────────────────────────
Sandbox my-assistant (Landlock + seccomp + netns)
Model nvidia/nemotron-3-super-120b-a12b (NVIDIA Endpoint API)
Model nvidia/nemotron-3-super-120b-a12b (NVIDIA Endpoints)
──────────────────────────────────────────────────
Run: nemoclaw my-assistant connect
Status: nemoclaw my-assistant status
Expand Down Expand Up @@ -162,14 +162,14 @@ curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/refs/heads/main/uni

## How It Works

NemoClaw installs the NVIDIA OpenShell runtime and Nemotron models, then uses a versioned blueprint to create a sandboxed environment where every network request, file access, and inference call is governed by declarative policy. The `nemoclaw` CLI orchestrates the full stack: OpenShell gateway, sandbox, inference provider, and network policy.
NemoClaw installs the NVIDIA OpenShell runtime, then creates a sandboxed OpenClaw environment where every network request, file access, and inference call is governed by declarative policy. The `nemoclaw` CLI orchestrates the full stack: OpenShell gateway, sandbox, inference provider, and network policy.

| Component | Role |
|------------------|-------------------------------------------------------------------------------------------|
| **Plugin** | TypeScript CLI commands for launch, connect, status, and logs. |
| **Blueprint** | Versioned Python artifact that orchestrates sandbox creation, policy, and inference setup. |
| **Sandbox** | Isolated OpenShell container running OpenClaw with policy-enforced egress and filesystem. |
| **Inference** | NVIDIA Endpoint model calls, routed through the OpenShell gateway, transparent to the agent. |
| **Inference** | Provider-routed model calls, routed through the OpenShell gateway, transparent to the agent. |

The blueprint lifecycle follows four stages: resolve the artifact, verify its digest, plan the resources, and apply through the OpenShell CLI.

Expand All @@ -179,15 +179,28 @@ When something goes wrong, errors may originate from either NemoClaw or the Open

## Inference

Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the NVIDIA Endpoint provider.
Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the provider you selected during onboarding.

| Provider | Model | Use Case |
|--------------|--------------------------------------|-------------------------------------------------|
| NVIDIA Endpoint | `nvidia/nemotron-3-super-120b-a12b` | Production. Requires an NVIDIA API key. |
Supported non-experimental onboarding paths:

Get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup.
| Provider | Notes |
|---|---|
| NVIDIA Endpoints | Curated hosted models on `integrate.api.nvidia.com`. |
| OpenAI | Curated GPT models plus `Other...` for manual model entry. |
| Other OpenAI-compatible endpoint | For proxies and compatible gateways. |
| Anthropic | Curated Claude models plus `Other...` for manual model entry. |
| Other Anthropic-compatible endpoint | For Claude proxies and compatible gateways. |
| Google Gemini | Google's OpenAI-compatible endpoint. |

Local inference options such as Ollama and vLLM are still experimental. On macOS, they also depend on OpenShell host-routing support in addition to the local service itself being reachable on the host.
During onboarding, NemoClaw validates the selected provider and model before it creates the sandbox:

- OpenAI-compatible providers: tries `/responses` first, then `/chat/completions`
- Anthropic-compatible providers: tries `/v1/messages`
- If validation fails, the wizard prompts you to fix the selection before continuing

Credentials stay on the host in `~/.nemoclaw/credentials.json`. The sandbox only sees the routed `inference.local` endpoint, not your raw provider key.

Local Ollama is supported in the standard onboarding flow. Local vLLM remains experimental. On macOS, host-routed local inference additionally depends on OpenShell host-routing support, and the local service itself must be reachable on the host.

---

Expand Down Expand Up @@ -252,7 +265,7 @@ Refer to the documentation for more information on NemoClaw.
- [Overview](https://docs.nvidia.com/nemoclaw/latest/about/overview.html): Learn what NemoClaw does and how it fits together.
- [How It Works](https://docs.nvidia.com/nemoclaw/latest/about/how-it-works.html): Learn about the plugin, blueprint, and sandbox lifecycle.
- [Architecture](https://docs.nvidia.com/nemoclaw/latest/reference/architecture.html): Learn about the plugin structure, blueprint lifecycle, and sandbox environment.
- [Inference Profiles](https://docs.nvidia.com/nemoclaw/latest/reference/inference-profiles.html): Learn about the NVIDIA Endpoint inference configuration.
- [Inference Profiles](https://docs.nvidia.com/nemoclaw/latest/reference/inference-profiles.html): Learn how NemoClaw configures routed inference providers.
- [Network Policies](https://docs.nvidia.com/nemoclaw/latest/reference/network-policies.html): Learn about egress control and policy customization.
- [CLI Commands](https://docs.nvidia.com/nemoclaw/latest/reference/commands.html): Learn about the full command reference.
- [Troubleshooting](https://docs.nvidia.com/nemoclaw/latest/reference/troubleshooting.html): Troubleshoot common issues and resolution steps.
Expand Down
99 changes: 94 additions & 5 deletions bin/lib/credentials.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
const fs = require("fs");
const path = require("path");
const readline = require("readline");
const { execSync } = require("child_process");

const CREDS_DIR = path.join(process.env.HOME || "/tmp", ".nemoclaw");
const CREDS_FILE = path.join(CREDS_DIR, "credentials.json");
Expand All @@ -31,8 +30,98 @@ function getCredential(key) {
return creds[key] || null;
}

function prompt(question) {
return new Promise((resolve) => {
function promptSecret(question) {
  return new Promise((resolve, reject) => {
    const input = process.stdin;
    const output = process.stderr;
    let buffer = "";
    let rawActive = false;
    let settled = false;

    // Detach the data listener and restore the terminal to its normal state.
    const teardown = () => {
      input.removeListener("data", handleData);
      if (rawActive && typeof input.setRawMode === "function") {
        input.setRawMode(false);
      }
      if (typeof input.pause === "function") {
        input.pause();
      }
    };

    // Settle the promise exactly once, cleaning up and moving to a new line.
    const settle = (fn, value) => {
      if (settled) return;
      settled = true;
      teardown();
      output.write("\n");
      fn(value);
    };

    function handleData(chunk) {
      const text = chunk.toString("utf8");
      let i = 0;
      while (i < text.length) {
        const ch = text[i];

        if (ch === "\u0003") {
          // Ctrl-C: surface as a rejection tagged with code SIGINT.
          settle(reject, Object.assign(new Error("Prompt interrupted"), { code: "SIGINT" }));
          return;
        }

        if (ch === "\r" || ch === "\n") {
          // Enter: resolve with the trimmed secret.
          settle(resolve, buffer.trim());
          return;
        }

        if (ch === "\u0008" || ch === "\u007f") {
          // Backspace / DEL: drop the last buffered character.
          buffer = buffer.slice(0, -1);
          i += 1;
          continue;
        }

        if (ch === "\u001b") {
          // Skip terminal escape/control sequences such as Delete, arrows,
          // Home/End, etc. while leaving the buffered secret untouched.
          const seq = text
            .slice(i)
            .match(/^\u001b(?:\[[0-9;?]*[~A-Za-z]|\][^\u0007]*\u0007|.)/);
          i += seq ? seq[0].length : 1;
          continue;
        }

        // Buffer printable characters; silently drop remaining control codes.
        if (ch >= " ") {
          buffer += ch;
        }
        i += 1;
      }
    }

    output.write(question);
    input.setEncoding("utf8");
    if (typeof input.resume === "function") {
      input.resume();
    }
    if (typeof input.setRawMode === "function") {
      input.setRawMode(true);
      rawActive = true;
    }
    input.on("data", handleData);
  });
}

function prompt(question, opts = {}) {
return new Promise((resolve, reject) => {
const silent = opts.secret === true && process.stdin.isTTY && process.stderr.isTTY;
if (silent) {
promptSecret(question)
.then(resolve)
.catch((err) => {
if (err && err.code === "SIGINT") {
reject(err);
process.kill(process.pid, "SIGINT");
return;
}
reject(err);
});
return;
}
const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
rl.question(question, (answer) => {
rl.close();
Expand Down Expand Up @@ -67,7 +156,7 @@ async function ensureApiKey() {
console.log(" └─────────────────────────────────────────────────────────────────┘");
console.log("");

key = await prompt(" NVIDIA API Key: ");
key = await prompt(" NVIDIA API Key: ", { secret: true });

if (!key || !key.startsWith("nvapi-")) {
console.error(" Invalid key. Must start with nvapi-");
Expand Down Expand Up @@ -114,7 +203,7 @@ async function ensureGithubToken() {
console.log(" └──────────────────────────────────────────────────┘");
console.log("");

token = await prompt(" GitHub Token: ");
token = await prompt(" GitHub Token: ", { secret: true });

if (!token) {
console.error(" Token required for deploy (repo is private).");
Expand Down
58 changes: 57 additions & 1 deletion bin/lib/inference-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const { DEFAULT_OLLAMA_MODEL } = require("./local-inference");

function getProviderSelectionConfig(provider, model) {
switch (provider) {
case "nvidia-prod":
case "nvidia-nim":
return {
endpointType: "custom",
Expand All @@ -27,7 +28,62 @@ function getProviderSelectionConfig(provider, model) {
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
provider,
providerLabel: "NVIDIA Endpoint API",
providerLabel: "NVIDIA Endpoints",
};
case "openai-api":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || "gpt-5.4",
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: "OPENAI_API_KEY",
provider,
providerLabel: "OpenAI",
};
case "anthropic-prod":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || "claude-sonnet-4-6",
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: "ANTHROPIC_API_KEY",
provider,
providerLabel: "Anthropic",
};
case "compatible-anthropic-endpoint":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || "custom-anthropic-model",
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: "COMPATIBLE_ANTHROPIC_API_KEY",
provider,
providerLabel: "Other Anthropic-compatible endpoint",
};
case "gemini-api":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || "gemini-2.5-flash",
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: "GEMINI_API_KEY",
provider,
providerLabel: "Google Gemini",
};
case "compatible-endpoint":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || "custom-model",
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: "COMPATIBLE_API_KEY",
provider,
providerLabel: "Other OpenAI-compatible endpoint",
};
case "vllm-local":
return {
Expand Down
Loading