-
Notifications
You must be signed in to change notification settings - Fork 1.7k
fix: inject TLS secrets after gateway start on fresh install #217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -415,6 +415,66 @@ async function preflight() { | |
| return gpu; | ||
| } | ||
|
|
||
|
|
||
| // ── TLS Fix: inject missing secrets on WSL2/Linux ──────────────── | ||
| function injectTlsSecrets() { | ||
| const { execSync } = require("child_process"); | ||
| console.log(" Injecting TLS secrets for OpenShell gateway..."); | ||
| const tmpDir = "/tmp/openshell-tls-fix"; | ||
| fs.mkdirSync(tmpDir, { recursive: true }); | ||
|
|
||
| const ext = [ | ||
| "[req]", | ||
| "req_extensions = v3_req", | ||
| "distinguished_name = req_distinguished_name", | ||
| "[req_distinguished_name]", | ||
| "[v3_req]", | ||
| "basicConstraints = CA:FALSE", | ||
| "keyUsage = nonRepudiation, digitalSignature, keyEncipherment", | ||
| "subjectAltName = @alt_names", | ||
| "[alt_names]", | ||
| "DNS.1 = openshell", | ||
| "DNS.2 = openshell.openshell.svc", | ||
| "DNS.3 = openshell.openshell.svc.cluster.local", | ||
| "DNS.4 = localhost", | ||
| "IP.1 = 127.0.0.1", | ||
| "[v3_ca]", | ||
| "subjectKeyIdentifier = hash", | ||
| "authorityKeyIdentifier = keyid:always,issuer", | ||
| "basicConstraints = CA:TRUE", | ||
| "keyUsage = cRLSign, keyCertSign", | ||
| ].join("\n"); | ||
| fs.writeFileSync(tmpDir + "/v3.ext", ext); | ||
|
|
||
| try { | ||
| execSync("openssl req -x509 -newkey rsa:4096 -keyout " + tmpDir + "/ca.key -out " + tmpDir + "/ca.crt -days 365 -nodes -subj '/CN=openshell-ca' -extensions v3_ca -config " + tmpDir + "/v3.ext 2>/dev/null"); | ||
| execSync("openssl req -newkey rsa:4096 -keyout " + tmpDir + "/server.key -out " + tmpDir + "/server.csr -nodes -subj '/CN=openshell' 2>/dev/null"); | ||
| execSync("openssl x509 -req -in " + tmpDir + "/server.csr -CA " + tmpDir + "/ca.crt -CAkey " + tmpDir + "/ca.key -CAcreateserial -out " + tmpDir + "/server.crt -days 365 -extensions v3_req -extfile " + tmpDir + "/v3.ext 2>/dev/null"); | ||
|
|
||
| console.log(" Waiting for k3s namespace..."); | ||
| const container = execSync("docker ps --filter name=openshell-cluster-nemoclaw --format '{{.Names}}'").toString().trim(); | ||
| for (let i = 0; i < 30; i++) { | ||
| try { | ||
| execSync("docker exec " + container + " kubectl get namespace openshell 2>/dev/null"); | ||
| console.log(" ✓ k3s namespace ready"); | ||
| break; | ||
| } catch { | ||
| require("child_process").spawnSync("sleep", ["3"]); | ||
| } | ||
| } | ||
|
Comment on lines
+454
to
+464
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
wc -l bin/lib/onboard.js
Repository: NVIDIA/NemoClaw
Length of output: 82
🏁 Script executed:
sed -n '440,480p' bin/lib/onboard.js
Repository: NVIDIA/NemoClaw
Length of output: 2339
Fail fast if the `openshell` namespace never becomes ready.
The readiness loop (lines 454-464) waits up to 90 seconds but continues regardless of success. If readiness never occurs, the subsequent `docker cp` / `kubectl` secret commands still run against a cluster that is not ready and fail with a less specific error.
Suggested fix
 console.log(" Waiting for k3s namespace...");
const container = execSync("docker ps --filter name=openshell-cluster-nemoclaw --format '{{.Names}}'").toString().trim();
+ let namespaceReady = false;
for (let i = 0; i < 30; i++) {
try {
execSync("docker exec " + container + " kubectl get namespace openshell 2>/dev/null");
console.log(" ✓ k3s namespace ready");
+ namespaceReady = true;
break;
} catch {
require("child_process").spawnSync("sleep", ["3"]);
}
}
+ if (!namespaceReady) {
+ throw new Error("k3s namespace 'openshell' did not become ready within 90s");
+ }

🤖 Prompt for AI Agents |
||
|
|
||
| execSync("docker cp " + tmpDir + "/server.crt " + container + ":/tmp/server.crt"); | ||
| execSync("docker cp " + tmpDir + "/server.key " + container + ":/tmp/server.key"); | ||
| execSync("docker cp " + tmpDir + "/ca.crt " + container + ":/tmp/ca.crt"); | ||
| execSync("docker exec " + container + " kubectl delete secret openshell-server-tls openshell-server-client-ca -n openshell 2>/dev/null || true"); | ||
| execSync("docker exec " + container + " kubectl create secret tls openshell-server-tls -n openshell --cert=/tmp/server.crt --key=/tmp/server.key"); | ||
| execSync("docker exec " + container + " kubectl create secret generic openshell-server-client-ca -n openshell --from-file=ca.crt=/tmp/ca.crt"); | ||
|
Comment on lines
+469
to
+471
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
# First, let's check the file exists and read the relevant section
wc -l bin/lib/onboard.js
Repository: NVIDIA/NemoClaw
Length of output: 82
🏁 Script executed:
# Read lines around 469-471 with context
sed -n '450,490p' bin/lib/onboard.js | cat -n
Repository: NVIDIA/NemoClaw
Length of output: 2862
🏁 Script executed:
# Search for cert-manager references to understand if it's used in this project
rg -i "cert-manager" --type js --type yaml --type json
Repository: NVIDIA/NemoClaw
Length of output: 41
🏁 Script executed:
# Search for other TLS secret creation patterns
rg "kubectl.*secret" -A 2 -B 2
Repository: NVIDIA/NemoClaw
Length of output: 868
🏁 Script executed:
# Look at the broader context of the onboard.js file to understand the purpose
head -100 bin/lib/onboard.js | cat -n
Repository: NVIDIA/NemoClaw
Length of output: 4128
🏁 Script executed:
# Read more context around these lines to see the function name and purpose
sed -n '440,475p' bin/lib/onboard.js | cat -n
Repository: NVIDIA/NemoClaw
Length of output: 2483
🏁 Script executed:
# Get the function name and surrounding context to confirm this is onboarding
sed -n '400,445p' bin/lib/onboard.js | cat -n
Repository: NVIDIA/NemoClaw
Length of output: 1942
🏁 Script executed:
# Search for where injectTlsSecrets is called
rg "injectTlsSecrets" -B 3 -A 3
Repository: NVIDIA/NemoClaw
Length of output: 689
Avoid unconditional delete/recreate of TLS secrets.
Lines 469-471 delete then recreate secrets without checking if they exist first. If the delete succeeds but creation fails, the secrets are left missing, creating a transient failure window. Check for secret existence before attempting creation.
Suggested fix
- execSync("docker exec " + container + " kubectl delete secret openshell-server-tls openshell-server-client-ca -n openshell 2>/dev/null || true");
- execSync("docker exec " + container + " kubectl create secret tls openshell-server-tls -n openshell --cert=/tmp/server.crt --key=/tmp/server.key");
- execSync("docker exec " + container + " kubectl create secret generic openshell-server-client-ca -n openshell --from-file=ca.crt=/tmp/ca.crt");
+ const tlsSecret = execSync(
+ "docker exec " + container + " kubectl get secret openshell-server-tls -n openshell --ignore-not-found -o name"
+ ).toString().trim();
+ const caSecret = execSync(
+ "docker exec " + container + " kubectl get secret openshell-server-client-ca -n openshell --ignore-not-found -o name"
+ ).toString().trim();
+
+ if (!tlsSecret) {
+ execSync("docker exec " + container + " kubectl create secret tls openshell-server-tls -n openshell --cert=/tmp/server.crt --key=/tmp/server.key");
+ }
+ if (!caSecret) {
+ execSync("docker exec " + container + " kubectl create secret generic openshell-server-client-ca -n openshell --from-file=ca.crt=/tmp/ca.crt");
+ }

🤖 Prompt for AI Agents |
||
| console.log(" ✓ TLS secrets injected"); | ||
| } catch (e) { | ||
| console.error(" !! TLS injection failed:", e.message); | ||
| } | ||
|
Comment on lines
+473
to
+475
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
cat -n bin/lib/onboard.js | sed -n '460,520p'
Repository: NVIDIA/NemoClaw
Length of output: 3045
🏁 Script executed:
cat -n bin/lib/onboard.js | sed -n '420,476p'
Repository: NVIDIA/NemoClaw
Length of output: 3325
🏁 Script executed:
rg "injectTlsSecrets" bin/lib/onboard.js
Repository: NVIDIA/NemoClaw
Length of output: 110
🏁 Script executed:
cat -n bin/lib/onboard.js | sed -n '511,525p'
Repository: NVIDIA/NemoClaw
Length of output: 647
Propagate TLS injection failures to fail immediately instead of triggering delayed health check timeouts.
The `catch` block in `injectTlsSecrets()` only logs the error, so `startGateway()` proceeds to the health check even when the TLS secrets were never created.
Suggested fix
 } catch (e) {
- console.error(" !! TLS injection failed:", e.message);
+ throw new Error(`TLS injection failed: ${e.message}`);
}
}
@@
- injectTlsSecrets();
+ try {
+ injectTlsSecrets();
+ } catch (error) {
+ console.error(` ${error.message}`);
+ console.error(" Required OpenShell TLS secrets were not created. Aborting onboarding.");
+ process.exit(1);
+ }

🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
| // ── Step 2: Gateway ────────────────────────────────────────────── | ||
|
|
||
| async function startGateway(gpu) { | ||
|
|
@@ -445,6 +505,8 @@ async function startGateway(gpu) { | |
| env: gatewayEnv, | ||
| }); | ||
|
|
||
| injectTlsSecrets(); | ||
|
|
||
| // Verify health | ||
| for (let i = 0; i < 5; i++) { | ||
| const status = runCapture("openshell status 2>&1", { ignoreError: true }); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
# First, check the file structure and locate the file
git ls-files bin/lib/onboard.js
Repository: NVIDIA/NemoClaw
Length of output: 77
🏁 Script executed:
Repository: NVIDIA/NemoClaw
Length of output: 4718
🏁 Script executed:
Repository: NVIDIA/NemoClaw
Length of output: 4349
🏁 Script executed:
Repository: NVIDIA/NemoClaw
Length of output: 1243
🏁 Script executed:
Repository: NVIDIA/NemoClaw
Length of output: 2131
Remove private keys from /tmp after use and prevent concurrent invocation collisions.
Line 423 creates a shared fixed temp directory that persists across invocations, leaving generated private keys (ca.key, server.key) in /tmp indefinitely. This enables stale key material to accumulate and allows concurrent runs to collide and overwrite each other's key material.
Suggested fix
 function injectTlsSecrets() {
   const { execSync } = require("child_process");
   console.log(" Injecting TLS secrets for OpenShell gateway...");
-  const tmpDir = "/tmp/openshell-tls-fix";
-  fs.mkdirSync(tmpDir, { recursive: true });
+  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openshell-tls-"));
+  fs.chmodSync(tmpDir, 0o700);
@@
-  fs.writeFileSync(tmpDir + "/v3.ext", ext);
+  fs.writeFileSync(path.join(tmpDir, "v3.ext"), ext, { mode: 0o600 });
@@
   } catch (e) {
     console.error(" !! TLS injection failed:", e.message);
+  } finally {
+    fs.rmSync(tmpDir, { recursive: true, force: true });
   }
 }

🤖 Prompt for AI Agents