diff --git a/src/rootcell/providers/lima.ts b/src/rootcell/providers/lima.ts
index ac24f20..390c8e8 100644
--- a/src/rootcell/providers/lima.ts
+++ b/src/rootcell/providers/lima.ts
@@ -149,19 +149,18 @@ export class LimaVmProvider implements VmProvider {
     const status = await this.status(input.name);
     switch (status.state) {
       case "running":
-        this.refreshVmState(input);
-        await this.waitForLimaSsh(input.name);
+        await this.waitForLimaSsh(input);
         return { created: false };
       case "stopped":
         this.log(`starting ${input.name} Lima VM...`);
         this.startVm(input);
-        await this.waitForLimaSsh(input.name);
+        await this.waitForLimaSsh(input);
         return { created: false };
       case "missing":
         this.log(`${input.name} Lima VM not found; creating from nixos-lima image...`);
         this.createVm(input);
+        await this.waitForLimaSsh(input);
         await this.forgetSshHostKey(input.name);
-        await this.waitForLimaSsh(input.name);
         return { created: true };
       case "unexpected":
         throw new Error(`${input.name} VM in unexpected state: ${status.detail}`);
@@ -293,12 +292,13 @@ export class LimaVmProvider implements VmProvider {
   }
 
   private bootstrapSshPort(name: string): number {
-    return this.readVmState(name)?.sshLocalPort ?? this.sshLocalPort(name);
+    const port = this.sshLocalPort(name);
+    this.updateVmSshLocalPort(name, port);
+    return port;
   }
 
   private startVm(input: { readonly role: VmRole; readonly name: string; readonly network: LimaUserV2NetworkAttachment }): void {
     runInherited(this.ensureLimactl(), ["--tty=false", "start", input.name]);
-    this.refreshVmState(input);
   }
 
   private refreshVmState(
@@ -344,25 +344,45 @@ export class LimaVmProvider implements VmProvider {
     return path;
   }
 
-  private async waitForLimaSsh(name: string): Promise<void> {
+  /**
+   * Polls the VM's bootstrap SSH path by running `true` until the command exits 0,
+   * then calls `refreshVmState` and returns.
+   *
+   * Makes up to 300 attempts, 500 ms apart. An exception from the exec call counts
+   * as a failed attempt; its message is kept for the eventual timeout error.
+   *
+   * @throws Error at once when the failure message matches "Operation not permitted",
+   *   or after the final attempt with the last captured message.
+   */
+  private async waitForLimaSsh(input: {
+    readonly role: VmRole;
+    readonly name: string;
+    readonly network: LimaUserV2NetworkAttachment;
+  }): Promise<void> {
     let lastError = "";
     for (let attempt = 0; attempt < 300; attempt += 1) {
-      const result = await this.execBootstrapCapture(name, ["true"], {
-        allowFailure: true,
-      });
-      if (result.status === 0) {
+      let result: CommandResult | null = null;
+      try {
+        result = await this.execBootstrapCapture(input.name, ["true"], {
+          allowFailure: true,
+        });
+      } catch (error) {
+        lastError = messageFromUnknown(error);
+      }
+      if (result?.status === 0) {
+        this.refreshVmState(input);
         return;
       }
-      const message = `${result.stderr}${result.stdout}`.trim();
+      const message = result === null ? lastError : `${result.stderr}${result.stdout}`.trim();
       if (message.length > 0) {
         lastError = message;
       }
       if (/Operation not permitted/i.test(message)) {
-        throw new Error(`host cannot connect to Lima SSH endpoint for ${name}: ${message}`);
+        throw new Error(`host cannot connect to Lima SSH endpoint for ${input.name}: ${message}`);
       }
       await sleep(500);
     }
-    throw new Error(`timeout waiting for SSH transport to ${name}${lastError.length === 0 ? "" : `: ${lastError}`}`);
+    throw new Error(`timeout waiting for SSH transport to ${input.name}${lastError.length === 0 ? "" : `: ${lastError}`}`);
   }
 
   private async waitForFinalSsh(name: string): Promise<void> {
@@ -407,7 +427,7 @@ export class LimaVmProvider implements VmProvider {
   }
 
   private transportEndpoints(): ProxyJumpSshEndpoints {
-    const firewall = this.readVmState(this.config.firewallVm);
+    const firewall = this.refreshVmSshLocalPort(this.config.firewallVm) ?? this.readVmState(this.config.firewallVm);
     if (firewall?.sshLocalPort === undefined) {
       throw new Error("firewall Lima SSH local port is not known yet");
     }
@@ -482,6 +502,36 @@ export class LimaVmProvider implements VmProvider {
     writeFileSync(this.statePath(name), `${JSON.stringify(state, null, 2)}\n`, { encoding: "utf8", mode: 0o600 });
   }
 
+  /**
+   * Reads the VM's SSH local port via `sshLocalPort` and stores it with
+   * `updateVmSshLocalPort`. Returns null when there is no saved state for the VM.
+   */
+  private refreshVmSshLocalPort(name: string): LimaVmState | null {
+    const port = this.sshLocalPort(name);
+    return this.updateVmSshLocalPort(name, port);
+  }
+
+  /**
+   * Records `port` as the SSH local port in the VM's saved state.
+   *
+   * Skips the write when the saved port already matches. Never creates state: when
+   * `readVmState` returns null, this returns null without writing.
+   *
+   * @returns The state after the update, or null when there was none to update.
+   */
+  private updateVmSshLocalPort(name: string, port: number): LimaVmState | null {
+    const state = this.readVmState(name);
+    if (state === null) {
+      return null;
+    }
+    if (state.sshLocalPort === port) {
+      return state;
+    }
+    const updated = { ...state, sshLocalPort: port };
+    this.writeVmState(name, updated);
+    return updated;
+  }
+
   private vmDir(name: string): string {
     return join(this.config.instanceDir, "v", this.vmRoleDir(name));
   }
@@ -665,6 +715,11 @@ function shellQuote(value: string): string {
   return `'${value.replaceAll("'", "'\\''")}'`;
 }
 
+/** Returns `error.message` for Error instances and `String(error)` for anything else. */
+function messageFromUnknown(error: unknown): string {
+  return error instanceof Error ? error.message : String(error);
+}
+
 function sleep(milliseconds: number): Promise<void> {
   return new Promise((resolveSleep) => {
     setTimeout(resolveSleep, milliseconds);
diff --git a/src/rootcell/rootcell.test.ts b/src/rootcell/rootcell.test.ts
index 9234567..50af654 100644
--- a/src/rootcell/rootcell.test.ts
+++ b/src/rootcell/rootcell.test.ts
@@ -14,7 +14,7 @@ import {
   limaUserV2ReservedIps,
   MacOsLimaUserV2NetworkProvider,
 } from "./providers/macos-lima-user-v2-network.ts";
-import { directSshConfig, limaYaml, NIXOS_LIMA_AARCH64_IMAGE, parseLimaVmState, userV2ProofScript } from "./providers/lima.ts";
+import { directSshConfig, LimaVmProvider, limaYaml, NIXOS_LIMA_AARCH64_IMAGE, parseLimaVmState, userV2ProofScript } from "./providers/lima.ts";
 import {
   ImageStore,
   imageDownloadUrl,
@@ -26,7 +26,7 @@ import {
 } from "./images.ts";
 import { forgetKnownHost, sshConfig } from "./transports/proxyjump-ssh.ts";
 import { dnsmasqAllowlistConfig, generatedLineCount } from "../bin/reload.ts";
-import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
+import { chmodSync, mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
 import { tmpdir } from "node:os";
 import {
@@ -737,6 +737,87 @@ describe("VM and network providers", () => {
     expect(() => parseLimaVmState({ provider: "unknown" })).toThrow("provider mismatch");
   });
 
+  test("Lima transport refreshes stale firewall SSH local ports", async () => {
+    const dir = mkdtempSync(join(tmpdir(), "rootcell-lima-port-test-"));
+    const oldPath = process.env.PATH;
+    const oldLimactl = process.env.ROOTCELL_LIMACTL;
+    try {
+      const bin = join(dir, "bin");
+      mkdirSync(bin, { recursive: true });
+      const limactl = join(bin, "limactl");
+      writeFileSync(limactl, [
+        "#!/bin/sh",
+        "if [ \"$1\" = \"list\" ] && [ \"$2\" = \"--format\" ] && [ \"$3\" = \"{{.SSHLocalPort}}\" ]; then",
+        " printf '61000\\n'",
+        " exit 0",
+        "fi",
+        "echo unexpected limactl \"$@\" >&2",
+        "exit 1",
+        "",
+      ].join("\n"), "utf8");
+      chmodSync(limactl, 0o755);
+      const ssh = join(bin, "ssh");
+      writeFileSync(ssh, [
+        "#!/bin/sh",
+        "config=",
+        "while [ \"$#\" -gt 0 ]; do",
+        " if [ \"$1\" = \"-F\" ]; then",
+        " config=$2",
+        " shift 2",
+        " continue",
+        " fi",
+        " shift",
+        "done",
+        "if grep -q 'Port 61000' \"$config\"; then",
+        " exit 0",
+        "fi",
+        "echo stale SSH port >&2",
+        "exit 255",
+        "",
+      ].join("\n"), "utf8");
+      chmodSync(ssh, 0o755);
+
+      process.env.ROOTCELL_LIMACTL = limactl;
+      process.env.PATH = `${bin}:${oldPath ?? ""}`;
+      const config = buildConfig(dir, {}, fakeInstance("dev", dir));
+      const stateDir = join(config.instanceDir, "v", "f");
+      mkdirSync(stateDir, { recursive: true });
+      const statePath = join(stateDir, "state.json");
+      writeFileSync(statePath, `${JSON.stringify({
+        provider: "lima",
+        name: config.firewallVm,
+        role: "firewall",
+        limaInstance: config.firewallVm,
+        yamlPath: join(stateDir, "lima.yaml"),
+        privateInterface: "enp0s1",
+        egressInterface: "enp0s2",
+        privateIp: config.firewallIp,
+        networkName: limaUserV2NetworkName(config),
+        hasEgress: true,
+        sshLocalPort: 60000,
+      }, null, 2)}\n`, "utf8");
+
+      const provider = new LimaVmProvider(config, ignoreLog);
+      const result = await provider.execCapture(config.firewallVm, ["true"], { allowFailure: true });
+
+      expect(result.status).toBe(0);
+      expect(parseLimaVmState(JSON.parse(readFileSync(statePath, "utf8"))).sshLocalPort).toBe(61000);
+    } finally {
+      // In Node, assigning undefined to a process.env key stores the string "undefined", so delete instead.
+      if (oldPath === undefined) {
+        delete process.env.PATH;
+      } else {
+        process.env.PATH = oldPath;
+      }
+      if (oldLimactl === undefined) {
+        delete process.env.ROOTCELL_LIMACTL;
+      } else {
+        process.env.ROOTCELL_LIMACTL = oldLimactl;
+      }
+      rmSync(dir, { recursive: true, force: true });
+    }
+  });
+
   test("formats VM state list", () => {
     expect(formatVmList([
       { instance: "default", vm: "agent", state: "running" },
diff --git a/src/rootcell/rootcell.ts b/src/rootcell/rootcell.ts
index 5430a30..fbca0ff 100644
--- a/src/rootcell/rootcell.ts
+++ b/src/rootcell/rootcell.ts
@@ -423,6 +423,7 @@ exit 1
   }
 
   private async syncAllowlists(): Promise<void> {
+    await this.waitForFirewallSsh();
     for (const file of ["allowed-https.txt", "allowed-ssh.txt", "allowed-dns.txt"]) {
       await this.providers.vm.copyToGuest(
         this.config.firewallVm,
@@ -433,6 +434,36 @@ exit 1
     await this.providers.vm.exec(this.config.firewallVm, ["sudo", "/etc/agent-vm/reload.sh"]);
   }
 
+  /**
+   * Waits until `true` can be run on the firewall VM through the VM provider.
+   *
+   * Makes up to 120 attempts, 500 ms apart; non-zero exits and thrown errors both
+   * count as failed attempts.
+   *
+   * @throws Error after the final attempt, including the last captured message.
+   */
+  private async waitForFirewallSsh(): Promise<void> {
+    let lastError = "";
+    for (let attempt = 0; attempt < 120; attempt += 1) {
+      try {
+        const result = await this.providers.vm.execCapture(this.config.firewallVm, ["true"], {
+          allowFailure: true,
+        });
+        if (result.status === 0) {
+          return;
+        }
+        const message = `${result.stderr}${result.stdout}`.trim();
+        if (message.length > 0) {
+          lastError = message;
+        }
+      } catch (error) {
+        lastError = messageFromUnknown(error);
+      }
+      await sleep(500);
+    }
+    throw new Error(`timeout waiting for SSH transport to ${this.config.firewallVm}${lastError.length === 0 ? "" : `: ${lastError}`}`);
+  }
+
   private ensureCa(): void {
     const dir = this.config.pkiDir;
     const key = join(dir, "agent-vm-ca.key");