From 94759481c520841f30ca761e7e10a799b97c7eab Mon Sep 17 00:00:00 2001 From: Jim Pudar Date: Sat, 23 May 2026 10:11:57 -0400 Subject: [PATCH] Enable Lima VSOCK control with DHCP lease --- agent-vm.nix | 23 +++++++++++---- firewall-vm.nix | 16 ++++++++-- .../provider.integration.test.ts | 16 ++++++++-- src/rootcell/providers/lima.ts | 2 +- .../providers/macos-lima-user-v2-network.ts | 4 +-- .../providers/macos-lima-user-v2/README.md | 29 ++++++++++++------- src/rootcell/rootcell.test.ts | 23 ++++++++++----- 7 files changed, 82 insertions(+), 31 deletions(-) diff --git a/agent-vm.nix b/agent-vm.nix index 549e931..a052449 100644 --- a/agent-vm.nix +++ b/agent-vm.nix @@ -18,9 +18,10 @@ in # anyway. All meaningful filtering happens in the firewall VM. networking.firewall.enable = false; - # Networking: only the per-instance private Lima user-v2 link is configured, so - # there is no direct host control path a root-capable agent could reconfigure - # into egress. + # Networking: only the per-instance private Lima user-v2 link is configured. + # Lima's VZ hostagent still needs a DHCP lease on that link before it opens + # the VSOCK SSH control path after restarts, but Rootcell keeps ownership of + # the steady-state address, DNS, and default route below. networking.useDHCP = false; networking.useNetworkd = true; systemd.network.enable = true; @@ -30,12 +31,24 @@ in systemd.network.networks."10-enp0s1" = { matchConfig = privateMatch; networkConfig = { - DHCP = "no"; + DHCP = "ipv4"; IPv6AcceptRA = false; LinkLocalAddressing = "no"; }; + dhcpV4Config = { + UseDNS = false; + UseDomains = false; + UseHostname = false; + UseMTU = false; + UseNTP = false; + UseRoutes = false; + UseTimezone = false; + }; address = [ "${net.agentIp}/${toString net.networkPrefix}" ]; - routes = [ { Gateway = net.agentDefaultGatewayIp; } ]; + routes = [ { + Gateway = net.agentDefaultGatewayIp; + PreferredSource = net.agentIp; + } ]; dns = [ net.firewallIp ]; }; diff --git a/firewall-vm.nix b/firewall-vm.nix index 2aff5b6..0e2794d 100644 --- a/firewall-vm.nix +++ b/firewall-vm.nix @@ -92,8 +92,9 @@ in dns = net.firewallUpstreamDns; }; - # Private Lima user-v2 link to the agent VM. - # Static address; DHCP would conflict with the agent's static address. + # Private Lima user-v2 link to the agent VM. Lima's VZ hostagent waits for a + # DHCP lease on user-v2 before it opens the VSOCK SSH control path after + # restarts, but Rootcell keeps DHCP routes and DNS disabled here. systemd.network.networks."20-private" = { matchConfig = privateMatch; # The firewall boots before the agent, so the private user-v2 peer may not @@ -101,10 +102,19 @@ in # this link; the static address is still configured by networkd. linkConfig.RequiredForOnline = false; networkConfig = { - DHCP = "no"; + DHCP = "ipv4"; IPv6AcceptRA = false; LinkLocalAddressing = "no"; }; + dhcpV4Config = { + UseDNS = false; + UseDomains = false; + UseHostname = false; + UseMTU = false; + UseNTP = false; + UseRoutes = false; + UseTimezone = false; + }; address = [ "${net.firewallIp}/${toString net.networkPrefix}" ]; }; diff --git a/src/rootcell/integration/providers/macos-lima-user-v2/provider.integration.test.ts b/src/rootcell/integration/providers/macos-lima-user-v2/provider.integration.test.ts index 9a10552..c1a5493 100644 --- a/src/rootcell/integration/providers/macos-lima-user-v2/provider.integration.test.ts +++ b/src/rootcell/integration/providers/macos-lima-user-v2/provider.integration.test.ts @@ -32,7 +32,7 @@ describe.skipIf(!shouldRun)("macos-lima-user-v2 integration provider", { concurr privateIp: z.literal(FIREWALL_IP), hasEgress: z.literal(true), }))); - expect(plan.vms.agent.reservedIps).toEqual(["192.168.109.2", "192.168.109.3"]); + expect(plan.vms.agent.reservedIps).toEqual(["192.168.109.2"]); }); test("records running Lima VM state files and generated YAML", () => { @@ -82,9 +82,21 @@ describe.skipIf(!shouldRun)("macos-lima-user-v2 integration provider", { concurr sshGuest(flow, "rootcell-agent", "true"); }); + test("keeps Lima VSOCK control after VM restarts", () => { + flow.hostCommandOk("limactl", ["shell", AGENT_VM_NAME, "true"]); + flow.hostCommandOk("limactl", ["shell", FIREWALL_VM_NAME, "true"]); + for (const vm of [AGENT_VM_NAME, FIREWALL_VM_NAME]) { + flow.hostCommandOk("limactl", ["stop", vm]); + flow.hostCommandOk("limactl", ["start", "--timeout", "3m", vm]); + flow.hostCommandOk("limactl", ["shell", vm, "true"]); + } + }); + test("passes the strict no-bypass user-v2 proof gate", async () => { + const agentInterface = flow.providers.network.plan().vms.agent.privateInterface; await flow.agentSh("test \"$(find /sys/class/net -mindepth 1 -maxdepth 1 ! -name lo | wc -l | tr -d \" \")\" = 1"); - await flow.agentSh("! ip -4 -o addr show scope global | grep -v ' 192\\.168\\.109\\.11/24' | grep -q ."); + await flow.agentSh(`ip -4 addr show dev ${shellQuote(agentInterface)} | grep -q ${shellQuote(` ${AGENT_IP}/24`)}`); + await flow.agentSh(`! ip -4 -o addr show scope global | grep -v ${shellQuote(`^[0-9]\\+: ${agentInterface}\\b`)} | grep -q .`); await flow.agentSh(`ip route show default | grep -q '^default via ${FIREWALL_IP} '`); await flow.agentSh(`ping -c 1 -W 2 ${FIREWALL_IP} >/dev/null`); flow.hostCommandFails("ssh", [ diff --git a/src/rootcell/providers/lima.ts b/src/rootcell/providers/lima.ts index 390c8e8..dfb5216 100644 --- a/src/rootcell/providers/lima.ts +++ b/src/rootcell/providers/lima.ts @@ -596,9 +596,9 @@ export function userV2ProofScript(input: { "test -d \"/sys/class/net/$iface\"", "test \"$(find /sys/class/net -mindepth 1 -maxdepth 1 ! -name lo | wc -l | tr -d ' ')\" = 1", "ip -4 addr show dev \"$iface\" | grep -q \" $agent_ip/$prefix\"", + "! ip -4 -o addr show scope global | grep -v \"^[0-9]\\+: $iface\\b\" | grep -q .", "test \"$(ip route show default | wc -l | tr -d ' ')\" = 1", "ip route show default | grep -q \"^default via $firewall_ip dev $iface\\b\"", - "! ip -4 -o addr show scope global | grep -v \" $agent_ip/$prefix\" | grep -q .", "! ip route show default | grep -qv \"via $firewall_ip dev $iface\"", "ping -c 1 -W 2 \"$firewall_ip\" >/dev/null", "", diff --git a/src/rootcell/providers/macos-lima-user-v2-network.ts b/src/rootcell/providers/macos-lima-user-v2-network.ts index e96d54d..14287a5 100644 --- a/src/rootcell/providers/macos-lima-user-v2-network.ts +++ b/src/rootcell/providers/macos-lima-user-v2-network.ts @@ -162,11 +162,11 @@ export function limaUserV2ReservedIps(config: RootcellConfig): { } { const prefix = config.firewallIp.slice(0, config.firewallIp.lastIndexOf(".")); const gatewayIp = `${prefix}.2`; - const dnsIp = `${prefix}.3`; + const dnsIp = gatewayIp; return { gatewayIp, dnsIp, - all: [gatewayIp, dnsIp], + all: [gatewayIp], }; } diff --git a/src/rootcell/providers/macos-lima-user-v2/README.md b/src/rootcell/providers/macos-lima-user-v2/README.md index bee132c..2b3afda 100644 --- a/src/rootcell/providers/macos-lima-user-v2/README.md +++ b/src/rootcell/providers/macos-lima-user-v2/README.md @@ -6,7 +6,8 @@ private Lima user-v2 network per rootcell instance. The firewall VM has public egress through Lima VZ NAT plus a private user-v2 interface. The agent VM has only the private user-v2 interface and reaches DNS, -HTTPS, SSH, and the host control path through the firewall. +HTTPS, and SSH egress through the firewall. Lima's own `limactl shell` control +path stays on VSOCK. ## Required Instance Environment @@ -114,18 +115,23 @@ Default rootcell instance allocation uses: - `.10` for the firewall VM. - `.11` for the agent VM. -- `.2` for the Lima user-v2 gateway. -- `.3` for Lima user-v2 DNS. +- `.2` for the Lima user-v2 gateway and DNS service. The firewall VM receives two network interfaces: - A Lima VZ NAT interface for public egress and host control. - A Lima user-v2 interface for private traffic from the agent. -The agent VM receives only the user-v2 interface. During startup, rootcell runs -a proof gate inside the agent that checks there is exactly one non-loopback -interface, that the default route points to the firewall, and that there is no -extra global IPv4 address or bypass route. +The agent VM receives only the user-v2 interface. It keeps a DHCP lease on that +link because Lima's VZ hostagent waits for the user-v2 lease before it opens the +VSOCK SSH control endpoint after restarts. DHCP routes and DNS are ignored; the +Rootcell static address, firewall DNS, and default route remain authoritative. +The firewall VM keeps the same route-free, DNS-free DHCP lease on its private +user-v2 interface for the same Lima VSOCK startup path. +During startup, rootcell runs a proof gate inside the agent that checks there is +exactly one non-loopback interface, that all global IPv4 addresses are on that +interface, that the Rootcell static address is present, and that there is no +default-route bypass. The host connects to the firewall through Lima's generated localhost SSH endpoint. The agent is reached through SSH ProxyJump via the firewall over the @@ -138,7 +144,7 @@ v0.0.5 template. It keeps the upstream NixOS guest contract while replacing the pieces rootcell needs to control: - `mounts: []`, so the host home directory is not mounted into either VM. -- `ssh.overVsock: true`, so host-to-firewall SSH uses Lima's local endpoint. +- `ssh.overVsock: true`, so Lima's local SSH endpoints use VSOCK. - The guest user, network interfaces, CPU, memory, and disk settings. The generated YAML pins the upstream `nixos-lima` image URLs and digests instead @@ -176,9 +182,10 @@ guests. For Intel Macs or x86 Linux guests, update these together: ## Security Notes The Lima provider writes generated YAML and keeps host filesystem mounts -disabled. The agent VM has no VZ NAT attachment and no direct host-to-agent SSH -path. Host entry goes through the firewall, and agent egress goes through the -firewall allowlist path. +disabled. The agent VM has no VZ NAT attachment and no direct host-to-agent +network SSH path. Rootcell host entry goes through the firewall, Lima's own +control endpoint uses VSOCK, and agent egress goes through the firewall +allowlist path. The provider uses Lima's normal host-side SSH identity from `LIMA_HOME/_config/user` for the initial firewall connection. Agent Git pushes use the separate SSH key diff --git a/src/rootcell/rootcell.test.ts b/src/rootcell/rootcell.test.ts index 149ae51..f18dc2e 100644 --- a/src/rootcell/rootcell.test.ts +++ b/src/rootcell/rootcell.test.ts @@ -786,7 +786,7 @@ describe("VM and network providers", () => { privateInterface: z.literal("enp0s1"), privateIp: z.literal("192.168.109.11"), gatewayIp: z.literal("192.168.109.2"), - dnsIp: z.literal("192.168.109.3"), + dnsIp: z.literal("192.168.109.2"), reservedIps: z.array(z.string()), hasEgress: z.literal(false), }).strict(), @@ -799,7 +799,7 @@ describe("VM and network providers", () => { egressInterface: z.literal("enp0s2"), privateIp: z.literal("192.168.109.10"), gatewayIp: z.literal("192.168.109.2"), - dnsIp: z.literal("192.168.109.3"), + dnsIp: z.literal("192.168.109.2"), reservedIps: z.array(z.string()), hasEgress: z.literal(true), }).strict(), @@ -818,7 +818,7 @@ describe("VM and network providers", () => { expect(plan.vms.agent.kind).toBe("lima-user-v2"); expect(plan.vms.agent.hasEgress).toBe(false); expect(plan.vms.firewall.hasEgress).toBe(true); - expect(plan.vms.agent.reservedIps).toEqual(["192.168.109.2", "192.168.109.3"]); + expect(plan.vms.agent.reservedIps).toEqual(["192.168.109.2"]); }); test("AWS EC2 provider exposes public firewall and private-only agent attachments", () => { @@ -848,12 +848,12 @@ describe("VM and network providers", () => { expect(awsVpcRouterIp(config)).toBe("192.168.109.1"); }); - test("user-v2 network plan reserves Lima gateway and DNS IPs", () => { + test("user-v2 network plan reserves Lima gateway and DNS IP", () => { const config = buildConfig("/repo", {}, fakeInstance("dev")); expect(limaUserV2ReservedIps(config)).toEqual({ gatewayIp: "192.168.109.2", - dnsIp: "192.168.109.3", - all: ["192.168.109.2", "192.168.109.3"], + dnsIp: "192.168.109.2", + all: ["192.168.109.2"], }); }); @@ -964,6 +964,14 @@ describe("VM and network providers", () => { const firewallModule = readFileSync("firewall-vm.nix", "utf8"); expect(firewallModule).toContain("systemd.network.wait-online.enable = false;"); expect(firewallModule).toContain("linkConfig.RequiredForOnline = false;"); + expect(firewallModule).toContain("Rootcell keeps DHCP routes and DNS disabled"); + expect(firewallModule).toContain("UseRoutes = false;"); + + const agentModule = readFileSync("agent-vm.nix", "utf8"); + expect(agentModule).toContain('DHCP = "ipv4";'); + expect(agentModule).toContain("UseDNS = false;"); + expect(agentModule).toContain("UseRoutes = false;"); + expect(agentModule).toContain("PreferredSource = net.agentIp;"); }); test("user-v2 proof gate rejects extra agent interfaces and default-route bypasses", () => { @@ -974,9 +982,10 @@ describe("VM and network providers", () => { agentPrivateInterface: "enp0s1", }); expect(script).toContain("find /sys/class/net -mindepth 1 -maxdepth 1 ! -name lo"); + expect(script).toContain("ip -4 addr show dev \"$iface\" | grep -q \" $agent_ip/$prefix\""); + expect(script).toContain("! ip -4 -o addr show scope global | grep -v \"^[0-9]\\+: $iface\\b\" | grep -q ."); expect(script).toContain("test \"$(ip route show default | wc -l | tr -d ' ')\" = 1"); expect(script).toContain("ip route show default | grep -q \"^default via $firewall_ip dev $iface\\b\""); - expect(script).toContain("! ip -4 -o addr show scope global | grep -v"); }); test("generated AWS EC2 Terraform keeps IAM, IMDS, tagging, and networking invariants", () => {