From 106be9d810bf55398950a4928f5c12b61702ce2a Mon Sep 17 00:00:00 2001 From: Brian Taylor Date: Thu, 19 Mar 2026 12:38:09 -0700 Subject: [PATCH] fix(cluster): resolve DNS failures on systemd-resolved hosts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docker's embedded DNS at 127.0.0.11 is only reachable from the container's own network namespace. k3s pods in child namespaces cannot reach it, causing silent DNS failures on Ubuntu and other systemd-resolved hosts where /etc/resolv.conf contains 127.0.0.53. Sniff upstream DNS resolvers from the host in the Rust bootstrap crate by reading /run/systemd/resolve/resolv.conf (systemd-resolved only — intentionally does NOT read /etc/resolv.conf to avoid bypassing Docker Desktop's DNAT proxy on macOS/Windows). Filter loopback addresses (127.x.x.x and ::1) and pass the result to the container as the UPSTREAM_DNS env var. Skip DNS sniffing for remote deploys where the local host's resolvers would be wrong. The entrypoint checks UPSTREAM_DNS first, falling back to /etc/resolv.conf inside the container for manual launches. This follows the existing pattern used by registry config, SSH gateway, GPU support, and image tags. Closes #437 Signed-off-by: Brian Taylor --- crates/openshell-bootstrap/src/docker.rs | 171 ++++++++++++++ crates/openshell-bootstrap/src/lib.rs | 1 + deploy/docker/cluster-entrypoint.sh | 41 ++++ deploy/docker/tests/test-dns-resolvers.sh | 258 ++++++++++++++++++++++ 4 files changed, 471 insertions(+) create mode 100755 deploy/docker/tests/test-dns-resolvers.sh diff --git a/crates/openshell-bootstrap/src/docker.rs b/crates/openshell-bootstrap/src/docker.rs index 9c365bfe..f5de2b9b 100644 --- a/crates/openshell-bootstrap/src/docker.rs +++ b/crates/openshell-bootstrap/src/docker.rs @@ -236,6 +236,57 @@ fn home_dir() -> Option { std::env::var("HOME").ok() } +/// Discover upstream DNS resolvers from systemd-resolved's configuration. 
+///
+/// Only reads `/run/systemd/resolve/resolv.conf` — the upstream resolver file
+/// maintained by systemd-resolved. This file is only present on Linux hosts
+/// running systemd-resolved (e.g., Ubuntu), so the function is a no-op on
+/// macOS, Windows/WSL, and non-systemd Linux distributions.
+///
+/// We intentionally do NOT fall back to `/etc/resolv.conf` here. On Docker
+/// Desktop (macOS/Windows), `/etc/resolv.conf` may contain non-loopback
+/// resolvers that appear valid but are unreachable via direct UDP from inside
+/// the container's network stack. Those environments rely on the entrypoint's
+/// iptables DNAT proxy to Docker's embedded DNS — sniffing host resolvers
+/// would bypass that proxy and break DNS.
+///
+/// Returns an empty vec if no usable resolvers are found.
+fn resolve_upstream_dns() -> Vec<String> {
+    let paths = ["/run/systemd/resolve/resolv.conf"];
+
+    for path in &paths {
+        if let Ok(contents) = std::fs::read_to_string(path) {
+            let resolvers: Vec<String> = contents
+                .lines()
+                .filter_map(|line| {
+                    let line = line.trim();
+                    if !line.starts_with("nameserver") {
+                        return None;
+                    }
+                    let ip = line.split_whitespace().nth(1)?;
+                    if ip.starts_with("127.") || ip == "::1" {
+                        return None;
+                    }
+                    Some(ip.to_string())
+                })
+                .collect();
+
+            if !resolvers.is_empty() {
+                tracing::debug!(
+                    "Discovered {} upstream DNS resolver(s) from {}: {}",
+                    resolvers.len(),
+                    path,
+                    resolvers.join(", "),
+                );
+                return resolvers;
+            }
+        }
+    }
+
+    tracing::debug!("No upstream DNS resolvers found in host resolver config");
+    Vec::new()
+}
+
 /// Create an SSH Docker client from remote options.
pub async fn create_ssh_docker_client(remote: &RemoteOptions) -> Result { // Ensure destination has ssh:// prefix @@ -455,6 +506,7 @@ pub async fn ensure_container( registry_username: Option<&str>, registry_token: Option<&str>, gpu: bool, + is_remote: bool, ) -> Result<()> { let container_name = container_name(name); @@ -675,6 +727,17 @@ pub async fn ensure_container( env_vars.push("GPU_ENABLED=true".to_string()); } + // Pass upstream DNS resolvers discovered on the host so the entrypoint + // can configure k3s without probing files inside the container. + // Skip for remote deploys — the local host's resolvers are likely wrong + // for the remote Docker host (different network, split-horizon DNS, etc.). + if !is_remote { + let upstream_dns = resolve_upstream_dns(); + if !upstream_dns.is_empty() { + env_vars.push(format!("UPSTREAM_DNS={}", upstream_dns.join(","))); + } + } + let env = Some(env_vars); let config = ContainerCreateBody { @@ -1195,4 +1258,112 @@ mod tests { "should return a reasonable number of sockets" ); } + + #[test] + fn resolve_upstream_dns_filters_loopback() { + // This test validates the function runs without panic on the current host. + // The exact output depends on the host's DNS config, but loopback + // addresses must never appear in the result. + let resolvers = resolve_upstream_dns(); + for r in &resolvers { + assert!( + !r.starts_with("127."), + "IPv4 loopback should be filtered: {r}" + ); + assert_ne!(r, "::1", "IPv6 loopback should be filtered"); + } + } + + #[test] + fn resolve_upstream_dns_returns_vec() { + // Verify the function returns a vec (may be empty in some CI environments + // where no resolv.conf exists, but should never panic). + let resolvers = resolve_upstream_dns(); + assert!( + resolvers.len() <= 20, + "should return a reasonable number of resolvers" + ); + } + + /// Helper: parse resolv.conf content using the same logic as resolve_upstream_dns(). + /// Allows deterministic testing without depending on host DNS config. 
+ fn parse_resolv_conf(contents: &str) -> Vec { + contents + .lines() + .filter_map(|line| { + let line = line.trim(); + if !line.starts_with("nameserver") { + return None; + } + let ip = line.split_whitespace().nth(1)?; + if ip.starts_with("127.") || ip == "::1" { + return None; + } + Some(ip.to_string()) + }) + .collect() + } + + #[test] + fn parse_resolv_conf_filters_ipv4_loopback() { + let input = "nameserver 127.0.0.1\nnameserver 127.0.0.53\nnameserver 127.0.0.11\n"; + assert!(parse_resolv_conf(input).is_empty()); + } + + #[test] + fn parse_resolv_conf_filters_ipv6_loopback() { + let input = "nameserver ::1\n"; + assert!(parse_resolv_conf(input).is_empty()); + } + + #[test] + fn parse_resolv_conf_passes_real_resolvers() { + let input = "nameserver 8.8.8.8\nnameserver 1.1.1.1\n"; + assert_eq!(parse_resolv_conf(input), vec!["8.8.8.8", "1.1.1.1"]); + } + + #[test] + fn parse_resolv_conf_mixed_loopback_and_real() { + let input = + "nameserver 127.0.0.53\nnameserver ::1\nnameserver 10.0.0.1\nnameserver 172.16.0.1\n"; + assert_eq!(parse_resolv_conf(input), vec!["10.0.0.1", "172.16.0.1"]); + } + + #[test] + fn parse_resolv_conf_ignores_comments_and_other_lines() { + let input = + "# nameserver 8.8.8.8\nsearch example.com\noptions ndots:5\nnameserver 1.1.1.1\n"; + assert_eq!(parse_resolv_conf(input), vec!["1.1.1.1"]); + } + + #[test] + fn parse_resolv_conf_handles_tabs_and_extra_spaces() { + let input = "nameserver\t8.8.8.8\nnameserver 1.1.1.1\n"; + assert_eq!(parse_resolv_conf(input), vec!["8.8.8.8", "1.1.1.1"]); + } + + #[test] + fn parse_resolv_conf_empty_input() { + assert!(parse_resolv_conf("").is_empty()); + assert!(parse_resolv_conf(" \n\n").is_empty()); + } + + #[test] + fn parse_resolv_conf_bare_nameserver_keyword() { + assert!(parse_resolv_conf("nameserver\n").is_empty()); + assert!(parse_resolv_conf("nameserver \n").is_empty()); + } + + #[test] + fn parse_resolv_conf_systemd_resolved_typical() { + let input = + "# This is 
/run/systemd/resolve/resolv.conf\nnameserver 192.168.1.1\nsearch lan\n"; + assert_eq!(parse_resolv_conf(input), vec!["192.168.1.1"]); + } + + #[test] + fn parse_resolv_conf_crlf_line_endings() { + let input = "nameserver 8.8.8.8\r\nnameserver 1.1.1.1\r\n"; + assert_eq!(parse_resolv_conf(input), vec!["8.8.8.8", "1.1.1.1"]); + } } diff --git a/crates/openshell-bootstrap/src/lib.rs b/crates/openshell-bootstrap/src/lib.rs index 9098fd4a..8e9e2e74 100644 --- a/crates/openshell-bootstrap/src/lib.rs +++ b/crates/openshell-bootstrap/src/lib.rs @@ -417,6 +417,7 @@ where registry_username.as_deref(), registry_token.as_deref(), gpu, + remote_opts.is_some(), ) .await?; start_container(&target_docker, &name).await?; diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh index 84b8cf9a..0e75e67c 100644 --- a/deploy/docker/cluster-entrypoint.sh +++ b/deploy/docker/cluster-entrypoint.sh @@ -69,7 +69,46 @@ wait_for_default_route() { # 3. Adding DNAT rules so traffic to :53 reaches Docker's DNS # 4. Writing that IP into the k3s resolv.conf +# Extract upstream DNS resolvers reachable from k3s pod namespaces. +# Docker's embedded DNS (127.0.0.11) is namespace-local — DNAT to it from +# pod traffic is dropped as a martian packet. Use real upstream servers instead. +# +# Priority: +# 1. UPSTREAM_DNS env var (set by bootstrap, comma-separated) +# 2. 
/etc/resolv.conf (fallback for non-bootstrap launches) +get_upstream_resolvers() { + local resolvers="" + + # Bootstrap-provided resolvers (sniffed from host by the Rust bootstrap crate) + if [ -n "${UPSTREAM_DNS:-}" ]; then + resolvers=$(printf '%s\n' "$UPSTREAM_DNS" | tr ',' '\n' | \ + awk '{ip=$1; if(ip !~ /^127\./ && ip != "::1" && ip != "") print ip}') + fi + + # Fallback: Docker-generated resolv.conf may have non-loopback servers + if [ -z "$resolvers" ]; then + resolvers=$(awk '/^nameserver/{ip=$2; gsub(/\r/,"",ip); if(ip !~ /^127\./ && ip != "::1") print ip}' \ + /etc/resolv.conf) + fi + + echo "$resolvers" +} + setup_dns_proxy() { + # Prefer upstream resolvers that work across network namespaces. + # This avoids the DNAT-to-loopback problem on systemd-resolved hosts. + UPSTREAM_DNS=$(get_upstream_resolvers) + if [ -n "$UPSTREAM_DNS" ]; then + : > "$RESOLV_CONF" + echo "$UPSTREAM_DNS" | while read -r ns; do + [ -n "$ns" ] && echo "nameserver $ns" >> "$RESOLV_CONF" + done + echo "DNS: using upstream resolvers directly (avoids cross-namespace DNAT)" + cat "$RESOLV_CONF" + return 0 + fi + + # Fall back to DNAT proxy when no upstream resolvers are available. # Extract Docker's actual DNS listener ports from the DOCKER_OUTPUT chain. # Docker sets up rules like: # -A DOCKER_OUTPUT -d 127.0.0.11/32 -p udp --dport 53 -j DNAT --to-destination 127.0.0.11: @@ -160,6 +199,8 @@ verify_dns() { sleep 1 i=$((i + 1)) done + echo "Warning: DNS verification failed for $lookup_host after $attempts attempts" + echo " resolv.conf: $(head -3 "$RESOLV_CONF" 2>/dev/null)" return 1 } diff --git a/deploy/docker/tests/test-dns-resolvers.sh b/deploy/docker/tests/test-dns-resolvers.sh new file mode 100755 index 00000000..1c0abdc0 --- /dev/null +++ b/deploy/docker/tests/test-dns-resolvers.sh @@ -0,0 +1,258 @@ +#!/bin/sh +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0
+#
+# Unit tests for the DNS resolver extraction logic in cluster-entrypoint.sh.
+#
+# Validates that get_upstream_resolvers() correctly filters loopback addresses
+# (IPv4 127.x.x.x, IPv6 ::1) and passes through real upstream nameservers.
+#
+# Usage: sh deploy/docker/tests/test-dns-resolvers.sh
+
+set -eu
+
+# Running totals. Every assert_* helper below records exactly one result via
+# pass() or fail(); its last argument is the label printed for that check.
+_PASS=0
+_FAIL=0
+
+pass() {
+    _PASS=$((_PASS + 1))
+    printf ' PASS: %s\n' "$1"
+}
+
+fail() {
+    _FAIL=$((_FAIL + 1))
+    printf ' FAIL: %s\n' "$1" >&2
+    if [ -n "${2:-}" ]; then
+        printf ' %s\n' "$2" >&2
+    fi
+}
+
+assert_eq() {
+    _actual="$1"
+    _expected="$2"
+    _label="$3"
+
+    if [ "$_actual" = "$_expected" ]; then
+        pass "$_label"
+    else
+        fail "$_label" "expected '$_expected', got '$_actual'"
+    fi
+}
+
+assert_contains() {
+    _haystack="$1"
+    _needle="$2"
+    _label="$3"
+
+    if printf '%s' "$_haystack" | grep -qF "$_needle"; then
+        pass "$_label"
+    else
+        fail "$_label" "expected '$_needle' in output '$_haystack'"
+    fi
+}
+
+assert_not_contains() {
+    _haystack="$1"
+    _needle="$2"
+    _label="$3"
+
+    if printf '%s' "$_haystack" | grep -qF "$_needle"; then
+        fail "$_label" "unexpected '$_needle' found in output '$_haystack'"
+    else
+        pass "$_label"
+    fi
+}
+
+assert_empty() {
+    _val="$1"
+    _label="$2"
+
+    if [ -z "$_val" ]; then
+        pass "$_label"
+    else
+        fail "$_label" "expected empty, got '$_val'"
+    fi
+}
+
+# The awk filter extracted from cluster-entrypoint.sh. Tested in isolation
+# so we don't need root, iptables, or a running container.
+filter_resolvers() {
+    awk '/^nameserver/{ip=$2; gsub(/\r/,"",ip); if(ip !~ /^127\./ && ip != "::1") print ip}'
+}
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+test_filters_ipv4_loopback() {
+    printf 'TEST: filters IPv4 loopback addresses\n'
+
+    input="nameserver 127.0.0.1
+nameserver 127.0.0.11
+nameserver 127.0.0.53
+nameserver 127.1.2.3"
+    result=$(printf '%s\n' "$input" | filter_resolvers)
+    assert_empty "$result" "all 127.x.x.x addresses filtered"
+}
+
+test_filters_ipv6_loopback() {
+    printf 'TEST: filters IPv6 loopback address\n'
+
+    input="nameserver ::1"
+    result=$(printf '%s\n' "$input" | filter_resolvers)
+    assert_empty "$result" "::1 filtered"
+}
+
+test_passes_real_ipv4() {
+    printf 'TEST: passes real IPv4 nameservers\n'
+
+    input="nameserver 8.8.8.8
+nameserver 8.8.4.4
+nameserver 1.1.1.1"
+    result=$(printf '%s\n' "$input" | filter_resolvers)
+    assert_contains "$result" "8.8.8.8" "passes 8.8.8.8"
+    assert_contains "$result" "8.8.4.4" "passes 8.8.4.4"
+    assert_contains "$result" "1.1.1.1" "passes 1.1.1.1"
+}
+
+test_passes_real_ipv6() {
+    printf 'TEST: passes real IPv6 nameservers\n'
+
+    input="nameserver 2001:4860:4860::8888
+nameserver fd00::1"
+    result=$(printf '%s\n' "$input" | filter_resolvers)
+    assert_contains "$result" "2001:4860:4860::8888" "passes Google IPv6 DNS"
+    assert_contains "$result" "fd00::1" "passes ULA IPv6 address"
+}
+
+test_mixed_loopback_and_real() {
+    printf 'TEST: filters loopback, keeps real in mixed config\n'
+
+    input="nameserver 127.0.0.53
+nameserver ::1
+nameserver 10.0.0.1
+nameserver 172.16.0.1"
+    result=$(printf '%s\n' "$input" | filter_resolvers)
+    assert_not_contains "$result" "127.0.0.53" "127.0.0.53 filtered"
+    assert_not_contains "$result" "::1" "::1 filtered"
+    assert_contains "$result" "10.0.0.1" "10.0.0.1 kept"
+    assert_contains "$result" "172.16.0.1" "172.16.0.1 kept"
+}
+
+test_systemd_resolved_typical() {
+    printf 'TEST: typical systemd-resolved upstream config\n'
+
+    # /run/systemd/resolve/resolv.conf typically looks like this
+    input="# This is /run/systemd/resolve/resolv.conf managed by man:systemd-resolved(8).
+nameserver 192.168.1.1
+search lan"
+    result=$(printf '%s\n' "$input" | filter_resolvers)
+    assert_eq "$result" "192.168.1.1" "extracts router DNS from systemd-resolved"
+}
+
+test_docker_embedded_dns() {
+    printf 'TEST: Docker embedded DNS (127.0.0.11) filtered\n'
+
+    input="nameserver 127.0.0.11
+search openshell_default"
+    result=$(printf '%s\n' "$input" | filter_resolvers)
+    assert_empty "$result" "Docker 127.0.0.11 filtered"
+}
+
+test_ignores_non_nameserver_lines() {
+    printf 'TEST: ignores comments, search, options lines\n'
+
+    input="# nameserver 8.8.8.8
+search example.com
+options ndots:5
+nameserver 1.1.1.1"
+    result=$(printf '%s\n' "$input" | filter_resolvers)
+    assert_eq "$result" "1.1.1.1" "only real nameserver line extracted"
+}
+
+test_empty_input() {
+    printf 'TEST: empty input returns empty\n'
+
+    result=$(printf '' | filter_resolvers)
+    assert_empty "$result" "empty input produces empty output"
+}
+
+test_no_command_injection() {
+    printf 'TEST: malicious resolv.conf entries are not executed\n'
+
+    # These should be extracted as literal strings by awk, not executed
+    input='nameserver $(rm -rf /)
+nameserver 8.8.8.8
+nameserver ; echo pwned
+nameserver `id`'
+    result=$(printf '%s\n' "$input" | filter_resolvers)
+    # awk $2 splits on whitespace: "$(rm" is $2 for line 1, ";" for line 3
+    # None of these are executed — they're just strings
+    assert_contains "$result" "8.8.8.8" "real resolver preserved"
+    assert_not_contains "$result" "pwned" "no command injection"
+}
+
+# ---------------------------------------------------------------------------
+# UPSTREAM_DNS env var tests
+# ---------------------------------------------------------------------------
+# Note: these test the tr/awk pipeline in isolation rather than the full
+# get_upstream_resolvers() function, which requires the entrypoint environment.
+# The pipeline logic is identical; this validates the parsing and filtering.
+
+test_upstream_dns_env_var() {
+    printf 'TEST: UPSTREAM_DNS env var consumed\n'
+    result=$(UPSTREAM_DNS="8.8.8.8,1.1.1.1"; printf '%s\n' "$UPSTREAM_DNS" | tr ',' '\n' | \
+        awk '{ip=$1; if(ip !~ /^127\./ && ip != "::1" && ip != "") print ip}')
+    assert_contains "$result" "8.8.8.8" "first resolver from env var"
+    assert_contains "$result" "1.1.1.1" "second resolver from env var"
+}
+
+test_upstream_dns_env_filters_loopback() {
+    printf 'TEST: UPSTREAM_DNS env var filters loopback\n'
+    result=$(printf '%s\n' "127.0.0.1,8.8.8.8,::1,1.1.1.1" | tr ',' '\n' | \
+        awk '{ip=$1; if(ip !~ /^127\./ && ip != "::1" && ip != "") print ip}')
+    assert_not_contains "$result" "127.0.0.1" "IPv4 loopback filtered from env var"
+    assert_not_contains "$result" "::1" "IPv6 loopback filtered from env var"
+    assert_contains "$result" "8.8.8.8" "real IPv4 kept from env var"
+    assert_contains "$result" "1.1.1.1" "real IPv4 kept from env var"
+}
+
+test_upstream_dns_env_empty() {
+    printf 'TEST: empty UPSTREAM_DNS falls through\n'
+    result=$(printf '' | tr ',' '\n' | \
+        awk '{ip=$1; if(ip !~ /^127\./ && ip != "::1" && ip != "") print ip}')
+    assert_empty "$result" "empty env var produces no output"
+}
+
+test_upstream_dns_env_single() {
+    printf 'TEST: single resolver in UPSTREAM_DNS\n'
+    result=$(printf '%s\n' "10.0.0.1" | tr ',' '\n' | \
+        awk '{ip=$1; if(ip !~ /^127\./ && ip != "::1" && ip != "") print ip}')
+    assert_eq "$result" "10.0.0.1" "single resolver extracted"
+}
+
+# ---------------------------------------------------------------------------
+# Run all tests
+# ---------------------------------------------------------------------------
+
+printf '=== DNS resolver filter tests ===\n\n'
+
+test_filters_ipv4_loopback
+test_filters_ipv6_loopback
+test_passes_real_ipv4
+test_passes_real_ipv6
+test_mixed_loopback_and_real
+test_systemd_resolved_typical
+test_docker_embedded_dns
+test_ignores_non_nameserver_lines
+test_empty_input
+test_no_command_injection
+test_upstream_dns_env_var
+test_upstream_dns_env_filters_loopback
+test_upstream_dns_env_empty
+test_upstream_dns_env_single
+
+printf '\n=== Results: %d passed, %d failed ===\n' "$_PASS" "$_FAIL"
+[ "$_FAIL" -eq 0 ]