Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions e2e/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ func ValidateCommonLinux(ctx context.Context, s *Scenario) {
if !s.VHD.UnsupportedLocalDns && !config.Config.TestPreProvision && !s.VHDCaching {
ValidateLocalDNSService(ctx, s, "enabled")
ValidateLocalDNSResolution(ctx, s, "169.254.10.10")
ValidateLocalDNSIptablesRules(ctx, s)
ValidateLocalDNSExporterMetrics(ctx, s)
}

Expand Down
99 changes: 99 additions & 0 deletions e2e/validators.go
Original file line number Diff line number Diff line change
Expand Up @@ -1482,6 +1482,105 @@ func ValidateLocalDNSResolution(ctx context.Context, s *Scenario, server string)
assert.Contains(s.T, execResult.stdout, fmt.Sprintf("SERVER: %s", server))
}

// ValidateLocalDNSIptablesRules checks that the NOTRACK iptables rules for localdns are correctly
// applied in the raw table. These rules skip connection tracking for DNS traffic to localdns IPs
// to prevent conntrack table exhaustion on busy nodes.
//
// The script executed on the VM performs, in order:
//  1. a static check that localdns.sh uses iptables-restore;
//  2. a check that the raw-table OUTPUT and PREROUTING chains each carry a tcp and a udp
//     "--dport 53 ... NOTRACK" rule, and that the "localdns: skip conntrack" comment tag is present;
//  3. a positive functional check: after DNS lookups against both localdns IPs, no UDP/53
//     conntrack entries may exist for them;
//  4. a negative functional check: the tagged rules are temporarily removed, a lookup is made,
//     and at least one conntrack entry must appear, proving the check in step 3 can detect entries.
//
// Rule removal and restoration go through iptables-restore --noflush rather than re-expanding
// "iptables -S" output on a command line: the saved rules contain a quoted, space-containing
// --comment argument that unquoted shell expansion would split into invalid tokens. An EXIT trap
// re-adds the rules if the script aborts while they are removed, and delete/restore failures fail
// the validation instead of being ignored.
func ValidateLocalDNSIptablesRules(ctx context.Context, s *Scenario) {
	s.T.Helper()
	script := `set -euo pipefail
echo "Checking iptables raw table for localdns NOTRACK rules..."
rules=$(sudo iptables -w -t raw -S 2>&1)
echo "$rules"

# Verify the localdns script uses iptables-restore (not legacy individual iptables calls)
if grep -q "iptables-restore" /opt/azure/containers/localdns/localdns.sh; then
    echo "PASS: localdns.sh uses iptables-restore (batched rules)"
else
    echo "FAIL: localdns.sh does not use iptables-restore — VHD may be outdated"
    exit 1
fi

# Verify rules exist in both OUTPUT and PREROUTING chains for both protocols.
# Here-strings are used instead of "echo | grep -q" so that an early grep exit
# cannot turn into a pipeline failure under pipefail.
for chain in OUTPUT PREROUTING; do
    chain_rules=$(sudo iptables -w -t raw -S "$chain" 2>&1)
    for proto in tcp udp; do
        if ! grep -q "\-p ${proto}.*--dport 53.*NOTRACK" <<<"$chain_rules"; then
            echo "FAIL: missing NOTRACK rule for $proto in $chain chain"
            exit 1
        fi
    done
done

# Verify the comment tag is present (used by cleanup logic)
if ! grep -q "localdns: skip conntrack" <<<"$rules"; then
    echo "FAIL: localdns comment tag not found in iptables rules"
    exit 1
fi

echo "PASS: all localdns NOTRACK iptables rules verified"

# Verify NOTRACK rules are functional by doing DNS lookups and checking no conntrack entries exist
echo "Verifying NOTRACK rules are functional..."

# First, flush any stale conntrack entries for localdns IPs
for ip in 169.254.10.10 169.254.10.11; do
    sudo conntrack -D -d "$ip" -p udp --dport 53 2>/dev/null || true
    sudo conntrack -D -d "$ip" -p tcp --dport 53 2>/dev/null || true
done

# Do DNS lookups with NOTRACK rules in place — should create NO conntrack entries
dig bing.com @169.254.10.10 +short +timeout=2 +tries=1 > /dev/null 2>&1 || true
dig bing.com @169.254.10.11 +short +timeout=2 +tries=1 > /dev/null 2>&1 || true

for ip in 169.254.10.10 169.254.10.11; do
    ct_dns=$(sudo conntrack -L -d "$ip" -p udp --dport 53 2>/dev/null | wc -l)
    if [ "$ct_dns" -gt 0 ]; then
        echo "FAIL: found $ct_dns conntrack entries for $ip:53 — NOTRACK rules not working"
        sudo conntrack -L -d "$ip" -p udp --dport 53 2>/dev/null
        exit 1
    fi
    echo "PASS: no conntrack entries for $ip:53 with NOTRACK rules active"
done

# Negative test: temporarily drop NOTRACK rules, do a DNS lookup, and verify conntrack entries DO appear.
# This proves our conntrack check is actually capable of detecting entries.
echo "Negative test: verifying conntrack entries appear WITHOUT NOTRACK rules..."
saved_rules=$(sudo iptables -w -t raw -S | grep "localdns: skip conntrack")
# Same rules with -A rewritten to -D, one per line.
delete_rules=$(sed 's/^-A /-D /' <<<"$saved_rules")

# Feed rule lines to iptables-restore, which parses the quoted --comment value itself.
# --noflush leaves every other raw-table rule untouched.
apply_raw_rules() {
    printf '*raw\n%s\nCOMMIT\n' "$1" | sudo iptables-restore -w --noflush
}
restore_notrack_rules() {
    apply_raw_rules "$saved_rules"
}

if ! apply_raw_rules "$delete_rules"; then
    echo "FAIL: could not temporarily remove localdns NOTRACK rules"
    exit 1
fi
# From here until the explicit restore below, any abort must put the rules back.
trap restore_notrack_rules EXIT

# Flush any leftover conntrack entries before the negative test
for ip in 169.254.10.10 169.254.10.11; do
    sudo conntrack -D -d "$ip" -p udp --dport 53 2>/dev/null || true
done

# Do a DNS lookup without NOTRACK — this SHOULD create conntrack entries
dig bing.com @169.254.10.10 +short +timeout=2 +tries=1 > /dev/null 2>&1 || true

ct_dns_neg=$(sudo conntrack -L -d 169.254.10.10 -p udp --dport 53 2>/dev/null | wc -l)
echo "Conntrack entries for 169.254.10.10:53 without NOTRACK: $ct_dns_neg"

# Restore NOTRACK rules; a failure here must fail the validation.
trap - EXIT
if ! restore_notrack_rules; then
    echo "FAIL: could not restore localdns NOTRACK rules"
    exit 1
fi

# Clean up conntrack entries created during negative test
sudo conntrack -D -d 169.254.10.10 -p udp --dport 53 2>/dev/null || true

if [ "$ct_dns_neg" -eq 0 ]; then
    echo "FAIL: no conntrack entries appeared even without NOTRACK rules — conntrack check may be broken"
    exit 1
fi
echo "PASS: conntrack entries appeared without NOTRACK, confirming NOTRACK enforcement is real"

echo "PASS: NOTRACK rules are functional — DNS traffic bypasses conntrack"
`
	execScriptOnVMForScenarioValidateExitCode(ctx, s, script, 0, "localdns iptables NOTRACK rules validation failed")
}

// ValidateJournalctlOutput checks if specific content exists in the systemd service logs
func ValidateJournalctlOutput(ctx context.Context, s *Scenario, serviceName string, expectedContent string) {
s.T.Helper()
Expand Down
21 changes: 15 additions & 6 deletions parts/linux/cloud-init/artifacts/localdns.sh
Original file line number Diff line number Diff line change
Expand Up @@ -403,11 +403,12 @@ start_localdns() {
${COREDNS_COMMAND} &

# Wait until the PID file is created.
# Use 0.1s polling interval since CoreDNS typically creates the PID file in <100ms.
local elapsed=0
while [ ! -f "${LOCALDNS_PID_FILE}" ]; do
sleep 1
sleep 0.1
elapsed=$((elapsed + 1))
if [ "$elapsed" -ge "$START_LOCALDNS_TIMEOUT" ]; then
if [ "$elapsed" -ge "$((START_LOCALDNS_TIMEOUT * 10))" ]; then
echo "Timed out waiting for CoreDNS to create PID file at ${LOCALDNS_PID_FILE}."
return 1
fi
Expand Down Expand Up @@ -438,7 +439,7 @@ wait_for_localdns_ready() {
echo "Localdns failed to come online after $timeout_duration seconds (timeout)."
return 1
fi
sleep 1
sleep 0.1
((attempts++))
done
echo "Localdns is online and ready to serve traffic."
Expand All @@ -459,10 +460,19 @@ add_iptable_rules_to_skip_conntrack_from_pods(){
ip addr add ${LOCALDNS_CLUSTER_LISTENER_IP}/32 dev localdns

# Add IPtables rules that skip conntrack for DNS connections coming from pods.
# Use iptables-restore to batch all rules in a single lock acquisition for performance.
echo "Adding iptables rules to skip conntrack for queries to localdns."
local restore_input="*raw"
for RULE in "${IPTABLES_RULES[@]}"; do
eval "${IPTABLES}" -A "${RULE}"
# Extract chain name and remainder, insert comment after chain to match legacy display order.
local chain="${RULE%% *}"
local rule_rest="${RULE#"$chain" }"
restore_input="${restore_input}
-A ${chain} -m comment --comment \"localdns: skip conntrack\" ${rule_rest}"
done
restore_input="${restore_input}
COMMIT"
echo "${restore_input}" | iptables-restore -w --noflush
Comment thread
jingwenw15 marked this conversation as resolved.
}

# Wait for localdns IP to be removed from resolv.conf after networkctl reload.
Expand Down Expand Up @@ -822,7 +832,6 @@ replace_azurednsip_in_corefile || exit $ERR_LOCALDNS_FAIL

# Build IPtable rules.
# ---------------------------------------------------------------------------------------------------------------------
IPTABLES='iptables -w -t raw -m comment --comment "localdns: skip conntrack"'
IPTABLES_RULES=()
build_localdns_iptable_rules

Expand Down Expand Up @@ -852,7 +861,7 @@ fi
start_localdns || exit $ERR_LOCALDNS_FAIL

# Wait to direct traffic to localdns until it's ready.
wait_for_localdns_ready 60 60 || exit $ERR_LOCALDNS_FAIL
wait_for_localdns_ready 600 60 || exit $ERR_LOCALDNS_FAIL

# Disable DNS from DHCP and point the system at localdns.
# --------------------------------------------------------------------------------------------------------------------
Expand Down
19 changes: 14 additions & 5 deletions spec/parts/linux/cloud-init/artifacts/localdns_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,14 @@ EOF
LOCALDNS_NODE_LISTENER_IP="10.0.0.1"
LOCALDNS_CLUSTER_LISTENER_IP="10.0.0.2"
IPTABLES_RULES=("raw -t raw -p udp --dport 53 -j NOTRACK" "raw -t raw -p tcp --dport 53 -j NOTRACK")
Comment thread
jingwenw15 marked this conversation as resolved.
IPTABLES="echo iptables"
MOCK_BIN_DIR=$(mktemp -d)
cat > "${MOCK_BIN_DIR}/iptables-restore" << 'MOCK'
#!/bin/sh
echo "iptables-restore called with args: $*"
cat
Comment thread
jingwenw15 marked this conversation as resolved.
MOCK
chmod +x "${MOCK_BIN_DIR}/iptables-restore"
export PATH="${MOCK_BIN_DIR}:${PATH}"
}
BeforeEach 'setup'
#------------------------- add_iptable_rules_to_skip_conntrack_from_pods -------------------------------------
Expand All @@ -773,11 +780,14 @@ EOF
;;
esac
}
Path prepend "$(pwd)"
When call add_iptable_rules_to_skip_conntrack_from_pods
The output should include "Adding iptables rules to skip conntrack for queries to localdns."
The output should include "iptables -A raw -t raw -p udp --dport 53 -j NOTRACK"
The output should include "iptables -A raw -t raw -p tcp --dport 53 -j NOTRACK"
The output should include "*raw"
The output should include "-A raw -m comment --comment"
The output should include "-p udp"
The output should include "-p tcp"
The output should include "-j NOTRACK"
The output should include "COMMIT"
End

It 'should delete existing localdns interface'
Expand All @@ -795,7 +805,6 @@ EOF
esac
}

Path prepend "$(pwd)"
When call add_iptable_rules_to_skip_conntrack_from_pods
The output should include "Interface localdns already exists, deleting it."
The output should include "Deleting interface: link delete localdns"
Expand Down
Loading