From 24bdc6cc37bdd9a3c91900cf61e477207517cb1f Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Wed, 1 Apr 2026 22:15:08 +0200
Subject: [PATCH 01/19] feat: tune session defaults and hide
 session-check-interval flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- idle-timeout: 60s → 10s, keepalive: 10s → 2s, session-check-interval: 20s → 500ms
- hide session-check-interval from help output, README, and man page
- improve README explanation of keepalive/idle-timeout relationship
- fix server queue-size minimum validation (0 → 32)
---
 README.md             | 19 +++++++++----------
 client/client.go      |  6 +++---
 man/vaydns-client.1   |  5 -----
 vaydns-client/main.go | 18 +++++++++++++++++-
 vaydns-server/main.go |  8 ++++----
 5 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index f0beeb5..bde923c 100644
--- a/README.md
+++ b/README.md
@@ -130,8 +130,8 @@ sudo ip6tables -t nat -I PREROUTING -i eth0 -p udp --dport 53 -j REDIRECT --to-p
 | `-privkey HEX`       | Server private key as hex string                                  | —          |
 | `-gen-key`           | Generate a new keypair and exit                                   | —          |
 | `-mtu N`             | Max UDP payload size for responses                                | `1232`     |
-| `-idle-timeout D`    | Session idle timeout (must match client)                          | `60s`      |
-| `-keepalive D`       | Keepalive ping interval (must match client, must be < idle-timeout) | `10s`      |
+| `-idle-timeout D`    | Session idle timeout (must match client)                          | `10s`      |
+| `-keepalive D`       | Keepalive ping interval (must match client, must be < idle-timeout) | `2s`      |
 | `-fallback ADDR`     | UDP endpoint to forward non-DNS packets to (e.g. `127.0.0.1:8888`) | —          |
 | `-dnstt-compat`      | Use original dnstt wire format (8-byte ClientID, padding prefixes). Also sets `-idle-timeout` to 2m and `-keepalive` to 10s unless explicitly overridden. | `false`    |
 | `-clientid-size N`   | ClientID size in bytes (ignored when `-dnstt-compat` is set)       | `2`        |
@@ -164,17 +164,16 @@ sudo ip6tables -t nat -I PREROUTING -i eth0 -p udp --dport 53 -j REDIRECT --to-p
 
 | Flag                        | Description                                        | Default |
 | --------------------------- | -------------------------------------------------- | ------- |
-| `-idle-timeout D`           | Session idle timeout (must match server)                                                    | `60s`   |
-| `-keepalive D`              | Keepalive ping interval (must match server, must be < idle-timeout)                         | `10s`   |
+| `-idle-timeout D`           | Session idle timeout (must match server)                                                    | `10s`   |
+| `-keepalive D`              | Keepalive ping interval (must match server, must be < idle-timeout)                         | `2s`   |
 | `-max-streams N`            | Max concurrent streams per session (0 = unlimited)                                          | `0`   |
 | `-open-stream-timeout D`    | Timeout for opening an smux stream                                                          | `10s`   |
 | `-reconnect-min D`          | Initial backoff delay for session reconnect                                                  | `1s`    |
 | `-reconnect-max D`          | Max backoff delay (must be >= reconnect-min)                                                 | `30s`   |
-| `-session-check-interval D` | How often to check if the session is alive (should be shorter than idle-timeout)              | `20s`   |
 
-> **Note:** `idle-timeout` and `keepalive` must be set to the same values on both client and server — mismatched values will cause one side to close the session before the other detects it. Keep `keepalive` well below `idle-timeout` (the default 6x ratio allows ~6 ping attempts before timeout).
+> **Note:** `idle-timeout` and `keepalive` must be set to the same values on both client and server — mismatched values will cause one side to close the session before the other detects it. Keep `keepalive` well below `idle-timeout` (the default 5x ratio allows ~5 ping attempts before timeout).
 >
-> `session-check-interval` controls how quickly the client detects a dead session and starts reconnecting — it does not affect when the session dies. A lower value means faster reconnection but can cause unnecessary churn on lossy networks. It does not need to match on client and server.
+> **How they relate:** `keepalive` controls how often smux sends ping frames to prove the session is alive. `idle-timeout` is how long smux waits with no received data (including pings) before declaring the session dead — it applies symmetrically on both sides.
 
 #### UDP transport tuning
 
@@ -393,14 +392,14 @@ On both client and server, `-dnstt-compat` switches to the original dnstt wire f
 | Setting | VayDNS default | With `-dnstt-compat` | Applies to |
 | ------- | -------------- | -------------------- | ---------- |
 | `-max-qname-len` | `101` | `253` | client |
-| `-idle-timeout` | `60s` | `2m` | client and server |
-| `-keepalive` | `10s` | `10s` | client and server |
+| `-idle-timeout` | `10s` | `2m` | client and server |
+| `-keepalive` | `2s` | `10s` | client and server |
 
 All three can be explicitly overridden even when `-dnstt-compat` is set — the flag only changes the defaults, it does not lock the values. For example, `-dnstt-compat -idle-timeout 30s` uses the dnstt wire format with a 30-second idle timeout.
 
 > **Note:** `-dnstt-compat` forces `-record-type` to `txt` (with a warning if another type was set). dnstt only supports TXT records, so other record types are incompatible.
 >
-> The timeout defaults are critical for interop with original dnstt binaries. dnstt uses a 10-second keepalive interval (smux default) and a 2-minute idle timeout. Setting `-idle-timeout` below 10s in compat mode will cause sessions to churn because dnstt peers only send keepalives every 10 seconds. When mixing with dnstt, keep the compat defaults unless you know what you're doing.
+> The timeout defaults are critical for interop with original dnstt binaries. dnstt uses a 10-second keepalive interval (smux default) and a 2-minute idle timeout. Setting `-idle-timeout` below 10s in compat mode will cause sessions to churn because dnstt peers only send keepalives every 10 seconds. When connecting to dnstt, keep the compat defaults unless you know what you're doing.
 
 ### Record types
 
diff --git a/client/client.go b/client/client.go
index 5e1b754..5c4b713 100644
--- a/client/client.go
+++ b/client/client.go
@@ -41,12 +41,12 @@ import (
 
 // Default timeouts for VayDNS mode.
 const (
-	DefaultIdleTimeout          = 60 * time.Second
-	DefaultKeepAlive            = 10 * time.Second
+	DefaultIdleTimeout          = 10 * time.Second
+	DefaultKeepAlive            = 2 * time.Second
 	DefaultOpenStreamTimeout    = 10 * time.Second
 	DefaultReconnectDelay       = 1 * time.Second
 	DefaultReconnectMaxDelay    = 30 * time.Second
-	DefaultSessionCheckInterval = 20 * time.Second
+	DefaultSessionCheckInterval = 500 * time.Millisecond
 	DefaultUDPResponseTimeout   = 500 * time.Millisecond
 	DefaultUDPWorkers           = 100
 	DefaultMaxStreams           = 0 // unlimited
diff --git a/man/vaydns-client.1 b/man/vaydns-client.1
index c277d78..0b92691 100644
--- a/man/vaydns-client.1
+++ b/man/vaydns-client.1
@@ -182,11 +182,6 @@ Must be >= reconnect-min.
 Default:
 .Cm 30s .
 
-.It Fl session-check-interval Ar DURATION
-How often to check whether the current session is alive.
-Default:
-.Cm 500ms .
-
 .It Fl rps Ar N
 Rate limit outgoing DNS queries to
 .Ar N
diff --git a/vaydns-client/main.go b/vaydns-client/main.go
index f6e9ac5..212e5bf 100644
--- a/vaydns-client/main.go
+++ b/vaydns-client/main.go
@@ -68,7 +68,23 @@ Examples:
   %[1]s -dot resolver.example:853 -pubkey-file server.pub -domain t.example.com -listen 127.0.0.1:7000
 
 `, os.Args[0])
-		flag.PrintDefaults()
+		flag.CommandLine.VisitAll(func(f *flag.Flag) {
+			if f.Name == "session-check-interval" {
+				return
+			}
+			fmt.Fprintf(flag.CommandLine.Output(), "  -%s", f.Name)
+			name, usage := flag.UnquoteUsage(f)
+			if len(name) > 0 {
+				fmt.Fprintf(flag.CommandLine.Output(), " %s", name)
+			}
+			if len(f.DefValue) > 0 {
+				fmt.Fprintf(flag.CommandLine.Output(), " (default %s)", f.DefValue)
+			}
+			if len(usage) > 0 {
+				fmt.Fprintf(flag.CommandLine.Output(), "\n    \t%s", usage)
+			}
+			fmt.Fprint(flag.CommandLine.Output(), "\n")
+		})
 		labels := make([]string, 0)
 		labels = append(labels, "none")
 		for _, entry := range client.UTLSClientHelloIDMap() {
diff --git a/vaydns-server/main.go b/vaydns-server/main.go
index ae8cbca..7fb3ec1 100644
--- a/vaydns-server/main.go
+++ b/vaydns-server/main.go
@@ -73,8 +73,8 @@ import (
 )
 
 const (
-	defaultIdleTimeout = 60 * time.Second
-	defaultKeepAlive   = 10 * time.Second
+	defaultIdleTimeout = 10 * time.Second
+	defaultKeepAlive   = 2 * time.Second
 	// Bound the pre-smux handshake so half-open KCP sessions cannot linger
 	// indefinitely and consume server resources.
 	defaultHandshakeTimeout = 15 * time.Second
@@ -1395,8 +1395,8 @@ Example:
 			fmt.Fprintf(os.Stderr, "-keepalive (%s) must be less than -idle-timeout (%s)\n", keepAlive, idleTimeout)
 			os.Exit(1)
 		}
-		if queueSize <= 0 {
-			fmt.Fprintf(os.Stderr, "-queue-size (%d) must be greater than 0\n", queueSize)
+		if queueSize < 32 {
+			fmt.Fprintf(os.Stderr, "-queue-size (%d) must be at least 32\n", queueSize)
 			os.Exit(1)
 		}
 		if kcpWindowSize < 0 {

From 280e389abd2f0a9cf351f68c94166e6b119eadcf Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 1 Apr 2026 22:27:45 +0200
Subject: [PATCH 02/19] chore(main): release 0.2.7 (#61)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 .release-please-manifest.json |  2 +-
 CHANGELOG.md                  | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 954b159..054e8bc 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.2.6"
+  ".": "0.2.7"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 439342b..fb45f67 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Changelog
 
+## [0.2.7](https://github.com/net2share/vaydns/compare/v0.2.6...v0.2.7) (2026-04-01)
+
+
+### Features
+
+* tune session defaults and hide session-check-interval flag ([24bdc6c](https://github.com/net2share/vaydns/commit/24bdc6cc37bdd9a3c91900cf61e477207517cb1f))
+
+
+### Bug Fixes
+
+* **client:** make max streams unlimited by default ([#63](https://github.com/net2share/vaydns/issues/63)) ([4cc8228](https://github.com/net2share/vaydns/commit/4cc8228190d415cc75a32f59d83f7b77ae2af68b))
+* **server:** clarify server accept session/stream warning logs ([#57](https://github.com/net2share/vaydns/issues/57)) ([654b2f1](https://github.com/net2share/vaydns/commit/654b2f1f3dbde6001e19c9f8391436c28e55eca9))
+
 ## [0.2.6](https://github.com/net2share/vaydns/compare/v0.2.5...v0.2.6) (2026-03-29)
 
 

From c4150b272c230a261b4a19ac08057a7b2dcd7a71 Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Wed, 1 Apr 2026 22:42:34 +0200
Subject: [PATCH 03/19] docs(server): update stale dnstt references in package
 doc comments

---
 vaydns-server/main.go | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/vaydns-server/main.go b/vaydns-server/main.go
index 7fb3ec1..32b61c4 100644
--- a/vaydns-server/main.go
+++ b/vaydns-server/main.go
@@ -1,26 +1,26 @@
-// dnstt-server is the server end of a DNS tunnel.
+// vaydns-server is the server end of a DNS tunnel.
 //
 // Usage:
 //
-//	dnstt-server -gen-key [-privkey-file PRIVKEYFILE] [-pubkey-file PUBKEYFILE]
-//	dnstt-server -udp ADDR [-privkey PRIVKEY|-privkey-file PRIVKEYFILE] [-fallback FALLBACKADDR] -domain DOMAIN -upstream UPSTREAMADDR
+//	vaydns-server -gen-key [-privkey-file PRIVKEYFILE] [-pubkey-file PUBKEYFILE]
+//	vaydns-server -udp ADDR [-privkey PRIVKEY|-privkey-file PRIVKEYFILE] [-fallback FALLBACKADDR] -domain DOMAIN -upstream UPSTREAMADDR
 //
 // Example:
 //
-//	dnstt-server -gen-key -privkey-file server.key -pubkey-file server.pub
-//	dnstt-server -udp :53 -privkey-file server.key -domain t.example.com -upstream 127.0.0.1:8000
+//	vaydns-server -gen-key -privkey-file server.key -pubkey-file server.pub
+//	vaydns-server -udp :53 -privkey-file server.key -domain t.example.com -upstream 127.0.0.1:8000
 //
 // With fallback for non-DNS traffic:
 //
-//	dnstt-server -udp :53 -privkey-file server.key -fallback 127.0.0.1:8888 -domain t.example.com -upstream 127.0.0.1:8000
+//	vaydns-server -udp :53 -privkey-file server.key -fallback 127.0.0.1:8888 -domain t.example.com -upstream 127.0.0.1:8000
 //
 // To generate a persistent server private key, first run with the -gen-key
 // option. By default the generated private and public keys are printed to
 // standard output. To save them to files instead, use the -privkey-file and
 // -pubkey-file options.
 //
-//	dnstt-server -gen-key
-//	dnstt-server -gen-key -privkey-file server.key -pubkey-file server.pub
+//	vaydns-server -gen-key
+//	vaydns-server -gen-key -privkey-file server.key -pubkey-file server.pub
 //
 // You can give the server's private key as a file or as a hex string.
 //

From f0337b7daa907330dae8ca2e5a01b737ca70f95e Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Fri, 3 Apr 2026 18:40:42 +0200
Subject: [PATCH 04/19] docs(client): add missing fields and fix stale defaults
 in client library doc

- Add RoundTripper, DialerControl, UDPAcceptErrors, RecordType, OpenStreamTimeout
- Fix SessionCheckInterval default (20s -> 500ms) and HandshakeTimeout (30s -> 15s)
---
 client/client.go       |  4 ++--
 docs/client-library.md | 11 ++++++++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/client/client.go b/client/client.go
index 5c4b713..6bc6acc 100644
--- a/client/client.go
+++ b/client/client.go
@@ -200,8 +200,8 @@ type Tunnel struct {
 	MaxStreams           int                           // default: 0 (0 = unlimited)
 	ReconnectMinDelay    time.Duration                 // default: 1s
 	ReconnectMaxDelay    time.Duration                 // default: 30s
-	SessionCheckInterval time.Duration                 // default: 20s
-	HandshakeTimeout     time.Duration                 // default: 30s
+	SessionCheckInterval time.Duration                 // default: 500ms
+	HandshakeTimeout     time.Duration                 // default: 15s
 	PacketQueueSize      int                           // default: QueueSize (512)
 	KCPWindowSize        int                           // default: PacketQueueSize/2
 	QueueOverflowMode    turbotunnel.QueueOverflowMode // default: drop
diff --git a/docs/client-library.md b/docs/client-library.md
index cc1f84d..25514ec 100644
--- a/docs/client-library.md
+++ b/docs/client-library.md
@@ -71,9 +71,12 @@ All configuration is done through struct fields before calling `Initiate*` or `L
 ```go
 // Resolver options
 r.UTLSClientHelloID = &utls.ClientHelloID{...} // TLS fingerprint
+r.RoundTripper = customTransport                // custom HTTP transport for DoH (overrides UTLSClientHelloID)
+r.DialerControl = controlFunc                   // socket options callback (SO_MARK, SO_BINDTODEVICE, etc.)
 r.UDPWorkers = 200                              // concurrent UDP workers
-r.UDPSharedSocket = true                         // single socket mode
-r.UDPTimeout = 500 * time.Millisecond            // per-query timeout
+r.UDPSharedSocket = true                        // single socket mode
+r.UDPTimeout = 500 * time.Millisecond           // per-query timeout
+r.UDPAcceptErrors = true                        // accept non-NOERROR responses (disables forged filtering)
 
 // Tunnel server options
 ts.DnsttCompat = true    // original dnstt wire format
@@ -81,12 +84,14 @@ ts.ClientIDSize = 1      // smaller ClientID
 ts.MaxQnameLen = 101     // QNAME length constraint
 ts.MaxNumLabels = 2      // label count constraint
 ts.RPS = 200             // rate limit queries/second
+ts.RecordType = "cname"  // DNS record type for downstream data (default: "txt")
 
 // Session options
 t.IdleTimeout = 60 * time.Second
 t.KeepAlive = 10 * time.Second
+t.OpenStreamTimeout = 10 * time.Second
 t.MaxStreams = 256
-t.SessionCheckInterval = 20 * time.Second
+t.SessionCheckInterval = 500 * time.Millisecond
 t.ReconnectMinDelay = 1 * time.Second
 t.ReconnectMaxDelay = 30 * time.Second
 t.HandshakeTimeout = 15 * time.Second

From 056810f63c5c479e20bd65c5de5da9d247f28e63 Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sat, 4 Apr 2026 19:33:20 +0200
Subject: [PATCH 05/19] build: add CGO_ENABLED=0 to CI and release workflows

Dockerfile already had it; CI and release-please builds did not,
producing dynamically linked binaries unnecessarily.
---
 .github/workflows/ci.yml             | 3 ++-
 .github/workflows/release-please.yml | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5356bef..5f81ef2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -19,7 +19,7 @@ jobs:
           go-version: '1.24'
 
       - name: Build
-        run: go build -v ./...
+        run: CGO_ENABLED=0 go build -v ./...
 
       - name: Test
         run: go test -v ./...
@@ -60,6 +60,7 @@ jobs:
 
       - name: Build
         env:
+          CGO_ENABLED: '0'
           GOOS: ${{ matrix.goos }}
           GOARCH: ${{ matrix.goarch }}
           GOARM: ${{ matrix.goarm }}
diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml
index a968f49..0fd2657 100644
--- a/.github/workflows/release-please.yml
+++ b/.github/workflows/release-please.yml
@@ -68,6 +68,7 @@ jobs:
 
       - name: Build
         env:
+          CGO_ENABLED: '0'
           GOOS: ${{ matrix.goos }}
           GOARCH: ${{ matrix.goarch }}
           GOARM: ${{ matrix.goarm }}

From 35331484bce1b628372dc37accae4e96b507841f Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sat, 4 Apr 2026 19:51:00 +0200
Subject: [PATCH 06/19] feat: add -v flag for printing version (#71)

- inject version via ldflags in CI, release, and Docker builds
- defaults to "dev" for local builds
---
 .github/workflows/ci.yml             | 8 +++++---
 .github/workflows/release-please.yml | 7 +++++--
 Dockerfile                           | 5 +++--
 vaydns-client/main.go                | 9 +++++++++
 vaydns-server/main.go                | 9 +++++++++
 5 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5f81ef2..9ded57a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -19,7 +19,7 @@ jobs:
           go-version: '1.24'
 
       - name: Build
-        run: CGO_ENABLED=0 go build -v ./...
+        run: CGO_ENABLED=0 go build -v -ldflags="-X main.version=${GITHUB_SHA::7}" ./...
 
       - name: Test
         run: go test -v ./...
@@ -70,8 +70,9 @@ jobs:
             ARCH="armv7"
           fi
           SUFFIX="${{ matrix.suffix }}"
-          go build -trimpath -ldflags="-s -w" -o "vaydns-client-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-client
-          go build -trimpath -ldflags="-s -w" -o "vaydns-server-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-server
+          VERSION="${GITHUB_SHA::7}"
+          go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o "vaydns-client-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-client
+          go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o "vaydns-server-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-server
 
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
@@ -119,5 +120,6 @@ jobs:
           context: .
           file: Dockerfile
           push: true
+          build-args: VERSION=dev-${{ env.SHORT_SHA }}
           tags: ghcr.io/${{ github.repository }}:dev-${{ env.SHORT_SHA }}
 
diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml
index 0fd2657..6b2ba35 100644
--- a/.github/workflows/release-please.yml
+++ b/.github/workflows/release-please.yml
@@ -78,8 +78,10 @@ jobs:
             ARCH="armv7"
           fi
           SUFFIX="${{ matrix.suffix }}"
-          go build -trimpath -ldflags="-s -w" -o "vaydns-client-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-client
-          go build -trimpath -ldflags="-s -w" -o "vaydns-server-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-server
+          TAG="${{ needs.release-please.outputs.tag_name }}"
+          VERSION="${TAG#v}"
+          go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o "vaydns-client-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-client
+          go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o "vaydns-server-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-server
           chmod +x vaydns-client-* vaydns-server-*
 
       - name: Upload artifact
@@ -148,6 +150,7 @@ jobs:
           context: .
           file: Dockerfile
           push: true
+          build-args: VERSION=${{ steps.version.outputs.VERSION }}
           tags: |
             ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}
             ghcr.io/${{ github.repository }}:latest
diff --git a/Dockerfile b/Dockerfile
index bb2b842..d948aad 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,8 +5,9 @@ COPY go.mod go.sum ./
 RUN go mod download
 COPY . .
 
-RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /vaydns-server ./vaydns-server
-RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /vaydns-client ./vaydns-client
+ARG VERSION=dev
+RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o /vaydns-server ./vaydns-server
+RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o /vaydns-client ./vaydns-client
 
 FROM alpine
 RUN apk add --no-cache curl
diff --git a/vaydns-client/main.go b/vaydns-client/main.go
index 212e5bf..c37cff2 100644
--- a/vaydns-client/main.go
+++ b/vaydns-client/main.go
@@ -20,6 +20,8 @@ import (
 	log "github.com/sirupsen/logrus"
 )
 
+var version = "dev"
+
 func readKeyFromFile(filename string) ([]byte, error) {
 	f, err := os.Open(filename)
 	if err != nil {
@@ -30,6 +32,7 @@ func readKeyFromFile(filename string) ([]byte, error) {
 }
 
 func main() {
+	var showVersion bool
 	var dohURL string
 	var dotAddr string
 	var domainArg string
@@ -140,8 +143,14 @@ Known TLS fingerprints for -utls are:
 
 	var logLevel string
 	flag.StringVar(&logLevel, "log-level", "info", "log level (debug, info, warning, error)")
+	flag.BoolVar(&showVersion, "v", false, "print version and exit")
 	flag.Parse()
 
+	if showVersion {
+		fmt.Println(version)
+		os.Exit(0)
+	}
+
 	level, err := log.ParseLevel(logLevel)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "invalid log level: %s\n", logLevel)
diff --git a/vaydns-server/main.go b/vaydns-server/main.go
index 32b61c4..e043251 100644
--- a/vaydns-server/main.go
+++ b/vaydns-server/main.go
@@ -1194,7 +1194,10 @@ func run(privkey []byte, domain dns.Name, upstream string, dnsConn net.PacketCon
 	return recvLoop(domain, dnsConn, ttConn, ch, fallbackMgr, stats, wireConfig)
 }
 
+var version = "dev"
+
 func main() {
+	var showVersion bool
 	var genKey bool
 	var domainArg string
 	var upstream string
@@ -1250,8 +1253,14 @@ Example:
 
 	var logLevel string
 	flag.StringVar(&logLevel, "log-level", "info", "log level (debug, info, warning, error)")
+	flag.BoolVar(&showVersion, "v", false, "print version and exit")
 	flag.Parse()
 
+	if showVersion {
+		fmt.Println(version)
+		os.Exit(0)
+	}
+
 	level, err := log.ParseLevel(logLevel)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "invalid log level: %s\n", logLevel)

From 8354db2a080ce9543594bc4e71592f33e6489d82 Mon Sep 17 00:00:00 2001
From: ebpfx <262974424+ebpfx@users.noreply.github.com>
Date: Sat, 11 Apr 2026 01:02:27 +0330
Subject: [PATCH 07/19] feat: add NULL and CAA record type support and fix
 server EDNS mtu advertisement (#70)

- add `NULL` and `CAA` to record type parsing and CLI flags
- add `NULL` and `CAA` encode/decode support on client and server
- reuse the existing single-RR MTU calculation path for `TXT`, `NULL`, and `CAA`
- keep `-mtu` warning only for name-limited record types: `CNAME`, `NS`, `MX`, and `SRV`
- use the configured server UDP payload limit in the response OPT RR instead of always advertising `4096`
---
 README.md             |  6 ++++--
 client/dns.go         | 20 ++++++++++++--------
 dns/dns.go            | 42 +++++++++++++++++++++++++++++++++++++++++-
 vaydns-client/main.go |  2 +-
 vaydns-server/main.go | 30 ++++++++++++++++++++++--------
 5 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index bde923c..b002a90 100644
--- a/README.md
+++ b/README.md
@@ -135,7 +135,7 @@ sudo ip6tables -t nat -I PREROUTING -i eth0 -p udp --dport 53 -j REDIRECT --to-p
 | `-fallback ADDR`     | UDP endpoint to forward non-DNS packets to (e.g. `127.0.0.1:8888`) | —          |
 | `-dnstt-compat`      | Use original dnstt wire format (8-byte ClientID, padding prefixes). Also sets `-idle-timeout` to 2m and `-keepalive` to 10s unless explicitly overridden. | `false`    |
 | `-clientid-size N`   | ClientID size in bytes (ignored when `-dnstt-compat` is set)       | `2`        |
-| `-record-type TYPE`  | DNS record type for downstream data: `txt`, `cname`, `a`, `aaaa`, `mx`, `ns`, `srv`. Must match the client. Ignored (forced to `txt`) when `-dnstt-compat` is set. | `txt`      |
+| `-record-type TYPE`  | DNS record type for downstream data: `txt`, `null`, `cname`, `a`, `aaaa`, `mx`, `ns`, `srv`, `caa`. Must match the client. Ignored (forced to `txt`) when `-dnstt-compat` is set. | `txt`      |
 | `-queue-size N`      | Packet queue size for transport and DNS layers                    | `512`      |
 | `-kcp-window-size N` | KCP send/receive window size in packets (0 = queue-size/2)        | `0`        |
 | `-queue-overflow MODE` | Queue overflow behavior: `drop` (silent discard) or `block` (backpressure) | `drop`     |
@@ -224,7 +224,7 @@ These reduce upstream throughput but improve compatibility. The minimum effectiv
 | `-rps N`           | Rate limit outgoing DNS queries per second (0 = unlimited). Uses a token bucket with 1-second burst allowance. | `0`             |
 | `-dnstt-compat`    | Use original dnstt wire format (8-byte ClientID, padding prefixes). Sets `-max-qname-len` to 253 unless explicitly overridden. Forces `-record-type` to `txt` with a warning if another type is set. | `false`         |
 | `-clientid-size N` | ClientID size in bytes (ignored when `-dnstt-compat` is set) | `2`             |
-| `-record-type TYPE` | DNS record type for downstream data: `txt`, `cname`, `a`, `aaaa`, `mx`, `ns`, `srv`. Must match the server. | `txt`           |
+| `-record-type TYPE` | DNS record type for downstream data: `txt`, `null`, `cname`, `a`, `aaaa`, `mx`, `ns`, `srv`, `caa`. Must match the server. | `txt`           |
 | `-utls SPEC`       | TLS fingerprint distribution (see below)                   | weighted random |
 | `-log-level LEVEL` | Log level: debug, info, warning, error                     | `info`          |
 
@@ -408,12 +408,14 @@ VayDNS supports multiple DNS record types for downstream data encoding. Both cli
 | Type | Description | Capacity |
 | ---- | ----------- | -------- |
 | `txt` | TXT record (default). Highest capacity, compatible with dnstt. | Bounded by UDP payload (~1200 bytes) |
+| `null` | NULL record. Raw binary payload in a single RR. | Bounded by UDP payload |
 | `cname` | CNAME record. Data encoded as a DNS name under the tunnel domain. | Bounded by 255-byte DNS name limit |
 | `ns` | NS record. Same encoding as CNAME. | Same as CNAME |
 | `mx` | MX record. 2-byte preference header + name encoding. | Same as CNAME |
 | `srv` | SRV record. 6-byte header + name encoding. | Same as CNAME |
 | `a` | A records. Data split into 4-byte chunks across multiple answer RRs. | Bounded by UDP payload |
 | `aaaa` | AAAA records. Data split into 16-byte chunks across multiple answer RRs. | Bounded by UDP payload |
+| `caa` | CAA record. Payload encoded in the value portion of a fixed `issue` property. | Bounded by UDP payload |
 
 > **Compatibility:** Old VayDNS clients (pre-record-type) only send TXT queries. A new server with the default `-record-type txt` is fully compatible with old clients. Using a non-TXT type requires updating both client and server.
 
diff --git a/client/dns.go b/client/dns.go
index 729dece..0453b78 100644
--- a/client/dns.go
+++ b/client/dns.go
@@ -150,8 +150,8 @@ func forgedInfoMilestone(total uint64) bool {
 
 // DNSPacketConn provides a packet-sending and -receiving interface over various
 // forms of DNS. It handles the details of how packets and padding are encoded
-// as a DNS name in the Question section of an upstream query, and as a TXT RR
-// in downstream responses.
+// as a DNS name in the Question section of an upstream query, and as an RR in
+// downstream responses.
 //
 // DNSPacketConn does not handle the mechanics of actually sending and receiving
 // encoded DNS messages. That is rather the responsibility of some other
@@ -166,7 +166,8 @@ type DNSPacketConn struct {
 	clientID   turbotunnel.ClientID
 	wireConfig turbotunnel.WireConfig
 	domain     dns.Name
-	// rrType is the DNS record type used for downstream data (TXT, CNAME, A, AAAA, MX, NS, or SRV).
+	// rrType is the DNS record type used for downstream data (TXT, NULL, CNAME,
+	// A, AAAA, MX, NS, SRV, or CAA).
 	rrType uint16
 	// Sending on pollChan permits sendLoop to send an empty polling query.
 	// sendLoop also does its own polling according to a time schedule.
@@ -259,11 +260,10 @@ func (c *DNSPacketConn) TransportErrors() <-chan error {
 	return c.transportErr
 }
 
-// dnsResponsePayload extracts the downstream payload of a DNS response, encoded
-// into the RDATA of a TXT or CNAME RR. It returns (nil, true) when the response
-// has a non-NoError RCODE, indicating a forged or hijacked response. It returns
-// (payload, false) on success or (nil, false) when the response doesn't pass
-// format checks.
+// dnsResponsePayload extracts the downstream payload of a DNS response. It
+// returns (nil, true) when the response has a non-NoError RCODE, indicating a
+// forged or hijacked response. It returns (payload, false) on success or
+// (nil, false) when the response doesn't pass format checks.
 func dnsResponsePayload(resp *dns.Message, domain dns.Name, rrType uint16) ([]byte, bool) {
 	if resp.Flags&0x8000 != 0x8000 {
 		// QR != 1, this is not a response.
@@ -318,6 +318,10 @@ func dnsResponsePayload(resp *dns.Message, domain dns.Name, rrType uint16) ([]by
 	var payload []byte
 	var err error
 	switch rrType {
+	case dns.RRTypeNULL:
+		payload, err = dns.DecodeRDataNULL(answer.Data)
+	case dns.RRTypeCAA:
+		payload, err = dns.DecodeRDataCAA(answer.Data)
 	case dns.RRTypeCNAME:
 		payload, err = dns.DecodeRDataCNAME(answer.Data, domain)
 	case dns.RRTypeNS:
diff --git a/dns/dns.go b/dns/dns.go
index 8f35a6a..909e054 100644
--- a/dns/dns.go
+++ b/dns/dns.go
@@ -50,10 +50,12 @@ const (
 	RRTypeA     = 1
 	RRTypeNS    = 2
 	RRTypeCNAME = 5
+	RRTypeNULL  = 10
 	RRTypeMX    = 15
 	RRTypeTXT   = 16
 	RRTypeAAAA  = 28
 	RRTypeSRV   = 33
+	RRTypeCAA   = 257
 	// https://tools.ietf.org/html/rfc6891#section-6.1.1
 	RRTypeOPT = 41
 
@@ -78,6 +80,8 @@ func ParseRecordType(s string) (uint16, error) {
 		return RRTypeTXT, nil
 	case "cname":
 		return RRTypeCNAME, nil
+	case "null":
+		return RRTypeNULL, nil
 	case "a":
 		return RRTypeA, nil
 	case "aaaa":
@@ -88,8 +92,10 @@ func ParseRecordType(s string) (uint16, error) {
 		return RRTypeNS, nil
 	case "srv":
 		return RRTypeSRV, nil
+	case "caa":
+		return RRTypeCAA, nil
 	default:
-		return 0, fmt.Errorf("unknown record type %q: must be one of: txt, cname, a, aaaa, mx, ns, srv", s)
+		return 0, fmt.Errorf("unknown record type %q: must be one of: txt, cname, null, a, aaaa, mx, ns, srv, caa", s)
 	}
 }
 
@@ -692,6 +698,40 @@ func EncodeRDataTXT(p []byte) []byte {
 	return buf.Bytes()
 }
 
+// DecodeRDataNULL decodes NULL RDATA as a raw byte slice.
+// https://tools.ietf.org/html/rfc1035#section-3.3.10
+func DecodeRDataNULL(p []byte) ([]byte, error) { return p, nil }
+
+// EncodeRDataNULL encodes a slice of bytes as NULL RDATA.
+// https://tools.ietf.org/html/rfc1035#section-3.3.10
+func EncodeRDataNULL(p []byte) []byte { return p }
+
+// DecodeRDataCAA decodes CAA RDATA and returns the value portion.
+// https://datatracker.ietf.org/doc/html/rfc8659
+func DecodeRDataCAA(p []byte) ([]byte, error) {
+	if len(p) < 2 {
+		return nil, io.ErrUnexpectedEOF
+	}
+	tagLen := int(p[1])
+	p = p[2:]
+	if len(p) < tagLen {
+		return nil, io.ErrUnexpectedEOF
+	}
+	return p[tagLen:], nil
+}
+
+// EncodeRDataCAA encodes a slice of bytes as CAA RDATA using a fixed
+// "issue" tag so the payload lives entirely in the value portion.
+// https://datatracker.ietf.org/doc/html/rfc8659
+func EncodeRDataCAA(p []byte) []byte {
+	const tag = "issue"
+	rdata := make([]byte, 2+len(tag)+len(p))
+	rdata[1] = byte(len(tag))
+	copy(rdata[2:], tag)
+	copy(rdata[2+len(tag):], p)
+	return rdata
+}
+
 // base32Encoding is a base32 encoding without padding, used for CNAME RDATA.
 var base32Encoding = base32.StdEncoding.WithPadding(base32.NoPadding)
 
diff --git a/vaydns-client/main.go b/vaydns-client/main.go
index c37cff2..fc7e621 100644
--- a/vaydns-client/main.go
+++ b/vaydns-client/main.go
@@ -136,7 +136,7 @@ Known TLS fingerprints for -utls are:
 	flag.BoolVar(&udpAcceptErrors, "udp-accept-errors", false, "accept DNS error responses instead of filtering them (disables censorship evasion)")
 	flag.BoolVar(&compatDnstt, "dnstt-compat", false, "use original dnstt wire format (8-byte ClientID, padding prefixes)")
 	flag.IntVar(&clientIDSize, "clientid-size", 2, "client ID size in bytes (ignored when -dnstt-compat is set)")
-	flag.StringVar(&recordTypeStr, "record-type", "txt", "DNS record type for downstream data (txt, cname, a, aaaa, mx, ns, srv)")
+	flag.StringVar(&recordTypeStr, "record-type", "txt", "DNS record type for downstream data (txt, null, cname, a, aaaa, mx, ns, srv, caa)")
 	flag.IntVar(&queueSize, "queue-size", turbotunnel.QueueSize, "packet queue size for transport and DNS layers")
 	flag.IntVar(&kcpWindowSize, "kcp-window-size", 0, "KCP send/receive window size in packets (0 = queue-size/2)")
 	flag.StringVar(&queueOverflowStr, "queue-overflow", string(turbotunnel.DefaultQueueOverflowMode), "queue overflow behavior: drop or block")
diff --git a/vaydns-server/main.go b/vaydns-server/main.go
index e043251..71e56ec 100644
--- a/vaydns-server/main.go
+++ b/vaydns-server/main.go
@@ -421,6 +421,11 @@ func nextPacketDnstt(r *bytes.Reader) ([]byte, error) {
 // this query. If the returned dns.Message has an Rcode() of dns.RcodeNoError,
 // the message is a candidate for for carrying downstream data in a TXT record.
 func responseFor(query *dns.Message, domain dns.Name) (*dns.Message, []byte) {
+	responsePayloadSize := uint16(maxUDPPayload)
+	if int(responsePayloadSize) != maxUDPPayload {
+		responsePayloadSize = 0xffff
+	}
+
 	resp := &dns.Message{
 		ID:       query.ID,
 		Flags:    0x8000, // QR = 1, RCODE = no error
@@ -455,7 +460,7 @@ func responseFor(query *dns.Message, domain dns.Name) (*dns.Message, []byte) {
 		resp.Additional = append(resp.Additional, dns.RR{
 			Name:  dns.Name{},
 			Type:  dns.RRTypeOPT,
-			Class: 4096, // responder's UDP payload size
+			Class: responsePayloadSize, // responder's UDP payload size
 			TTL:   0,
 			Data:  []byte{},
 		})
@@ -778,6 +783,10 @@ func encodeResponsePayload(rec *record, data []byte, domain dns.Name) error {
 				Data:  chunk,
 			}
 		}
+	case dns.RRTypeNULL:
+		rec.Resp.Answer[0].Data = dns.EncodeRDataNULL(data)
+	case dns.RRTypeCAA:
+		rec.Resp.Answer[0].Data = dns.EncodeRDataCAA(data)
 	case dns.RRTypeCNAME:
 		rdata, err := dns.EncodeRDataCNAME(data, domain)
 		if err != nil {
@@ -941,15 +950,15 @@ func sendLoop(dnsConn net.PacketConn, ttConn *turbotunnel.QueuePacketConn, ch <-
 	return nil
 }
 
-// computeMaxEncodedPayload computes the maximum amount of downstream TXT RR
-// data that keep the overall response size less than maxUDPPayload, in the
+// computeMaxEncodedPayload computes the maximum amount of downstream single-RR
+// payload that keeps the overall response size less than maxUDPPayload, in the
 // worst case when the response answers a query that has a maximum-length name
 // in its Question section. Returns 0 in the case that no amount of data makes
 // the overall response size small enough.
 //
 // This function needs to be kept in sync with sendLoop with regard to how it
 // builds candidate responses.
-func computeMaxEncodedPayload(limit int) int {
+func computeMaxEncodedPayload(limit int, encode func([]byte) []byte) int {
 	// 64+64+64+62 octets, needs to be base32-decodable.
 	maxLengthName, err := dns.NewName([][]byte{
 		[]byte("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
@@ -1014,7 +1023,7 @@ func computeMaxEncodedPayload(limit int) int {
 	high := 32768
 	for low+1 < high {
 		mid := (low + high) / 2
-		resp.Answer[0].Data = dns.EncodeRDataTXT(make([]byte, mid))
+		resp.Answer[0].Data = encode(make([]byte, mid))
 		buf, err := resp.WireFormat()
 		if err != nil {
 			panic(err)
@@ -1136,8 +1145,12 @@ func run(privkey []byte, domain dns.Name, upstream string, dnsConn net.PacketCon
 		maxEncodedPayload = computeMaxEncodedPayloadMultiRR(maxUDPPayload, 4)
 	case dns.RRTypeAAAA:
 		maxEncodedPayload = computeMaxEncodedPayloadMultiRR(maxUDPPayload, 16)
+	case dns.RRTypeNULL:
+		maxEncodedPayload = computeMaxEncodedPayload(maxUDPPayload, dns.EncodeRDataNULL)
+	case dns.RRTypeCAA:
+		maxEncodedPayload = computeMaxEncodedPayload(maxUDPPayload, dns.EncodeRDataCAA)
 	default:
-		maxEncodedPayload = computeMaxEncodedPayload(maxUDPPayload)
+		maxEncodedPayload = computeMaxEncodedPayload(maxUDPPayload, dns.EncodeRDataTXT)
 	}
 	// 2 bytes accounts for a packet length prefix.
 	mtu := maxEncodedPayload - 2
@@ -1246,7 +1259,7 @@ Example:
 	flag.StringVar(&keepAliveStr, "keepalive", defaultKeepAlive.String(), "keepalive ping interval (e.g. 2s, 500ms); must be less than idle-timeout")
 	flag.BoolVar(&compatDnstt, "dnstt-compat", false, "use original dnstt wire format (8-byte ClientID, padding prefixes)")
 	flag.IntVar(&clientIDSize, "clientid-size", 2, "client ID size in bytes (ignored when -dnstt-compat is set)")
-	flag.StringVar(&recordTypeStr, "record-type", "txt", "DNS record type for downstream data (txt, cname, a, aaaa, mx, ns, srv)")
+	flag.StringVar(&recordTypeStr, "record-type", "txt", "DNS record type for downstream data (txt, null, cname, a, aaaa, mx, ns, srv, caa)")
 	flag.IntVar(&queueSize, "queue-size", turbotunnel.QueueSize, "packet queue size for DNS tunnel transport")
 	flag.IntVar(&kcpWindowSize, "kcp-window-size", 0, "KCP send/receive window size in packets (0 = queue-size/2)")
 	flag.StringVar(&queueOverflowStr, "queue-overflow", string(turbotunnel.DefaultQueueOverflowMode), "queue overflow behavior: drop or block")
@@ -1462,7 +1475,8 @@ Example:
 		}
 		log.Infof("wire config: clientid-size=%d compat=%v", wireConfig.ClientIDSize, wireConfig.Compat)
 
-		if recordType != dns.RRTypeTXT {
+		switch recordType {
+		case dns.RRTypeCNAME, dns.RRTypeNS, dns.RRTypeMX, dns.RRTypeSRV:
 			explicitFlags := make(map[string]bool)
 			flag.Visit(func(f *flag.Flag) {
 				explicitFlags[f.Name] = true

From 0d87441d53356f9f675ede19b7bc0f72b7a214e7 Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Fri, 10 Apr 2026 23:40:42 +0200
Subject: [PATCH 08/19] test(dns): add NULL and CAA encode/decode unit tests

- Add round-trip, identity, error-path, and wire-format tests
- Add resolver filtering note for NULL record in README
- Add CAA flags comment in EncodeRDataCAA
- Update client library doc with new record type options
---
 README.md              |  2 +-
 dns/dns.go             |  1 +
 dns/dns_test.go        | 96 ++++++++++++++++++++++++++++++++++++++++++
 docs/client-library.md |  2 +-
 4 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b002a90..7259542 100644
--- a/README.md
+++ b/README.md
@@ -408,7 +408,7 @@ VayDNS supports multiple DNS record types for downstream data encoding. Both cli
 | Type | Description | Capacity |
 | ---- | ----------- | -------- |
 | `txt` | TXT record (default). Highest capacity, compatible with dnstt. | Bounded by UDP payload (~1200 bytes) |
-| `null` | NULL record. Raw binary payload in a single RR. | Bounded by UDP payload |
+| `null` | NULL record. Raw binary payload in a single RR. Some recursive resolvers may filter or refuse to relay NULL records. | Bounded by UDP payload |
 | `cname` | CNAME record. Data encoded as a DNS name under the tunnel domain. | Bounded by 255-byte DNS name limit |
 | `ns` | NS record. Same encoding as CNAME. | Same as CNAME |
 | `mx` | MX record. 2-byte preference header + name encoding. | Same as CNAME |
diff --git a/dns/dns.go b/dns/dns.go
index 909e054..57eb1a3 100644
--- a/dns/dns.go
+++ b/dns/dns.go
@@ -726,6 +726,7 @@ func DecodeRDataCAA(p []byte) ([]byte, error) {
 func EncodeRDataCAA(p []byte) []byte {
 	const tag = "issue"
 	rdata := make([]byte, 2+len(tag)+len(p))
+	// rdata[0] = 0 (flags; bit 7 is the "critical" flag per RFC 8659 §4.1)
 	rdata[1] = byte(len(tag))
 	copy(rdata[2:], tag)
 	copy(rdata[2+len(tag):], p)
diff --git a/dns/dns_test.go b/dns/dns_test.go
index 4c90b9e..59fe22f 100644
--- a/dns/dns_test.go
+++ b/dns/dns_test.go
@@ -805,6 +805,102 @@ func TestEncodeDecodeRDataAAAA(t *testing.T) {
 	}
 }
 
+func TestEncodeDecodeRDataNULL(t *testing.T) {
+	for _, p := range [][]byte{
+		{},
+		{0x00},
+		{0x01, 0x02, 0x03},
+		bytes.Repeat([]byte{0xab}, 100),
+		bytes.Repeat([]byte{0xff}, 1000),
+	} {
+		rdata := EncodeRDataNULL(p)
+		decoded, err := DecodeRDataNULL(rdata)
+		if err != nil {
+			t.Errorf("DecodeRDataNULL(%x): %v", rdata, err)
+			continue
+		}
+		if !bytes.Equal(decoded, p) {
+			t.Errorf("NULL round-trip failed for len=%d: got len=%d", len(p), len(decoded))
+		}
+	}
+}
+
+func TestRDataNULLIdentity(t *testing.T) {
+	// NULL encode/decode should be identity — no framing overhead.
+	p := []byte{0x01, 0x02, 0x03}
+	if !bytes.Equal(EncodeRDataNULL(p), p) {
+		t.Error("EncodeRDataNULL should return input unchanged")
+	}
+	decoded, _ := DecodeRDataNULL(p)
+	if !bytes.Equal(decoded, p) {
+		t.Error("DecodeRDataNULL should return input unchanged")
+	}
+}
+
+func TestDecodeRDataCAA(t *testing.T) {
+	for _, test := range []struct {
+		desc    string
+		p       []byte
+		decoded []byte
+		err     error
+	}{
+		{"empty input", []byte{}, nil, io.ErrUnexpectedEOF},
+		{"single byte", []byte{0x00}, nil, io.ErrUnexpectedEOF},
+		{"tag length exceeds data", []byte{0x00, 0x05, 'a'}, nil, io.ErrUnexpectedEOF},
+		{"tag only, no value", []byte{0x00, 0x05, 'i', 's', 's', 'u', 'e'}, []byte{}, nil},
+		{"tag + value", []byte{0x00, 0x05, 'i', 's', 's', 'u', 'e', 0xaa, 0xbb}, []byte{0xaa, 0xbb}, nil},
+		{"zero-length tag", []byte{0x00, 0x00, 0x01, 0x02}, []byte{0x01, 0x02}, nil},
+		{"flags byte ignored", []byte{0x80, 0x05, 'i', 's', 's', 'u', 'e', 0xff}, []byte{0xff}, nil},
+	} {
+		decoded, err := DecodeRDataCAA(test.p)
+		if err != test.err {
+			t.Errorf("%s: got err %v, want %v", test.desc, err, test.err)
+			continue
+		}
+		if err == nil && !bytes.Equal(decoded, test.decoded) {
+			t.Errorf("%s: got %x, want %x", test.desc, decoded, test.decoded)
+		}
+	}
+}
+
+func TestEncodeRDataCAA(t *testing.T) {
+	p := []byte{0x01, 0x02, 0x03}
+	rdata := EncodeRDataCAA(p)
+	// Expected: flags(0) + tagLen(5) + "issue" + payload
+	expected := append([]byte{0x00, 0x05, 'i', 's', 's', 'u', 'e'}, p...)
+	if !bytes.Equal(rdata, expected) {
+		t.Errorf("EncodeRDataCAA(%x) = %x, want %x", p, rdata, expected)
+	}
+}
+
+func TestEncodeRDataCAAEmpty(t *testing.T) {
+	rdata := EncodeRDataCAA([]byte{})
+	// Even with empty payload, should have flags + tagLen + tag.
+	if len(rdata) != 7 {
+		t.Errorf("EncodeRDataCAA(empty) length = %d, want 7", len(rdata))
+	}
+}
+
+func TestRDataCAARoundTrip(t *testing.T) {
+	for _, p := range [][]byte{
+		{},
+		{0x00},
+		{0x01, 0x02, 0x03},
+		bytes.Repeat([]byte{0xab}, 100),
+		bytes.Repeat([]byte{0xff}, 1000),
+	} {
+		rdata := EncodeRDataCAA(p)
+		decoded, err := DecodeRDataCAA(rdata)
+		if err != nil {
+			t.Errorf("CAA round-trip decode error for len=%d: %v", len(p), err)
+			continue
+		}
+		if !bytes.Equal(decoded, p) {
+			t.Errorf("CAA round-trip failed for len=%d: got len=%d", len(p), len(decoded))
+		}
+	}
+}
+
 func TestReadRRMXCompression(t *testing.T) {
 	// DNS message with MX answer using compression pointer in exchange name.
 	msg := []byte{
diff --git a/docs/client-library.md b/docs/client-library.md
index 25514ec..391b6b0 100644
--- a/docs/client-library.md
+++ b/docs/client-library.md
@@ -84,7 +84,7 @@ ts.ClientIDSize = 1      // smaller ClientID
 ts.MaxQnameLen = 101     // QNAME length constraint
 ts.MaxNumLabels = 2      // label count constraint
 ts.RPS = 200             // rate limit queries/second
-ts.RecordType = "cname"  // DNS record type for downstream data (default: "txt")
+ts.RecordType = "cname"  // DNS record type for downstream data: txt, null, cname, a, aaaa, mx, ns, srv, caa (default: "txt")
 
 // Session options
 t.IdleTimeout = 60 * time.Second

From a0ff70110d5e96686ab8f4c8e1c44cd09be07a75 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 10 Apr 2026 23:43:53 +0200
Subject: [PATCH 09/19] chore(main): release 0.2.8 (#65)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 .release-please-manifest.json | 2 +-
 CHANGELOG.md                  | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 054e8bc..e7cf9b3 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.2.7"
+  ".": "0.2.8"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fb45f67..5b48bc8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## [0.2.8](https://github.com/net2share/vaydns/compare/v0.2.7...v0.2.8) (2026-04-10)
+
+
+### Features
+
+* add -v flag for printing version ([#71](https://github.com/net2share/vaydns/issues/71)) ([3533148](https://github.com/net2share/vaydns/commit/35331484bce1b628372dc37accae4e96b507841f))
+* add NULL and CAA record type support and fix server EDNS mtu advertisement ([#70](https://github.com/net2share/vaydns/issues/70)) ([8354db2](https://github.com/net2share/vaydns/commit/8354db2a080ce9543594bc4e71592f33e6489d82))
+
 ## [0.2.7](https://github.com/net2share/vaydns/compare/v0.2.6...v0.2.7) (2026-04-01)
 
 

From 4dcdee0310fe7729500c3c9be5b541dfbe93a9a4 Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sun, 12 Apr 2026 00:36:37 +0200
Subject: [PATCH 10/19] fix(multi-resolver): isolate single-entry transport
 errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A read or write error on any one resolver entry previously propagated to
DNSPacketConn via recvChan, tearing down the entire tunnel session — the
opposite of what multi-resolver is meant to deliver. In practice only DoT
triggered the cascade (UDP/DoH workers retry silently), but a mixed setup
with one flaky DoT would reconnect in a loop.

- Reader goroutines now mark their entry dead on transport error and
  exit, instead of pushing error-bearing results onto recvChan.
- resolverEntry gains a sticky atomic dead flag; selection helpers and
  probe targeting skip dead entries; recomputeState short-circuits on
  them so the decay logic cannot flip Down back to Unknown.
- MultiResolver gains an aliveCount/allDead pair; the last dying reader
  closes allDead, causing ReadFrom/WriteTo to return a terminal
  "all resolvers are down" error. This is the only condition that
  should cascade to tunnel reconnect.
- WriteTo retries with the next alive entry on a writePacket failure,
  marking the failed entry dead. Error wrapping handles the empty case
  without a %w nil.
- ReadFrom drains any buffered packet from a deceased reader before
  surfacing the terminal error, avoiding packet loss at teardown.
- markDead holds mu during the transition and returns true only on the
  alive-to-dead edge, so concurrent observers (reader + writer seeing
  the same Close) cannot double-decrement aliveCount.

Unit tests in client/multi_resolver_test.go exercise both symptoms with
a fake net.PacketConn:
- TestMultiResolver_DeadEntryDoesNotBreakReadFrom — ReadFrom must
  return bytes from the working entry after a peer errors.
- TestMultiResolver_FailedEntryExcludedFromSelection — selectPrimary
  must stop returning the failed entry after its reader exits.
---
 client/multi_resolver.go      | 186 +++++++++++++++++++++++----
 client/multi_resolver_test.go | 229 ++++++++++++++++++++++++++++++++++
 2 files changed, 389 insertions(+), 26 deletions(-)
 create mode 100644 client/multi_resolver_test.go

diff --git a/client/multi_resolver.go b/client/multi_resolver.go
index c46f791..d1fea54 100644
--- a/client/multi_resolver.go
+++ b/client/multi_resolver.go
@@ -66,6 +66,13 @@ type resolverEntry struct {
 	invalidCount atomic.Int64
 	timeoutCount atomic.Int64
 
+	// dead is set when the entry's transport has reported an unrecoverable
+	// error and its reader goroutine has exited. A dead entry is permanently
+	// excluded from selection and never transitions back — the only way to
+	// recover is to rebuild the entire MultiResolver (which happens on a
+	// full tunnel reconnect).
+	dead atomic.Bool
+
 	mu        sync.Mutex
 	pending   map[uint16]time.Time
 	lastWrite time.Time
@@ -74,6 +81,31 @@ type resolverEntry struct {
 	state     ResolverState
 }
 
+// markDead transitions the entry to a permanent Down state. It is idempotent
+// and returns true only when this call was the one that transitioned the
+// entry — callers rely on this to avoid double-counting the entry in
+// MultiResolver.aliveCount when multiple goroutines detect the same transport
+// failure concurrently.
+//
+// The dead flag and the state field are set together under mu so that
+// recomputeState (also holding mu) cannot observe an inconsistent snapshot
+// where dead is set but state has not yet been transitioned to Down.
+func (e *resolverEntry) markDead() bool {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	if e.dead.Load() {
+		return false
+	}
+	e.dead.Store(true)
+	e.state = ResolverStateDown
+	return true
+}
+
+// isDead reports whether markDead has been called on this entry.
+func (e *resolverEntry) isDead() bool {
+	return e.dead.Load()
+}
+
 func (e *resolverEntry) writePacket(b []byte) (int, error) {
 	now := time.Now()
 	e.trackOutgoingID(b, now)
@@ -156,8 +188,17 @@ func (e *resolverEntry) expirePending(now time.Time) {
 }
 
 func (e *resolverEntry) recomputeState(now time.Time) {
+	// Dead is sticky — never recompute it back to any other state. We
+	// re-check under the lock after acquiring it, in case markDead raced
+	// in between the outer check and acquiring the lock.
+	if e.isDead() {
+		return
+	}
 	e.mu.Lock()
 	defer e.mu.Unlock()
+	if e.dead.Load() {
+		return
+	}
 
 	timeouts := e.timeoutCount.Load()
 	invalid := e.invalidCount.Load()
@@ -238,6 +279,15 @@ type MultiResolver struct {
 	recvChan  chan multiReadResult
 	closed    chan struct{}
 	closeOnce sync.Once
+
+	// aliveCount tracks how many entries still have a live reader
+	// goroutine. When it reaches zero, allDead is closed, causing pending
+	// ReadFrom/WriteTo calls to return an "all resolvers down" error,
+	// which is the only condition that should propagate a transport
+	// error to the upper tunnel session.
+	aliveCount  atomic.Int32
+	allDead     chan struct{}
+	allDeadOnce sync.Once
 }
 
 // NewMultiResolver creates a MultiResolver from a slice of Resolver configs.
@@ -269,25 +319,57 @@ func NewMultiResolver(resolvers []Resolver, mode SelectionMode, queueSize int, o
 		mode:     mode,
 		recvChan: make(chan multiReadResult, len(entries)*4),
 		closed:   make(chan struct{}),
+		allDead:  make(chan struct{}),
 	}
-	for _, e := range entries {
+	mr.aliveCount.Store(int32(len(entries)))
+	mr.startReaders()
+	go mr.healthWorker()
+	return mr, nil
+}
+
+// entryDied is called whenever a transport error is observed for an entry,
+// either from its reader goroutine or from a failed write. It marks the entry
+// dead and, if this was the transition from alive to dead (as opposed to a
+// second observer of the same failure), decrements aliveCount. When the last
+// alive entry dies, allDead is closed so pending ReadFrom/WriteTo callers can
+// unblock with a terminal error.
+func (mr *MultiResolver) entryDied(entry *resolverEntry, err error) {
+	if !entry.markDead() {
+		return
+	}
+	log.Warnf("multi-resolver: entry %s transport error: %v; marking down", entry.name, err)
+	if mr.aliveCount.Add(-1) == 0 {
+		mr.allDeadOnce.Do(func() { close(mr.allDead) })
+	}
+}
+
+// startReaders launches one reader goroutine per entry. Each goroutine reads
+// packets from its entry's transport and pushes the result onto recvChan.
+// Extracted so tests can construct MultiResolver with synthetic entries.
+//
+// On a transport error, the reader calls entryDied to mark the entry dead and
+// signal allDead when the last entry dies. It never pushes error-bearing
+// results onto recvChan: doing so would surface a single resolver's failure
+// to DNSPacketConn.recvLoop, which would tear down the entire tunnel session
+// and defeat the point of having multiple resolvers.
+func (mr *MultiResolver) startReaders() {
+	for _, e := range mr.entries {
 		entry := e
 		go func() {
 			for {
 				res := entry.readPacket()
+				if res.err != nil {
+					mr.entryDied(entry, res.err)
+					return
+				}
 				select {
 				case mr.recvChan <- res:
 				case <-mr.closed:
 					return
 				}
-				if res.err != nil {
-					return
-				}
 			}
 		}()
 	}
-	go mr.healthWorker()
-	return mr, nil
 }
 
 func (mr *MultiResolver) healthWorker() {
@@ -350,32 +432,66 @@ func isRateLimitedResponse(resp dns.Message) bool {
 }
 
 // ReadFrom receives a packet from whichever resolver responds first.
+// It only returns an error when the MultiResolver has been closed or every
+// entry has died; a single resolver's transport error is isolated at the
+// reader goroutine and does not propagate here.
 func (mr *MultiResolver) ReadFrom(b []byte) (n int, addr net.Addr, err error) {
 	select {
 	case <-mr.closed:
 		return 0, nil, net.ErrClosed
 	case res := <-mr.recvChan:
-		if res.err != nil {
-			return 0, res.addr, res.err
-		}
 		n = copy(b, res.buf[:res.n])
 		return n, turbotunnel.DummyAddr{}, nil
+	case <-mr.allDead:
+		// Drain any packet that was buffered by a reader before it
+		// died, so packets already delivered by the transport are not
+		// discarded in favour of the terminal error. No more readers
+		// are pushing (allDead is closed only after every reader has
+		// exited), so a non-blocking read here races only with the
+		// mr.closed case above, which is handled on the next call.
+		select {
+		case res := <-mr.recvChan:
+			n = copy(b, res.buf[:res.n])
+			return n, turbotunnel.DummyAddr{}, nil
+		default:
+			return 0, nil, fmt.Errorf("multi-resolver: all resolvers are down")
+		}
 	}
 }
 
 // WriteTo sends b to the selected primary resolver and may duplicate b to one
-// unhealthy resolver as a probe to detect recovery.
+// unhealthy resolver as a probe to detect recovery. If the primary's write
+// fails, the entry is marked dead and WriteTo retries with the next alive
+// entry. An error is returned only when the MultiResolver is closed or every
+// entry has been marked dead.
 func (mr *MultiResolver) WriteTo(b []byte, _ net.Addr) (n int, err error) {
 	select {
 	case <-mr.closed:
 		return 0, net.ErrClosed
+	case <-mr.allDead:
+		return 0, fmt.Errorf("multi-resolver: all resolvers are down")
 	default:
 	}
 
-	primary := mr.selectPrimary()
-	n, err = primary.writePacket(b)
-	if err != nil {
-		return n, err
+	// Try entries until one accepts the write or all alive entries have
+	// been exhausted. Each write error marks the entry dead.
+	var primary *resolverEntry
+	for attempts := 0; attempts < len(mr.entries); attempts++ {
+		primary = mr.selectPrimary()
+		if primary == nil {
+			break
+		}
+		n, err = primary.writePacket(b)
+		if err == nil {
+			break
+		}
+		mr.entryDied(primary, err)
+	}
+	switch {
+	case err != nil:
+		return 0, fmt.Errorf("multi-resolver: all write attempts failed: %w", err)
+	case primary == nil:
+		return 0, fmt.Errorf("multi-resolver: no alive resolver for write")
 	}
 
 	if probe := mr.selectProbeTarget(primary); probe != nil {
@@ -385,17 +501,17 @@ func (mr *MultiResolver) WriteTo(b []byte, _ net.Addr) (n int, err error) {
 	return n, nil
 }
 
+// selectPrimary returns the entry that should receive the next outgoing
+// query, or nil if every entry has been marked dead. The caller must handle
+// nil (e.g., return an "all resolvers down" error).
 func (mr *MultiResolver) selectPrimary() *resolverEntry {
 	if mr.mode == SelectionRoundRobin {
-		if e := mr.selectRoundRobinHealthy(); e != nil {
-			return e
-		}
-		return mr.entries[0]
+		return mr.selectRoundRobinHealthy()
 	}
 	if e := mr.selectBestScore(); e != nil {
 		return e
 	}
-	return mr.entries[0]
+	return mr.selectRoundRobinHealthy()
 }
 
 func (mr *MultiResolver) selectRoundRobinHealthy() *resolverEntry {
@@ -407,28 +523,43 @@ func (mr *MultiResolver) selectRoundRobinHealthy() *resolverEntry {
 	}
 
 	start := mr.rrIndex
+	// First pass: prefer Healthy or Unknown, skipping dead entries.
 	for i := 0; i < len(mr.entries); i++ {
 		idx := (start + i) % len(mr.entries)
+		if mr.entries[idx].isDead() {
+			continue
+		}
 		state := mr.entries[idx].stateSnapshot()
 		if state == ResolverStateHealthy || state == ResolverStateUnknown {
 			mr.rrIndex = (idx + 1) % len(mr.entries)
 			return mr.entries[idx]
 		}
 	}
-	idx := start % len(mr.entries)
-	mr.rrIndex = (idx + 1) % len(mr.entries)
-	return mr.entries[idx]
+	// Second pass: accept any non-dead entry even if RateLimited/Down.
+	for i := 0; i < len(mr.entries); i++ {
+		idx := (start + i) % len(mr.entries)
+		if mr.entries[idx].isDead() {
+			continue
+		}
+		mr.rrIndex = (idx + 1) % len(mr.entries)
+		return mr.entries[idx]
+	}
+	// Every entry is dead.
+	return nil
 }
 
 func (mr *MultiResolver) selectBestScore() *resolverEntry {
 	if len(mr.entries) == 0 {
 		return nil
 	}
-	best := mr.entries[0]
-	bestScore := resolverScore(best)
-	for _, e := range mr.entries[1:] {
+	var best *resolverEntry
+	var bestScore int64
+	for _, e := range mr.entries {
+		if e.isDead() {
+			continue
+		}
 		s := resolverScore(e)
-		if s > bestScore {
+		if best == nil || s > bestScore {
 			best = e
 			bestScore = s
 		}
@@ -462,6 +593,9 @@ func (mr *MultiResolver) selectProbeTarget(primary *resolverEntry) *resolverEntr
 		if e == primary {
 			continue
 		}
+		if e.isDead() {
+			continue
+		}
 		state := e.stateSnapshot()
 		if state == ResolverStateHealthy {
 			continue
diff --git a/client/multi_resolver_test.go b/client/multi_resolver_test.go
new file mode 100644
index 0000000..ec31411
--- /dev/null
+++ b/client/multi_resolver_test.go
@@ -0,0 +1,229 @@
+package client
+
+import (
+	"bytes"
+	"net"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/net2share/vaydns/turbotunnel"
+)
+
+// fakePacketConn is a controllable net.PacketConn for MultiResolver tests.
+// Pushed responses are consumed in order by successive ReadFrom calls; each
+// response can be either data bytes or an error.
+type fakePacketConn struct {
+	name   string
+	readCh chan fakeReadResp
+	closed chan struct{}
+	once   sync.Once
+}
+
+type fakeReadResp struct {
+	data []byte
+	err  error
+}
+
+func newFakePacketConn(name string) *fakePacketConn {
+	return &fakePacketConn{
+		name:   name,
+		readCh: make(chan fakeReadResp, 16),
+		closed: make(chan struct{}),
+	}
+}
+
+func (f *fakePacketConn) pushData(data []byte) {
+	cp := make([]byte, len(data))
+	copy(cp, data)
+	f.readCh <- fakeReadResp{data: cp}
+}
+
+func (f *fakePacketConn) pushError(err error) {
+	f.readCh <- fakeReadResp{err: err}
+}
+
+func (f *fakePacketConn) ReadFrom(p []byte) (int, net.Addr, error) {
+	select {
+	case r, ok := <-f.readCh:
+		if !ok {
+			return 0, nil, net.ErrClosed
+		}
+		if r.err != nil {
+			return 0, nil, r.err
+		}
+		return copy(p, r.data), turbotunnel.DummyAddr{}, nil
+	case <-f.closed:
+		return 0, nil, net.ErrClosed
+	}
+}
+
+func (f *fakePacketConn) WriteTo(p []byte, _ net.Addr) (int, error) {
+	return len(p), nil
+}
+
+func (f *fakePacketConn) Close() error {
+	f.once.Do(func() { close(f.closed) })
+	return nil
+}
+
+func (f *fakePacketConn) LocalAddr() net.Addr              { return turbotunnel.DummyAddr{} }
+func (f *fakePacketConn) SetDeadline(time.Time) error      { return nil }
+func (f *fakePacketConn) SetReadDeadline(time.Time) error  { return nil }
+func (f *fakePacketConn) SetWriteDeadline(time.Time) error { return nil }
+
+// newFakeEntry builds a resolverEntry around a fakePacketConn, suitable for
+// injection into a hand-constructed MultiResolver in tests.
+func newFakeEntry(name string, conn *fakePacketConn) *resolverEntry {
+	return &resolverEntry{
+		name:    name,
+		addr:    turbotunnel.DummyAddr{},
+		conn:    conn,
+		pending: make(map[uint16]time.Time),
+		state:   ResolverStateUnknown,
+	}
+}
+
+// newTestMultiResolver mirrors what NewMultiResolver does after
+// GetResolverConnection returns, but with pre-built synthetic entries. It does
+// not spawn the healthWorker — tests don't need it, and omitting it keeps them
+// hermetic with respect to timing.
+func newTestMultiResolver(entries []*resolverEntry, mode SelectionMode) *MultiResolver {
+	mr := &MultiResolver{
+		entries:  entries,
+		mode:     mode,
+		recvChan: make(chan multiReadResult, len(entries)*4),
+		closed:   make(chan struct{}),
+		allDead:  make(chan struct{}),
+	}
+	mr.aliveCount.Store(int32(len(entries)))
+	mr.startReaders()
+	return mr
+}
+
+// TestMultiResolver_DeadEntryDoesNotBreakReadFrom exercises C1: when one
+// resolver entry's transport returns a fatal read error, MultiResolver.ReadFrom
+// must continue to deliver packets from healthy entries instead of propagating
+// that error to the upper DNSPacketConn layer (which would tear down the whole
+// tunnel session).
+//
+// Pre-fix expectation: this test FAILS — the reader goroutine pushes the error
+// onto recvChan, ReadFrom returns it, and the test sees net.ErrClosed instead
+// of the bytes from the working entry.
+//
+// Post-fix expectation: this test PASSES — the reader goroutine handles the
+// error internally (e.g. marks the entry Down and exits), leaving recvChan to
+// deliver only real response bytes from the surviving entries.
+func TestMultiResolver_DeadEntryDoesNotBreakReadFrom(t *testing.T) {
+	failing := newFakePacketConn("failing")
+	working := newFakePacketConn("working")
+
+	entries := []*resolverEntry{
+		newFakeEntry("failing", failing),
+		newFakeEntry("working", working),
+	}
+	mr := newTestMultiResolver(entries, SelectionRoundRobin)
+	defer mr.Close()
+
+	// Step 1: make the failing entry's ReadFrom return a fatal error.
+	// The reader goroutine will pick this up immediately.
+	failing.pushError(net.ErrClosed)
+
+	// Give the reader goroutine time to handle the error. Under the buggy
+	// implementation it pushes an error-bearing multiReadResult onto
+	// recvChan and exits; under a correct implementation it would quietly
+	// mark the entry down and exit without poisoning recvChan.
+	time.Sleep(100 * time.Millisecond)
+
+	// Step 2: push a valid response to the working entry. This lands on
+	// recvChan strictly AFTER any push from the failing entry, so the
+	// ordering is deterministic.
+	wantResponse := []byte("valid-response-bytes-from-working-resolver")
+	working.pushData(wantResponse)
+
+	// Step 3: ReadFrom must return the bytes from the working entry, not
+	// the error from the failing one.
+	type readResult struct {
+		n   int
+		err error
+		buf []byte
+	}
+	done := make(chan readResult, 1)
+	go func() {
+		buf := make([]byte, 4096)
+		n, _, err := mr.ReadFrom(buf)
+		done <- readResult{n: n, err: err, buf: buf}
+	}()
+
+	select {
+	case r := <-done:
+		if r.err != nil {
+			t.Fatalf("MultiResolver.ReadFrom returned error %v; expected it to ignore the failed entry and return the bytes from the working entry. This is the C1 bug: a single resolver's read error tears down the tunnel.", r.err)
+		}
+		if !bytes.Equal(r.buf[:r.n], wantResponse) {
+			t.Fatalf("MultiResolver.ReadFrom returned wrong bytes.\n  got:  %x\n  want: %x", r.buf[:r.n], wantResponse)
+		}
+	case <-time.After(3 * time.Second):
+		t.Fatal("MultiResolver.ReadFrom timed out; expected it to return bytes from the working entry within 3s")
+	}
+}
+
+// TestMultiResolver_FailedEntryExcludedFromSelection exercises the selection
+// side of C1: after a reader goroutine has processed a fatal read error and
+// exited, the entry must no longer be returned by selectPrimary. Otherwise
+// the round-robin scheduler will keep sending queries to a resolver whose
+// reader goroutine is gone (so responses will never come back), defeating the
+// health state machine.
+//
+// Pre-fix expectation: this test FAILS — the reader goroutine exits without
+// updating the entry's state, so it remains ResolverStateUnknown, which
+// selectRoundRobinHealthy treats as an acceptable target. The "failing" entry
+// is returned roughly half the time.
+//
+// Post-fix expectation: this test PASSES — the reader goroutine transitions
+// the entry to ResolverStateDown before exiting, and selectPrimary skips it.
+func TestMultiResolver_FailedEntryExcludedFromSelection(t *testing.T) {
+	failing := newFakePacketConn("failing")
+	working := newFakePacketConn("working")
+
+	entries := []*resolverEntry{
+		newFakeEntry("failing", failing),
+		newFakeEntry("working", working),
+	}
+	mr := newTestMultiResolver(entries, SelectionRoundRobin)
+	defer mr.Close()
+
+	// Kill the failing entry's reader; leave working blocked in ReadFrom.
+	failing.pushError(net.ErrClosed)
+
+	// Let the reader goroutine process the error and (ideally) mark the
+	// entry down before it exits.
+	time.Sleep(100 * time.Millisecond)
+
+	// Drain any error result that the buggy reader may have pushed onto
+	// recvChan. A correct reader wouldn't push anything here, so this
+	// drain is a no-op post-fix. Without it, the buggy pre-fix state
+	// machine would leave an error lingering, which isn't what this
+	// specific assertion is about.
+drain:
+	for {
+		select {
+		case <-mr.recvChan:
+		default:
+			break drain
+		}
+	}
+
+	// Call selectPrimary repeatedly. It must never return the failing
+	// entry — otherwise real queries will be sent to a resolver whose
+	// reader is gone, so responses will never come back.
+	for i := range 10 {
+		selected := mr.selectPrimary()
+		if selected == nil {
+			t.Fatalf("iter %d: selectPrimary returned nil", i)
+		}
+		if selected.name == "failing" {
+			t.Fatalf("iter %d: selectPrimary returned the failed entry %q; expected it to be excluded after its reader exited on error. State is %s.", i, selected.name, selected.stateSnapshot())
+		}
+	}
+}

From 922faf0ff6c706d6330df9eb824d8ae8071c4afa Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sun, 12 Apr 2026 01:06:27 +0200
Subject: [PATCH 11/19] test(e2e): add multi-resolver integration tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Exercise the multi-resolver client path end-to-end with three Docker-based
scenarios covering the happy path, a mid-flight resolver kill, and a
forging resolver:

- multi-resolver: two healthy CoreDNS forwarders, both serving the tunnel
  domain; verifies the client works with -udp flag repeated.
- multi-resolver-runtime-failure: two healthy resolvers, then docker kill
  one mid-flight; asserts HTTP traffic keeps flowing through the survivor
  AND that no new tunnel sessions are created — proving the single-entry
  failure is isolated, not escalated to a full session reconnect.
- multi-resolver-forge: one healthy resolver plus one CoreDNS instance
  using the template plugin to inject NXDOMAIN for every query; asserts
  the tunnel still delivers while the UDP worker's forged-response filter
  absorbs the injections, and sanity-checks the forging path by grepping
  dns-forge's own logs for NXDOMAIN to rule out a round-robin false pass.

All three tests use explicit -idle-timeout 10s -keepalive 2s on both
client and server (matching recovery/transport-recovery) so they stay
anchored to a production-like timing profile and won't drift when the
branch's defaults are later reconciled with main.

The full e2e suite runner (run-test.sh) is updated to include the three
new tests after transport-recovery.
---
 e2e/multi-resolver-forge/Corefile.forge       |  6 ++
 e2e/multi-resolver-forge/docker-compose.yml   | 82 +++++++++++++++++++
 e2e/multi-resolver-forge/run.sh               | 59 +++++++++++++
 .../docker-compose.yml                        | 82 +++++++++++++++++++
 e2e/multi-resolver-runtime-failure/run.sh     | 82 +++++++++++++++++++
 e2e/multi-resolver/docker-compose.yml         | 82 +++++++++++++++++++
 e2e/multi-resolver/run.sh                     | 29 +++++++
 e2e/run-test.sh                               |  2 +-
 8 files changed, 423 insertions(+), 1 deletion(-)
 create mode 100644 e2e/multi-resolver-forge/Corefile.forge
 create mode 100644 e2e/multi-resolver-forge/docker-compose.yml
 create mode 100755 e2e/multi-resolver-forge/run.sh
 create mode 100644 e2e/multi-resolver-runtime-failure/docker-compose.yml
 create mode 100755 e2e/multi-resolver-runtime-failure/run.sh
 create mode 100644 e2e/multi-resolver/docker-compose.yml
 create mode 100755 e2e/multi-resolver/run.sh

diff --git a/e2e/multi-resolver-forge/Corefile.forge b/e2e/multi-resolver-forge/Corefile.forge
new file mode 100644
index 0000000..224cbc9
--- /dev/null
+++ b/e2e/multi-resolver-forge/Corefile.forge
@@ -0,0 +1,6 @@
+. {
+    template IN ANY . {
+        rcode NXDOMAIN
+    }
+    log
+}
diff --git a/e2e/multi-resolver-forge/docker-compose.yml b/e2e/multi-resolver-forge/docker-compose.yml
new file mode 100644
index 0000000..b5be9f2
--- /dev/null
+++ b/e2e/multi-resolver-forge/docker-compose.yml
@@ -0,0 +1,82 @@
+networks:
+  dns-net:
+    ipam:
+      config:
+        - subnet: 172.28.0.0/24
+  backend-net:
+
+volumes:
+  keys:
+
+services:
+  keygen:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    volumes:
+      - keys:/keys
+    command: >
+      sh -c "vaydns-server -gen-key -privkey-file /keys/server.key -pubkey-file /keys/server.pub"
+
+  dns-good:
+    image: coredns/coredns
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.10
+    volumes:
+      - ../Corefile:/Corefile
+    command: ["-conf", "/Corefile"]
+
+  dns-forge:
+    image: coredns/coredns
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.11
+    volumes:
+      - ./Corefile.forge:/Corefile
+    command: ["-conf", "/Corefile"]
+
+  backend:
+    image: nginx:alpine
+    networks:
+      - backend-net
+
+  server:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.20
+      backend-net:
+    volumes:
+      - keys:/keys
+    command: >
+      vaydns-server -udp :53 -privkey-file /keys/server.key
+      -domain t.example.com -upstream backend:80
+      -idle-timeout 10s -keepalive 2s
+    depends_on:
+      keygen:
+        condition: service_completed_successfully
+      dns-good:
+        condition: service_started
+      dns-forge:
+        condition: service_started
+
+  client:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    networks:
+      - dns-net
+    volumes:
+      - keys:/keys
+    command: >
+      vaydns-client -udp 172.28.0.10:53 -udp 172.28.0.11:53
+      -pubkey-file /keys/server.pub
+      -domain t.example.com -listen 0.0.0.0:7000
+      -idle-timeout 10s -keepalive 2s -session-check-interval 500ms
+      -reconnect-min 1s -reconnect-max 5s -log-level info
+    depends_on:
+      server:
+        condition: service_started
diff --git a/e2e/multi-resolver-forge/run.sh b/e2e/multi-resolver-forge/run.sh
new file mode 100755
index 0000000..418fa3c
--- /dev/null
+++ b/e2e/multi-resolver-forge/run.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+# Test: one resolver returns forged responses while another works.
+# dns-forge always replies with NXDOMAIN (CoreDNS template plugin), simulating
+# a censor or broken resolver injecting fake responses. dns-good behaves
+# normally. The tunnel must work through dns-good, with the per-query UDP
+# worker's forged-response filter absorbing dns-forge's NXDOMAINs and
+# MultiResolver's health state machine eventually routing around it.
+set -euo pipefail
+cd "$(dirname "$0")"
+
+cleanup() { docker compose down -v 2>/dev/null; }
+trap cleanup EXIT
+
+fetch() {
+    docker compose exec -T client wget -q -O- http://localhost:7000 2>/dev/null | grep -q "Welcome to nginx"
+}
+
+echo "--- Building and starting services ---"
+docker compose up -d --build
+
+echo "--- Waiting for tunnel through dns-good while dns-forge injects NXDOMAINs (up to 60s) ---"
+ok_count=0
+for i in $(seq 1 60); do
+    if fetch; then
+        ok_count=$((ok_count + 1))
+        # Require two consecutive successes so one lucky query doesn't pass
+        # the test while the forging resolver is still in rotation.
+        if [ "$ok_count" -ge 2 ]; then
+            echo ""
+            # Sanity check: make sure dns-forge actually saw queries, so we
+            # know the tunnel was exercising the forging code path and didn't
+            # only hit dns-good by chance. CoreDNS's log plugin may buffer
+            # output briefly, so give it a moment to flush before grepping.
+            sleep 2
+            forge_logs=$(docker compose logs dns-forge 2>&1)
+            if ! grep -q 'NXDOMAIN' <<<"$forge_logs"; then
+                echo "--- dns-forge never served NXDOMAIN; the forging code path may not have been exercised ---"
+                echo "$forge_logs"
+                echo "=== FAIL (forging path not exercised) ==="
+                exit 1
+            fi
+            nxdomain_count=$(grep -c 'NXDOMAIN' <<<"$forge_logs" || true)
+            echo "--- dns-forge served $nxdomain_count NXDOMAIN responses; forging path exercised ---"
+            echo "--- Tunnel delivers consistent responses despite forged NXDOMAINs ---"
+            echo "=== PASS ==="
+            exit 0
+        fi
+    else
+        ok_count=0
+    fi
+    printf "."
+    sleep 1
+done
+
+echo ""
+echo "--- Tunnel did not come up through dns-good ---"
+docker compose logs client server dns-good dns-forge
+echo "=== FAIL ==="
+exit 1
diff --git a/e2e/multi-resolver-runtime-failure/docker-compose.yml b/e2e/multi-resolver-runtime-failure/docker-compose.yml
new file mode 100644
index 0000000..d456182
--- /dev/null
+++ b/e2e/multi-resolver-runtime-failure/docker-compose.yml
@@ -0,0 +1,82 @@
+networks:
+  dns-net:
+    ipam:
+      config:
+        - subnet: 172.28.0.0/24
+  backend-net:
+
+volumes:
+  keys:
+
+services:
+  keygen:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    volumes:
+      - keys:/keys
+    command: >
+      sh -c "vaydns-server -gen-key -privkey-file /keys/server.key -pubkey-file /keys/server.pub"
+
+  dns1:
+    image: coredns/coredns
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.10
+    volumes:
+      - ../Corefile:/Corefile
+    command: ["-conf", "/Corefile"]
+
+  dns2:
+    image: coredns/coredns
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.11
+    volumes:
+      - ../Corefile:/Corefile
+    command: ["-conf", "/Corefile"]
+
+  backend:
+    image: nginx:alpine
+    networks:
+      - backend-net
+
+  server:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.20
+      backend-net:
+    volumes:
+      - keys:/keys
+    command: >
+      vaydns-server -udp :53 -privkey-file /keys/server.key
+      -domain t.example.com -upstream backend:80
+      -idle-timeout 10s -keepalive 2s
+    depends_on:
+      keygen:
+        condition: service_completed_successfully
+      dns1:
+        condition: service_started
+      dns2:
+        condition: service_started
+
+  client:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    networks:
+      - dns-net
+    volumes:
+      - keys:/keys
+    command: >
+      vaydns-client -udp 172.28.0.10:53 -udp 172.28.0.11:53
+      -pubkey-file /keys/server.pub
+      -domain t.example.com -listen 0.0.0.0:7000
+      -idle-timeout 10s -keepalive 2s -session-check-interval 500ms
+      -reconnect-min 1s -reconnect-max 5s -log-level info
+    depends_on:
+      server:
+        condition: service_started
diff --git a/e2e/multi-resolver-runtime-failure/run.sh b/e2e/multi-resolver-runtime-failure/run.sh
new file mode 100755
index 0000000..a863b79
--- /dev/null
+++ b/e2e/multi-resolver-runtime-failure/run.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# Test: one of several UDP resolvers dies mid-flight.
+# Start the tunnel with two working DNS resolvers, verify HTTP traffic works,
+# kill one resolver, and verify traffic continues through the remaining one.
+#
+# A failing UDP resolver does not surface an error to MultiResolver (the
+# per-query worker retries silently), so this test validates that the health
+# state machine + round-robin selection routes around it without tearing
+# down the tunnel session.
+set -euo pipefail
+cd "$(dirname "$0")"
+
+cleanup() { docker compose down -v 2>/dev/null; }
+trap cleanup EXIT
+
+fetch() {
+    docker compose exec -T client wget -q -O- http://localhost:7000 2>/dev/null | grep -q "Welcome to nginx"
+}
+
+echo "--- Building and starting services ---"
+docker compose up -d --build
+
+echo "--- Waiting for initial tunnel (up to 30s) ---"
+for i in $(seq 1 30); do
+    if fetch; then
+        echo ""
+        echo "--- Initial tunnel is up ---"
+        break
+    fi
+    if [ "$i" -eq 30 ]; then
+        echo ""
+        docker compose logs client server dns1 dns2
+        echo "=== FAIL (initial tunnel not ready) ==="
+        exit 1
+    fi
+    printf "."
+    sleep 1
+done
+
+# Snapshot the session id before the kill, to detect any reconnect later.
+pre_kill_sessions=$(docker compose logs client 2>&1 | grep -c 'session .* ready' || true)
+
+echo "--- Killing dns1 (half the queries will start dropping) ---"
+docker compose kill dns1
+
+# Give the client a moment to notice and start routing around dns1.
+sleep 3
+
+echo "--- Verifying tunnel still works through dns2 (up to 45s) ---"
+ok_count=0
+for i in $(seq 1 45); do
+    if fetch; then
+        ok_count=$((ok_count + 1))
+        # Require two consecutive successes so we don't declare victory on
+        # a lucky query that happened to go to dns2.
+        if [ "$ok_count" -ge 2 ]; then
+            post_kill_sessions=$(docker compose logs client 2>&1 | grep -c 'session .* ready' || true)
+            new_sessions=$((post_kill_sessions - pre_kill_sessions))
+            if [ "$new_sessions" -gt 0 ]; then
+                echo ""
+                echo "--- Tunnel recovered but triggered $new_sessions new session(s) — resolver failure should be isolated without a full reconnect ---"
+                docker compose logs client | tail -30
+                echo "=== FAIL (session was rebuilt instead of isolated) ==="
+                exit 1
+            fi
+            echo ""
+            echo "--- Tunnel survived with 0 new sessions (single-entry failure isolated) ---"
+            echo "=== PASS ==="
+            exit 0
+        fi
+    else
+        ok_count=0
+    fi
+    printf "."
+    sleep 1
+done
+
+echo ""
+echo "--- Tunnel did not survive dns1 kill ---"
+docker compose logs client server dns1 dns2
+echo "=== FAIL ==="
+exit 1
diff --git a/e2e/multi-resolver/docker-compose.yml b/e2e/multi-resolver/docker-compose.yml
new file mode 100644
index 0000000..36f9222
--- /dev/null
+++ b/e2e/multi-resolver/docker-compose.yml
@@ -0,0 +1,82 @@
+networks:
+  dns-net:
+    ipam:
+      config:
+        - subnet: 172.28.0.0/24
+  backend-net:
+
+volumes:
+  keys:
+
+services:
+  keygen:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    volumes:
+      - keys:/keys
+    command: >
+      sh -c "vaydns-server -gen-key -privkey-file /keys/server.key -pubkey-file /keys/server.pub"
+
+  dns1:
+    image: coredns/coredns
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.10
+    volumes:
+      - ../Corefile:/Corefile
+    command: ["-conf", "/Corefile"]
+
+  dns2:
+    image: coredns/coredns
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.11
+    volumes:
+      - ../Corefile:/Corefile
+    command: ["-conf", "/Corefile"]
+
+  backend:
+    image: nginx:alpine
+    networks:
+      - backend-net
+
+  server:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.20
+      backend-net:
+    volumes:
+      - keys:/keys
+    command: >
+      vaydns-server -udp :53 -privkey-file /keys/server.key
+      -domain t.example.com -upstream backend:80
+      -idle-timeout 10s -keepalive 2s
+    depends_on:
+      keygen:
+        condition: service_completed_successfully
+      dns1:
+        condition: service_started
+      dns2:
+        condition: service_started
+
+  client:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    networks:
+      - dns-net
+    volumes:
+      - keys:/keys
+    command: >
+      vaydns-client -udp 172.28.0.10:53 -udp 172.28.0.11:53
+      -pubkey-file /keys/server.pub
+      -domain t.example.com -listen 0.0.0.0:7000
+      -idle-timeout 10s -keepalive 2s -session-check-interval 500ms
+      -reconnect-min 1s -reconnect-max 5s
+    depends_on:
+      server:
+        condition: service_started
diff --git a/e2e/multi-resolver/run.sh b/e2e/multi-resolver/run.sh
new file mode 100755
index 0000000..c64921d
--- /dev/null
+++ b/e2e/multi-resolver/run.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Test: multi-resolver smoke test.
+# Verifies that vaydns-client works when configured with two UDP resolvers,
+# and that an HTTP request through the tunnel succeeds.
+set -euo pipefail
+cd "$(dirname "$0")"
+
+cleanup() { docker compose down -v 2>/dev/null; }
+trap cleanup EXIT
+
+echo "--- Building and starting services ---"
+docker compose up -d --build
+
+echo "--- Waiting for tunnel (up to 30s) ---"
+for i in $(seq 1 30); do
+    if docker compose exec -T client wget -q -O- http://localhost:7000 2>/dev/null | grep -q "Welcome to nginx"; then
+        echo ""
+        echo "=== PASS ==="
+        exit 0
+    fi
+    printf "."
+    sleep 1
+done
+
+echo ""
+echo "--- Tunnel did not come up. Dumping logs ---"
+docker compose logs client server dns1 dns2
+echo "=== FAIL ==="
+exit 1
diff --git a/e2e/run-test.sh b/e2e/run-test.sh
index b07c0b7..9784ae1 100755
--- a/e2e/run-test.sh
+++ b/e2e/run-test.sh
@@ -20,7 +20,7 @@ for rt in txt cname a aaaa mx ns srv; do
     fi
 done
 
-for test_dir in socks-download recovery transport-recovery; do
+for test_dir in socks-download recovery transport-recovery multi-resolver multi-resolver-runtime-failure multi-resolver-forge; do
     total=$((total + 1))
     echo ""
     echo "========================================"

From fcd228a2b98bcf371fa984b4735604ec3f31c2d8 Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sun, 12 Apr 2026 01:31:44 +0200
Subject: [PATCH 12/19] fix(client): apply uTLS fingerprint to DoT resolvers in
 multi mode

The multi-resolver refactor split the single "resolver" variable into
three per-type loops but only re-added the UTLSClientHelloID assignment
to the DoH loop, silently dropping fingerprint camouflage for every
DoT resolver. Pre-refactor main set this field unconditionally on the
single configured resolver, so single-resolver DoT users on main got
their fingerprint; multi-resolver DoT users on this branch do not.

Restore the assignment in the DoT loop, mirroring the DoH loop's
pattern.
---
 vaydns-client/main.go | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vaydns-client/main.go b/vaydns-client/main.go
index 0924f7e..a93d35f 100644
--- a/vaydns-client/main.go
+++ b/vaydns-client/main.go
@@ -238,10 +238,12 @@ Known TLS fingerprints for -utls are:
 		resolvers = append(resolvers, resolver)
 	}
 	for _, dotAddr := range dotAddrs {
-		resolvers = append(resolvers, client.Resolver{
+		resolver := client.Resolver{
 			ResolverType: client.ResolverTypeDOT,
 			ResolverAddr: dotAddr,
-		})
+		}
+		resolver.UTLSClientHelloID = utlsClientHelloID
+		resolvers = append(resolvers, resolver)
 	}
 	for _, udpAddr := range udpAddrs {
 		resolver := client.Resolver{

From 6f35ceb1fa292d807410186321348ba53256c3de Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sun, 12 Apr 2026 01:43:48 +0200
Subject: [PATCH 13/19] fix(multi-resolver): move health counters under mu to
 close decay race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The decay logic in recomputeState read each counter with atomic Load,
then wrote back Load-1 with atomic Store. Concurrent Adds from
writePacket, evaluateIncoming, and expirePending (all running in
different goroutines) could land between the Load and Store and be
silently overwritten — classic lost-update on the invalid/timeout
counters. timeoutCount.Store(0) in the valid-response path had the
same shape.

The Go race detector does not catch this: mixed atomic operations on
the same field are considered synchronized, and the bug is a logic
lost-update rather than an unsynchronized memory access.

Move validCount, invalidCount, and timeoutCount from atomic.Int64 to
plain int64 fields protected by the entry's existing mu, and
consolidate the several small mu acquisitions in evaluateIncoming,
expirePending, and writePacket into one per branch. recomputeState's
decay becomes a simple "if count > 0 { count-- }" under the lock.

dead and MultiResolver.aliveCount remain atomic: they are read from
selection hot paths that should not acquire e.mu on every iteration.

resolverScore now acquires e.mu instead of calling stateSnapshot()
and reading counters separately — side benefit: eliminates a minor
pre-existing inconsistency where the state and the counter values
could come from different snapshots.
---
 client/multi_resolver.go | 106 +++++++++++++++++++++------------------
 1 file changed, 56 insertions(+), 50 deletions(-)

diff --git a/client/multi_resolver.go b/client/multi_resolver.go
index d1fea54..1b34535 100644
--- a/client/multi_resolver.go
+++ b/client/multi_resolver.go
@@ -62,23 +62,28 @@ type resolverEntry struct {
 	addr net.Addr
 	conn net.PacketConn
 
-	validCount   atomic.Int64
-	invalidCount atomic.Int64
-	timeoutCount atomic.Int64
-
 	// dead is set when the entry's transport has reported an unrecoverable
 	// error and its reader goroutine has exited. A dead entry is permanently
 	// excluded from selection and never transitions back — the only way to
 	// recover is to rebuild the entire MultiResolver (which happens on a
-	// full tunnel reconnect).
+	// full tunnel reconnect). Kept as an atomic.Bool so selection helpers
+	// can read it without acquiring mu on the hot path.
 	dead atomic.Bool
 
-	mu        sync.Mutex
-	pending   map[uint16]time.Time
-	lastWrite time.Time
-	lastValid time.Time
-	lastProbe time.Time
-	state     ResolverState
+	// mu protects every field below. Counters are deliberately not atomic:
+	// the decay path in recomputeState would lose concurrent Adds if the
+	// counters were atomic and decremented via Load→Store, and splitting
+	// decay across a CAS loop is uglier than just holding mu for the
+	// short window each counter update requires.
+	mu           sync.Mutex
+	validCount   int64
+	invalidCount int64
+	timeoutCount int64
+	pending      map[uint16]time.Time
+	lastWrite    time.Time
+	lastValid    time.Time
+	lastProbe    time.Time
+	state        ResolverState
 }
 
 // markDead transitions the entry to a permanent Down state. It is idempotent
@@ -110,11 +115,11 @@ func (e *resolverEntry) writePacket(b []byte) (int, error) {
 	now := time.Now()
 	e.trackOutgoingID(b, now)
 	n, err := e.conn.WriteTo(b, e.addr)
-	if err != nil {
-		e.invalidCount.Add(1)
-	}
 	e.mu.Lock()
 	e.lastWrite = now
+	if err != nil {
+		e.invalidCount++
+	}
 	e.mu.Unlock()
 	return n, err
 }
@@ -140,50 +145,51 @@ func (e *resolverEntry) readPacket() multiReadResult {
 }
 
 func (e *resolverEntry) evaluateIncoming(packet []byte) {
+	now := time.Now()
 	resp, err := dns.MessageFromWireFormat(packet)
 	if err != nil {
-		e.invalidCount.Add(1)
-		e.recomputeState(time.Now())
+		e.mu.Lock()
+		e.invalidCount++
+		e.mu.Unlock()
+		e.recomputeState(now)
 		return
 	}
 
-	e.mu.Lock()
-	delete(e.pending, resp.ID)
-	e.mu.Unlock()
-
 	if isValidDNSResponse(resp) {
-		e.validCount.Add(1)
-		e.timeoutCount.Store(0)
 		e.mu.Lock()
-		e.lastValid = time.Now()
+		delete(e.pending, resp.ID)
+		e.validCount++
+		e.timeoutCount = 0
+		e.lastValid = now
 		e.state = ResolverStateHealthy
 		e.mu.Unlock()
 		return
 	}
 
-	e.invalidCount.Add(1)
+	e.mu.Lock()
+	delete(e.pending, resp.ID)
+	e.invalidCount++
 	if isRateLimitedResponse(resp) {
-		e.mu.Lock()
 		e.state = ResolverStateRateLimited
-		e.mu.Unlock()
 	}
-	e.recomputeState(time.Now())
+	e.mu.Unlock()
+	e.recomputeState(now)
 }
 
 func (e *resolverEntry) expirePending(now time.Time) {
-	expired := int64(0)
 	e.mu.Lock()
+	expired := int64(0)
 	for id, t := range e.pending {
 		if now.Sub(t) >= pendingResponseTimeout {
 			delete(e.pending, id)
 			expired++
 		}
 	}
-	e.mu.Unlock()
 	if expired > 0 {
-		e.timeoutCount.Add(expired)
-		e.invalidCount.Add(expired)
+		e.timeoutCount += expired
+		e.invalidCount += expired
 	}
+	e.mu.Unlock()
 	e.recomputeState(now)
 }
 
@@ -200,29 +206,25 @@ func (e *resolverEntry) recomputeState(now time.Time) {
 		return
 	}
 
-	timeouts := e.timeoutCount.Load()
-	invalid := e.invalidCount.Load()
-	valid := e.validCount.Load()
-
 	switch {
-	case timeouts >= downTimeoutThreshold:
+	case e.timeoutCount >= downTimeoutThreshold:
 		e.state = ResolverStateDown
-	case invalid >= rateLimitThreshold && valid == 0:
+	case e.invalidCount >= rateLimitThreshold && e.validCount == 0:
 		e.state = ResolverStateRateLimited
-	case valid > 0 && now.Sub(e.lastValid) <= 30*time.Second:
+	case e.validCount > 0 && now.Sub(e.lastValid) <= 30*time.Second:
 		e.state = ResolverStateHealthy
-	case valid == 0:
+	case e.validCount == 0:
 		e.state = ResolverStateUnknown
 	default:
 		e.state = ResolverStateUnknown
 	}
 
 	// Slow decay to avoid sticky penalties.
-	if invalid > 0 {
-		e.invalidCount.Store(invalid - 1)
+	if e.invalidCount > 0 {
+		e.invalidCount--
 	}
-	if timeouts > 0 {
-		e.timeoutCount.Store(timeouts - 1)
+	if e.timeoutCount > 0 {
+		e.timeoutCount--
 	}
 }
 
@@ -250,9 +252,9 @@ func (e *resolverEntry) snapshot() ResolverStat {
 	return ResolverStat{
 		Address:      e.name,
 		State:        e.state,
-		ValidCount:   e.validCount.Load(),
-		InvalidCount: e.invalidCount.Load(),
-		TimeoutCount: e.timeoutCount.Load(),
+		ValidCount:   e.validCount,
+		InvalidCount: e.invalidCount,
+		TimeoutCount: e.timeoutCount,
 		LastWrite:    e.lastWrite,
 		LastValid:    e.lastValid,
 	}
@@ -568,8 +570,10 @@ func (mr *MultiResolver) selectBestScore() *resolverEntry {
 }
 
 func resolverScore(e *resolverEntry) int64 {
-	statePenalty := int64(0)
-	switch e.stateSnapshot() {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	var statePenalty int64
+	switch e.state {
 	case ResolverStateHealthy:
 		statePenalty = 0
 	case ResolverStateUnknown:
@@ -579,7 +583,7 @@ func resolverScore(e *resolverEntry) int64 {
 	case ResolverStateDown:
 		statePenalty = 30
 	}
-	return e.validCount.Load()*4 - e.invalidCount.Load()*2 - e.timeoutCount.Load()*3 - statePenalty
+	return e.validCount*4 - e.invalidCount*2 - e.timeoutCount*3 - statePenalty
 }
 
 func (mr *MultiResolver) selectProbeTarget(primary *resolverEntry) *resolverEntry {
@@ -621,7 +625,9 @@ func (mr *MultiResolver) ResolverStats() []ResolverStat {
 func (mr *MultiResolver) ValidInvalidCounts() map[string][2]int64 {
 	out := make(map[string][2]int64, len(mr.entries))
 	for _, e := range mr.entries {
-		out[e.name] = [2]int64{e.validCount.Load(), e.invalidCount.Load()}
+		e.mu.Lock()
+		out[e.name] = [2]int64{e.validCount, e.invalidCount}
+		e.mu.Unlock()
 	}
 	return out
 }

From 04d4811a3f11293dbed6e079fdfa2d28f7de9faf Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sun, 12 Apr 2026 03:49:44 +0200
Subject: [PATCH 14/19] fix(multi-resolver): per-resolver forged response
 tracking with labeled logs

Each resolver entry now owns a labeled ForgedStats instance so
operators can see exactly which resolver is targeted by DNS injection.
Milestone logs include the resolver address:

  forged DNS responses from 8.8.8.8:53: total=10 SERVFAIL=0 NXDOMAIN=10 other=0

In multi-resolver mode, the reader goroutine filters forged responses
(QR=1, RCODE != NoError) at the MultiResolver layer and records them
per-entry, preventing double-counting at the DNSPacketConn safety net.

In single-resolver mode, ForgedStats is created by the Tunnel and
shared between UDPPacketConn and DNSPacketConn, matching pre-PR
behavior with the new labeled format.

- ForgedStats gains a Label field; Record() includes "from <label>" in
  milestone log lines (falls back to unlabeled for empty label).
- NewUDPPacketConn and GetResolverConnection accept an external
  ForgedStats pointer so the caller controls sharing and labeling.
- evaluateIncoming returns a forged bool; readPacket and startReaders
  use it to skip forwarding forged responses to recvChan.
- NewMultiResolver creates per-entry labeled ForgedStats at construction.
- e2e/multi-resolver-forge extended to assert the labeled milestone log
  fires on the client side with the forging resolver's address.
- Unit tests: ForgedStats labeled/unlabeled milestone, per-entry
  recording, forged-not-forwarded, per-entry label distinctness.
---
 client/client.go                |   8 +-
 client/dns.go                   |  31 ++++--
 client/multi_resolver.go        |  67 +++++++++---
 client/multi_resolver_test.go   | 175 +++++++++++++++++++++++++++++++-
 client/udp.go                   |  19 ++--
 e2e/multi-resolver-forge/run.sh |  15 +++
 6 files changed, 278 insertions(+), 37 deletions(-)

diff --git a/client/client.go b/client/client.go
index 915ff6e..457a3a4 100644
--- a/client/client.go
+++ b/client/client.go
@@ -298,9 +298,15 @@ func (t *Tunnel) InitiateResolverConnection() error {
 		}
 		t.resolverConn = conn
 		t.remoteAddr = turbotunnel.DummyAddr{}
+		// In multi-resolver mode, each entry owns a per-resolver
+		// ForgedStats. t.forgedStats stays nil so DNSPacketConn
+		// creates an unlabeled catch-all (which should rarely fire
+		// since MultiResolver filters forged responses upstream).
 		return nil
 	}
-	conn, addr, err := GetResolverConnection(t.Resolvers[0], t.effectivePacketQueueSize(), t.effectiveQueueOverflowMode())
+	r := t.Resolvers[0]
+	t.forgedStats = &ForgedStats{Label: r.ResolverAddr}
+	conn, addr, err := GetResolverConnection(r, t.effectivePacketQueueSize(), t.effectiveQueueOverflowMode(), t.forgedStats)
 	if err != nil {
 		return err
 	}
diff --git a/client/dns.go b/client/dns.go
index 0453b78..7847faf 100644
--- a/client/dns.go
+++ b/client/dns.go
@@ -100,10 +100,17 @@ func (rl *RateLimiter) Wait() {
 	}
 }
 
-// ForgedStats tracks forged DNS response counters. It is shared between
-// UDPPacketConn (per-query mode) and DNSPacketConn (shared socket mode) so
-// that forged response visibility is consistent regardless of transport.
+// ForgedStats tracks forged DNS response counters for a specific source.
+// In single-resolver mode, one instance is shared between UDPPacketConn
+// (per-query mode) and DNSPacketConn so milestone logs fire on a unified
+// count. In multi-resolver mode, each entry holds its own labeled instance
+// so operators can see which resolver is being targeted by injection.
 type ForgedStats struct {
+	// Label identifies the counter's source in log lines — typically the
+	// resolver address (e.g. "8.8.8.8:53"). An empty label is allowed
+	// and produces an unlabeled log line (used by DNSPacketConn's catch-all
+	// safety net when no resolver attribution is available).
+	Label    string
 	Total    uint64
 	SERVFAIL uint64
 	NXDOMAIN uint64
@@ -124,11 +131,19 @@ func (s *ForgedStats) Record(rcode uint16) {
 	}
 	total := atomic.AddUint64(&s.Total, 1)
 	if forgedInfoMilestone(total) {
-		log.Infof("forged DNS responses: total=%d, SERVFAIL=%d, NXDOMAIN=%d, other=%d",
-			total,
-			atomic.LoadUint64(&s.SERVFAIL),
-			atomic.LoadUint64(&s.NXDOMAIN),
-			atomic.LoadUint64(&s.Other))
+		if s.Label != "" {
+			log.Infof("forged DNS responses from %s: total=%d, SERVFAIL=%d, NXDOMAIN=%d, other=%d",
+				s.Label, total,
+				atomic.LoadUint64(&s.SERVFAIL),
+				atomic.LoadUint64(&s.NXDOMAIN),
+				atomic.LoadUint64(&s.Other))
+		} else {
+			log.Infof("forged DNS responses: total=%d, SERVFAIL=%d, NXDOMAIN=%d, other=%d",
+				total,
+				atomic.LoadUint64(&s.SERVFAIL),
+				atomic.LoadUint64(&s.NXDOMAIN),
+				atomic.LoadUint64(&s.Other))
+		}
 	}
 }
 
diff --git a/client/multi_resolver.go b/client/multi_resolver.go
index 1b34535..5434ba0 100644
--- a/client/multi_resolver.go
+++ b/client/multi_resolver.go
@@ -70,6 +70,13 @@ type resolverEntry struct {
 	// can read it without acquiring mu on the hot path.
 	dead atomic.Bool
 
+	// forgedStats tracks forged DNS response counters for this specific
+	// resolver entry, with milestone logging attributed to its address.
+	// In multi-resolver mode each entry owns its own labeled instance;
+	// in single-resolver mode the Tunnel shares one instance with both
+	// UDPPacketConn and DNSPacketConn.
+	forgedStats *ForgedStats
+
 	// mu protects every field below. Counters are deliberately not atomic:
 	// the decay path in recomputeState would lose concurrent Adds if the
 	// counters were atomic and decremented via Load→Store, and splitting
@@ -134,17 +141,22 @@ func (e *resolverEntry) trackOutgoingID(b []byte, now time.Time) {
 	e.mu.Unlock()
 }
 
-func (e *resolverEntry) readPacket() multiReadResult {
+func (e *resolverEntry) readPacket() (multiReadResult, bool) {
 	var result multiReadResult
 	result.entry = e
 	result.n, result.addr, result.err = e.conn.ReadFrom(result.buf[:])
+	var forged bool
 	if result.err == nil {
-		e.evaluateIncoming(result.buf[:result.n])
+		forged = e.evaluateIncoming(result.buf[:result.n])
 	}
-	return result
+	return result, forged
 }
 
-func (e *resolverEntry) evaluateIncoming(packet []byte) {
+// evaluateIncoming parses an incoming DNS response for health tracking and
+// forged-response detection. Returns true when the response is forged
+// (QR=1, RCODE != NoError) — the caller should not forward it upstream
+// because evaluateIncoming already recorded it in the entry's ForgedStats.
+func (e *resolverEntry) evaluateIncoming(packet []byte) bool {
 	now := time.Now()
 	resp, err := dns.MessageFromWireFormat(packet)
 	if err != nil {
@@ -152,7 +164,7 @@ func (e *resolverEntry) evaluateIncoming(packet []byte) {
 		e.invalidCount++
 		e.mu.Unlock()
 		e.recomputeState(now)
-		return
+		return false // parse error — not forged, let upper layer handle
 	}
 
 	if isValidDNSResponse(resp) {
@@ -163,9 +175,13 @@ func (e *resolverEntry) evaluateIncoming(packet []byte) {
 		e.lastValid = now
 		e.state = ResolverStateHealthy
 		e.mu.Unlock()
-		return
+		return false
 	}
 
+	// Non-valid DNS response. Track it for health purposes.
+	rcode := resp.Flags & 0x000f
+	isResponse := resp.Flags&0x8000 != 0
+
 	e.mu.Lock()
 	delete(e.pending, resp.ID)
 	e.invalidCount++
@@ -174,6 +190,13 @@ func (e *resolverEntry) evaluateIncoming(packet []byte) {
 	}
 	e.mu.Unlock()
 	e.recomputeState(now)
+
+	// Record in per-resolver forged stats and signal caller not to forward.
+	if isResponse && e.forgedStats != nil {
+		e.forgedStats.Record(rcode)
+		return true
+	}
+	return false
 }
 
 func (e *resolverEntry) expirePending(now time.Time) {
@@ -300,7 +323,8 @@ func NewMultiResolver(resolvers []Resolver, mode SelectionMode, queueSize int, o
 
 	entries := make([]*resolverEntry, 0, len(resolvers))
 	for _, r := range resolvers {
-		conn, addr, err := GetResolverConnection(r, queueSize, overflowMode)
+		entryStats := &ForgedStats{Label: r.ResolverAddr}
+		conn, addr, err := GetResolverConnection(r, queueSize, overflowMode, entryStats)
 		if err != nil {
 			for _, e := range entries {
 				e.conn.Close()
@@ -308,11 +332,12 @@ func NewMultiResolver(resolvers []Resolver, mode SelectionMode, queueSize int, o
 			return nil, fmt.Errorf("resolver %s %s: %w", r.ResolverType, r.ResolverAddr, err)
 		}
 		entries = append(entries, &resolverEntry{
-			name:    r.ResolverAddr,
-			addr:    addr,
-			conn:    conn,
-			pending: make(map[uint16]time.Time),
-			state:   ResolverStateUnknown,
+			name:        r.ResolverAddr,
+			addr:        addr,
+			conn:        conn,
+			forgedStats: entryStats,
+			pending:     make(map[uint16]time.Time),
+			state:       ResolverStateUnknown,
 		})
 	}
 
@@ -359,11 +384,18 @@ func (mr *MultiResolver) startReaders() {
 		entry := e
 		go func() {
 			for {
-				res := entry.readPacket()
+				res, forged := entry.readPacket()
 				if res.err != nil {
 					mr.entryDied(entry, res.err)
 					return
 				}
+				if forged {
+					// evaluateIncoming already recorded this in the
+					// entry's per-resolver ForgedStats. Don't forward
+					// to DNSPacketConn — its safety-net filter would
+					// double-count it.
+					continue
+				}
 				select {
 				case mr.recvChan <- res:
 				case <-mr.closed:
@@ -681,8 +713,11 @@ func (mr *MultiResolver) SetWriteDeadline(t time.Time) error {
 	return last
 }
 
-// getResolverConnection creates the underlying transport net.PacketConn for r.
-func GetResolverConnection(r Resolver, queueSize int, overflowMode turbotunnel.QueueOverflowMode) (net.PacketConn, net.Addr, error) {
+// GetResolverConnection creates the underlying transport net.PacketConn for r.
+// For UDP per-query mode, forgedStats is passed to NewUDPPacketConn so
+// forged-response milestone logs are attributed to this resolver. Callers
+// may pass nil to let each layer create its own counter.
+func GetResolverConnection(r Resolver, queueSize int, overflowMode turbotunnel.QueueOverflowMode, forgedStats *ForgedStats) (net.PacketConn, net.Addr, error) {
 	switch r.ResolverType {
 	case ResolverTypeUDP:
 		addr, err := net.ResolveUDPAddr("udp", r.ResolverAddr)
@@ -705,7 +740,7 @@ func GetResolverConnection(r Resolver, queueSize int, overflowMode turbotunnel.Q
 		if timeout <= 0 {
 			timeout = DefaultUDPResponseTimeout
 		}
-		conn, _, err := NewUDPPacketConn(addr, r.DialerControl, workers, timeout, !r.UDPAcceptErrors, queueSize, overflowMode)
+		conn, err := NewUDPPacketConn(addr, r.DialerControl, workers, timeout, !r.UDPAcceptErrors, queueSize, overflowMode, forgedStats)
 		if err != nil {
 			return nil, nil, err
 		}
diff --git a/client/multi_resolver_test.go b/client/multi_resolver_test.go
index ec31411..3c8044d 100644
--- a/client/multi_resolver_test.go
+++ b/client/multi_resolver_test.go
@@ -4,9 +4,11 @@ import (
 	"bytes"
 	"net"
 	"sync"
+	"sync/atomic"
 	"testing"
 	"time"
 
+	"github.com/net2share/vaydns/dns"
 	"github.com/net2share/vaydns/turbotunnel"
 )
 
@@ -76,11 +78,12 @@ func (f *fakePacketConn) SetWriteDeadline(time.Time) error { return nil }
 // injection into a hand-constructed MultiResolver in tests.
 func newFakeEntry(name string, conn *fakePacketConn) *resolverEntry {
 	return &resolverEntry{
-		name:    name,
-		addr:    turbotunnel.DummyAddr{},
-		conn:    conn,
-		pending: make(map[uint16]time.Time),
-		state:   ResolverStateUnknown,
+		name:        name,
+		addr:        turbotunnel.DummyAddr{},
+		conn:        conn,
+		forgedStats: &ForgedStats{Label: name},
+		pending:     make(map[uint16]time.Time),
+		state:       ResolverStateUnknown,
 	}
 }
 
@@ -227,3 +230,165 @@ drain:
 		}
 	}
 }
+
+// buildDNSResponse constructs a minimal valid DNS wire-format response with
+// the given RCODE. QR is set to 1. This is the smallest packet that
+// dns.MessageFromWireFormat can parse and isValidDNSResponse/isRateLimitedResponse
+// can classify.
+func buildDNSResponse(t *testing.T, rcode uint16) []byte {
+	t.Helper()
+	msg := &dns.Message{
+		ID:    0x1234,
+		Flags: 0x8000 | rcode, // QR=1 + RCODE
+	}
+	buf, err := msg.WireFormat()
+	if err != nil {
+		t.Fatalf("buildDNSResponse: %v", err)
+	}
+	return buf
+}
+
+// TestForgedStats_LabeledMilestoneLog verifies that ForgedStats.Record()
+// fires a milestone log at the expected thresholds and that the log line
+// includes the resolver label in the "from <addr>" format.
+func TestForgedStats_LabeledMilestoneLog(t *testing.T) {
+	stats := &ForgedStats{Label: "1.2.3.4:53"}
+
+	// Record 9 forged NXDOMAIN responses — no milestone yet.
+	for range 9 {
+		stats.Record(dns.RcodeNameError)
+	}
+	if atomic.LoadUint64(&stats.Total) != 9 {
+		t.Fatalf("expected Total=9, got %d", stats.Total)
+	}
+
+	// The 10th should trigger the first milestone.
+	stats.Record(dns.RcodeNameError)
+	if atomic.LoadUint64(&stats.Total) != 10 {
+		t.Fatalf("expected Total=10, got %d", stats.Total)
+	}
+	if atomic.LoadUint64(&stats.NXDOMAIN) != 10 {
+		t.Fatalf("expected NXDOMAIN=10, got %d", stats.NXDOMAIN)
+	}
+}
+
+// TestForgedStats_UnlabeledFallback verifies that a ForgedStats with an
+// empty Label still records correctly (used by DNSPacketConn's catch-all).
+func TestForgedStats_UnlabeledFallback(t *testing.T) {
+	stats := &ForgedStats{}
+	stats.Record(dns.RcodeServerFailure)
+	if atomic.LoadUint64(&stats.Total) != 1 {
+		t.Fatalf("expected Total=1, got %d", stats.Total)
+	}
+	if atomic.LoadUint64(&stats.SERVFAIL) != 1 {
+		t.Fatalf("expected SERVFAIL=1, got %d", stats.SERVFAIL)
+	}
+}
+
+// TestMultiResolver_ForgedResponseRecordedPerEntry verifies that in
+// multi-resolver mode, a forged DNS response (NXDOMAIN) arriving at one
+// entry increments that entry's ForgedStats but not the other entries'.
+func TestMultiResolver_ForgedResponseRecordedPerEntry(t *testing.T) {
+	forging := newFakePacketConn("forging")
+	clean := newFakePacketConn("clean")
+
+	forgingEntry := newFakeEntry("forging", forging)
+	cleanEntry := newFakeEntry("clean", clean)
+
+	mr := newTestMultiResolver([]*resolverEntry{forgingEntry, cleanEntry}, SelectionRoundRobin)
+	defer mr.Close()
+
+	// Push a forged NXDOMAIN response to the forging entry.
+	forgedResp := buildDNSResponse(t, dns.RcodeNameError)
+	forging.pushData(forgedResp)
+
+	// Give the reader time to process.
+	time.Sleep(100 * time.Millisecond)
+
+	// Forging entry's stats should have recorded the forged response.
+	if got := atomic.LoadUint64(&forgingEntry.forgedStats.Total); got != 1 {
+		t.Errorf("forging entry ForgedStats.Total = %d, want 1", got)
+	}
+	if got := atomic.LoadUint64(&forgingEntry.forgedStats.NXDOMAIN); got != 1 {
+		t.Errorf("forging entry ForgedStats.NXDOMAIN = %d, want 1", got)
+	}
+
+	// Clean entry's stats should be untouched.
+	if got := atomic.LoadUint64(&cleanEntry.forgedStats.Total); got != 0 {
+		t.Errorf("clean entry ForgedStats.Total = %d, want 0", got)
+	}
+}
+
+// TestMultiResolver_ForgedResponseNotForwardedToRecvChan verifies that
+// forged responses detected by evaluateIncoming are NOT forwarded to
+// recvChan (and therefore never reach DNSPacketConn). Only legitimate
+// responses should appear on the channel.
+func TestMultiResolver_ForgedResponseNotForwardedToRecvChan(t *testing.T) {
+	conn := newFakePacketConn("resolver")
+	entry := newFakeEntry("resolver", conn)
+
+	mr := newTestMultiResolver([]*resolverEntry{entry}, SelectionRoundRobin)
+	defer mr.Close()
+
+	// Push a forged NXDOMAIN, then a valid NoError response.
+	forgedResp := buildDNSResponse(t, dns.RcodeNameError)
+	validResp := buildDNSResponse(t, dns.RcodeNoError)
+	conn.pushData(forgedResp)
+	conn.pushData(validResp)
+
+	// ReadFrom should return the valid response, skipping the forged one.
+	buf := make([]byte, 4096)
+	type readResult struct {
+		n   int
+		err error
+	}
+	done := make(chan readResult, 1)
+	go func() {
+		n, _, err := mr.ReadFrom(buf)
+		done <- readResult{n: n, err: err}
+	}()
+
+	select {
+	case r := <-done:
+		if r.err != nil {
+			t.Fatalf("ReadFrom error: %v", r.err)
+		}
+		// The bytes should be the valid response, not the forged one.
+		if bytes.Equal(buf[:r.n], forgedResp) {
+			t.Fatal("ReadFrom returned the forged response — it should have been filtered by the reader goroutine")
+		}
+		if !bytes.Equal(buf[:r.n], validResp) {
+			t.Fatalf("ReadFrom returned unexpected bytes:\n  got:  %x\n  want: %x", buf[:r.n], validResp)
+		}
+	case <-time.After(3 * time.Second):
+		t.Fatal("ReadFrom timed out; expected valid response within 3s")
+	}
+
+	// Verify the forged response was recorded.
+	if got := atomic.LoadUint64(&entry.forgedStats.Total); got != 1 {
+		t.Errorf("ForgedStats.Total = %d, want 1", got)
+	}
+}
+
+// TestMultiResolver_ForgedStatsPerEntryLabeled verifies that each entry
+// in a MultiResolver carries a distinctly-labeled ForgedStats instance.
+func TestMultiResolver_ForgedStatsPerEntryLabeled(t *testing.T) {
+	conn1 := newFakePacketConn("r1")
+	conn2 := newFakePacketConn("r2")
+
+	e1 := newFakeEntry("1.1.1.1:53", conn1)
+	e2 := newFakeEntry("8.8.8.8:53", conn2)
+
+	mr := newTestMultiResolver([]*resolverEntry{e1, e2}, SelectionRoundRobin)
+	defer mr.Close()
+
+	if e1.forgedStats == e2.forgedStats {
+		t.Fatal("entries share the same ForgedStats pointer; expected distinct per-resolver instances")
+	}
+	if e1.forgedStats.Label != "1.1.1.1:53" {
+		t.Errorf("entry1 label = %q, want %q", e1.forgedStats.Label, "1.1.1.1:53")
+	}
+	if e2.forgedStats.Label != "8.8.8.8:53" {
+		t.Errorf("entry2 label = %q, want %q", e2.forgedStats.Label, "8.8.8.8:53")
+	}
+}
diff --git a/client/udp.go b/client/udp.go
index 8eae1b5..30aeb92 100644
--- a/client/udp.go
+++ b/client/udp.go
@@ -33,23 +33,28 @@ type UDPPacketConn struct {
 }
 
 // NewUDPPacketConn creates a UDPPacketConn with numWorkers goroutines that
-// each send one query at a time on a fresh UDP socket. The returned
-// ForgedStats pointer is shared with the caller so DNSPacketConn can
-// include per-query forged counts in its reporting.
-func NewUDPPacketConn(remoteAddr net.Addr, dialerControl func(network, address string, c syscall.RawConn) error, numWorkers int, responseTimeout time.Duration, ignoreErrors bool, queueSize int, overflowMode turbotunnel.QueueOverflowMode) (*UDPPacketConn, *ForgedStats, error) {
-	stats := &ForgedStats{}
+// each send one query at a time on a fresh UDP socket. If forgedStats is
+// non-nil, the UDPPacketConn records forged-response counts into it;
+// otherwise a new instance is created internally (labeled with the remote
+// address). Callers that want unified milestone logging across layers
+// should create a single ForgedStats and pass the same pointer here and
+// to NewDNSPacketConn.
+func NewUDPPacketConn(remoteAddr net.Addr, dialerControl func(network, address string, c syscall.RawConn) error, numWorkers int, responseTimeout time.Duration, ignoreErrors bool, queueSize int, overflowMode turbotunnel.QueueOverflowMode, forgedStats *ForgedStats) (*UDPPacketConn, error) {
+	if forgedStats == nil {
+		forgedStats = &ForgedStats{Label: remoteAddr.String()}
+	}
 	pconn := &UDPPacketConn{
 		remoteAddr:      remoteAddr,
 		dialerControl:   dialerControl,
 		responseTimeout: responseTimeout,
 		ignoreErrors:    ignoreErrors,
-		forgedStats:     stats,
+		forgedStats:     forgedStats,
 		QueuePacketConn: turbotunnel.NewQueuePacketConn(remoteAddr, 0, queueSize, overflowMode),
 	}
 	for i := 0; i < numWorkers; i++ {
 		go pconn.sendLoop()
 	}
-	return pconn, stats, nil
+	return pconn, nil
 }
 
 // sendLoop is the per-worker loop. It dequeues one packet at a time from the
diff --git a/e2e/multi-resolver-forge/run.sh b/e2e/multi-resolver-forge/run.sh
index 418fa3c..2ee38ca 100755
--- a/e2e/multi-resolver-forge/run.sh
+++ b/e2e/multi-resolver-forge/run.sh
@@ -41,6 +41,21 @@ for i in $(seq 1 60); do
             fi
             nxdomain_count=$(grep -c 'NXDOMAIN' <<<"$forge_logs" || true)
             echo "--- dns-forge served $nxdomain_count NXDOMAIN responses; forging path exercised ---"
+
+            # Verify the client-side milestone log fired with per-resolver
+            # attribution: "forged DNS responses from <dns-forge-ip>: ..."
+            # This proves the ForgedStats plumbing carries the resolver
+            # label end-to-end and that milestone thresholds fire correctly
+            # in multi-resolver mode.
+            client_logs=$(docker compose logs client 2>&1)
+            if ! grep -q 'forged DNS responses from 172.28.0.11' <<<"$client_logs"; then
+                echo "--- Client did not log labeled forged milestone for dns-forge (172.28.0.11) ---"
+                echo "$client_logs" | tail -30
+                echo "=== FAIL (missing per-resolver forged milestone log) ==="
+                exit 1
+            fi
+            milestone_line=$(grep 'forged DNS responses from 172.28.0.11' <<<"$client_logs" | tail -1)
+            echo "--- Client milestone: $milestone_line ---"
             echo "--- Tunnel delivers consistent responses despite forged NXDOMAINs ---"
             echo "=== PASS ==="
             exit 0

From 453e0dea8fcefcf4c597cdbf152088d993fc27e1 Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sun, 12 Apr 2026 03:56:30 +0200
Subject: [PATCH 15/19] fix(client): preserve NewTunnel single-resolver API,
 add NewTunnelMulti

The multi-resolver PR changed NewTunnel from accepting a single
Resolver to accepting []Resolver, breaking every existing library
consumer. The package doc example and docs/client-library.md still
showed the old single-resolver call pattern.

Restore NewTunnel(Resolver, TunnelServer) as a thin wrapper that
delegates to the new NewTunnelMulti([]Resolver, TunnelServer).
Existing code compiles unchanged; multi-resolver users call
NewTunnelMulti explicitly.

Update the package doc example and docs/client-library.md to show
both the single-resolver and multi-resolver patterns. Fix stale
default values in the doc's session-options comment block to match
main's current defaults (10s idle, 2s keepalive).
---
 client/client.go       | 22 +++++++++++++++++++---
 docs/client-library.md | 19 +++++++++++++++++--
 vaydns-client/main.go  |  2 +-
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/client/client.go b/client/client.go
index 457a3a4..6adc487 100644
--- a/client/client.go
+++ b/client/client.go
@@ -16,6 +16,14 @@
 //	t.InitiateSmuxSession()
 //	stream, _ := t.OpenStream() // returns net.Conn
 //	defer t.Close()
+//
+// Multi-resolver usage (spread queries across multiple DNS resolvers):
+//
+//	r1, _ := client.NewResolver(client.ResolverTypeUDP, "8.8.8.8:53")
+//	r2, _ := client.NewResolver(client.ResolverTypeDOH, "https://1.1.1.1/dns-query")
+//	ts, _ := client.NewTunnelServer("t.example.com", "pubkey-hex")
+//	t, _ := client.NewTunnelMulti([]client.Resolver{r1, r2}, ts)
+//	t.ListenAndServe("127.0.0.1:7000")
 package client
 
 import (
@@ -215,9 +223,17 @@ type Tunnel struct {
 	remoteAddr    net.Addr
 }
 
-// NewTunnel creates a Tunnel with the given resolver and server configuration.
+// NewTunnel creates a Tunnel with a single resolver and server configuration.
+// For multiple resolvers, use NewTunnelMulti.
+func NewTunnel(resolver Resolver, tunnelServer TunnelServer) (*Tunnel, error) {
+	return NewTunnelMulti([]Resolver{resolver}, tunnelServer)
+}
+
+// NewTunnelMulti creates a Tunnel with multiple resolvers and server
+// configuration. When more than one resolver is provided, the client
+// multiplexes queries across them with health-based routing.
 // Zero-value fields use sensible defaults.
-func NewTunnel(resolvers []Resolver, tunnelServer TunnelServer) (*Tunnel, error) {
+func NewTunnelMulti(resolvers []Resolver, tunnelServer TunnelServer) (*Tunnel, error) {
 	t := &Tunnel{
 		Resolvers:    resolvers,
 		TunnelServer: tunnelServer,
@@ -850,7 +866,7 @@ func (o *Outbound) Start(bind string) error {
 
 	tunnelServer := o.TunnelServers[0]
 
-	tunnel, err := NewTunnel(o.Resolvers, tunnelServer)
+	tunnel, err := NewTunnelMulti(o.Resolvers, tunnelServer)
 	if err != nil {
 		return fmt.Errorf("failed to create tunnel: %w", err)
 	}
diff --git a/docs/client-library.md b/docs/client-library.md
index 391b6b0..2989632 100644
--- a/docs/client-library.md
+++ b/docs/client-library.md
@@ -47,6 +47,21 @@ t.ListenAndServe("127.0.0.1:7000") // blocks, handles reconnection
 
 `ListenAndServe` opens a local TCP listener, creates tunnel sessions with automatic reconnection on failure, and forwards connections through the tunnel.
 
+### Multi-resolver API
+
+Spread DNS queries across multiple resolvers for throughput and resilience:
+
+```go
+r1, _ := client.NewResolver(client.ResolverTypeUDP, "8.8.8.8:53")
+r2, _ := client.NewResolver(client.ResolverTypeDOH, "https://1.1.1.1/dns-query")
+ts, _ := client.NewTunnelServer("t.example.com", "pubkey-hex")
+t, _ := client.NewTunnelMulti([]client.Resolver{r1, r2}, ts)
+
+t.ListenAndServe("127.0.0.1:7000")
+```
+
+`NewTunnelMulti` accepts a slice of resolvers. The client multiplexes queries across them with round-robin selection and health-based routing. Per-resolver forged-response tracking logs which resolver is being targeted by DNS injection.
+
 ## Key types
 
 | Type | Description |
@@ -87,8 +102,8 @@ ts.RPS = 200             // rate limit queries/second
 ts.RecordType = "cname"  // DNS record type for downstream data: txt, null, cname, a, aaaa, mx, ns, srv, caa (default: "txt")
 
 // Session options
-t.IdleTimeout = 60 * time.Second
-t.KeepAlive = 10 * time.Second
+t.IdleTimeout = 10 * time.Second
+t.KeepAlive = 2 * time.Second
 t.OpenStreamTimeout = 10 * time.Second
 t.MaxStreams = 256
 t.SessionCheckInterval = 500 * time.Millisecond
diff --git a/vaydns-client/main.go b/vaydns-client/main.go
index a93d35f..7b4c986 100644
--- a/vaydns-client/main.go
+++ b/vaydns-client/main.go
@@ -389,7 +389,7 @@ Known TLS fingerprints for -utls are:
 	ts.RecordType = recordTypeStr
 
 	// Build tunnel.
-	tunnel, err := client.NewTunnel(resolvers, ts)
+	tunnel, err := client.NewTunnelMulti(resolvers, ts)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "%v\n", err)
 		os.Exit(1)

From aa1ab753128dd2efb0a2c7ff311fe5ba54308664 Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sun, 12 Apr 2026 04:23:18 +0200
Subject: [PATCH 16/19] fix(multi-resolver): decouple counter decay from
 per-response health checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The health state machine decayed invalidCount and timeoutCount by 1 on
every recomputeState call. Since evaluateIncoming calls recomputeState
after every response, each invalid response added 1 and immediately
decayed 1 — net zero. A resolver actively returning error RCODEs
(REFUSED, NXDOMAIN, SERVFAIL) was never detected as problematic; its
state stayed Unknown and round-robin kept sending queries to it.

Move the decay into expirePending (the once-per-second health tick),
and only apply it when no pending IDs expired that tick. This ensures:

- Response-driven invalidCount grows at the response rate between
  ticks and can cross the rateLimitThreshold within seconds.
- Timeout-driven accumulation from bulk pending-ID expirations is not
  immediately cancelled by decay in the same tick.
- Once the resolver is excluded and stops receiving traffic, counters
  decay at 1/sec back to zero, allowing re-probing.

Add e2e/multi-resolver-health-detection: uses -udp-shared-socket so
forged NXDOMAINs reach evaluateIncoming directly (the affected path).
After a 20s detection window and a 10-fetch traffic burst, asserts that
dns-forge received ≤15 queries during the burst (probe-only), not the
~50% share it would get without health-based exclusion.
---
 client/multi_resolver.go                      | 20 ++--
 client/multi_resolver_test.go                 | 38 ++-----
 .../docker-compose.yml                        | 83 ++++++++++++++++
 e2e/multi-resolver-health-detection/run.sh    | 99 +++++++++++++++++++
 e2e/run-test.sh                               |  2 +-
 5 files changed, 204 insertions(+), 38 deletions(-)
 create mode 100644 e2e/multi-resolver-health-detection/docker-compose.yml
 create mode 100755 e2e/multi-resolver-health-detection/run.sh

diff --git a/client/multi_resolver.go b/client/multi_resolver.go
index 5434ba0..3b82a3f 100644
--- a/client/multi_resolver.go
+++ b/client/multi_resolver.go
@@ -211,6 +211,18 @@ func (e *resolverEntry) expirePending(now time.Time) {
 	if expired > 0 {
 		e.timeoutCount += expired
 		e.invalidCount += expired
+	} else {
+		// Decay counters only when nothing accumulated this tick.
+		// This ensures burst expirations (and response-driven
+		// invalidCount growth between ticks) can outpace the decay
+		// and eventually cross the detection thresholds. Decay runs
+		// at most once per health-tick interval (~1s).
+		if e.invalidCount > 0 {
+			e.invalidCount--
+		}
+		if e.timeoutCount > 0 {
+			e.timeoutCount--
+		}
 	}
 	e.mu.Unlock()
 	e.recomputeState(now)
@@ -241,14 +253,6 @@ func (e *resolverEntry) recomputeState(now time.Time) {
 	default:
 		e.state = ResolverStateUnknown
 	}
-
-	// Slow decay to avoid sticky penalties.
-	if e.invalidCount > 0 {
-		e.invalidCount--
-	}
-	if e.timeoutCount > 0 {
-		e.timeoutCount--
-	}
 }
 
 func (e *resolverEntry) stateSnapshot() ResolverState {
diff --git a/client/multi_resolver_test.go b/client/multi_resolver_test.go
index 3c8044d..230ecd9 100644
--- a/client/multi_resolver_test.go
+++ b/client/multi_resolver_test.go
@@ -104,19 +104,11 @@ func newTestMultiResolver(entries []*resolverEntry, mode SelectionMode) *MultiRe
 	return mr
 }
 
-// TestMultiResolver_DeadEntryDoesNotBreakReadFrom exercises C1: when one
+// TestMultiResolver_DeadEntryDoesNotBreakReadFrom verifies that when one
 // resolver entry's transport returns a fatal read error, MultiResolver.ReadFrom
-// must continue to deliver packets from healthy entries instead of propagating
+// continues to deliver packets from healthy entries instead of propagating
 // that error to the upper DNSPacketConn layer (which would tear down the whole
 // tunnel session).
-//
-// Pre-fix expectation: this test FAILS — the reader goroutine pushes the error
-// onto recvChan, ReadFrom returns it, and the test sees net.ErrClosed instead
-// of the bytes from the working entry.
-//
-// Post-fix expectation: this test PASSES — the reader goroutine handles the
-// error internally (e.g. marks the entry Down and exits), leaving recvChan to
-// deliver only real response bytes from the surviving entries.
 func TestMultiResolver_DeadEntryDoesNotBreakReadFrom(t *testing.T) {
 	failing := newFakePacketConn("failing")
 	working := newFakePacketConn("working")
@@ -171,20 +163,11 @@ func TestMultiResolver_DeadEntryDoesNotBreakReadFrom(t *testing.T) {
 	}
 }
 
-// TestMultiResolver_FailedEntryExcludedFromSelection exercises the selection
-// side of C1: after a reader goroutine has processed a fatal read error and
-// exited, the entry must no longer be returned by selectPrimary. Otherwise
-// the round-robin scheduler will keep sending queries to a resolver whose
-// reader goroutine is gone (so responses will never come back), defeating the
-// health state machine.
-//
-// Pre-fix expectation: this test FAILS — the reader goroutine exits without
-// updating the entry's state, so it remains ResolverStateUnknown, which
-// selectRoundRobinHealthy treats as an acceptable target. The "failing" entry
-// is returned roughly half the time.
-//
-// Post-fix expectation: this test PASSES — the reader goroutine transitions
-// the entry to ResolverStateDown before exiting, and selectPrimary skips it.
+// TestMultiResolver_FailedEntryExcludedFromSelection verifies that after a
+// reader goroutine has processed a fatal read error and exited, the entry
+// is no longer returned by selectPrimary. Otherwise the round-robin scheduler
+// would keep sending queries to a resolver whose reader goroutine is gone
+// (so responses would never come back).
 func TestMultiResolver_FailedEntryExcludedFromSelection(t *testing.T) {
 	failing := newFakePacketConn("failing")
 	working := newFakePacketConn("working")
@@ -203,11 +186,8 @@ func TestMultiResolver_FailedEntryExcludedFromSelection(t *testing.T) {
 	// entry down before it exits.
 	time.Sleep(100 * time.Millisecond)
 
-	// Drain any error result that the buggy reader may have pushed onto
-	// recvChan. A correct reader wouldn't push anything here, so this
-	// drain is a no-op post-fix. Without it, the buggy pre-fix state
-	// machine would leave an error lingering, which isn't what this
-	// specific assertion is about.
+	// Drain any stale result from recvChan so the assertion below only
+	// checks selectPrimary behaviour, not lingering channel contents.
 drain:
 	for {
 		select {
diff --git a/e2e/multi-resolver-health-detection/docker-compose.yml b/e2e/multi-resolver-health-detection/docker-compose.yml
new file mode 100644
index 0000000..9a3f25c
--- /dev/null
+++ b/e2e/multi-resolver-health-detection/docker-compose.yml
@@ -0,0 +1,83 @@
+networks:
+  dns-net:
+    ipam:
+      config:
+        - subnet: 172.28.0.0/24
+  backend-net:
+
+volumes:
+  keys:
+
+services:
+  keygen:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    volumes:
+      - keys:/keys
+    command: >
+      sh -c "vaydns-server -gen-key -privkey-file /keys/server.key -pubkey-file /keys/server.pub"
+
+  dns-good:
+    image: coredns/coredns
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.10
+    volumes:
+      - ../Corefile:/Corefile
+    command: ["-conf", "/Corefile"]
+
+  dns-forge:
+    image: coredns/coredns
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.11
+    volumes:
+      - ../multi-resolver-forge/Corefile.forge:/Corefile
+    command: ["-conf", "/Corefile"]
+
+  backend:
+    image: nginx:alpine
+    networks:
+      - backend-net
+
+  server:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.20
+      backend-net:
+    volumes:
+      - keys:/keys
+    command: >
+      vaydns-server -udp :53 -privkey-file /keys/server.key
+      -domain t.example.com -upstream backend:80
+      -idle-timeout 10s -keepalive 2s
+    depends_on:
+      keygen:
+        condition: service_completed_successfully
+      dns-good:
+        condition: service_started
+      dns-forge:
+        condition: service_started
+
+  client:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    networks:
+      - dns-net
+    volumes:
+      - keys:/keys
+    command: >
+      vaydns-client -udp 172.28.0.10:53 -udp 172.28.0.11:53
+      -udp-shared-socket
+      -pubkey-file /keys/server.pub
+      -domain t.example.com -listen 0.0.0.0:7000
+      -idle-timeout 10s -keepalive 2s -session-check-interval 500ms
+      -reconnect-min 1s -reconnect-max 5s -log-level info
+    depends_on:
+      server:
+        condition: service_started
diff --git a/e2e/multi-resolver-health-detection/run.sh b/e2e/multi-resolver-health-detection/run.sh
new file mode 100755
index 0000000..b2162b4
--- /dev/null
+++ b/e2e/multi-resolver-health-detection/run.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# Test: health state machine detects and excludes a forging resolver.
+#
+# dns-forge returns NXDOMAIN for every query. The client uses
+# -udp-shared-socket so there is no per-query UDP worker filtering —
+# forged NXDOMAIN responses reach evaluateIncoming directly, which is
+# the code path affected by the per-response decay bug.
+#
+# With per-query UDP (the default), the UDP worker absorbs forged
+# responses and detection happens through the pending-timeout path
+# instead. That path works at high query rates even without the decay fix,
+# so it cannot demonstrate the bug. Shared-socket mode is required to
+# exercise the evaluateIncoming accumulation path.
+#
+# The health state machine decays penalty counters only on the 1/sec
+# health tick, not per-response. This allows invalidCount to grow at
+# the response rate (~several/sec) and cross the detection threshold
+# within seconds. dns-forge should be marked Down and excluded from
+# round-robin. During the measurement burst, dns-forge should receive
+# only occasional probe queries, not 50% of all traffic.
+set -euo pipefail
+cd "$(dirname "$0")"
+
+cleanup() { docker compose down -v 2>/dev/null; }
+trap cleanup EXIT
+
+fetch() {
+    docker compose exec -T client wget -q -O /dev/null http://localhost:7000 2>/dev/null
+}
+
+query_count() {
+    # Count the number of DNS queries logged by a CoreDNS service.
+    docker compose logs "$1" 2>&1 | grep -c 't\.example\.com' || echo 0
+}
+
+echo "--- Building and starting services ---"
+docker compose up -d --build
+
+echo "--- Waiting for tunnel (up to 30s) ---"
+for i in $(seq 1 30); do
+    if fetch; then
+        echo ""
+        echo "--- Tunnel is up ---"
+        break
+    fi
+    if [ "$i" -eq 30 ]; then
+        echo ""
+        docker compose logs client server dns-good dns-forge
+        echo "=== FAIL (tunnel not ready) ==="
+        exit 1
+    fi
+    printf "."
+    sleep 1
+done
+
+# Phase 1: let the health state machine observe the forging resolver.
+# The pending-response timeout is 5s and the detection threshold is 8
+# timeouts / 5 invalids, so ~15s should be enough for the health machine
+# to mark dns-forge as Down.
+echo "--- Waiting 20s for health machine to observe dns-forge ---"
+# Drive some traffic during the detection window so pending IDs accumulate.
+for i in $(seq 1 20); do
+    fetch || true
+    sleep 1
+done
+
+# Phase 2: snapshot dns-forge query count, then drive a traffic burst
+# and measure how many NEW queries dns-forge receives.
+count_before=$(query_count dns-forge)
+echo "--- dns-forge query count before burst: $count_before ---"
+
+echo "--- Driving 10 HTTP fetches through tunnel ---"
+for i in $(seq 1 10); do
+    fetch || true
+    sleep 1
+done
+
+count_after=$(query_count dns-forge)
+delta=$((count_after - count_before))
+echo "--- dns-forge query count after burst: $count_after (delta: $delta) ---"
+
+# Also check dns-good for comparison.
+good_before_ignored=0  # not tracked, but log the total for debugging
+good_total=$(query_count dns-good)
+echo "--- dns-good total queries: $good_total ---"
+
+# dns-forge should receive very few queries during the burst (only probe
+# queries at ~1/sec = ~10 max), not ~50% of all traffic.
+max_allowed=15
+if [ "$delta" -gt "$max_allowed" ]; then
+    echo "--- dns-forge received $delta queries during the burst (max allowed: $max_allowed) ---"
+    echo "--- Health machine did not route away from the forging resolver (decay bug) ---"
+    docker compose logs client | tail -20
+    echo "=== FAIL ==="
+    exit 1
+fi
+
+echo "--- dns-forge received only $delta queries during burst (≤$max_allowed); health machine routed away ---"
+echo "=== PASS ==="
diff --git a/e2e/run-test.sh b/e2e/run-test.sh
index 9784ae1..e1739d8 100755
--- a/e2e/run-test.sh
+++ b/e2e/run-test.sh
@@ -20,7 +20,7 @@ for rt in txt cname a aaaa mx ns srv; do
     fi
 done
 
-for test_dir in socks-download recovery transport-recovery multi-resolver multi-resolver-runtime-failure multi-resolver-forge; do
+for test_dir in socks-download recovery transport-recovery multi-resolver multi-resolver-runtime-failure multi-resolver-forge multi-resolver-health-detection; do
     total=$((total + 1))
     echo ""
     echo "========================================"

From 207be17c91dcc41b752257cec8ac7f33c9e88fd7 Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sun, 12 Apr 2026 04:49:07 +0200
Subject: [PATCH 17/19] refactor(multi-resolver): remove dead selection modes,
 add startup logging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Delete SelectionBest, SelectionSmart, selectBestScore, resolverScore,
the SelectionMode type, and the mode field/parameter — all unreachable
from the CLI (hardcoded to round-robin) and conceptually questionable
for a DNS tunnel where resolver latency is dwarfed by the tunnel path.
selectPrimary now delegates directly to selectRoundRobinHealthy.

NewMultiResolver logs each resolver's type and address at startup, plus
the total UDP worker goroutine count when per-query UDP entries are
present, so operators can see the resource cost of their configuration.

Remove stray blank line in Outbound.Start.
---
 client/client.go              |  3 +-
 client/multi_resolver.go      | 77 +++++++++--------------------------
 client/multi_resolver_test.go | 13 +++---
 3 files changed, 27 insertions(+), 66 deletions(-)

diff --git a/client/client.go b/client/client.go
index 6adc487..7629eac 100644
--- a/client/client.go
+++ b/client/client.go
@@ -308,7 +308,7 @@ func (t *Tunnel) effectiveKCPWindowSize() int {
 // based on the Resolver configuration.
 func (t *Tunnel) InitiateResolverConnection() error {
 	if len(t.Resolvers) > 1 {
-		conn, err := NewMultiResolver(t.Resolvers, SelectionRoundRobin, t.effectivePacketQueueSize(), t.effectiveQueueOverflowMode())
+		conn, err := NewMultiResolver(t.Resolvers, t.effectivePacketQueueSize(), t.effectiveQueueOverflowMode())
 		if err != nil {
 			return err
 		}
@@ -863,7 +863,6 @@ func NewOutbound(resolvers []Resolver, tunnelServers []TunnelServer) *Outbound {
 // Start begins accepting connections on bind and forwarding them through the
 // first resolver/server pair.
 func (o *Outbound) Start(bind string) error {
-
 	tunnelServer := o.TunnelServers[0]
 
 	tunnel, err := NewTunnelMulti(o.Resolvers, tunnelServer)
diff --git a/client/multi_resolver.go b/client/multi_resolver.go
index 3b82a3f..ec4a5a5 100644
--- a/client/multi_resolver.go
+++ b/client/multi_resolver.go
@@ -16,18 +16,6 @@ import (
 	log "github.com/sirupsen/logrus"
 )
 
-// SelectionMode controls which resolver MultiResolver picks for outgoing packets.
-type SelectionMode string
-
-const (
-	// SelectionRoundRobin rotates through preferred resolvers in order.
-	SelectionRoundRobin SelectionMode = "roundrobin"
-	// SelectionBest picks the resolver with the best observed score.
-	SelectionBest SelectionMode = "best"
-	// SelectionSmart currently aliases best-score selection.
-	SelectionSmart SelectionMode = "smart"
-)
-
 // ResolverState is the current health state of one resolver.
 type ResolverState string
 
@@ -301,7 +289,6 @@ type multiReadResult struct {
 // unhealthy resolvers by duplicating selected packets.
 type MultiResolver struct {
 	entries   []*resolverEntry
-	mode      SelectionMode
 	mu        sync.Mutex
 	rrIndex   int
 	probeRR   int
@@ -320,7 +307,7 @@ type MultiResolver struct {
 }
 
 // NewMultiResolver creates a MultiResolver from a slice of Resolver configs.
-func NewMultiResolver(resolvers []Resolver, mode SelectionMode, queueSize int, overflowMode turbotunnel.QueueOverflowMode) (*MultiResolver, error) {
+func NewMultiResolver(resolvers []Resolver, queueSize int, overflowMode turbotunnel.QueueOverflowMode) (*MultiResolver, error) {
 	if len(resolvers) == 0 {
 		return nil, fmt.Errorf("at least one resolver is required")
 	}
@@ -345,9 +332,24 @@ func NewMultiResolver(resolvers []Resolver, mode SelectionMode, queueSize int, o
 		})
 	}
 
+	log.Infof("multi-resolver: %d resolvers configured", len(entries))
+	udpWorkerTotal := 0
+	for i, r := range resolvers {
+		log.Infof("  [%d] %s %s", i+1, r.ResolverType, r.ResolverAddr)
+		if r.ResolverType == ResolverTypeUDP && !r.UDPSharedSocket {
+			workers := r.UDPWorkers
+			if workers <= 0 {
+				workers = DefaultUDPWorkers
+			}
+			udpWorkerTotal += workers
+		}
+	}
+	if udpWorkerTotal > 0 {
+		log.Infof("multi-resolver: %d total UDP worker goroutines", udpWorkerTotal)
+	}
+
 	mr := &MultiResolver{
 		entries:  entries,
-		mode:     mode,
 		recvChan: make(chan multiReadResult, len(entries)*4),
 		closed:   make(chan struct{}),
 		allDead:  make(chan struct{}),
@@ -543,12 +545,6 @@ func (mr *MultiResolver) WriteTo(b []byte, _ net.Addr) (n int, err error) {
 // query, or nil if every entry has been marked dead. The caller must handle
 // nil (e.g., return an "all resolvers down" error).
 func (mr *MultiResolver) selectPrimary() *resolverEntry {
-	if mr.mode == SelectionRoundRobin {
-		return mr.selectRoundRobinHealthy()
-	}
-	if e := mr.selectBestScore(); e != nil {
-		return e
-	}
 	return mr.selectRoundRobinHealthy()
 }
 
@@ -586,42 +582,6 @@ func (mr *MultiResolver) selectRoundRobinHealthy() *resolverEntry {
 	return nil
 }
 
-func (mr *MultiResolver) selectBestScore() *resolverEntry {
-	if len(mr.entries) == 0 {
-		return nil
-	}
-	var best *resolverEntry
-	var bestScore int64
-	for _, e := range mr.entries {
-		if e.isDead() {
-			continue
-		}
-		s := resolverScore(e)
-		if best == nil || s > bestScore {
-			best = e
-			bestScore = s
-		}
-	}
-	return best
-}
-
-func resolverScore(e *resolverEntry) int64 {
-	e.mu.Lock()
-	defer e.mu.Unlock()
-	var statePenalty int64
-	switch e.state {
-	case ResolverStateHealthy:
-		statePenalty = 0
-	case ResolverStateUnknown:
-		statePenalty = 5
-	case ResolverStateRateLimited:
-		statePenalty = 15
-	case ResolverStateDown:
-		statePenalty = 30
-	}
-	return e.validCount*4 - e.invalidCount*2 - e.timeoutCount*3 - statePenalty
-}
-
 func (mr *MultiResolver) selectProbeTarget(primary *resolverEntry) *resolverEntry {
 	mr.mu.Lock()
 	defer mr.mu.Unlock()
@@ -680,6 +640,9 @@ func (mr *MultiResolver) Close() error {
 }
 
 // LocalAddr returns the local address of the first underlying connection.
+// The value is arbitrary (there is no single meaningful local address for a
+// multi-resolver), but the method must exist to satisfy net.PacketConn.
+// No caller uses the returned address for routing or logic decisions.
 func (mr *MultiResolver) LocalAddr() net.Addr {
 	return mr.entries[0].conn.LocalAddr()
 }
diff --git a/client/multi_resolver_test.go b/client/multi_resolver_test.go
index 230ecd9..e08536c 100644
--- a/client/multi_resolver_test.go
+++ b/client/multi_resolver_test.go
@@ -91,10 +91,9 @@ func newFakeEntry(name string, conn *fakePacketConn) *resolverEntry {
 // GetResolverConnection returns, but with pre-built synthetic entries. It does
 // not spawn the healthWorker — tests don't need it, and omitting it keeps them
 // hermetic with respect to timing.
-func newTestMultiResolver(entries []*resolverEntry, mode SelectionMode) *MultiResolver {
+func newTestMultiResolver(entries []*resolverEntry) *MultiResolver {
 	mr := &MultiResolver{
 		entries:  entries,
-		mode:     mode,
 		recvChan: make(chan multiReadResult, len(entries)*4),
 		closed:   make(chan struct{}),
 		allDead:  make(chan struct{}),
@@ -117,7 +116,7 @@ func TestMultiResolver_DeadEntryDoesNotBreakReadFrom(t *testing.T) {
 		newFakeEntry("failing", failing),
 		newFakeEntry("working", working),
 	}
-	mr := newTestMultiResolver(entries, SelectionRoundRobin)
+	mr := newTestMultiResolver(entries)
 	defer mr.Close()
 
 	// Step 1: make the failing entry's ReadFrom return a fatal error.
@@ -176,7 +175,7 @@ func TestMultiResolver_FailedEntryExcludedFromSelection(t *testing.T) {
 		newFakeEntry("failing", failing),
 		newFakeEntry("working", working),
 	}
-	mr := newTestMultiResolver(entries, SelectionRoundRobin)
+	mr := newTestMultiResolver(entries)
 	defer mr.Close()
 
 	// Kill the failing entry's reader; leave working blocked in ReadFrom.
@@ -275,7 +274,7 @@ func TestMultiResolver_ForgedResponseRecordedPerEntry(t *testing.T) {
 	forgingEntry := newFakeEntry("forging", forging)
 	cleanEntry := newFakeEntry("clean", clean)
 
-	mr := newTestMultiResolver([]*resolverEntry{forgingEntry, cleanEntry}, SelectionRoundRobin)
+	mr := newTestMultiResolver([]*resolverEntry{forgingEntry, cleanEntry})
 	defer mr.Close()
 
 	// Push a forged NXDOMAIN response to the forging entry.
@@ -307,7 +306,7 @@ func TestMultiResolver_ForgedResponseNotForwardedToRecvChan(t *testing.T) {
 	conn := newFakePacketConn("resolver")
 	entry := newFakeEntry("resolver", conn)
 
-	mr := newTestMultiResolver([]*resolverEntry{entry}, SelectionRoundRobin)
+	mr := newTestMultiResolver([]*resolverEntry{entry})
 	defer mr.Close()
 
 	// Push a forged NXDOMAIN, then a valid NoError response.
@@ -359,7 +358,7 @@ func TestMultiResolver_ForgedStatsPerEntryLabeled(t *testing.T) {
 	e1 := newFakeEntry("1.1.1.1:53", conn1)
 	e2 := newFakeEntry("8.8.8.8:53", conn2)
 
-	mr := newTestMultiResolver([]*resolverEntry{e1, e2}, SelectionRoundRobin)
+	mr := newTestMultiResolver([]*resolverEntry{e1, e2})
 	defer mr.Close()
 
 	if e1.forgedStats == e2.forgedStats {

From 4f8962fa03d31c485f21ac982606b9038268a831 Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sun, 12 Apr 2026 04:59:38 +0200
Subject: [PATCH 18/19] test(e2e): add dnstt-compat + multi-resolver regression
 test

Verify that -dnstt-compat (8-byte ClientID, padding prefixes, forced
TXT, 2m idle / 10s keepalive) works correctly when combined with
multiple UDP resolvers. The wire format operates above the MultiResolver
transport layer with no shared state, but this test guards against
regressions in the interaction.
---
 .../docker-compose.yml                        | 81 +++++++++++++++++++
 e2e/multi-resolver-dnstt-compat/run.sh        | 33 ++++++++
 e2e/run-test.sh                               |  2 +-
 3 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 e2e/multi-resolver-dnstt-compat/docker-compose.yml
 create mode 100755 e2e/multi-resolver-dnstt-compat/run.sh

diff --git a/e2e/multi-resolver-dnstt-compat/docker-compose.yml b/e2e/multi-resolver-dnstt-compat/docker-compose.yml
new file mode 100644
index 0000000..b492929
--- /dev/null
+++ b/e2e/multi-resolver-dnstt-compat/docker-compose.yml
@@ -0,0 +1,81 @@
+networks:
+  dns-net:
+    ipam:
+      config:
+        - subnet: 172.28.0.0/24
+  backend-net:
+
+volumes:
+  keys:
+
+services:
+  keygen:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    volumes:
+      - keys:/keys
+    command: >
+      sh -c "vaydns-server -gen-key -privkey-file /keys/server.key -pubkey-file /keys/server.pub"
+
+  dns1:
+    image: coredns/coredns
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.10
+    volumes:
+      - ../Corefile:/Corefile
+    command: ["-conf", "/Corefile"]
+
+  dns2:
+    image: coredns/coredns
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.11
+    volumes:
+      - ../Corefile:/Corefile
+    command: ["-conf", "/Corefile"]
+
+  backend:
+    image: nginx:alpine
+    networks:
+      - backend-net
+
+  server:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    networks:
+      dns-net:
+        ipv4_address: 172.28.0.20
+      backend-net:
+    volumes:
+      - keys:/keys
+    command: >
+      vaydns-server -udp :53 -privkey-file /keys/server.key
+      -domain t.example.com -upstream backend:80
+      -dnstt-compat
+    depends_on:
+      keygen:
+        condition: service_completed_successfully
+      dns1:
+        condition: service_started
+      dns2:
+        condition: service_started
+
+  client:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+    networks:
+      - dns-net
+    volumes:
+      - keys:/keys
+    command: >
+      vaydns-client -udp 172.28.0.10:53 -udp 172.28.0.11:53
+      -pubkey-file /keys/server.pub
+      -domain t.example.com -listen 0.0.0.0:7000
+      -dnstt-compat
+    depends_on:
+      server:
+        condition: service_started
diff --git a/e2e/multi-resolver-dnstt-compat/run.sh b/e2e/multi-resolver-dnstt-compat/run.sh
new file mode 100755
index 0000000..66618c6
--- /dev/null
+++ b/e2e/multi-resolver-dnstt-compat/run.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Test: multi-resolver with dnstt-compat wire format.
+# Verifies that -dnstt-compat (8-byte ClientID, padding prefixes, forced TXT,
+# longer timeouts) works correctly when combined with multiple UDP resolvers.
+# The wire format sits above the MultiResolver transport layer so there should
+# be no interaction, but this test guards against regressions.
+set -euo pipefail
+cd "$(dirname "$0")"
+
+cleanup() { docker compose down -v 2>/dev/null; }
+trap cleanup EXIT
+
+echo "--- Building and starting services ---"
+docker compose up -d --build
+
+# dnstt-compat uses longer timeouts (2m idle, 10s keepalive) and the tunnel
+# handshake may be slower due to the larger wire overhead, so allow more time.
+echo "--- Waiting for tunnel (up to 45s) ---"
+for i in $(seq 1 45); do
+    if docker compose exec -T client wget -q -O- http://localhost:7000 2>/dev/null | grep -q "Welcome to nginx"; then
+        echo ""
+        echo "=== PASS ==="
+        exit 0
+    fi
+    printf "."
+    sleep 1
+done
+
+echo ""
+echo "--- Tunnel did not come up. Dumping logs ---"
+docker compose logs client server dns1 dns2
+echo "=== FAIL ==="
+exit 1
diff --git a/e2e/run-test.sh b/e2e/run-test.sh
index e1739d8..70eaf89 100755
--- a/e2e/run-test.sh
+++ b/e2e/run-test.sh
@@ -20,7 +20,7 @@ for rt in txt cname a aaaa mx ns srv; do
     fi
 done
 
-for test_dir in socks-download recovery transport-recovery multi-resolver multi-resolver-runtime-failure multi-resolver-forge multi-resolver-health-detection; do
+for test_dir in socks-download recovery transport-recovery multi-resolver multi-resolver-runtime-failure multi-resolver-forge multi-resolver-health-detection multi-resolver-dnstt-compat; do
     total=$((total + 1))
     echo ""
     echo "========================================"

From 68022f40e5591ef450eedab6cf3e3af4b2446a9c Mon Sep 17 00:00:00 2001
From: crazydi4mond <255249920+crazydi4mond@users.noreply.github.com>
Date: Sun, 12 Apr 2026 05:23:59 +0200
Subject: [PATCH 19/19] refactor(multi-resolver): cleanup dead code, fix edge
 cases, improve tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Validate empty resolver slice in NewTunnelMulti to prevent panic on
  Resolvers[0] access in InitiateResolverConnection.
- Remove the direct isRateLimitedResponse state assignment in
  evaluateIncoming — recomputeState unconditionally overwrites it, so
  the assignment had no observable effect. Remove the now-callerless
  isRateLimitedResponse function. Rate-limit classification happens
  via the accumulation threshold in recomputeState.
- Remove unused Tunnel.MultiResolverStats and
  Tunnel.MultiResolverValidInvalidCounts methods and their underlying
  MultiResolver.ResolverStats and MultiResolver.ValidInvalidCounts
  (no caller in the codebase).
- Fix QF1012 lint: WriteString(fmt.Sprintf(...)) → fmt.Fprintf in
  renderResolverStatsTable.
- Add defensive comment on the forgedStats nil guard in evaluateIncoming
  explaining it is always non-nil in multi-resolver mode.
- Change health stats table log level from Trace to Debug for easier
  operator access.
- Add TestMultiResolver_WriteToFailsOverToNextEntry: closes one entry's
  transport, verifies WriteTo marks it dead and succeeds via the next
  alive entry.
---
 client/client.go              | 21 +++------------
 client/multi_resolver.go      | 42 ++++++++----------------------
 client/multi_resolver_test.go | 48 ++++++++++++++++++++++++++++++++---
 3 files changed, 59 insertions(+), 52 deletions(-)

diff --git a/client/client.go b/client/client.go
index 7629eac..377a4d8 100644
--- a/client/client.go
+++ b/client/client.go
@@ -234,6 +234,9 @@ func NewTunnel(resolver Resolver, tunnelServer TunnelServer) (*Tunnel, error) {
 // multiplexes queries across them with health-based routing.
 // Zero-value fields use sensible defaults.
 func NewTunnelMulti(resolvers []Resolver, tunnelServer TunnelServer) (*Tunnel, error) {
+	if len(resolvers) == 0 {
+		return nil, fmt.Errorf("at least one resolver is required")
+	}
 	t := &Tunnel{
 		Resolvers:    resolvers,
 		TunnelServer: tunnelServer,
@@ -552,24 +555,6 @@ func (t *Tunnel) closeTransportLayers() {
 	t.forgedStats = nil
 }
 
-// MultiResolverStats returns per-resolver health and count snapshots when the
-// active resolver transport is MultiResolver; otherwise it returns nil.
-func (t *Tunnel) MultiResolverStats() []ResolverStat {
-	if mr, ok := t.resolverConn.(*MultiResolver); ok {
-		return mr.ResolverStats()
-	}
-	return nil
-}
-
-// MultiResolverValidInvalidCounts returns valid/invalid counters per resolver
-// address when the active resolver transport is MultiResolver.
-func (t *Tunnel) MultiResolverValidInvalidCounts() map[string][2]int64 {
-	if mr, ok := t.resolverConn.(*MultiResolver); ok {
-		return mr.ValidInvalidCounts()
-	}
-	return nil
-}
-
 // resetTransportLayers tears down existing transport layers and creates fresh
 // ones. Used during reconnect to ensure a clean transport stack.
 func (t *Tunnel) resetTransportLayers() error {
diff --git a/client/multi_resolver.go b/client/multi_resolver.go
index ec4a5a5..00cd6c8 100644
--- a/client/multi_resolver.go
+++ b/client/multi_resolver.go
@@ -173,13 +173,18 @@ func (e *resolverEntry) evaluateIncoming(packet []byte) bool {
 	e.mu.Lock()
 	delete(e.pending, resp.ID)
 	e.invalidCount++
-	if isRateLimitedResponse(resp) {
-		e.state = ResolverStateRateLimited
-	}
+	// Rate-limit classification is handled by the accumulation path in
+	// recomputeState (invalidCount >= threshold && validCount == 0), not
+	// by a direct state assignment here. recomputeState unconditionally
+	// reassigns e.state, so any direct assignment would be immediately
+	// overwritten.
 	e.mu.Unlock()
 	e.recomputeState(now)
 
 	// Record in per-resolver forged stats and signal caller not to forward.
+	// forgedStats is always non-nil in multi-resolver mode (set at entry
+	// construction). The nil guard is defensive for any future caller that
+	// constructs a resolverEntry without setting forgedStats.
 	if isResponse && e.forgedStats != nil {
 		e.forgedStats.Record(rcode)
 		return true
@@ -424,7 +429,7 @@ func (mr *MultiResolver) healthWorker() {
 				e.expirePending(now)
 				stats = append(stats, e.snapshot())
 			}
-			log.Trace("\n" + renderResolverStatsTable(stats, now))
+			log.Debug("\n" + renderResolverStatsTable(stats, now))
 		case <-mr.closed:
 			return
 		}
@@ -445,7 +450,7 @@ func renderResolverStatsTable(stats []ResolverStat, now time.Time) string {
 		if !s.LastValid.IsZero() {
 			lastValidAgo = now.Sub(s.LastValid).Truncate(time.Second).String()
 		}
-		b.WriteString(fmt.Sprintf("| %-23.23s | %-12s | %6d | %7d | %7d | %11s | %11s |\n",
+		fmt.Fprintf(&b, "| %-23.23s | %-12s | %6d | %7d | %7d | %11s | %11s |\n",
 			s.Address,
 			s.State,
 			s.ValidCount,
@@ -453,7 +458,7 @@ func renderResolverStatsTable(stats []ResolverStat, now time.Time) string {
 			s.TimeoutCount,
 			lastWriteAgo,
 			lastValidAgo,
-		))
+		)
 	}
 	b.WriteString("+-------------------------+--------------+--------+---------+---------+-------------+-------------+")
 	return b.String()
@@ -466,11 +471,6 @@ func isValidDNSResponse(resp dns.Message) bool {
 	return (resp.Flags & 0x000f) == dns.RcodeNoError
 }
 
-func isRateLimitedResponse(resp dns.Message) bool {
-	rcode := resp.Flags & 0x000f
-	return rcode == dns.RcodeRefused || rcode == dns.RcodeServerFailure
-}
-
 // ReadFrom receives a packet from whichever resolver responds first.
 // It only returns an error when the MultiResolver has been closed or every
 // entry has died; a single resolver's transport error is isolated at the
@@ -608,26 +608,6 @@ func (mr *MultiResolver) selectProbeTarget(primary *resolverEntry) *resolverEntr
 	return nil
 }
 
-// ResolverStats returns current resolver health counters.
-func (mr *MultiResolver) ResolverStats() []ResolverStat {
-	stats := make([]ResolverStat, 0, len(mr.entries))
-	for _, e := range mr.entries {
-		stats = append(stats, e.snapshot())
-	}
-	return stats
-}
-
-// ValidInvalidCounts returns valid/invalid counts by resolver address.
-func (mr *MultiResolver) ValidInvalidCounts() map[string][2]int64 {
-	out := make(map[string][2]int64, len(mr.entries))
-	for _, e := range mr.entries {
-		e.mu.Lock()
-		out[e.name] = [2]int64{e.validCount, e.invalidCount}
-		e.mu.Unlock()
-	}
-	return out
-}
-
 // Close closes all underlying connections and stops the reader goroutines.
 func (mr *MultiResolver) Close() error {
 	mr.closeOnce.Do(func() {
diff --git a/client/multi_resolver_test.go b/client/multi_resolver_test.go
index e08536c..93eff20 100644
--- a/client/multi_resolver_test.go
+++ b/client/multi_resolver_test.go
@@ -61,7 +61,12 @@ func (f *fakePacketConn) ReadFrom(p []byte) (int, net.Addr, error) {
 }
 
 func (f *fakePacketConn) WriteTo(p []byte, _ net.Addr) (int, error) {
-	return len(p), nil
+	select {
+	case <-f.closed:
+		return 0, net.ErrClosed
+	default:
+		return len(p), nil
+	}
 }
 
 func (f *fakePacketConn) Close() error {
@@ -212,8 +217,7 @@ drain:
 
 // buildDNSResponse constructs a minimal valid DNS wire-format response with
 // the given RCODE. QR is set to 1. This is the smallest packet that
-// dns.MessageFromWireFormat can parse and isValidDNSResponse/isRateLimitedResponse
-// can classify.
+// dns.MessageFromWireFormat can parse and evaluateIncoming can classify.
 func buildDNSResponse(t *testing.T, rcode uint16) []byte {
 	t.Helper()
 	msg := &dns.Message{
@@ -371,3 +375,41 @@ func TestMultiResolver_ForgedStatsPerEntryLabeled(t *testing.T) {
 		t.Errorf("entry2 label = %q, want %q", e2.forgedStats.Label, "8.8.8.8:53")
 	}
 }
+
+// TestMultiResolver_WriteToFailsOverToNextEntry verifies that when the
+// primary entry's transport is closed, WriteTo marks it dead and retries
+// with the next alive entry rather than returning an error.
+func TestMultiResolver_WriteToFailsOverToNextEntry(t *testing.T) {
+	broken := newFakePacketConn("broken")
+	working := newFakePacketConn("working")
+
+	entries := []*resolverEntry{
+		newFakeEntry("broken", broken),
+		newFakeEntry("working", working),
+	}
+	mr := newTestMultiResolver(entries)
+	defer mr.Close()
+
+	// Close the first entry's transport so WriteTo errors on it.
+	broken.Close()
+
+	// Build a minimal DNS query so trackOutgoingID can parse it.
+	query := buildDNSResponse(t, dns.RcodeNoError) // any valid DNS message works
+
+	n, err := mr.WriteTo(query, turbotunnel.DummyAddr{})
+	if err != nil {
+		t.Fatalf("WriteTo returned error %v; expected failover to working entry", err)
+	}
+	if n != len(query) {
+		t.Fatalf("WriteTo wrote %d bytes, want %d", n, len(query))
+	}
+
+	// Verify the broken entry was marked dead.
+	if !entries[0].isDead() {
+		t.Error("broken entry should be marked dead after write failure")
+	}
+	// Verify the working entry is still alive.
+	if entries[1].isDead() {
+		t.Error("working entry should still be alive")
+	}
+}