diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5356bef..9ded57a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: go-version: '1.24' - name: Build - run: go build -v ./... + run: CGO_ENABLED=0 go build -v -ldflags="-X main.version=${GITHUB_SHA::7}" ./... - name: Test run: go test -v ./... @@ -60,6 +60,7 @@ jobs: - name: Build env: + CGO_ENABLED: '0' GOOS: ${{ matrix.goos }} GOARCH: ${{ matrix.goarch }} GOARM: ${{ matrix.goarm }} @@ -69,8 +70,9 @@ jobs: ARCH="armv7" fi SUFFIX="${{ matrix.suffix }}" - go build -trimpath -ldflags="-s -w" -o "vaydns-client-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-client - go build -trimpath -ldflags="-s -w" -o "vaydns-server-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-server + VERSION="${GITHUB_SHA::7}" + go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o "vaydns-client-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-client + go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o "vaydns-server-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-server - name: Upload artifacts uses: actions/upload-artifact@v4 @@ -118,5 +120,6 @@ jobs: context: . file: Dockerfile push: true + build-args: VERSION=dev-${{ env.SHORT_SHA }} tags: ghcr.io/${{ github.repository }}:dev-${{ env.SHORT_SHA }} diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index a968f49..6b2ba35 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -68,6 +68,7 @@ jobs: - name: Build env: + CGO_ENABLED: '0' GOOS: ${{ matrix.goos }} GOARCH: ${{ matrix.goarch }} GOARM: ${{ matrix.goarm }} @@ -77,8 +78,10 @@ jobs: ARCH="armv7" fi SUFFIX="${{ matrix.suffix }}" - go build -trimpath -ldflags="-s -w" -o "vaydns-client-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-client - go build -trimpath -ldflags="-s -w" -o "vaydns-server-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-server + TAG="${{ needs.release-please.outputs.tag_name }}" + VERSION="${TAG#v}" + go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o "vaydns-client-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-client + go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o "vaydns-server-${{ matrix.goos }}-${ARCH}${SUFFIX}" ./vaydns-server chmod +x vaydns-client-* vaydns-server-* - name: Upload artifact @@ -147,6 +150,7 @@ jobs: context: . file: Dockerfile push: true + build-args: VERSION=${{ steps.version.outputs.VERSION }} tags: | ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }} ghcr.io/${{ github.repository }}:latest diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 954b159..e7cf9b3 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.2.6" + ".": "0.2.8" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 439342b..5b48bc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # Changelog +## [0.2.8](https://github.com/net2share/vaydns/compare/v0.2.7...v0.2.8) (2026-04-10) + + +### Features + +* add -v flag for printing version ([#71](https://github.com/net2share/vaydns/issues/71)) ([3533148](https://github.com/net2share/vaydns/commit/35331484bce1b628372dc37accae4e96b507841f)) +* add NULL and CAA record type support and fix server EDNS mtu advertisement ([#70](https://github.com/net2share/vaydns/issues/70)) ([8354db2](https://github.com/net2share/vaydns/commit/8354db2a080ce9543594bc4e71592f33e6489d82)) + +## [0.2.7](https://github.com/net2share/vaydns/compare/v0.2.6...v0.2.7) (2026-04-01) + + +### Features + +* tune session defaults and hide session-check-interval flag ([24bdc6c](https://github.com/net2share/vaydns/commit/24bdc6cc37bdd9a3c91900cf61e477207517cb1f)) + + +### Bug Fixes + +* **client:** make max streams unlimited by default ([#63](https://github.com/net2share/vaydns/issues/63)) ([4cc8228](https://github.com/net2share/vaydns/commit/4cc8228190d415cc75a32f59d83f7b77ae2af68b)) +* **server:** clarify server accept session/stream warning logs ([#57](https://github.com/net2share/vaydns/issues/57)) ([654b2f1](https://github.com/net2share/vaydns/commit/654b2f1f3dbde6001e19c9f8391436c28e55eca9)) + ## [0.2.6](https://github.com/net2share/vaydns/compare/v0.2.5...v0.2.6) (2026-03-29) diff --git a/Dockerfile b/Dockerfile index bb2b842..d948aad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,8 +5,9 @@ COPY go.mod go.sum ./ RUN go mod download COPY . . -RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /vaydns-server ./vaydns-server -RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /vaydns-client ./vaydns-client +ARG VERSION=dev +RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o /vaydns-server ./vaydns-server +RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" -o /vaydns-client ./vaydns-client FROM alpine RUN apk add --no-cache curl diff --git a/README.md b/README.md index f0beeb5..7259542 100644 --- a/README.md +++ b/README.md @@ -130,12 +130,12 @@ sudo ip6tables -t nat -I PREROUTING -i eth0 -p udp --dport 53 -j REDIRECT --to-p | `-privkey HEX` | Server private key as hex string | — | | `-gen-key` | Generate a new keypair and exit | — | | `-mtu N` | Max UDP payload size for responses | `1232` | -| `-idle-timeout D` | Session idle timeout (must match client) | `60s` | -| `-keepalive D` | Keepalive ping interval (must match client, must be < idle-timeout) | `10s` | +| `-idle-timeout D` | Session idle timeout (must match client) | `10s` | +| `-keepalive D` | Keepalive ping interval (must match client, must be < idle-timeout) | `2s` | | `-fallback ADDR` | UDP endpoint to forward non-DNS packets to (e.g. `127.0.0.1:8888`) | — | | `-dnstt-compat` | Use original dnstt wire format (8-byte ClientID, padding prefixes). Also sets `-idle-timeout` to 2m and `-keepalive` to 10s unless explicitly overridden. | `false` | | `-clientid-size N` | ClientID size in bytes (ignored when `-dnstt-compat` is set) | `2` | -| `-record-type TYPE` | DNS record type for downstream data: `txt`, `cname`, `a`, `aaaa`, `mx`, `ns`, `srv`. Must match the client. Ignored (forced to `txt`) when `-dnstt-compat` is set. | `txt` | +| `-record-type TYPE` | DNS record type for downstream data: `txt`, `null`, `cname`, `a`, `aaaa`, `mx`, `ns`, `srv`, `caa`. Must match the client. Ignored (forced to `txt`) when `-dnstt-compat` is set. | `txt` | | `-queue-size N` | Packet queue size for transport and DNS layers | `512` | | `-kcp-window-size N` | KCP send/receive window size in packets (0 = queue-size/2) | `0` | | `-queue-overflow MODE` | Queue overflow behavior: `drop` (silent discard) or `block` (backpressure) | `drop` | @@ -164,17 +164,16 @@ sudo ip6tables -t nat -I PREROUTING -i eth0 -p udp --dport 53 -j REDIRECT --to-p | Flag | Description | Default | | --------------------------- | -------------------------------------------------- | ------- | -| `-idle-timeout D` | Session idle timeout (must match server) | `60s` | -| `-keepalive D` | Keepalive ping interval (must match server, must be < idle-timeout) | `10s` | +| `-idle-timeout D` | Session idle timeout (must match server) | `10s` | +| `-keepalive D` | Keepalive ping interval (must match server, must be < idle-timeout) | `2s` | | `-max-streams N` | Max concurrent streams per session (0 = unlimited) | `0` | | `-open-stream-timeout D` | Timeout for opening an smux stream | `10s` | | `-reconnect-min D` | Initial backoff delay for session reconnect | `1s` | | `-reconnect-max D` | Max backoff delay (must be >= reconnect-min) | `30s` | -| `-session-check-interval D` | How often to check if the session is alive (should be shorter than idle-timeout) | `20s` | -> **Note:** `idle-timeout` and `keepalive` must be set to the same values on both client and server — mismatched values will cause one side to close the session before the other detects it. Keep `keepalive` well below `idle-timeout` (the default 6x ratio allows ~6 ping attempts before timeout). +> **Note:** `idle-timeout` and `keepalive` must be set to the same values on both client and server — mismatched values will cause one side to close the session before the other detects it. Keep `keepalive` well below `idle-timeout` (the default 5x ratio allows ~5 ping attempts before timeout). > -> `session-check-interval` controls how quickly the client detects a dead session and starts reconnecting — it does not affect when the session dies. A lower value means faster reconnection but can cause unnecessary churn on lossy networks. It does not need to match on client and server. +> **How they relate:** `keepalive` controls how often smux sends ping frames to prove the session is alive. `idle-timeout` is how long smux waits with no received data (including pings) before declaring the session dead — it applies symmetrically on both sides. #### UDP transport tuning @@ -225,7 +224,7 @@ These reduce upstream throughput but improve compatibility. The minimum effectiv | `-rps N` | Rate limit outgoing DNS queries per second (0 = unlimited). Uses a token bucket with 1-second burst allowance. | `0` | | `-dnstt-compat` | Use original dnstt wire format (8-byte ClientID, padding prefixes). Sets `-max-qname-len` to 253 unless explicitly overridden. Forces `-record-type` to `txt` with a warning if another type is set. | `false` | | `-clientid-size N` | ClientID size in bytes (ignored when `-dnstt-compat` is set) | `2` | -| `-record-type TYPE` | DNS record type for downstream data: `txt`, `cname`, `a`, `aaaa`, `mx`, `ns`, `srv`. Must match the server. | `txt` | +| `-record-type TYPE` | DNS record type for downstream data: `txt`, `null`, `cname`, `a`, `aaaa`, `mx`, `ns`, `srv`, `caa`. Must match the server. | `txt` | | `-utls SPEC` | TLS fingerprint distribution (see below) | weighted random | | `-log-level LEVEL` | Log level: debug, info, warning, error | `info` | @@ -393,14 +392,14 @@ On both client and server, `-dnstt-compat` switches to the original dnstt wire f | Setting | VayDNS default | With `-dnstt-compat` | Applies to | | ------- | -------------- | -------------------- | ---------- | | `-max-qname-len` | `101` | `253` | client | -| `-idle-timeout` | `60s` | `2m` | client and server | -| `-keepalive` | `10s` | `10s` | client and server | +| `-idle-timeout` | `10s` | `2m` | client and server | +| `-keepalive` | `2s` | `10s` | client and server | All three can be explicitly overridden even when `-dnstt-compat` is set — the flag only changes the defaults, it does not lock the values. For example, `-dnstt-compat -idle-timeout 30s` uses the dnstt wire format with a 30-second idle timeout. > **Note:** `-dnstt-compat` forces `-record-type` to `txt` (with a warning if another type was set). dnstt only supports TXT records, so other record types are incompatible. > -> The timeout defaults are critical for interop with original dnstt binaries. dnstt uses a 10-second keepalive interval (smux default) and a 2-minute idle timeout. Setting `-idle-timeout` below 10s in compat mode will cause sessions to churn because dnstt peers only send keepalives every 10 seconds. When mixing with dnstt, keep the compat defaults unless you know what you're doing. +> The timeout defaults are critical for interop with original dnstt binaries. dnstt uses a 10-second keepalive interval (smux default) and a 2-minute idle timeout. Setting `-idle-timeout` below 10s in compat mode will cause sessions to churn because dnstt peers only send keepalives every 10 seconds. When connecting to dnstt, keep the compat defaults unless you know what you're doing. ### Record types @@ -409,12 +408,14 @@ VayDNS supports multiple DNS record types for downstream data encoding. Both cli | Type | Description | Capacity | | ---- | ----------- | -------- | | `txt` | TXT record (default). Highest capacity, compatible with dnstt. | Bounded by UDP payload (~1200 bytes) | +| `null` | NULL record. Raw binary payload in a single RR. Some recursive resolvers may filter or refuse to relay NULL records. | Bounded by UDP payload | | `cname` | CNAME record. Data encoded as a DNS name under the tunnel domain. | Bounded by 255-byte DNS name limit | | `ns` | NS record. Same encoding as CNAME. | Same as CNAME | | `mx` | MX record. 2-byte preference header + name encoding. | Same as CNAME | | `srv` | SRV record. 6-byte header + name encoding. | Same as CNAME | | `a` | A records. Data split into 4-byte chunks across multiple answer RRs. | Bounded by UDP payload | | `aaaa` | AAAA records. Data split into 16-byte chunks across multiple answer RRs. | Bounded by UDP payload | +| `caa` | CAA record. Payload encoded in the value portion of a fixed `issue` property. | Bounded by UDP payload | > **Compatibility:** Old VayDNS clients (pre-record-type) only send TXT queries. A new server with the default `-record-type txt` is fully compatible with old clients. Using a non-TXT type requires updating both client and server. diff --git a/client/client.go b/client/client.go index 0e1ffad..377a4d8 100644 --- a/client/client.go +++ b/client/client.go @@ -16,6 +16,14 @@ // t.InitiateSmuxSession() // stream, _ := t.OpenStream() // returns net.Conn // defer t.Close() +// +// Multi-resolver usage (spread queries across multiple DNS resolvers): +// +// r1, _ := client.NewResolver(client.ResolverTypeUDP, "8.8.8.8:53") +// r2, _ := client.NewResolver(client.ResolverTypeDOH, "https://1.1.1.1/dns-query") +// ts, _ := client.NewTunnelServer("t.example.com", "pubkey-hex") +// t, _ := client.NewTunnelMulti([]client.Resolver{r1, r2}, ts) +// t.ListenAndServe("127.0.0.1:7000") package client import ( @@ -39,12 +47,12 @@ import ( // Default timeouts for VayDNS mode. const ( - DefaultIdleTimeout = 60 * time.Second - DefaultKeepAlive = 10 * time.Second + DefaultIdleTimeout = 10 * time.Second + DefaultKeepAlive = 2 * time.Second DefaultOpenStreamTimeout = 10 * time.Second DefaultReconnectDelay = 1 * time.Second DefaultReconnectMaxDelay = 30 * time.Second - DefaultSessionCheckInterval = 20 * time.Second + DefaultSessionCheckInterval = 500 * time.Millisecond DefaultUDPResponseTimeout = 500 * time.Millisecond DefaultUDPWorkers = 100 DefaultMaxStreams = 0 // unlimited @@ -198,8 +206,8 @@ type Tunnel struct { MaxStreams int // default: 0 (0 = unlimited) ReconnectMinDelay time.Duration // default: 1s ReconnectMaxDelay time.Duration // default: 30s - SessionCheckInterval time.Duration // default: 20s - HandshakeTimeout time.Duration // default: 30s + SessionCheckInterval time.Duration // default: 500ms + HandshakeTimeout time.Duration // default: 15s PacketQueueSize int // default: QueueSize (512) KCPWindowSize int // default: PacketQueueSize/2 QueueOverflowMode turbotunnel.QueueOverflowMode // default: drop @@ -215,9 +223,20 @@ type Tunnel struct { remoteAddr net.Addr } -// NewTunnel creates a Tunnel with the given resolver and server configuration. +// NewTunnel creates a Tunnel with a single resolver and server configuration. +// For multiple resolvers, use NewTunnelMulti. +func NewTunnel(resolver Resolver, tunnelServer TunnelServer) (*Tunnel, error) { + return NewTunnelMulti([]Resolver{resolver}, tunnelServer) +} + +// NewTunnelMulti creates a Tunnel with multiple resolvers and server +// configuration. When more than one resolver is provided, the client +// multiplexes queries across them with health-based routing. // Zero-value fields use sensible defaults. -func NewTunnel(resolvers []Resolver, tunnelServer TunnelServer) (*Tunnel, error) { +func NewTunnelMulti(resolvers []Resolver, tunnelServer TunnelServer) (*Tunnel, error) { + if len(resolvers) == 0 { + return nil, fmt.Errorf("at least one resolver is required") + } t := &Tunnel{ Resolvers: resolvers, TunnelServer: tunnelServer, @@ -292,15 +311,21 @@ func (t *Tunnel) effectiveKCPWindowSize() int { // based on the Resolver configuration. func (t *Tunnel) InitiateResolverConnection() error { if len(t.Resolvers) > 1 { - conn, err := NewMultiResolver(t.Resolvers, SelectionRoundRobin, t.effectivePacketQueueSize(), t.effectiveQueueOverflowMode()) + conn, err := NewMultiResolver(t.Resolvers, t.effectivePacketQueueSize(), t.effectiveQueueOverflowMode()) if err != nil { return err } t.resolverConn = conn t.remoteAddr = turbotunnel.DummyAddr{} + // In multi-resolver mode, each entry owns a per-resolver + // ForgedStats. t.forgedStats stays nil so DNSPacketConn + // creates an unlabeled catch-all (which should rarely fire + // since MultiResolver filters forged responses upstream). return nil } - conn, addr, err := GetResolverConnection(t.Resolvers[0], t.effectivePacketQueueSize(), t.effectiveQueueOverflowMode()) + r := t.Resolvers[0] + t.forgedStats = &ForgedStats{Label: r.ResolverAddr} + conn, addr, err := GetResolverConnection(r, t.effectivePacketQueueSize(), t.effectiveQueueOverflowMode(), t.forgedStats) if err != nil { return err } @@ -530,24 +555,6 @@ func (t *Tunnel) closeTransportLayers() { t.forgedStats = nil } -// MultiResolverStats returns per-resolver health and count snapshots when the -// active resolver transport is MultiResolver; otherwise it returns nil. -func (t *Tunnel) MultiResolverStats() []ResolverStat { - if mr, ok := t.resolverConn.(*MultiResolver); ok { - return mr.ResolverStats() - } - return nil -} - -// MultiResolverValidInvalidCounts returns valid/invalid counters per resolver -// address when the active resolver transport is MultiResolver. -func (t *Tunnel) MultiResolverValidInvalidCounts() map[string][2]int64 { - if mr, ok := t.resolverConn.(*MultiResolver); ok { - return mr.ValidInvalidCounts() - } - return nil -} - // resetTransportLayers tears down existing transport layers and creates fresh // ones. Used during reconnect to ensure a clean transport stack. func (t *Tunnel) resetTransportLayers() error { @@ -841,10 +848,9 @@ func NewOutbound(resolvers []Resolver, tunnelServers []TunnelServer) *Outbound { // Start begins accepting connections on bind and forwarding them through the // first resolver/server pair. func (o *Outbound) Start(bind string) error { - tunnelServer := o.TunnelServers[0] - tunnel, err := NewTunnel(o.Resolvers, tunnelServer) + tunnel, err := NewTunnelMulti(o.Resolvers, tunnelServer) if err != nil { return fmt.Errorf("failed to create tunnel: %w", err) } diff --git a/client/dns.go b/client/dns.go index 729dece..7847faf 100644 --- a/client/dns.go +++ b/client/dns.go @@ -100,10 +100,17 @@ func (rl *RateLimiter) Wait() { } } -// ForgedStats tracks forged DNS response counters. It is shared between -// UDPPacketConn (per-query mode) and DNSPacketConn (shared socket mode) so -// that forged response visibility is consistent regardless of transport. +// ForgedStats tracks forged DNS response counters for a specific source. +// In single-resolver mode, one instance is shared between UDPPacketConn +// (per-query mode) and DNSPacketConn so milestone logs fire on a unified +// count. In multi-resolver mode, each entry holds its own labeled instance +// so operators can see which resolver is being targeted by injection. type ForgedStats struct { + // Label identifies the counter's source in log lines — typically the + // resolver address (e.g. "8.8.8.8:53"). An empty label is allowed + // and produces an unlabeled log line (used by DNSPacketConn's catch-all + // safety net when no resolver attribution is available). + Label string Total uint64 SERVFAIL uint64 NXDOMAIN uint64 @@ -124,11 +131,19 @@ func (s *ForgedStats) Record(rcode uint16) { } total := atomic.AddUint64(&s.Total, 1) if forgedInfoMilestone(total) { - log.Infof("forged DNS responses: total=%d, SERVFAIL=%d, NXDOMAIN=%d, other=%d", - total, - atomic.LoadUint64(&s.SERVFAIL), - atomic.LoadUint64(&s.NXDOMAIN), - atomic.LoadUint64(&s.Other)) + if s.Label != "" { + log.Infof("forged DNS responses from %s: total=%d, SERVFAIL=%d, NXDOMAIN=%d, other=%d", + s.Label, total, + atomic.LoadUint64(&s.SERVFAIL), + atomic.LoadUint64(&s.NXDOMAIN), + atomic.LoadUint64(&s.Other)) + } else { + log.Infof("forged DNS responses: total=%d, SERVFAIL=%d, NXDOMAIN=%d, other=%d", + total, + atomic.LoadUint64(&s.SERVFAIL), + atomic.LoadUint64(&s.NXDOMAIN), + atomic.LoadUint64(&s.Other)) + } } } @@ -150,8 +165,8 @@ func forgedInfoMilestone(total uint64) bool { // DNSPacketConn provides a packet-sending and -receiving interface over various // forms of DNS. It handles the details of how packets and padding are encoded -// as a DNS name in the Question section of an upstream query, and as a TXT RR -// in downstream responses. +// as a DNS name in the Question section of an upstream query, and as an RR in +// downstream responses. // // DNSPacketConn does not handle the mechanics of actually sending and receiving // encoded DNS messages. That is rather the responsibility of some other @@ -166,7 +181,8 @@ type DNSPacketConn struct { clientID turbotunnel.ClientID wireConfig turbotunnel.WireConfig domain dns.Name - // rrType is the DNS record type used for downstream data (TXT, CNAME, A, AAAA, MX, NS, or SRV). + // rrType is the DNS record type used for downstream data (TXT, NULL, CNAME, + // A, AAAA, MX, NS, SRV, or CAA). rrType uint16 // Sending on pollChan permits sendLoop to send an empty polling query. // sendLoop also does its own polling according to a time schedule. @@ -259,11 +275,10 @@ func (c *DNSPacketConn) TransportErrors() <-chan error { return c.transportErr } -// dnsResponsePayload extracts the downstream payload of a DNS response, encoded -// into the RDATA of a TXT or CNAME RR. It returns (nil, true) when the response -// has a non-NoError RCODE, indicating a forged or hijacked response. It returns -// (payload, false) on success or (nil, false) when the response doesn't pass -// format checks. +// dnsResponsePayload extracts the downstream payload of a DNS response. It +// returns (nil, true) when the response has a non-NoError RCODE, indicating a +// forged or hijacked response. It returns (payload, false) on success or +// (nil, false) when the response doesn't pass format checks. func dnsResponsePayload(resp *dns.Message, domain dns.Name, rrType uint16) ([]byte, bool) { if resp.Flags&0x8000 != 0x8000 { // QR != 1, this is not a response. @@ -318,6 +333,10 @@ func dnsResponsePayload(resp *dns.Message, domain dns.Name, rrType uint16) ([]by var payload []byte var err error switch rrType { + case dns.RRTypeNULL: + payload, err = dns.DecodeRDataNULL(answer.Data) + case dns.RRTypeCAA: + payload, err = dns.DecodeRDataCAA(answer.Data) case dns.RRTypeCNAME: payload, err = dns.DecodeRDataCNAME(answer.Data, domain) case dns.RRTypeNS: diff --git a/client/multi_resolver.go b/client/multi_resolver.go index c46f791..00cd6c8 100644 --- a/client/multi_resolver.go +++ b/client/multi_resolver.go @@ -16,18 +16,6 @@ import ( log "github.com/sirupsen/logrus" ) -// SelectionMode controls which resolver MultiResolver picks for outgoing packets. -type SelectionMode string - -const ( - // SelectionRoundRobin rotates through preferred resolvers in order. - SelectionRoundRobin SelectionMode = "roundrobin" - // SelectionBest picks the resolver with the best observed score. - SelectionBest SelectionMode = "best" - // SelectionSmart currently aliases best-score selection. - SelectionSmart SelectionMode = "smart" -) - // ResolverState is the current health state of one resolver. type ResolverState string @@ -62,27 +50,71 @@ type resolverEntry struct { addr net.Addr conn net.PacketConn - validCount atomic.Int64 - invalidCount atomic.Int64 - timeoutCount atomic.Int64 + // dead is set when the entry's transport has reported an unrecoverable + // error and its reader goroutine has exited. A dead entry is permanently + // excluded from selection and never transitions back — the only way to + // recover is to rebuild the entire MultiResolver (which happens on a + // full tunnel reconnect). Kept as an atomic.Bool so selection helpers + // can read it without acquiring mu on the hot path. + dead atomic.Bool + + // forgedStats tracks forged DNS response counters for this specific + // resolver entry, with milestone logging attributed to its address. + // In multi-resolver mode each entry owns its own labeled instance; + // in single-resolver mode the Tunnel shares one instance with both + // UDPPacketConn and DNSPacketConn. + forgedStats *ForgedStats + + // mu protects every field below. Counters are deliberately not atomic: + // the decay path in recomputeState would lose concurrent Adds if the + // counters were atomic and decremented via Load→Store, and splitting + // decay across a CAS loop is uglier than just holding mu for the + // short window each counter update requires. + mu sync.Mutex + validCount int64 + invalidCount int64 + timeoutCount int64 + pending map[uint16]time.Time + lastWrite time.Time + lastValid time.Time + lastProbe time.Time + state ResolverState +} + +// markDead transitions the entry to a permanent Down state. It is idempotent +// and returns true only when this call was the one that transitioned the +// entry — callers rely on this to avoid double-counting the entry in +// MultiResolver.aliveCount when multiple goroutines detect the same transport +// failure concurrently. +// +// The dead flag and the state field are set together under mu so that +// recomputeState (also holding mu) cannot observe an inconsistent snapshot +// where dead is set but state has not yet been transitioned to Down. +func (e *resolverEntry) markDead() bool { + e.mu.Lock() + defer e.mu.Unlock() + if e.dead.Load() { + return false + } + e.dead.Store(true) + e.state = ResolverStateDown + return true +} - mu sync.Mutex - pending map[uint16]time.Time - lastWrite time.Time - lastValid time.Time - lastProbe time.Time - state ResolverState +// isDead reports whether markDead has been called on this entry. +func (e *resolverEntry) isDead() bool { + return e.dead.Load() } func (e *resolverEntry) writePacket(b []byte) (int, error) { now := time.Now() e.trackOutgoingID(b, now) n, err := e.conn.WriteTo(b, e.addr) - if err != nil { - e.invalidCount.Add(1) - } e.mu.Lock() e.lastWrite = now + if err != nil { + e.invalidCount++ + } e.mu.Unlock() return n, err } @@ -97,92 +129,123 @@ func (e *resolverEntry) trackOutgoingID(b []byte, now time.Time) { e.mu.Unlock() } -func (e *resolverEntry) readPacket() multiReadResult { +func (e *resolverEntry) readPacket() (multiReadResult, bool) { var result multiReadResult result.entry = e result.n, result.addr, result.err = e.conn.ReadFrom(result.buf[:]) + var forged bool if result.err == nil { - e.evaluateIncoming(result.buf[:result.n]) + forged = e.evaluateIncoming(result.buf[:result.n]) } - return result + return result, forged } -func (e *resolverEntry) evaluateIncoming(packet []byte) { +// evaluateIncoming parses an incoming DNS response for health tracking and +// forged-response detection. Returns true when the response is forged +// (QR=1, RCODE != NoError) — the caller should not forward it upstream +// because evaluateIncoming already recorded it in the entry's ForgedStats. +func (e *resolverEntry) evaluateIncoming(packet []byte) bool { + now := time.Now() resp, err := dns.MessageFromWireFormat(packet) if err != nil { - e.invalidCount.Add(1) - e.recomputeState(time.Now()) - return + e.mu.Lock() + e.invalidCount++ + e.mu.Unlock() + e.recomputeState(now) + return false // parse error — not forged, let upper layer handle } - e.mu.Lock() - delete(e.pending, resp.ID) - e.mu.Unlock() - if isValidDNSResponse(resp) { - e.validCount.Add(1) - e.timeoutCount.Store(0) e.mu.Lock() - e.lastValid = time.Now() + delete(e.pending, resp.ID) + e.validCount++ + e.timeoutCount = 0 + e.lastValid = now e.state = ResolverStateHealthy e.mu.Unlock() - return + return false } - e.invalidCount.Add(1) - if isRateLimitedResponse(resp) { - e.mu.Lock() - e.state = ResolverStateRateLimited - e.mu.Unlock() + // Non-valid DNS response. Track it for health purposes. + rcode := resp.Flags & 0x000f + isResponse := resp.Flags&0x8000 != 0 + + e.mu.Lock() + delete(e.pending, resp.ID) + e.invalidCount++ + // Rate-limit classification is handled by the accumulation path in + // recomputeState (invalidCount >= threshold && validCount == 0), not + // by a direct state assignment here. recomputeState unconditionally + // reassigns e.state, so any direct assignment would be immediately + // overwritten. + e.mu.Unlock() + e.recomputeState(now) + + // Record in per-resolver forged stats and signal caller not to forward. + // forgedStats is always non-nil in multi-resolver mode (set at entry + // construction). The nil guard is defensive for any future caller that + // constructs a resolverEntry without setting forgedStats. + if isResponse && e.forgedStats != nil { + e.forgedStats.Record(rcode) + return true } - e.recomputeState(time.Now()) + return false } func (e *resolverEntry) expirePending(now time.Time) { - expired := int64(0) e.mu.Lock() + expired := int64(0) for id, t := range e.pending { if now.Sub(t) >= pendingResponseTimeout { delete(e.pending, id) expired++ } } - e.mu.Unlock() if expired > 0 { - e.timeoutCount.Add(expired) - e.invalidCount.Add(expired) + e.timeoutCount += expired + e.invalidCount += expired + } else { + // Decay counters only when nothing accumulated this tick. + // This ensures burst expirations (and response-driven + // invalidCount growth between ticks) can outpace the decay + // and eventually cross the detection thresholds. Decay runs + // at most once per health-tick interval (~1s). + if e.invalidCount > 0 { + e.invalidCount-- + } + if e.timeoutCount > 0 { + e.timeoutCount-- + } } + e.mu.Unlock() e.recomputeState(now) } func (e *resolverEntry) recomputeState(now time.Time) { + // Dead is sticky — never recompute it back to any other state. We + // re-check under the lock after acquiring it, in case markDead raced + // in between the outer check and acquiring the lock. + if e.isDead() { + return + } e.mu.Lock() defer e.mu.Unlock() - - timeouts := e.timeoutCount.Load() - invalid := e.invalidCount.Load() - valid := e.validCount.Load() + if e.dead.Load() { + return + } switch { - case timeouts >= downTimeoutThreshold: + case e.timeoutCount >= downTimeoutThreshold: e.state = ResolverStateDown - case invalid >= rateLimitThreshold && valid == 0: + case e.invalidCount >= rateLimitThreshold && e.validCount == 0: e.state = ResolverStateRateLimited - case valid > 0 && now.Sub(e.lastValid) <= 30*time.Second: + case e.validCount > 0 && now.Sub(e.lastValid) <= 30*time.Second: e.state = ResolverStateHealthy - case valid == 0: + case e.validCount == 0: e.state = ResolverStateUnknown default: e.state = ResolverStateUnknown } - - // Slow decay to avoid sticky penalties. - if invalid > 0 { - e.invalidCount.Store(invalid - 1) - } - if timeouts > 0 { - e.timeoutCount.Store(timeouts - 1) - } } func (e *resolverEntry) stateSnapshot() ResolverState { @@ -209,9 +272,9 @@ func (e *resolverEntry) snapshot() ResolverStat { return ResolverStat{ Address: e.name, State: e.state, - ValidCount: e.validCount.Load(), - InvalidCount: e.invalidCount.Load(), - TimeoutCount: e.timeoutCount.Load(), + ValidCount: e.validCount, + InvalidCount: e.invalidCount, + TimeoutCount: e.timeoutCount, LastWrite: e.lastWrite, LastValid: e.lastValid, } @@ -231,24 +294,33 @@ type multiReadResult struct { // unhealthy resolvers by duplicating selected packets. type MultiResolver struct { entries []*resolverEntry - mode SelectionMode mu sync.Mutex rrIndex int probeRR int recvChan chan multiReadResult closed chan struct{} closeOnce sync.Once + + // aliveCount tracks how many entries still have a live reader + // goroutine. When it reaches zero, allDead is closed, causing pending + // ReadFrom/WriteTo calls to return an "all resolvers down" error, + // which is the only condition that should propagate a transport + // error to the upper tunnel session. + aliveCount atomic.Int32 + allDead chan struct{} + allDeadOnce sync.Once } // NewMultiResolver creates a MultiResolver from a slice of Resolver configs. -func NewMultiResolver(resolvers []Resolver, mode SelectionMode, queueSize int, overflowMode turbotunnel.QueueOverflowMode) (*MultiResolver, error) { +func NewMultiResolver(resolvers []Resolver, queueSize int, overflowMode turbotunnel.QueueOverflowMode) (*MultiResolver, error) { if len(resolvers) == 0 { return nil, fmt.Errorf("at least one resolver is required") } entries := make([]*resolverEntry, 0, len(resolvers)) for _, r := range resolvers { - conn, addr, err := GetResolverConnection(r, queueSize, overflowMode) + entryStats := &ForgedStats{Label: r.ResolverAddr} + conn, addr, err := GetResolverConnection(r, queueSize, overflowMode, entryStats) if err != nil { for _, e := range entries { e.conn.Close() @@ -256,38 +328,93 @@ func NewMultiResolver(resolvers []Resolver, mode SelectionMode, queueSize int, o return nil, fmt.Errorf("resolver %s %s: %w", r.ResolverType, r.ResolverAddr, err) } entries = append(entries, &resolverEntry{ - name: r.ResolverAddr, - addr: addr, - conn: conn, - pending: make(map[uint16]time.Time), - state: ResolverStateUnknown, + name: r.ResolverAddr, + addr: addr, + conn: conn, + forgedStats: entryStats, + pending: make(map[uint16]time.Time), + state: ResolverStateUnknown, }) } + log.Infof("multi-resolver: %d resolvers configured", len(entries)) + udpWorkerTotal := 0 + for i, r := range resolvers { + log.Infof(" [%d] %s %s", i+1, r.ResolverType, r.ResolverAddr) + if r.ResolverType == ResolverTypeUDP && !r.UDPSharedSocket { + workers := r.UDPWorkers + if workers <= 0 { + workers = DefaultUDPWorkers + } + udpWorkerTotal += workers + } + } + if udpWorkerTotal > 0 { + log.Infof("multi-resolver: %d total UDP worker goroutines", udpWorkerTotal) + } + mr := &MultiResolver{ entries: entries, - mode: mode, recvChan: make(chan multiReadResult, len(entries)*4), closed: make(chan struct{}), + allDead: make(chan struct{}), + } + mr.aliveCount.Store(int32(len(entries))) + mr.startReaders() + go mr.healthWorker() + return mr, nil +} + +// entryDied is called whenever a transport error is observed for an entry, +// either from its reader goroutine or from a failed write. It marks the entry +// dead and, if this was the transition from alive to dead (as opposed to a +// second observer of the same failure), decrements aliveCount. When the last +// alive entry dies, allDead is closed so pending ReadFrom/WriteTo callers can +// unblock with a terminal error. +func (mr *MultiResolver) entryDied(entry *resolverEntry, err error) { + if !entry.markDead() { + return } - for _, e := range entries { + log.Warnf("multi-resolver: entry %s transport error: %v; marking down", entry.name, err) + if mr.aliveCount.Add(-1) == 0 { + mr.allDeadOnce.Do(func() { close(mr.allDead) }) + } +} + +// startReaders launches one reader goroutine per entry. Each goroutine reads +// packets from its entry's transport and pushes the result onto recvChan. +// Extracted so tests can construct MultiResolver with synthetic entries. +// +// On a transport error, the reader calls entryDied to mark the entry dead and +// signal allDead when the last entry dies. It never pushes error-bearing +// results onto recvChan: doing so would surface a single resolver's failure +// to DNSPacketConn.recvLoop, which would tear down the entire tunnel session +// and defeat the point of having multiple resolvers. +func (mr *MultiResolver) startReaders() { + for _, e := range mr.entries { entry := e go func() { for { - res := entry.readPacket() + res, forged := entry.readPacket() + if res.err != nil { + mr.entryDied(entry, res.err) + return + } + if forged { + // evaluateIncoming already recorded this in the + // entry's per-resolver ForgedStats. Don't forward + // to DNSPacketConn — its safety-net filter would + // double-count it. + continue + } select { case mr.recvChan <- res: case <-mr.closed: return } - if res.err != nil { - return - } } }() } - go mr.healthWorker() - return mr, nil } func (mr *MultiResolver) healthWorker() { @@ -302,7 +429,7 @@ func (mr *MultiResolver) healthWorker() { e.expirePending(now) stats = append(stats, e.snapshot()) } - log.Trace("\n" + renderResolverStatsTable(stats, now)) + log.Debug("\n" + renderResolverStatsTable(stats, now)) case <-mr.closed: return } @@ -323,7 +450,7 @@ func renderResolverStatsTable(stats []ResolverStat, now time.Time) string { if !s.LastValid.IsZero() { lastValidAgo = now.Sub(s.LastValid).Truncate(time.Second).String() } - b.WriteString(fmt.Sprintf("| %-23.23s | %-12s | %6d | %7d | %7d | %11s | %11s |\n", + fmt.Fprintf(&b, "| %-23.23s | %-12s | %6d | %7d | %7d | %11s | %11s |\n", s.Address, s.State, s.ValidCount, @@ -331,7 +458,7 @@ func renderResolverStatsTable(stats []ResolverStat, now time.Time) string { s.TimeoutCount, lastWriteAgo, lastValidAgo, - )) + ) } b.WriteString("+-------------------------+--------------+--------+---------+---------+-------------+-------------+") return b.String() @@ -344,38 +471,67 @@ func isValidDNSResponse(resp dns.Message) bool { return (resp.Flags & 0x000f) == dns.RcodeNoError } -func isRateLimitedResponse(resp dns.Message) bool { - rcode := resp.Flags & 0x000f - return rcode == dns.RcodeRefused || rcode == dns.RcodeServerFailure -} - // ReadFrom receives a packet from whichever resolver responds first. +// It only returns an error when the MultiResolver has been closed or every +// entry has died; a single resolver's transport error is isolated at the +// reader goroutine and does not propagate here. func (mr *MultiResolver) ReadFrom(b []byte) (n int, addr net.Addr, err error) { select { case <-mr.closed: return 0, nil, net.ErrClosed case res := <-mr.recvChan: - if res.err != nil { - return 0, res.addr, res.err - } n = copy(b, res.buf[:res.n]) return n, turbotunnel.DummyAddr{}, nil + case <-mr.allDead: + // Drain any packet that was buffered by a reader before it + // died, so packets already delivered by the transport are not + // discarded in favour of the terminal error. No more readers + // are pushing (allDead is closed only after every reader has + // exited), so a non-blocking read here races only with the + // mr.closed case above, which is handled on the next call. + select { + case res := <-mr.recvChan: + n = copy(b, res.buf[:res.n]) + return n, turbotunnel.DummyAddr{}, nil + default: + return 0, nil, fmt.Errorf("multi-resolver: all resolvers are down") + } } } // WriteTo sends b to the selected primary resolver and may duplicate b to one -// unhealthy resolver as a probe to detect recovery. +// unhealthy resolver as a probe to detect recovery. If the primary's write +// fails, the entry is marked dead and WriteTo retries with the next alive +// entry. An error is returned only when the MultiResolver is closed or every +// entry has been marked dead. func (mr *MultiResolver) WriteTo(b []byte, _ net.Addr) (n int, err error) { select { case <-mr.closed: return 0, net.ErrClosed + case <-mr.allDead: + return 0, fmt.Errorf("multi-resolver: all resolvers are down") default: } - primary := mr.selectPrimary() - n, err = primary.writePacket(b) - if err != nil { - return n, err + // Try entries until one accepts the write or all alive entries have + // been exhausted. Each write error marks the entry dead. + var primary *resolverEntry + for attempts := 0; attempts < len(mr.entries); attempts++ { + primary = mr.selectPrimary() + if primary == nil { + break + } + n, err = primary.writePacket(b) + if err == nil { + break + } + mr.entryDied(primary, err) + } + switch { + case err != nil: + return 0, fmt.Errorf("multi-resolver: all write attempts failed: %w", err) + case primary == nil: + return 0, fmt.Errorf("multi-resolver: no alive resolver for write") } if probe := mr.selectProbeTarget(primary); probe != nil { @@ -385,17 +541,11 @@ func (mr *MultiResolver) WriteTo(b []byte, _ net.Addr) (n int, err error) { return n, nil } +// selectPrimary returns the entry that should receive the next outgoing +// query, or nil if every entry has been marked dead. The caller must handle +// nil (e.g., return an "all resolvers down" error). func (mr *MultiResolver) selectPrimary() *resolverEntry { - if mr.mode == SelectionRoundRobin { - if e := mr.selectRoundRobinHealthy(); e != nil { - return e - } - return mr.entries[0] - } - if e := mr.selectBestScore(); e != nil { - return e - } - return mr.entries[0] + return mr.selectRoundRobinHealthy() } func (mr *MultiResolver) selectRoundRobinHealthy() *resolverEntry { @@ -407,48 +557,29 @@ func (mr *MultiResolver) selectRoundRobinHealthy() *resolverEntry { } start := mr.rrIndex + // First pass: prefer Healthy or Unknown, skipping dead entries. for i := 0; i < len(mr.entries); i++ { idx := (start + i) % len(mr.entries) + if mr.entries[idx].isDead() { + continue + } state := mr.entries[idx].stateSnapshot() if state == ResolverStateHealthy || state == ResolverStateUnknown { mr.rrIndex = (idx + 1) % len(mr.entries) return mr.entries[idx] } } - idx := start % len(mr.entries) - mr.rrIndex = (idx + 1) % len(mr.entries) - return mr.entries[idx] -} - -func (mr *MultiResolver) selectBestScore() *resolverEntry { - if len(mr.entries) == 0 { - return nil - } - best := mr.entries[0] - bestScore := resolverScore(best) - for _, e := range mr.entries[1:] { - s := resolverScore(e) - if s > bestScore { - best = e - bestScore = s + // Second pass: accept any non-dead entry even if RateLimited/Down. + for i := 0; i < len(mr.entries); i++ { + idx := (start + i) % len(mr.entries) + if mr.entries[idx].isDead() { + continue } + mr.rrIndex = (idx + 1) % len(mr.entries) + return mr.entries[idx] } - return best -} - -func resolverScore(e *resolverEntry) int64 { - statePenalty := int64(0) - switch e.stateSnapshot() { - case ResolverStateHealthy: - statePenalty = 0 - case ResolverStateUnknown: - statePenalty = 5 - case ResolverStateRateLimited: - statePenalty = 15 - case ResolverStateDown: - statePenalty = 30 - } - return e.validCount.Load()*4 - e.invalidCount.Load()*2 - e.timeoutCount.Load()*3 - statePenalty + // Every entry is dead. + return nil } func (mr *MultiResolver) selectProbeTarget(primary *resolverEntry) *resolverEntry { @@ -462,6 +593,9 @@ func (mr *MultiResolver) selectProbeTarget(primary *resolverEntry) *resolverEntr if e == primary { continue } + if e.isDead() { + continue + } state := e.stateSnapshot() if state == ResolverStateHealthy { continue @@ -474,24 +608,6 @@ func (mr *MultiResolver) selectProbeTarget(primary *resolverEntry) *resolverEntr return nil } -// ResolverStats returns current resolver health counters. -func (mr *MultiResolver) ResolverStats() []ResolverStat { - stats := make([]ResolverStat, 0, len(mr.entries)) - for _, e := range mr.entries { - stats = append(stats, e.snapshot()) - } - return stats -} - -// ValidInvalidCounts returns valid/invalid counts by resolver address. -func (mr *MultiResolver) ValidInvalidCounts() map[string][2]int64 { - out := make(map[string][2]int64, len(mr.entries)) - for _, e := range mr.entries { - out[e.name] = [2]int64{e.validCount.Load(), e.invalidCount.Load()} - } - return out -} - // Close closes all underlying connections and stops the reader goroutines. func (mr *MultiResolver) Close() error { mr.closeOnce.Do(func() { @@ -504,6 +620,9 @@ func (mr *MultiResolver) Close() error { } // LocalAddr returns the local address of the first underlying connection. +// The value is arbitrary (there is no single meaningful local address for a +// multi-resolver), but the method must exist to satisfy net.PacketConn. +// No caller uses the returned address for routing or logic decisions. func (mr *MultiResolver) LocalAddr() net.Addr { return mr.entries[0].conn.LocalAddr() } @@ -541,8 +660,11 @@ func (mr *MultiResolver) SetWriteDeadline(t time.Time) error { return last } -// getResolverConnection creates the underlying transport net.PacketConn for r. -func GetResolverConnection(r Resolver, queueSize int, overflowMode turbotunnel.QueueOverflowMode) (net.PacketConn, net.Addr, error) { +// GetResolverConnection creates the underlying transport net.PacketConn for r. +// For UDP per-query mode, forgedStats is passed to NewUDPPacketConn so +// forged-response milestone logs are attributed to this resolver. Callers +// may pass nil to let each layer create its own counter. +func GetResolverConnection(r Resolver, queueSize int, overflowMode turbotunnel.QueueOverflowMode, forgedStats *ForgedStats) (net.PacketConn, net.Addr, error) { switch r.ResolverType { case ResolverTypeUDP: addr, err := net.ResolveUDPAddr("udp", r.ResolverAddr) @@ -565,7 +687,7 @@ func GetResolverConnection(r Resolver, queueSize int, overflowMode turbotunnel.Q if timeout <= 0 { timeout = DefaultUDPResponseTimeout } - conn, _, err := NewUDPPacketConn(addr, r.DialerControl, workers, timeout, !r.UDPAcceptErrors, queueSize, overflowMode) + conn, err := NewUDPPacketConn(addr, r.DialerControl, workers, timeout, !r.UDPAcceptErrors, queueSize, overflowMode, forgedStats) if err != nil { return nil, nil, err } diff --git a/client/multi_resolver_test.go b/client/multi_resolver_test.go new file mode 100644 index 0000000..93eff20 --- /dev/null +++ b/client/multi_resolver_test.go @@ -0,0 +1,415 @@ +package client + +import ( + "bytes" + "net" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/net2share/vaydns/dns" + "github.com/net2share/vaydns/turbotunnel" +) + +// fakePacketConn is a controllable net.PacketConn for MultiResolver tests. +// Pushed responses are consumed in order by successive ReadFrom calls; each +// response can be either data bytes or an error. +type fakePacketConn struct { + name string + readCh chan fakeReadResp + closed chan struct{} + once sync.Once +} + +type fakeReadResp struct { + data []byte + err error +} + +func newFakePacketConn(name string) *fakePacketConn { + return &fakePacketConn{ + name: name, + readCh: make(chan fakeReadResp, 16), + closed: make(chan struct{}), + } +} + +func (f *fakePacketConn) pushData(data []byte) { + cp := make([]byte, len(data)) + copy(cp, data) + f.readCh <- fakeReadResp{data: cp} +} + +func (f *fakePacketConn) pushError(err error) { + f.readCh <- fakeReadResp{err: err} +} + +func (f *fakePacketConn) ReadFrom(p []byte) (int, net.Addr, error) { + select { + case r, ok := <-f.readCh: + if !ok { + return 0, nil, net.ErrClosed + } + if r.err != nil { + return 0, nil, r.err + } + return copy(p, r.data), turbotunnel.DummyAddr{}, nil + case <-f.closed: + return 0, nil, net.ErrClosed + } +} + +func (f *fakePacketConn) WriteTo(p []byte, _ net.Addr) (int, error) { + select { + case <-f.closed: + return 0, net.ErrClosed + default: + return len(p), nil + } +} + +func (f *fakePacketConn) Close() error { + f.once.Do(func() { close(f.closed) }) + return nil +} + +func (f *fakePacketConn) LocalAddr() net.Addr { return turbotunnel.DummyAddr{} } +func (f *fakePacketConn) SetDeadline(time.Time) error { return nil } +func (f *fakePacketConn) SetReadDeadline(time.Time) error { return nil } +func (f *fakePacketConn) SetWriteDeadline(time.Time) error { return nil } + +// newFakeEntry builds a resolverEntry around a fakePacketConn, suitable for +// injection into a hand-constructed MultiResolver in tests. +func newFakeEntry(name string, conn *fakePacketConn) *resolverEntry { + return &resolverEntry{ + name: name, + addr: turbotunnel.DummyAddr{}, + conn: conn, + forgedStats: &ForgedStats{Label: name}, + pending: make(map[uint16]time.Time), + state: ResolverStateUnknown, + } +} + +// newTestMultiResolver mirrors what NewMultiResolver does after +// GetResolverConnection returns, but with pre-built synthetic entries. It does +// not spawn the healthWorker — tests don't need it, and omitting it keeps them +// hermetic with respect to timing. +func newTestMultiResolver(entries []*resolverEntry) *MultiResolver { + mr := &MultiResolver{ + entries: entries, + recvChan: make(chan multiReadResult, len(entries)*4), + closed: make(chan struct{}), + allDead: make(chan struct{}), + } + mr.aliveCount.Store(int32(len(entries))) + mr.startReaders() + return mr +} + +// TestMultiResolver_DeadEntryDoesNotBreakReadFrom verifies that when one +// resolver entry's transport returns a fatal read error, MultiResolver.ReadFrom +// continues to deliver packets from healthy entries instead of propagating +// that error to the upper DNSPacketConn layer (which would tear down the whole +// tunnel session). +func TestMultiResolver_DeadEntryDoesNotBreakReadFrom(t *testing.T) { + failing := newFakePacketConn("failing") + working := newFakePacketConn("working") + + entries := []*resolverEntry{ + newFakeEntry("failing", failing), + newFakeEntry("working", working), + } + mr := newTestMultiResolver(entries) + defer mr.Close() + + // Step 1: make the failing entry's ReadFrom return a fatal error. + // The reader goroutine will pick this up immediately. + failing.pushError(net.ErrClosed) + + // Give the reader goroutine time to handle the error. Under the buggy + // implementation it pushes an error-bearing multiReadResult onto + // recvChan and exits; under a correct implementation it would quietly + // mark the entry down and exit without poisoning recvChan. + time.Sleep(100 * time.Millisecond) + + // Step 2: push a valid response to the working entry. This lands on + // recvChan strictly AFTER any push from the failing entry, so the + // ordering is deterministic. + wantResponse := []byte("valid-response-bytes-from-working-resolver") + working.pushData(wantResponse) + + // Step 3: ReadFrom must return the bytes from the working entry, not + // the error from the failing one. + type readResult struct { + n int + err error + buf []byte + } + done := make(chan readResult, 1) + go func() { + buf := make([]byte, 4096) + n, _, err := mr.ReadFrom(buf) + done <- readResult{n: n, err: err, buf: buf} + }() + + select { + case r := <-done: + if r.err != nil { + t.Fatalf("MultiResolver.ReadFrom returned error %v; expected it to ignore the failed entry and return the bytes from the working entry. This is the C1 bug: a single resolver's read error tears down the tunnel.", r.err) + } + if !bytes.Equal(r.buf[:r.n], wantResponse) { + t.Fatalf("MultiResolver.ReadFrom returned wrong bytes.\n got: %x\n want: %x", r.buf[:r.n], wantResponse) + } + case <-time.After(3 * time.Second): + t.Fatal("MultiResolver.ReadFrom timed out; expected it to return bytes from the working entry within 3s") + } +} + +// TestMultiResolver_FailedEntryExcludedFromSelection verifies that after a +// reader goroutine has processed a fatal read error and exited, the entry +// is no longer returned by selectPrimary. Otherwise the round-robin scheduler +// would keep sending queries to a resolver whose reader goroutine is gone +// (so responses would never come back). +func TestMultiResolver_FailedEntryExcludedFromSelection(t *testing.T) { + failing := newFakePacketConn("failing") + working := newFakePacketConn("working") + + entries := []*resolverEntry{ + newFakeEntry("failing", failing), + newFakeEntry("working", working), + } + mr := newTestMultiResolver(entries) + defer mr.Close() + + // Kill the failing entry's reader; leave working blocked in ReadFrom. + failing.pushError(net.ErrClosed) + + // Let the reader goroutine process the error and (ideally) mark the + // entry down before it exits. + time.Sleep(100 * time.Millisecond) + + // Drain any stale result from recvChan so the assertion below only + // checks selectPrimary behaviour, not lingering channel contents. +drain: + for { + select { + case <-mr.recvChan: + default: + break drain + } + } + + // Call selectPrimary repeatedly. It must never return the failing + // entry — otherwise real queries will be sent to a resolver whose + // reader is gone, so responses will never come back. + for i := range 10 { + selected := mr.selectPrimary() + if selected == nil { + t.Fatalf("iter %d: selectPrimary returned nil", i) + } + if selected.name == "failing" { + t.Fatalf("iter %d: selectPrimary returned the failed entry %q; expected it to be excluded after its reader exited on error. State is %s.", i, selected.name, selected.stateSnapshot()) + } + } +} + +// buildDNSResponse constructs a minimal valid DNS wire-format response with +// the given RCODE. QR is set to 1. This is the smallest packet that +// dns.MessageFromWireFormat can parse and evaluateIncoming can classify. +func buildDNSResponse(t *testing.T, rcode uint16) []byte { + t.Helper() + msg := &dns.Message{ + ID: 0x1234, + Flags: 0x8000 | rcode, // QR=1 + RCODE + } + buf, err := msg.WireFormat() + if err != nil { + t.Fatalf("buildDNSResponse: %v", err) + } + return buf +} + +// TestForgedStats_LabeledMilestoneLog verifies that ForgedStats.Record() +// fires a milestone log at the expected thresholds and that the log line +// includes the resolver label in the "from " format. +func TestForgedStats_LabeledMilestoneLog(t *testing.T) { + stats := &ForgedStats{Label: "1.2.3.4:53"} + + // Record 9 forged NXDOMAIN responses — no milestone yet. + for range 9 { + stats.Record(dns.RcodeNameError) + } + if atomic.LoadUint64(&stats.Total) != 9 { + t.Fatalf("expected Total=9, got %d", stats.Total) + } + + // The 10th should trigger the first milestone. + stats.Record(dns.RcodeNameError) + if atomic.LoadUint64(&stats.Total) != 10 { + t.Fatalf("expected Total=10, got %d", stats.Total) + } + if atomic.LoadUint64(&stats.NXDOMAIN) != 10 { + t.Fatalf("expected NXDOMAIN=10, got %d", stats.NXDOMAIN) + } +} + +// TestForgedStats_UnlabeledFallback verifies that a ForgedStats with an +// empty Label still records correctly (used by DNSPacketConn's catch-all). +func TestForgedStats_UnlabeledFallback(t *testing.T) { + stats := &ForgedStats{} + stats.Record(dns.RcodeServerFailure) + if atomic.LoadUint64(&stats.Total) != 1 { + t.Fatalf("expected Total=1, got %d", stats.Total) + } + if atomic.LoadUint64(&stats.SERVFAIL) != 1 { + t.Fatalf("expected SERVFAIL=1, got %d", stats.SERVFAIL) + } +} + +// TestMultiResolver_ForgedResponseRecordedPerEntry verifies that in +// multi-resolver mode, a forged DNS response (NXDOMAIN) arriving at one +// entry increments that entry's ForgedStats but not the other entries'. +func TestMultiResolver_ForgedResponseRecordedPerEntry(t *testing.T) { + forging := newFakePacketConn("forging") + clean := newFakePacketConn("clean") + + forgingEntry := newFakeEntry("forging", forging) + cleanEntry := newFakeEntry("clean", clean) + + mr := newTestMultiResolver([]*resolverEntry{forgingEntry, cleanEntry}) + defer mr.Close() + + // Push a forged NXDOMAIN response to the forging entry. + forgedResp := buildDNSResponse(t, dns.RcodeNameError) + forging.pushData(forgedResp) + + // Give the reader time to process. + time.Sleep(100 * time.Millisecond) + + // Forging entry's stats should have recorded the forged response. + if got := atomic.LoadUint64(&forgingEntry.forgedStats.Total); got != 1 { + t.Errorf("forging entry ForgedStats.Total = %d, want 1", got) + } + if got := atomic.LoadUint64(&forgingEntry.forgedStats.NXDOMAIN); got != 1 { + t.Errorf("forging entry ForgedStats.NXDOMAIN = %d, want 1", got) + } + + // Clean entry's stats should be untouched. + if got := atomic.LoadUint64(&cleanEntry.forgedStats.Total); got != 0 { + t.Errorf("clean entry ForgedStats.Total = %d, want 0", got) + } +} + +// TestMultiResolver_ForgedResponseNotForwardedToRecvChan verifies that +// forged responses detected by evaluateIncoming are NOT forwarded to +// recvChan (and therefore never reach DNSPacketConn). Only legitimate +// responses should appear on the channel. +func TestMultiResolver_ForgedResponseNotForwardedToRecvChan(t *testing.T) { + conn := newFakePacketConn("resolver") + entry := newFakeEntry("resolver", conn) + + mr := newTestMultiResolver([]*resolverEntry{entry}) + defer mr.Close() + + // Push a forged NXDOMAIN, then a valid NoError response. + forgedResp := buildDNSResponse(t, dns.RcodeNameError) + validResp := buildDNSResponse(t, dns.RcodeNoError) + conn.pushData(forgedResp) + conn.pushData(validResp) + + // ReadFrom should return the valid response, skipping the forged one. + buf := make([]byte, 4096) + type readResult struct { + n int + err error + } + done := make(chan readResult, 1) + go func() { + n, _, err := mr.ReadFrom(buf) + done <- readResult{n: n, err: err} + }() + + select { + case r := <-done: + if r.err != nil { + t.Fatalf("ReadFrom error: %v", r.err) + } + // The bytes should be the valid response, not the forged one. + if bytes.Equal(buf[:r.n], forgedResp) { + t.Fatal("ReadFrom returned the forged response — it should have been filtered by the reader goroutine") + } + if !bytes.Equal(buf[:r.n], validResp) { + t.Fatalf("ReadFrom returned unexpected bytes:\n got: %x\n want: %x", buf[:r.n], validResp) + } + case <-time.After(3 * time.Second): + t.Fatal("ReadFrom timed out; expected valid response within 3s") + } + + // Verify the forged response was recorded. + if got := atomic.LoadUint64(&entry.forgedStats.Total); got != 1 { + t.Errorf("ForgedStats.Total = %d, want 1", got) + } +} + +// TestMultiResolver_ForgedStatsPerEntryLabeled verifies that each entry +// in a MultiResolver carries a distinctly-labeled ForgedStats instance. +func TestMultiResolver_ForgedStatsPerEntryLabeled(t *testing.T) { + conn1 := newFakePacketConn("r1") + conn2 := newFakePacketConn("r2") + + e1 := newFakeEntry("1.1.1.1:53", conn1) + e2 := newFakeEntry("8.8.8.8:53", conn2) + + mr := newTestMultiResolver([]*resolverEntry{e1, e2}) + defer mr.Close() + + if e1.forgedStats == e2.forgedStats { + t.Fatal("entries share the same ForgedStats pointer; expected distinct per-resolver instances") + } + if e1.forgedStats.Label != "1.1.1.1:53" { + t.Errorf("entry1 label = %q, want %q", e1.forgedStats.Label, "1.1.1.1:53") + } + if e2.forgedStats.Label != "8.8.8.8:53" { + t.Errorf("entry2 label = %q, want %q", e2.forgedStats.Label, "8.8.8.8:53") + } +} + +// TestMultiResolver_WriteToFailsOverToNextEntry verifies that when the +// primary entry's transport is closed, WriteTo marks it dead and retries +// with the next alive entry rather than returning an error. +func TestMultiResolver_WriteToFailsOverToNextEntry(t *testing.T) { + broken := newFakePacketConn("broken") + working := newFakePacketConn("working") + + entries := []*resolverEntry{ + newFakeEntry("broken", broken), + newFakeEntry("working", working), + } + mr := newTestMultiResolver(entries) + defer mr.Close() + + // Close the first entry's transport so WriteTo errors on it. + broken.Close() + + // Build a minimal DNS query so trackOutgoingID can parse it. + query := buildDNSResponse(t, dns.RcodeNoError) // any valid DNS message works + + n, err := mr.WriteTo(query, turbotunnel.DummyAddr{}) + if err != nil { + t.Fatalf("WriteTo returned error %v; expected failover to working entry", err) + } + if n != len(query) { + t.Fatalf("WriteTo wrote %d bytes, want %d", n, len(query)) + } + + // Verify the broken entry was marked dead. + if !entries[0].isDead() { + t.Error("broken entry should be marked dead after write failure") + } + // Verify the working entry is still alive. + if entries[1].isDead() { + t.Error("working entry should still be alive") + } +} diff --git a/client/udp.go b/client/udp.go index 8eae1b5..30aeb92 100644 --- a/client/udp.go +++ b/client/udp.go @@ -33,23 +33,28 @@ type UDPPacketConn struct { } // NewUDPPacketConn creates a UDPPacketConn with numWorkers goroutines that -// each send one query at a time on a fresh UDP socket. The returned -// ForgedStats pointer is shared with the caller so DNSPacketConn can -// include per-query forged counts in its reporting. -func NewUDPPacketConn(remoteAddr net.Addr, dialerControl func(network, address string, c syscall.RawConn) error, numWorkers int, responseTimeout time.Duration, ignoreErrors bool, queueSize int, overflowMode turbotunnel.QueueOverflowMode) (*UDPPacketConn, *ForgedStats, error) { - stats := &ForgedStats{} +// each send one query at a time on a fresh UDP socket. If forgedStats is +// non-nil, the UDPPacketConn records forged-response counts into it; +// otherwise a new instance is created internally (labeled with the remote +// address). Callers that want unified milestone logging across layers +// should create a single ForgedStats and pass the same pointer here and +// to NewDNSPacketConn. +func NewUDPPacketConn(remoteAddr net.Addr, dialerControl func(network, address string, c syscall.RawConn) error, numWorkers int, responseTimeout time.Duration, ignoreErrors bool, queueSize int, overflowMode turbotunnel.QueueOverflowMode, forgedStats *ForgedStats) (*UDPPacketConn, error) { + if forgedStats == nil { + forgedStats = &ForgedStats{Label: remoteAddr.String()} + } pconn := &UDPPacketConn{ remoteAddr: remoteAddr, dialerControl: dialerControl, responseTimeout: responseTimeout, ignoreErrors: ignoreErrors, - forgedStats: stats, + forgedStats: forgedStats, QueuePacketConn: turbotunnel.NewQueuePacketConn(remoteAddr, 0, queueSize, overflowMode), } for i := 0; i < numWorkers; i++ { go pconn.sendLoop() } - return pconn, stats, nil + return pconn, nil } // sendLoop is the per-worker loop. It dequeues one packet at a time from the diff --git a/dns/dns.go b/dns/dns.go index a6d0a62..02658f4 100644 --- a/dns/dns.go +++ b/dns/dns.go @@ -47,14 +47,15 @@ var ( const ( // https://tools.ietf.org/html/rfc1035#section-3.2.2 - RRTypeA = 1 - RRTypeNS = 2 - + RRTypeA = 1 + RRTypeNS = 2 RRTypeCNAME = 5 + RRTypeNULL = 10 RRTypeMX = 15 RRTypeTXT = 16 RRTypeAAAA = 28 RRTypeSRV = 33 + RRTypeCAA = 257 // https://tools.ietf.org/html/rfc6891#section-6.1.1 RRTypeOPT = 41 @@ -80,6 +81,8 @@ func ParseRecordType(s string) (uint16, error) { return RRTypeTXT, nil case "cname": return RRTypeCNAME, nil + case "null": + return RRTypeNULL, nil case "a": return RRTypeA, nil case "aaaa": @@ -90,8 +93,10 @@ func ParseRecordType(s string) (uint16, error) { return RRTypeNS, nil case "srv": return RRTypeSRV, nil + case "caa": + return RRTypeCAA, nil default: - return 0, fmt.Errorf("unknown record type %q: must be one of: txt, cname, a, aaaa, mx, ns, srv", s) + return 0, fmt.Errorf("unknown record type %q: must be one of: txt, cname, null, a, aaaa, mx, ns, srv, caa", s) } } @@ -694,6 +699,41 @@ func EncodeRDataTXT(p []byte) []byte { return buf.Bytes() } +// DecodeRDataNULL decodes NULL RDATA as a raw byte slice. +// https://tools.ietf.org/html/rfc1035#section-3.3.10 +func DecodeRDataNULL(p []byte) ([]byte, error) { return p, nil } + +// EncodeRDataNULL encodes a slice of bytes as NULL RDATA. +// https://tools.ietf.org/html/rfc1035#section-3.3.10 +func EncodeRDataNULL(p []byte) []byte { return p } + +// DecodeRDataCAA decodes CAA RDATA and returns the value portion. +// https://datatracker.ietf.org/doc/html/rfc8659 +func DecodeRDataCAA(p []byte) ([]byte, error) { + if len(p) < 2 { + return nil, io.ErrUnexpectedEOF + } + tagLen := int(p[1]) + p = p[2:] + if len(p) < tagLen { + return nil, io.ErrUnexpectedEOF + } + return p[tagLen:], nil +} + +// EncodeRDataCAA encodes a slice of bytes as CAA RDATA using a fixed +// "issue" tag so the payload lives entirely in the value portion. +// https://datatracker.ietf.org/doc/html/rfc8659 +func EncodeRDataCAA(p []byte) []byte { + const tag = "issue" + rdata := make([]byte, 2+len(tag)+len(p)) + // rdata[0] = 0 (flags; bit 7 is the "critical" flag per RFC 8659 §4.1) + rdata[1] = byte(len(tag)) + copy(rdata[2:], tag) + copy(rdata[2+len(tag):], p) + return rdata +} + // base32Encoding is a base32 encoding without padding, used for CNAME RDATA. var base32Encoding = base32.StdEncoding.WithPadding(base32.NoPadding) diff --git a/dns/dns_test.go b/dns/dns_test.go index 4c90b9e..59fe22f 100644 --- a/dns/dns_test.go +++ b/dns/dns_test.go @@ -805,6 +805,102 @@ func TestEncodeDecodeRDataAAAA(t *testing.T) { } } +func TestEncodeDecodeRDataNULL(t *testing.T) { + for _, p := range [][]byte{ + {}, + {0x00}, + {0x01, 0x02, 0x03}, + bytes.Repeat([]byte{0xab}, 100), + bytes.Repeat([]byte{0xff}, 1000), + } { + rdata := EncodeRDataNULL(p) + decoded, err := DecodeRDataNULL(rdata) + if err != nil { + t.Errorf("DecodeRDataNULL(%x): %v", rdata, err) + continue + } + if !bytes.Equal(decoded, p) { + t.Errorf("NULL round-trip failed for len=%d: got len=%d", len(p), len(decoded)) + } + } +} + +func TestRDataNULLIdentity(t *testing.T) { + // NULL encode/decode should be identity — no framing overhead. + p := []byte{0x01, 0x02, 0x03} + if !bytes.Equal(EncodeRDataNULL(p), p) { + t.Error("EncodeRDataNULL should return input unchanged") + } + decoded, _ := DecodeRDataNULL(p) + if !bytes.Equal(decoded, p) { + t.Error("DecodeRDataNULL should return input unchanged") + } +} + +func TestDecodeRDataCAA(t *testing.T) { + for _, test := range []struct { + desc string + p []byte + decoded []byte + err error + }{ + {"empty input", []byte{}, nil, io.ErrUnexpectedEOF}, + {"single byte", []byte{0x00}, nil, io.ErrUnexpectedEOF}, + {"tag length exceeds data", []byte{0x00, 0x05, 'a'}, nil, io.ErrUnexpectedEOF}, + {"tag only, no value", []byte{0x00, 0x05, 'i', 's', 's', 'u', 'e'}, []byte{}, nil}, + {"tag + value", []byte{0x00, 0x05, 'i', 's', 's', 'u', 'e', 0xaa, 0xbb}, []byte{0xaa, 0xbb}, nil}, + {"zero-length tag", []byte{0x00, 0x00, 0x01, 0x02}, []byte{0x01, 0x02}, nil}, + {"flags byte ignored", []byte{0x80, 0x05, 'i', 's', 's', 'u', 'e', 0xff}, []byte{0xff}, nil}, + } { + decoded, err := DecodeRDataCAA(test.p) + if err != test.err { + t.Errorf("%s: got err %v, want %v", test.desc, err, test.err) + continue + } + if err == nil && !bytes.Equal(decoded, test.decoded) { + t.Errorf("%s: got %x, want %x", test.desc, decoded, test.decoded) + } + } +} + +func TestEncodeRDataCAA(t *testing.T) { + p := []byte{0x01, 0x02, 0x03} + rdata := EncodeRDataCAA(p) + // Expected: flags(0) + tagLen(5) + "issue" + payload + expected := append([]byte{0x00, 0x05, 'i', 's', 's', 'u', 'e'}, p...) + if !bytes.Equal(rdata, expected) { + t.Errorf("EncodeRDataCAA(%x) = %x, want %x", p, rdata, expected) + } +} + +func TestEncodeRDataCAAEmpty(t *testing.T) { + rdata := EncodeRDataCAA([]byte{}) + // Even with empty payload, should have flags + tagLen + tag. + if len(rdata) != 7 { + t.Errorf("EncodeRDataCAA(empty) length = %d, want 7", len(rdata)) + } +} + +func TestRDataCAARoundTrip(t *testing.T) { + for _, p := range [][]byte{ + {}, + {0x00}, + {0x01, 0x02, 0x03}, + bytes.Repeat([]byte{0xab}, 100), + bytes.Repeat([]byte{0xff}, 1000), + } { + rdata := EncodeRDataCAA(p) + decoded, err := DecodeRDataCAA(rdata) + if err != nil { + t.Errorf("CAA round-trip decode error for len=%d: %v", len(p), err) + continue + } + if !bytes.Equal(decoded, p) { + t.Errorf("CAA round-trip failed for len=%d: got len=%d", len(p), len(decoded)) + } + } +} + func TestReadRRMXCompression(t *testing.T) { // DNS message with MX answer using compression pointer in exchange name. msg := []byte{ diff --git a/docs/client-library.md b/docs/client-library.md index cc1f84d..2989632 100644 --- a/docs/client-library.md +++ b/docs/client-library.md @@ -47,6 +47,21 @@ t.ListenAndServe("127.0.0.1:7000") // blocks, handles reconnection `ListenAndServe` opens a local TCP listener, creates tunnel sessions with automatic reconnection on failure, and forwards connections through the tunnel. +### Multi-resolver API + +Spread DNS queries across multiple resolvers for throughput and resilience: + +```go +r1, _ := client.NewResolver(client.ResolverTypeUDP, "8.8.8.8:53") +r2, _ := client.NewResolver(client.ResolverTypeDOH, "https://1.1.1.1/dns-query") +ts, _ := client.NewTunnelServer("t.example.com", "pubkey-hex") +t, _ := client.NewTunnelMulti([]client.Resolver{r1, r2}, ts) + +t.ListenAndServe("127.0.0.1:7000") +``` + +`NewTunnelMulti` accepts a slice of resolvers. The client multiplexes queries across them with round-robin selection and health-based routing. Per-resolver forged-response tracking logs which resolver is being targeted by DNS injection. + ## Key types | Type | Description | @@ -71,9 +86,12 @@ All configuration is done through struct fields before calling `Initiate*` or `L ```go // Resolver options r.UTLSClientHelloID = &utls.ClientHelloID{...} // TLS fingerprint +r.RoundTripper = customTransport // custom HTTP transport for DoH (overrides UTLSClientHelloID) +r.DialerControl = controlFunc // socket options callback (SO_MARK, SO_BINDTODEVICE, etc.) r.UDPWorkers = 200 // concurrent UDP workers -r.UDPSharedSocket = true // single socket mode -r.UDPTimeout = 500 * time.Millisecond // per-query timeout +r.UDPSharedSocket = true // single socket mode +r.UDPTimeout = 500 * time.Millisecond // per-query timeout +r.UDPAcceptErrors = true // accept non-NOERROR responses (disables forged filtering) // Tunnel server options ts.DnsttCompat = true // original dnstt wire format @@ -81,12 +99,14 @@ ts.ClientIDSize = 1 // smaller ClientID ts.MaxQnameLen = 101 // QNAME length constraint ts.MaxNumLabels = 2 // label count constraint ts.RPS = 200 // rate limit queries/second +ts.RecordType = "cname" // DNS record type for downstream data: txt, null, cname, a, aaaa, mx, ns, srv, caa (default: "txt") // Session options -t.IdleTimeout = 60 * time.Second -t.KeepAlive = 10 * time.Second +t.IdleTimeout = 10 * time.Second +t.KeepAlive = 2 * time.Second +t.OpenStreamTimeout = 10 * time.Second t.MaxStreams = 256 -t.SessionCheckInterval = 20 * time.Second +t.SessionCheckInterval = 500 * time.Millisecond t.ReconnectMinDelay = 1 * time.Second t.ReconnectMaxDelay = 30 * time.Second t.HandshakeTimeout = 15 * time.Second diff --git a/e2e/multi-resolver-dnstt-compat/docker-compose.yml b/e2e/multi-resolver-dnstt-compat/docker-compose.yml new file mode 100644 index 0000000..b492929 --- /dev/null +++ b/e2e/multi-resolver-dnstt-compat/docker-compose.yml @@ -0,0 +1,81 @@ +networks: + dns-net: + ipam: + config: + - subnet: 172.28.0.0/24 + backend-net: + +volumes: + keys: + +services: + keygen: + build: + context: ../.. + dockerfile: Dockerfile + volumes: + - keys:/keys + command: > + sh -c "vaydns-server -gen-key -privkey-file /keys/server.key -pubkey-file /keys/server.pub" + + dns1: + image: coredns/coredns + networks: + dns-net: + ipv4_address: 172.28.0.10 + volumes: + - ../Corefile:/Corefile + command: ["-conf", "/Corefile"] + + dns2: + image: coredns/coredns + networks: + dns-net: + ipv4_address: 172.28.0.11 + volumes: + - ../Corefile:/Corefile + command: ["-conf", "/Corefile"] + + backend: + image: nginx:alpine + networks: + - backend-net + + server: + build: + context: ../.. + dockerfile: Dockerfile + networks: + dns-net: + ipv4_address: 172.28.0.20 + backend-net: + volumes: + - keys:/keys + command: > + vaydns-server -udp :53 -privkey-file /keys/server.key + -domain t.example.com -upstream backend:80 + -dnstt-compat + depends_on: + keygen: + condition: service_completed_successfully + dns1: + condition: service_started + dns2: + condition: service_started + + client: + build: + context: ../.. + dockerfile: Dockerfile + networks: + - dns-net + volumes: + - keys:/keys + command: > + vaydns-client -udp 172.28.0.10:53 -udp 172.28.0.11:53 + -pubkey-file /keys/server.pub + -domain t.example.com -listen 0.0.0.0:7000 + -dnstt-compat + depends_on: + server: + condition: service_started diff --git a/e2e/multi-resolver-dnstt-compat/run.sh b/e2e/multi-resolver-dnstt-compat/run.sh new file mode 100755 index 0000000..66618c6 --- /dev/null +++ b/e2e/multi-resolver-dnstt-compat/run.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Test: multi-resolver with dnstt-compat wire format. +# Verifies that -dnstt-compat (8-byte ClientID, padding prefixes, forced TXT, +# longer timeouts) works correctly when combined with multiple UDP resolvers. +# The wire format sits above the MultiResolver transport layer so there should +# be no interaction, but this test guards against regressions. +set -euo pipefail +cd "$(dirname "$0")" + +cleanup() { docker compose down -v 2>/dev/null; } +trap cleanup EXIT + +echo "--- Building and starting services ---" +docker compose up -d --build + +# dnstt-compat uses longer timeouts (2m idle, 10s keepalive) and the tunnel +# handshake may be slower due to the larger wire overhead, so allow more time. +echo "--- Waiting for tunnel (up to 45s) ---" +for i in $(seq 1 45); do + if docker compose exec -T client wget -q -O- http://localhost:7000 2>/dev/null | grep -q "Welcome to nginx"; then + echo "" + echo "=== PASS ===" + exit 0 + fi + printf "." + sleep 1 +done + +echo "" +echo "--- Tunnel did not come up. Dumping logs ---" +docker compose logs client server dns1 dns2 +echo "=== FAIL ===" +exit 1 diff --git a/e2e/multi-resolver-forge/Corefile.forge b/e2e/multi-resolver-forge/Corefile.forge new file mode 100644 index 0000000..224cbc9 --- /dev/null +++ b/e2e/multi-resolver-forge/Corefile.forge @@ -0,0 +1,6 @@ +. { + template IN ANY . { + rcode NXDOMAIN + } + log +} diff --git a/e2e/multi-resolver-forge/docker-compose.yml b/e2e/multi-resolver-forge/docker-compose.yml new file mode 100644 index 0000000..b5be9f2 --- /dev/null +++ b/e2e/multi-resolver-forge/docker-compose.yml @@ -0,0 +1,82 @@ +networks: + dns-net: + ipam: + config: + - subnet: 172.28.0.0/24 + backend-net: + +volumes: + keys: + +services: + keygen: + build: + context: ../.. + dockerfile: Dockerfile + volumes: + - keys:/keys + command: > + sh -c "vaydns-server -gen-key -privkey-file /keys/server.key -pubkey-file /keys/server.pub" + + dns-good: + image: coredns/coredns + networks: + dns-net: + ipv4_address: 172.28.0.10 + volumes: + - ../Corefile:/Corefile + command: ["-conf", "/Corefile"] + + dns-forge: + image: coredns/coredns + networks: + dns-net: + ipv4_address: 172.28.0.11 + volumes: + - ./Corefile.forge:/Corefile + command: ["-conf", "/Corefile"] + + backend: + image: nginx:alpine + networks: + - backend-net + + server: + build: + context: ../.. + dockerfile: Dockerfile + networks: + dns-net: + ipv4_address: 172.28.0.20 + backend-net: + volumes: + - keys:/keys + command: > + vaydns-server -udp :53 -privkey-file /keys/server.key + -domain t.example.com -upstream backend:80 + -idle-timeout 10s -keepalive 2s + depends_on: + keygen: + condition: service_completed_successfully + dns-good: + condition: service_started + dns-forge: + condition: service_started + + client: + build: + context: ../.. + dockerfile: Dockerfile + networks: + - dns-net + volumes: + - keys:/keys + command: > + vaydns-client -udp 172.28.0.10:53 -udp 172.28.0.11:53 + -pubkey-file /keys/server.pub + -domain t.example.com -listen 0.0.0.0:7000 + -idle-timeout 10s -keepalive 2s -session-check-interval 500ms + -reconnect-min 1s -reconnect-max 5s -log-level info + depends_on: + server: + condition: service_started diff --git a/e2e/multi-resolver-forge/run.sh b/e2e/multi-resolver-forge/run.sh new file mode 100755 index 0000000..2ee38ca --- /dev/null +++ b/e2e/multi-resolver-forge/run.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Test: one resolver returns forged responses while another works. +# dns-forge always replies with NXDOMAIN (CoreDNS template plugin), simulating +# a censor or broken resolver injecting fake responses. dns-good behaves +# normally. The tunnel must work through dns-good, with the per-query UDP +# worker's forged-response filter absorbing dns-forge's NXDOMAINs and +# MultiResolver's health state machine eventually routing around it. +set -euo pipefail +cd "$(dirname "$0")" + +cleanup() { docker compose down -v 2>/dev/null; } +trap cleanup EXIT + +fetch() { + docker compose exec -T client wget -q -O- http://localhost:7000 2>/dev/null | grep -q "Welcome to nginx" +} + +echo "--- Building and starting services ---" +docker compose up -d --build + +echo "--- Waiting for tunnel through dns-good while dns-forge injects NXDOMAINs (up to 60s) ---" +ok_count=0 +for i in $(seq 1 60); do + if fetch; then + ok_count=$((ok_count + 1)) + # Require two consecutive successes so one lucky query doesn't pass + # the test while the forging resolver is still in rotation. + if [ "$ok_count" -ge 2 ]; then + echo "" + # Sanity check: make sure dns-forge actually saw queries, so we + # know the tunnel was exercising the forging code path and didn't + # only hit dns-good by chance. CoreDNS's log plugin may buffer + # output briefly, so give it a moment to flush before grepping. + sleep 2 + forge_logs=$(docker compose logs dns-forge 2>&1) + if ! grep -q 'NXDOMAIN' <<<"$forge_logs"; then + echo "--- dns-forge never served NXDOMAIN; the forging code path may not have been exercised ---" + echo "$forge_logs" + echo "=== FAIL (forging path not exercised) ===" + exit 1 + fi + nxdomain_count=$(grep -c 'NXDOMAIN' <<<"$forge_logs" || true) + echo "--- dns-forge served $nxdomain_count NXDOMAIN responses; forging path exercised ---" + + # Verify the client-side milestone log fired with per-resolver + # attribution: "forged DNS responses from : ..." + # This proves the ForgedStats plumbing carries the resolver + # label end-to-end and that milestone thresholds fire correctly + # in multi-resolver mode. + client_logs=$(docker compose logs client 2>&1) + if ! grep -q 'forged DNS responses from 172.28.0.11' <<<"$client_logs"; then + echo "--- Client did not log labeled forged milestone for dns-forge (172.28.0.11) ---" + echo "$client_logs" | tail -30 + echo "=== FAIL (missing per-resolver forged milestone log) ===" + exit 1 + fi + milestone_line=$(grep 'forged DNS responses from 172.28.0.11' <<<"$client_logs" | tail -1) + echo "--- Client milestone: $milestone_line ---" + echo "--- Tunnel delivers consistent responses despite forged NXDOMAINs ---" + echo "=== PASS ===" + exit 0 + fi + else + ok_count=0 + fi + printf "." + sleep 1 +done + +echo "" +echo "--- Tunnel did not come up through dns-good ---" +docker compose logs client server dns-good dns-forge +echo "=== FAIL ===" +exit 1 diff --git a/e2e/multi-resolver-health-detection/docker-compose.yml b/e2e/multi-resolver-health-detection/docker-compose.yml new file mode 100644 index 0000000..9a3f25c --- /dev/null +++ b/e2e/multi-resolver-health-detection/docker-compose.yml @@ -0,0 +1,83 @@ +networks: + dns-net: + ipam: + config: + - subnet: 172.28.0.0/24 + backend-net: + +volumes: + keys: + +services: + keygen: + build: + context: ../.. + dockerfile: Dockerfile + volumes: + - keys:/keys + command: > + sh -c "vaydns-server -gen-key -privkey-file /keys/server.key -pubkey-file /keys/server.pub" + + dns-good: + image: coredns/coredns + networks: + dns-net: + ipv4_address: 172.28.0.10 + volumes: + - ../Corefile:/Corefile + command: ["-conf", "/Corefile"] + + dns-forge: + image: coredns/coredns + networks: + dns-net: + ipv4_address: 172.28.0.11 + volumes: + - ../multi-resolver-forge/Corefile.forge:/Corefile + command: ["-conf", "/Corefile"] + + backend: + image: nginx:alpine + networks: + - backend-net + + server: + build: + context: ../.. + dockerfile: Dockerfile + networks: + dns-net: + ipv4_address: 172.28.0.20 + backend-net: + volumes: + - keys:/keys + command: > + vaydns-server -udp :53 -privkey-file /keys/server.key + -domain t.example.com -upstream backend:80 + -idle-timeout 10s -keepalive 2s + depends_on: + keygen: + condition: service_completed_successfully + dns-good: + condition: service_started + dns-forge: + condition: service_started + + client: + build: + context: ../.. + dockerfile: Dockerfile + networks: + - dns-net + volumes: + - keys:/keys + command: > + vaydns-client -udp 172.28.0.10:53 -udp 172.28.0.11:53 + -udp-shared-socket + -pubkey-file /keys/server.pub + -domain t.example.com -listen 0.0.0.0:7000 + -idle-timeout 10s -keepalive 2s -session-check-interval 500ms + -reconnect-min 1s -reconnect-max 5s -log-level info + depends_on: + server: + condition: service_started diff --git a/e2e/multi-resolver-health-detection/run.sh b/e2e/multi-resolver-health-detection/run.sh new file mode 100755 index 0000000..b2162b4 --- /dev/null +++ b/e2e/multi-resolver-health-detection/run.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +# Test: health state machine detects and excludes a forging resolver. +# +# dns-forge returns NXDOMAIN for every query. The client uses +# -udp-shared-socket so there is no per-query UDP worker filtering — +# forged NXDOMAIN responses reach evaluateIncoming directly, which is +# the code path affected by the per-response decay bug. +# +# With per-query UDP (the default), the UDP worker absorbs forged +# responses and detection happens through the pending-timeout path +# instead. That path works at high query rates even without the decay fix, +# so it cannot demonstrate the bug. Shared-socket mode is required to +# exercise the evaluateIncoming accumulation path. +# +# The health state machine decays penalty counters only on the 1/sec +# health tick, not per-response. This allows invalidCount to grow at +# the response rate (~several/sec) and cross the detection threshold +# within seconds. dns-forge should be marked Down and excluded from +# round-robin. During the measurement burst, dns-forge should receive +# only occasional probe queries, not 50% of all traffic. +set -euo pipefail +cd "$(dirname "$0")" + +cleanup() { docker compose down -v 2>/dev/null; } +trap cleanup EXIT + +fetch() { + docker compose exec -T client wget -q -O /dev/null http://localhost:7000 2>/dev/null +} + +query_count() { + # Count the number of DNS queries logged by a CoreDNS service. + docker compose logs "$1" 2>&1 | grep -c 't\.example\.com' || echo 0 +} + +echo "--- Building and starting services ---" +docker compose up -d --build + +echo "--- Waiting for tunnel (up to 30s) ---" +for i in $(seq 1 30); do + if fetch; then + echo "" + echo "--- Tunnel is up ---" + break + fi + if [ "$i" -eq 30 ]; then + echo "" + docker compose logs client server dns-good dns-forge + echo "=== FAIL (tunnel not ready) ===" + exit 1 + fi + printf "." + sleep 1 +done + +# Phase 1: let the health state machine observe the forging resolver. +# The pending-response timeout is 5s and the detection threshold is 8 +# timeouts / 5 invalids, so ~15s should be enough for the health machine +# to mark dns-forge as Down. +echo "--- Waiting 20s for health machine to observe dns-forge ---" +# Drive some traffic during the detection window so pending IDs accumulate. +for i in $(seq 1 20); do + fetch || true + sleep 1 +done + +# Phase 2: snapshot dns-forge query count, then drive a traffic burst +# and measure how many NEW queries dns-forge receives. +count_before=$(query_count dns-forge) +echo "--- dns-forge query count before burst: $count_before ---" + +echo "--- Driving 10 HTTP fetches through tunnel ---" +for i in $(seq 1 10); do + fetch || true + sleep 1 +done + +count_after=$(query_count dns-forge) +delta=$((count_after - count_before)) +echo "--- dns-forge query count after burst: $count_after (delta: $delta) ---" + +# Also check dns-good for comparison. +good_before_ignored=0 # not tracked, but log the total for debugging +good_total=$(query_count dns-good) +echo "--- dns-good total queries: $good_total ---" + +# dns-forge should receive very few queries during the burst (only probe +# queries at ~1/sec = ~10 max), not ~50% of all traffic. +max_allowed=15 +if [ "$delta" -gt "$max_allowed" ]; then + echo "--- dns-forge received $delta queries during the burst (max allowed: $max_allowed) ---" + echo "--- Health machine did not route away from the forging resolver (decay bug) ---" + docker compose logs client | tail -20 + echo "=== FAIL ===" + exit 1 +fi + +echo "--- dns-forge received only $delta queries during burst (≤$max_allowed); health machine routed away ---" +echo "=== PASS ===" diff --git a/e2e/multi-resolver-runtime-failure/docker-compose.yml b/e2e/multi-resolver-runtime-failure/docker-compose.yml new file mode 100644 index 0000000..d456182 --- /dev/null +++ b/e2e/multi-resolver-runtime-failure/docker-compose.yml @@ -0,0 +1,82 @@ +networks: + dns-net: + ipam: + config: + - subnet: 172.28.0.0/24 + backend-net: + +volumes: + keys: + +services: + keygen: + build: + context: ../.. + dockerfile: Dockerfile + volumes: + - keys:/keys + command: > + sh -c "vaydns-server -gen-key -privkey-file /keys/server.key -pubkey-file /keys/server.pub" + + dns1: + image: coredns/coredns + networks: + dns-net: + ipv4_address: 172.28.0.10 + volumes: + - ../Corefile:/Corefile + command: ["-conf", "/Corefile"] + + dns2: + image: coredns/coredns + networks: + dns-net: + ipv4_address: 172.28.0.11 + volumes: + - ../Corefile:/Corefile + command: ["-conf", "/Corefile"] + + backend: + image: nginx:alpine + networks: + - backend-net + + server: + build: + context: ../.. + dockerfile: Dockerfile + networks: + dns-net: + ipv4_address: 172.28.0.20 + backend-net: + volumes: + - keys:/keys + command: > + vaydns-server -udp :53 -privkey-file /keys/server.key + -domain t.example.com -upstream backend:80 + -idle-timeout 10s -keepalive 2s + depends_on: + keygen: + condition: service_completed_successfully + dns1: + condition: service_started + dns2: + condition: service_started + + client: + build: + context: ../.. + dockerfile: Dockerfile + networks: + - dns-net + volumes: + - keys:/keys + command: > + vaydns-client -udp 172.28.0.10:53 -udp 172.28.0.11:53 + -pubkey-file /keys/server.pub + -domain t.example.com -listen 0.0.0.0:7000 + -idle-timeout 10s -keepalive 2s -session-check-interval 500ms + -reconnect-min 1s -reconnect-max 5s -log-level info + depends_on: + server: + condition: service_started diff --git a/e2e/multi-resolver-runtime-failure/run.sh b/e2e/multi-resolver-runtime-failure/run.sh new file mode 100755 index 0000000..a863b79 --- /dev/null +++ b/e2e/multi-resolver-runtime-failure/run.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +# Test: one of several UDP resolvers dies mid-flight. +# Start the tunnel with two working DNS resolvers, verify HTTP traffic works, +# kill one resolver, and verify traffic continues through the remaining one. +# +# A failing UDP resolver does not surface an error to MultiResolver (the +# per-query worker retries silently), so this test validates that the health +# state machine + round-robin selection routes around it without tearing +# down the tunnel session. +set -euo pipefail +cd "$(dirname "$0")" + +cleanup() { docker compose down -v 2>/dev/null; } +trap cleanup EXIT + +fetch() { + docker compose exec -T client wget -q -O- http://localhost:7000 2>/dev/null | grep -q "Welcome to nginx" +} + +echo "--- Building and starting services ---" +docker compose up -d --build + +echo "--- Waiting for initial tunnel (up to 30s) ---" +for i in $(seq 1 30); do + if fetch; then + echo "" + echo "--- Initial tunnel is up ---" + break + fi + if [ "$i" -eq 30 ]; then + echo "" + docker compose logs client server dns1 dns2 + echo "=== FAIL (initial tunnel not ready) ===" + exit 1 + fi + printf "." + sleep 1 +done + +# Snapshot the session id before the kill, to detect any reconnect later. +pre_kill_sessions=$(docker compose logs client 2>&1 | grep -c 'session .* ready' || true) + +echo "--- Killing dns1 (half the queries will start dropping) ---" +docker compose kill dns1 + +# Give the client a moment to notice and start routing around dns1. +sleep 3 + +echo "--- Verifying tunnel still works through dns2 (up to 45s) ---" +ok_count=0 +for i in $(seq 1 45); do + if fetch; then + ok_count=$((ok_count + 1)) + # Require two consecutive successes so we don't declare victory on + # a lucky query that happened to go to dns2. + if [ "$ok_count" -ge 2 ]; then + post_kill_sessions=$(docker compose logs client 2>&1 | grep -c 'session .* ready' || true) + new_sessions=$((post_kill_sessions - pre_kill_sessions)) + if [ "$new_sessions" -gt 0 ]; then + echo "" + echo "--- Tunnel recovered but triggered $new_sessions new session(s) — resolver failure should be isolated without a full reconnect ---" + docker compose logs client | tail -30 + echo "=== FAIL (session was rebuilt instead of isolated) ===" + exit 1 + fi + echo "" + echo "--- Tunnel survived with 0 new sessions (single-entry failure isolated) ---" + echo "=== PASS ===" + exit 0 + fi + else + ok_count=0 + fi + printf "." + sleep 1 +done + +echo "" +echo "--- Tunnel did not survive dns1 kill ---" +docker compose logs client server dns1 dns2 +echo "=== FAIL ===" +exit 1 diff --git a/e2e/multi-resolver/docker-compose.yml b/e2e/multi-resolver/docker-compose.yml new file mode 100644 index 0000000..36f9222 --- /dev/null +++ b/e2e/multi-resolver/docker-compose.yml @@ -0,0 +1,82 @@ +networks: + dns-net: + ipam: + config: + - subnet: 172.28.0.0/24 + backend-net: + +volumes: + keys: + +services: + keygen: + build: + context: ../.. + dockerfile: Dockerfile + volumes: + - keys:/keys + command: > + sh -c "vaydns-server -gen-key -privkey-file /keys/server.key -pubkey-file /keys/server.pub" + + dns1: + image: coredns/coredns + networks: + dns-net: + ipv4_address: 172.28.0.10 + volumes: + - ../Corefile:/Corefile + command: ["-conf", "/Corefile"] + + dns2: + image: coredns/coredns + networks: + dns-net: + ipv4_address: 172.28.0.11 + volumes: + - ../Corefile:/Corefile + command: ["-conf", "/Corefile"] + + backend: + image: nginx:alpine + networks: + - backend-net + + server: + build: + context: ../.. + dockerfile: Dockerfile + networks: + dns-net: + ipv4_address: 172.28.0.20 + backend-net: + volumes: + - keys:/keys + command: > + vaydns-server -udp :53 -privkey-file /keys/server.key + -domain t.example.com -upstream backend:80 + -idle-timeout 10s -keepalive 2s + depends_on: + keygen: + condition: service_completed_successfully + dns1: + condition: service_started + dns2: + condition: service_started + + client: + build: + context: ../.. + dockerfile: Dockerfile + networks: + - dns-net + volumes: + - keys:/keys + command: > + vaydns-client -udp 172.28.0.10:53 -udp 172.28.0.11:53 + -pubkey-file /keys/server.pub + -domain t.example.com -listen 0.0.0.0:7000 + -idle-timeout 10s -keepalive 2s -session-check-interval 500ms + -reconnect-min 1s -reconnect-max 5s + depends_on: + server: + condition: service_started diff --git a/e2e/multi-resolver/run.sh b/e2e/multi-resolver/run.sh new file mode 100755 index 0000000..c64921d --- /dev/null +++ b/e2e/multi-resolver/run.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Test: multi-resolver smoke test. +# Verifies that vaydns-client works when configured with two UDP resolvers, +# and that an HTTP request through the tunnel succeeds. +set -euo pipefail +cd "$(dirname "$0")" + +cleanup() { docker compose down -v 2>/dev/null; } +trap cleanup EXIT + +echo "--- Building and starting services ---" +docker compose up -d --build + +echo "--- Waiting for tunnel (up to 30s) ---" +for i in $(seq 1 30); do + if docker compose exec -T client wget -q -O- http://localhost:7000 2>/dev/null | grep -q "Welcome to nginx"; then + echo "" + echo "=== PASS ===" + exit 0 + fi + printf "." + sleep 1 +done + +echo "" +echo "--- Tunnel did not come up. Dumping logs ---" +docker compose logs client server dns1 dns2 +echo "=== FAIL ===" +exit 1 diff --git a/e2e/run-test.sh b/e2e/run-test.sh index b07c0b7..70eaf89 100755 --- a/e2e/run-test.sh +++ b/e2e/run-test.sh @@ -20,7 +20,7 @@ for rt in txt cname a aaaa mx ns srv; do fi done -for test_dir in socks-download recovery transport-recovery; do +for test_dir in socks-download recovery transport-recovery multi-resolver multi-resolver-runtime-failure multi-resolver-forge multi-resolver-health-detection multi-resolver-dnstt-compat; do total=$((total + 1)) echo "" echo "========================================" diff --git a/man/vaydns-client.1 b/man/vaydns-client.1 index c277d78..0b92691 100644 --- a/man/vaydns-client.1 +++ b/man/vaydns-client.1 @@ -182,11 +182,6 @@ Must be >= reconnect-min. Default: .Cm 30s . -.It Fl session-check-interval Ar DURATION -How often to check whether the current session is alive. -Default: -.Cm 500ms . - .It Fl rps Ar N Rate limit outgoing DNS queries to .Ar N diff --git a/vaydns-client/main.go b/vaydns-client/main.go index 65bb893..7b4c986 100644 --- a/vaydns-client/main.go +++ b/vaydns-client/main.go @@ -30,6 +30,9 @@ func (s *StringSliceFlag) Set(value string) error { *s = append(*s, value) return nil } + +var version = "dev" + func readKeyFromFile(filename string) ([]byte, error) { f, err := os.Open(filename) if err != nil { @@ -40,6 +43,7 @@ func readKeyFromFile(filename string) ([]byte, error) { } func main() { + var showVersion bool var dohURLs StringSliceFlag var dotAddrs StringSliceFlag var domainArg string @@ -78,7 +82,23 @@ Examples: %[1]s -dot resolver.example:853 -pubkey-file server.pub -domain t.example.com -listen 127.0.0.1:7000 `, os.Args[0]) - flag.PrintDefaults() + flag.CommandLine.VisitAll(func(f *flag.Flag) { + if f.Name == "session-check-interval" { + return + } + fmt.Fprintf(flag.CommandLine.Output(), " -%s", f.Name) + name, usage := flag.UnquoteUsage(f) + if len(name) > 0 { + fmt.Fprintf(flag.CommandLine.Output(), " %s", name) + } + if len(f.DefValue) > 0 { + fmt.Fprintf(flag.CommandLine.Output(), " (default %s)", f.DefValue) + } + if len(usage) > 0 { + fmt.Fprintf(flag.CommandLine.Output(), "\n \t%s", usage) + } + fmt.Fprint(flag.CommandLine.Output(), "\n") + }) labels := make([]string, 0) labels = append(labels, "none") for _, entry := range client.UTLSClientHelloIDMap() { @@ -127,15 +147,21 @@ Known TLS fingerprints for -utls are: flag.BoolVar(&udpAcceptErrors, "udp-accept-errors", false, "accept DNS error responses instead of filtering them (disables censorship evasion)") flag.BoolVar(&compatDnstt, "dnstt-compat", false, "use original dnstt wire format (8-byte ClientID, padding prefixes)") flag.IntVar(&clientIDSize, "clientid-size", 2, "client ID size in bytes (ignored when -dnstt-compat is set)") - flag.StringVar(&recordTypeStr, "record-type", "txt", "DNS record type for downstream data (txt, cname, a, aaaa, mx, ns, srv)") + flag.StringVar(&recordTypeStr, "record-type", "txt", "DNS record type for downstream data (txt, null, cname, a, aaaa, mx, ns, srv, caa)") flag.IntVar(&queueSize, "queue-size", turbotunnel.QueueSize, "packet queue size for transport and DNS layers") flag.IntVar(&kcpWindowSize, "kcp-window-size", 0, "KCP send/receive window size in packets (0 = queue-size/2)") flag.StringVar(&queueOverflowStr, "queue-overflow", string(turbotunnel.DefaultQueueOverflowMode), "queue overflow behavior: drop or block") var logLevel string flag.StringVar(&logLevel, "log-level", "info", "log level (debug, info, warning, error)") + flag.BoolVar(&showVersion, "v", false, "print version and exit") flag.Parse() + if showVersion { + fmt.Println(version) + os.Exit(0) + } + level, err := log.ParseLevel(logLevel) if err != nil { fmt.Fprintf(os.Stderr, "invalid log level: %s\n", logLevel) @@ -212,10 +238,12 @@ Known TLS fingerprints for -utls are: resolvers = append(resolvers, resolver) } for _, dotAddr := range dotAddrs { - resolvers = append(resolvers, client.Resolver{ + resolver := client.Resolver{ ResolverType: client.ResolverTypeDOT, ResolverAddr: dotAddr, - }) + } + resolver.UTLSClientHelloID = utlsClientHelloID + resolvers = append(resolvers, resolver) } for _, udpAddr := range udpAddrs { resolver := client.Resolver{ @@ -361,7 +389,7 @@ Known TLS fingerprints for -utls are: ts.RecordType = recordTypeStr // Build tunnel. - tunnel, err := client.NewTunnel(resolvers, ts) + tunnel, err := client.NewTunnelMulti(resolvers, ts) if err != nil { fmt.Fprintf(os.Stderr, "%v\n", err) os.Exit(1) diff --git a/vaydns-server/main.go b/vaydns-server/main.go index ae8cbca..71e56ec 100644 --- a/vaydns-server/main.go +++ b/vaydns-server/main.go @@ -1,26 +1,26 @@ -// dnstt-server is the server end of a DNS tunnel. +// vaydns-server is the server end of a DNS tunnel. // // Usage: // -// dnstt-server -gen-key [-privkey-file PRIVKEYFILE] [-pubkey-file PUBKEYFILE] -// dnstt-server -udp ADDR [-privkey PRIVKEY|-privkey-file PRIVKEYFILE] [-fallback FALLBACKADDR] -domain DOMAIN -upstream UPSTREAMADDR +// vaydns-server -gen-key [-privkey-file PRIVKEYFILE] [-pubkey-file PUBKEYFILE] +// vaydns-server -udp ADDR [-privkey PRIVKEY|-privkey-file PRIVKEYFILE] [-fallback FALLBACKADDR] -domain DOMAIN -upstream UPSTREAMADDR // // Example: // -// dnstt-server -gen-key -privkey-file server.key -pubkey-file server.pub -// dnstt-server -udp :53 -privkey-file server.key -domain t.example.com -upstream 127.0.0.1:8000 +// vaydns-server -gen-key -privkey-file server.key -pubkey-file server.pub +// vaydns-server -udp :53 -privkey-file server.key -domain t.example.com -upstream 127.0.0.1:8000 // // With fallback for non-DNS traffic: // -// dnstt-server -udp :53 -privkey-file server.key -fallback 127.0.0.1:8888 -domain t.example.com -upstream 127.0.0.1:8000 +// vaydns-server -udp :53 -privkey-file server.key -fallback 127.0.0.1:8888 -domain t.example.com -upstream 127.0.0.1:8000 // // To generate a persistent server private key, first run with the -gen-key // option. By default the generated private and public keys are printed to // standard output. To save them to files instead, use the -privkey-file and // -pubkey-file options. // -// dnstt-server -gen-key -// dnstt-server -gen-key -privkey-file server.key -pubkey-file server.pub +// vaydns-server -gen-key +// vaydns-server -gen-key -privkey-file server.key -pubkey-file server.pub // // You can give the server's private key as a file or as a hex string. // @@ -73,8 +73,8 @@ import ( ) const ( - defaultIdleTimeout = 60 * time.Second - defaultKeepAlive = 10 * time.Second + defaultIdleTimeout = 10 * time.Second + defaultKeepAlive = 2 * time.Second // Bound the pre-smux handshake so half-open KCP sessions cannot linger // indefinitely and consume server resources. defaultHandshakeTimeout = 15 * time.Second @@ -421,6 +421,11 @@ func nextPacketDnstt(r *bytes.Reader) ([]byte, error) { // this query. If the returned dns.Message has an Rcode() of dns.RcodeNoError, // the message is a candidate for for carrying downstream data in a TXT record. func responseFor(query *dns.Message, domain dns.Name) (*dns.Message, []byte) { + responsePayloadSize := uint16(maxUDPPayload) + if int(responsePayloadSize) != maxUDPPayload { + responsePayloadSize = 0xffff + } + resp := &dns.Message{ ID: query.ID, Flags: 0x8000, // QR = 1, RCODE = no error @@ -455,7 +460,7 @@ func responseFor(query *dns.Message, domain dns.Name) (*dns.Message, []byte) { resp.Additional = append(resp.Additional, dns.RR{ Name: dns.Name{}, Type: dns.RRTypeOPT, - Class: 4096, // responder's UDP payload size + Class: responsePayloadSize, // responder's UDP payload size TTL: 0, Data: []byte{}, }) @@ -778,6 +783,10 @@ func encodeResponsePayload(rec *record, data []byte, domain dns.Name) error { Data: chunk, } } + case dns.RRTypeNULL: + rec.Resp.Answer[0].Data = dns.EncodeRDataNULL(data) + case dns.RRTypeCAA: + rec.Resp.Answer[0].Data = dns.EncodeRDataCAA(data) case dns.RRTypeCNAME: rdata, err := dns.EncodeRDataCNAME(data, domain) if err != nil { @@ -941,15 +950,15 @@ func sendLoop(dnsConn net.PacketConn, ttConn *turbotunnel.QueuePacketConn, ch <- return nil } -// computeMaxEncodedPayload computes the maximum amount of downstream TXT RR -// data that keep the overall response size less than maxUDPPayload, in the +// computeMaxEncodedPayload computes the maximum amount of downstream single-RR +// payload that keeps the overall response size less than maxUDPPayload, in the // worst case when the response answers a query that has a maximum-length name // in its Question section. Returns 0 in the case that no amount of data makes // the overall response size small enough. // // This function needs to be kept in sync with sendLoop with regard to how it // builds candidate responses. -func computeMaxEncodedPayload(limit int) int { +func computeMaxEncodedPayload(limit int, encode func([]byte) []byte) int { // 64+64+64+62 octets, needs to be base32-decodable. maxLengthName, err := dns.NewName([][]byte{ []byte("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"), @@ -1014,7 +1023,7 @@ func computeMaxEncodedPayload(limit int) int { high := 32768 for low+1 < high { mid := (low + high) / 2 - resp.Answer[0].Data = dns.EncodeRDataTXT(make([]byte, mid)) + resp.Answer[0].Data = encode(make([]byte, mid)) buf, err := resp.WireFormat() if err != nil { panic(err) @@ -1136,8 +1145,12 @@ func run(privkey []byte, domain dns.Name, upstream string, dnsConn net.PacketCon maxEncodedPayload = computeMaxEncodedPayloadMultiRR(maxUDPPayload, 4) case dns.RRTypeAAAA: maxEncodedPayload = computeMaxEncodedPayloadMultiRR(maxUDPPayload, 16) + case dns.RRTypeNULL: + maxEncodedPayload = computeMaxEncodedPayload(maxUDPPayload, dns.EncodeRDataNULL) + case dns.RRTypeCAA: + maxEncodedPayload = computeMaxEncodedPayload(maxUDPPayload, dns.EncodeRDataCAA) default: - maxEncodedPayload = computeMaxEncodedPayload(maxUDPPayload) + maxEncodedPayload = computeMaxEncodedPayload(maxUDPPayload, dns.EncodeRDataTXT) } // 2 bytes accounts for a packet length prefix. mtu := maxEncodedPayload - 2 @@ -1194,7 +1207,10 @@ func run(privkey []byte, domain dns.Name, upstream string, dnsConn net.PacketCon return recvLoop(domain, dnsConn, ttConn, ch, fallbackMgr, stats, wireConfig) } +var version = "dev" + func main() { + var showVersion bool var genKey bool var domainArg string var upstream string @@ -1243,15 +1259,21 @@ Example: flag.StringVar(&keepAliveStr, "keepalive", defaultKeepAlive.String(), "keepalive ping interval (e.g. 2s, 500ms); must be less than idle-timeout") flag.BoolVar(&compatDnstt, "dnstt-compat", false, "use original dnstt wire format (8-byte ClientID, padding prefixes)") flag.IntVar(&clientIDSize, "clientid-size", 2, "client ID size in bytes (ignored when -dnstt-compat is set)") - flag.StringVar(&recordTypeStr, "record-type", "txt", "DNS record type for downstream data (txt, cname, a, aaaa, mx, ns, srv)") + flag.StringVar(&recordTypeStr, "record-type", "txt", "DNS record type for downstream data (txt, null, cname, a, aaaa, mx, ns, srv, caa)") flag.IntVar(&queueSize, "queue-size", turbotunnel.QueueSize, "packet queue size for DNS tunnel transport") flag.IntVar(&kcpWindowSize, "kcp-window-size", 0, "KCP send/receive window size in packets (0 = queue-size/2)") flag.StringVar(&queueOverflowStr, "queue-overflow", string(turbotunnel.DefaultQueueOverflowMode), "queue overflow behavior: drop or block") var logLevel string flag.StringVar(&logLevel, "log-level", "info", "log level (debug, info, warning, error)") + flag.BoolVar(&showVersion, "v", false, "print version and exit") flag.Parse() + if showVersion { + fmt.Println(version) + os.Exit(0) + } + level, err := log.ParseLevel(logLevel) if err != nil { fmt.Fprintf(os.Stderr, "invalid log level: %s\n", logLevel) @@ -1395,8 +1417,8 @@ Example: fmt.Fprintf(os.Stderr, "-keepalive (%s) must be less than -idle-timeout (%s)\n", keepAlive, idleTimeout) os.Exit(1) } - if queueSize <= 0 { - fmt.Fprintf(os.Stderr, "-queue-size (%d) must be greater than 0\n", queueSize) + if queueSize < 32 { + fmt.Fprintf(os.Stderr, "-queue-size (%d) must be at least 32\n", queueSize) os.Exit(1) } if kcpWindowSize < 0 { @@ -1453,7 +1475,8 @@ Example: } log.Infof("wire config: clientid-size=%d compat=%v", wireConfig.ClientIDSize, wireConfig.Compat) - if recordType != dns.RRTypeTXT { + switch recordType { + case dns.RRTypeCNAME, dns.RRTypeNS, dns.RRTypeMX, dns.RRTypeSRV: explicitFlags := make(map[string]bool) flag.Visit(func(f *flag.Flag) { explicitFlags[f.Name] = true