diff --git a/cmd/ans-dns/main.go b/cmd/ans-dns/main.go index 3ac9ef6..b273976 100644 --- a/cmd/ans-dns/main.go +++ b/cmd/ans-dns/main.go @@ -30,7 +30,6 @@ import ( "flag" "fmt" "io" - "net" "net/http" "os" "os/signal" @@ -373,7 +372,3 @@ func runClear(args []string) error { fmt.Printf("cleared %d records for %s from %s\n", n, agentID, *zonePath) return nil } - -// Ensure net package stays referenced so future edits that thread a -// net.Listener / net.PacketConn through don't require chasing imports. -var _ = net.InterfaceAddrs diff --git a/cmd/ans-dns/main_test.go b/cmd/ans-dns/main_test.go new file mode 100644 index 0000000..18d66c0 --- /dev/null +++ b/cmd/ans-dns/main_test.go @@ -0,0 +1,178 @@ +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/miekg/dns" +) + +// svcbWithKeyNNNNN is the Consolidated Approach SVCB presentation the +// RA's ANS_SVCB profile emits: ServiceMode `1 .` plus alpn, port, the +// well-known suffix as the RFC 9460 §14.3.1 Private Use key65280, and +// the capability digest as key65281. These keyNNNNN forms are what +// makes the value publishable — see the named-form negative case below. +const ( + svcbValueKeyNNNNN = `1 . alpn=a2a port=443 key65280=agent-card.json key65281=CY1lDMbSgN7kwPR0iadc8Xub-7rlMFGAbU4IQQiy_yc` + // svcbValueNamedWK is the pre-fix named form. dns.NewRR rejects it + // (`bad SVCB key`), so answersFor drops it and the server returns no + // answer — the unpublishability that Fix A's no-migration argument + // rests on. ans-dns serving this value is indistinguishable from + // NXDOMAIN to a resolver. + svcbValueNamedWK = `1 . alpn=a2a port=443 wk=agent-card.json` + tlsaValueSel0 = `3 0 1 deadbeefcafe1234` + txtValue = `v=ans1; version=1.0.0; p=a2a; mode=direct; url=https://agent.example.com/a2a` +) + +// TestAnswersFor_ServePathRoundTrip drives the serve path (answersFor) +// directly: each zone record is composed into a presentation line and +// parsed with dns.NewRR exactly as the running server does, so a value +// the parser rejects yields zero answers. Table-driven over the record +// shapes the RA emits post-Fix-A/B2. +func TestAnswersFor_ServePathRoundTrip(t *testing.T) { + const fqdn = "agent.example.com" + + tests := []struct { + name string + record zoneRecord + queryName string + queryType uint16 + wantAnswer bool // true → exactly one RR served + wantInRR string // substring required in the served RR string (when wantAnswer) + }{ + { + name: "svcb_keyNNNNN_parses_and_serves", + record: zoneRecord{Name: fqdn, Type: "SVCB", Value: svcbValueKeyNNNNN, TTL: 3600}, + queryName: fqdn, + queryType: dns.TypeSVCB, + wantAnswer: true, + // dns.NewRR re-renders Private Use SvcParams quoted; pin the + // key numbers survive the round-trip. + wantInRR: `key65280="agent-card.json"`, + }, + { + name: "svcb_keyNNNNN_carries_capability_digest", + record: zoneRecord{Name: fqdn, Type: "SVCB", Value: svcbValueKeyNNNNN, TTL: 3600}, + queryName: fqdn, + queryType: dns.TypeSVCB, + wantAnswer: true, + wantInRR: `key65281="CY1lDMbSgN7kwPR0iadc8Xub-7rlMFGAbU4IQQiy_yc"`, + }, + { + name: "svcb_named_wk_rejected_and_dropped", + record: zoneRecord{Name: fqdn, Type: "SVCB", Value: svcbValueNamedWK, TTL: 3600}, + queryName: fqdn, + queryType: dns.TypeSVCB, + wantAnswer: false, // dns.NewRR("… wk=…") errors → answersFor skips it + }, + { + name: "txt_value_served_quoted", + record: zoneRecord{Name: "_ans." + fqdn, Type: "TXT", Value: txtValue, TTL: 3600}, + queryName: "_ans." + fqdn, + queryType: dns.TypeTXT, + wantAnswer: true, + wantInRR: `"` + txtValue + `"`, + }, + { + name: "tlsa_selector0_served", + record: zoneRecord{Name: "_443._tcp." + fqdn, Type: "TLSA", Value: tlsaValueSel0, TTL: 3600}, + queryName: "_443._tcp." + fqdn, + queryType: dns.TypeTLSA, + wantAnswer: true, + wantInRR: "3 0 1 deadbeefcafe1234", + }, + { + name: "type_mismatch_yields_no_answer", + record: zoneRecord{Name: fqdn, Type: "SVCB", Value: svcbValueKeyNNNNN, TTL: 3600}, + queryName: fqdn, + queryType: dns.TypeA, // querying A against an SVCB record + wantAnswer: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + q := dns.Question{Name: dns.Fqdn(tc.queryName), Qtype: tc.queryType, Qclass: dns.ClassINET} + answers := answersFor(q, []zoneRecord{tc.record}) + + if !tc.wantAnswer { + if len(answers) != 0 { + t.Fatalf("want zero answers, got %d: %v", len(answers), answers) + } + return + } + if len(answers) != 1 { + t.Fatalf("want exactly one answer, got %d: %v", len(answers), answers) + } + got := answers[0].String() + if tc.wantInRR != "" && !strings.Contains(got, tc.wantInRR) { + t.Errorf("served RR %q does not contain %q", got, tc.wantInRR) + } + }) + } +} + +// TestLoadZoneThenServe pins the full disk-to-wire path: a JSON zone +// file written by `install` is loaded by loadZone, flattened, and +// served by answersFor. Exercises the keyNNNNN SVCB and selector-0 TLSA +// records together as one agent's record set, the way an operator +// publishes them. +func TestLoadZoneThenServe(t *testing.T) { + const fqdn = "agent.example.com" + dir := t.TempDir() + zonePath := filepath.Join(dir, "zone.json") + + zoneJSON := `{ + "records": { + "agent-1": [ + {"name": "agent.example.com", "type": "SVCB", "value": "1 . alpn=a2a port=443 key65280=agent-card.json key65281=CY1lDMbSgN7kwPR0iadc8Xub-7rlMFGAbU4IQQiy_yc", "ttl": 3600}, + {"name": "_443._tcp.agent.example.com", "type": "TLSA", "value": "3 0 1 deadbeefcafe1234", "ttl": 3600} + ] + } +}` + if err := os.WriteFile(zonePath, []byte(zoneJSON), 0o600); err != nil { + t.Fatal(err) + } + + z, err := loadZone(zonePath) + if err != nil { + t.Fatalf("loadZone: %v", err) + } + records := z.flatten() + if len(records) != 2 { + t.Fatalf("want 2 flattened records, got %d", len(records)) + } + + svcb := answersFor(dns.Question{Name: dns.Fqdn(fqdn), Qtype: dns.TypeSVCB, Qclass: dns.ClassINET}, records) + if len(svcb) != 1 { + t.Fatalf("want one SVCB answer, got %d", len(svcb)) + } + if !strings.Contains(svcb[0].String(), `key65280="agent-card.json"`) { + t.Errorf("SVCB answer missing key65280: %q", svcb[0].String()) + } + if !strings.Contains(svcb[0].String(), `key65281="CY1lDMbSgN7kwPR0iadc8Xub-7rlMFGAbU4IQQiy_yc"`) { + t.Errorf("SVCB answer missing key65281 capability digest after disk round-trip: %q", svcb[0].String()) + } + + tlsa := answersFor(dns.Question{Name: dns.Fqdn("_443._tcp." + fqdn), Qtype: dns.TypeTLSA, Qclass: dns.ClassINET}, records) + if len(tlsa) != 1 { + t.Fatalf("want one TLSA answer, got %d", len(tlsa)) + } + if !strings.Contains(tlsa[0].String(), "3 0 1 deadbeefcafe1234") { + t.Errorf("TLSA answer missing selector-0 binding: %q", tlsa[0].String()) + } +} + +// TestLoadZoneMissingFileIsEmpty pins loadZone's "no file → empty zone" +// contract that lets `serve` start before any `install` has run. +func TestLoadZoneMissingFileIsEmpty(t *testing.T) { + z, err := loadZone(filepath.Join(t.TempDir(), "does-not-exist.json")) + if err != nil { + t.Fatalf("loadZone on missing file must not error, got %v", err) + } + if len(z.flatten()) != 0 { + t.Errorf("missing-file zone must be empty, got %d records", len(z.flatten())) + } +} diff --git a/cmd/ans-ra/main.go b/cmd/ans-ra/main.go index b61a5b7..1dd61eb 100644 --- a/cmd/ans-ra/main.go +++ b/cmd/ans-ra/main.go @@ -34,6 +34,7 @@ import ( "github.com/godaddy/ans/internal/adapter/store/sqlite" "github.com/godaddy/ans/internal/adapter/tlclient" "github.com/godaddy/ans/internal/config" + "github.com/godaddy/ans/internal/domain" "github.com/godaddy/ans/internal/port" "github.com/godaddy/ans/internal/ra/handler" ramiddleware "github.com/godaddy/ans/internal/ra/middleware" @@ -163,21 +164,40 @@ func run(cfgPath string) error { // Event bus. bus := eventbus.NewInMemoryBus(logger) + // Discovery registry: composes the bundled ANS-family port.ProfileEmitter + // adapters (ANS_TXT, ANS_SVCB) the V2 register / verify-dns paths walk + // to compute `dnsRecordsProvisioned[]`. Insertion order here pins the + // canonical-bytes emission order for the §4.4.2 union case + // (`[TXT×N, HTTPS, SVCB×N, badge, TLSA]`). + discoveryReg, err := service.NewDefaultProfileRegistry(cfg.TLClient.PublicBaseURL) + if err != nil { + return fmt.Errorf("init discovery registry: %w", err) + } + if err := assertRegistryDomainCoherence(discoveryReg); err != nil { + return fmt.Errorf("discovery registry coherence: %w", err) + } + logger.Info(). + Strs("profiles", profileIDStrings(discoveryReg.IDs())). + Msg("discovery registry ready") + // Services. regSvc := service.NewRegistrationService( - agents, endpoints, certsStore, byoc, renewals, validator, identityCA, bus, outbox, db, + agents, endpoints, certsStore, byoc, renewals, validator, identityCA, bus, outbox, db, discoveryReg, ).WithSigner(service.EventSigner{ KeyManager: km, KeyID: signerKeyID, RaID: cfg.Signer.RaID, }).WithDNSVerifier(dnsVerifier). - WithServerCertificateAuthority(serverCA). - WithTLPublicBaseURL(cfg.TLClient.PublicBaseURL) + WithServerCertificateAuthority(serverCA) // HTTP. r := chi.NewRouter() r.Use(middleware.Recoverer) r.Use(middleware.RequestID) + // middleware.RealIP is deliberately NOT wired: chi 5.3 deprecates it + // as IP-spoofable (it trusts X-Forwarded-For / X-Real-IP regardless + // of whether a proxy set them), and nothing in this service reads + // RemoteAddr — no request logger, no rate limiter, no IP audit. r.Use(middleware.Timeout(30 * time.Second)) r.Use(middleware.AllowContentType("application/json")) r.Use(authProvider.Middleware()) @@ -301,9 +321,10 @@ func run(cfgPath string) error { logger.Info().Str("addr", addr).Msg("listening") // Hardened timeouts: // - ReadHeaderTimeout caps slowloris-style header dribbling. - // - WriteTimeout > the 30s chi handler timeout (line 176) so - // chi gets the chance to write a clean 503 before the server - // drops the connection on a slow handler/client. + // - WriteTimeout > the 30s chi middleware.Timeout wired on the + // router above, so chi gets the chance to write a clean 503 + // before the server drops the connection on a slow + // handler/client. // - IdleTimeout caps how long an idle keep-alive connection // can sit on the runner; pairs with HTTP/1.1 connection reuse // for SDK clients while keeping a hard ceiling. @@ -399,6 +420,50 @@ type providerWithAnonymous interface { Middleware() func(http.Handler) http.Handler } +// assertRegistryDomainCoherence verifies the discovery registry's +// wired profiles are exactly the set domain advertises as valid via +// domain.ValidDiscoveryProfiles(). Drift in either direction is a +// startup misconfig: registry-only profile means request-side validation +// rejects it (operator error noise); domain-only profile means +// applyDiscoveryProfiles accepts a value verify-dns can never satisfy +// (silent broken-by-omission). Both fail server start. +// profileIDStrings converts the registry's typed profile IDs to the +// plain strings the startup readiness log line wants. +func profileIDStrings(ids []domain.DiscoveryProfile) []string { + out := make([]string, len(ids)) + for i, id := range ids { + out[i] = string(id) + } + return out +} + +func assertRegistryDomainCoherence(reg port.ProfileRegistry) error { + registryIDs := make(map[string]bool) + for _, id := range reg.IDs() { + registryIDs[string(id)] = true + } + domainIDs := make(map[string]bool) + for _, s := range domain.ValidDiscoveryProfiles() { + domainIDs[s] = true + } + var registryOnly, domainOnly []string + for id := range registryIDs { + if !domainIDs[id] { + registryOnly = append(registryOnly, id) + } + } + for id := range domainIDs { + if !registryIDs[id] { + domainOnly = append(domainOnly, id) + } + } + if len(registryOnly) > 0 || len(domainOnly) > 0 { + return fmt.Errorf("drift between registry.IDs() and domain.ValidDiscoveryProfiles(): registry-only=%v, domain-only=%v", + registryOnly, domainOnly) + } + return nil +} + // selectDNSVerifier returns the configured DNS adapter. Returns a // port.DNSVerifier so the service layer can wire it directly. // diff --git a/cmd/ans-tl/main.go b/cmd/ans-tl/main.go index 611b5fe..ebc39a1 100644 --- a/cmd/ans-tl/main.go +++ b/cmd/ans-tl/main.go @@ -209,6 +209,10 @@ func run(cfgPath string) error { r := chi.NewRouter() r.Use(middleware.Recoverer) r.Use(middleware.RequestID) + // middleware.RealIP is deliberately NOT wired: chi 5.3 deprecates it + // as IP-spoofable (it trusts X-Forwarded-For / X-Real-IP regardless + // of whether a proxy set them), and nothing in this service reads + // RemoteAddr — no request logger, no rate limiter, no IP audit. r.Use(middleware.Timeout(30 * time.Second)) r.Use(authProvider.Middleware()) @@ -278,9 +282,10 @@ func run(cfgPath string) error { logger.Info().Str("addr", addr).Msg("listening") // Hardened timeouts — see the matching block in cmd/ans-ra/main.go // for the full rationale. WriteTimeout sits above the 30s chi - // handler timeout (line 213) so chi can write a clean 503 first; - // IdleTimeout caps keep-alive idle time; MaxHeaderBytes is the - // Go default written explicitly for audit visibility. + // middleware.Timeout wired on the router above so chi can write a + // clean 503 first; IdleTimeout caps keep-alive idle time; + // MaxHeaderBytes is the Go default written explicitly for audit + // visibility. srv := &http.Server{ Addr: addr, Handler: r, diff --git a/internal/adapter/discovery/ans/ansbadge.go b/internal/adapter/discovery/ans/ansbadge.go new file mode 100644 index 0000000..51e071a --- /dev/null +++ b/internal/adapter/discovery/ans/ansbadge.go @@ -0,0 +1,50 @@ +package ans + +import ( + "fmt" + "net/url" + + "github.com/godaddy/ans/internal/domain" +) + +// BadgeRecord returns the `_ans-badge.` TXT record every ANS-family +// style emits as the trust attestation hook. Returns a one-element slice +// when reg has at least one endpoint; returns an empty slice otherwise so +// callers can `append(records, BadgeRecord(reg, tlPublicBaseURL)...)` +// unconditionally. +// +// The badge url= points at the Transparency Log's badge endpoint for this +// agent (`/v1/agents/`) when tlPublicBaseURL is +// set and the registration carries an AgentID; otherwise it falls back to +// the first endpoint's own URL. Pointing at the TL is the correct target — +// badge verifiers resolve trust through the log, not the agent's host. +// +// Both ANS_SVCB and ANS_TXT styles call BadgeRecord. When both are in +// the resolved set the service walker dedupes on (Name, Type, Value), +// so the badge lands once per registration regardless of style count. +// +// Required=true: badge-verifying clients won't trust an agent that +// publishes its discovery records without a paired badge — every +// non-badge ANS record on the wire is meaningless without it. +func BadgeRecord(reg *domain.AgentRegistration, tlPublicBaseURL string) []domain.ExpectedDNSRecord { + if len(reg.Endpoints) == 0 { + return nil + } + version := reg.AnsName.Version().String() + badgeURL := reg.Endpoints[0].AgentURL + if tlPublicBaseURL != "" && reg.AgentID != "" { + // tlPublicBaseURL is validated at config load (https, no + // query/fragment/userinfo), so JoinPath cannot fail here. + badgeURL, _ = url.JoinPath(tlPublicBaseURL, "v1", "agents", reg.AgentID) + } + value := fmt.Sprintf("v=ans-badge1; version=%s; url=%s", + version, badgeURL) + return []domain.ExpectedDNSRecord{{ + Name: fmt.Sprintf("_ans-badge.%s", reg.FQDN()), + Type: domain.DNSRecordTXT, + Value: value, + Purpose: domain.PurposeBadge, + Required: true, + TTL: 3600, + }} +} diff --git a/internal/adapter/discovery/ans/ansbadge_test.go b/internal/adapter/discovery/ans/ansbadge_test.go new file mode 100644 index 0000000..cd1395b --- /dev/null +++ b/internal/adapter/discovery/ans/ansbadge_test.go @@ -0,0 +1,98 @@ +package ans + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/godaddy/ans/internal/domain" +) + +func TestBadgeRecord(t *testing.T) { + mustReg := func(t *testing.T, version string, host string, eps []domain.AgentEndpoint) *domain.AgentRegistration { + t.Helper() + v, err := domain.ParseSemVer(version) + require.NoError(t, err) + ansName, err := domain.NewAnsName(v, host) + require.NoError(t, err) + return &domain.AgentRegistration{AnsName: ansName, Endpoints: eps} + } + + tests := []struct { + name string + reg *domain.AgentRegistration + agentID string + tlBaseURL string + wantEmpty bool + wantName string + wantValue string + }{ + { + name: "no_endpoints_emits_no_badge", + reg: mustReg(t, "1.0.0", "agent.example.com", nil), + wantEmpty: true, + }, + { + name: "single_endpoint_emits_one_badge", + reg: mustReg(t, "1.2.3", "agent.example.com", []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com/a2a"}, + }), + wantName: "_ans-badge.agent.example.com", + wantValue: "v=ans-badge1; version=1.2.3; url=https://agent.example.com/a2a", + }, + { + name: "multiple_endpoints_badge_uses_first_url", + reg: mustReg(t, "2.0.0", "agent.example.com", []domain.AgentEndpoint{ + {Protocol: domain.ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, + {Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com/a2a"}, + }), + wantName: "_ans-badge.agent.example.com", + wantValue: "v=ans-badge1; version=2.0.0; url=https://agent.example.com/mcp", + }, + { + // When the deployment supplies a public TL URL and the + // registration carries an AgentID, the badge url= points at the + // TL's per-agent badge endpoint rather than the agent's own host + // — badge verifiers resolve trust through the log, not the agent. + name: "tl_base_url_points_badge_at_transparency_log", + reg: mustReg(t, "1.0.0", "agent.example.com", []domain.AgentEndpoint{ + {Protocol: domain.ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, + }), + agentID: "test-agent-id", + tlBaseURL: "https://tl.example.org", + wantName: "_ans-badge.agent.example.com", + wantValue: "v=ans-badge1; version=1.0.0; url=https://tl.example.org/v1/agents/test-agent-id", + }, + { + // Fallback: a TL URL is set but the registration has no AgentID, + // so the per-agent TL path can't be formed and the badge falls + // back to the first endpoint's URL. + name: "tl_base_url_without_agent_id_falls_back_to_endpoint", + reg: mustReg(t, "1.0.0", "agent.example.com", []domain.AgentEndpoint{ + {Protocol: domain.ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, + }), + tlBaseURL: "https://tl.example.org", + wantName: "_ans-badge.agent.example.com", + wantValue: "v=ans-badge1; version=1.0.0; url=https://agent.example.com/mcp", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + tc.reg.AgentID = tc.agentID + got := BadgeRecord(tc.reg, tc.tlBaseURL) + if tc.wantEmpty { + assert.Empty(t, got) + return + } + require.Len(t, got, 1) + r := got[0] + assert.Equal(t, tc.wantName, r.Name) + assert.Equal(t, domain.DNSRecordTXT, r.Type) + assert.Equal(t, tc.wantValue, r.Value) + assert.Equal(t, domain.PurposeBadge, r.Purpose) + assert.True(t, r.Required, "_ans-badge is always Required=true alongside discovery records") + assert.Equal(t, 3600, r.TTL) + }) + } +} diff --git a/internal/adapter/discovery/ans/protocol.go b/internal/adapter/discovery/ans/protocol.go new file mode 100644 index 0000000..38c9b6a --- /dev/null +++ b/internal/adapter/discovery/ans/protocol.go @@ -0,0 +1,59 @@ +// Package ans implements the bundled ANS-family port.ProfileEmitter +// adapters: SVCBProfile (the Consolidated Approach SVCB shape per RFC 9460 +// plus the `_ans-badge` TXT extension) and TXTProfile (the original `_ans` +// TXT shape, supported indefinitely for operators with existing zone-edit +// tooling). Both profiles share two family-level trust records — the +// `_ans-badge` TXT and the server-cert TLSA — emitted by every ANS-family +// profile and deduped at the service walker. +// +// Helpers private to this package, by file: protocol.go holds the +// protocol-token and well-known-suffix mappings both profiles need; +// svcb.go holds svcbPortFor (consumed by both the SVCB rows and +// tlsa.go's port collection) and the capability-digest (key65281) +// conversion; tlsa.go and ansbadge.go hold the family-level trust +// records (per-port TLSA, `_ans-badge` TXT) every ANS-family profile +// emits. +package ans + +import ( + "github.com/godaddy/ans/internal/domain" +) + +// protocolToANSValue maps a protocol enum to the wire token used inside +// `_ans` TXT payloads (`p=`) and SVCB `alpn=` SvcParams. +// Unknown protocols pass through unchanged so a future protocol added +// to the domain layer surfaces in records without a parallel edit here. +func protocolToANSValue(p domain.Protocol) string { + switch p { + case domain.ProtocolA2A: + return "a2a" + case domain.ProtocolMCP: + return "mcp" + case domain.ProtocolHTTPAPI: + return "http-api" + default: + return string(p) + } +} + +// wkPathFor returns the suffix-only well-known path published in the +// Consolidated Approach SVCB record's well-known SvcParam (key65280, +// the draft's `wk`). Suffix-only matches the consolidated-draft +// examples (§4 line 134); clients prepend `/.well-known/` to construct +// the full path. Empty result means the caller SHOULD omit the SvcParam +// entirely (e.g. direct-mode agents that expose no canonical metadata +// file). +// +// A2A: `agent-card.json` (IANA-registered well-known per A2A spec). +// MCP: `mcp.json` (de-facto convention; see SEP-1649 progress). +// HTTP-API: empty (no per-protocol metadata file convention). +func wkPathFor(p domain.Protocol) string { + switch p { + case domain.ProtocolA2A: + return "agent-card.json" + case domain.ProtocolMCP: + return "mcp.json" + default: + return "" + } +} diff --git a/internal/adapter/discovery/ans/protocol_test.go b/internal/adapter/discovery/ans/protocol_test.go new file mode 100644 index 0000000..b27d374 --- /dev/null +++ b/internal/adapter/discovery/ans/protocol_test.go @@ -0,0 +1,46 @@ +package ans + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/godaddy/ans/internal/domain" +) + +func TestProtocolToANSValue(t *testing.T) { + tests := []struct { + name string + in domain.Protocol + want string + }{ + {name: "a2a", in: domain.ProtocolA2A, want: "a2a"}, + {name: "mcp", in: domain.ProtocolMCP, want: "mcp"}, + {name: "http_api", in: domain.ProtocolHTTPAPI, want: "http-api"}, + {name: "unknown_protocol_passes_through_unchanged", in: domain.Protocol("UNKNOWN"), want: "UNKNOWN"}, + {name: "empty_protocol_passes_through_as_empty", in: domain.Protocol(""), want: ""}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, protocolToANSValue(tc.in)) + }) + } +} + +func TestWkPathFor(t *testing.T) { + tests := []struct { + name string + in domain.Protocol + want string + }{ + {name: "a2a_returns_agent_card_json", in: domain.ProtocolA2A, want: "agent-card.json"}, + {name: "mcp_returns_mcp_json", in: domain.ProtocolMCP, want: "mcp.json"}, + {name: "http_api_returns_empty", in: domain.ProtocolHTTPAPI, want: ""}, + {name: "unknown_protocol_returns_empty", in: domain.Protocol("UNKNOWN"), want: ""}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, wkPathFor(tc.in)) + }) + } +} diff --git a/internal/adapter/discovery/ans/svcb.go b/internal/adapter/discovery/ans/svcb.go new file mode 100644 index 0000000..9bb2ef1 --- /dev/null +++ b/internal/adapter/discovery/ans/svcb.go @@ -0,0 +1,203 @@ +package ans + +import ( + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "fmt" + "net/url" + "strconv" + "strings" + + "github.com/godaddy/ans/internal/domain" + "github.com/godaddy/ans/internal/port" +) + +// SVCBProfile implements port.ProfileEmitter for the Consolidated +// Approach SVCB shape (ANS_SVCB). It emits one SVCB row per protocol +// endpoint at the agent's bare FQDN, plus the ANS-family trust +// records (`_ans-badge` TXT and TLSA) so the profile is self-contained +// when registered alone. +// +// Records always returns SVCB rows with Required=true. The service +// walker post-processes the slice to flip Required=false on these +// rows when ANS_TXT is also in the resolved set (during the §4.4.2 +// transition the legacy `_ans` TXT family carries the operator's +// required signal and SVCB rides along as optional). Keeping the +// post-process at the service layer keeps SVCBProfile profile-local — +// it does not need to know which other profiles are in play. +// +// SvcParam composition per RFC 9460: +// - alpn: protocol token (a2a / mcp / http-api), distinguishes +// protocols within the same RRset. +// - port: from endpoint URL authority; defaults to 443 (https) / +// 80 (http) when the URL omits a port. +// - key65280 (well-known suffix): per-protocol metadata file suffix +// (agent-card.json for A2A, mcp.json for MCP, omitted for +// HTTP-API). The DNS-AID draft names this param `wk`. +// - key65281 (capability digest): base64url(raw SHA-256 bytes) +// sourced from this endpoint's MetadataHash when the operator +// submitted one. Absent when MetadataHash is empty or malformed; +// in that case verifiers fetch the metadata document and accept +// any payload whose URL matches. The DNS-AID draft names this +// param `cap-sha256`. +// +// Private Use SvcParamKeys (RFC 9460 §14.3.1). The draft's `wk` and +// `cap-sha256` have no IANA-assigned code point, so they MUST be +// emitted in the keyNNNNN presentation form (RFC 9460 §2.1) — the +// named forms are unparseable: miekg/dns (and therefore ans-dns) and +// real DNS providers reject `wk=`/`cap-sha256=` with "bad SVCB key", +// and the lookup verifier can only ever observe live records in +// keyNNNNN form. key65280 / key65281 sit in the §14.3.1 Private Use +// range (65280–65534). +// +// Caveats of squatting Private Use space: +// - The DNS-AID draft's own examples squat unassigned space +// (key65001) — we deliberately do not copy that point; 65280+ is +// the registered-as-private range, not merely unassigned. +// - Collision with another experiment that picks the same code +// points is intrinsic to Private Use, but it is bounded to +// denial-of-verification: the verifier's subset matcher requires +// equal values, so a colliding key with a different value can +// only cause a false negative (verify-dns fails), never a false +// accept. +// - If `wk`/`cap-sha256` are IANA-registered later, switching the +// presentation form back to named keys is a real operator-facing +// migration (the published record value changes), not a silent +// swap. +// +// Deliberately NOT emitted (see the package design notes): +// - mandatory=: emitting `mandatory=key65280` would make the whole +// record invisible to every generic RFC 9460 client that doesn't +// understand the key, defeating the §8 coexistence goal. The +// draft text that gates client behavior on these params is an +// upstream DNS-AID tension, not a reason to fence off the record. +// - ipv4hint/ipv6hint: the RA knows endpoint URLs, not addresses; +// registry-sourced address hints go stale and are out of scope. +type SVCBProfile struct { + // tlPublicBaseURL feeds the family `_ans-badge` url= via BadgeRecord + // (empty falls the badge back to the agent's own endpoint URL). Set + // once by NewSVCBProfile; profiles are immutable after wiring, so Records + // stays a pure function of reg. + tlPublicBaseURL string +} + +// RFC 9460 §14.3.1 Private Use SvcParamKeys for the DNS-AID draft +// params that have no IANA code point. Emitted in keyNNNNN +// presentation form because the named forms (`wk`, `cap-sha256`) are +// rejected by miekg/dns and real DNS providers (see SVCBProfile doc). +const ( + // svcbKeyWellKnown carries the per-protocol well-known suffix + // (draft param `wk`). + svcbKeyWellKnown = "key65280" + // svcbKeyCapSHA256 carries the base64url SHA-256 capability digest + // of the endpoint metadata document (draft param `cap-sha256`). + svcbKeyCapSHA256 = "key65281" +) + +// NewSVCBProfile builds an ANS_SVCB profile whose family `_ans-badge` record +// points at the transparency log at tlPublicBaseURL. Empty tlPublicBaseURL +// falls the badge back to the agent's own endpoint URL. +func NewSVCBProfile(tlPublicBaseURL string) SVCBProfile { + return SVCBProfile{tlPublicBaseURL: tlPublicBaseURL} +} + +// ID returns ANS_SVCB. +func (SVCBProfile) ID() domain.DiscoveryProfile { return domain.DiscoveryProfileANSSVCB } + +// Records returns the SVCB rows + family trust records the SVCB profile +// needs an operator to publish. +func (s SVCBProfile) Records(reg *domain.AgentRegistration) []domain.ExpectedDNSRecord { + fqdn := reg.FQDN() + // Capacity: N SVCB rows + badge + up to N TLSA (one per distinct TLS + // port, see TLSARecord/Fix B). + records := make([]domain.ExpectedDNSRecord, 0, 2*len(reg.Endpoints)+1) + for _, ep := range reg.Endpoints { + alpn := protocolToANSValue(ep.Protocol) + wk := wkPathFor(ep.Protocol) + port := svcbPortFor(ep.AgentURL) + capSHA := metadataHashToCapSHA256(ep.MetadataHash) + // RFC 9460 §2.1 presentation form: unquoted SvcParamValue when the + // value has no characters special to the presentation format. + // alpn tokens, port digits, well-known path suffixes, and + // base64url digests all qualify. key65280/key65281 are the + // §14.3.1 Private Use presentation of the draft wk/cap-sha256 + // params (named forms are unparseable — see SVCBProfile doc). + value := fmt.Sprintf(`1 . alpn=%s port=%d`, alpn, port) + if wk != "" { + value += fmt.Sprintf(` %s=%s`, svcbKeyWellKnown, wk) + } + if capSHA != "" { + value += fmt.Sprintf(` %s=%s`, svcbKeyCapSHA256, capSHA) + } + records = append(records, domain.ExpectedDNSRecord{ + Name: fqdn, + Type: domain.DNSRecordSVCB, + Value: value, + Purpose: domain.PurposeDiscovery, + Required: true, + TTL: 3600, + }) + } + records = append(records, BadgeRecord(reg, s.tlPublicBaseURL)...) + records = append(records, TLSARecord(reg)...) + return records +} + +// Compile-time interface satisfaction check. Catches accidental +// signature drift on port.ProfileEmitter without needing a runtime +// assertion in cmd/main. +var _ port.ProfileEmitter = SVCBProfile{} + +// svcbPortFor returns the TCP port to advertise in the SVCB SvcParam +// `port=`. Reads it from the endpoint URL's authority. Falls back to +// 443 (https) / 80 (http) when the URL omits a port. Empty input or +// unparseable URL returns 443 — the §4.4.2 default for agent endpoints. +// +// Without this, every endpoint would emit a hardcoded port=443 and +// silently break verify-dns for agents on non-443 endpoints (operator +// publishes their actual port; expected says 443; mismatch). +func svcbPortFor(agentURL string) int { + if agentURL == "" { + return 443 + } + u, err := url.Parse(agentURL) + if err != nil { + return 443 + } + if p := u.Port(); p != "" { + if n, perr := strconv.Atoi(p); perr == nil { + return n + } + } + if u.Scheme == "http" { + return 80 + } + return 443 +} + +// metadataHashToCapSHA256 converts an AgentEndpoint.MetadataHash +// (`SHA256:<64-hex-chars>`) into the base64url form (RFC 4648 §5, +// no padding) the SVCB capability-digest SvcParam (key65281, draft +// `cap-sha256`) expects. Empty input, missing prefix, malformed hex, +// or a digest that isn't exactly 32 bytes all return the empty +// string, which the caller treats as "omit the SvcParam entirely". +// The domain layer (endpoint.go's metadataHashPattern) validates the +// canonical shape on input, so the defensive returns here exist for +// boundary safety only — the length check makes this function's +// defensiveness match the SHA-256 contract it documents rather than +// relying solely on the upstream regex. +func metadataHashToCapSHA256(metadataHash string) string { + if metadataHash == "" { + return "" + } + const prefix = "SHA256:" + if !strings.HasPrefix(metadataHash, prefix) { + return "" + } + raw, err := hex.DecodeString(strings.TrimPrefix(metadataHash, prefix)) + if err != nil || len(raw) != sha256.Size { + return "" + } + return base64.RawURLEncoding.EncodeToString(raw) +} diff --git a/internal/adapter/discovery/ans/svcb_test.go b/internal/adapter/discovery/ans/svcb_test.go new file mode 100644 index 0000000..878b7ac --- /dev/null +++ b/internal/adapter/discovery/ans/svcb_test.go @@ -0,0 +1,272 @@ +package ans + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/godaddy/ans/internal/domain" +) + +func mustReg(t *testing.T, host string, eps []domain.AgentEndpoint, cert *domain.ByocServerCertificate) *domain.AgentRegistration { + t.Helper() + v, err := domain.NewSemVer(1, 0, 0) + require.NoError(t, err) + ansName, err := domain.NewAnsName(v, host) + require.NoError(t, err) + return &domain.AgentRegistration{ + AnsName: ansName, + Endpoints: eps, + ServerCert: cert, + } +} + +func TestSVCBProfile_ID(t *testing.T) { + assert.Equal(t, domain.DiscoveryProfileANSSVCB, SVCBProfile{}.ID()) +} + +// TestSVCBProfile_Records walks the SvcParam composition rules (alpn / +// port / key65280 / key65281) the consolidated-draft fixes, plus the +// always-Required default the service walker post-processes. The +// well-known suffix and capability digest ride in RFC 9460 §14.3.1 +// Private Use keyNNNNN SvcParams (key65280 / key65281), not the named +// forms `wk=` / `card-sha256=` — those have no IANA code point and the +// miekg/dns zone parser rejects them. +func TestSVCBProfile_Records(t *testing.T) { + const sampleMetadataHash = "SHA256:098d650cc6d280dee4c0f47489a75cf17b9bfbbae53051806d4e084108b2ff27" + const wantSampleCapBase64 = "CY1lDMbSgN7kwPR0iadc8Xub-7rlMFGAbU4IQQiy_yc" + + tests := []struct { + name string + eps []domain.AgentEndpoint + wantCount int // svcb rows expected + wantPort string + wantAlpn string + wantWk string // empty means MUST NOT appear (well-known suffix in key65280) + wantCap string // empty means MUST NOT appear (capability digest in key65281) + wantNotPort string // value MUST NOT contain this string (e.g. wrong default) + }{ + { + name: "a2a_https_default_port", + eps: []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com"}, + }, + wantCount: 1, + wantPort: "port=443", + wantAlpn: "alpn=a2a", + wantWk: "key65280=agent-card.json", + }, + { + name: "mcp_emits_mcp_json_well_known", + eps: []domain.AgentEndpoint{ + {Protocol: domain.ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, + }, + wantCount: 1, + wantPort: "port=443", + wantAlpn: "alpn=mcp", + wantWk: "key65280=mcp.json", + }, + { + name: "http_api_omits_wk", + eps: []domain.AgentEndpoint{ + {Protocol: domain.ProtocolHTTPAPI, AgentURL: "https://agent.example.com"}, + }, + wantCount: 1, + wantPort: "port=443", + wantAlpn: "alpn=http-api", + wantWk: "", // HTTP-API has no per-protocol metadata file + }, + { + name: "cap_sha256_present_when_endpoint_metadata_hash_set", + eps: []domain.AgentEndpoint{ + { + Protocol: domain.ProtocolA2A, + AgentURL: "https://agent.example.com", + MetadataHash: sampleMetadataHash, + }, + }, + wantCount: 1, + wantPort: "port=443", + wantAlpn: "alpn=a2a", + wantWk: "key65280=agent-card.json", + wantCap: "key65281=" + wantSampleCapBase64, + }, + { + name: "non_443_port_from_url_authority", + eps: []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com:8443"}, + }, + wantCount: 1, + wantPort: "port=8443", + wantAlpn: "alpn=a2a", + wantWk: "key65280=agent-card.json", + wantNotPort: "port=443", + }, + { + name: "http_scheme_defaults_port_80", + eps: []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "http://agent.example.com"}, + }, + wantCount: 1, + wantPort: "port=80", + wantAlpn: "alpn=a2a", + wantWk: "key65280=agent-card.json", + }, + { + // First row asserted below; assertions on the A2A protocol's + // SvcParam composition (port, alpn, key65280). The MCP row's + // key65280=mcp.json is covered by the dedicated mcp test case + // above; here we only pin that the count is right and the row + // order tracks endpoint order. + name: "two_endpoints_emits_two_svcb_rows", + eps: []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com/a2a"}, + {Protocol: domain.ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, + }, + wantCount: 2, + wantPort: "port=443", + wantAlpn: "alpn=a2a", + wantWk: "key65280=agent-card.json", + }, + { + name: "zero_endpoints_emits_no_svcb_rows", + eps: nil, + wantCount: 0, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + reg := mustReg(t, "agent.example.com", tc.eps, nil) + records := SVCBProfile{}.Records(reg) + + var svcbRows []domain.ExpectedDNSRecord + for _, r := range records { + if r.Type == domain.DNSRecordSVCB { + svcbRows = append(svcbRows, r) + } + } + require.Len(t, svcbRows, tc.wantCount, "SVCB row count") + + if tc.wantCount == 0 { + return + } + + r := svcbRows[0] + assert.Equal(t, "agent.example.com", r.Name, "SVCB lives at the bare FQDN") + assert.Equal(t, domain.PurposeDiscovery, r.Purpose) + assert.Equal(t, 3600, r.TTL) + assert.True(t, r.Required, "SVCB always returns Required=true; service post-processes for the union case") + assert.Contains(t, r.Value, `1 . `, "ServiceMode (priority 1) with TargetName .") + if tc.wantAlpn != "" { + assert.Contains(t, r.Value, tc.wantAlpn) + } + if tc.wantPort != "" { + assert.Contains(t, r.Value, tc.wantPort) + } + if tc.wantNotPort != "" { + assert.NotContains(t, r.Value, tc.wantNotPort) + } + if tc.wantWk != "" { + assert.Contains(t, r.Value, tc.wantWk) + } else { + assert.NotContains(t, r.Value, "key65280=", + "key65280 (well-known suffix) MUST be absent for protocols with no metadata file convention") + } + if tc.wantCap != "" { + assert.Contains(t, r.Value, tc.wantCap) + } else { + assert.NotContains(t, r.Value, "key65281=", + "key65281 (capability digest) MUST be absent when endpoint MetadataHash is empty") + } + // Named-form regression guards: every SVCB value must use the + // keyNNNNN Private Use presentation, never the unpublishable + // named forms. miekg/dns rejects `wk=` / `card-sha256=` at the + // zone parser (proven empirically), so a backslide here strands + // agents in PENDING_DNS under the lookup verifier. + assert.NotContains(t, r.Value, "wk=", + "named `wk=` SvcParam MUST NOT appear; key65280 is the publishable form") + assert.NotContains(t, r.Value, "cap-sha256", + "named `cap-sha256=` SvcParam MUST NOT appear; key65281 is the publishable form") + assert.NotContains(t, r.Value, "card-sha256", + "legacy `card-sha256=` SvcParam MUST NOT appear; key65281 is the publishable form") + }) + } +} + +// TestSVCBProfile_RecordsIncludesFamilyTrustRecords pins that SVCBProfile +// is self-contained — it emits the family's badge and TLSA records too, +// so registering ANS_SVCB alone produces a complete set without any +// service-layer trust-record plumbing. +func TestSVCBProfile_RecordsIncludesFamilyTrustRecords(t *testing.T) { + reg := mustReg(t, "agent.example.com", + []domain.AgentEndpoint{{Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com"}}, + &domain.ByocServerCertificate{Fingerprint: "deadbeef"}) + + records := SVCBProfile{}.Records(reg) + + var sawBadge, sawTLSA bool + for _, r := range records { + if r.Purpose == domain.PurposeBadge { + sawBadge = true + assert.True(t, strings.HasPrefix(r.Name, "_ans-badge.")) + } + if r.Purpose == domain.PurposeCertificateBinding { + sawTLSA = true + assert.True(t, strings.HasPrefix(r.Name, "_443._tcp.")) + } + } + assert.True(t, sawBadge, "SVCB style must include the family `_ans-badge` record") + assert.True(t, sawTLSA, "SVCB style must include the TLSA record when ServerCert is set") +} + +func TestMetadataHashToCapSHA256(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + { + name: "valid_sha256_prefixed_hex_lowers_to_base64url", + in: "SHA256:098d650cc6d280dee4c0f47489a75cf17b9bfbbae53051806d4e084108b2ff27", + want: "CY1lDMbSgN7kwPR0iadc8Xub-7rlMFGAbU4IQQiy_yc", + }, + { + name: "all_zeros_round_trip", + in: "SHA256:0000000000000000000000000000000000000000000000000000000000000000", + want: "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + }, + {name: "empty_input_empty_output", in: "", want: ""}, + {name: "missing_sha256_prefix_returns_empty", in: "098d650cc6d2", want: ""}, + {name: "wrong_prefix_returns_empty", in: "SHA1:abc", want: ""}, + {name: "malformed_hex_after_prefix_returns_empty", in: "SHA256:not hex", want: ""}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, metadataHashToCapSHA256(tc.in)) + }) + } +} + +func TestSVCBPortFor(t *testing.T) { + tests := []struct { + name string + in string + want int + }{ + {name: "https_default_443", in: "https://agent.example.com", want: 443}, + {name: "http_default_80", in: "http://agent.example.com", want: 80}, + {name: "explicit_port_8443", in: "https://agent.example.com:8443", want: 8443}, + {name: "explicit_port_8080_http", in: "http://agent.example.com:8080", want: 8080}, + {name: "with_path_keeps_port", in: "https://agent.example.com:9443/a2a", want: 9443}, + {name: "empty_url_defaults_443", in: "", want: 443}, + {name: "malformed_url_defaults_443", in: "://not-a-url", want: 443}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, svcbPortFor(tc.in)) + }) + } +} diff --git a/internal/adapter/discovery/ans/tlsa.go b/internal/adapter/discovery/ans/tlsa.go new file mode 100644 index 0000000..c40e615 --- /dev/null +++ b/internal/adapter/discovery/ans/tlsa.go @@ -0,0 +1,113 @@ +package ans + +import ( + "fmt" + "net/url" + "sort" + + "github.com/godaddy/ans/internal/domain" +) + +// TLSARecord returns the TLSA cert-binding records every ANS-family +// profile emits for the agent's server cert — one record per distinct +// TLS endpoint port. Returns an empty slice when reg.ServerCert is nil. +// +// Both ANS_SVCB and ANS_TXT profiles call TLSARecord. When both are in +// the resolved set the service walker dedupes on (Name, Type, Value) +// so each TLSA lands once. +// +// Owner name carries the port (RFC 6698): `_._tcp.`. A +// DANE client connecting to a non-443 endpoint (8443 etc.) queries +// `_8443._tcp.` — emitting only `_443._tcp.` strands it. Ports come +// from the endpoint URLs (via svcbPortFor, the same source the SVCB +// row's `port=` SvcParam uses, so the TLSA owner name and the +// advertised port agree). Plaintext `http` endpoints are skipped — +// there is no TLS to bind. When the collected port set is empty (no +// endpoints, or all plaintext) the 443 fallback fires so a ServerCert +// always gets a cert-binding record. +// +// Ports are sorted numerically ascending. Deterministic order is +// load-bearing twice: the TL canonical-bytes invariant +// (ComputeRequiredDNSRecords feeds the V2 leaf hash) and the V1 +// revoke map's injectivity (v1event keys the DNSRecordsToRemove map on +// Name, so distinct `_._tcp.` names must stay distinct — which +// requires ports be deduped). The sort is numeric, not lexical: port +// 443 sorts before 1024 even though "_1024" < "_443" as strings. +// +// Required=false: TLSA is only meaningful when the operator's zone is +// DNSSEC-signed, which is a runtime property the domain layer cannot +// know. The verify layer enforces a stricter rule at query time: when +// a TLSA response IS DNSSEC-validated, its value MUST match the +// expected fingerprint (otherwise an attacker rewrote the record in +// a signed zone — the worst failure mode). That post-verify check +// lives alongside the verifier (lifecycle.go), not in the record set. +// +// `3 0 1 ` = DANE-EE + full-certificate + SHA-256 (RFC 6698). +// Selector 0 (full cert), not 1 (SPKI): the hex is +// ServerCert.Fingerprint, which is SHA-256 over the full DER cert (see +// internal/crypto/x509.go CertificateFingerprint), so selector 0 is +// what actually matches those bytes. Selector 0 ties the binding to +// full-cert reissuance — the TLSA value changes on every cert rotation +// even when the key is unchanged. Surviving rotation (RFC 7671 §5.1's +// 3 1 1 SPKI-hash preference) would need a dedicated SPKI-hash helper, +// not CertificateFingerprint. +func TLSARecord(reg *domain.AgentRegistration) []domain.ExpectedDNSRecord { + if reg.ServerCert == nil { + return nil + } + + ports := tlsPorts(reg.Endpoints) + + records := make([]domain.ExpectedDNSRecord, 0, len(ports)) + for _, port := range ports { + records = append(records, domain.ExpectedDNSRecord{ + Name: fmt.Sprintf("_%d._tcp.%s", port, reg.FQDN()), + Type: domain.DNSRecordTLSA, + Value: fmt.Sprintf("3 0 1 %s", reg.ServerCert.Fingerprint), + Purpose: domain.PurposeCertificateBinding, + Required: false, + TTL: 3600, + }) + } + return records +} + +// tlsPorts returns the distinct TLS endpoint ports, sorted numerically +// ascending. Plaintext `http` endpoints are skipped. An empty result +// (no endpoints, or all plaintext) falls back to {443} so the cert +// binding always has a home. +func tlsPorts(endpoints []domain.AgentEndpoint) []int { + seen := make(map[int]struct{}, len(endpoints)) + for _, ep := range endpoints { + if isPlaintextHTTP(ep.AgentURL) { + continue + } + seen[svcbPortFor(ep.AgentURL)] = struct{}{} + } + if len(seen) == 0 { + return []int{443} + } + ports := make([]int, 0, len(seen)) + for p := range seen { + ports = append(ports, p) + } + sort.Ints(ports) + return ports +} + +// isPlaintextHTTP reports whether agentURL uses the plaintext `http` +// scheme (no TLS to bind). Any other scheme — including a malformed or +// schemeless URL — is treated as TLS, matching svcbPortFor's +// default-to-443 posture: a TLSA record for an endpoint we can't fully +// parse is harmless (Required=false), but skipping one would silently +// drop a cert binding. +func isPlaintextHTTP(agentURL string) bool { + if agentURL == "" { + return false + } + u, err := url.Parse(agentURL) + if err != nil { + return false + } + return u.Scheme == "http" +} diff --git a/internal/adapter/discovery/ans/tlsa_test.go b/internal/adapter/discovery/ans/tlsa_test.go new file mode 100644 index 0000000..10609f4 --- /dev/null +++ b/internal/adapter/discovery/ans/tlsa_test.go @@ -0,0 +1,142 @@ +package ans + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/godaddy/ans/internal/domain" +) + +func TestTLSARecord(t *testing.T) { + mustRegWithEndpoints := func(t *testing.T, host string, eps []domain.AgentEndpoint, cert *domain.ByocServerCertificate) *domain.AgentRegistration { + t.Helper() + v, err := domain.NewSemVer(1, 0, 0) + require.NoError(t, err) + ansName, err := domain.NewAnsName(v, host) + require.NoError(t, err) + return &domain.AgentRegistration{AnsName: ansName, Endpoints: eps, ServerCert: cert} + } + + const fp = "abcdef0123456789" + + tests := []struct { + name string + reg *domain.AgentRegistration + wantEmpty bool + // wantNamesInOrder is the exact sequence of TLSA owner names the + // record set must contain, in order. Ports are sorted numerically + // ascending, so this also pins the sort. + wantNamesInOrder []string + wantValue string // applied to every emitted record + }{ + { + name: "no_server_cert_emits_no_tlsa", + reg: mustRegWithEndpoints(t, "agent.example.com", nil, nil), + wantEmpty: true, + }, + { + // 443 fallback: ServerCert set but no endpoints carry a port. + // Exactly one record at _443._tcp. + name: "no_endpoints_falls_back_to_443", + reg: mustRegWithEndpoints(t, "agent.example.com", nil, + &domain.ByocServerCertificate{Fingerprint: fp}), + wantNamesInOrder: []string{"_443._tcp.agent.example.com"}, + wantValue: "3 0 1 " + fp, + }, + { + name: "single_https_non_443_port", + reg: mustRegWithEndpoints(t, "agent.example.com", + []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com:8443"}, + }, + &domain.ByocServerCertificate{Fingerprint: fp}), + wantNamesInOrder: []string{"_8443._tcp.agent.example.com"}, + wantValue: "3 0 1 " + fp, + }, + { + // Numeric-not-lexical sort + dedupe. Ports 443 and 1024 sort + // 443 < 1024 numerically, but "_1024" < "_443" lexically + // (since '1' < '4'). The output MUST be _443 then _1024, + // proving the sort is by int, not by string. + name: "two_ports_443_and_1024_sort_numerically", + reg: mustRegWithEndpoints(t, "agent.example.com", + []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com:443"}, + {Protocol: domain.ProtocolMCP, AgentURL: "https://agent.example.com:1024"}, + }, + &domain.ByocServerCertificate{Fingerprint: fp}), + wantNamesInOrder: []string{ + "_443._tcp.agent.example.com", + "_1024._tcp.agent.example.com", + }, + wantValue: "3 0 1 " + fp, + }, + { + // Duplicate ports across endpoints collapse to one record. + name: "duplicate_ports_deduped", + reg: mustRegWithEndpoints(t, "agent.example.com", + []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com"}, + {Protocol: domain.ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, + }, + &domain.ByocServerCertificate{Fingerprint: fp}), + wantNamesInOrder: []string{"_443._tcp.agent.example.com"}, + wantValue: "3 0 1 " + fp, + }, + { + // All-plaintext (http) endpoints + ServerCert set. The cert + // binding still needs a home, so the 443 fallback fires: + // exactly one record at _443._tcp. + name: "all_plaintext_endpoints_with_cert_falls_back_to_443", + reg: mustRegWithEndpoints(t, "agent.example.com", + []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "http://agent.example.com"}, + {Protocol: domain.ProtocolMCP, AgentURL: "http://agent.example.com:8080"}, + }, + &domain.ByocServerCertificate{Fingerprint: fp}), + wantNamesInOrder: []string{"_443._tcp.agent.example.com"}, + wantValue: "3 0 1 " + fp, + }, + { + // Boundary URLs: empty and unparseable AgentURLs are treated + // as TLS (not plaintext http), so they take svcbPortFor's + // default-443 path. Both collapse to one _443._tcp record. A + // TLSA for an endpoint we can't fully parse is harmless + // (Required=false); dropping one would silently lose a cert + // binding. + name: "empty_and_malformed_urls_treated_as_tls_default_443", + reg: mustRegWithEndpoints(t, "agent.example.com", + []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: ""}, + {Protocol: domain.ProtocolMCP, AgentURL: "://not-a-url"}, + }, + &domain.ByocServerCertificate{Fingerprint: fp}), + wantNamesInOrder: []string{"_443._tcp.agent.example.com"}, + wantValue: "3 0 1 " + fp, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := TLSARecord(tc.reg) + if tc.wantEmpty { + assert.Empty(t, got) + return + } + require.Len(t, got, len(tc.wantNamesInOrder), + "TLSA record count mismatch") + for i, r := range got { + assert.Equal(t, tc.wantNamesInOrder[i], r.Name, + "TLSA owner name / sort order mismatch at index %d", i) + assert.Equal(t, domain.DNSRecordTLSA, r.Type) + assert.Equal(t, tc.wantValue, r.Value, + "TLSA value must be `3 0 1 ` (DANE-EE, selector 0)") + assert.Equal(t, domain.PurposeCertificateBinding, r.Purpose) + assert.False(t, r.Required, + "TLSA is non-required because operator zones may not be DNSSEC-signed") + assert.Equal(t, 3600, r.TTL) + } + }) + } +} diff --git a/internal/adapter/discovery/ans/txt.go b/internal/adapter/discovery/ans/txt.go new file mode 100644 index 0000000..e8eb273 --- /dev/null +++ b/internal/adapter/discovery/ans/txt.go @@ -0,0 +1,79 @@ +package ans + +import ( + "fmt" + + "github.com/godaddy/ans/internal/domain" + "github.com/godaddy/ans/internal/port" +) + +// TXTProfile implements port.ProfileEmitter for the original `_ans` TXT +// shape (ANS_TXT). It emits one TXT row per protocol endpoint at +// `_ans.` plus an HTTPS RR at the bare FQDN (when at least one +// endpoint is present), plus the ANS-family trust records. +// +// Behavior change vs. the pre-refactor domain function: the HTTPS RR +// is now gated on `len(reg.Endpoints) > 0`. The pre-refactor code +// emitted an HTTPS RR unconditionally inside the `if emitTXT` branch, +// producing a degenerate `[HTTPS-RR-only]` record set when an +// operator selected ANS_TXT with zero endpoints — a service binding +// for a non-existent agent. The refactor folds this fix in; the PR +// description calls it out. +// +// The HTTPS RR carries `1 . alpn=h2` — service binding for HTTP/2. +// Required=false because operators on CNAME-fronted apex zones cannot +// publish this record at the same name (CNAME at @ blocks HTTPS RR +// per RFC 1034 §3.6.2); the spec does not block them on its absence. +type TXTProfile struct { + // tlPublicBaseURL feeds the family `_ans-badge` url= via BadgeRecord + // (empty falls the badge back to the agent's own endpoint URL). Set + // once by NewTXTProfile; profiles are immutable after wiring, so Records + // stays a pure function of reg. + tlPublicBaseURL string +} + +// NewTXTProfile builds an ANS_TXT profile whose family `_ans-badge` record +// points at the transparency log at tlPublicBaseURL. Empty tlPublicBaseURL +// falls the badge back to the agent's own endpoint URL. +func NewTXTProfile(tlPublicBaseURL string) TXTProfile { + return TXTProfile{tlPublicBaseURL: tlPublicBaseURL} +} + +// ID returns ANS_TXT. +func (TXTProfile) ID() domain.DiscoveryProfile { return domain.DiscoveryProfileANSTXT } + +// Records returns the `_ans` TXT rows (one per endpoint) plus the +// HTTPS RR (when at least one endpoint exists) plus the family trust +// records. +func (s TXTProfile) Records(reg *domain.AgentRegistration) []domain.ExpectedDNSRecord { + fqdn := reg.FQDN() + version := reg.AnsName.Version().String() + var records []domain.ExpectedDNSRecord + for _, ep := range reg.Endpoints { + value := fmt.Sprintf("v=ans1; version=%s; p=%s; mode=direct; url=%s", + version, protocolToANSValue(ep.Protocol), ep.AgentURL) + records = append(records, domain.ExpectedDNSRecord{ + Name: fmt.Sprintf("_ans.%s", fqdn), + Type: domain.DNSRecordTXT, + Value: value, + Purpose: domain.PurposeDiscovery, + Required: true, + TTL: 3600, + }) + } + if len(reg.Endpoints) > 0 { + records = append(records, domain.ExpectedDNSRecord{ + Name: fqdn, + Type: domain.DNSRecordHTTPS, + Value: `1 . alpn=h2`, + Purpose: domain.PurposeDiscovery, + Required: false, + TTL: 3600, + }) + } + records = append(records, BadgeRecord(reg, s.tlPublicBaseURL)...) + records = append(records, TLSARecord(reg)...) + return records +} + +var _ port.ProfileEmitter = TXTProfile{} diff --git a/internal/adapter/discovery/ans/txt_test.go b/internal/adapter/discovery/ans/txt_test.go new file mode 100644 index 0000000..d8ef2e8 --- /dev/null +++ b/internal/adapter/discovery/ans/txt_test.go @@ -0,0 +1,149 @@ +package ans + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/godaddy/ans/internal/domain" +) + +func TestTXTProfile_ID(t *testing.T) { + assert.Equal(t, domain.DiscoveryProfileANSTXT, TXTProfile{}.ID()) +} + +func TestTXTProfile_Records(t *testing.T) { + tests := []struct { + name string + eps []domain.AgentEndpoint + wantTXTRows int + wantHTTPS bool + // when wantTXTRows > 0, assertions on the first TXT row's value: + wantInValue []string + wantNotIn []string + }{ + { + name: "one_a2a_endpoint_emits_one_ans_txt_and_one_https_rr", + eps: []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com/a2a"}, + }, + wantTXTRows: 1, + wantHTTPS: true, + wantInValue: []string{ + "v=ans1", + "version=1.0.0", + "p=a2a", + "mode=direct", + "url=https://agent.example.com/a2a", + }, + wantNotIn: []string{"v1.0.0", "1-0-0"}, + }, + { + name: "two_endpoints_emit_two_ans_txt_rows_one_https_rr", + eps: []domain.AgentEndpoint{ + {Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com/a2a"}, + {Protocol: domain.ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, + }, + wantTXTRows: 2, + wantHTTPS: true, + }, + { + // Behavioral correction bundled in this refactor: zero + // endpoints with ANS_TXT previously emitted an HTTPS RR + // with no `_ans` TXT companions — a service binding for a + // non-existent agent. The gate on len(endpoints) > 0 + // closes that degenerate output. + name: "zero_endpoints_emits_no_records_no_https_rr", + eps: nil, + wantTXTRows: 0, + wantHTTPS: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + reg := mustReg(t, "agent.example.com", tc.eps, nil) + records := TXTProfile{}.Records(reg) + + var txtRows int + var httpsRows int + var firstTXTValue string + for _, r := range records { + if r.Type == domain.DNSRecordTXT && strings.HasPrefix(r.Name, "_ans.") { + if txtRows == 0 { + firstTXTValue = r.Value + } + txtRows++ + assert.True(t, r.Required, "_ans TXT must be Required=true") + assert.Equal(t, domain.PurposeDiscovery, r.Purpose) + assert.Equal(t, 3600, r.TTL) + } + if r.Type == domain.DNSRecordHTTPS { + httpsRows++ + assert.Equal(t, "agent.example.com", r.Name, "HTTPS RR lives at the bare FQDN") + assert.False(t, r.Required, "HTTPS RR is non-required (CNAME-at-apex precludes publishing)") + assert.Contains(t, r.Value, "alpn=h2") + } + } + assert.Equal(t, tc.wantTXTRows, txtRows, "_ans TXT row count") + if tc.wantHTTPS { + assert.Equal(t, 1, httpsRows, "exactly one HTTPS RR per registration") + } else { + assert.Zero(t, httpsRows, "zero endpoints must emit zero HTTPS RRs") + } + + for _, want := range tc.wantInValue { + assert.Contains(t, firstTXTValue, want) + } + for _, notWant := range tc.wantNotIn { + assert.NotContains(t, firstTXTValue, notWant) + } + }) + } +} + +// TestTXTProfile_RecordsIncludesFamilyTrustRecords pins that TXTProfile is +// self-contained in the same way as SVCBProfile: it emits the family +// badge and TLSA records. +func TestTXTProfile_RecordsIncludesFamilyTrustRecords(t *testing.T) { + reg := mustReg(t, "agent.example.com", + []domain.AgentEndpoint{{Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com"}}, + &domain.ByocServerCertificate{Fingerprint: "deadbeef"}) + + records := TXTProfile{}.Records(reg) + + var sawBadge, sawTLSA bool + for _, r := range records { + if r.Purpose == domain.PurposeBadge { + sawBadge = true + } + if r.Purpose == domain.PurposeCertificateBinding { + sawTLSA = true + } + } + assert.True(t, sawBadge) + assert.True(t, sawTLSA) +} + +// TestTXTProfile_NoEndpointsSkipsAllFamilyAndDiscoveryRecords pins the +// existing-behavior contract that an empty endpoint list produces an +// empty record set when ServerCert is also nil. Zero endpoints + nil +// cert means there's nothing meaningful to publish. +func TestTXTProfile_NoEndpointsSkipsAllFamilyAndDiscoveryRecords(t *testing.T) { + reg := mustReg(t, "agent.example.com", nil, nil) + records := TXTProfile{}.Records(reg) + require.Empty(t, records) +} + +// TestTXTProfile_ZeroEndpointsWithCertOnlyEmitsTLSA pins that even with +// zero endpoints, a registration that has a server cert still gets the +// TLSA record. (The badge requires endpoints; TLSA does not.) +func TestTXTProfile_ZeroEndpointsWithCertOnlyEmitsTLSA(t *testing.T) { + reg := mustReg(t, "agent.example.com", nil, + &domain.ByocServerCertificate{Fingerprint: "abcd"}) + records := TXTProfile{}.Records(reg) + require.Len(t, records, 1) + assert.Equal(t, domain.DNSRecordTLSA, records[0].Type) +} diff --git a/internal/adapter/discovery/registry/registry.go b/internal/adapter/discovery/registry/registry.go new file mode 100644 index 0000000..c7817e8 --- /dev/null +++ b/internal/adapter/discovery/registry/registry.go @@ -0,0 +1,73 @@ +// Package registry holds the immutable composition facade for +// port.ProfileEmitter implementations. It is the bundled +// port.ProfileRegistry: cmd/ans-ra/main.go calls registry.New(...) +// with the profiles the binary should serve, and the service consumes the +// returned *Registry through the port interface. +// +// The registry itself is intentionally small (no global state, no +// init-time registration, no plug-in loading). Adding a new profile is a +// matter of registering an additional port.ProfileEmitter in +// cmd/ans-ra/main.go — the contributor walk-through lives in +// docs/contributing-discovery-profiles.md. +package registry + +import ( + "fmt" + + "github.com/godaddy/ans/internal/domain" + "github.com/godaddy/ans/internal/port" +) + +// Registry composes port.ProfileEmitter implementations by ID. Immutable +// post-construction: New stores both a lookup map (for O(1) Get) and an +// insertion-order slice (for stable IDs() iteration). Reads are safe for +// concurrent use without locking; there is no Add/Remove API. +type Registry struct { + profiles map[domain.DiscoveryProfile]port.ProfileEmitter + order []domain.DiscoveryProfile +} + +// New constructs a Registry from the given profiles in argument order. +// Returns an error when any profile's ID is invalid (per +// domain.DiscoveryProfile.IsValid) or when two profiles share the same ID — +// both are deterministic startup misconfigurations the wiring code in +// cmd/ans-ra/main.go must surface as a fail-loud server-start error, +// rather than degrading silently at the first registration. +// +// Iteration order matches argument order (stable across process restarts +// for a given wiring) so the service walker's emission order on the wire +// is determined here, not by request input. +func New(profiles ...port.ProfileEmitter) (*Registry, error) { + r := &Registry{ + profiles: make(map[domain.DiscoveryProfile]port.ProfileEmitter, len(profiles)), + order: make([]domain.DiscoveryProfile, 0, len(profiles)), + } + for _, s := range profiles { + id := s.ID() + if !id.IsValid() { + return nil, fmt.Errorf("registry: profile ID %q is not a valid DiscoveryProfile", id) + } + if _, dup := r.profiles[id]; dup { + return nil, fmt.Errorf("registry: duplicate profile ID %q", id) + } + r.profiles[id] = s + r.order = append(r.order, id) + } + return r, nil +} + +// Get returns the profile registered under id, or (nil, false) when no such +// profile is wired. Implements port.ProfileRegistry. +func (r *Registry) Get(id domain.DiscoveryProfile) (port.ProfileEmitter, bool) { + s, ok := r.profiles[id] + return s, ok +} + +// IDs returns the registered profile IDs in insertion order. The returned +// slice is a fresh copy; callers may mutate it without affecting the +// registry. Implements port.ProfileRegistry. +func (r *Registry) IDs() []domain.DiscoveryProfile { + out := make([]domain.DiscoveryProfile, len(r.order)) + copy(out, r.order) + return out +} diff --git a/internal/adapter/discovery/registry/registry_test.go b/internal/adapter/discovery/registry/registry_test.go new file mode 100644 index 0000000..0c8fe8a --- /dev/null +++ b/internal/adapter/discovery/registry/registry_test.go @@ -0,0 +1,158 @@ +package registry + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/godaddy/ans/internal/domain" + "github.com/godaddy/ans/internal/port" +) + +// fakeProfile is a minimal port.ProfileEmitter test double. ID() is the +// only behavior the registry inspects; Records() returns nil so the +// fake stays cheap to instantiate in tables. +type fakeProfile struct{ id domain.DiscoveryProfile } + +func (f fakeProfile) ID() domain.DiscoveryProfile { return f.id } +func (f fakeProfile) Records(*domain.AgentRegistration) []domain.ExpectedDNSRecord { + return nil +} + +func TestNew(t *testing.T) { + tests := []struct { + name string + profiles []port.ProfileEmitter + wantErr string // substring; empty means success expected + wantOrder []domain.DiscoveryProfile + }{ + { + name: "empty_registry_constructs", + profiles: nil, + wantOrder: []domain.DiscoveryProfile{}, + }, + { + name: "single_valid_profile", + profiles: []port.ProfileEmitter{fakeProfile{id: domain.DiscoveryProfileANSSVCB}}, + wantOrder: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + }, + { + name: "two_valid_profiles_preserve_argument_order", + profiles: []port.ProfileEmitter{ + fakeProfile{id: domain.DiscoveryProfileANSTXT}, + fakeProfile{id: domain.DiscoveryProfileANSSVCB}, + }, + wantOrder: []domain.DiscoveryProfile{ + domain.DiscoveryProfileANSTXT, + domain.DiscoveryProfileANSSVCB, + }, + }, + { + name: "duplicate_id_rejected", + profiles: []port.ProfileEmitter{ + fakeProfile{id: domain.DiscoveryProfileANSSVCB}, + fakeProfile{id: domain.DiscoveryProfileANSSVCB}, + }, + wantErr: "duplicate profile ID", + }, + { + name: "invalid_id_rejected", + profiles: []port.ProfileEmitter{ + fakeProfile{id: domain.DiscoveryProfile("NOT_A_STYLE")}, + }, + wantErr: "is not a valid DiscoveryProfile", + }, + { + name: "invalid_id_rejected_after_valid_one", + profiles: []port.ProfileEmitter{ + fakeProfile{id: domain.DiscoveryProfileANSSVCB}, + fakeProfile{id: domain.DiscoveryProfile("")}, + }, + wantErr: "is not a valid DiscoveryProfile", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + r, err := New(tc.profiles...) + if tc.wantErr != "" { + require.Error(t, err) + assert.Contains(t, err.Error(), tc.wantErr) + assert.Nil(t, r) + return + } + require.NoError(t, err) + require.NotNil(t, r) + assert.Equal(t, tc.wantOrder, r.IDs()) + }) + } +} + +func TestGet(t *testing.T) { + svcb := fakeProfile{id: domain.DiscoveryProfileANSSVCB} + txt := fakeProfile{id: domain.DiscoveryProfileANSTXT} + r, err := New(svcb, txt) + require.NoError(t, err) + + tests := []struct { + name string + id domain.DiscoveryProfile + wantHit bool + wantID domain.DiscoveryProfile + }{ + {name: "hit_svcb", id: domain.DiscoveryProfileANSSVCB, wantHit: true, wantID: domain.DiscoveryProfileANSSVCB}, + {name: "hit_txt", id: domain.DiscoveryProfileANSTXT, wantHit: true, wantID: domain.DiscoveryProfileANSTXT}, + {name: "miss_unknown_style", id: domain.DiscoveryProfile("UNKNOWN_FAMILY"), wantHit: false}, + {name: "miss_empty_id", id: domain.DiscoveryProfile(""), wantHit: false}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, ok := r.Get(tc.id) + assert.Equal(t, tc.wantHit, ok) + if tc.wantHit { + require.NotNil(t, got) + assert.Equal(t, tc.wantID, got.ID()) + } else { + assert.Nil(t, got) + } + }) + } +} + +// TestIDs_ReturnsCopy pins that the slice IDs() returns is a fresh copy — +// callers mutating it must not affect the registry's internal order, so +// concurrent readers can safely iterate without coordination. +func TestIDs_ReturnsCopy(t *testing.T) { + r, err := New( + fakeProfile{id: domain.DiscoveryProfileANSTXT}, + fakeProfile{id: domain.DiscoveryProfileANSSVCB}, + ) + require.NoError(t, err) + + first := r.IDs() + first[0] = domain.DiscoveryProfile("MUTATED") + + second := r.IDs() + assert.Equal(t, []domain.DiscoveryProfile{ + domain.DiscoveryProfileANSTXT, + domain.DiscoveryProfileANSSVCB, + }, second, "mutating one IDs() result must not affect a subsequent call") +} + +// TestIDs_StableAcrossCalls pins that two consecutive IDs() calls return +// the same insertion order. Map iteration is non-deterministic in Go; +// the registry must materialize order from the order slice, not the map. +func TestIDs_StableAcrossCalls(t *testing.T) { + r, err := New( + fakeProfile{id: domain.DiscoveryProfileANSTXT}, + fakeProfile{id: domain.DiscoveryProfileANSSVCB}, + ) + require.NoError(t, err) + + for range 100 { + assert.Equal(t, []domain.DiscoveryProfile{ + domain.DiscoveryProfileANSTXT, + domain.DiscoveryProfileANSSVCB, + }, r.IDs()) + } +} diff --git a/internal/adapter/dns/dns_test.go b/internal/adapter/dns/dns_test.go index 0cb0347..f57bdcc 100644 --- a/internal/adapter/dns/dns_test.go +++ b/internal/adapter/dns/dns_test.go @@ -252,6 +252,204 @@ func TestLookupVerifier_HTTPSMatch(t *testing.T) { } } +// TestLookupVerifier_SVCB exercises the Consolidated Approach SVCB +// verifier across match, missing, and shape-mismatch paths. The match +// case tests the same presentation form the RA's profile emitters +// produce (SVCBProfile in internal/adapter/discovery/ans/svcb.go, +// composed by the service walker in internal/ra/service/dnsrecords.go). +// +// This is the Fix A acceptance gate: the RA emits the draft `wk`/ +// `cap-sha256` params in RFC 9460 §14.3.1 Private Use keyNNNNN form +// (key65280 / key65281) precisely because the named forms are +// unparseable. These cases drive live key65280/key65281 records +// through the in-process miekg/dns server — the same parser ans-dns +// and real resolvers use — and prove formatHTTPSValue renders them +// byte-identically to what parseSVCBValue expects, so the adapter's +// emitted value round-trips through a real DNS answer without any +// verifier-side normalization. +func TestLookupVerifier_SVCB(t *testing.T) { + tests := []struct { + name string + zoneName string // RR owner-name in zone fixture + zoneRR string // full RR as miekg/dns zone-file syntax + queryName string // ExpectedDNSRecord.Name + want string // ExpectedDNSRecord.Value + found bool + why string + }{ + { + name: "match", + zoneName: "agent.example.com.", + zoneRR: `agent.example.com. 3600 IN SVCB 1 . alpn=a2a port=443`, + queryName: "agent.example.com", + want: `1 . alpn=a2a port=443`, + found: true, + }, + { + name: "missing-different-name-in-zone", + zoneName: "other.example.com.", + zoneRR: `other.example.com. 3600 IN SVCB 1 . alpn=a2a`, + queryName: "agent.example.com", + want: `1 . alpn=a2a`, + found: false, + why: "SVCB must not be Found when the zone has no matching record", + }, + { + name: "alias-mode-vs-service-mode-mismatch", + zoneName: "agent.example.com.", + zoneRR: `agent.example.com. 3600 IN SVCB 0 host.provider.example.`, + queryName: "agent.example.com", + want: `1 . alpn=a2a`, + found: false, + why: "ServiceMode expectation should not match an AliasMode record", + }, + { + // RFC 9460 §8 unknown-key ignore: a live record with extra + // SvcParams (e.g. another agentic spec adding its own keys to + // the same SVCB row) must still match when our committed + // SvcParams are present with equal values. A strict-equality + // matcher would fail this and — under DNSSEC AD=true — trip + // the SVCB_DNSSEC_MISMATCH hard fail. + name: "extra-svcparams-tolerated-rfc9460-section-8", + zoneName: "agent.example.com.", + zoneRR: `agent.example.com. 3600 IN SVCB 1 . alpn=a2a port=443 mandatory=alpn`, + queryName: "agent.example.com", + want: `1 . alpn=a2a port=443`, + found: true, + why: "subset match: live record carries extra `mandatory` param, expected params still satisfied", + }, + { + // Mirror of the tolerance case to pin the missing-required- + // param failure: if the live record drops one of our + // committed SvcParams, the match must fail even though it + // shares priority+target with the expected value. + name: "missing-expected-param-fails-subset-match", + zoneName: "agent.example.com.", + zoneRR: `agent.example.com. 3600 IN SVCB 1 . alpn=a2a`, + queryName: "agent.example.com", + want: `1 . alpn=a2a port=443`, + found: false, + why: "subset match requires every expected SvcParam present in the live record", + }, + { + // Fix A acceptance gate, exact match. A live record carrying + // the keyNNNNN Private Use params the RA emits (key65280 = + // well-known suffix, key65281 = capability digest) parses + // through the miekg/dns zone fixture and matches the expected + // value verbatim. The named forms (`wk=`/`cap-sha256=`) would + // fail dns.NewRR here — that they parse at all proves the + // publishability the no-migration argument rests on. + name: "key65280-and-key65281-exact-match", + zoneName: "agent.example.com.", + zoneRR: `agent.example.com. 3600 IN SVCB 1 . alpn=a2a port=443 key65280=agent-card.json key65281=CY1lDMbSgN7kwPR0iadc8Xub-7rlMFGAbU4IQQiy_yc`, + queryName: "agent.example.com", + want: `1 . alpn=a2a port=443 key65280=agent-card.json key65281=CY1lDMbSgN7kwPR0iadc8Xub-7rlMFGAbU4IQQiy_yc`, + found: true, + why: "live keyNNNNN record must round-trip byte-symmetrically and match the RA's emitted value", + }, + { + // Coexistence (RFC 9460 §8): a live record carrying our + // key65280 plus an extra SvcParam from a coexisting spec must + // still match — the subset matcher tolerates the extra param. + name: "key65280-coexists-with-extra-svcparam", + zoneName: "agent.example.com.", + zoneRR: `agent.example.com. 3600 IN SVCB 1 . alpn=a2a port=443 key65280=agent-card.json key65282=somethingelse`, + queryName: "agent.example.com", + want: `1 . alpn=a2a port=443 key65280=agent-card.json`, + found: true, + why: "subset match: live record carries an extra keyNNNNN param, expected params still satisfied", + }, + { + // Collision: another experiment squats key65280 with a + // different value. The subset matcher requires equal values, + // so this is a clean not-found (false negative — denial of + // verification), never a false accept. This bounds the + // Private Use collision risk the svcb.go doc describes. + name: "key65280-value-collision-is-clean-not-found", + zoneName: "agent.example.com.", + zoneRR: `agent.example.com. 3600 IN SVCB 1 . alpn=a2a port=443 key65280=someone-elses-value`, + queryName: "agent.example.com", + want: `1 . alpn=a2a port=443 key65280=agent-card.json`, + found: false, + why: "a colliding key65280 with a different value must fail the value-equality check, not falsely match", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + s := newTestServer(t) + s.add(tc.zoneName, "SVCB", tc.zoneRR) + + recs := []domain.ExpectedDNSRecord{{ + Name: tc.queryName, + Type: domain.DNSRecordSVCB, + Value: tc.want, + Required: false, + }} + got := s.verifyAgainst(t, recs) + if got[0].found != tc.found { + if tc.why != "" { + t.Error(tc.why) + } + t.Errorf("found=%v want %v; got=%+v", got[0].found, tc.found, got[0]) + } + }) + } +} + +// TestLookupVerifier_HTTPS_DNSSECFlagPropagates locks in that +// verifyHTTPS surfaces the AD bit so a DNSSEC-validated mismatch in a +// signed zone trips the lifecycle hard-fail rule (HTTPS_DNSSEC_MISMATCH) +// the same way TLSA_DNSSEC_MISMATCH does. Without this propagation the +// service layer would silently accept a rewritten HTTPS record. +func TestLookupVerifier_HTTPS_DNSSECFlagPropagates(t *testing.T) { + t.Parallel() + s := newTestServer(t) + s.setAD(true) + s.add("agent.example.com.", "HTTPS", + `agent.example.com. 3600 IN HTTPS 1 . alpn="h2"`) + + recs := []domain.ExpectedDNSRecord{{ + Name: "agent.example.com", Type: domain.DNSRecordHTTPS, + Value: `1 . alpn=h2`, + Required: false, + }} + got := s.verifyAgainst(t, recs) + if !got[0].found { + t.Errorf("HTTPS should match; got=%+v", got[0]) + } + if !got[0].dnssec { + t.Error("DNSSECVerified must surface true for HTTPS when the response carried AD=1") + } +} + +// TestLookupVerifier_SVCB_DNSSECFlagPropagates is the SVCB-side +// counterpart to the HTTPS test above. SVCB rows carry per-protocol +// service-binding parameters and the security-bearing capability +// digest (the draft cap-sha256 param, key65281 on the wire) when the +// endpoint has a MetadataHash, so the AD bit is load-bearing for the +// lifecycle SVCB_DNSSEC_MISMATCH rule. +func TestLookupVerifier_SVCB_DNSSECFlagPropagates(t *testing.T) { + t.Parallel() + s := newTestServer(t) + s.setAD(true) + s.add("agent.example.com.", "SVCB", + `agent.example.com. 3600 IN SVCB 1 . alpn=a2a port=443`) + + recs := []domain.ExpectedDNSRecord{{ + Name: "agent.example.com", Type: domain.DNSRecordSVCB, + Value: `1 . alpn=a2a port=443`, + Required: false, + }} + got := s.verifyAgainst(t, recs) + if !got[0].found { + t.Errorf("SVCB should match; got=%+v", got[0]) + } + if !got[0].dnssec { + t.Error("DNSSECVerified must surface true for SVCB when the response carried AD=1") + } +} + func TestLookupVerifier_NXDOMAINSurfacedAsError(t *testing.T) { t.Parallel() s := newTestServer(t) diff --git a/internal/adapter/dns/lookup.go b/internal/adapter/dns/lookup.go index ed5cc33..fd39a38 100644 --- a/internal/adapter/dns/lookup.go +++ b/internal/adapter/dns/lookup.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "time" @@ -86,6 +87,8 @@ func (v *LookupVerifier) VerifyRecords( r = v.verifyTLSA(lookupCtx, server, rec) case domain.DNSRecordHTTPS: r = v.verifyHTTPS(lookupCtx, server, rec) + case domain.DNSRecordSVCB: + r = v.verifySVCB(lookupCtx, server, rec) default: r.Error = fmt.Sprintf("unsupported record type: %s", rec.Type) } @@ -211,6 +214,13 @@ func (v *LookupVerifier) verifyTLSA(ctx context.Context, server string, rec doma // verifyHTTPS checks for an HTTPS-type record (RFC 9460). Matching // compares the SvcPriority + TargetName + params text verbatim // against the expected value after whitespace normalization. +// +// Captures the DNSSEC AuthenticatedData bit on the response, mirroring +// verifyTLSA and verifySVCB. The service-layer post-verify rule +// (lifecycle.go verifyDNSRecords) treats a DNSSEC-authenticated HTTPS +// record whose value disagrees with the expected one as a hard fail +// — same threat shape as TLSA: an attacker rewrote a record in a +// signed zone. func (v *LookupVerifier) verifyHTTPS(ctx context.Context, server string, rec domain.ExpectedDNSRecord) port.RecordVerification { r := port.RecordVerification{Record: rec} resp, err := v.exchange(ctx, server, rec.Name, dns.TypeHTTPS) @@ -222,6 +232,7 @@ func (v *LookupVerifier) verifyHTTPS(ctx context.Context, server string, rec dom r.Error = fmt.Sprintf("rcode %s", dns.RcodeToString[resp.Rcode]) return r } + r.DNSSECVerified = resp.AuthenticatedData wantNorm := normalizeHTTPS(rec.Value) for _, rr := range resp.Answer { https, ok := rr.(*dns.HTTPS) @@ -253,6 +264,127 @@ func formatHTTPSValue(s *dns.SVCB) string { return sb.String() } +// verifySVCB checks for a Consolidated Approach SVCB record (RFC 9460) +// at the agent's bare FQDN. Multiple SVCB records can share one RRset +// name distinguished by alpn, and the Consolidated Approach explicitly +// designs for multi-family coexistence in a single record — sibling +// families can share one SVCB row, distinguished by their own +// SvcParamKeys. Verification therefore implements RFC 9460 §8 +// unknown-key ignore semantics as a *subset* match: priority and +// target must equal the expected value exactly, every expected +// SvcParam must be present in the live record with an equal value, +// and additional SvcParams in the live record are tolerated. +// +// A strict-equality matcher would mark a multi-spec record not-found +// and (in a DNSSEC-signed zone) trip the SVCB_DNSSEC_MISMATCH hard +// fail in the lifecycle layer — defeating the entire point of the +// Consolidated Approach. +func (v *LookupVerifier) verifySVCB(ctx context.Context, server string, rec domain.ExpectedDNSRecord) port.RecordVerification { + r := port.RecordVerification{Record: rec} + resp, err := v.exchange(ctx, server, rec.Name, dns.TypeSVCB) + if err != nil { + r.Error = err.Error() + return r + } + if resp.Rcode != dns.RcodeSuccess { + r.Error = fmt.Sprintf("rcode %s", dns.RcodeToString[resp.Rcode]) + return r + } + r.DNSSECVerified = resp.AuthenticatedData + + expected, err := parseSVCBValue(rec.Value) + if err != nil { + r.Error = fmt.Sprintf("expected SVCB value: %v", err) + return r + } + for _, rr := range resp.Answer { + svcb, ok := rr.(*dns.SVCB) + if !ok { + continue + } + gotStr := formatHTTPSValue(svcb) + if r.Actual == "" { + r.Actual = gotStr + } + actual, err := parseSVCBValue(gotStr) + if err != nil { + // Skip records we can't parse — they'll surface as + // not-found if no other answer matches. + continue + } + if matchesSVCBSubset(expected, actual) { + r.Found = true + r.Actual = gotStr + return r + } + } + return r +} + +// parsedSVCB is the structured form of an SVCB or HTTPS record's +// presentation value: priority, target, and a SvcParam map. Used for +// RFC 9460 §8-compliant subset matching in verifySVCB so that live +// records carrying extra SvcParams from coexisting specs aren't +// treated as mismatches. +type parsedSVCB struct { + priority int + target string + params map[string]string +} + +// parseSVCBValue parses the presentation form +// " [k=v] [k=v] ..." that formatHTTPSValue emits +// (and that ComputeRequiredDNSRecords stores in +// ExpectedDNSRecord.Value). Whitespace inside SvcParam values is not +// supported because neither side emits it. +func parseSVCBValue(s string) (parsedSVCB, error) { + fields := strings.Fields(s) + if len(fields) < 2 { + return parsedSVCB{}, fmt.Errorf("svcb: too few fields in %q", s) + } + priority, err := strconv.Atoi(fields[0]) + if err != nil { + return parsedSVCB{}, fmt.Errorf("svcb: priority %q: %w", fields[0], err) + } + out := parsedSVCB{ + priority: priority, + target: fields[1], + params: make(map[string]string, len(fields)-2), + } + for _, f := range fields[2:] { + eq := strings.IndexByte(f, '=') + if eq < 0 { + // Valueless SvcParamKey (e.g. `no-default-alpn`); store + // with empty value so an expected entry can still match. + out.params[f] = "" + continue + } + out.params[f[:eq]] = f[eq+1:] + } + return out, nil +} + +// matchesSVCBSubset reports whether `actual` carries all SvcParams +// in `expected` (with equal values), tolerating any additional +// SvcParams in `actual`. Priority and target must match exactly. +// +// This is the verifier-side embodiment of RFC 9460 §8 unknown-key +// ignore semantics: the RA only verifies the SvcParams it committed +// to write; SvcParams from other agentic specs sharing the same +// SVCB row pass through unexamined. +func matchesSVCBSubset(expected, actual parsedSVCB) bool { + if expected.priority != actual.priority || expected.target != actual.target { + return false + } + for k, want := range expected.params { + got, ok := actual.params[k] + if !ok || got != want { + return false + } + } + return true +} + // normalizeTLSA collapses whitespace and lowercases the hex so // "3 1 1 abcd..." matches "3 1 1 ABCD...". func normalizeTLSA(s string) string { diff --git a/internal/adapter/docsui/openapi/ra.yaml b/internal/adapter/docsui/openapi/ra.yaml index 9ad2749..24f89dd 100644 --- a/internal/adapter/docsui/openapi/ra.yaml +++ b/internal/adapter/docsui/openapi/ra.yaml @@ -1023,6 +1023,49 @@ components: type: string enum: [A2A, MCP, HTTP-API] + DiscoveryProfile: + type: string + enum: [ANS_SVCB, ANS_TXT] + description: | + Names one DNS record family the RA can emit for an agent + registration. Used as the element type of discoveryProfiles[]. + Each value provisions a complete record set, not a single + record — the bullets below enumerate exactly what an operator + must publish for that profile. + + - ANS_SVCB: the Consolidated Approach (RFC 9460), recommended + default for new integrations. Provisions: + 1. One SVCB row per protocol endpoint at the bare FQDN + (ServiceMode `1 .`), carrying `alpn` (the protocol + token: a2a / mcp / http-api), `port` (the endpoint's + TLS port), `key65280` (the well-known path suffix, e.g. + agent-card.json for A2A; omitted for protocols with no + metadata-file convention), and `key65281` (the + base64url capability digest, present only when the + endpoint declared a metaDataHash). `key65280` and + `key65281` are the RFC 9460 §14.3.1 Private Use + presentation of the draft `wk` / `cap-sha256` SvcParams, + which have no IANA code point yet; the named forms are + unpublishable (strict RFC 9460 parsers reject them), so + the keyNNNNN forms are what reaches DNS. They switch + back to named forms if/when IANA registers the keys. + 2. One `_ans-badge` TXT at `_ans-badge.{fqdn}` (the + transparency-log discovery hint). + 3. One TLSA per distinct TLS port at `_._tcp.{fqdn}` + binding the server certificate (DANE-EE, full cert, + SHA-256), emitted only when a server certificate exists. + - ANS_TXT: original `_ans` TXT shape (one row per protocol at + `_ans.{fqdn}`), supported indefinitely for operators with + existing zone-edit tooling that targets `_ans.{fqdn}`. Emits + an HTTPS RR at the bare FQDN alongside carrying only + `alpn=h2` (an IANA-registered SvcParam — no keyNNNNN form is + needed here; the asymmetry with ANS_SVCB is intentional), + since `_ans` TXT carries no connection hints. The HTTPS RR is + best-effort: operators on CNAME-fronted apexes cannot publish + a record at that name (RFC 1034 §3.6.2) and verification does + not require it. Provisions the same `_ans-badge` TXT and + per-port TLSA records as ANS_SVCB. + RevocationReason: type: string enum: @@ -1070,11 +1113,42 @@ components: from this CSR at verify-acme. When omitted, the agent registers without an identity certificate and cannot add one later — it must register a new version instead. + discoveryProfiles: + type: array + items: + $ref: '#/components/schemas/DiscoveryProfile' + uniqueItems: true + minItems: 1 + description: | + Set of DNS record families the RA tells the operator to + publish and emits in the AGENT_REGISTERED TL event's + attestations.dnsRecordsProvisioned[]. The computed records + surface to the client on GET /v2/ans/agents/{agentId} as + registrationPending.dnsRecords[] once the agent reaches + PENDING_DNS — not on the 202 register response, which + returns only the ACME challenge (production records are + deferred until verify-acme proves domain control and issues + the certificates the TLSA binding depends on). + + Each value names one record family; an operator publishing + the union (Consolidated Approach SVCB plus the original + `_ans` TXT shape) sends both. Order is not significant. + + Omitted normalizes to ["ANS_SVCB"] server-side (the + recommended default per RFC 9460). The `minItems`/ + `uniqueItems` schema constraints are the canonical client + contract — validate before sending. A non-conformant + request (an explicit empty array, or duplicate values) is + handled defensively rather than rejected: empty is treated + as omitted and duplicates are ignored, but conformant + clients never send either. + example: ["ANS_SVCB"] required: - agentDisplayName - - version - agentHost + - discoveryProfiles - endpoints + - version AgentRevocationRequest: type: object @@ -1287,7 +1361,7 @@ components: type: string type: type: string - enum: [HTTPS, TLSA, TXT] + enum: [HTTPS, SVCB, TLSA, TXT] value: type: string priority: @@ -1584,5 +1658,10 @@ components: $ref: '#/components/schemas/DnsRecord' found: type: string + description: | + The live record value observed when a record exists + but does not match the required value. expected: - type: string \ No newline at end of file + type: string + description: | + The required value; equals record.value. \ No newline at end of file diff --git a/internal/adapter/store/sqlite/agent.go b/internal/adapter/store/sqlite/agent.go index a06be3f..1986ca8 100644 --- a/internal/adapter/store/sqlite/agent.go +++ b/internal/adapter/store/sqlite/agent.go @@ -39,6 +39,7 @@ type agentRow struct { SupersedesRegistrationID sql.NullInt64 `db:"supersedes_registration_id"` ACMEDNS01Token sql.NullString `db:"acme_dns01_token"` ACMEChallengeExpiresAtMs sql.NullInt64 `db:"acme_challenge_expires_at_ms"` + DiscoveryProfiles sql.NullString `db:"discovery_profiles"` CreatedAtMs int64 `db:"created_at_ms"` UpdatedAtMs int64 `db:"updated_at_ms"` } @@ -73,9 +74,58 @@ func (r agentRow) toDomain() (*domain.AgentRegistration, error) { if r.ACMEChallengeExpiresAtMs.Valid { reg.ACMEChallenge.ExpiresAt = msToTime(r.ACMEChallengeExpiresAtMs.Int64) } + if r.DiscoveryProfiles.Valid && r.DiscoveryProfiles.String != "" { + profiles, err := decodeDiscoveryProfiles(r.DiscoveryProfiles.String) + if err != nil { + return nil, fmt.Errorf("sqlite: decode discovery_profiles: %w", err) + } + reg.DiscoveryProfiles = profiles + } return reg, nil } +// decodeDiscoveryProfiles parses the JSON-array string stored in +// agent_registrations.discovery_profiles into the typed domain slice. +// Empty array unmarshals to a nil slice (the domain layer treats +// empty as "use default") so post-load behavior matches a freshly +// registered agent that didn't set the field. +func decodeDiscoveryProfiles(raw string) ([]domain.DiscoveryProfile, error) { + var strs []string + if err := json.Unmarshal([]byte(raw), &strs); err != nil { + return nil, err + } + if len(strs) == 0 { + return nil, nil + } + out := make([]domain.DiscoveryProfile, len(strs)) + for i, s := range strs { + out[i] = domain.DiscoveryProfile(s) + } + return out, nil +} + +// encodeDiscoveryProfiles renders a typed profile slice as the canonical +// JSON-array string the agent_registrations.discovery_profiles column +// stores. nil/empty input renders empty string so nullableString() +// stamps SQL NULL — domain treats NULL the same as the default set +// per ComputeRequiredDNSRecords. +func encodeDiscoveryProfiles(profiles []domain.DiscoveryProfile) string { + if len(profiles) == 0 { + return "" + } + strs := make([]string, len(profiles)) + for i, s := range profiles { + strs[i] = string(s) + } + b, err := json.Marshal(strs) + if err != nil { + // Marshalling a []string never errors in practice; surface as + // empty so the column is NULL rather than corrupted JSON. + return "" + } + return string(b) +} + // Save inserts or updates an AgentRegistration. Endpoints, server cert, // and identity CSR are persisted via their dedicated tables — Save only // writes the root aggregate row. @@ -93,8 +143,9 @@ func (s *AgentStore) Save(ctx context.Context, agent *domain.AgentRegistration) registration_timestamp_ms, last_renewal_timestamp_ms, supersedes_registration_id, acme_dns01_token, acme_challenge_expires_at_ms, + discovery_profiles, created_at_ms, updated_at_ms - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` res, err := s.db.extx(ctx).ExecContext(ctx, q, agent.AgentID, agent.OwnerID, @@ -109,6 +160,7 @@ func (s *AgentStore) Save(ctx context.Context, agent *domain.AgentRegistration) nullableInt64(agent.SupersedesRegistrationID), nullableString(agent.ACMEChallenge.DNS01Token), nullableMs(agent.ACMEChallenge.ExpiresAt), + nullableString(encodeDiscoveryProfiles(agent.DiscoveryProfiles)), now, now, ) if err != nil { @@ -131,6 +183,7 @@ func (s *AgentStore) Save(ctx context.Context, agent *domain.AgentRegistration) supersedes_registration_id = ?, acme_dns01_token = ?, acme_challenge_expires_at_ms = ?, + discovery_profiles = ?, updated_at_ms = ? WHERE id = ?` _, err := s.db.extx(ctx).ExecContext(ctx, q, @@ -141,6 +194,7 @@ func (s *AgentStore) Save(ctx context.Context, agent *domain.AgentRegistration) nullableInt64(agent.SupersedesRegistrationID), nullableString(agent.ACMEChallenge.DNS01Token), nullableMs(agent.ACMEChallenge.ExpiresAt), + nullableString(encodeDiscoveryProfiles(agent.DiscoveryProfiles)), now, agent.ID, ) diff --git a/internal/adapter/store/sqlite/agent_test.go b/internal/adapter/store/sqlite/agent_test.go index 5915bbf..dbacb5a 100644 --- a/internal/adapter/store/sqlite/agent_test.go +++ b/internal/adapter/store/sqlite/agent_test.go @@ -3,6 +3,7 @@ package sqlite import ( "context" "errors" + "fmt" "testing" "time" @@ -92,6 +93,65 @@ func TestAgentStore_Save_NilAgent(t *testing.T) { } } +// TestAgentStore_DiscoveryProfilesRoundTrip pins the discovery_profiles +// JSON-array column codec (encodeDiscoveryProfiles/decodeDiscoveryProfiles) +// through a real Save → FindByID cycle — the only direct exercise of the +// column added by migration 006. +func TestAgentStore_DiscoveryProfilesRoundTrip(t *testing.T) { + db := newTestDB(t) + store := NewAgentStore(db) + ctx := context.Background() + + tests := []struct { + name string + profiles []domain.DiscoveryProfile + want []domain.DiscoveryProfile + }{ + { + name: "svcb_only", + profiles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + want: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + }, + { + name: "union_preserves_order", + profiles: []domain.DiscoveryProfile{ + domain.DiscoveryProfileANSSVCB, domain.DiscoveryProfileANSTXT, + }, + want: []domain.DiscoveryProfile{ + domain.DiscoveryProfileANSSVCB, domain.DiscoveryProfileANSTXT, + }, + }, + { + name: "empty_round_trips_as_empty", + profiles: nil, + want: nil, + }, + } + + for i, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + agent := newAgentFixture(t, fmt.Sprintf("agent-profiles-%d", i), fmt.Sprintf("p%d.example.com", i)) + agent.DiscoveryProfiles = tc.profiles + if err := store.Save(ctx, agent); err != nil { + t.Fatalf("save: %v", err) + } + found, err := store.FindByID(ctx, agent.ID) + if err != nil { + t.Fatalf("FindByID: %v", err) + } + if len(found.DiscoveryProfiles) != len(tc.want) { + t.Fatalf("profiles length: got %d (%v), want %d (%v)", + len(found.DiscoveryProfiles), found.DiscoveryProfiles, len(tc.want), tc.want) + } + for j, p := range tc.want { + if found.DiscoveryProfiles[j] != p { + t.Errorf("profiles[%d]: got %q, want %q", j, found.DiscoveryProfiles[j], p) + } + } + }) + } +} + func TestAgentStore_Save_UniqueAnsNameViolation(t *testing.T) { store := NewAgentStore(newTestDB(t)) ctx := context.Background() diff --git a/internal/adapter/store/sqlite/migrations/006_agent_discovery_profiles.sql b/internal/adapter/store/sqlite/migrations/006_agent_discovery_profiles.sql new file mode 100644 index 0000000..00b1ae6 --- /dev/null +++ b/internal/adapter/store/sqlite/migrations/006_agent_discovery_profiles.sql @@ -0,0 +1,40 @@ +-- 006_agent_discovery_profiles.sql +-- Persist the operator's chosen set of DNS record families on the +-- registration row so verify-acme / verify-dns / badge responses +-- carry the same shape the operator chose at registration time. +-- +-- Stored as a JSON array of CONSTANT_CASE strings matching the V2 +-- register schema's DiscoveryProfile enum: +-- "ANS_SVCB" — Consolidated Approach SVCB rows + shared records +-- (RFC 9460; recommended default). +-- "ANS_TXT" — original `_ans` TXT shape + HTTPS RR + shared +-- records. Supported indefinitely for operators with +-- existing zone-edit tooling targeting `_ans.{fqdn}`. +-- +-- Examples: +-- '["ANS_SVCB"]' — default for new V2 registrations +-- '["ANS_TXT"]' — V1 lane + pre-PR rows +-- '["ANS_SVCB","ANS_TXT"]' — §4.4.2 transition union +-- +-- Nullable to allow rows that pre-date this migration to load. The +-- backfill below sets every such row to ["ANS_TXT"] because every +-- agent registered before this PR shipped received the original +-- `_ans` TXT shape — defaulting them to ["ANS_SVCB"] would silently +-- demand SVCB records they were never told to publish. CHECK uses +-- json_valid() (SQLite JSON1) so a malformed array fails at the +-- storage boundary instead of silently coercing in the domain. +-- Element-level validation lives in the service layer, where the +-- INVALID_DISCOVERY_PROFILE error is raised before the row is written. + +ALTER TABLE agent_registrations + ADD COLUMN discovery_profiles TEXT + CHECK (discovery_profiles IS NULL OR json_valid(discovery_profiles)); + +-- Backfill: every row registered before this migration shipped was +-- emitting the legacy `_ans` TXT shape (the only shape pre-PR-13). +-- Stamp them as ["ANS_TXT"] so post-deploy verify-dns calls demand +-- the record family the operator actually published. New rows get +-- the value written explicitly by applyDiscoveryProfiles in the service. +UPDATE agent_registrations + SET discovery_profiles = '["ANS_TXT"]' + WHERE discovery_profiles IS NULL; diff --git a/internal/domain/agent.go b/internal/domain/agent.go index 7f20a60..333f2f2 100644 --- a/internal/domain/agent.go +++ b/internal/domain/agent.go @@ -103,6 +103,14 @@ type AgentRegistration struct { // deviation. ACMEChallenge ACMEChallenge `json:"acmeChallenge,omitzero"` + // DiscoveryProfiles is the set of DNS record families the RA emits + // for this registration. Each value names one family — typically + // {ANS_SVCB} (Consolidated Approach), {ANS_TXT} (original `_ans` + // TXT shape), or the {ANS_SVCB, ANS_TXT} transition union. Empty + // at the domain layer is treated as DefaultDiscoveryProfiles() by + // the service-layer record walker (internal/ra/service/dnsrecords.go). + DiscoveryProfiles []DiscoveryProfile `json:"discoveryProfiles,omitempty"` + // PendingEvents holds domain events raised during this aggregate operation. // They are cleared after being published. PendingEvents []Event `json:"-"` diff --git a/internal/domain/dnsrecords.go b/internal/domain/dnsrecords.go index 4f0cb81..2d83a79 100644 --- a/internal/domain/dnsrecords.go +++ b/internal/domain/dnsrecords.go @@ -1,10 +1,74 @@ package domain -import ( - "fmt" - "net/url" +// DiscoveryProfile names one DNS record family the RA can emit for an +// agent registration. A registration carries a *set* of profiles +// (AgentRegistration.DiscoveryProfiles); operators publishing the union +// during a Consolidated Approach transition include both ANS_SVCB and +// ANS_TXT in the same set. +// +// Wire values are CONSTANT_CASE, matching every other enum on the V2 +// register schema (Protocol, RevocationReason, AgentLifecycleStatus, +// NextStep.action, ChallengeInfo.type, DnsRecord.type, etc.). The +// `ANS_` prefix anchors the namespace so a future second agentic spec +// adding its own SVCB family doesn't collide. +type DiscoveryProfile string + +const ( + // DiscoveryProfileANSSVCB emits Consolidated Approach SVCB records per + // RFC 9460 — one row per protocol at the bare FQDN, carrying alpn, + // port, key65280 (well-known suffix), and (when the endpoint has a + // MetadataHash) key65281 (capability digest) SvcParams. key65280/ + // key65281 are the RFC 9460 §14.3.1 Private Use presentation of the + // DNS-AID draft's wk/cap-sha256 params, which have no IANA code point; + // the adapter (internal/adapter/discovery/ans/svcb.go) documents why + // the named forms are unpublishable. + DiscoveryProfileANSSVCB DiscoveryProfile = "ANS_SVCB" + + // DiscoveryProfileANSTXT emits the original `_ans` TXT shape — one row + // per protocol at `_ans.{fqdn}`. Supported indefinitely for + // operators with existing zone-edit tooling that targets `_ans.`. + // Includes an HTTPS RR at the bare FQDN since `_ans` TXT carries + // no connection hints. + DiscoveryProfileANSTXT DiscoveryProfile = "ANS_TXT" ) +// DefaultDiscoveryProfiles is the set applied when the registration +// request omits discoveryProfiles entirely. Pinned to {ANS_SVCB} so new +// integrations follow §4.4.2's "publish one SVCB record... rather than +// parallel per-ecosystem record trees" SHOULD by default. Returned as a +// fresh slice so callers can mutate without affecting the canonical set. +func DefaultDiscoveryProfiles() []DiscoveryProfile { + return []DiscoveryProfile{DiscoveryProfileANSSVCB} +} + +// IsValid reports whether s is one of the defined profiles. Empty +// string is treated as invalid; callers normalize empty/missing +// discoveryProfiles to DefaultDiscoveryProfiles() before validation. +// +// Coherence with the discovery registry is enforced at server start: +// cmd/ans-ra/main.go asserts that every profile in +// ValidDiscoveryProfiles() has a registered port.ProfileEmitter adapter +// and vice versa. Drift fails server start, not the first verify-dns +// call. +func (s DiscoveryProfile) IsValid() bool { + switch s { + case DiscoveryProfileANSSVCB, DiscoveryProfileANSTXT: + return true + } + return false +} + +// ValidDiscoveryProfiles returns the canonical valid set as strings — +// the single source of truth for enum membership. Used by error +// messages and spec generation tooling so adding a third profile is a +// one-place change rather than a shotgun edit. +func ValidDiscoveryProfiles() []string { + return []string{ + string(DiscoveryProfileANSSVCB), + string(DiscoveryProfileANSTXT), + } +} + // DNSRecordType represents a DNS record type. type DNSRecordType string @@ -12,6 +76,13 @@ const ( DNSRecordTXT DNSRecordType = "TXT" DNSRecordTLSA DNSRecordType = "TLSA" DNSRecordHTTPS DNSRecordType = "HTTPS" + // DNSRecordSVCB is the "Consolidated Approach" service binding + // record (RFC 9460) emitted at the agent's bare FQDN. One SVCB + // record per protocol carries that protocol's connection hints and + // capability locators in a single DNS lookup. SvcParams from + // sibling families coexist in the same record per RFC 9460 §8 + // unknown-key ignore semantics. + DNSRecordSVCB DNSRecordType = "SVCB" ) // DNSRecordPurpose describes why a DNS record is needed. @@ -33,102 +104,3 @@ type ExpectedDNSRecord struct { Required bool `json:"required"` TTL int `json:"ttl"` } - -// ComputeRequiredDNSRecords generates the DNS records an operator must create -// for a given agent registration. The RA does not create these records — the -// operator manages their own DNS. The RA only verifies they exist. -// -// tlPublicBaseURL is the externally-reachable Transparency Log URL used in -// the _ans-badge record (e.g. "https://tl.example.org"). When non-empty the -// badge url= field points to the TL badge endpoint for this agent; when -// empty it falls back to the agent's own endpoint URL. -func ComputeRequiredDNSRecords(reg *AgentRegistration, tlPublicBaseURL string) []ExpectedDNSRecord { - fqdn := reg.FQDN() - // Version is emitted as a bare semver string ("1.2.0"). The - // `v`-prefixed form only appears inside the ANS name's hostname - // label — TXT record payloads carry the machine-readable semver - // directly, matching the shape a client would parse with any - // semver library. - version := reg.AnsName.Version().String() - var records []ExpectedDNSRecord - - // _ans TXT record for each protocol endpoint — agent discovery. - for _, ep := range reg.Endpoints { - value := fmt.Sprintf("v=ans1; version=%s; p=%s; mode=direct; url=%s", - version, protocolToANSValue(ep.Protocol), ep.AgentURL) - records = append(records, ExpectedDNSRecord{ - Name: fmt.Sprintf("_ans.%s", fqdn), - Type: DNSRecordTXT, - Value: value, - Purpose: PurposeDiscovery, - Required: true, - TTL: 3600, - }) - } - - // _ans-badge TXT record — trust badge. Required alongside _ans: - // resolvers and badge-verifying clients expect to find both, and - // publishing _ans without _ans-badge would advertise an agent - // that fails the public discovery handshake. - if len(reg.Endpoints) > 0 { - badgeURL := reg.Endpoints[0].AgentURL - if tlPublicBaseURL != "" && reg.AgentID != "" { - // tlPublicBaseURL is validated at config load (https, no - // query/fragment/userinfo), so JoinPath cannot fail here. - badgeURL, _ = url.JoinPath(tlPublicBaseURL, "v1", "agents", reg.AgentID) - } - badgeValue := fmt.Sprintf("v=ans-badge1; version=%s; url=%s", - version, badgeURL) - records = append(records, ExpectedDNSRecord{ - Name: fmt.Sprintf("_ans-badge.%s", fqdn), - Type: DNSRecordTXT, - Value: badgeValue, - Purpose: PurposeBadge, - Required: true, - TTL: 3600, - }) - } - - // TLSA record for certificate binding. Every registration has a - // server cert — either BYOC (operator-submitted) or CSR-signed - // (RA issues via its configured `ServerCertificateAuthority`). - // Both paths land through the same ByocServerCertificate struct, - // so `reg.ServerCert` is set for any registration that's reached - // verify-dns. - // - // `3 1 1 ` = DANE-EE + SubjectPublicKeyInfo + SHA-256 - // (RFC 6698). Required=false: operators whose zones aren't - // DNSSEC-signed can't produce a trustworthy TLSA record, so the - // RA doesn't block verify-dns on its presence. The verify layer - // enforces a stricter rule at query time: when a TLSA response - // IS DNSSEC-validated, its value must match the expected - // fingerprint (otherwise an attacker rewrote the record in a - // signed zone — the worst failure mode). That post-verify - // check lives alongside the verifier, not in the record set. - if reg.ServerCert == nil { - return records - } - records = append(records, ExpectedDNSRecord{ - Name: fmt.Sprintf("_443._tcp.%s", fqdn), - Type: DNSRecordTLSA, - Value: fmt.Sprintf("3 1 1 %s", reg.ServerCert.Fingerprint), - Purpose: PurposeCertificateBinding, - Required: false, - TTL: 3600, - }) - - return records -} - -func protocolToANSValue(p Protocol) string { - switch p { - case ProtocolA2A: - return "a2a" - case ProtocolMCP: - return "mcp" - case ProtocolHTTPAPI: - return "http-api" - default: - return string(p) - } -} diff --git a/internal/domain/dnsrecords_test.go b/internal/domain/dnsrecords_test.go index d5cd812..03803bc 100644 --- a/internal/domain/dnsrecords_test.go +++ b/internal/domain/dnsrecords_test.go @@ -1,139 +1,47 @@ package domain import ( - "strings" "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) -func TestComputeRequiredDNSRecords_WithoutCert(t *testing.T) { - ansName, _ := NewAnsName(mustSemVer(1, 2, 3), "agent.example.com") - reg := &AgentRegistration{ - AnsName: ansName, - Endpoints: []AgentEndpoint{ - {Protocol: ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, - {Protocol: ProtocolA2A, AgentURL: "https://agent.example.com/a2a"}, - }, - } - - records := ComputeRequiredDNSRecords(reg, "") - require.NotEmpty(t, records) - - // 2 endpoints → 2 _ans TXT records + 1 badge record. - var anxCount, badgeCount, tlsaCount int - for _, r := range records { - switch r.Purpose { - case PurposeDiscovery: - anxCount++ - assert.Equal(t, DNSRecordTXT, r.Type) - assert.True(t, strings.HasPrefix(r.Name, "_ans.")) - assert.True(t, r.Required) - assert.Contains(t, r.Value, "v=ans1") - // Version is bare semver, not DNS-label form — TXT - // payloads carry the machine-parseable semver directly. - assert.Contains(t, r.Value, "version=1.2.3") - assert.NotContains(t, r.Value, "v1.2.3", "no v-prefix in TXT payload") - assert.NotContains(t, r.Value, "1-2-3", "no dash form anywhere") - case PurposeBadge: - badgeCount++ - assert.Equal(t, DNSRecordTXT, r.Type) - assert.True(t, strings.HasPrefix(r.Name, "_ans-badge.")) - // Both _ans and _ans-badge are required: badge-verifying - // clients won't trust an agent that publishes _ans alone. - assert.True(t, r.Required) - case PurposeCertificateBinding: - tlsaCount++ - } - } - - assert.Equal(t, 2, anxCount) - assert.Equal(t, 1, badgeCount) - assert.Equal(t, 0, tlsaCount, "no cert → no TLSA record") -} - -func TestComputeRequiredDNSRecords_WithCert(t *testing.T) { - ansName, _ := NewAnsName(mustSemVer(1, 0, 0), "agent.example.com") - reg := &AgentRegistration{ - AnsName: ansName, - Endpoints: []AgentEndpoint{ - {Protocol: ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, - }, - ServerCert: &ByocServerCertificate{Fingerprint: "abcdef"}, - } - - records := ComputeRequiredDNSRecords(reg, "") - - var tlsaFound bool - for _, r := range records { - if r.Purpose == PurposeCertificateBinding { - tlsaFound = true - assert.Equal(t, DNSRecordTLSA, r.Type) - assert.Contains(t, r.Name, "_443._tcp.") - assert.Contains(t, r.Value, "abcdef") - // Required=false: TLSA is only meaningful when the - // operator's zone is DNSSEC-signed, which is a runtime - // property the domain layer can't know. The verifier - // enforces a post-verify rule: if the TLSA response was - // DNSSEC-validated, its value must match. See - // RegistrationService.verifyDNSRecords. - assert.False(t, r.Required) - } - } - assert.True(t, tlsaFound) -} - -func TestComputeRequiredDNSRecords_NoEndpoints(t *testing.T) { - ansName, _ := NewAnsName(mustSemVer(1, 0, 0), "agent.example.com") - reg := &AgentRegistration{AnsName: ansName} - records := ComputeRequiredDNSRecords(reg, "") - assert.Empty(t, records) +// TestValidDiscoveryProfiles pins the canonical valid set of +// DiscoveryProfile values returned by the helper used in the V2 +// INVALID_DISCOVERY_PROFILE error message and (eventually) by spec +// generation tooling. Order and contents are stable so an external +// client's error-message fixtures can match. +func TestValidDiscoveryProfiles(t *testing.T) { + got := ValidDiscoveryProfiles() + want := []string{"ANS_SVCB", "ANS_TXT"} + assert.Equal(t, want, got) } -func TestComputeRequiredDNSRecords_BadgeURLPointsToTL(t *testing.T) { - ansName, _ := NewAnsName(mustSemVer(1, 0, 0), "agent.example.com") - reg := &AgentRegistration{ - AgentID: "test-agent-id", - AnsName: ansName, - Endpoints: []AgentEndpoint{ - {Protocol: ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, - }, - } - - records := ComputeRequiredDNSRecords(reg, "https://tl.example.org") - for _, r := range records { - if r.Purpose == PurposeBadge { - assert.Contains(t, r.Value, "url=https://tl.example.org/v1/agents/test-agent-id") - assert.NotContains(t, r.Value, "agent.example.com/mcp") - return - } - } - t.Fatal("no badge record found") +// TestDefaultDiscoveryProfiles pins the default set applied when a V2 +// register request omits discoveryProfiles. {ANS_SVCB} per §4.4.2. +func TestDefaultDiscoveryProfiles(t *testing.T) { + got := DefaultDiscoveryProfiles() + want := []DiscoveryProfile{DiscoveryProfileANSSVCB} + assert.Equal(t, want, got) } -func TestComputeRequiredDNSRecords_BadgeFallbackWithoutTLURL(t *testing.T) { - ansName, _ := NewAnsName(mustSemVer(1, 0, 0), "agent.example.com") - reg := &AgentRegistration{ - AnsName: ansName, - Endpoints: []AgentEndpoint{ - {Protocol: ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}, - }, - } - - records := ComputeRequiredDNSRecords(reg, "") - for _, r := range records { - if r.Purpose == PurposeBadge { - assert.Contains(t, r.Value, "url=https://agent.example.com/mcp") - return - } +// TestDiscoveryProfile_IsValid covers the typed-enum membership predicate +// applyDiscoveryProfiles and the registry-coherence check both rely on. +func TestDiscoveryProfile_IsValid(t *testing.T) { + tests := []struct { + name string + s DiscoveryProfile + want bool + }{ + {name: "ans_svcb_is_valid", s: DiscoveryProfileANSSVCB, want: true}, + {name: "ans_txt_is_valid", s: DiscoveryProfileANSTXT, want: true}, + {name: "empty_is_invalid", s: DiscoveryProfile(""), want: false}, + {name: "unknown_is_invalid", s: DiscoveryProfile("UNKNOWN_FAMILY"), want: false}, + {name: "lowercase_is_invalid", s: DiscoveryProfile("ans_svcb"), want: false}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, tc.s.IsValid()) + }) } - t.Fatal("no badge record found") -} - -func TestProtocolToANSValue(t *testing.T) { - assert.Equal(t, "a2a", protocolToANSValue(ProtocolA2A)) - assert.Equal(t, "mcp", protocolToANSValue(ProtocolMCP)) - assert.Equal(t, "http-api", protocolToANSValue(ProtocolHTTPAPI)) - assert.Equal(t, "UNKNOWN", protocolToANSValue(Protocol("UNKNOWN"))) } diff --git a/internal/domain/endpoint.go b/internal/domain/endpoint.go index 565c19b..ecb25c7 100644 --- a/internal/domain/endpoint.go +++ b/internal/domain/endpoint.go @@ -4,6 +4,7 @@ import ( "fmt" "net/url" "regexp" + "strconv" "strings" ) @@ -208,5 +209,21 @@ func validateURL(rawURL, fieldName string) error { fmt.Sprintf("%s is not a valid URL: %q", fieldName, rawURL), ) } + // url.Parse accepts any positive integer port string. An + // out-of-range port (0, 99999, or a 64-bit overflow value) would + // flow into the SVCB `port=` SvcParam and the TLSA owner name + // (`_._tcp.`), producing a record set no DNS provider will + // accept — the operator would strand their own agent in + // PENDING_DNS with an unexplainable verify-dns mismatch. Reject it + // loudly here at the registration boundary instead. + if p := parsed.Port(); p != "" { + n, perr := strconv.Atoi(p) + if perr != nil || n < 1 || n > 65535 { + return NewValidationError( + "INVALID_ENDPOINT", + fmt.Sprintf("%s port %q is outside the valid range 1-65535", fieldName, p), + ) + } + } return nil } diff --git a/internal/domain/endpoint_test.go b/internal/domain/endpoint_test.go index 48f7bfa..6fe3aa0 100644 --- a/internal/domain/endpoint_test.go +++ b/internal/domain/endpoint_test.go @@ -68,6 +68,34 @@ func TestAgentEndpoint_Validate(t *testing.T) { assert.ErrorIs(t, e.Validate(), ErrValidation) }) + // Out-of-range ports parse fine through url.Parse but produce SVCB + // port= SvcParams and _._tcp. TLSA owner names no DNS + // provider accepts — the boundary must reject them loudly instead + // of stranding the operator at verify-dns. + t.Run("reject port above 65535", func(t *testing.T) { + e := valid + e.AgentURL = "https://agent.example.com:99999/mcp" + assert.ErrorIs(t, e.Validate(), ErrValidation) + }) + + t.Run("reject port zero", func(t *testing.T) { + e := valid + e.AgentURL = "https://agent.example.com:0/mcp" + assert.ErrorIs(t, e.Validate(), ErrValidation) + }) + + t.Run("reject overflowing port literal", func(t *testing.T) { + e := valid + e.AgentURL = "https://agent.example.com:443443443443/mcp" + assert.ErrorIs(t, e.Validate(), ErrValidation) + }) + + t.Run("accept explicit in-range port", func(t *testing.T) { + e := valid + e.AgentURL = "https://agent.example.com:8443/mcp" + assert.NoError(t, e.Validate()) + }) + t.Run("reject bad documentation url", func(t *testing.T) { e := valid e.DocumentationURL = "not a url" diff --git a/internal/port/discovery.go b/internal/port/discovery.go new file mode 100644 index 0000000..9d756b6 --- /dev/null +++ b/internal/port/discovery.go @@ -0,0 +1,56 @@ +package port + +import ( + "github.com/godaddy/ans/internal/domain" +) + +// ProfileEmitter is one named DNS discovery family the RA can emit for an +// agent registration. Implementations live under internal/adapter/discovery/ +// /. Today the bundled set is the ANS family (ANS_SVCB, ANS_TXT); +// additional families plug in as new vendor packages without touching the +// service or domain layers. +// +// Records is a pure function: no I/O, no context, no error. The service +// layer composes per-profile outputs by walking a ProfileRegistry and +// concatenating each profile's records, deduping by (Name, Type, Value) so +// per-family trust records (e.g. _ans-badge, TLSA) emitted by multiple +// profiles in the same family land once. +type ProfileEmitter interface { + // ID returns the wire-format identifier (e.g. "ANS_SVCB", "ANS_TXT"). + // Persisted on agent rows; surfaced on the V2 register schema; used + // as the registry key. + ID() domain.DiscoveryProfile + + // Records returns the DNS records this profile needs an operator to + // publish for reg. Includes both per-profile discovery records and any + // family-level trust attestation records the profile requires (e.g. + // _ans-badge for the ANS family, TLSA for any HTTPS-endpoint binding). + // The service walker dedupes across profiles, so a family's shared + // records emit once even when multiple sibling profiles request them. + // + // A profile that emits a transparency-log-relative trust record (the + // ANS family's _ans-badge) is configured with the deployment TL URL + // at construction — see ans.NewTXTProfile / ans.NewSVCBProfile — so this + // stays a pure function of reg with no per-call deployment input. + Records(reg *domain.AgentRegistration) []domain.ExpectedDNSRecord +} + +// ProfileRegistry is the lookup surface the service uses to compose +// per-profile outputs. Implementations are immutable post-construction so +// reads are safe under concurrent registration / verify-dns load without +// locking. The bundled implementation lives at +// internal/adapter/discovery/registry/registry.go. +type ProfileRegistry interface { + // Get returns the profile registered under id, or (nil, false) when no + // such profile is wired. A miss is informational, not an error: the + // service skips unknown stored profiles (e.g. post-decommission rows) + // and logs a WARN, rather than failing the registration. + Get(id domain.DiscoveryProfile) (ProfileEmitter, bool) + + // IDs returns every registered profile's ID in registry-wired + // insertion order. Order is stable across calls and process restarts + // for a given wiring — the service walker iterates IDs() and gates + // each by membership in reg.DiscoveryProfiles, so wiring order + // determines emission order on the wire (TL canonical bytes). + IDs() []domain.DiscoveryProfile +} diff --git a/internal/port/dns.go b/internal/port/dns.go index 03d96f2..55f0743 100644 --- a/internal/port/dns.go +++ b/internal/port/dns.go @@ -10,13 +10,26 @@ import ( type RecordVerification struct { Record domain.ExpectedDNSRecord Found bool - Actual string // What was actually returned by DNS (empty if not found). + // Actual carries the live answer DNS returned for this record. When + // records exist for the name/type but none matched the expected + // value, Actual is the first live answer (so the verify-dns 422 can + // show the operator what is actually in their zone). When nothing + // answered at all, Actual is empty. The service layer partitions on + // exactly this: !Found && Actual == "" is MISSING, !Found && Actual + // != "" is a value MISMATCH. When Found is true, Actual MAY differ + // benignly from the expected value (e.g. an SVCB subset match where + // the live record carries coexistence extras) and is informational + // only — Found is the verdict. + Actual string Error string // Lookup error, if any. // DNSSECVerified is true when the response carried an - // authenticated-data (AD) bit from a validating resolver. Only - // meaningful for TLSA records — surfacing this to the TL lets a - // downstream verifier trust the cert-binding assertion without - // re-querying DNS themselves. + // authenticated-data (AD) bit from a validating resolver. Set + // on TLSA, SVCB, and HTTPS responses; surfaced to the TL + // attestation so a downstream verifier can trust the cert / + // capability / service binding without re-querying DNS. The + // service layer enforces a hard-fail rule when AD=true and the + // record's value disagrees with the expected one (the threat + // shape: an attacker rewrote a record in a DNSSEC-signed zone). DNSSECVerified bool } diff --git a/internal/ra/handler/dto.go b/internal/ra/handler/dto.go index d2d3588..58e8bdb 100644 --- a/internal/ra/handler/dto.go +++ b/internal/ra/handler/dto.go @@ -87,10 +87,10 @@ type agentDetails struct { Links []linkDTO `json:"links"` } -func mapAgentDetails(res *service.DetailResult, r *http.Request, tlPublicBaseURL string) agentDetails { +func mapAgentDetails(res *service.DetailResult, r *http.Request, svc *service.RegistrationService) agentDetails { reg := res.Registration // Stamp endpoints onto the aggregate so the pending-block builder's - // call to domain.ComputeRequiredDNSRecords produces the full record + // call to svc.ComputeRequiredDNSRecords produces the full record // set (endpoints live in their own table and are returned as a // sibling slice by the service layer). reg.Endpoints = res.Endpoints @@ -104,7 +104,7 @@ func mapAgentDetails(res *service.DetailResult, r *http.Request, tlPublicBaseURL AgentStatus: string(reg.Status), Endpoints: mapEndpointsToDTO(res.Endpoints), RegistrationTimestamp: reg.Details.RegistrationTimestamp.Format("2006-01-02T15:04:05Z07:00"), - RegistrationPending: buildRegistrationPendingBlock(reg, r, tlPublicBaseURL), + RegistrationPending: buildRegistrationPendingBlock(reg, r, svc), Links: []linkDTO{ {Rel: "self", Href: agentURL(r, reg.AgentID)}, }, @@ -119,7 +119,7 @@ func mapAgentDetails(res *service.DetailResult, r *http.Request, tlPublicBaseURL // buildV1RegistrationPending. Agents still driving validation/DNS // expose the outstanding challenges + DNS records needed to // progress; terminal states omit the block. -func buildRegistrationPendingBlock(reg *domain.AgentRegistration, r *http.Request, tlPublicBaseURL string) *registrationPendingResponse { +func buildRegistrationPendingBlock(reg *domain.AgentRegistration, r *http.Request, svc *service.RegistrationService) *registrationPendingResponse { switch reg.Status { case domain.StatusPendingValidation: base := schemeOf(r) + "://" + r.Host + "/v2/ans/agents/" + reg.AgentID @@ -150,7 +150,7 @@ func buildRegistrationPendingBlock(reg *domain.AgentRegistration, r *http.Reques } case domain.StatusPendingDNS: base := schemeOf(r) + "://" + r.Host + "/v2/ans/agents/" + reg.AgentID - expected := domain.ComputeRequiredDNSRecords(reg, tlPublicBaseURL) + expected := svc.ComputeRequiredDNSRecords(reg) dnsRecords := make([]dnsRecordDTO, 0, len(expected)) for _, rec := range expected { dnsRecords = append(dnsRecords, dnsRecordDTO{ @@ -453,7 +453,7 @@ type incorrectRecordDTO struct { func dnsMissingFrom(mismatches []service.DNSMismatch) []dnsRecordDTO { var out []dnsRecordDTO for _, m := range mismatches { - if m.Code != "MISSING" { + if !m.IsMissing() { continue } out = append(out, dnsRecordDTO{ @@ -471,12 +471,12 @@ func dnsMissingFrom(mismatches []service.DNSMismatch) []dnsRecordDTO { func dnsIncorrectFrom(mismatches []service.DNSMismatch) []incorrectRecordDTO { var out []incorrectRecordDTO for _, m := range mismatches { - // MISMATCH = required record with wrong value. - // TLSA_DNSSEC_MISMATCH = TLSA response came back - // DNSSEC-authenticated but didn't match the expected cert - // fingerprint (signed-zone tampering). Both surface as - // incorrect records — same DTO shape. - if m.Code != "MISMATCH" && m.Code != "TLSA_DNSSEC_MISMATCH" { + // Incorrect = present but wrong: a plain value MISMATCH, or + // DNSSEC-authenticated tampering on a TLSA/SVCB/HTTPS record + // (_DNSSEC_MISMATCH, signed-zone tampering). All + // surface here as incorrect records — same DTO shape; missing + // records go to dnsMissingFrom. + if !m.IsIncorrect() { continue } out = append(out, incorrectRecordDTO{ diff --git a/internal/ra/handler/dto_helpers_test.go b/internal/ra/handler/dto_helpers_test.go index 355c765..55ace6e 100644 --- a/internal/ra/handler/dto_helpers_test.go +++ b/internal/ra/handler/dto_helpers_test.go @@ -40,30 +40,60 @@ func TestDNSMissingFrom_FiltersMissingOnly(t *testing.T) { } } -func TestDNSIncorrectFrom_FiltersMismatchOnly(t *testing.T) { - in := []service.DNSMismatch{ - { - Expected: domain.ExpectedDNSRecord{Name: "x", Type: domain.DNSRecordTXT, Value: "want"}, - Code: "MISSING", // filtered out - }, - { - Expected: domain.ExpectedDNSRecord{Name: "y", Type: domain.DNSRecordTXT, Value: "want"}, - Found: "actually-got", - Code: "MISMATCH", - }, - } - got := dnsIncorrectFrom(in) - if len(got) != 1 { - t.Fatalf("want 1 mismatch, got %d", len(got)) - } - if got[0].Record.Name != "y" { - t.Errorf("record.name: %q", got[0].Record.Name) - } - if got[0].Found != "actually-got" { - t.Errorf("found: %q", got[0].Found) +// The incorrect-record mappers must surface present-but-wrong records: a +// plain value MISMATCH and DNSSEC-authenticated tampering on EVERY +// DNSSEC-bearing record type (TLSA, SVCB, HTTPS) — not just TLSA, which +// was the original gap. MISSING records are excluded (they belong in the +// missing-records array). V2 and V1 classify identically, so one table +// drives both lanes. +func TestDNSIncorrectMappers_SurfaceMismatchAndAllDNSSEC(t *testing.T) { + cases := []struct { + name string + code string + wantSurface bool + }{ + {"plain_value_mismatch", "MISMATCH", true}, + {"tlsa_dnssec_tampering", "TLSA_DNSSEC_MISMATCH", true}, + {"svcb_dnssec_tampering", "SVCB_DNSSEC_MISMATCH", true}, + {"https_dnssec_tampering", "HTTPS_DNSSEC_MISMATCH", true}, + {"missing_is_excluded", "MISSING", false}, } - if got[0].Expected != "want" { - t.Errorf("expected: %q", got[0].Expected) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + in := []service.DNSMismatch{{ + Expected: domain.ExpectedDNSRecord{ + Name: "rec.example.com", Type: domain.DNSRecordSVCB, Value: "want", + }, + Found: "wrong", + Code: tc.code, + }} + + v2 := dnsIncorrectFrom(in) + v1 := v1DNSIncorrectFrom(in) + + if !tc.wantSurface { + if len(v2) != 0 { + t.Errorf("V2: code %q must be excluded, got %d", tc.code, len(v2)) + } + if len(v1) != 0 { + t.Errorf("V1: code %q must be excluded, got %d", tc.code, len(v1)) + } + return + } + + if len(v2) != 1 { + t.Fatalf("V2: code %q must surface, got %d", tc.code, len(v2)) + } + if v2[0].Record.Name != "rec.example.com" || v2[0].Found != "wrong" || v2[0].Expected != "want" { + t.Errorf("V2 mapping wrong: %+v", v2[0]) + } + if len(v1) != 1 { + t.Fatalf("V1: code %q must surface, got %d", tc.code, len(v1)) + } + if v1[0].Name != "rec.example.com" || v1[0].Found != "wrong" || v1[0].Expected != "want" { + t.Errorf("V1 mapping wrong: %+v", v1[0]) + } + }) } } @@ -86,24 +116,6 @@ func TestV1DNSMissingFrom_FiltersMissingOnly(t *testing.T) { } } -func TestV1DNSIncorrectFrom_FiltersMismatchOnly(t *testing.T) { - in := []service.DNSMismatch{ - {Expected: domain.ExpectedDNSRecord{Name: "a"}, Code: "MISSING"}, - {Expected: domain.ExpectedDNSRecord{Name: "b", Type: domain.DNSRecordTXT, Value: "want"}, - Code: "MISMATCH", Found: "wrong"}, - } - got := v1DNSIncorrectFrom(in) - if len(got) != 1 { - t.Fatalf("got %d, want 1", len(got)) - } - if got[0].Name != "b" { - t.Errorf("name: %q", got[0].Name) - } - if got[0].Found != "wrong" || got[0].Expected != "want" { - t.Errorf("found/expected: %q/%q", got[0].Found, got[0].Expected) - } -} - // ----- V1 renewal mappers ----- func TestMapV1RenewalStatus_ActiveAndFailed(t *testing.T) { diff --git a/internal/ra/handler/lifecycle.go b/internal/ra/handler/lifecycle.go index 26db754..e42184d 100644 --- a/internal/ra/handler/lifecycle.go +++ b/internal/ra/handler/lifecycle.go @@ -91,7 +91,7 @@ func (h *LifecycleHandler) Detail(w http.ResponseWriter, r *http.Request) { WriteError(w, err) return } - WriteJSON(w, http.StatusOK, mapAgentDetails(res, r, h.svc.TLPublicBaseURL())) + WriteJSON(w, http.StatusOK, mapAgentDetails(res, r, h.svc)) } // ----- GET /v2/ans/agents/{agentId}/certificates/identity ----- diff --git a/internal/ra/handler/lifecycle_test.go b/internal/ra/handler/lifecycle_test.go index 73908e3..0fe43d5 100644 --- a/internal/ra/handler/lifecycle_test.go +++ b/internal/ra/handler/lifecycle_test.go @@ -852,8 +852,12 @@ func newHandlerFixture(t *testing.T) *handlerFixture { t.Fatal(err) } + discoveryReg, err := service.NewDefaultProfileRegistry("") + if err != nil { + t.Fatal(err) + } svc := service.NewRegistrationService( - agents, endpoints, certsStore, byoc, renewals, validator, identityCA, bus, outbox, db, + agents, endpoints, certsStore, byoc, renewals, validator, identityCA, bus, outbox, db, discoveryReg, ).WithSigner(service.EventSigner{ KeyManager: km, KeyID: "ra-signer", diff --git a/internal/ra/handler/registration.go b/internal/ra/handler/registration.go index 51656bc..efbcc4e 100644 --- a/internal/ra/handler/registration.go +++ b/internal/ra/handler/registration.go @@ -39,6 +39,14 @@ type registrationRequest struct { ServerCsrPEM string `json:"serverCsrPEM,omitempty"` ServerCertificatePEM string `json:"serverCertificatePEM,omitempty"` ServerCertificateChainPEM string `json:"serverCertificateChainPEM,omitempty"` + + // DiscoveryProfiles is the set of DNS record families the RA emits + // for this registration. Each element is one of "ANS_SVCB" or + // "ANS_TXT". Typical values: ["ANS_SVCB"] (default, recommended), + // ["ANS_TXT"], or ["ANS_SVCB", "ANS_TXT"] (transition union). + // Empty/missing → ["ANS_SVCB"]. Any invalid element rejected + // with 422 INVALID_DISCOVERY_PROFILE. See ANS_SPEC.md §4.4.2. + DiscoveryProfiles []string `json:"discoveryProfiles,omitempty"` } type endpointDTO struct { @@ -153,6 +161,7 @@ func (h *RegistrationHandler) Register(w http.ResponseWriter, r *http.Request) { ServerCsrPEM: req.ServerCsrPEM, ServerCertificatePEM: req.ServerCertificatePEM, ServerCertificateChainPEM: req.ServerCertificateChainPEM, + DiscoveryProfiles: toDomainDiscoveryProfiles(req.DiscoveryProfiles), }) if err != nil { WriteError(w, err) @@ -162,6 +171,24 @@ func (h *RegistrationHandler) Register(w http.ResponseWriter, r *http.Request) { WriteJSON(w, http.StatusAccepted, mapRegistrationResponse(resp, r)) } +// toDomainDiscoveryProfiles converts the wire []string into the typed +// domain slice. nil (field omitted in the JSON request) flows through +// as nil and a non-nil empty slice (explicit `"discoveryProfiles": []`) +// flows through as an empty non-nil []DiscoveryProfile; the service +// layer normalizes both to DefaultDiscoveryProfiles(), so the +// distinction no longer changes the outcome. Per-element validity and +// duplicate deduplication live in applyDiscoveryProfiles. +func toDomainDiscoveryProfiles(raw []string) []domain.DiscoveryProfile { + if raw == nil { + return nil + } + out := make([]domain.DiscoveryProfile, len(raw)) + for i, s := range raw { + out[i] = domain.DiscoveryProfile(s) + } + return out +} + // mapEndpointsFromDTO converts the incoming JSON endpoints to the // domain types, returning a validation error on malformed input. func mapEndpointsFromDTO(dtos []endpointDTO) ([]domain.AgentEndpoint, error) { diff --git a/internal/ra/handler/v1lifecycle.go b/internal/ra/handler/v1lifecycle.go index b026d59..2e1a73c 100644 --- a/internal/ra/handler/v1lifecycle.go +++ b/internal/ra/handler/v1lifecycle.go @@ -190,7 +190,7 @@ func (h *V1LifecycleHandler) Revoke(w http.ResponseWriter, r *http.Request) { func v1DNSMissingFrom(mismatches []service.DNSMismatch) []v1DNSRecordDTO { out := make([]v1DNSRecordDTO, 0) for _, m := range mismatches { - if m.Code != "MISSING" { + if !m.IsMissing() { continue } out = append(out, v1DNSRecordDTO{ @@ -208,10 +208,11 @@ func v1DNSMissingFrom(mismatches []service.DNSMismatch) []v1DNSRecordDTO { func v1DNSIncorrectFrom(mismatches []service.DNSMismatch) []v1DNSMismatchDTO { out := make([]v1DNSMismatchDTO, 0) for _, m := range mismatches { - // MISMATCH and TLSA_DNSSEC_MISMATCH both surface here as - // incorrect-record entries. See dto.go's dnsIncorrectFrom - // for the DNSSEC-mismatch rationale. - if m.Code != "MISMATCH" && m.Code != "TLSA_DNSSEC_MISMATCH" { + // Present-but-wrong records — plain MISMATCH or DNSSEC tampering + // on TLSA/SVCB/HTTPS (_DNSSEC_MISMATCH) — surface + // here as incorrect-record entries. See + // service.DNSMismatch.IsIncorrect for the classification. + if !m.IsIncorrect() { continue } out = append(out, v1DNSMismatchDTO{ diff --git a/internal/ra/handler/v1lifecycle_direct_test.go b/internal/ra/handler/v1lifecycle_direct_test.go index 9c8e66d..8d45225 100644 --- a/internal/ra/handler/v1lifecycle_direct_test.go +++ b/internal/ra/handler/v1lifecycle_direct_test.go @@ -417,24 +417,42 @@ func TestV1SubmitServerCSR_BadJSON(t *testing.T) { } } -// fakeDNSVerifier always returns a single MISSING + a single -// MISMATCH record. Used to drive the DNS-mismatch arm of -// VerifyDNS in both V1 and V2 lanes. +// Fixed record identities the fakeDNSVerifier reports, so the 422-bucket +// assertions below can match by name without depending on what +// ComputeRequiredDNSRecords produced for the test agent. +const ( + fakeMissingName = "_missing.example.com" + fakeMismatchName = "_mismatch.example.com" + fakeMismatchLive = "wrong-value" +) + +// fakeDNSVerifier deterministically reports exactly one MISSING and one +// MISMATCH record regardless of the expected record set it is handed, +// exercising both 422 buckets under the Fix C classification: +// - MISSING: Found=false, Actual="" (nothing answered) +// - MISMATCH: Found=false, Actual="wrong-value" (present but wrong; +// the live value rides through to incorrectRecords[].found) +// +// Ignoring the input records keeps the two buckets stable across agents +// whose computed record counts differ between the V1 and V2 lanes. type fakeDNSVerifier struct{} -func (fakeDNSVerifier) VerifyRecords(_ context.Context, _ string, recs []domain.ExpectedDNSRecord) (*port.VerificationResult, error) { - out := &port.VerificationResult{Results: make([]port.RecordVerification, 0, len(recs))} - for i, rec := range recs { - v := port.RecordVerification{Record: rec} - if i == 0 { - // first record reported as found mismatch - v.Found = false - v.Actual = "wrong-value" - } - out.Results = append(out.Results, v) - } - out.AllRequired = false - return out, nil +func (fakeDNSVerifier) VerifyRecords(_ context.Context, _ string, _ []domain.ExpectedDNSRecord) (*port.VerificationResult, error) { + return &port.VerificationResult{ + AllRequired: false, + Results: []port.RecordVerification{ + { + Record: domain.ExpectedDNSRecord{Name: fakeMissingName, Type: domain.DNSRecordTXT, Value: "expected-missing", Required: true}, + Found: false, + Actual: "", + }, + { + Record: domain.ExpectedDNSRecord{Name: fakeMismatchName, Type: domain.DNSRecordTXT, Value: "expected-mismatch", Required: true}, + Found: false, + Actual: fakeMismatchLive, + }, + }, + }, nil } // TestVerifyDNS_MismatchReturns422 covers the V2 VerifyDNS @@ -459,6 +477,46 @@ func TestVerifyDNS_MismatchReturns422(t *testing.T) { if rec.Code != http.StatusUnprocessableEntity { t.Fatalf("status: got %d want 422; body=%s", rec.Code, rec.Body) } + + // Fix C bucket membership: the MISSING record lands in missingRecords + // (by name only — no live value), the MISMATCH record lands in + // incorrectRecords carrying its live found value. V2 nests the + // incorrect record under .record. + var body struct { + MissingRecords []struct { + Name string `json:"name"` + } `json:"missingRecords"` + IncorrectRecords []struct { + Record struct { + Name string `json:"name"` + } `json:"record"` + Found string `json:"found"` + } `json:"incorrectRecords"` + } + if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil { + t.Fatalf("unmarshal 422 body: %v; body=%s", err, rec.Body) + } + missingSeen := false + for _, m := range body.MissingRecords { + if m.Name == fakeMissingName { + missingSeen = true + } + } + if !missingSeen { + t.Errorf("missingRecords missing %q; body=%s", fakeMissingName, rec.Body) + } + mismatchSeen := false + for _, ir := range body.IncorrectRecords { + if ir.Record.Name == fakeMismatchName { + mismatchSeen = true + if ir.Found != fakeMismatchLive { + t.Errorf("incorrectRecords[%q].found: got %q want %q", fakeMismatchName, ir.Found, fakeMismatchLive) + } + } + } + if !mismatchSeen { + t.Errorf("incorrectRecords missing %q; body=%s", fakeMismatchName, rec.Body) + } } // TestV1Revoke_ServiceErrorBranch covers the service-error arm @@ -710,6 +768,42 @@ func TestV1VerifyDNS_MismatchReturns422(t *testing.T) { if rec.Code != http.StatusUnprocessableEntity { t.Fatalf("status: got %d want 422; body=%s", rec.Code, rec.Body) } + + // Fix C bucket membership, V1 shape: missingRecords by name, and the + // flat incorrectRecords entry carrying name + found live value. + var body struct { + MissingRecords []struct { + Name string `json:"name"` + } `json:"missingRecords"` + IncorrectRecords []struct { + Name string `json:"name"` + Found string `json:"found"` + } `json:"incorrectRecords"` + } + if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil { + t.Fatalf("unmarshal 422 body: %v; body=%s", err, rec.Body) + } + missingSeen := false + for _, m := range body.MissingRecords { + if m.Name == fakeMissingName { + missingSeen = true + } + } + if !missingSeen { + t.Errorf("missingRecords missing %q; body=%s", fakeMissingName, rec.Body) + } + mismatchSeen := false + for _, ir := range body.IncorrectRecords { + if ir.Name == fakeMismatchName { + mismatchSeen = true + if ir.Found != fakeMismatchLive { + t.Errorf("incorrectRecords[%q].found: got %q want %q", fakeMismatchName, ir.Found, fakeMismatchLive) + } + } + } + if !mismatchSeen { + t.Errorf("incorrectRecords missing %q; body=%s", fakeMismatchName, rec.Body) + } } // TestVerifyRenewalACME_BYOCSync covers the BYOC + verify-acme diff --git a/internal/ra/handler/v1registration.go b/internal/ra/handler/v1registration.go index ec4baa6..cc5e4ab 100644 --- a/internal/ra/handler/v1registration.go +++ b/internal/ra/handler/v1registration.go @@ -246,7 +246,7 @@ func (h *V1RegistrationHandler) Detail(w http.ResponseWriter, r *http.Request) { WriteError(w, err) return } - WriteJSON(w, http.StatusOK, mapV1AgentDetail(res.Registration, res.Endpoints, r, h.svc.TLPublicBaseURL())) + WriteJSON(w, http.StatusOK, mapV1AgentDetail(res.Registration, res.Endpoints, r, h.svc)) } // ----- DTO mapping helpers ----- @@ -376,7 +376,7 @@ func rfc3339Zero(t time.Time) string { // Endpoints arrive as a separate slice because the domain aggregate // stores them in their own repository; the service layer gathers // both and hands them in. -func mapV1AgentDetail(reg *domain.AgentRegistration, endpoints []domain.AgentEndpoint, r *http.Request, tlPublicBaseURL string) *v1AgentDetailResponse { +func mapV1AgentDetail(reg *domain.AgentRegistration, endpoints []domain.AgentEndpoint, r *http.Request, svc *service.RegistrationService) *v1AgentDetailResponse { eps := make([]v1EndpointDTO, len(endpoints)) for i, e := range endpoints { fns := make([]v1FunctionDTO, len(e.Functions)) @@ -404,7 +404,7 @@ func mapV1AgentDetail(reg *domain.AgentRegistration, endpoints []domain.AgentEnd lastRenewal = reg.Details.LastRenewalTimestamp.UTC().Format("2006-01-02T15:04:05Z") } // Stamp endpoints onto the aggregate so the buildV1RegistrationPending - // helper's call to domain.ComputeRequiredDNSRecords produces the + // helper's call to svc.ComputeRequiredDNSRecords produces the // full TRUST / BADGE / DISCOVERY / TLSA record set. The service // layer returns endpoints as a sibling slice (they live in their // own table); the pending block builder needs them on the @@ -421,7 +421,7 @@ func mapV1AgentDetail(reg *domain.AgentRegistration, endpoints []domain.AgentEnd Endpoints: eps, RegistrationTimestamp: reg.Details.RegistrationTimestamp.UTC().Format("2006-01-02T15:04:05Z"), LastRenewalTimestamp: lastRenewal, - RegistrationPending: buildV1RegistrationPending(reg, r, tlPublicBaseURL), + RegistrationPending: buildV1RegistrationPending(reg, r, svc), Links: []v1LinkDTO{ {Rel: "self", Href: base}, }, @@ -444,7 +444,7 @@ func mapV1AgentDetail(reg *domain.AgentRegistration, endpoints []domain.AgentEnd // publish (DISCOVERY/TRUST/BADGE/ // CERTIFICATE_BINDING), VERIFY_DNS nextStep, // expiresAt scaled from the challenge deadline. -func buildV1RegistrationPending(reg *domain.AgentRegistration, r *http.Request, tlPublicBaseURL string) *v1RegistrationPendingResponse { +func buildV1RegistrationPending(reg *domain.AgentRegistration, r *http.Request, svc *service.RegistrationService) *v1RegistrationPendingResponse { switch reg.Status { case domain.StatusPendingValidation: base := schemeOf(r) + "://" + r.Host + "/v1/agents/" + reg.AgentID @@ -474,7 +474,7 @@ func buildV1RegistrationPending(reg *domain.AgentRegistration, r *http.Request, } case domain.StatusPendingDNS: base := schemeOf(r) + "://" + r.Host + "/v1/agents/" + reg.AgentID - expected := domain.ComputeRequiredDNSRecords(reg, tlPublicBaseURL) + expected := svc.ComputeRequiredDNSRecords(reg) dnsRecords := make([]v1DNSRecordDTO, 0, len(expected)) for _, rec := range expected { dnsRecords = append(dnsRecords, v1DNSRecordDTO{ diff --git a/internal/ra/service/discovery_default.go b/internal/ra/service/discovery_default.go new file mode 100644 index 0000000..ed956eb --- /dev/null +++ b/internal/ra/service/discovery_default.go @@ -0,0 +1,40 @@ +package service + +import ( + "github.com/godaddy/ans/internal/adapter/discovery/ans" + "github.com/godaddy/ans/internal/adapter/discovery/registry" + "github.com/godaddy/ans/internal/port" +) + +// NewDefaultProfileRegistry returns a registry pre-wired with the +// bundled ANS-family profiles (TXTProfile, SVCBProfile) in the canonical +// emission order — TXT first, SVCB second — that the V2 TL canonical +// bytes for the union case were established at. cmd/ans-ra/main.go +// uses it for production wiring; tests across the RA layer use it +// for fixture construction so all paths exercise the same emission +// shape. +// +// Iteration order is the load-bearing part: the service walker +// emits records in registry insertion order, and TL leaves carry +// `dnsRecordsProvisioned[]` byte-for-byte from that ordering. Any +// future production deployment that swaps in a different profile set +// MUST construct the registry with TXTProfile and SVCBProfile in this +// same relative order to preserve canonical-bytes parity for +// existing agents. +// +// Errors only when registry.New rejects the wiring (duplicate IDs, +// invalid IDs) — the bundled set passes both checks deterministically, +// but the error return preserves callers' ability to fail loudly on +// startup misconfig per the no-panic-in-request-paths rule. +// +// tlPublicBaseURL is the externally-reachable Transparency Log URL the +// ANS profiles stamp into the family `_ans-badge` record's url= (see +// ans.NewTXTProfile / ans.NewSVCBProfile). Empty — tests, or a deployment +// without a public TL URL — falls the badge back to the agent's own +// endpoint URL. +func NewDefaultProfileRegistry(tlPublicBaseURL string) (port.ProfileRegistry, error) { + return registry.New( + ans.NewTXTProfile(tlPublicBaseURL), + ans.NewSVCBProfile(tlPublicBaseURL), + ) +} diff --git a/internal/ra/service/dnsmismatch_test.go b/internal/ra/service/dnsmismatch_test.go new file mode 100644 index 0000000..51dc1b9 --- /dev/null +++ b/internal/ra/service/dnsmismatch_test.go @@ -0,0 +1,42 @@ +package service_test + +import ( + "testing" + + "github.com/godaddy/ans/internal/ra/service" +) + +// TestDNSMismatch_Classification pins the wire-code → predicate mapping +// the RA's 422 mappers depend on. verifyDNSRecords emits MISSING, +// MISMATCH, or a per-record-type "_DNSSEC_MISMATCH" code; +// IsMissing and IsIncorrect partition those so missingRecords and +// incorrectRecords stay complete. Codes are pinned as literals (not the +// unexported constants) so this also guards the wire contract — a TLSA-, +// SVCB-, or HTTPS-DNSSEC tampering code MUST classify as incorrect. +func TestDNSMismatch_Classification(t *testing.T) { + cases := []struct { + name string + code string + wantMissing bool + wantIncorrect bool + }{ + {"missing", "MISSING", true, false}, + {"plain_mismatch", "MISMATCH", false, true}, + {"tlsa_dnssec", "TLSA_DNSSEC_MISMATCH", false, true}, + {"svcb_dnssec", "SVCB_DNSSEC_MISMATCH", false, true}, + {"https_dnssec", "HTTPS_DNSSEC_MISMATCH", false, true}, + {"unknown_code_is_neither", "SOMETHING_ELSE", false, false}, + {"empty_code_is_neither", "", false, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + m := service.DNSMismatch{Code: tc.code} + if got := m.IsMissing(); got != tc.wantMissing { + t.Errorf("IsMissing(%q) = %v, want %v", tc.code, got, tc.wantMissing) + } + if got := m.IsIncorrect(); got != tc.wantIncorrect { + t.Errorf("IsIncorrect(%q) = %v, want %v", tc.code, got, tc.wantIncorrect) + } + }) + } +} diff --git a/internal/ra/service/dnsrecords.go b/internal/ra/service/dnsrecords.go new file mode 100644 index 0000000..75f75de --- /dev/null +++ b/internal/ra/service/dnsrecords.go @@ -0,0 +1,167 @@ +package service + +import ( + "github.com/rs/zerolog/log" + + "github.com/godaddy/ans/internal/domain" +) + +// ComputeRequiredDNSRecords returns the DNS records the operator must +// publish for reg, composed by walking the discovery registry. The RA +// does not create these records — the operator manages their own DNS; +// the RA only verifies they exist and emits the same set onto the TL +// as `dnsRecordsProvisioned[]`. +// +// Composition rules: +// +// 1. The set of profiles to emit is reg.DiscoveryProfiles, filtered to +// those the registry actually has wired. Empty after filtering +// (operator omitted discoveryProfiles, or every entry was unknown +// to the registry) normalizes to domain.DefaultDiscoveryProfiles(). +// 2. Iteration order is the registry's insertion order (cmd/main +// wires [TXTProfile, SVCBProfile], so emission proceeds TXT-first +// then SVCB). User-supplied order on reg.DiscoveryProfiles has no +// effect — `discoveryProfiles` is set semantics on the wire. +// 3. Each profile's full record list (discovery + family trust records) +// is collected and deduped by (Name, Type, Value). Family trust +// records that overlap across sibling profiles in the same family +// (e.g. `_ans-badge` from both ANS_SVCB and ANS_TXT) emit once. +// 4. Records are reordered into discovery-then-trust groupings, +// preserving within-group iteration order. This pins the V2 TL +// `dnsRecordsProvisioned[]` canonical bytes for the union case +// to the historical `[discovery..., badge, TLSA]` shape. +// 5. SVCB rows arrive from the adapter with Required=true. When TXT +// is also resolved, every SVCB row is post-processed to +// Required=false — during the §4.4.2 transition the legacy +// `_ans` TXT family carries the operator's required signal and +// SVCB rides along as optional. +// +// Returns an empty (non-nil) slice when reg has no endpoints AND no +// server cert — nothing meaningful for the operator to publish. The +// nil-vs-empty distinction never reaches the wire: the V2 event +// builder re-wraps into its own slice. +// +// s.discoveryRegistry is guaranteed non-nil by NewRegistrationService +// (constructor panics on nil), so the walker dereferences it +// unconditionally. +func (s *RegistrationService) ComputeRequiredDNSRecords(reg *domain.AgentRegistration) []domain.ExpectedDNSRecord { + requested := s.resolveRequestedProfiles(reg) + + logger := log.Debug(). + Str("agentId", reg.AgentID). + Strs("requestedProfiles", profileStrings(reg.DiscoveryProfiles)). + Strs("resolvedProfiles", profileStrings(setToSlice(requested))) + logger.Msg("computing required DNS records") + + collected, seen := []domain.ExpectedDNSRecord{}, make(map[string]bool) + for _, id := range s.discoveryRegistry.IDs() { + if !requested[id] { + continue + } + profile, ok := s.discoveryRegistry.Get(id) + if !ok { + continue + } + emitted := profile.Records(reg) + log.Debug(). + Str("agentId", reg.AgentID). + Str("profile", string(id)). + Int("emittedCount", len(emitted)). + Msg("profile emitted records") + for _, r := range emitted { + // Dedup key deliberately omits Required: sibling profiles + // emitting the same family trust record (badge, TLSA) must + // agree on the flag (both adapters do — badge true, TLSA + // false), so first-seen wins. A profile that disagreed + // would have its flag silently dropped here — keep the + // flags aligned across adapters. + key := r.Name + "|" + string(r.Type) + "|" + r.Value + if seen[key] { + continue + } + seen[key] = true + collected = append(collected, r) + } + } + + // Group: discovery records first (in walker order), then trust + // records (badge, TLSA) — preserves the V2 union-case canonical + // bytes shape `[discovery..., badge, TLSA]`. + result := make([]domain.ExpectedDNSRecord, 0, len(collected)) + var trust []domain.ExpectedDNSRecord + for _, r := range collected { + if r.Purpose == domain.PurposeDiscovery { + result = append(result, r) + } else { + trust = append(trust, r) + } + } + result = append(result, trust...) + + // SVCB Required-flag post-process: §4.4.2 says TXT carries the + // required signal during the transition; SVCB stays optional + // alongside. + if requested[domain.DiscoveryProfileANSTXT] { + for i := range result { + if result[i].Type == domain.DNSRecordSVCB { + result[i].Required = false + } + } + } + + if len(result) == 0 && len(reg.Endpoints) > 0 { + log.Warn(). + Str("agentId", reg.AgentID). + Strs("resolvedProfiles", profileStrings(setToSlice(requested))). + Msg("DNS record computation produced no records despite having endpoints; check discovery registry wiring") + } + + return result +} + +// resolveRequestedProfiles filters reg.DiscoveryProfiles to those the +// registry has wired, normalizing empty/all-invalid to the default +// set. Unknown profiles trigger a WARN log so an operator can spot a +// post-decommission row in their data without parsing verify-dns +// failures. +func (s *RegistrationService) resolveRequestedProfiles(reg *domain.AgentRegistration) map[domain.DiscoveryProfile]bool { + requested := make(map[domain.DiscoveryProfile]bool) + for _, id := range reg.DiscoveryProfiles { + if _, ok := s.discoveryRegistry.Get(id); ok { + requested[id] = true + continue + } + log.Warn(). + Str("agentId", reg.AgentID). + Str("profile", string(id)). + Msg("registration carries discovery profile unknown to the running registry; skipping") + } + if len(requested) == 0 { + for _, id := range domain.DefaultDiscoveryProfiles() { + requested[id] = true + } + } + return requested +} + +func profileStrings(profiles []domain.DiscoveryProfile) []string { + out := make([]string, len(profiles)) + for i, s := range profiles { + out[i] = string(s) + } + return out +} + +// setToSlice converts the requested-set map to a deterministic slice +// for logging. Order tracks domain.ValidDiscoveryProfiles() so logs are +// stable across runs. +func setToSlice(set map[domain.DiscoveryProfile]bool) []domain.DiscoveryProfile { + var out []domain.DiscoveryProfile + for _, valid := range domain.ValidDiscoveryProfiles() { + id := domain.DiscoveryProfile(valid) + if set[id] { + out = append(out, id) + } + } + return out +} diff --git a/internal/ra/service/dnsrecords_test.go b/internal/ra/service/dnsrecords_test.go new file mode 100644 index 0000000..339c2be --- /dev/null +++ b/internal/ra/service/dnsrecords_test.go @@ -0,0 +1,539 @@ +package service_test + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/godaddy/ans/internal/adapter/discovery/registry" + anscrypto "github.com/godaddy/ans/internal/crypto" + "github.com/godaddy/ans/internal/domain" + "github.com/godaddy/ans/internal/port" + "github.com/godaddy/ans/internal/ra/service" +) + +// newTestRegistry returns the bundled ANS-family registry every +// service-level test uses. Mirrors cmd/ans-ra/main.go's wiring so +// emission order in tests matches production. +func newTestRegistry(t *testing.T) port.ProfileRegistry { + t.Helper() + r, err := service.NewDefaultProfileRegistry("") + require.NoError(t, err) + return r +} + +// newComputeOnlyService returns a RegistrationService wired only with +// the discovery registry — sufficient for ComputeRequiredDNSRecords +// tests, which never touch storage / signing / DNS verification. +// Other dependencies are passed nil; the walker is a pure function of +// reg + registry. +func newComputeOnlyService(t *testing.T) *service.RegistrationService { + t.Helper() + return service.NewRegistrationService( + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + newTestRegistry(t), + ) +} + +func mustReg(t *testing.T, host string, version string, eps []domain.AgentEndpoint, cert *domain.ByocServerCertificate, profiles []domain.DiscoveryProfile) *domain.AgentRegistration { + t.Helper() + v, err := domain.ParseSemVer(version) + require.NoError(t, err) + ansName, err := domain.NewAnsName(v, host) + require.NoError(t, err) + return &domain.AgentRegistration{ + AnsName: ansName, + Endpoints: eps, + ServerCert: cert, + DiscoveryProfiles: profiles, + } +} + +// TestComputeRequiredDNSRecords_BadgeURLFromRegistryConstruction pins the +// end-to-end wiring: NewDefaultProfileRegistry stamps the deployment TL +// URL into the ANS styles, so the family `_ans-badge` record points at the +// TL's per-agent endpoint rather than the agent's own host. The per-adapter +// ansbadge_test covers BadgeRecord directly; this guards the +// registry→style→walker path end to end — without the URL reaching the +// styles, the badge silently regresses to the agent's endpoint URL. +func TestComputeRequiredDNSRecords_BadgeURLFromRegistryConstruction(t *testing.T) { + discoveryReg, err := service.NewDefaultProfileRegistry("https://tl.example.org") + require.NoError(t, err) + svc := service.NewRegistrationService( + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + discoveryReg, + ) + + reg := mustReg(t, "agent.example.com", "1.0.0", + []domain.AgentEndpoint{{Protocol: domain.ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}}, + nil, []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}) + reg.AgentID = "test-agent-id" + + records := svc.ComputeRequiredDNSRecords(reg) + + var badge *domain.ExpectedDNSRecord + for i := range records { + if records[i].Purpose == domain.PurposeBadge { + badge = &records[i] + break + } + } + require.NotNil(t, badge, "expected a _ans-badge record") + assert.Contains(t, badge.Value, "url=https://tl.example.org/v1/agents/test-agent-id") + assert.NotContains(t, badge.Value, "agent.example.com/mcp") +} + +// TestComputeRequiredDNSRecords_StyleMatrix_Integration is the +// migrated cross-style integration matrix from +// internal/domain/dnsrecords_test.go:105-284. Per-adapter tests cover +// within-style rules; this table is the regression suite for the +// styles cross-product (e.g. "SVCB-sole emits no HTTPS RR" — only +// testable across both adapters' output). +func TestComputeRequiredDNSRecords_StyleMatrix_Integration(t *testing.T) { + const sampleMetadataHash = "SHA256:098d650cc6d280dee4c0f47489a75cf17b9bfbbae53051806d4e084108b2ff27" + const wantSampleCapBase64 = "CY1lDMbSgN7kwPR0iadc8Xub-7rlMFGAbU4IQQiy_yc" + + tests := []struct { + name string + styles []domain.DiscoveryProfile + protocol domain.Protocol + agentURL string + metadataHash string // optional per-endpoint MetadataHash + wantHTTPS bool + wantSVCB bool + wantSVCBRequired bool // applies only when wantSVCB is true + wantLegacyTXT bool + wantSVCBPort string // substring expected in SVCB value (e.g. "port=443") + wantSVCBWk string // "" means SVCB MUST NOT contain "key65280=" (well-known suffix) + wantSVCBCap string // "" means SVCB MUST NOT contain "key65281=" (capability digest) + }{ + { + name: "ans_txt_only_emits_https_rr_no_svcb", + styles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSTXT}, + protocol: domain.ProtocolA2A, + agentURL: "https://agent.example.com", + wantHTTPS: true, + wantLegacyTXT: true, + }, + { + name: "ans_svcb_only_omits_https_rr", + styles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + protocol: domain.ProtocolA2A, + agentURL: "https://agent.example.com", + wantSVCB: true, + wantSVCBRequired: true, // SVCB-sole: only PurposeDiscovery record, must be required + wantSVCBPort: "port=443", + wantSVCBWk: "key65280=agent-card.json", + }, + { + name: "union_emits_both_families", + styles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB, domain.DiscoveryProfileANSTXT}, + protocol: domain.ProtocolA2A, + agentURL: "https://agent.example.com", + wantHTTPS: true, + wantLegacyTXT: true, + wantSVCB: true, + // wantSVCBRequired: false — legacy `_ans` TXT carries the + // Required signal during the §4.4.2 transition; SVCB rides + // along as optional. + wantSVCBPort: "port=443", + wantSVCBWk: "key65280=agent-card.json", + }, + { + name: "svcb_mcp_wk_mcp_json", + styles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + protocol: domain.ProtocolMCP, + agentURL: "https://agent.example.com/mcp", + wantSVCB: true, + wantSVCBRequired: true, + wantSVCBPort: "port=443", + wantSVCBWk: "key65280=mcp.json", + }, + { + name: "svcb_http_api_omits_wk", + styles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + protocol: domain.ProtocolHTTPAPI, + agentURL: "https://agent.example.com", + wantSVCB: true, + wantSVCBRequired: true, + wantSVCBPort: "port=443", + }, + { + name: "svcb_cap_sha256_from_endpoint_metadata_hash", + styles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + protocol: domain.ProtocolA2A, + agentURL: "https://agent.example.com", + metadataHash: sampleMetadataHash, + wantSVCB: true, + wantSVCBRequired: true, + wantSVCBPort: "port=443", + wantSVCBWk: "key65280=agent-card.json", + wantSVCBCap: "key65281=" + wantSampleCapBase64, + }, + { + name: "svcb_non_443_port_from_url", + styles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + protocol: domain.ProtocolA2A, + agentURL: "https://agent.example.com:8443", + wantSVCB: true, + wantSVCBRequired: true, + wantSVCBPort: "port=8443", + wantSVCBWk: "key65280=agent-card.json", + }, + { + name: "svcb_http_scheme_defaults_port_80", + styles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + protocol: domain.ProtocolA2A, + agentURL: "http://agent.example.com", + wantSVCB: true, + wantSVCBRequired: true, + wantSVCBPort: "port=80", + wantSVCBWk: "key65280=agent-card.json", + }, + { + name: "empty_styles_coerces_to_default", + styles: nil, + protocol: domain.ProtocolA2A, + agentURL: "https://agent.example.com", + wantSVCB: true, + wantSVCBRequired: true, // default ({ANS_SVCB}) is SVCB-sole + wantSVCBPort: "port=443", + wantSVCBWk: "key65280=agent-card.json", + }, + { + name: "all_invalid_styles_falls_back_to_default", + styles: []domain.DiscoveryProfile{domain.DiscoveryProfile("garbage"), domain.DiscoveryProfile("nonsense")}, + protocol: domain.ProtocolA2A, + agentURL: "https://agent.example.com", + wantSVCB: true, + wantSVCBRequired: true, // fallback default ({ANS_SVCB}) is SVCB-sole + wantSVCBPort: "port=443", + wantSVCBWk: "key65280=agent-card.json", + }, + } + + svc := newComputeOnlyService(t) + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + reg := mustReg(t, "agent.example.com", "1.0.0", + []domain.AgentEndpoint{{ + Protocol: tc.protocol, + AgentURL: tc.agentURL, + MetadataHash: tc.metadataHash, + }}, + nil, tc.styles) + + records := svc.ComputeRequiredDNSRecords(reg) + + var sawHTTPS, sawSVCB, sawLegacyTXT bool + var svcbValue string + var svcbRequired bool + for _, r := range records { + switch r.Type { + case domain.DNSRecordHTTPS: + sawHTTPS = true + case domain.DNSRecordSVCB: + sawSVCB = true + svcbValue = r.Value + svcbRequired = r.Required + case domain.DNSRecordTXT: + if strings.HasPrefix(r.Name, "_ans.") { + sawLegacyTXT = true + } + } + } + + assert.Equal(t, tc.wantHTTPS, sawHTTPS, "HTTPS RR presence") + assert.Equal(t, tc.wantSVCB, sawSVCB, "SVCB row presence") + assert.Equal(t, tc.wantLegacyTXT, sawLegacyTXT, "_ans TXT presence") + + if tc.wantSVCB { + assert.Equal(t, tc.wantSVCBRequired, svcbRequired, + "SVCB Required flag mismatch (true iff ANS_SVCB is the sole resolved style)") + assert.Contains(t, svcbValue, tc.wantSVCBPort, + "SVCB port SvcParam mismatch") + if tc.wantSVCBWk != "" { + assert.Contains(t, svcbValue, tc.wantSVCBWk, "SVCB well-known (key65280) SvcParam mismatch") + } else { + assert.NotContains(t, svcbValue, "key65280=", + "SVCB MUST NOT carry key65280 (well-known) when protocol has no metadata convention") + } + if tc.wantSVCBCap != "" { + assert.Contains(t, svcbValue, tc.wantSVCBCap, "SVCB capability digest (key65281) SvcParam mismatch") + } else { + assert.NotContains(t, svcbValue, "key65281=", + "SVCB MUST NOT carry key65281 (capability digest) when endpoint MetadataHash is empty") + } + // Named-form regression guards across the integration path. + assert.NotContains(t, svcbValue, "wk=", + "named `wk=` SvcParam MUST NOT appear; key65280 is the publishable form") + assert.NotContains(t, svcbValue, "cap-sha256", + "named `cap-sha256=` SvcParam MUST NOT appear; key65281 is the publishable form") + assert.NotContains(t, svcbValue, "card-sha256", + "legacy `card-sha256=` SvcParam MUST NOT appear; key65281 is the publishable form") + } + }) + } +} + +// TestComputeRequiredDNSRecords_UnionDedupesFamilyTrustRecords pins +// that when the union {ANS_SVCB, ANS_TXT} emits, family trust records +// (`_ans-badge`, TLSA) appear ONCE in the output even though both +// adapters emit them. Catches a regression where the dedup pass is +// removed or the dedup key drifts. +func TestComputeRequiredDNSRecords_UnionDedupesFamilyTrustRecords(t *testing.T) { + svc := newComputeOnlyService(t) + reg := mustReg(t, "agent.example.com", "1.0.0", + []domain.AgentEndpoint{{Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com"}}, + &domain.ByocServerCertificate{Fingerprint: "abcdef"}, + []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB, domain.DiscoveryProfileANSTXT}) + + records := svc.ComputeRequiredDNSRecords(reg) + + var badgeCount, tlsaCount int + for _, r := range records { + if r.Purpose == domain.PurposeBadge { + badgeCount++ + } + if r.Purpose == domain.PurposeCertificateBinding { + tlsaCount++ + } + } + assert.Equal(t, 1, badgeCount, "exactly one `_ans-badge` record across the union") + assert.Equal(t, 1, tlsaCount, "exactly one TLSA record across the union") +} + +// TestComputeRequiredDNSRecords_NoEndpoints pins the empty-input +// contract: no endpoints → no records (ServerCert + nil endpoints +// alone is also covered, but the typical case is the V1/V2 detail +// handler hitting an aggregate that hasn't reached PENDING_DNS yet). +func TestComputeRequiredDNSRecords_NoEndpoints(t *testing.T) { + svc := newComputeOnlyService(t) + reg := mustReg(t, "agent.example.com", "1.0.0", nil, nil, nil) + records := svc.ComputeRequiredDNSRecords(reg) + assert.Empty(t, records) +} + +// TestNewRegistrationService_PanicsOnNilProfileRegistry pins the +// fail-loud invariant the constructor enforces. A missing registry +// would silently emit zero `dnsRecordsProvisioned[]` and accept any +// DNS state at verify-dns — trust-root corruption masquerading as +// graceful degradation. Construction is process-start-time, not a +// request path, so the panic does not violate the no-panics-in- +// request-paths rule. +func TestNewRegistrationService_PanicsOnNilProfileRegistry(t *testing.T) { + defer func() { + r := recover() + require.NotNil(t, r, "constructor must panic when discoveryRegistry is nil") + msg, ok := r.(string) + require.True(t, ok, "panic value must be a string explaining the missing dependency") + assert.Contains(t, msg, "discoveryRegistry is required") + }() + _ = service.NewRegistrationService( + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + ) +} + +// TestComputeRequiredDNSRecords_UnknownStyleSkipped pins that a +// reg.DiscoveryProfiles entry the registry doesn't have is silently +// skipped (with a WARN log; not asserted in this test). The remaining +// valid profiles still emit. If every entry is unknown, the walker +// falls back to DefaultDiscoveryProfiles. +func TestComputeRequiredDNSRecords_UnknownStyleSkipped(t *testing.T) { + svc := newComputeOnlyService(t) + reg := mustReg(t, "agent.example.com", "1.0.0", + []domain.AgentEndpoint{{Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com"}}, + nil, + []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB, domain.DiscoveryProfile("UNKNOWN_FUTURE")}) + + records := svc.ComputeRequiredDNSRecords(reg) + + // SVCB is recognized → SVCB-sole emission. UNKNOWN_FUTURE is dropped. + var sawSVCB bool + for _, r := range records { + if r.Type == domain.DNSRecordSVCB { + sawSVCB = true + } + } + assert.True(t, sawSVCB, "valid style alongside unknown-style still emits") +} + +// TestComputeRequiredDNSRecords_UnionCanonicalBytesRegression pins +// the V2 TL `dnsRecordsProvisioned[]` canonical wire for the §4.4.2 +// transition union (ANS_SVCB + ANS_TXT). Any change to slice ORDER +// (JCS preserves array order per RFC 8785 §3.2.2) would shift the +// SHA-256, signal a wire-shape regression, and break offline-verifier +// hashes for in-flight agents at deploy time. +// +// The hex constant was REGENERATED for the keyNNNNN/selector-0 change: +// the SVCB rows now carry `key65280=`/`key65281=` (Fix A — RFC 9460 +// §14.3.1 Private Use presentation of the draft wk/cap-sha256 params, +// replacing the unpublishable named forms) and the TLSA row now carries +// `3 0 1` over the full cert (Fix B2 — selector 0 matching what +// CertificateFingerprint actually hashes). This is the intentional +// canonical-bytes change of the PR. The slice ORDER and the 7-record +// SHAPE are unchanged (both endpoints are https/443) — only the SVCB +// SvcParam values and the TLSA value move the hash. A future drift that +// is NOT one of those two value changes is a regression: investigate +// before touching this constant. +func TestComputeRequiredDNSRecords_UnionCanonicalBytesRegression(t *testing.T) { + const wantSHA256Hex = "0bc5f912c2a450dffd631b66d467ee6d5974e0cbea47e84fd676c6111387bda0" + + svc := newComputeOnlyService(t) + reg := mustReg(t, "agent.example.com", "1.2.3", + []domain.AgentEndpoint{ + { + Protocol: domain.ProtocolA2A, + AgentURL: "https://agent.example.com/a2a", + MetadataHash: "SHA256:098d650cc6d280dee4c0f47489a75cf17b9bfbbae53051806d4e084108b2ff27", + }, + { + Protocol: domain.ProtocolMCP, + AgentURL: "https://agent.example.com/mcp", + MetadataHash: "SHA256:1111111111111111111111111111111111111111111111111111111111111111", + }, + }, + &domain.ByocServerCertificate{Fingerprint: "deadbeefcafe1234"}, + []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB, domain.DiscoveryProfileANSTXT}) + + records := svc.ComputeRequiredDNSRecords(reg) + + // The expected emission shape (order-preserved) is: + // 1. _ans. TXT (a2a) Required=true + // 2. _ans. TXT (mcp) Required=true + // 3. HTTPS (1 . alpn=h2) Required=false + // 4. SVCB (a2a) Required=false (TXT also resolved) + // 5. SVCB (mcp) Required=false + // 6. _ans-badge. TXT (badge) Required=true + // 7. _443._tcp. TLSA Required=false + require.Len(t, records, 7, "union case must emit exactly 7 records") + assert.Equal(t, "_ans.agent.example.com", records[0].Name) + assert.Equal(t, domain.DNSRecordTXT, records[0].Type) + assert.Equal(t, "_ans.agent.example.com", records[1].Name) + assert.Equal(t, "agent.example.com", records[2].Name) + assert.Equal(t, domain.DNSRecordHTTPS, records[2].Type) + assert.Equal(t, "agent.example.com", records[3].Name) + assert.Equal(t, domain.DNSRecordSVCB, records[3].Type) + assert.False(t, records[3].Required, "SVCB Required=false during transition (TXT carries the required signal)") + assert.Equal(t, "agent.example.com", records[4].Name) + assert.Equal(t, domain.DNSRecordSVCB, records[4].Type) + assert.Equal(t, "_ans-badge.agent.example.com", records[5].Name) + assert.Equal(t, "_443._tcp.agent.example.com", records[6].Name) + assert.Equal(t, domain.DNSRecordTLSA, records[6].Type) + + // SHA-256 over JCS-canonical bytes — pins the exact wire bytes + // the V2 TL leaf will canonicalize. + jsonBytes, err := json.Marshal(records) + require.NoError(t, err) + canonical, err := anscrypto.Canonicalize(jsonBytes) + require.NoError(t, err) + sum := sha256.Sum256(canonical) + gotHex := hex.EncodeToString(sum[:]) + assert.Equal(t, wantSHA256Hex, gotHex, + "V2 union canonical-bytes SHA-256 drifted; investigate before changing the constant") +} + +// TestNewDefaultProfileRegistry pins the default-wiring contract: +// returns a registry containing both ANS-family styles in TXT-then-SVCB +// insertion order. The order is the V2 canonical-bytes input. +func TestNewDefaultProfileRegistry(t *testing.T) { + r, err := service.NewDefaultProfileRegistry("") + require.NoError(t, err) + + got := r.IDs() + want := []domain.DiscoveryProfile{domain.DiscoveryProfileANSTXT, domain.DiscoveryProfileANSSVCB} + assert.Equal(t, want, got, "default registry must wire TXT before SVCB to preserve V2 union canonical bytes") +} + +// TestComputeRequiredDNSRecords_RegistryIterationOrderDeterminesEmission +// pins that a non-default registry wiring (SVCB before TXT) actually +// produces a different emission order — proving the walker honours +// registry insertion order rather than user-supplied +// reg.DiscoveryProfiles order. +func TestComputeRequiredDNSRecords_RegistryIterationOrderDeterminesEmission(t *testing.T) { + // Build a "production" service (default registry wiring: TXT, SVCB) + // and a custom one with SVCB before TXT. + defaultSvc := newComputeOnlyService(t) + + customReg, err := registry.New(svcStub{id: domain.DiscoveryProfileANSSVCB, marker: "S"}, svcStub{id: domain.DiscoveryProfileANSTXT, marker: "T"}) + require.NoError(t, err) + customSvc := service.NewRegistrationService( + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, customReg) + + reg := mustReg(t, "agent.example.com", "1.0.0", + []domain.AgentEndpoint{{Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com"}}, + nil, + []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB, domain.DiscoveryProfileANSTXT}) + + defaultOut := defaultSvc.ComputeRequiredDNSRecords(reg) + customOut := customSvc.ComputeRequiredDNSRecords(reg) + + // Default: TXT first → first record is the `_ans` TXT. + require.NotEmpty(t, defaultOut) + assert.Equal(t, "_ans.agent.example.com", defaultOut[0].Name, + "default registry wires TXT first; first emitted record is `_ans` TXT") + + // Custom: SVCB stub (marker=S) first, no records (stub returns + // empty slice). Then TXT stub (marker=T), also empty. So custom + // out is empty — pinning that the custom registry was actually + // consulted (without the registry, the walker would fall back to + // the default and produce non-empty records). + assert.Empty(t, customOut, "stub registry produces no records; default fallback is gated by registry presence, not adapter output") +} + +// svcStub is a minimal port.ProfileEmitter for ordering tests; emits +// no records so the test asserts purely on walker behavior. +type svcStub struct { + id domain.DiscoveryProfile + marker string +} + +func (s svcStub) ID() domain.DiscoveryProfile { return s.id } +func (svcStub) Records(*domain.AgentRegistration) []domain.ExpectedDNSRecord { + return nil +} + +// inconsistentRegistry violates the IDs()/Get consistency contract: +// IDs() advertises a style that Get() does not have. The walker's +// defensive `if !ok { continue }` branch is the safety net for that +// contract violation. The bundled registry maintains the contract by +// construction, so this fake exercises a branch only a custom +// port.ProfileRegistry implementation could ever reach. +type inconsistentRegistry struct{} + +func (inconsistentRegistry) IDs() []domain.DiscoveryProfile { + return []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB} +} + +func (inconsistentRegistry) Get(domain.DiscoveryProfile) (port.ProfileEmitter, bool) { + return nil, false +} + +// TestComputeRequiredDNSRecords_RegistryGetMissDoesNotPanic pins the +// defensive branch the walker takes when registry.IDs() and Get fall +// out of sync. The branch is unreachable in production wiring; it +// exists so a future custom port.ProfileRegistry implementation +// (e.g. one that hot-reloads styles and races between IDs() and Get) +// degrades to "skip the missing ID" instead of nil-dereferencing the +// returned style. +func TestComputeRequiredDNSRecords_RegistryGetMissDoesNotPanic(t *testing.T) { + svc := service.NewRegistrationService( + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + inconsistentRegistry{}) + reg := mustReg(t, "agent.example.com", "1.0.0", + []domain.AgentEndpoint{{Protocol: domain.ProtocolA2A, AgentURL: "https://agent.example.com"}}, + nil, + []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}) + + // IDs() returns SVCB; Get returns (nil, false). Walker must + // continue without dereferencing style. Result: empty record set + // since the walker has nothing to emit. No panic. + records := svc.ComputeRequiredDNSRecords(reg) + assert.Empty(t, records) +} diff --git a/internal/ra/service/helpers.go b/internal/ra/service/helpers.go index fda9cd6..c6de49e 100644 --- a/internal/ra/service/helpers.go +++ b/internal/ra/service/helpers.go @@ -6,11 +6,77 @@ import ( "encoding/pem" "errors" "fmt" + "strings" "time" "github.com/godaddy/ans/internal/domain" ) +// applyDiscoveryProfiles resolves the set of DNS record families the +// registration emits and stores it on the aggregate, normalizing the +// V2 request's discoveryProfiles field defensively at the API boundary. +// +// V1 lane is pinned to {ANS_TXT} regardless of the request: V1 +// callers predate the Consolidated Approach and their tooling expects +// the original `_ans` TXT shape. V1 has no discoveryProfiles field on +// the wire, so this branch is the only path V1 registrations take. +// +// V2 normalization: +// - Field absent (nil slice) → defaults to DefaultDiscoveryProfiles() +// ({ANS_SVCB}). The spec doesn't list discoveryProfiles in +// `required`, so omission is legal and the server picks the +// recommended Consolidated Approach. +// - Field present but empty (`"discoveryProfiles": []`) → also +// normalizes to DefaultDiscoveryProfiles(), same as omission. The +// spec's `minItems: 1` is the canonical client contract; the server +// does not reject a client that sends an empty array anyway. +// - Duplicate elements → silently deduped, first occurrence wins. The +// spec's `uniqueItems: true` is the canonical client contract; a +// duplicate carries no extra meaning, so we normalize rather than +// reject. +// - Invalid element (not in ValidDiscoveryProfiles()) → 422 +// INVALID_DISCOVERY_PROFILE. An unrecognized value can't be +// normalized away — it names a family the RA can't emit, so the +// caller must fix it. +// +// The handler-side conversion (toDomainDiscoveryProfiles) preserves +// the nil-vs-empty distinction, but both nil and empty normalize to the +// default here so the distinction no longer changes the outcome. +// +// V1 detection routes through isV1Lane (lifecycle.go) so a future +// schema-version evolution updates one site, not several. The error +// message references ValidDiscoveryProfiles() so adding a third profile +// is a one-place change. +func applyDiscoveryProfiles(reg *domain.AgentRegistration, req RegisterRequest) error { + if isV1Lane(req.SchemaVersion) { + reg.DiscoveryProfiles = []domain.DiscoveryProfile{domain.DiscoveryProfileANSTXT} + return nil + } + if len(req.DiscoveryProfiles) == 0 { + reg.DiscoveryProfiles = domain.DefaultDiscoveryProfiles() + return nil + } + seen := make(map[domain.DiscoveryProfile]struct{}, len(req.DiscoveryProfiles)) + out := make([]domain.DiscoveryProfile, 0, len(req.DiscoveryProfiles)) + for _, s := range req.DiscoveryProfiles { + if !s.IsValid() { + return domain.NewValidationError( + "INVALID_DISCOVERY_PROFILE", + fmt.Sprintf("discoveryProfiles element %q is not one of %s", + string(s), + strings.Join(domain.ValidDiscoveryProfiles(), ", ")), + ) + } + if _, dup := seen[s]; dup { + continue + } + seen[s] = struct{}{} + out = append(out, s) + } + reg.DiscoveryProfiles = out + return nil +} + // fingerprintOf returns the SHA-256 fingerprint of the DER certificate // inside the given PEM string, formatted as `SHA256:`. // The `SHA256:` prefix matches the algorithm-prefixed form the diff --git a/internal/ra/service/helpers_test.go b/internal/ra/service/helpers_test.go index 7eb0204..786ae06 100644 --- a/internal/ra/service/helpers_test.go +++ b/internal/ra/service/helpers_test.go @@ -14,6 +14,7 @@ import ( "crypto/x509" "crypto/x509/pkix" "encoding/pem" + "errors" "math/big" "strings" "testing" @@ -201,3 +202,190 @@ func selfSignedCertPEM(t *testing.T) string { } return string(pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})) } + +// ----- applyDiscoveryProfiles ----- + +// TestApplyDiscoveryProfiles covers the V1-pin / V2-default / +// V2-normalize branches. The server normalizes defensively: an explicit +// empty array and field omission both fall back to the default set, and +// duplicates are silently deduped (first occurrence wins). Only an +// unrecognized value still surfaces as INVALID_DISCOVERY_PROFILE — it +// names a family the RA can't emit, so it cannot be normalized away. The +// integration tests follow happy paths through RegisterAgent and don't +// reach the normalization/rejection branches directly. +func TestApplyDiscoveryProfiles(t *testing.T) { + tests := []struct { + name string + req RegisterRequest + wantProfiles []domain.DiscoveryProfile + wantErrCode string + }{ + { + name: "v1_pins_to_ans_txt_ignoring_request_field", + req: RegisterRequest{ + SchemaVersion: "V1", + DiscoveryProfiles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + }, + wantProfiles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSTXT}, + }, + { + name: "v2_nil_normalizes_to_default", + req: RegisterRequest{SchemaVersion: "V2"}, + wantProfiles: domain.DefaultDiscoveryProfiles(), + }, + { + // minItems: 1 is the canonical client contract, but the + // server normalizes an explicit empty array to the default + // set rather than rejecting it — same outcome as omission. + name: "v2_explicit_empty_slice_normalizes_to_default", + req: RegisterRequest{SchemaVersion: "V2", DiscoveryProfiles: []domain.DiscoveryProfile{}}, + wantProfiles: domain.DefaultDiscoveryProfiles(), + }, + { + name: "unset_schema_treated_as_v2_default", + req: RegisterRequest{}, + wantProfiles: domain.DefaultDiscoveryProfiles(), + }, + { + name: "v2_valid_ans_svcb_only", + req: RegisterRequest{ + SchemaVersion: "V2", + DiscoveryProfiles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + }, + wantProfiles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSSVCB}, + }, + { + name: "v2_valid_ans_txt_only", + req: RegisterRequest{ + SchemaVersion: "V2", + DiscoveryProfiles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSTXT}, + }, + wantProfiles: []domain.DiscoveryProfile{domain.DiscoveryProfileANSTXT}, + }, + { + name: "v2_valid_union_preserves_order", + req: RegisterRequest{ + SchemaVersion: "V2", + DiscoveryProfiles: []domain.DiscoveryProfile{ + domain.DiscoveryProfileANSSVCB, + domain.DiscoveryProfileANSTXT, + }, + }, + wantProfiles: []domain.DiscoveryProfile{ + domain.DiscoveryProfileANSSVCB, + domain.DiscoveryProfileANSTXT, + }, + }, + { + // uniqueItems: true is the canonical client contract, but the + // server normalizes duplicates by deduping (first occurrence + // wins) rather than rejecting. A duplicate carries no extra + // meaning, so the persisted set drops the repeat and keeps + // the original order of first appearances. + name: "v2_duplicate_elements_deduped_first_wins", + req: RegisterRequest{ + SchemaVersion: "V2", + DiscoveryProfiles: []domain.DiscoveryProfile{ + domain.DiscoveryProfileANSSVCB, + domain.DiscoveryProfileANSSVCB, + domain.DiscoveryProfileANSTXT, + }, + }, + wantProfiles: []domain.DiscoveryProfile{ + domain.DiscoveryProfileANSSVCB, + domain.DiscoveryProfileANSTXT, + }, + }, + { + name: "v2_invalid_element_rejected", + req: RegisterRequest{ + SchemaVersion: "V2", + DiscoveryProfiles: []domain.DiscoveryProfile{domain.DiscoveryProfile("garbage")}, + }, + wantErrCode: "INVALID_DISCOVERY_PROFILE", + }, + { + // CONSTANT_CASE is the wire form. lowercase is rejected so the + // V2 enum stays consistent with every other enum on the spec. + name: "v2_lowercase_element_rejected_as_invalid", + req: RegisterRequest{ + SchemaVersion: "V2", + DiscoveryProfiles: []domain.DiscoveryProfile{domain.DiscoveryProfile("ans_svcb")}, + }, + wantErrCode: "INVALID_DISCOVERY_PROFILE", + }, + { + // First valid, second invalid — error surfaces at the + // invalid element, no partial state stamped on the aggregate. + name: "v2_mixed_valid_then_invalid_rejected", + req: RegisterRequest{ + SchemaVersion: "V2", + DiscoveryProfiles: []domain.DiscoveryProfile{ + domain.DiscoveryProfileANSSVCB, + domain.DiscoveryProfile("garbage"), + }, + }, + wantErrCode: "INVALID_DISCOVERY_PROFILE", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + reg := &domain.AgentRegistration{} + err := applyDiscoveryProfiles(reg, tc.req) + if tc.wantErrCode != "" { + if err == nil { + t.Fatalf("want error code %q, got nil", tc.wantErrCode) + } + var verr *domain.Error + if !errors.As(err, &verr) { + t.Fatalf("want *domain.Error, got %T: %v", err, err) + } + if verr.Code != tc.wantErrCode { + t.Errorf("code: got %q want %q", verr.Code, tc.wantErrCode) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !sameProfiles(reg.DiscoveryProfiles, tc.wantProfiles) { + t.Errorf("DiscoveryProfiles: got %v want %v", reg.DiscoveryProfiles, tc.wantProfiles) + } + }) + } +} + +// TestApplyDiscoveryProfiles_ErrorMessageListsValidValues confirms the +// error detail enumerates the canonical valid set so SDK authors get +// an actionable message. Sourced from domain.ValidDiscoveryProfiles(). +func TestApplyDiscoveryProfiles_ErrorMessageListsValidValues(t *testing.T) { + reg := &domain.AgentRegistration{} + err := applyDiscoveryProfiles(reg, RegisterRequest{ + SchemaVersion: "V2", + DiscoveryProfiles: []domain.DiscoveryProfile{domain.DiscoveryProfile("garbage")}, + }) + if err == nil { + t.Fatal("expected error") + } + for _, want := range domain.ValidDiscoveryProfiles() { + if !strings.Contains(err.Error(), want) { + t.Errorf("error message must list %q; got %q", want, err.Error()) + } + } +} + +// sameProfiles compares two profile slices for set-equal-with-order. +// Used by TestApplyDiscoveryProfiles to assert ordering on the happy +// paths without pulling in reflect.DeepEqual semantics that +// distinguish nil from empty. +func sameProfiles(a, b []domain.DiscoveryProfile) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/internal/ra/service/lifecycle.go b/internal/ra/service/lifecycle.go index cf15a65..b4dfa74 100644 --- a/internal/ra/service/lifecycle.go +++ b/internal/ra/service/lifecycle.go @@ -8,6 +8,7 @@ import ( "time" "github.com/google/uuid" + "github.com/rs/zerolog/log" "github.com/godaddy/ans/internal/domain" "github.com/godaddy/ans/internal/port" @@ -480,16 +481,47 @@ type VerifyDNSResult struct { DNSMismatches []DNSMismatch // non-empty → handler emits 422 } +// DNS mismatch codes carried by DNSMismatch.Code. MISSING and MISMATCH +// are exact codes; DNSSEC-authenticated tampering is reported per record +// type as "_DNSSEC_MISMATCH" (TLSA_DNSSEC_MISMATCH, +// SVCB_DNSSEC_MISMATCH, HTTPS_DNSSEC_MISMATCH). Consumers classify with +// the IsMissing / IsIncorrect predicates rather than matching raw strings, +// so a new DNSSEC-bearing record type surfaces without editing every +// consumer — the drift that previously dropped SVCB/HTTPS tampering from +// the 422 body. +const ( + dnsCodeMissing = "MISSING" + dnsCodeMismatch = "MISMATCH" + dnssecMismatchSuffix = "_DNSSEC_MISMATCH" +) + // DNSMismatch names a missing or incorrect record encountered during // verification. Surface-level shape matches the V2 DnsVerificationError. type DNSMismatch struct { Expected domain.ExpectedDNSRecord Found string // empty if the record was missing entirely - Code string // "MISSING" | "MISMATCH" + // Code is "MISSING", "MISMATCH", or "_DNSSEC_MISMATCH". + // Classify with IsMissing / IsIncorrect rather than comparing raw + // strings — the DNSSEC codes are generated per record type. + Code string +} + +// IsMissing reports whether the expected record was absent from the zone. +// These records belong in the 422 body's missingRecords array. +func (m DNSMismatch) IsMissing() bool { return m.Code == dnsCodeMissing } + +// IsIncorrect reports whether the record was present but wrong: a plain +// value mismatch (MISMATCH) or DNSSEC-authenticated tampering on a TLSA, +// SVCB, or HTTPS record ("_DNSSEC_MISMATCH"). Matching the +// suffix rather than each type means a future DNSSEC-bearing record type +// lands in incorrectRecords automatically. These records belong in the +// 422 body's incorrectRecords array. +func (m DNSMismatch) IsIncorrect() bool { + return m.Code == dnsCodeMismatch || strings.HasSuffix(m.Code, dnssecMismatchSuffix) } // VerifyDNS checks the operator's authoritative nameserver for the -// required records (computed by domain.ComputeRequiredDNSRecords) and +// required records (computed by s.ComputeRequiredDNSRecords) and // advances the registration to ACTIVE on success. // // On success, emits an AGENT_ACTIVE event whose attestations carry @@ -535,13 +567,44 @@ func (s *RegistrationService) VerifyDNS(ctx context.Context, agentID string, in reg.ServerCert = byoc } - expected := domain.ComputeRequiredDNSRecords(reg, s.tlPublicBaseURL) + expected := s.ComputeRequiredDNSRecords(reg) mismatches, perRecord, err := s.verifyDNSRecords(ctx, reg.FQDN(), expected) if err != nil { + // Systemic verifier failure (DNS unreachable, resolver error) — + // the caller sees a 500 with nothing in the body. WARN here so + // on-call can grep the agent and FQDN that wedged. agentID is in + // scope here but not inside verifyDNSRecords, so this is the one + // WARN site for the verifier error. + log.Warn(). + Str("agentId", agentID). + Str("fqdn", reg.FQDN()). + Err(err). + Msg("dns verification failed with a systemic error") return nil, fmt.Errorf("dns verify: %w", err) } if len(mismatches) > 0 { + // A 422 is an expected operator-side condition (their zone isn't + // published yet / is wrong), so INFO, not ERROR. Log names, types, + // and classification codes only — never the expected or live + // values — so on-call can distinguish "one bad operator zone" from + // "our emission regressed" without operators pasting 422 bodies. + recs := make([]struct { + Name string `json:"name"` + Type string `json:"type"` + Code string `json:"code"` + }, len(mismatches)) + for i, m := range mismatches { + recs[i].Name = m.Expected.Name + recs[i].Type = string(m.Expected.Type) + recs[i].Code = m.Code + } + log.Info(). + Str("agentId", agentID). + Str("fqdn", reg.FQDN()). + Int("mismatchCount", len(mismatches)). + Interface("mismatchRecords", recs). + Msg("verify-dns returning mismatches") return &VerifyDNSResult{Registration: reg, Now: now, DNSMismatches: mismatches}, nil } @@ -634,27 +697,64 @@ func (s *RegistrationService) verifyDNSRecords(ctx context.Context, fqdn string, } var out []DNSMismatch for _, r := range res.Results { - // DNSSEC-authenticated TLSA that doesn't match is a hard - // fail regardless of the Required flag. `r.Found` from the - // TLSA verifier is true only when the actual matched the - // expected value after case-insensitive hex normalization, - // so `DNSSECVerified && !Found` captures "response was - // signed, but its content disagreed with the cert we - // issued" — the exact attack we block. - if r.Record.Type == domain.DNSRecordTLSA && r.DNSSECVerified && !r.Found { - out = append(out, DNSMismatch{ - Expected: r.Record, Found: r.Actual, Code: "TLSA_DNSSEC_MISMATCH", - }) - continue + // DNSSEC-authenticated record whose committed value disagrees + // with the expected one is a hard fail regardless of the + // Required flag. `r.Found` is true only when the actual + // matched after type-specific normalization, so + // `DNSSECVerified && !Found` captures "response was signed, + // but its content disagreed with what we issued" — the exact + // attack we block (an attacker rewrote a record in a signed + // zone). Applies to TLSA (cert binding), SVCB (per-protocol + // service binding, including card-sha256 commitments), and + // HTTPS (service binding). + if r.DNSSECVerified && !r.Found { + switch r.Record.Type { + case domain.DNSRecordTLSA, domain.DNSRecordSVCB, domain.DNSRecordHTTPS: + out = append(out, DNSMismatch{ + Expected: r.Record, Found: r.Actual, + Code: string(r.Record.Type) + dnssecMismatchSuffix, + }) + continue + case domain.DNSRecordTXT: + // TXT records (discovery, badge) carry no + // cryptographic commitment, so a DNSSEC-validated + // mismatch isn't a hard fail — fall through to the + // Required check below. + } } + // Optional records that aren't a DNSSEC-validated tamper (handled + // above) are skipped: TLSA without DNSSEC, the HTTPS RR that + // CNAME-at-apex operators cannot publish (RFC 1034 §3.6.2), and + // union-mode SVCB rows the walker flipped Required=false during the + // §4.4.2 transition are all optional by design — blocking on them + // would 422 operators forever on records the design says they need + // not publish. The DNSSEC hard-fail above is the only path that + // bypasses Required; this keeps the documented two-condition + // blocking contract (Required miss/mismatch, or DNSSEC tamper). if !r.Record.Required { continue } + // For Required records, partition by what DNS actually answered, + // using r.Found as the verdict (the verifier already applied + // type-specific matching: SVCB subset-match, TLSA case-insensitive + // hex, etc.). The two not-found cases split on r.Actual per the + // port.RecordVerification contract — empty Actual means nothing + // answered (truly absent), a non-empty Actual means the zone has a + // live record that didn't match: + // + // !Found && Actual == "" → MISSING (operator hasn't published it) + // !Found && Actual != "" → MISMATCH carrying the live value, so + // the 422 shows the operator what's + // actually in their zone vs expected + // Found → OK; the verifier's match is the verdict. + // A benign Actual≠Value delta (coexistence + // extras on an SVCB subset match) is not a + // mismatch. switch { + case !r.Found && r.Actual == "": + out = append(out, DNSMismatch{Expected: r.Record, Code: dnsCodeMissing}) case !r.Found: - out = append(out, DNSMismatch{Expected: r.Record, Code: "MISSING"}) - case r.Found && r.Actual != r.Record.Value: - out = append(out, DNSMismatch{Expected: r.Record, Found: r.Actual, Code: "MISMATCH"}) + out = append(out, DNSMismatch{Expected: r.Record, Found: r.Actual, Code: dnsCodeMismatch}) } } return out, res.Results, nil @@ -684,8 +784,8 @@ func (s *RegistrationService) buildAgentRegisteredEvent( // // DNSSECVerified carries forward from the per-record verification // result (set true by the lookup verifier when a validating - // resolver marked the response with the AD bit). Only ever true - // for TLSA today — TXT and HTTPS records don't carry the flag. + // resolver marked the response with the AD bit). True on TLSA, + // SVCB, and HTTPS records; TXT records don't carry the flag. dnssecByKey := make(map[string]bool, len(perRecord)) for _, r := range perRecord { if r.DNSSECVerified { @@ -831,7 +931,7 @@ func (s *RegistrationService) Revoke(ctx context.Context, agentID string, in Rev return &RevokeResult{ Registration: reg, RevokedAt: now, - DNSRecordsToRemove: domain.ComputeRequiredDNSRecords(reg, s.tlPublicBaseURL), + DNSRecordsToRemove: s.ComputeRequiredDNSRecords(reg), }, nil } @@ -945,6 +1045,6 @@ func (s *RegistrationService) Revoke(ctx context.Context, agentID string, in Rev return &RevokeResult{ Registration: reg, RevokedAt: now, - DNSRecordsToRemove: domain.ComputeRequiredDNSRecords(reg, s.tlPublicBaseURL), + DNSRecordsToRemove: s.ComputeRequiredDNSRecords(reg), }, nil } diff --git a/internal/ra/service/lifecycle_verifydns_test.go b/internal/ra/service/lifecycle_verifydns_test.go new file mode 100644 index 0000000..26a2c0c --- /dev/null +++ b/internal/ra/service/lifecycle_verifydns_test.go @@ -0,0 +1,262 @@ +package service + +// White-box table tests for verifyDNSRecords' mismatch classification. +// These pin the Fix C partition directly against the documented +// port.RecordVerification.Actual contract (port/dns.go): "first live +// answer when records exist but none matched; empty when nothing +// answered". The classification consumes that contract, so the stub +// verifier below implements it exactly — a future DNSVerifier adapter +// author who honors the documented contract gets a green test, and one +// who reintroduces the old "MISSING regardless of Actual" behavior gets +// a loud failure here rather than a silent regression at the wire. +// +// verifyDNSRecords is unexported, so this is an internal-package test +// (package service, like helpers_test.go). It builds a RegistrationService +// with only dnsVerifier wired — no store, no registry — because the +// function under test reads nothing else. + +import ( + "context" + "errors" + "testing" + + "github.com/godaddy/ans/internal/domain" + "github.com/godaddy/ans/internal/port" +) + +// stubDNSVerifier returns a fixed VerificationResult (and optional error) +// regardless of the expected records it is handed. Tests set results to +// the exact (Found, Actual, DNSSECVerified, Record) shapes they want to +// classify, honoring the documented Actual contract. +type stubDNSVerifier struct { + results []port.RecordVerification + err error +} + +func (s stubDNSVerifier) VerifyRecords( + _ context.Context, + _ string, + _ []domain.ExpectedDNSRecord, +) (*port.VerificationResult, error) { + if s.err != nil { + return nil, s.err + } + return &port.VerificationResult{Results: s.results}, nil +} + +// rec is a tiny constructor for an ExpectedDNSRecord with the fields the +// classification cares about (Name/Type/Value/Required). +func rec(name string, typ domain.DNSRecordType, value string, required bool) domain.ExpectedDNSRecord { + return domain.ExpectedDNSRecord{Name: name, Type: typ, Value: value, Required: required} +} + +func TestVerifyDNSRecords_Classification(t *testing.T) { + t.Parallel() + + const ( + svcbName = "agent.example.com" + svcbVal = "1 . alpn=mcp port=443 key65280=mcp" + txtName = "_ans.agent.example.com" + txtVal = "v=ans1; ..." + ) + + cases := []struct { + name string + results []port.RecordVerification + // want is the expected (Code, Found) pairs, in order. + want []struct { + code string + found string + } + }{ + { + // Nothing answered for a present-but-absent required record: + // MISSING, Found stays empty (no live value to surface). + name: "absent_required_is_missing", + results: []port.RecordVerification{ + {Record: rec(svcbName, domain.DNSRecordSVCB, svcbVal, true), Found: false, Actual: ""}, + }, + want: []struct { + code string + found string + }{{dnsCodeMissing, ""}}, + }, + { + // Records exist but none matched: MISMATCH carrying the first + // live answer. Pre-Fix-C this was coded MISSING and the live + // value was dropped from the 422. + name: "present_but_wrong_is_mismatch_carrying_live_value", + results: []port.RecordVerification{ + {Record: rec(svcbName, domain.DNSRecordSVCB, svcbVal, true), Found: false, Actual: "wrong"}, + }, + want: []struct { + code string + found string + }{{dnsCodeMismatch, "wrong"}}, + }, + { + // SVCB coexistence shape: the verifier's type-specific subset + // match succeeded (Found=true) even though the live Actual + // string differs benignly from Value (extra SvcParams from a + // sibling family per RFC 9460 §8). Found=true is the verdict — + // no mismatch. Pre-Fix-C the `Actual != Value` arm coded this a + // spurious MISMATCH and defeated coexistence. + name: "found_with_benign_actual_delta_is_ok", + results: []port.RecordVerification{ + {Record: rec(svcbName, domain.DNSRecordSVCB, svcbVal, true), Found: true, Actual: svcbVal + " ipv4hint=192.0.2.1"}, + }, + want: nil, + }, + { + // The Required flag DOES gate classification. Optional records + // in an UNSIGNED zone (no DNSSEC) are skipped whether they are + // truly absent (Actual="") or present-but-wrong (Actual="wrong") + // — TLSA without DNSSEC, the CNAME-at-apex HTTPS RR, and + // union-mode SVCB rows are all Required=false by design and must + // not 422-block the operator. Only the DNSSEC hard-fail above + // bypasses Required (covered by the tamper cases below). Two + // records here, both optional+unsigned, expect ZERO mismatches. + name: "optional_unsigned_records_are_skipped", + results: []port.RecordVerification{ + {Record: rec("_443._tcp.agent.example.com", domain.DNSRecordTLSA, "3 0 1 ab", false), Found: false, Actual: ""}, + {Record: rec(svcbName, domain.DNSRecordHTTPS, "1 . alpn=h2", false), Found: false, Actual: "wrong"}, + }, + want: nil, + }, + { + // DNSSEC hard-fail (untouched block): a DNSSEC-authenticated + // SVCB response whose content disagrees is tampering — coded + // SVCB_DNSSEC_MISMATCH, carrying the live value, regardless of + // Required. Pins the block above the switch stays byte-identical. + name: "dnssec_svcb_tamper_is_hardfail", + results: []port.RecordVerification{ + {Record: rec(svcbName, domain.DNSRecordSVCB, svcbVal, false), Found: false, Actual: "tampered", DNSSECVerified: true}, + }, + want: []struct { + code string + found string + }{{"SVCB" + dnssecMismatchSuffix, "tampered"}}, + }, + { + // DNSSEC hard-fail also fires for TLSA and HTTPS. Pin all three + // cert/service-binding types route through the hard-fail arm. + name: "dnssec_tlsa_and_https_tamper_are_hardfail", + results: []port.RecordVerification{ + {Record: rec("_443._tcp.agent.example.com", domain.DNSRecordTLSA, "3 0 1 ab", false), Found: false, Actual: "3 0 1 cd", DNSSECVerified: true}, + {Record: rec(svcbName, domain.DNSRecordHTTPS, "1 . alpn=h2", false), Found: false, Actual: "1 . alpn=h3", DNSSECVerified: true}, + }, + want: []struct { + code string + found string + }{ + {"TLSA" + dnssecMismatchSuffix, "3 0 1 cd"}, + {"HTTPS" + dnssecMismatchSuffix, "1 . alpn=h3"}, + }, + }, + { + // TXT carries no cryptographic commitment, so a + // DNSSEC-authenticated TXT mismatch is NOT a hard fail — it + // falls through to the standard classification. Here the TXT + // record is absent (Found=false, Actual="") → MISSING, the same + // verdict a non-DNSSEC absent record gets. + name: "dnssec_txt_falls_through_to_missing", + results: []port.RecordVerification{ + {Record: rec(txtName, domain.DNSRecordTXT, txtVal, true), Found: false, Actual: "", DNSSECVerified: true}, + }, + want: []struct { + code string + found string + }{{dnsCodeMissing, ""}}, + }, + { + // TXT DNSSEC mismatch with a live wrong value falls through to + // the standard classification too: present-but-wrong → MISMATCH + // carrying the live value. + name: "dnssec_txt_present_but_wrong_falls_through_to_mismatch", + results: []port.RecordVerification{ + {Record: rec(txtName, domain.DNSRecordTXT, txtVal, true), Found: false, Actual: "v=ans1; stale", DNSSECVerified: true}, + }, + want: []struct { + code string + found string + }{{dnsCodeMismatch, "v=ans1; stale"}}, + }, + { + // Mixed batch: one absent (MISSING), one present-but-wrong + // (MISMATCH carrying live), one satisfied subset (Found=true, + // no mismatch). Order is preserved. + name: "mixed_batch_preserves_order_and_partitions", + results: []port.RecordVerification{ + {Record: rec("a.example.com", domain.DNSRecordSVCB, "v1", true), Found: false, Actual: ""}, + {Record: rec("b.example.com", domain.DNSRecordSVCB, "v2", true), Found: false, Actual: "live2"}, + {Record: rec("c.example.com", domain.DNSRecordSVCB, "v3", true), Found: true, Actual: "v3 extra"}, + }, + want: []struct { + code string + found string + }{ + {dnsCodeMissing, ""}, + {dnsCodeMismatch, "live2"}, + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + svc := &RegistrationService{dnsVerifier: stubDNSVerifier{results: tc.results}} + got, perRecord, err := svc.verifyDNSRecords(context.Background(), "agent.example.com", nil) + if err != nil { + t.Fatalf("verifyDNSRecords: unexpected error %v", err) + } + // perRecord must echo the verifier's full result set so the + // attestation builder sees every record (matched or not). + if len(perRecord) != len(tc.results) { + t.Errorf("perRecord len: got %d want %d", len(perRecord), len(tc.results)) + } + if len(got) != len(tc.want) { + t.Fatalf("mismatch count: got %d want %d (%+v)", len(got), len(tc.want), got) + } + for i, w := range tc.want { + if got[i].Code != w.code { + t.Errorf("mismatch[%d] Code: got %q want %q", i, got[i].Code, w.code) + } + if got[i].Found != w.found { + t.Errorf("mismatch[%d] Found: got %q want %q", i, got[i].Found, w.found) + } + } + }) + } +} + +// TestVerifyDNSRecords_VerifierErrorIsReturned pins the WARN-path +// contract: when the underlying verifier returns a systemic error, +// verifyDNSRecords surfaces it (no panic, no swallow) so VerifyDNS can +// log a WARN and map it to a 500. The error value must propagate +// unchanged. +func TestVerifyDNSRecords_VerifierErrorIsReturned(t *testing.T) { + t.Parallel() + sentinel := errors.New("dns unreachable") + svc := &RegistrationService{dnsVerifier: stubDNSVerifier{err: sentinel}} + out, perRecord, err := svc.verifyDNSRecords(context.Background(), "agent.example.com", nil) + if !errors.Is(err, sentinel) { + t.Fatalf("error: got %v want %v", err, sentinel) + } + if out != nil || perRecord != nil { + t.Errorf("on error want nil slices, got out=%v perRecord=%v", out, perRecord) + } +} + +// TestVerifyDNSRecords_NilVerifierSkips pins the local-dev escape hatch: +// a nil dnsVerifier short-circuits to "DNS correct" (no mismatches, no +// error). Untouched by Fix C but exercised here so the early return +// stays covered alongside the classification. +func TestVerifyDNSRecords_NilVerifierSkips(t *testing.T) { + t.Parallel() + svc := &RegistrationService{} // dnsVerifier nil + out, perRecord, err := svc.verifyDNSRecords(context.Background(), "agent.example.com", + []domain.ExpectedDNSRecord{rec("a", domain.DNSRecordSVCB, "v", true)}) + if err != nil || out != nil || perRecord != nil { + t.Fatalf("nil verifier: want all-nil, got out=%v perRecord=%v err=%v", out, perRecord, err) + } +} diff --git a/internal/ra/service/registration.go b/internal/ra/service/registration.go index 652ef53..d27e6f0 100644 --- a/internal/ra/service/registration.go +++ b/internal/ra/service/registration.go @@ -75,6 +75,15 @@ type RegisterRequest struct { ServerCertificatePEM string ServerCertificateChainPEM string SchemaVersion string + + // DiscoveryProfiles is the set of DNS record families the RA emits + // in dnsRecordsProvisioned and tells the operator to publish. + // Each element is one of domain.ValidDiscoveryProfiles(); typical + // values are {ANS_SVCB} (default), {ANS_TXT}, or the + // {ANS_SVCB, ANS_TXT} transition union. Empty/nil normalizes to + // domain.DefaultDiscoveryProfiles(); any invalid element surfaces + // as INVALID_DISCOVERY_PROFILE before the aggregate is created. + DiscoveryProfiles []domain.DiscoveryProfile } // RegisterResponse is returned to the HTTP handler after a successful @@ -123,21 +132,19 @@ type OutboxPayload struct { // sqlx.Tx, cloud adapters can use TransactWriteItems-style atomic // batches. type RegistrationService struct { - agents port.AgentStore - endpoints port.EndpointStore - certs port.CertificateStore - byoc port.ByocCertificateStore - renewals port.RenewalStore - validator port.CertificateValidator - identityCA port.IdentityCertificateAuthority - serverCA port.ServerCertificateAuthority // optional; nil = CSR path rejected - bus port.EventBus - outbox OutboxEnqueuer - uow port.UnitOfWork - dnsVerifier port.DNSVerifier - // tlPublicBaseURL is the externally-reachable Transparency Log URL - // used in _ans-badge DNS records (e.g. "https://tl.example.org"). - tlPublicBaseURL string + agents port.AgentStore + endpoints port.EndpointStore + certs port.CertificateStore + byoc port.ByocCertificateStore + renewals port.RenewalStore + validator port.CertificateValidator + identityCA port.IdentityCertificateAuthority + serverCA port.ServerCertificateAuthority // optional; nil = CSR path rejected + bus port.EventBus + outbox OutboxEnqueuer + uow port.UnitOfWork + dnsVerifier port.DNSVerifier + discoveryRegistry port.ProfileRegistry // signer is the KeyManager + keyID + raID tuple used to sign // outbox events. When nil, events are still persisted but without // a signature — this is only valid for tests; production configs @@ -156,6 +163,18 @@ type EventSigner struct { // NewRegistrationService constructs a RegistrationService. Dependencies // are injected per SOLID; tests substitute fakes. +// +// discoveryRegistry is required at construction and the constructor +// panics on nil — a missing registry would silently emit zero +// `dnsRecordsProvisioned[]` and accept any DNS state at verify-dns +// (trust-root corruption masquerading as graceful degradation), so +// fail-loud at construction is the only correct policy. Construction +// runs at process start, never on a request path, so the no-panics-in- +// request-paths rule (CLAUDE.md) is upheld. Production builds wire the +// bundled ANS-family registry in cmd/ans-ra/main.go via +// registry.New(ans.TXTProfile{}, ans.SVCBProfile{}); tests build the same +// registry through service.NewDefaultProfileRegistry. There is no +// optional builder. func NewRegistrationService( agents port.AgentStore, endpoints port.EndpointStore, @@ -167,19 +186,24 @@ func NewRegistrationService( bus port.EventBus, outbox OutboxEnqueuer, uow port.UnitOfWork, + discoveryRegistry port.ProfileRegistry, ) *RegistrationService { + if discoveryRegistry == nil { + panic("service.NewRegistrationService: discoveryRegistry is required (nil interface — wire registry.New(...) at construction)") + } return &RegistrationService{ - agents: agents, - endpoints: endpoints, - certs: certs, - byoc: byoc, - renewals: renewals, - validator: validator, - identityCA: identityCA, - bus: bus, - outbox: outbox, - uow: uow, - clock: time.Now, + agents: agents, + endpoints: endpoints, + certs: certs, + byoc: byoc, + renewals: renewals, + validator: validator, + identityCA: identityCA, + bus: bus, + outbox: outbox, + uow: uow, + discoveryRegistry: discoveryRegistry, + clock: time.Now, } } @@ -209,19 +233,6 @@ func (s *RegistrationService) WithDNSVerifier(v port.DNSVerifier) *RegistrationS return s } -// WithTLPublicBaseURL sets the externally-reachable Transparency Log -// URL used in _ans-badge DNS TXT records. Without this, badge records -// fall back to the agent's own endpoint URL. -func (s *RegistrationService) WithTLPublicBaseURL(publicBaseURL string) *RegistrationService { - s.tlPublicBaseURL = publicBaseURL - return s -} - -// TLPublicBaseURL returns the configured public TL base URL. -func (s *RegistrationService) TLPublicBaseURL() string { - return s.tlPublicBaseURL -} - // RegisterAgent implements the V2 registration flow: // 1. Validate the request shape via domain constructors. // 2. Check ANS name uniqueness. @@ -333,6 +344,10 @@ func (s *RegistrationService) RegisterAgent(ctx context.Context, req RegisterReq } reg.ServerCSR = pendingServerCSR + if err := applyDiscoveryProfiles(reg, req); err != nil { + return nil, err + } + // Generate the ACME DNS-01 challenge token + expiry. The only // DNS action the operator should take before verify-acme. dns01, _, err := generateChallengeTokens() diff --git a/internal/ra/service/registration_test.go b/internal/ra/service/registration_test.go index 5eadb75..d62f6f9 100644 --- a/internal/ra/service/registration_test.go +++ b/internal/ra/service/registration_test.go @@ -68,6 +68,7 @@ func TestRegistration_NoSigner(t *testing.T) { svcNoSig := service.NewRegistrationService( fx.agents, fx.endpoints, fx.certs, fx.byoc, fx.renewals, fx.validator, fx.identityCA, fx.bus, fx.outboxStore, fx.uow, + fx.discoveryReg, ).WithServerCertificateAuthority(fx.serverCA) // Use a fresh ANS name + matching CSR + matching endpoints so @@ -124,6 +125,7 @@ func TestRegistration_RollsBackOnPartialFailure(t *testing.T) { svc := service.NewRegistrationService( fx.agents, failingEndpoints, fx.certs, fx.byoc, fx.renewals, fx.validator, fx.identityCA, fx.bus, fx.outboxStore, fx.uow, + fx.discoveryReg, ).WithServerCertificateAuthority(fx.serverCA) if _, err := svc.RegisterAgent(context.Background(), fx.req); err == nil { @@ -198,6 +200,7 @@ func TestRevoke_RollsBackOnOutboxFailure(t *testing.T) { svc := service.NewRegistrationService( fx.agents, fx.endpoints, fx.certs, fx.byoc, fx.renewals, fx.validator, fx.identityCA, fx.bus, &failingOutbox{}, fx.uow, + fx.discoveryReg, ).WithServerCertificateAuthority(fx.serverCA) if _, err := svc.Revoke(context.Background(), agentID, service.RevokeInput{ @@ -266,6 +269,7 @@ type regFixture struct { identityCA port.IdentityCertificateAuthority serverCA port.ServerCertificateAuthority bus port.EventBus + discoveryReg port.ProfileRegistry signerPubPEM string } @@ -321,8 +325,13 @@ func newRegFixture(t *testing.T) *regFixture { t.Fatal(err) } + discoveryReg, err := service.NewDefaultProfileRegistry("") + if err != nil { + t.Fatal(err) + } + svc := service.NewRegistrationService( - agents, endpoints, certsStore, byoc, renewals, validator, identityCA, bus, outbox, db, + agents, endpoints, certsStore, byoc, renewals, validator, identityCA, bus, outbox, db, discoveryReg, ).WithSigner(service.EventSigner{ KeyManager: km, KeyID: "ra-signer", @@ -349,6 +358,7 @@ func newRegFixture(t *testing.T) *regFixture { identityCA: identityCA, serverCA: serverCA, bus: bus, + discoveryReg: discoveryReg, signerPubPEM: pubPEM, req: service.RegisterRequest{ OwnerID: "owner-1", diff --git a/internal/ra/service/v1event.go b/internal/ra/service/v1event.go index edcb955..d42e95d 100644 --- a/internal/ra/service/v1event.go +++ b/internal/ra/service/v1event.go @@ -265,7 +265,7 @@ func (s *RegistrationService) buildAgentRevokedV1Event( // sees the full record set (including per-endpoint metadata // records). If it didn't, we'd get back an empty list and the // revoke envelope would ship with no DNS tear-down guidance. - expected := domain.ComputeRequiredDNSRecords(reg, s.tlPublicBaseURL) + expected := s.ComputeRequiredDNSRecords(reg) dnsMap := make(map[string]string, len(expected)) for _, r := range expected { dnsMap[r.Name] = r.Value diff --git a/scripts/demo/run-lifecycle.sh b/scripts/demo/run-lifecycle.sh index f0a4437..bbc17cb 100755 --- a/scripts/demo/run-lifecycle.sh +++ b/scripts/demo/run-lifecycle.sh @@ -178,8 +178,27 @@ curl_json GET "/v2/ans/agents/$AGENT_ID" >/dev/null # identity + server CSRs once the operator has proven domain control # via the ACME DNS-01 challenge. This is also when production DNS # records (TRUST/BADGE/DISCOVERY/TLSA) become computable — TLSA's -# value is `3 1 1 SHA-256(server-cert-SPKI)`, so it can't exist before -# the server cert does. +# value is `3 0 1 SHA-256(full server-cert DER)`, so it can't exist +# before the server cert does. +# +# When ans-dns is bundled (start.sh --with-dns), publish the ACME +# DNS-01 challenge TXT into the local authoritative zone first — +# this is the operator action the RA's lookup verifier checks. +# `ans-dns install` cannot do this stage: it reads the pending +# dnsRecords[] block, which is intentionally empty during +# PENDING_VALIDATION (the challenge rides in challenges[] instead). +# Falls through silently in noop mode. +if [ -n "${ANS_DNS_ZONE:-}" ] && [ -x "$BIN/ans-dns" ]; then + note "publishing ACME challenge TXT into $ANS_DNS_ZONE" + PENDING_RESP=$(curl_json GET "/v2/ans/agents/$AGENT_ID") + CH_NAME=$(printf '%s' "$PENDING_RESP" | jq -r '.registrationPending.challenges[0].dnsRecord.name // empty') + CH_VALUE=$(printf '%s' "$PENDING_RESP" | jq -r '.registrationPending.challenges[0].dnsRecord.value // empty') + [ -n "$CH_NAME" ] || fail "no ACME challenge dnsRecord in pending block" + [ -f "$ANS_DNS_ZONE" ] || printf '{"records":{}}' >"$ANS_DNS_ZONE" + jq --arg id "$AGENT_ID-acme" --arg name "$CH_NAME" --arg value "$CH_VALUE" \ + '.records[$id] = [{name:$name, type:"TXT", value:$value, ttl:60}]' \ + "$ANS_DNS_ZONE" >"$ANS_DNS_ZONE.tmp" && mv "$ANS_DNS_ZONE.tmp" "$ANS_DNS_ZONE" +fi header "3. POST /v2/ans/agents/$AGENT_ID/verify-acme (→ PENDING_DNS, issues identity + server certs)" curl_json POST "/v2/ans/agents/$AGENT_ID/verify-acme" >/dev/null assert_2xx "verify-acme" diff --git a/spec/api-spec-v2.yaml b/spec/api-spec-v2.yaml index 9ad2749..1356158 100644 --- a/spec/api-spec-v2.yaml +++ b/spec/api-spec-v2.yaml @@ -1023,6 +1023,49 @@ components: type: string enum: [A2A, MCP, HTTP-API] + DiscoveryProfile: + type: string + enum: [ANS_SVCB, ANS_TXT] + description: | + Names one DNS record family the RA can emit for an agent + registration. Used as the element type of discoveryProfiles[]. + Each value provisions a complete record set, not a single + record — the bullets below enumerate exactly what an operator + must publish for that profile. + + - ANS_SVCB: the Consolidated Approach (RFC 9460), recommended + default for new integrations. Provisions: + 1. One SVCB row per protocol endpoint at the bare FQDN + (ServiceMode `1 .`), carrying `alpn` (the protocol + token: a2a / mcp / http-api), `port` (the endpoint's + TLS port), `key65280` (the well-known path suffix, e.g. + agent-card.json for A2A; omitted for protocols with no + metadata-file convention), and `key65281` (the + base64url capability digest, present only when the + endpoint declared a metaDataHash). `key65280` and + `key65281` are the RFC 9460 §14.3.1 Private Use + presentation of the draft `wk` / `cap-sha256` SvcParams, + which have no IANA code point yet; the named forms are + unpublishable (strict RFC 9460 parsers reject them), so + the keyNNNNN forms are what reaches DNS. They switch + back to named forms if/when IANA registers the keys. + 2. One `_ans-badge` TXT at `_ans-badge.{fqdn}` (the + transparency-log discovery hint). + 3. One TLSA per distinct TLS port at `_._tcp.{fqdn}` + binding the server certificate (DANE-EE, full cert, + SHA-256), emitted only when a server certificate exists. + - ANS_TXT: original `_ans` TXT shape (one row per protocol at + `_ans.{fqdn}`), supported indefinitely for operators with + existing zone-edit tooling that targets `_ans.{fqdn}`. Emits + an HTTPS RR at the bare FQDN alongside carrying only + `alpn=h2` (an IANA-registered SvcParam — no keyNNNNN form is + needed here; the asymmetry with ANS_SVCB is intentional), + since `_ans` TXT carries no connection hints. The HTTPS RR is + best-effort: operators on CNAME-fronted apexes cannot publish + a record at that name (RFC 1034 §3.6.2) and verification does + not require it. Provisions the same `_ans-badge` TXT and + per-port TLSA records as ANS_SVCB. + RevocationReason: type: string enum: @@ -1070,11 +1113,42 @@ components: from this CSR at verify-acme. When omitted, the agent registers without an identity certificate and cannot add one later — it must register a new version instead. + discoveryProfiles: + type: array + items: + $ref: '#/components/schemas/DiscoveryProfile' + uniqueItems: true + minItems: 1 + description: | + Set of DNS record families the RA tells the operator to + publish and emits in the AGENT_REGISTERED TL event's + attestations.dnsRecordsProvisioned[]. The computed records + surface to the client on GET /v2/ans/agents/{agentId} as + registrationPending.dnsRecords[] once the agent reaches + PENDING_DNS — not on the 202 register response, which + returns only the ACME challenge (production records are + deferred until verify-acme proves domain control and issues + the certificates the TLSA binding depends on). + + Each value names one record family; an operator publishing + the union (Consolidated Approach SVCB plus the original + `_ans` TXT shape) sends both. Order is not significant. + + Omitted normalizes to ["ANS_SVCB"] server-side (the + recommended default per RFC 9460). The `minItems`/ + `uniqueItems` schema constraints are the canonical client + contract — validate before sending. A non-conformant + request (an explicit empty array, or duplicate values) is + handled defensively rather than rejected: empty is treated + as omitted and duplicates are ignored, but conformant + clients never send either. + example: ["ANS_SVCB"] required: - agentDisplayName - version - agentHost - endpoints + - discoveryProfiles AgentRevocationRequest: type: object @@ -1287,7 +1361,7 @@ components: type: string type: type: string - enum: [HTTPS, TLSA, TXT] + enum: [HTTPS, SVCB, TLSA, TXT] value: type: string priority: @@ -1584,5 +1658,10 @@ components: $ref: '#/components/schemas/DnsRecord' found: type: string + description: | + The live record value observed when a record exists + but does not match the required value. expected: - type: string \ No newline at end of file + type: string + description: | + The required value; equals record.value. \ No newline at end of file