diff --git a/config/extension-server/deployment.yaml b/config/extension-server/deployment.yaml index e9e81fd9..6e4f01d0 100644 --- a/config/extension-server/deployment.yaml +++ b/config/extension-server/deployment.yaml @@ -58,6 +58,7 @@ spec: - --tls-key=$(TLS_KEY) - --tls-client-ca=$(TLS_CLIENT_CA) - --server-config=$(SERVER_CONFIG) + - --enable-programmed-set=$(ENABLE_PROGRAMMED_SET) env: - name: GRPC_ADDR value: ":5005" @@ -73,6 +74,11 @@ spec: # mounted path to apply Coraza directives. - name: SERVER_CONFIG value: "" + # Off in production. The test-environment overlay sets this to "true" + # to serve the read-only /debug/programmed-set endpoint the parity + # test reads. + - name: ENABLE_PROGRAMMED_SET + value: "false" ports: - name: grpc containerPort: 5005 diff --git a/internal/extensionserver/cmd/run.go b/internal/extensionserver/cmd/run.go index 2ddae519..62b9353d 100644 --- a/internal/extensionserver/cmd/run.go +++ b/internal/extensionserver/cmd/run.go @@ -68,13 +68,15 @@ func envBool(key string, def bool) bool { // --tls-key=/tls/tls.key // --tls-client-ca=/tls/ca.crt // --server-config=/config/config.yaml (optional; provides Coraza WAF config) +// --enable-programmed-set=false (default off; test environments set true) type options struct { - grpcAddr string - healthAddr string - tlsCert string - tlsKey string - tlsClientCA string - serverCfgFile string + grpcAddr string + healthAddr string + tlsCert string + tlsKey string + tlsClientCA string + serverCfgFile string + enableProgrammedSet bool } // NewCommand returns the "extension-server" subcommand. @@ -99,6 +101,8 @@ func NewCommand() *cobra.Command { fs.StringVar(&o.tlsKey, "tls-key", "", "Path to server TLS key (PEM)") fs.StringVar(&o.tlsClientCA, "tls-client-ca", "", "Path to client CA certificate (PEM) for mTLS") fs.StringVar(&o.serverCfgFile, "server-config", "", "Path to operator config file (optional; provides Coraza WAF settings)") + fs.BoolVar(&o.enableProgrammedSet, "enable-programmed-set", false, + "Serve the read-only /debug/programmed-set endpoint used by the parity test (test environments only)") cmd := &cobra.Command{ Use: "envoy-gateway-extension-server", @@ -258,6 +262,9 @@ func run(o options) { ConnectorInternalListener: serverConfig.Gateway.ConnectorTunnelListenerName(), CorazaRouteBaseDirectives: coraza.RouteBaseDirectives, LocalReply: buildLocalReplyConfig(serverConfig.Gateway.ErrorPage, log), + // The programmed-set debug endpoint is for the test environment only; it + // stays off in production unless --enable-programmed-set is passed. + EnableProgrammedSet: o.enableProgrammedSet, } // --- gRPC panic recovery interceptor --- @@ -450,6 +457,12 @@ func run(o options) { } }) mux.Handle("/metrics", promhttp.Handler()) + // Read-only debug endpoint that reports what the last build changed, so a test + // can confirm the proxy is running exactly that. Test environment only; it is + // not served in production. Keeps only the last build. + if srvCfg.EnableProgrammedSet { + mux.HandleFunc(extserver.ProgrammedSetEndpointPath, extSrv.ProgrammedSetHandler()) + } healthServer := &http.Server{ Addr: healthAddr, Handler: mux, diff --git a/internal/extensionserver/server/programmedset.go b/internal/extensionserver/server/programmedset.go new file mode 100644 index 00000000..e4ebdf7a --- /dev/null +++ b/internal/extensionserver/server/programmedset.go @@ -0,0 +1,192 @@ +package server + +import ( + "encoding/json" + "net/http" + "sync" + "time" + + clusterv3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" + listenerv3 "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" + routev3 "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" + hcmv3 "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3" +) + +// programmedSetEndpointPath is a read-only debug endpoint that reports the exact +// set of changes the last build intended to make, so a test can confirm the +// proxy is running that set and not merely the right number of things. +const programmedSetEndpointPath = "/debug/programmed-set" + +// Marker strings, kept in sync with where the configuration is written, used to +// recognize each change when reading the configuration back. +const ( + datumGatewayMetaKey = "datum-gateway" + connectorInternalTransport = "envoy.transport_sockets.internal_upstream" + hcmNetworkFilterName = "envoy.filters.network.http_connection_manager" +) + +// Family names a kind of change recorded in a snapshot. The strings are stable +// JSON keys the parity test depends on; do not rename. +type Family string + +const ( + FamilyWAFRoute Family = "waf_route" + FamilyWAFHCM Family = "waf_hcm" + FamilyLocalReply Family = "local_reply" + FamilyConnectorCluster Family = "connector_cluster" + FamilyConnectorRoute Family = "connector_route" + FamilyConnectorOffline Family = "connector_offline" + FamilyTLSPrune Family = "tls_prune" +) + +// ProgrammedSet is a snapshot of one build's intended changes. It is served as +// JSON and read back by the parity test. +type ProgrammedSet struct { + // BuildID increments once per build so a caller can tell two successive + // snapshots apart and know a fresh build has happened. + BuildID uint64 `json:"buildID"` + // CapturedAt is when the snapshot was recorded. + CapturedAt time.Time `json:"capturedAt"` + // Keys is, per kind of change, the identity of each thing changed. Each value + // is sorted and de-duplicated. The identity format is described on the + // record helpers below and mirrored by the parity test. + Keys map[Family][]string `json:"keys"` + // Counts holds the size of each set in Keys, plus the removed-certificate + // outcomes below that have no per-item identity. Used as a secondary check. + Counts map[Family]int `json:"counts"` + // Removing invalid TLS certificates is confirmed by counting outcomes rather + // than by identity, so the raw numbers are carried here. + TLSPrunedChains int `json:"tlsPrunedChains"` + TLSPrunedSecrets int `json:"tlsPrunedSecrets"` + TLSListenersLeftIntact int `json:"tlsListenersLeftIntact"` +} + +// programmedRecorder holds the most recent snapshot under a lock, shared between +// the build that writes it and the endpoint that reads it. Only the last build +// is kept; each recording replaces the previous one. +type programmedRecorder struct { + mu sync.RWMutex + buildID uint64 + last *ProgrammedSet +} + +// newProgrammedRecorder returns an empty recorder. Before the first build it +// reports a valid empty snapshot with a zero build count, so callers can tell +// "no build yet" from a real build. +func newProgrammedRecorder() *programmedRecorder { + return &programmedRecorder{} +} + +// snapshot returns the last recorded set, or an empty one if no build has run. +// The returned value is a copy safe to serialize without the lock. +func (r *programmedRecorder) snapshot() ProgrammedSet { + r.mu.RLock() + defer r.mu.RUnlock() + if r.last == nil { + return ProgrammedSet{Keys: map[Family][]string{}, Counts: map[Family]int{}} + } + return *r.last +} + +// record reads the configuration the build just produced and stores a snapshot +// of it. It only reads, never changes the configuration, and holds the write +// lock just long enough to store the result so it doesn't slow the build. +func (r *programmedRecorder) record( + listeners []*listenerv3.Listener, + routes []*routev3.RouteConfiguration, + clusters []*clusterv3.Cluster, + corazaFilterName string, + tlsPrunedChains, tlsPrunedSecrets, tlsListenersLeftIntact int, +) { + ps := buildProgrammedSet(listeners, routes, clusters, corazaFilterName, + tlsPrunedChains, tlsPrunedSecrets, tlsListenersLeftIntact) + + r.mu.Lock() + r.buildID++ + ps.BuildID = r.buildID + r.last = &ps + r.mu.Unlock() +} + +// buildProgrammedSet walks the produced configuration and extracts an identity +// for each change. It is a pure function so it can be unit tested directly. The +// identity formats must match what the parity test looks for; keep both sides +// in lockstep. +func buildProgrammedSet( + listeners []*listenerv3.Listener, + routes []*routev3.RouteConfiguration, + clusters []*clusterv3.Cluster, + corazaFilterName string, + tlsPrunedChains, tlsPrunedSecrets, tlsListenersLeftIntact int, +) ProgrammedSet { + keys := map[Family][]string{} + add := func(f Family, k string) { keys[f] = append(keys[f], k) } + + for _, rc := range routes { + rcName := rc.GetName() + for _, vh := range rc.GetVirtualHosts() { + vhName := vh.GetName() + for _, rt := range vh.GetRoutes() { + // The identity includes the protection policy governing the route, + // so a route protected by the wrong policy shows up as a mismatch. + if ns, name, mode, ok := datumGatewayTPP(rt); ok { + add(FamilyWAFRoute, wafRouteKey(rcName, vhName, rt.GetName(), ns, name, mode)) + } + if isConnectRoute(rt) { + add(FamilyConnectorRoute, connectorRouteKey(rcName, vhName, rt.GetName())) + } + if isOfflineDirectResponse(rt) { + add(FamilyConnectorOffline, connectorRouteKey(rcName, vhName, rt.GetName())) + } + } + } + } + + for _, cl := range clusters { + if isReplacedConnectorCluster(cl) { + add(FamilyConnectorCluster, cl.GetName()) + } + } + + for _, l := range listeners { + lName := l.GetName() + eachHCM(l, func(fcName string, hcm *hcmv3.HttpConnectionManager) { + if corazaFilterName != "" && hcmHasFilterAtZero(hcm, corazaFilterName) { + add(FamilyWAFHCM, listenerChainKey(lName, fcName)) + } + if hcm.GetLocalReplyConfig() != nil { + add(FamilyLocalReply, listenerChainKey(lName, fcName)) + } + }) + } + + // Sort and de-duplicate so the output can be compared as a set. + counts := map[Family]int{} + for f := range keys { + keys[f] = sortDedup(keys[f]) + counts[f] = len(keys[f]) + } + // Removed certificates are recorded by count, since they have no identity. + counts[FamilyTLSPrune] = tlsPrunedChains + + return ProgrammedSet{ + CapturedAt: time.Now().UTC(), + Keys: keys, + Counts: counts, + TLSPrunedChains: tlsPrunedChains, + TLSPrunedSecrets: tlsPrunedSecrets, + TLSListenersLeftIntact: tlsListenersLeftIntact, + } +} + +// programmedSetHandler serves the latest snapshot as JSON. It is a read-only, +// test-only debug endpoint and returns an empty snapshot before the first build. +func (r *programmedRecorder) programmedSetHandler() http.HandlerFunc { + return func(w http.ResponseWriter, _ *http.Request) { + ps := r.snapshot() + w.Header().Set("Content-Type", "application/json") + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + _ = enc.Encode(ps) + } +} diff --git a/internal/extensionserver/server/programmedset_scan.go b/internal/extensionserver/server/programmedset_scan.go new file mode 100644 index 00000000..ad41abf2 --- /dev/null +++ b/internal/extensionserver/server/programmedset_scan.go @@ -0,0 +1,160 @@ +package server + +import ( + "sort" + "strings" + + clusterv3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" + listenerv3 "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" + routev3 "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" + hcmv3 "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3" +) + +// These read-only scanners extract an identity for each change the build made. +// They mirror, in reverse, how the configuration is written, and they define +// the exact identity format the parity test must reproduce. The "##" separator +// is used because it cannot appear in any of the names it joins. + +const keySep = "##" + +// wafRouteKey is the identity of a route protected by a firewall: +// +// ######// +// +// The governing policy is part of the identity, so a route protected by the +// wrong policy produces a different identity and is caught as a mismatch. +func wafRouteKey(rc, vh, rt, tppNS, tppName, mode string) string { + return strings.Join([]string{rc, vh, rt, tppNS + "/" + tppName + "/" + mode}, keySep) +} + +// connectorRouteKey is the identity of a connector route (online or offline): +// +// #### +func connectorRouteKey(rc, vh, rt string) string { + return strings.Join([]string{rc, vh, rt}, keySep) +} + +// listenerChainKey is the identity of a changed listener filter chain: +// +// ## +func listenerChainKey(listener, chain string) string { + return strings.Join([]string{listener, chain}, keySep) +} + +// datumGatewayTPP returns the protection policy governing a route (namespace, +// name, mode). ok is false when the route carries no such marker, meaning it is +// not protected. +func datumGatewayTPP(rt *routev3.Route) (ns, name, mode string, ok bool) { + md := rt.GetMetadata() + if md == nil { + return "", "", "", false + } + dg := md.GetFilterMetadata()[datumGatewayMetaKey] + if dg == nil { + return "", "", "", false + } + res := dg.GetFields()["resources"].GetListValue() + if res == nil || len(res.GetValues()) == 0 { + return "", "", "", false + } + f := res.GetValues()[0].GetStructValue().GetFields() + return f["namespace"].GetStringValue(), + f["name"].GetStringValue(), + f["mode"].GetStringValue(), + true +} + +// isConnectRoute reports whether a route is an online connector tunnel route. +func isConnectRoute(rt *routev3.Route) bool { + ra := rt.GetRoute() + if ra == nil { + return false + } + for _, uc := range ra.GetUpgradeConfigs() { + if strings.EqualFold(uc.GetUpgradeType(), "CONNECT") { + return true + } + } + return false +} + +// isOfflineDirectResponse reports whether a route directly returns the +// tunnel-offline 503 response, covering both the dedicated offline route and +// user-facing routes rewritten to it. +func isOfflineDirectResponse(rt *routev3.Route) bool { + dr := rt.GetDirectResponse() + if dr == nil { + return false + } + if dr.GetStatus() != 503 { + return false + } + return dr.GetBody().GetInlineString() == offlineBodyMarker +} + +// offlineBodyMarker is the response body the connector offline path writes, +// duplicated here so the scanner needs no import dependency. +const offlineBodyMarker = "Tunnel not online" + +// isReplacedConnectorCluster reports whether a connector cluster has been +// replaced with its tunnel form. A cluster that has not been replaced means the +// substitution failed. +func isReplacedConnectorCluster(cl *clusterv3.Cluster) bool { + if cl.GetType() != clusterv3.Cluster_STATIC { + return false + } + return cl.GetTransportSocket().GetName() == connectorInternalTransport +} + +// eachHCM invokes fn for every connection manager across all of the listener's +// filter chains, including the default chain. One that can't be decoded is +// skipped, since recording must never fail the build. +func eachHCM(l *listenerv3.Listener, fn func(chainName string, hcm *hcmv3.HttpConnectionManager)) { + chains := make([]*listenerv3.FilterChain, 0, len(l.GetFilterChains())+1) + chains = append(chains, l.GetFilterChains()...) + if dfc := l.GetDefaultFilterChain(); dfc != nil { + chains = append(chains, dfc) + } + for _, fc := range chains { + for _, f := range fc.GetFilters() { + if f.GetName() != hcmNetworkFilterName { + continue + } + tc := f.GetTypedConfig() + if tc == nil { + continue + } + hcm := &hcmv3.HttpConnectionManager{} + if err := tc.UnmarshalTo(hcm); err != nil { + continue + } + fn(fc.GetName(), hcm) + } + } +} + +// hcmHasFilterAtZero reports whether the first filter is named filterName. The +// firewall is always inserted first, so checking the first position is the +// precise signal that it was injected. +func hcmHasFilterAtZero(hcm *hcmv3.HttpConnectionManager, filterName string) bool { + fs := hcm.GetHttpFilters() + return len(fs) > 0 && fs[0].GetName() == filterName +} + +// sortDedup returns a sorted, de-duplicated copy of in. +func sortDedup(in []string) []string { + if len(in) == 0 { + return in + } + seen := make(map[string]struct{}, len(in)) + out := make([]string, 0, len(in)) + for _, s := range in { + if _, ok := seen[s]; ok { + continue + } + seen[s] = struct{}{} + out = append(out, s) + } + sort.Strings(out) + return out +} diff --git a/internal/extensionserver/server/programmedset_test.go b/internal/extensionserver/server/programmedset_test.go new file mode 100644 index 00000000..a18cc253 --- /dev/null +++ b/internal/extensionserver/server/programmedset_test.go @@ -0,0 +1,363 @@ +package server + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + pb "github.com/envoyproxy/gateway/proto/extension" + clusterv3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" + corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + endpointv3 "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" + listenerv3 "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" + routev3 "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" + hcmv3 "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3" + internalupstreamv3 "github.com/envoyproxy/go-control-plane/envoy/extensions/transport_sockets/internal_upstream/v3" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/structpb" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" + gatewayv1alpha2 "sigs.k8s.io/gateway-api/apis/v1alpha2" + + networkingv1alpha "go.datum.net/network-services-operator/api/v1alpha" + networkingv1alpha1 "go.datum.net/network-services-operator/api/v1alpha1" +) + +// --- buildProgrammedSet (pure function) --- + +// mkWAFRoute builds a route already carrying the datum-gateway metadata and a +// coraza typed_per_filter_config, simulating a route after ApplyTPPRouteConfig. +func mkWAFRoute(t *testing.T, tppNS, tppName string) *routev3.Route { + t.Helper() + meta, err := structpb.NewStruct(map[string]any{ + "resources": []any{ + map[string]any{ + "kind": "TrafficProtectionPolicy", + "namespace": tppNS, + "name": tppName, + "mode": "Observe", + }, + }, + }) + require.NoError(t, err) + return &routev3.Route{ + Name: "fwd", + Metadata: &corev3.Metadata{ + FilterMetadata: map[string]*structpb.Struct{datumGatewayMetaKey: meta}, + }, + } +} + +// mkCorazaHCMListener builds a listener whose HCM has the coraza filter at +// position 0 and a local_reply_config, simulating a fully-mutated listener. +func mkCorazaHCMListener(t *testing.T, name, corazaFilter string, withLocalReply bool) *listenerv3.Listener { + t.Helper() + hcm := &hcmv3.HttpConnectionManager{ + RouteSpecifier: &hcmv3.HttpConnectionManager_Rds{Rds: &hcmv3.Rds{RouteConfigName: name}}, + HttpFilters: []*hcmv3.HttpFilter{ + {Name: corazaFilter, Disabled: true}, + {Name: "envoy.filters.http.router"}, + }, + } + if withLocalReply { + hcm.LocalReplyConfig = &hcmv3.LocalReplyConfig{} + } + hcmAny, err := anypb.New(hcm) + require.NoError(t, err) + return &listenerv3.Listener{ + Name: name, + FilterChains: []*listenerv3.FilterChain{{ + Name: "fc", + Filters: []*listenerv3.Filter{{ + Name: hcmNetworkFilterName, + ConfigType: &listenerv3.Filter_TypedConfig{TypedConfig: hcmAny}, + }}, + }}, + } +} + +// mkReplacedConnectorCluster builds a STATIC internal-upstream cluster matching +// what buildConnectorCluster produces. +func mkReplacedConnectorCluster(t *testing.T, name string) *clusterv3.Cluster { + t.Helper() + ts, err := anypb.New(&internalupstreamv3.InternalUpstreamTransport{}) + require.NoError(t, err) + return &clusterv3.Cluster{ + Name: name, + ClusterDiscoveryType: &clusterv3.Cluster_Type{Type: clusterv3.Cluster_STATIC}, + LoadAssignment: &endpointv3.ClusterLoadAssignment{ClusterName: name}, + TransportSocket: &corev3.TransportSocket{ + Name: connectorInternalTransport, + ConfigType: &corev3.TransportSocket_TypedConfig{TypedConfig: ts}, + }, + } +} + +func TestBuildProgrammedSet_AllFamilies(t *testing.T) { + const corazaFilter = "coraza-waf" + + listeners := []*listenerv3.Listener{ + mkCorazaHCMListener(t, "gw/https", corazaFilter, true), + } + routes := []*routev3.RouteConfiguration{{ + Name: "gw/https", + VirtualHosts: []*routev3.VirtualHost{{ + Name: "vh", + Routes: []*routev3.Route{ + // online CONNECT route + { + Name: "connector-connect-vh", + Action: &routev3.Route_Route{Route: &routev3.RouteAction{ + UpgradeConfigs: []*routev3.RouteAction_UpgradeConfig{{UpgradeType: "CONNECT"}}, + }}, + }, + // WAF-governed forwarding route + mkWAFRoute(t, "proj-ns", "test-tpp"), + }, + }}, + }} + clusters := []*clusterv3.Cluster{ + mkReplacedConnectorCluster(t, "httproute/proj-ns/proxy/rule/0"), + {Name: "infra-cluster"}, // not a connector cluster + } + + ps := buildProgrammedSet(listeners, routes, clusters, corazaFilter, 2, 1, 0) + + assert.Equal(t, []string{wafRouteKey("gw/https", "vh", "fwd", "proj-ns", "test-tpp", "Observe")}, + ps.Keys[FamilyWAFRoute]) + assert.Equal(t, 1, ps.Counts[FamilyWAFRoute]) + + assert.Equal(t, []string{listenerChainKey("gw/https", "fc")}, ps.Keys[FamilyWAFHCM]) + assert.Equal(t, []string{listenerChainKey("gw/https", "fc")}, ps.Keys[FamilyLocalReply]) + + assert.Equal(t, []string{"httproute/proj-ns/proxy/rule/0"}, ps.Keys[FamilyConnectorCluster]) + assert.Equal(t, []string{connectorRouteKey("gw/https", "vh", "connector-connect-vh")}, + ps.Keys[FamilyConnectorRoute]) + + // TLS prune outcome carried directly. + assert.Equal(t, 2, ps.TLSPrunedChains) + assert.Equal(t, 1, ps.TLSPrunedSecrets) + assert.Equal(t, 0, ps.TLSListenersLeftIntact) + assert.Equal(t, 2, ps.Counts[FamilyTLSPrune]) +} + +func TestBuildProgrammedSet_OfflineConnector(t *testing.T) { + routes := []*routev3.RouteConfiguration{{ + Name: "gw/https", + VirtualHosts: []*routev3.VirtualHost{{ + Name: "vh", + Routes: []*routev3.Route{ + { + Name: "connector-offline-vh", + Action: &routev3.Route_DirectResponse{DirectResponse: &routev3.DirectResponseAction{ + Status: 503, + Body: &corev3.DataSource{Specifier: &corev3.DataSource_InlineString{InlineString: offlineBodyMarker}}, + }}, + }, + }, + }}, + }} + + ps := buildProgrammedSet(nil, routes, nil, "coraza-waf", 0, 0, 0) + assert.Equal(t, []string{connectorRouteKey("gw/https", "vh", "connector-offline-vh")}, + ps.Keys[FamilyConnectorOffline]) + // A 503 with a different body is NOT an offline connector marker. + assert.Empty(t, ps.Keys[FamilyConnectorRoute]) +} + +// TestBuildProgrammedSet_WrongKeyedOracle proves the WAF route key changes when +// the governing TPP changes even though the route count is identical — the +// wrong-keyed class the gate exists to catch. +func TestBuildProgrammedSet_WrongKeyedOracle(t *testing.T) { + routesRight := []*routev3.RouteConfiguration{{ + Name: "gw/https", + VirtualHosts: []*routev3.VirtualHost{{Name: "vh", Routes: []*routev3.Route{mkWAFRoute(t, "proj-a", "tpp-a")}}}, + }} + routesWrong := []*routev3.RouteConfiguration{{ + Name: "gw/https", + VirtualHosts: []*routev3.VirtualHost{{Name: "vh", Routes: []*routev3.Route{mkWAFRoute(t, "proj-b", "tpp-b")}}}, + }} + + right := buildProgrammedSet(nil, routesRight, nil, "coraza-waf", 0, 0, 0) + wrong := buildProgrammedSet(nil, routesWrong, nil, "coraza-waf", 0, 0, 0) + + assert.Equal(t, right.Counts[FamilyWAFRoute], wrong.Counts[FamilyWAFRoute], + "counts identical — count-only gate would pass") + assert.NotEqual(t, right.Keys[FamilyWAFRoute], wrong.Keys[FamilyWAFRoute], + "key set differs — wrong-keyed must be detectable") +} + +func TestBuildProgrammedSet_NoCorazaFilterName(t *testing.T) { + // With an empty filter name the WAF HCM family must not be recorded even + // though the HCM happens to carry a position-0 filter. + listeners := []*listenerv3.Listener{mkCorazaHCMListener(t, "gw/https", "coraza-waf", false)} + ps := buildProgrammedSet(listeners, nil, nil, "", 0, 0, 0) + assert.Empty(t, ps.Keys[FamilyWAFHCM]) +} + +// --- recorder + endpoint --- + +func TestProgrammedRecorder_EmptyBeforeFirstBuild(t *testing.T) { + r := newProgrammedRecorder() + ps := r.snapshot() + assert.Equal(t, uint64(0), ps.BuildID) + assert.NotNil(t, ps.Keys) + assert.NotNil(t, ps.Counts) +} + +func TestProgrammedRecorder_BuildIDIncrements(t *testing.T) { + r := newProgrammedRecorder() + r.record(nil, nil, nil, "coraza-waf", 0, 0, 0) + first := r.snapshot() + r.record(nil, nil, nil, "coraza-waf", 0, 0, 0) + second := r.snapshot() + assert.Equal(t, uint64(1), first.BuildID) + assert.Equal(t, uint64(2), second.BuildID) +} + +func TestProgrammedSetHandler_ServesJSON(t *testing.T) { + r := newProgrammedRecorder() + routes := []*routev3.RouteConfiguration{{ + Name: "gw/https", + VirtualHosts: []*routev3.VirtualHost{{Name: "vh", Routes: []*routev3.Route{mkWAFRoute(t, "proj-a", "tpp-a")}}}, + }} + r.record(nil, routes, nil, "coraza-waf", 0, 0, 0) + + req := httptest.NewRequest(http.MethodGet, programmedSetEndpointPath, nil) + w := httptest.NewRecorder() + r.programmedSetHandler()(w, req) + + require.Equal(t, http.StatusOK, w.Code) + assert.Equal(t, "application/json", w.Header().Get("Content-Type")) + + var got ProgrammedSet + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &got)) + assert.Equal(t, uint64(1), got.BuildID) + assert.Equal(t, []string{wafRouteKey("gw/https", "vh", "fwd", "proj-a", "tpp-a", "Observe")}, + got.Keys[FamilyWAFRoute]) +} + +// TestPostTranslateModify_RecordsProgrammedSet drives the full hook with the +// same fixture as TestPostTranslateModify_FullSnapshot and asserts the +// programmed-set endpoint reflects the build (1 WAF route, 1 WAF HCM, 1 +// connector cluster, 1 connector route). +func TestPostTranslateModify_RecordsProgrammedSet(t *testing.T) { + const ( + upstreamNS = "test-project" + dsNS = upstreamNS + gwName = "pset-gw" + proxyName = "test-proxy" + connectorName = "test-connector" + nodeID = "test-node-id" + connCluster = "httproute/" + dsNS + "/" + proxyName + "/rule/0" + ) + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: upstreamNS}} + tpp := &networkingv1alpha.TrafficProtectionPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-tpp", Namespace: upstreamNS}, + Spec: networkingv1alpha.TrafficProtectionPolicySpec{ + Mode: networkingv1alpha.TrafficProtectionPolicyObserve, + TargetRefs: []gatewayv1alpha2.LocalPolicyTargetReferenceWithSectionName{{ + LocalPolicyTargetReference: gatewayv1.LocalPolicyTargetReference{ + Kind: "Gateway", Name: gatewayv1.ObjectName(gwName), + }, + }}, + RuleSets: []networkingv1alpha.TrafficProtectionPolicyRuleSet{{ + Type: networkingv1alpha.TrafficProtectionPolicyOWASPCoreRuleSet, + OWASPCoreRuleSet: networkingv1alpha.OWASPCRS{ + ParanoiaLevels: networkingv1alpha.ParanoiaLevels{Blocking: 1, Detection: 1}, + ScoreThresholds: networkingv1alpha.OWASPScoreThresholds{Inbound: 5, Outbound: 4}, + }, + }}, + }, + } + proxy := &networkingv1alpha.HTTPProxy{ + ObjectMeta: metav1.ObjectMeta{Name: proxyName, Namespace: upstreamNS}, + Spec: networkingv1alpha.HTTPProxySpec{ + Rules: []networkingv1alpha.HTTPProxyRule{{ + Backends: []networkingv1alpha.HTTPProxyRuleBackend{{ + Endpoint: "http://backend.example.com:9000", + Connector: &networkingv1alpha.ConnectorReference{Name: connectorName}, + }}, + }}, + }, + } + connector := &networkingv1alpha1.Connector{ + ObjectMeta: metav1.ObjectMeta{Name: connectorName, Namespace: upstreamNS}, + Status: networkingv1alpha1.ConnectorStatus{ + Conditions: []metav1.Condition{{ + Type: networkingv1alpha1.ConnectorConditionReady, Status: metav1.ConditionTrue, + Reason: "Ready", LastTransitionTime: metav1.Now(), + }}, + ConnectionDetails: &networkingv1alpha1.ConnectorConnectionDetails{ + Type: networkingv1alpha1.PublicKeyConnectorConnectionType, + PublicKey: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{Id: nodeID}, + }, + }, + } + + scheme := testServerScheme(t) + cl := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(ns, tpp, proxy). + WithStatusSubresource(connector).WithObjects(connector).Build() + // Recording only happens when the test-environment endpoint is enabled. + cfg := testServerConfig() + cfg.EnableProgrammedSet = true + srv := New(cl, cfg, discardLogger()) + + req := &pb.PostTranslateModifyRequest{ + Clusters: []*clusterv3.Cluster{{Name: connCluster}, {Name: "infra-cluster"}}, + Listeners: []*listenerv3.Listener{ + mkListenerWithHCM(t), + mkListenerWithStaticRoute(t, "envoy-gateway-proxy-ready-0.0.0.0-19003"), + }, + Routes: []*routev3.RouteConfiguration{{ + Name: "consumer-gw/pset-gw/https", + VirtualHosts: []*routev3.VirtualHost{{ + Name: "vh", Domains: []string{"app.example.com"}, Metadata: egGatewayMeta(t, dsNS, gwName), + Routes: []*routev3.Route{{ + Name: "fwd", + Action: &routev3.Route_Route{Route: &routev3.RouteAction{ + ClusterSpecifier: &routev3.RouteAction_Cluster{Cluster: connCluster}, + }}, + }}, + }}, + }}, + } + + _, err := srv.PostTranslateModify(context.Background(), req) + require.NoError(t, err) + + ps := srv.programmed.snapshot() + assert.Equal(t, uint64(1), ps.BuildID) + assert.Equal(t, 1, ps.Counts[FamilyWAFRoute], "one WAF-governed route") + assert.Equal(t, 1, ps.Counts[FamilyWAFHCM], "coraza injected into the one RDS HCM, not the readiness listener") + assert.Equal(t, 1, ps.Counts[FamilyConnectorCluster], "one connector cluster replaced") + assert.Equal(t, 1, ps.Counts[FamilyConnectorRoute], "one CONNECT route prepended") + + // The WAF route key must name the governing TPP (wrong-keyed oracle). + require.Len(t, ps.Keys[FamilyWAFRoute], 1) + assert.Contains(t, ps.Keys[FamilyWAFRoute][0], "test-project/test-tpp/Observe") +} + +// TestPostTranslateModify_NoRecordingWhenDisabled proves the production default: +// with the test-only endpoint off, a build records nothing, so the snapshot +// stays empty and no per-build work is done. +func TestPostTranslateModify_NoRecordingWhenDisabled(t *testing.T) { + cl := fake.NewClientBuilder().WithScheme(testServerScheme(t)).Build() + // testServerConfig() leaves EnableProgrammedSet at its false default. + srv := New(cl, testServerConfig(), discardLogger()) + + _, err := srv.PostTranslateModify(context.Background(), &pb.PostTranslateModifyRequest{}) + require.NoError(t, err) + + ps := srv.programmed.snapshot() + assert.Equal(t, uint64(0), ps.BuildID, "no build was recorded") + assert.Empty(t, ps.Keys, "nothing recorded while the endpoint is disabled") +} diff --git a/internal/extensionserver/server/server.go b/internal/extensionserver/server/server.go index 5fc21315..57a53cb1 100644 --- a/internal/extensionserver/server/server.go +++ b/internal/extensionserver/server/server.go @@ -7,6 +7,7 @@ package server import ( "context" "log/slog" + "net/http" "time" pb "github.com/envoyproxy/gateway/proto/extension" @@ -36,6 +37,12 @@ type ServerConfig struct { // empty, local-reply injection is a no-op. Sourced from // GatewayConfig.ErrorPage + the embedded/override HTML body. LocalReply mutate.LocalReplyConfig + // EnableProgrammedSet turns on the read-only /debug/programmed-set endpoint + // and the per-build recording that backs it. It exists only to let a test + // confirm the proxy is running exactly the set the build intended, so it is + // off in production and enabled only in the test environment. When off, the + // build does no extra work and the endpoint is not served. + EnableProgrammedSet bool } // Server implements pb.EnvoyGatewayExtensionServer for the NSO production @@ -51,15 +58,29 @@ type Server struct { client client.Client cfg ServerConfig log *slog.Logger + // programmed records what the last build changed, so a test can ask the proxy + // to prove it is running exactly that. Always non-nil; capturing it only reads + // what was already produced. + programmed *programmedRecorder } // New returns a production extension server backed by the given cache client. // In production, cl is the ctrl.Manager.GetClient() from NewManager(). // In tests, cl is a fake client pre-populated with the test objects. func New(cl client.Client, cfg ServerConfig, log *slog.Logger) *Server { - return &Server{client: cl, cfg: cfg, log: log} + return &Server{client: cl, cfg: cfg, log: log, programmed: newProgrammedRecorder()} } +// ProgrammedSetHandler serves what the last build changed, so a test can confirm +// the proxy is running exactly that. Read-only. +func (s *Server) ProgrammedSetHandler() http.HandlerFunc { + return s.programmed.programmedSetHandler() +} + +// ProgrammedSetEndpointPath is exported so the server and the test tooling share +// one definition of where this endpoint lives. +const ProgrammedSetEndpointPath = programmedSetEndpointPath + // PostTranslateModify applies the TPP/WAF and Connector mutation families to // the full xDS snapshot and returns the complete (mutated) resource set. // Secrets are passed through unchanged — the response replaces EG's entire @@ -317,6 +338,16 @@ func (s *Server) PostTranslateModify( extmetrics.ConnectorRoutesTotal.Add(float64(vhCount)) extmetrics.ConnectorOfflineRoutesTotal.Add(float64(offlineRtCount)) + // In the test environment, record what this build changed so a test can later + // confirm the proxy is running exactly that. This only reads the configuration + // just produced; it changes nothing. Off in production, where it does no work. + if s.cfg.EnableProgrammedSet { + s.programmed.record( + listeners, routes, clusters, s.cfg.Coraza.FilterName, + prunedChains, prunedSecrets, listenersLeftIntact, + ) + } + s.log.Info("PostTranslateModify", "clusters", len(clusters), "listeners", len(listeners),