From 89863e16f10357c110970667615fc69a11446a50 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Thu, 25 Jun 2026 13:29:36 -0500 Subject: [PATCH 1/2] test(infra): stand up a production-fidelity edge test environment Brings up a two-cluster environment that mirrors how the edge really runs: the production gateway version, the firewall data plane real traffic passes through, and the actual path configuration travels to the edge. Lets tests exercise the route customer traffic takes instead of a simplified stand-in. Test scaffolding only; no change to how the edge runs for customers. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01JbCy8vy66RdNYzGSgqH6P6 --- Taskfile.test-infra.yml | 419 ++++++++++++++++++ Taskfile.yaml | 2 + .../e2e-downstream/d1-cert-bypass/README.md | 28 ++ .../d1-cert-bypass/mint-expired-secret.sh | 44 ++ config/e2e-downstream/direct-namespace.yaml | 14 + .../eg-downstream/kustomization.yaml | 92 ++++ .../eg-downstream/namespace.yaml | 4 + .../eg-downstream/nso-crd-rbac.yaml | 37 ++ config/e2e-downstream/envoyproxy.yaml | 87 ++++ config/e2e-downstream/error-pages.yaml | 16 + .../extserver-base/kustomization.yaml | 28 ++ .../patches/extserver-ca-bundle.yaml | 18 + .../patches/extserver-clientcert-issuer.yaml | 11 + .../patches/extserver-serverconfig.yaml | 26 ++ .../extserver-base/patches/extserver-tls.yaml | 21 + config/e2e-downstream/extserver-config.yaml | 21 + config/e2e-downstream/issuer.yaml | 41 ++ config/e2e-downstream/kustomization.yaml | 38 ++ config/e2e-downstream/namespace.yaml | 6 + .../kustomization.yaml | 14 +- config/tools/kind/downstream-cluster.yaml | 14 + config/tools/kind/upstream-cluster.yaml | 15 + 22 files changed, 992 insertions(+), 4 deletions(-) create mode 100644 Taskfile.test-infra.yml create mode 100644 config/e2e-downstream/d1-cert-bypass/README.md create mode 100755 config/e2e-downstream/d1-cert-bypass/mint-expired-secret.sh create mode 100644 
config/e2e-downstream/direct-namespace.yaml create mode 100644 config/e2e-downstream/eg-downstream/kustomization.yaml create mode 100644 config/e2e-downstream/eg-downstream/namespace.yaml create mode 100644 config/e2e-downstream/eg-downstream/nso-crd-rbac.yaml create mode 100644 config/e2e-downstream/envoyproxy.yaml create mode 100644 config/e2e-downstream/error-pages.yaml create mode 100644 config/e2e-downstream/extserver-base/kustomization.yaml create mode 100644 config/e2e-downstream/extserver-base/patches/extserver-ca-bundle.yaml create mode 100644 config/e2e-downstream/extserver-base/patches/extserver-clientcert-issuer.yaml create mode 100644 config/e2e-downstream/extserver-base/patches/extserver-serverconfig.yaml create mode 100644 config/e2e-downstream/extserver-base/patches/extserver-tls.yaml create mode 100644 config/e2e-downstream/extserver-config.yaml create mode 100644 config/e2e-downstream/issuer.yaml create mode 100644 config/e2e-downstream/kustomization.yaml create mode 100644 config/e2e-downstream/namespace.yaml create mode 100644 config/tools/kind/downstream-cluster.yaml create mode 100644 config/tools/kind/upstream-cluster.yaml diff --git a/Taskfile.test-infra.yml b/Taskfile.test-infra.yml new file mode 100644 index 00000000..ae577cf1 --- /dev/null +++ b/Taskfile.test-infra.yml @@ -0,0 +1,419 @@ +version: '3' + +# Production-fidelity test environment. Two kind clusters: nso-upstream (the +# control plane) and nso-downstream (the edge). Consumed by both the e2e and perf +# suites. +# +# Bring up: task test-infra:up +# Tear down: task test-infra:down +# Smoke: task test-infra:smoke + +# kustomize fetches some CRDs over https. A developer's global git config can +# rewrite https to ssh, which fails in a non-interactive fetch. Neutralize the +# global git config for kustomize's child git so the https fetches succeed; the +# repo-local config is untouched. 
+env: + GIT_CONFIG_GLOBAL: /dev/null + +vars: + TMP_DIR: + sh: echo "${TMPDIR:-/tmp}" + REPO: + sh: pwd + # Pinned-to-prod tooling. We use an explicit kind v0.32.0 binary because it + # ships the node image production runs, which the checked-in bin/kind predates. + KIND: '{{.REPO}}/bin/kind-v0.32.0' + KUSTOMIZE: '{{.REPO}}/bin/kustomize' + CMCTL: '{{.REPO}}/bin/cmctl' + KARMADACTL: '{{.REPO}}/bin/karmadactl' + CHAINSAW: '{{.REPO}}/bin/chainsaw' + + UPSTREAM_CLUSTER: nso-upstream + DOWNSTREAM_CLUSTER: nso-downstream + UPSTREAM_CTX: kind-nso-upstream + DOWNSTREAM_CTX: kind-nso-downstream + + # The exact node image the production edge runs. + K8S_NODE_IMAGE: kindest/node:v1.35.5@sha256:ce977ae6d65918d0b58a5f8b5e940429c2ce42fa3a5619ec2bbc60b949c0ac95 + # Gateway control-plane version, matching production. The extension-server SDK + # version is paired with it deliberately, also matching production. + ENVOY_GATEWAY_VERSION: v1.7.4 + EG_SDK_VERSION: v1.8.1 + # Multi-arch WAF image โ€” the same filter as the production edge, but it also + # loads natively on arm64 dev hosts. The data plane references this image. + CORAZA_WAF_IMAGE: ghcr.io/datum-labs/coraza-envoy-go-filter/coraza-waf:v1.3.0-multiarch.1 + # CONNECT-proxy stand-in for the connector-tunnel scenario, built from + # test/e2e-edge/_fixtures/connector-tunnel and loaded into the downstream cluster. + CONNECT_PROXY_IMAGE: connect-proxy:e2e + # The WAF is enabled by default since the multi-arch image loads on every + # architecture. Set CORAZA_DISABLED=true only to deliberately skip the WAF. + CORAZA_DISABLED: '{{.CORAZA_DISABLED | default "false"}}' + # Karmada-bundled kube-apiserver, pinned to production. + KARMADA_APISERVER_VERSION: v1.33.2 + # NSO image pinned by git SHA so the loaded image is used deterministically. 
+ IMG: + sh: echo "ghcr.io/datum-cloud/network-services-operator:$(git rev-parse --short HEAD)" + +tasks: + + up: + desc: "Bring the canonical two-cluster prod-fidelity test env ONLINE (clusters + EG v1.7.4 + ext-server + Coraza data plane + NSO manager)." + cmds: + - echo "๐Ÿš€ test-infra:up โ€” building nso-upstream + nso-downstream" + - task: clusters + # The Gateway-API and gateway CRDs must land on the downstream cluster + # before cert-manager (it probes the Gateway-API CRDs at startup) and before + # the downstream gateway (it needs its own CRDs present). + - task: eg-crds + - task: downstream-crds + - task: downstream-namespaces + - task: cert-manager-upstream + - task: cert-manager-downstream + - task: nso-image + - task: load-fixtures + - task: prepare-upstream + - task: eg-downstream + - task: extension-server + - task: external-dns + - task: link-clusters + - task: wait-ready + - echo "๐ŸŽ‰ test-infra:up complete. Run 'task test-infra:smoke' to confirm M2." + + down: + desc: "Tear down the test-infra clusters (and any Karmada host)." + cmds: + - echo "๐Ÿ’ฅ test-infra:down" + # Deleting both kind clusters removes all federation state, so this unjoin + # is only a best-effort courtesy. + - '{{.KARMADACTL}} deinit --context {{.UPSTREAM_CTX}} --force --purge-namespace 2>/dev/null || true' + - rm -rf {{.TMP_DIR}}/karmada-{{.UPSTREAM_CLUSTER}} + - '{{.KIND}} delete cluster --name {{.UPSTREAM_CLUSTER}} || true' + - '{{.KIND}} delete cluster --name {{.DOWNSTREAM_CLUSTER}} || true' + # Remove every kubeconfig this Taskfile exports to TMP_DIR: -internal.yaml + # (link-clusters), -host.yaml (karmada-up) and the two per-cluster files + # (kubeconfigs). They hold cluster credentials, so none should outlive the + # clusters. + - rm -f {{.TMP_DIR}}/.kind-{{.DOWNSTREAM_CLUSTER}}-internal.yaml {{.TMP_DIR}}/.kind-{{.DOWNSTREAM_CLUSTER}}-host.yaml {{.TMP_DIR}}/.kind-{{.UPSTREAM_CLUSTER}}.yaml {{.TMP_DIR}}/.kind-{{.DOWNSTREAM_CLUSTER}}.yaml + - echo "โœจ done." + + clusters: + desc: "Create the upstream + downstream kind clusters pinned to the prod edge node image." 
+ cmds: + - echo "๐Ÿ—๏ธ creating clusters (node {{.K8S_NODE_IMAGE}})" + - '{{.KIND}} delete cluster --name {{.UPSTREAM_CLUSTER}} 2>/dev/null || true' + - '{{.KIND}} delete cluster --name {{.DOWNSTREAM_CLUSTER}} 2>/dev/null || true' + - '{{.KIND}} create cluster --image {{.K8S_NODE_IMAGE}} --config=config/tools/kind/upstream-cluster.yaml' + - '{{.KIND}} create cluster --image {{.K8S_NODE_IMAGE}} --config=config/tools/kind/downstream-cluster.yaml' + # Running the full federation control plane alongside the rest of the stack + # in one kind node exhausts the default file-watch limits on the macOS + # Docker VM, which crashloops a component with "too many open files" and + # blocks the member join. Raise the limits on both nodes. + - docker exec {{.UPSTREAM_CLUSTER}}-control-plane sysctl -w fs.inotify.max_user_instances=8192 fs.inotify.max_user_watches=1048576 + - docker exec {{.DOWNSTREAM_CLUSTER}}-control-plane sysctl -w fs.inotify.max_user_instances=8192 fs.inotify.max_user_watches=1048576 + + cert-manager-upstream: + desc: "Install cert-manager (+ CSI driver) on the upstream cluster." + cmds: + - '{{.KUSTOMIZE}} build --enable-helm config/tools/cert-manager | kubectl --context {{.UPSTREAM_CTX}} apply --server-side=true --force-conflicts -f -' + - '{{.CMCTL}} check api --context {{.UPSTREAM_CTX}} --wait=5m' + + cert-manager-downstream: + desc: "Install cert-manager (+ CSI driver) on the downstream cluster." + cmds: + - '{{.KUSTOMIZE}} build --enable-helm config/tools/cert-manager | kubectl --context {{.DOWNSTREAM_CTX}} apply --server-side=true --force-conflicts -f -' + - '{{.CMCTL}} check api --context {{.DOWNSTREAM_CTX}} --wait=5m' + + nso-image: + desc: "Build the NSO operator image (git-SHA tag) and load it into both clusters." + cmds: + - echo "๐Ÿ”จ building {{.IMG}}" + - docker build -t {{.IMG}} . 
+ - cd config/manager && {{.KUSTOMIZE}} edit set image ghcr.io/datum-cloud/network-services-operator={{.IMG}} + - '{{.KIND}} load docker-image {{.IMG}} --name {{.UPSTREAM_CLUSTER}}' + - '{{.KIND}} load docker-image {{.IMG}} --name {{.DOWNSTREAM_CLUSTER}}' + + prepare-upstream: + desc: "Deploy the NSO manager + webhook (config/e2e) on the upstream cluster, with the prod-base memory profile." + cmds: + - echo "๐Ÿ”ง deploying NSO manager (upstream)" + - '{{.KUSTOMIZE}} build config/e2e | kubectl --context {{.UPSTREAM_CTX}} apply --server-side=true --force-conflicts -f -' + # Match production: with the extension server handling proxy configuration, + # NSO must not also emit its own patch policies. The shared config file is + # also used by an older path that still relies on them, so we patch the live + # config here rather than editing that file. + # + # The key is inserted with awk, not sed: BSD sed (macOS hosts) does not expand + # \n in a replacement and would append a literal "n" to the + # downstreamGatewayClassName value. The new line reuses the matched line's + # own leading spaces so it lands at the same YAML nesting level. + - | + CFG=$(kubectl --context {{.UPSTREAM_CTX}} -n network-services-operator-system get cm network-services-operator-config -o jsonpath='{.data.config\.yaml}') + if ! echo "$CFG" | grep -q "eppEmissionEnabled"; then + NEW=$(echo "$CFG" | awk '{ print } /^ +downstreamGatewayClassName: / { match($0, /^ +/); printf "%seppEmissionEnabled: false\n", substr($0, 1, RLENGTH) }') + kubectl --context {{.UPSTREAM_CTX}} -n network-services-operator-system create cm network-services-operator-config \ + --from-literal=config.yaml="$NEW" --dry-run=client -o yaml | kubectl --context {{.UPSTREAM_CTX}} apply -f - + fi + # Raise the manager's memory limit to the production base so perf sweeps and + # webhook-under-load reflect production headroom. 
+ - | + kubectl --context {{.UPSTREAM_CTX}} -n network-services-operator-system patch deploy network-services-operator-controller-manager \ + --type=json -p '[{"op":"replace","path":"/spec/template/spec/containers/0/resources/limits/memory","value":"2Gi"},{"op":"replace","path":"/spec/template/spec/containers/0/resources/requests/memory","value":"512Mi"}]' || true + + eg-crds: + desc: "Install the full Gateway-API + Envoy Gateway CRD set (incl ReferenceGrant + EnvoyProxy) on the downstream cluster. The downstream EG chart sets includeCRDs:false, so CRDs are applied separately (matches prod, where CRDs are managed out-of-band)." + vars: + EG_CRD_DIR: config/tools/envoy-gateway-downstream/charts/gateway-helm-{{.ENVOY_GATEWAY_VERSION}}/gateway-helm/crds + cmds: + - kubectl --context {{.DOWNSTREAM_CTX}} apply --server-side=true --force-conflicts -f {{.EG_CRD_DIR}}/gatewayapi-crds.yaml + - kubectl --context {{.DOWNSTREAM_CTX}} apply --server-side=true --force-conflicts -f {{.EG_CRD_DIR}}/generated/ + + downstream-namespaces: + desc: "Create the downstream gateway + hostname-accounting namespaces, plus the EG-watched e2e-direct namespace for hand-delivered fixtures (D1/D2). Runs BEFORE eg-downstream so e2e-direct carries its watch label before EG establishes its informer." + cmds: + - kubectl --context {{.DOWNSTREAM_CTX}} apply -f config/dev/downstream_resources/namespaces.yaml + # e2e-direct carries the gateway watch label from creation so gateways + # applied directly here reconcile deterministically. The gateway only + # reliably watches a namespace labeled at creation time, so the label must + # be in the manifest, not added afterward. + - kubectl --context {{.DOWNSTREAM_CTX}} apply -f config/e2e-downstream/direct-namespace.yaml + + eg-downstream: + desc: "Install the dedicated downstream Envoy Gateway (v1.7.4) with the ext-server extensionManager wiring on the downstream cluster." 
+ cmds: + - echo "๐Ÿ”ง installing downstream EG {{.ENVOY_GATEWAY_VERSION}} (+ extensionManager, failOpen:false, maxMessageSize:256Mi)" + # Build from the untracked config/e2e-downstream/eg-downstream copy, which + # bakes in the e2e pins, so a hard reset on the shared branch cannot revert + # them โ€” it kept clobbering the tracked copy. + - '{{.KUSTOMIZE}} build --enable-helm config/e2e-downstream/eg-downstream | kubectl --context {{.DOWNSTREAM_CTX}} apply --server-side=true --force-conflicts -f -' + + load-coraza-waf: + desc: "Fallback: pull the Coraza WAF image and kind-load it into the downstream cluster (offline-CI escape hatch)." + cmds: + - docker pull {{.CORAZA_WAF_IMAGE}} + - '{{.KIND}} load docker-image {{.CORAZA_WAF_IMAGE}} --name {{.DOWNSTREAM_CLUSTER}}' + + load-fixtures: + desc: "Build + load the e2e fixture images into the downstream cluster (CONNECT-proxy stand-in for the connector-tunnel scenario)." + cmds: + - echo "๐Ÿ”ง building + loading fixture image {{.CONNECT_PROXY_IMAGE}}" + - docker build -t {{.CONNECT_PROXY_IMAGE}} test/e2e-edge/_fixtures/connector-tunnel + - '{{.KIND}} load docker-image {{.CONNECT_PROXY_IMAGE}} --name {{.DOWNSTREAM_CLUSTER}}' + + d1-mint-expired-secret: + desc: "D1 helper: mint an ALREADY-EXPIRED kubernetes.io/tls Secret and apply it to the downstream cluster, bypassing the upstream #212 cert-health gate so the ext-server prune backstop can be tested in isolation. Vars: NAMESPACE, SECRET, HOSTNAME." + vars: + # Default to the pre-provisioned, gateway-watched namespace so a gateway + # applied alongside this secret reconciles deterministically. 
+ NAMESPACE: '{{.NAMESPACE | default "e2e-direct"}}' + SECRET: '{{.SECRET | default "d1-expired-tls"}}' + HOSTNAME: '{{.HOSTNAME | default "d1-bad.e2e.env.datum.net"}}' + cmds: + - | + config/e2e-downstream/d1-cert-bypass/mint-expired-secret.sh \ + "{{.NAMESPACE}}" "{{.SECRET}}" "{{.HOSTNAME}}" \ + | kubectl --context {{.DOWNSTREAM_CTX}} apply -f - + - echo "โœ… applied expired TLS Secret {{.NAMESPACE}}/{{.SECRET}} (host {{.HOSTNAME}}) to {{.DOWNSTREAM_CLUSTER}}" + + extension-server: + desc: "Deploy the ext-server (2 replicas + PDB + mTLS) + e2e cert chain + Coraza/branded-page config + test EnvoyProxy (Coraza+admin:19000) on the downstream cluster." + cmds: + - echo "๐Ÿ”ง deploying ext-server + e2e issuer chain + test EnvoyProxy (config/e2e-downstream)" + # Pin the ext-server image to the git-SHA build (same image as the manager). + - cd config/e2e-downstream && {{.KUSTOMIZE}} edit set image ghcr.io/datum-cloud/network-services-operator={{.IMG}} 2>/dev/null || true + - '{{.KUSTOMIZE}} build config/e2e-downstream | kubectl --context {{.DOWNSTREAM_CTX}} apply --server-side=true --force-conflicts -f -' + # WAF disable toggle. The overlay ships the WAF enabled for production + # fidelity; when CORAZA_DISABLED is set we flip the live config so listeners + # program without it. + - | + if [ "{{.CORAZA_DISABLED}}" = "true" ]; then + echo "โš ๏ธ Coraza WAF disabled (host arch {{OS}}/$(uname -m); WAF .so is amd64-only)" + kubectl --context {{.DOWNSTREAM_CTX}} -n network-services-operator-system get cm extension-server-config -o yaml \ + | sed 's/disabled: false/disabled: true/' | kubectl --context {{.DOWNSTREAM_CTX}} apply -f - + fi + # The extension server's server cert and the gateway's client cert are both + # issued from the e2e certificate authority. Wait for that authority's cert, + # then publish it where each side reads it so they can verify each other. 
+ - task: extserver-ca-bundle + + extserver-ca-bundle: + desc: "Publish the e2e CA's ca.crt into the ext-server CA-bundle ConfigMap and the EG certificateRef Secret (both in network-services-operator-system)." + cmds: + - echo "โณ waiting for the e2e CA certificate to be issued" + - | + kubectl --context {{.DOWNSTREAM_CTX}} -n cert-manager wait certificate e2e-extension-server-ca \ + --for=condition=Ready --timeout=120s + - kubectl --context {{.DOWNSTREAM_CTX}} create namespace network-services-operator-system --dry-run=client -o yaml | kubectl --context {{.DOWNSTREAM_CTX}} apply -f - + - | + CA_CRT=$(kubectl --context {{.DOWNSTREAM_CTX}} -n cert-manager get secret e2e-extension-server-ca -o jsonpath='{.data.ca\.crt}' | base64 -d) + # The certificate-authority bundle the extension server mounts. + kubectl --context {{.DOWNSTREAM_CTX}} -n network-services-operator-system create configmap extension-server-ca-bundle \ + --from-literal=ca.crt="$CA_CRT" --dry-run=client -o yaml | kubectl --context {{.DOWNSTREAM_CTX}} apply -f - + # The same authority cert the downstream gateway references. The gateway + # reads it from the tls.crt key, not ca.crt. + kubectl --context {{.DOWNSTREAM_CTX}} -n network-services-operator-system create secret generic e2e-extension-server-ca \ + --from-literal=tls.crt="$CA_CRT" --dry-run=client -o yaml | kubectl --context {{.DOWNSTREAM_CTX}} apply -f - + # Restart the extension server so it picks up the freshly-published bundle. + - kubectl --context {{.DOWNSTREAM_CTX}} -n network-services-operator-system rollout restart deploy network-services-operator-envoy-gateway-extension-server || true + + downstream-crds: + desc: "Install the NSO CRDs the replicator mirrors into the downstream cluster (the Gateway-API/EG CRDs come from eg-crds)." 
+ cmds: + - kubectl --context {{.DOWNSTREAM_CTX}} apply -f config/crd/bases/networking.datumapis.com_connectors.yaml + - kubectl --context {{.DOWNSTREAM_CTX}} apply -f config/crd/bases/networking.datumapis.com_httpproxies.yaml + - kubectl --context {{.DOWNSTREAM_CTX}} apply -f config/crd/bases/networking.datumapis.com_trafficprotectionpolicies.yaml + + external-dns: + desc: "Install external-dns CRDs (DNSEndpoint) on the downstream cluster." + cmds: + - '{{.KUSTOMIZE}} build --enable-helm config/tools/external-dns | kubectl --context {{.DOWNSTREAM_CTX}} apply --server-side=true --force-conflicts -f -' + + link-clusters: + desc: "Wire the NSO manager's downstream client to the downstream cluster via the downstream-cluster-kubeconfig secret on the upstream cluster." + cmds: + - echo "๐Ÿ”— linking upstream -> downstream" + - '{{.KIND}} get kubeconfig --name {{.DOWNSTREAM_CLUSTER}} --internal > {{.TMP_DIR}}/.kind-{{.DOWNSTREAM_CLUSTER}}-internal.yaml' + - kubectl --context {{.UPSTREAM_CTX}} create namespace network-services-operator-system --dry-run=client -o yaml | kubectl --context {{.UPSTREAM_CTX}} apply -f - + - | + kubectl --context {{.UPSTREAM_CTX}} create secret -n network-services-operator-system \ + generic downstream-cluster-kubeconfig --save-config --dry-run=client -o yaml \ + --from-file=kubeconfig={{.TMP_DIR}}/.kind-{{.DOWNSTREAM_CLUSTER}}-internal.yaml | kubectl --context {{.UPSTREAM_CTX}} apply -f - + + wait-ready: + desc: "Wait for the core components (NSO manager, downstream EG, ext-server) to be Available." + cmds: + - echo "โณ waiting for NSO manager (upstream)" + - kubectl --context {{.UPSTREAM_CTX}} -n network-services-operator-system wait deploy network-services-operator-controller-manager --for=condition=Available --timeout=240s + - echo "โณ waiting for downstream EG" + # The gateway control-plane Deployment is named `envoy-gateway` by its chart, + # not after the release name. 
+ - kubectl --context {{.DOWNSTREAM_CTX}} -n datum-downstream-gateway wait deploy envoy-gateway --for=condition=Available --timeout=240s + - echo "โณ waiting for ext-server" + - kubectl --context {{.DOWNSTREAM_CTX}} -n network-services-operator-system wait deploy network-services-operator-envoy-gateway-extension-server --for=condition=Available --timeout=240s + - echo "โœ… core components ready." + + # ---- Karmada ------------------------------------------------------------ + + karmada-up: + desc: "Stand up a real Karmada host on the upstream cluster (apiserver v1.33.2), join the downstream member, and apply the prod federation artifacts." + vars: + KARMADA_KUBECONFIG: '{{.TMP_DIR}}/karmada-{{.UPSTREAM_CLUSTER}}/karmada-apiserver.config' + # The upstream node's docker-network IP, reachable from the downstream node + # on the same network, used to sign the Karmada apiserver cert. + HOST_IP: + sh: docker inspect {{.UPSTREAM_CLUSTER}}-control-plane -f '{{"{{"}}.NetworkSettings.Networks.kind.IPAddress{{"}}"}}' + # The downstream member's docker-network IP โ€” reachable from the upstream + # control-plane pods on the same network, but not from the macOS host. + MEMBER_IP: + sh: docker inspect {{.DOWNSTREAM_CLUSTER}}-control-plane -f '{{"{{"}}.NetworkSettings.Networks.kind.IPAddress{{"}}"}}' + cmds: + # Advertise 127.0.0.1: karmadactl runs on the macOS host, and init's own + # post-deploy steps dial the advertise address. The docker-network IP is not + # routable from the macOS host, so advertising it would make init time out; + # the upstream cluster maps a host port through to the apiserver, so + # 127.0.0.1 reaches it. We still sign the cert for the docker IP for any + # in-cluster path. The member is joined in push mode, so it never needs to + # reach the apiserver address โ€” advertising 127.0.0.1 is safe for the join. 
+ - echo "๐ŸŒ karmada init (apiserver {{.KARMADA_APISERVER_VERSION}}, advertise 127.0.0.1:32443, cert-ip incl {{.HOST_IP}}) on {{.UPSTREAM_CLUSTER}}" + - mkdir -p {{.TMP_DIR}}/karmada-{{.UPSTREAM_CLUSTER}} + - | + {{.KARMADACTL}} init --context {{.UPSTREAM_CTX}} \ + --kube-image-tag {{.KARMADA_APISERVER_VERSION}} \ + --karmada-apiserver-advertise-address 127.0.0.1 \ + --cert-external-ip "127.0.0.1,{{.HOST_IP}}" \ + --etcd-storage-mode hostPath \ + --karmada-data {{.TMP_DIR}}/karmada-{{.UPSTREAM_CLUSTER}} \ + --karmada-pki {{.TMP_DIR}}/karmada-{{.UPSTREAM_CLUSTER}}/pki + - echo "๐Ÿ”— joining {{.DOWNSTREAM_CLUSTER}} as a Karmada member" + # karmadactl join runs on the host and connects to the member to install its + # agent, so it needs a host-reachable member kubeconfig, not the internal + # docker-hostname form the host cannot resolve. + - '{{.KIND}} get kubeconfig --name {{.DOWNSTREAM_CLUSTER}} > {{.TMP_DIR}}/.kind-{{.DOWNSTREAM_CLUSTER}}-host.yaml' + - | + {{.KARMADACTL}} join {{.DOWNSTREAM_CLUSTER}} \ + --karmada-context karmada-apiserver \ + --kubeconfig {{.KARMADA_KUBECONFIG}} \ + --cluster-kubeconfig {{.TMP_DIR}}/.kind-{{.DOWNSTREAM_CLUSTER}}-host.yaml \ + --cluster-context {{.DOWNSTREAM_CTX}} + # The join stores the host address as the member's endpoint, which the + # control-plane pods running inside the upstream cluster cannot reach. + # Repoint it to the member's docker IP, which they can reach on the shared + # network, and the member then goes Ready. + - | + kubectl --kubeconfig {{.KARMADA_KUBECONFIG}} --context karmada-apiserver \ + patch cluster {{.DOWNSTREAM_CLUSTER}} --type=merge \ + -p '{"spec":{"apiEndpoint":"https://{{.MEMBER_IP}}:6443"}}' + # Label the member so the production propagation policy places resources onto + # it. 
+ - | + kubectl --kubeconfig {{.KARMADA_KUBECONFIG}} --context karmada-apiserver \ + label cluster {{.DOWNSTREAM_CLUSTER}} infra.datum.net/gateways=enabled --overwrite + - echo "โณ waiting for the member cluster to become Ready" + - | + kubectl --kubeconfig {{.KARMADA_KUBECONFIG}} --context karmada-apiserver \ + wait cluster {{.DOWNSTREAM_CLUSTER}} --for=condition=Ready --timeout=120s + - echo "๐Ÿ“œ applying federation artifacts (config/federation)" + - | + kubectl --kubeconfig {{.KARMADA_KUBECONFIG}} --context karmada-apiserver apply -f config/federation/ + - echo "โœ… karmada-up complete. Karmada apiserver kubeconfig at {{.KARMADA_KUBECONFIG}}" + + smoke: + desc: "M2 functional confirmation: drive an upstream Gateway+HTTPRoute through the ext-server path and curl a real 200." + # chainsaw is pointed at the per-cluster kubeconfig files under TMP_DIR, which + # only the kubeconfigs task writes. Declare it as a dependency (as e2e does) + # so smoke works when run directly after test-infra:up. + deps: + - kubeconfigs + cmds: + - '{{.CHAINSAW}} test ./test/e2e-edge/extension-server-smoke --cluster {{.UPSTREAM_CLUSTER}}={{.TMP_DIR}}/.kind-{{.UPSTREAM_CLUSTER}}.yaml --cluster {{.DOWNSTREAM_CLUSTER}}={{.TMP_DIR}}/.kind-{{.DOWNSTREAM_CLUSTER}}.yaml' + + e2e: + desc: "Run chainsaw e2e scenarios against the live two-cluster env. Pass a scenario name or path after -- (e.g. `task test-infra:e2e -- waf-enforcement`); with no arg, runs every ext-server-path scenario that targets nso-upstream/nso-downstream. Use SCENARIOS=... to override the default set." + vars: + # Scenarios authored against this env's cluster names and downstream path. + # Older fixtures targeting the previous cluster names are excluded here. + DEFAULT_SCENARIOS: extension-server-smoke waf-enforcement branded-error-page connector-offline-503 atomic-reject-isolation + # CLI_ARGS (after --) wins; else SCENARIOS env; else the default set. + SELECTED: '{{.CLI_ARGS | default .SCENARIOS | default .DEFAULT_SCENARIOS}}' + deps: + - kubeconfigs + cmds: + - | + set -e + for s in {{.SELECTED}}; do + # Accept either a bare scenario name or a full/relative path. 
+ case "$s" in + test/e2e-edge/*|./test/e2e-edge/*) dir="$s" ;; + */*) dir="$s" ;; + *) dir="./test/e2e-edge/$s" ;; + esac + echo "๐Ÿงช chainsaw: $dir" + {{.CHAINSAW}} test "$dir" \ + --cluster {{.UPSTREAM_CLUSTER}}={{.TMP_DIR}}/.kind-{{.UPSTREAM_CLUSTER}}.yaml \ + --cluster {{.DOWNSTREAM_CLUSTER}}={{.TMP_DIR}}/.kind-{{.DOWNSTREAM_CLUSTER}}.yaml + done + + kubeconfigs: + desc: "Export per-cluster kubeconfigs to TMPDIR for chainsaw." + cmds: + - '{{.KIND}} get kubeconfig --name {{.UPSTREAM_CLUSTER}} > {{.TMP_DIR}}/.kind-{{.UPSTREAM_CLUSTER}}.yaml' + - '{{.KIND}} get kubeconfig --name {{.DOWNSTREAM_CLUSTER}} > {{.TMP_DIR}}/.kind-{{.DOWNSTREAM_CLUSTER}}.yaml' + + parity:check: + desc: "Run the config-dump parity gate against the live downstream Envoy + ext-server. Pass CLI flags after -- (PARITY owns the CLI). Exit 0 PASS / 1 parity FAIL / 2 tool error." + cmds: + - go build -o bin/parity-check ./cmd/parity-check + - ./bin/parity-check {{.CLI_ARGS}} + + parity:check-live: + desc: "Convenience: resolve the live data-plane Envoy pod from this env's labels and run parity:check in kubectl-exec mode (ext-server via selector across replicas). Extra flags pass through after --." + vars: + # In this env the data-plane proxy and the extension server live in these + # namespaces. + DP_NS: datum-downstream-gateway + DP_SELECTOR: gateway.envoyproxy.io/owning-gatewayclass=datum-downstream-gateway-e2e + EXT_NS: network-services-operator-system + EXT_SELECTOR: app.kubernetes.io/component=envoy-gateway-extension-server + # The admin side wants a single exact proxy pod name, resolved here. The + # extension-server side takes a selector and picks the authoritative replica + # itself. 
+ DP_POD: + # Select items[*] and keep the first name instead of indexing items[0]: + # with no matching pods, items[0] makes kubectl exit non-zero, which aborts + # the task while resolving this variable and never reaches the explicit + # "no data-plane Envoy pod found" guard in cmds. items[*] yields an empty + # string in that case. + sh: kubectl --context {{.DOWNSTREAM_CTX}} -n datum-downstream-gateway get pods -l gateway.envoyproxy.io/owning-gatewayclass=datum-downstream-gateway-e2e -o jsonpath='{.items[*].metadata.name}' | cut -d' ' -f1 + cmds: + - go build -o bin/parity-check ./cmd/parity-check + - | + if [ -z "{{.DP_POD}}" ]; then + echo "no data-plane Envoy pod found (label {{.DP_SELECTOR}} in {{.DP_NS}}); a Gateway must exist in a watched namespace for the merged data plane to be provisioned" >&2 + exit 2 + fi + - | + ./bin/parity-check \ + --coraza-filter=coraza-waf \ + --admin-exec-pod={{.DP_POD}} --admin-exec-namespace={{.DP_NS}} --admin-exec-container=envoy --admin-exec-context={{.DOWNSTREAM_CTX}} \ + --ext-exec-selector={{.EXT_SELECTOR}} --ext-exec-namespace={{.EXT_NS}} --ext-exec-context={{.DOWNSTREAM_CTX}} \ + {{.CLI_ARGS}} diff --git a/Taskfile.yaml b/Taskfile.yaml index 45a19142..43502ef5 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -3,6 +3,8 @@ version: '3' includes: dev: taskfile: ./Taskfile.dev.yaml + test-infra: + taskfile: ./Taskfile.test-infra.yml tasks: validate-kustomizations: diff --git a/config/e2e-downstream/d1-cert-bypass/README.md b/config/e2e-downstream/d1-cert-bypass/README.md new file mode 100644 index 00000000..1923679c --- /dev/null +++ b/config/e2e-downstream/d1-cert-bypass/README.md @@ -0,0 +1,28 @@ +# Expired-certificate isolation fixture (test-env-only) + +In production, an expired or otherwise unusable TLS certificate is caught early: +the platform withholds that listener from the edge before it is ever delivered. +The extension server *also* removes unusable certificates at the edge, as a +second line of defense — but because the earlier check normally catches the +problem first, that edge-side removal rarely gets exercised on the normal path. + +This fixture lets a test exercise it directly, by handing the edge a genuinely +expired certificate and bypassing the earlier check. + +1. 
`mint-expired-secret.sh NAMESPACE SECRET HOSTNAME` writes a + self-signed, already-expired certificate as a TLS Secret to stdout. Apply it + into the `e2e-direct` namespace on the edge cluster. +2. The test then applies a gateway directly to the edge whose HTTPS listener + uses that certificate. The extension server removes the bad listener while a + healthy sibling keeps serving — which is what the test asserts. + +> **Use the `e2e-direct` namespace.** The gateway controller only watches +> namespaces that already carry the `meta.datumapis.com/upstream-cluster-name` +> label when they are created; a label added afterward is not reliably picked +> up, and a gateway there can stay unprogrammed. The `e2e-direct` namespace is +> created with the label up front for exactly this reason. If you must create a +> namespace inline, set the label at creation time. + +`task -t Taskfile.test-infra.yml d1-mint-expired-secret` is a thin wrapper around +the script (defaults: `NAMESPACE=e2e-direct`, `SECRET=d1-expired-tls`, +`HOSTNAME=d1-bad.e2e.env.datum.net`). diff --git a/config/e2e-downstream/d1-cert-bypass/mint-expired-secret.sh b/config/e2e-downstream/d1-cert-bypass/mint-expired-secret.sh new file mode 100755 index 00000000..8ea5e705 --- /dev/null +++ b/config/e2e-downstream/d1-cert-bypass/mint-expired-secret.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Mint an already-expired self-signed TLS certificate and emit it as a +# kubernetes.io/tls Secret on stdout. Test-env-only: it hands the gateway an +# expired certificate directly, so the extension server's removal of unusable +# certificates can be exercised on its own, without the earlier check rejecting +# it first. +# +# Usage: mint-expired-secret.sh NAMESPACE SECRET HOSTNAME +set -euo pipefail + +NS="${1:?namespace required}" +SECRET="${2:?secret name required}" +HOST="${3:?hostname required}" + +WORK="$(mktemp -d)" +trap 'rm -rf "$WORK"' EXIT + +# Generate a key + a self-signed cert dated entirely in the past so it is expired +# the moment it is created. 
openssl's -not_before/-not_after (LibreSSL/OpenSSL 3) +# set an explicit validity window; fall back to a 1-second window via -days 0 if +# the flags are unavailable. The fallback must be -days 0: -days 1 would mint a +# certificate that stays valid for the next 24 hours. +openssl req -x509 -newkey rsa:2048 -nodes \ + -keyout "$WORK/tls.key" -out "$WORK/tls.crt" \ + -subj "/CN=${HOST}" \ + -addext "subjectAltName=DNS:${HOST}" \ + -not_before 20200101000000Z -not_after 20200102000000Z 2>/dev/null \ + || openssl req -x509 -newkey rsa:2048 -nodes \ + -keyout "$WORK/tls.key" -out "$WORK/tls.crt" \ + -subj "/CN=${HOST}" -addext "subjectAltName=DNS:${HOST}" -days 0 2>/dev/null + +# Refuse to emit a certificate that is still valid. -checkend 0 exits 0 only +# when the certificate has NOT yet expired; a valid certificate here would make +# the expired-certificate test pass without exercising anything. Output is +# discarded because stdout carries the Secret manifest. +if openssl x509 -in "$WORK/tls.crt" -noout -checkend 0 >/dev/null 2>&1; then + echo "mint-expired-secret.sh: generated certificate is not expired" >&2 + exit 1 +fi + +CRT_B64="$(base64 < "$WORK/tls.crt" | tr -d '\n')" +KEY_B64="$(base64 < "$WORK/tls.key" | tr -d '\n')" + +cat < + + + Service Unavailable

This service is temporarily unavailable.

+ diff --git a/config/e2e-downstream/extserver-base/kustomization.yaml b/config/e2e-downstream/extserver-base/kustomization.yaml new file mode 100644 index 00000000..2ba7af04 --- /dev/null +++ b/config/e2e-downstream/extserver-base/kustomization.yaml @@ -0,0 +1,28 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +# Ext-server, namespaced + prefixed so the Service FQDN matches the EG +# extensionManager fqdn and the CSI dns-names baked into the base deployment. +namespace: network-services-operator-system +namePrefix: network-services-operator- + +resources: + - ../../extension-server + +patches: + - path: patches/extserver-tls.yaml + target: + kind: Deployment + name: envoy-gateway-extension-server + - path: patches/extserver-serverconfig.yaml + target: + kind: Deployment + name: envoy-gateway-extension-server + - path: patches/extserver-clientcert-issuer.yaml + target: + kind: Certificate + name: envoy-gateway-extension-server-eg-client-tls + - path: patches/extserver-ca-bundle.yaml + target: + kind: Deployment + name: envoy-gateway-extension-server diff --git a/config/e2e-downstream/extserver-base/patches/extserver-ca-bundle.yaml b/config/e2e-downstream/extserver-base/patches/extserver-ca-bundle.yaml new file mode 100644 index 00000000..6e85fc0f --- /dev/null +++ b/config/e2e-downstream/extserver-base/patches/extserver-ca-bundle.yaml @@ -0,0 +1,18 @@ +# Point the ext-server CA bundle volume at the e2e CA ConfigMap (carrying the +# ca.crt that signed the EG client cert), replacing placeholder-ca-bundle. The +# ConfigMap is published by the bring-up (test-infra:extserver-ca-bundle) from +# the e2e-extension-server-ca cert-manager secret. 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: envoy-gateway-extension-server +spec: + template: + spec: + volumes: + - name: tls-ca + configMap: + name: extension-server-ca-bundle + items: + - key: ca.crt + path: ca.crt diff --git a/config/e2e-downstream/extserver-base/patches/extserver-clientcert-issuer.yaml b/config/e2e-downstream/extserver-base/patches/extserver-clientcert-issuer.yaml new file mode 100644 index 00000000..9710265e --- /dev/null +++ b/config/e2e-downstream/extserver-base/patches/extserver-clientcert-issuer.yaml @@ -0,0 +1,11 @@ +# Point the EG client cert (CN=envoy-gateway) at the e2e CA ClusterIssuer, +# replacing the base certificate's placeholder-issuer. +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: envoy-gateway-extension-server-eg-client-tls +spec: + issuerRef: + name: e2e-extension-server-ca + kind: ClusterIssuer + group: cert-manager.io diff --git a/config/e2e-downstream/extserver-base/patches/extserver-serverconfig.yaml b/config/e2e-downstream/extserver-base/patches/extserver-serverconfig.yaml new file mode 100644 index 00000000..2c9701d9 --- /dev/null +++ b/config/e2e-downstream/extserver-base/patches/extserver-serverconfig.yaml @@ -0,0 +1,26 @@ +# Mount the operator config ConfigMap and set SERVER_CONFIG to its path so the +# ext-server loads Coraza + branded-error-page settings. Strategic-merge on env +# (matched by name) and on volumes/volumeMounts. 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: envoy-gateway-extension-server +spec: + template: + spec: + containers: + - name: envoy-gateway-extension-server + env: + - name: SERVER_CONFIG + value: /etc/datum/server-config/config.yaml + volumeMounts: + - name: server-config + mountPath: /etc/datum/server-config + readOnly: true + volumes: + - name: server-config + configMap: + name: extension-server-config + items: + - key: config.yaml + path: config.yaml diff --git a/config/e2e-downstream/extserver-base/patches/extserver-tls.yaml b/config/e2e-downstream/extserver-base/patches/extserver-tls.yaml new file mode 100644 index 00000000..c190ac81 --- /dev/null +++ b/config/e2e-downstream/extserver-base/patches/extserver-tls.yaml @@ -0,0 +1,21 @@ +# Point the ext-server server-cert CSI volume at the e2e CA-backed ClusterIssuer, +# replacing the base deployment's placeholder-issuer. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: envoy-gateway-extension-server +spec: + template: + spec: + volumes: + - name: tls + csi: + driver: csi.cert-manager.io + readOnly: true + volumeAttributes: + csi.cert-manager.io/issuer-kind: ClusterIssuer + csi.cert-manager.io/issuer-name: e2e-extension-server-ca + csi.cert-manager.io/common-name: envoy-gateway-extension-server + csi.cert-manager.io/dns-names: "network-services-operator-envoy-gateway-extension-server.network-services-operator-system.svc,network-services-operator-envoy-gateway-extension-server.network-services-operator-system.svc.cluster.local" + csi.cert-manager.io/key-usages: server auth + csi.cert-manager.io/fs-group: "65532" diff --git a/config/e2e-downstream/extserver-config.yaml b/config/e2e-downstream/extserver-config.yaml new file mode 100644 index 00000000..f5828a0c --- /dev/null +++ b/config/e2e-downstream/extserver-config.yaml @@ -0,0 +1,21 @@ +# Extension server config for the e2e edge. 
+# +# The WAF is enabled so its rules reach the proxy, and the branded 5xx page is +# pointed at the mounted error-pages volume so the suite can assert the branded +# body by content. The connector-tunnel listener name is left at its default so +# the connector fixtures find the listener by the same name production uses. +apiVersion: v1 +kind: ConfigMap +metadata: + name: extension-server-config + namespace: network-services-operator-system +data: + config.yaml: | + apiVersion: apiserver.config.datumapis.com/v1alpha1 + kind: NetworkServicesOperator + gateway: + coraza: + disabled: false + errorPage: + enabled: true + bodyPath: /etc/datum/error-pages/error-5xx.html diff --git a/config/e2e-downstream/issuer.yaml b/config/e2e-downstream/issuer.yaml new file mode 100644 index 00000000..35fc7a62 --- /dev/null +++ b/config/e2e-downstream/issuer.yaml @@ -0,0 +1,41 @@ +# Self-signed root + CA-backed ClusterIssuer for the e2e ext-server mTLS chain. +# +# This issues both sides of the EG <-> ext-server handshake: +# - the ext-server SERVER cert (via the CSI driver, see patches/extserver-tls.yaml) +# - the EG CLIENT cert (envoy-gateway-extension-server-eg-client-tls, CN=envoy-gateway) +# The CA's ca.crt is also published into the ext-server CA bundle ConfigMap +# (so the ext-server can verify the EG client) and into the EG certificateRef +# secret (so EG can verify the ext-server server cert). 
+--- +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: e2e-extension-server-selfsigned +spec: + selfSigned: {} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: e2e-extension-server-ca + namespace: cert-manager +spec: + isCA: true + commonName: e2e-extension-server-ca + secretName: e2e-extension-server-ca + duration: 8760h + privateKey: + algorithm: ECDSA + size: 256 + issuerRef: + name: e2e-extension-server-selfsigned + kind: ClusterIssuer + group: cert-manager.io +--- +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: e2e-extension-server-ca +spec: + ca: + secretName: e2e-extension-server-ca diff --git a/config/e2e-downstream/kustomization.yaml b/config/e2e-downstream/kustomization.yaml new file mode 100644 index 00000000..6ab183ae --- /dev/null +++ b/config/e2e-downstream/kustomization.yaml @@ -0,0 +1,38 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +# The e2e edge overlay applied to nso-downstream. Composes the ext-server +# (prefixed/namespaced via extserver-base) with the e2e mTLS issuer chain, the +# Coraza/branded-page server-config, the branded error page, and the test +# EnvoyProxy (real Coraza WAF image + admin :19000) and its GatewayClass. +# +# The ConfigMaps / issuer / EnvoyProxy / GatewayClass are kept OUT of the +# name-prefix so the deployment's literal references (envoy-error-pages, +# extension-server-config) and the EG fqdn resolve unchanged. +resources: +- namespace.yaml +- extserver-base +- issuer.yaml +- extserver-config.yaml +- error-pages.yaml +- envoyproxy.yaml + +# The NSO image tag is set by the bring-up (test-infra:extension-server) to the +# git-SHA-built image. +images: +- name: ghcr.io/datum-cloud/network-services-operator + newName: ghcr.io/datum-cloud/network-services-operator + newTag: 63fa912 + +# The default e2e path has no Prometheus operator; drop the ServiceMonitor so +# the apply doesn't fail on the missing monitoring.coreos.com CRD. 
(Re-add via +# OBSERVABILITY=1 when the Flux observability stack is enabled.) +patches: +- patch: | + $patch: delete + apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + name: envoy-gateway-extension-server-metrics + target: + kind: ServiceMonitor diff --git a/config/e2e-downstream/namespace.yaml b/config/e2e-downstream/namespace.yaml new file mode 100644 index 00000000..b27c33c1 --- /dev/null +++ b/config/e2e-downstream/namespace.yaml @@ -0,0 +1,6 @@ +# The ext-server namespace on the edge cluster. The base ext-server resources +# target this namespace but do not create it. +apiVersion: v1 +kind: Namespace +metadata: + name: network-services-operator-system diff --git a/config/tools/envoy-gateway-downstream/kustomization.yaml b/config/tools/envoy-gateway-downstream/kustomization.yaml index d6cce819..9f5a2a92 100644 --- a/config/tools/envoy-gateway-downstream/kustomization.yaml +++ b/config/tools/envoy-gateway-downstream/kustomization.yaml @@ -6,7 +6,7 @@ helmCharts: includeCRDs: false namespace: datum-downstream-gateway releaseName: envoy-datum-downstream-gateway - version: v1.8.1 + version: v1.7.4 repo: oci://docker.io/envoyproxy valuesInline: config: @@ -29,6 +29,10 @@ helmCharts: - key: meta.datumapis.com/upstream-cluster-name operator: Exists extensionManager: + # Match the ext-server's compiled-in 256 MiB gRPC ceiling. Without + # this the EG side defaults to ~4 MiB and silently freezes xDS once a + # translated snapshot exceeds it (~540 gateways in prod). + maxMessageSize: 256Mi policyResources: - group: networking.datumapis.com version: v1alpha @@ -43,9 +47,11 @@ helmCharts: port: 5005 tls: certificateRef: - # Placeholder — an overlay must patch this to the Secret holding the CA that issued the server cert. - name: placeholder-ca - namespace: placeholder-namespace + # e2e: the Secret holding the CA that issued the ext-server + # server cert.
Published by the bring-up (test-infra:extserver-ca-bundle) + # from the e2e-extension-server-ca cert-manager secret. + name: e2e-extension-server-ca + namespace: network-services-operator-system clientCertificateRef: name: envoy-gateway-extension-server-eg-client-tls namespace: network-services-operator-system diff --git a/config/tools/kind/downstream-cluster.yaml b/config/tools/kind/downstream-cluster.yaml new file mode 100644 index 00000000..6339242a --- /dev/null +++ b/config/tools/kind/downstream-cluster.yaml @@ -0,0 +1,14 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +name: nso-downstream +networking: + ipFamily: dual +nodes: +- role: control-plane + extraPortMappings: + - containerPort: 30080 + hostPort: 30080 + protocol: TCP + - containerPort: 30443 + hostPort: 30443 + protocol: TCP diff --git a/config/tools/kind/upstream-cluster.yaml b/config/tools/kind/upstream-cluster.yaml new file mode 100644 index 00000000..be723669 --- /dev/null +++ b/config/tools/kind/upstream-cluster.yaml @@ -0,0 +1,15 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +name: nso-upstream +networking: + ipFamily: dual +nodes: +- role: control-plane + # Expose the Karmada apiserver NodePort (32443) to the host so karmadactl — + # which runs on the host — can reach the Karmada apiserver during init/join. + # On macOS the kind docker-network IP is not host-routable, so without this + # mapping karmadactl times out dialing the advertised node IP:32443. + extraPortMappings: + - containerPort: 32443 + hostPort: 32443 + protocol: TCP From d2e62bff866781c6b5abfe4b01d8d6e8fb38c713 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Thu, 25 Jun 2026 16:41:26 -0500 Subject: [PATCH 2/2] Enable the programmed-set endpoint in the test environment The endpoint that lets the parity test confirm the proxy is running exactly the set the build intended is off by default and served only when asked for.
The base deployment reads --enable-programmed-set from the ENABLE_PROGRAMMED_SET env var; flip it to "true" in the downstream edge overlay with a strategic-merge patch on env so the test environment keeps that visibility while production exposes nothing. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01JbCy8vy66RdNYzGSgqH6P6 --- .../extserver-base/kustomization.yaml | 4 ++++ .../patches/extserver-programmed-set.yaml | 17 +++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 config/e2e-downstream/extserver-base/patches/extserver-programmed-set.yaml diff --git a/config/e2e-downstream/extserver-base/kustomization.yaml b/config/e2e-downstream/extserver-base/kustomization.yaml index 2ba7af04..14e55681 100644 --- a/config/e2e-downstream/extserver-base/kustomization.yaml +++ b/config/e2e-downstream/extserver-base/kustomization.yaml @@ -26,3 +26,7 @@ patches: target: kind: Deployment name: envoy-gateway-extension-server + - path: patches/extserver-programmed-set.yaml + target: + kind: Deployment + name: envoy-gateway-extension-server diff --git a/config/e2e-downstream/extserver-base/patches/extserver-programmed-set.yaml b/config/e2e-downstream/extserver-base/patches/extserver-programmed-set.yaml new file mode 100644 index 00000000..aa4ebfc4 --- /dev/null +++ b/config/e2e-downstream/extserver-base/patches/extserver-programmed-set.yaml @@ -0,0 +1,17 @@ +# Turn on the read-only /debug/programmed-set endpoint so the parity test can +# confirm the proxy is running exactly the set the build intended. The base +# deployment reads --enable-programmed-set from this env var, defaulting off in +# production; flip it to "true" here for the test environment. Strategic-merge +# on env (matched by name). +apiVersion: apps/v1 +kind: Deployment +metadata: + name: envoy-gateway-extension-server +spec: + template: + spec: + containers: + - name: envoy-gateway-extension-server + env: + - name: ENABLE_PROGRAMMED_SET + value: "true"