diff --git a/config/federation/README.md b/config/federation/README.md new file mode 100644 index 00000000..eb464295 --- /dev/null +++ b/config/federation/README.md @@ -0,0 +1,41 @@ +# Federation + +The Datum edge runs across many clusters in many regions. Customers, though, +work against a single control plane: they create a Gateway, a route, or a +firewall policy in one place. **Federation is what carries that intent out to +the edge clusters that actually serve traffic.** + +This directory holds the federation configuration the test environment applies, +mirrored from production so the test edge fans configuration out the same way +the real one does. + +## Why it's tested as its own concern + +For most of this system's history, the test environment copied configuration +between clusters with a simple direct mechanism — nothing like production. But +several real incidents lived specifically in the federation layer: some +information (a backend's online/offline status) is intentionally *not* carried +to the edge, and the timing of cross-cluster delivery created races. None of +that is visible unless the test edge federates the way production does. + +So the production-fidelity environment stands up real federation and proves the +thing customers depend on: **configuration created in the control plane actually +arrives at the edge.** The test confirms a change made centrally shows up on a +downstream cluster within seconds. + +## What's here + +- A propagation policy describing *which* resources travel to the edge. +- Interpreter rules describing *how* each resource type is carried and how + status reported by the edge is folded back into the control-plane object — + including the deliberate choice to propagate configuration but not live status, + which is the behavior that caused real "false offline" incidents and is now exercised directly. + +## Implementation + +Federation is implemented with [Karmada](https://karmada.io/). 
The directory is +named for the responsibility — fanning configuration out to the edge — rather +than the tool, so the intent stays clear even if the underlying mechanism +changes. The environment that applies these artifacts is described in +[`Taskfile.test-infra.yml`](../../Taskfile.test-infra.yml) (`task +test-infra:karmada-up`). diff --git a/config/federation/clusterpropagationpolicy.yaml b/config/federation/clusterpropagationpolicy.yaml new file mode 100644 index 00000000..5299bb3b --- /dev/null +++ b/config/federation/clusterpropagationpolicy.yaml @@ -0,0 +1,158 @@ +apiVersion: policy.karmada.io/v1alpha1 +kind: ClusterPropagationPolicy +metadata: + name: nso-resources +spec: + conflictResolution: Overwrite + placement: + clusterAffinities: + - affinityName: gateway-enabled + labelSelector: + matchExpressions: + - key: infra.datum.net/gateways + operator: In + values: + - enabled + resourceSelectors: + - apiVersion: v1 + kind: Namespace + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-namespace + operator: Exists + - apiVersion: v1 + kind: ConfigMap + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-namespace + operator: Exists + - apiVersion: v1 + kind: Secret + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-namespace + operator: Exists + # TODO(jreese) clean up dupe secret policies + - apiVersion: v1 + kind: Secret + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + - apiVersion: v1 + kind: Secret + labelSelector: + matchExpressions: + - key: cert-manager.io/issuer-name + operator: In + values: + - nso-gateway + - apiVersion: discovery.k8s.io/v1 + kind: EndpointSlice + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + - apiVersion: v1 + kind: Service + # TODO(jreese) get labels on these Services + # labelSelector: + # matchExpressions: + # - key: 
meta.datumapis.com/upstream-cluster-name + # operator: Exists + + # Gateway API + - apiVersion: gateway.networking.k8s.io/v1 + kind: Gateway + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + - apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + - apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + labelSelector: + matchExpressions: + - key: meta.datumapis.com/http01-solver + operator: Exists + - apiVersion: gateway.networking.k8s.io/v1 + kind: BackendTLSPolicy + # TODO(jreese) get labels on these when they are created by the httpproxy + # controller + # labelSelector: + # matchExpressions: + # - key: meta.datumapis.com/upstream-cluster-name + # operator: Exists + + # Envoy Gateway API Extensions + - apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: Backend + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + - apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: BackendTrafficPolicy + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + - apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: HTTPRouteFilter + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + - apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: HTTPRouteFilter + labelSelector: + matchExpressions: + - key: meta.datumapis.com/http01-solver + operator: Exists + - apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: SecurityPolicy + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + - apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: EnvoyPatchPolicy + # TODO(jreese) get labels on these patch policies + # labelSelector: + # matchExpressions: + # - key: meta.datumapis.com/upstream-cluster-name + # operator: Exists + 
+ # Network Services Operator CRDs (replicated for the extension server) + - apiVersion: networking.datumapis.com/v1alpha1 + kind: Connector + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + - apiVersion: networking.datumapis.com/v1alpha + kind: TrafficProtectionPolicy + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + - apiVersion: networking.datumapis.com/v1alpha + kind: HTTPProxy + labelSelector: + matchExpressions: + - key: meta.datumapis.com/upstream-cluster-name + operator: Exists + + # External DNS + - apiVersion: externaldns.k8s.io/v1alpha1 + kind: DNSEndpoint + # TODO(jreese) get labels on these + # labelSelector: + # matchExpressions: + # - key: meta.datumapis.com/upstream-cluster-name + # operator: Exists diff --git a/config/federation/resourceinterpreters.yaml b/config/federation/resourceinterpreters.yaml new file mode 100644 index 00000000..452a19ad --- /dev/null +++ b/config/federation/resourceinterpreters.yaml @@ -0,0 +1,234 @@ +# Status aggregation rules: each AggregateStatus below copies selected status fields from statusItems[1] onto desiredObj. +# Future test coverage for resource interpreters can leverage the test framework once it's merged. +# +# See: https://github.com/karmada-io/karmada/pull/6938 +--- +apiVersion: config.karmada.io/v1alpha1 +kind: ResourceInterpreterCustomization +metadata: + name: gateway.networking.k8s.io-gateway +spec: + target: + apiVersion: gateway.networking.k8s.io/v1 + kind: Gateway + customizations: + statusAggregation: + luaScript: > + function AggregateStatus(desiredObj, statusItems) + if statusItems == nil or #statusItems == 0 then + return desiredObj + end + if desiredObj.status == nil then + desiredObj.status = {} + end + + local item = statusItems[1] + if item == nil or item.status == nil then + return desiredObj + end + + -- Only statusItems[1] is used. TODO(jreese) implement proper aggregation logic. + -- Would be good to think through how to represent propagation status across clusters. 
+ if item.status.addresses ~= nil then + desiredObj.status.addresses = item.status.addresses + end + if item.status.conditions ~= nil then + desiredObj.status.conditions = item.status.conditions + end + if item.status.listeners ~= nil then + desiredObj.status.listeners = item.status.listeners + end + return desiredObj + end +--- +apiVersion: config.karmada.io/v1alpha1 +kind: ResourceInterpreterCustomization +metadata: + name: gateway.networking.k8s.io-httproute +spec: + target: + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + customizations: + statusAggregation: + luaScript: > + function AggregateStatus(desiredObj, statusItems) + if statusItems == nil or #statusItems == 0 then + return desiredObj + end + if desiredObj.status == nil then + desiredObj.status = {} + end + + local item = statusItems[1] + if item == nil or item.status == nil then + return desiredObj + end + + -- Only statusItems[1] is used. TODO(jreese) implement proper aggregation logic. + -- Would be good to think through how to represent propagation status across clusters. + if item.status.parents ~= nil then + desiredObj.status.parents = item.status.parents + end + return desiredObj + end +--- +apiVersion: config.karmada.io/v1alpha1 +kind: ResourceInterpreterCustomization +metadata: + name: gateway.networking.k8s.io-backendtlspolicy +spec: + target: + apiVersion: gateway.networking.k8s.io/v1 + kind: BackendTLSPolicy + customizations: + statusAggregation: + luaScript: > + function AggregateStatus(desiredObj, statusItems) + if statusItems == nil or #statusItems == 0 then + return desiredObj + end + if desiredObj.status == nil then + desiredObj.status = {} + end + + local item = statusItems[1] + if item == nil or item.status == nil then + return desiredObj + end + + -- Only statusItems[1] is used. TODO(jreese) implement proper aggregation logic. + -- Would be good to think through how to represent propagation status across clusters. 
+ if item.status.ancestors ~= nil then + desiredObj.status.ancestors = item.status.ancestors + end + return desiredObj + end +--- +apiVersion: config.karmada.io/v1alpha1 +kind: ResourceInterpreterCustomization +metadata: + name: gateway.envoyproxy.io-backend +spec: + target: + apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: Backend + customizations: + statusAggregation: + luaScript: > + function AggregateStatus(desiredObj, statusItems) + if statusItems == nil or #statusItems == 0 then + return desiredObj + end + if desiredObj.status == nil then + desiredObj.status = {} + end + + local item = statusItems[1] + if item == nil or item.status == nil then + return desiredObj + end + + -- Only statusItems[1] is used. TODO(jreese) implement proper aggregation logic. + -- Would be good to think through how to represent propagation status across clusters. + if item.status.conditions ~= nil then + desiredObj.status.conditions = item.status.conditions + end + return desiredObj + end +--- +apiVersion: config.karmada.io/v1alpha1 +kind: ResourceInterpreterCustomization +metadata: + name: gateway.envoyproxy.io-backendtrafficpolicy +spec: + target: + apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: BackendTrafficPolicy + customizations: + statusAggregation: + luaScript: > + function AggregateStatus(desiredObj, statusItems) + if statusItems == nil or #statusItems == 0 then + return desiredObj + end + if desiredObj.status == nil then + desiredObj.status = {} + end + + local item = statusItems[1] + if item == nil or item.status == nil then + return desiredObj + end + + -- Only statusItems[1] is used. TODO(jreese) implement proper aggregation logic. + -- Would be good to think through how to represent propagation status across clusters. 
+ if item.status.ancestors ~= nil then + desiredObj.status.ancestors = item.status.ancestors + end + return desiredObj + end +--- +apiVersion: config.karmada.io/v1alpha1 +kind: ResourceInterpreterCustomization +metadata: + name: gateway.envoyproxy.io-envoypatchpolicy +spec: + target: + apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: EnvoyPatchPolicy + customizations: + statusAggregation: + luaScript: > + function AggregateStatus(desiredObj, statusItems) + if statusItems == nil or #statusItems == 0 then + return desiredObj + end + if desiredObj.status == nil then + desiredObj.status = {} + end + + local item = statusItems[1] + if item == nil or item.status == nil then + return desiredObj + end + + -- Only statusItems[1] is used. TODO(jreese) implement proper aggregation logic. + -- Would be good to think through how to represent propagation status across clusters. + if item.status.ancestors ~= nil then + desiredObj.status.ancestors = item.status.ancestors + end + return desiredObj + end +--- +apiVersion: config.karmada.io/v1alpha1 +kind: ResourceInterpreterCustomization +metadata: + name: externaldns.k8s.io-dnsendpoint +spec: + target: + apiVersion: externaldns.k8s.io/v1alpha1 + kind: DNSEndpoint + customizations: + statusAggregation: + luaScript: > + function AggregateStatus(desiredObj, statusItems) + if statusItems == nil or #statusItems == 0 then + return desiredObj + end + if desiredObj.status == nil then + desiredObj.status = {} + end + + local item = statusItems[1] + if item == nil or item.status == nil then + return desiredObj + end + + -- Only statusItems[1] is used. TODO(jreese) implement proper aggregation logic. + -- Would be good to think through how to represent propagation status across clusters. + if item.status.observedGeneration ~= nil then + desiredObj.status.observedGeneration = item.status.observedGeneration + end + return desiredObj + end