From 40d5e9e3e29f6b583c011d630a3533e8b04093c9 Mon Sep 17 00:00:00 2001 From: Ali Syed Date: Wed, 25 Mar 2026 09:50:16 +0000 Subject: [PATCH 1/5] feat: Expose ExternalDNS operand metrics via kube-rbac-proxy sidecar ExternalDNS operand pods bind metrics to 127.0.0.1, making them inaccessible from outside the pod. Prometheus cannot scrape them. This adds a kube-rbac-proxy sidecar to each operand deployment that proxies the localhost metrics over HTTPS, along with a Service (annotated for OpenShift serving cert) and ServiceMonitor for automatic Prometheus scraping. Resolves: OCPBUGS-58102 Assisted with Claude --- ...al-dns-operator.clusterserviceversion.yaml | 3 + config/rbac/operand_role.yaml | 12 + main.go | 1 + pkg/operator/config/config.go | 5 + .../controller/externaldns/controller.go | 30 ++- .../controller/externaldns/deployment.go | 60 ++++- .../controller/externaldns/deployment_test.go | 20 +- pkg/operator/controller/externaldns/pod.go | 102 +++++++ .../controller/externaldns/pod_test.go | 249 ++++++++++++++++++ .../controller/externaldns/service.go | 142 ++++++++++ .../controller/externaldns/service_test.go | 171 ++++++++++++ .../controller/externaldns/servicemonitor.go | 165 ++++++++++++ .../externaldns/servicemonitor_test.go | 197 ++++++++++++++ pkg/operator/controller/names.go | 15 ++ pkg/operator/operator.go | 15 +- 15 files changed, 1158 insertions(+), 29 deletions(-) create mode 100644 pkg/operator/controller/externaldns/service.go create mode 100644 pkg/operator/controller/externaldns/service_test.go create mode 100644 pkg/operator/controller/externaldns/servicemonitor.go create mode 100644 pkg/operator/controller/externaldns/servicemonitor_test.go diff --git a/bundle/manifests/external-dns-operator.clusterserviceversion.yaml b/bundle/manifests/external-dns-operator.clusterserviceversion.yaml index b63ca08d7..8b3c02629 100644 --- a/bundle/manifests/external-dns-operator.clusterserviceversion.yaml +++ b/bundle/manifests/external-dns-operator.clusterserviceversion.yaml @@ -470,6 +470,7 @@ spec: - --operator-namespace=$(OPERATOR_NAMESPACE) - --operand-namespace=$(OPERATOR_NAMESPACE) - --externaldns-image=$(RELATED_IMAGE_EXTERNAL_DNS) + - --kube-rbac-proxy-image=$(RELATED_IMAGE_KUBE_RBAC_PROXY) - --trusted-ca-configmap=$(TRUSTED_CA_CONFIGMAP_NAME) - --leader-elect - --webhook-disable-http2 @@ -480,6 +481,8 @@ spec: fieldPath: metadata.namespace - name: RELATED_IMAGE_EXTERNAL_DNS value: quay.io/external-dns-operator/external-dns:latest + - name: RELATED_IMAGE_KUBE_RBAC_PROXY + value: quay.io/openshift/origin-kube-rbac-proxy:latest - name: TRUSTED_CA_CONFIGMAP_NAME image: quay.io/openshift/origin-external-dns-operator:latest name: external-dns-operator diff --git a/config/rbac/operand_role.yaml b/config/rbac/operand_role.yaml index 5022a8ec8..30da38b3a 100644 --- a/config/rbac/operand_role.yaml +++ b/config/rbac/operand_role.yaml @@ -31,3 +31,15 @@ rules: - get - watch - list + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/main.go b/main.go index fa4fbaf00..6c7208bdf 100644 --- a/main.go +++ b/main.go @@ -42,6 +42,7 @@ func main() { flag.StringVar(&opCfg.OperatorNamespace, "operator-namespace", operatorconfig.DefaultOperatorNamespace, "The namespace that the operator is running in.") flag.StringVar(&opCfg.OperandNamespace, "operand-namespace", operatorconfig.DefaultOperandNamespace, "The namespace that ExternalDNS containers should run in.") flag.StringVar(&opCfg.ExternalDNSImage, "externaldns-image", operatorconfig.DefaultExternalDNSImage, "The container image used for running ExternalDNS.") + flag.StringVar(&opCfg.KubeRBACProxyImage, "kube-rbac-proxy-image", operatorconfig.DefaultKubeRBACProxyImage, "The container image used for the kube-rbac-proxy metrics sidecar.") flag.StringVar(&opCfg.CertDir, "cert-dir", operatorconfig.DefaultCertDir, "The directory for keys and certificates for serving the webhook.") flag.StringVar(&opCfg.TrustedCAConfigMapName, "trusted-ca-configmap", operatorconfig.DefaultTrustedCAConfigMapName, "The name of the config map containing TLS CA(s) which should be trusted by ExternalDNS containers. PEM encoded file under \"ca-bundle.crt\" key is expected.") flag.BoolVar(&opCfg.EnableWebhook, "enable-webhook", operatorconfig.DefaultEnableWebhook, "Enable the validating webhook server. Defaults to true.") diff --git a/pkg/operator/config/config.go b/pkg/operator/config/config.go index aa6e85f36..8aabca6d4 100644 --- a/pkg/operator/config/config.go +++ b/pkg/operator/config/config.go @@ -35,6 +35,7 @@ import ( const ( DefaultExternalDNSImage = "quay.io/external-dns-operator/external-dns:latest" + DefaultKubeRBACProxyImage = "quay.io/openshift/origin-kube-rbac-proxy:latest" DefaultMetricsAddr = "127.0.0.1:8080" DefaultOperatorNamespace = "external-dns-operator" DefaultOperandNamespace = "external-dns" @@ -59,6 +60,10 @@ type Config struct { // by the operator. ExternalDNSImage string + // KubeRBACProxyImage is the kube-rbac-proxy image for the metrics sidecar container + // in the ExternalDNS operand deployment. + KubeRBACProxyImage string + // MetricsBindAddress is the TCP address that the operator should bind to for // serving prometheus metrics. It can be set to "0" to disable the metrics serving. MetricsBindAddress string diff --git a/pkg/operator/controller/externaldns/controller.go b/pkg/operator/controller/externaldns/controller.go index eb1043900..557cb8873 100644 --- a/pkg/operator/controller/externaldns/controller.go +++ b/pkg/operator/controller/externaldns/controller.go @@ -50,6 +50,8 @@ type Config struct { Namespace string // Image is the ExternalDNS image to use. Image string + // KubeRBACProxyImage is the kube-rbac-proxy image for the metrics sidecar. + KubeRBACProxyImage string // OperatorNamespace is the namespace in which this operator is deployed. OperatorNamespace string // IsOpenShift is the flag which instructs the operator that it runs in OpenShift. @@ -102,6 +104,10 @@ func New(mgr manager.Manager, cfg Config) (controller.Controller, error) { return nil, err } + if err := c.Watch(source.Kind[client.Object](operatorCache, &corev1.Service{}, handler.EnqueueRequestForOwner(operatorScheme, operatorRESTMapper, &operatorv1beta1.ExternalDNS{}, handler.OnlyControllerOwner()))); err != nil { + return nil, err + } + // secret replicated by the credentials controller // needs to trigger the reconciliation of the corresponding ExternalDNS // because of the annotation with the secret's hash in the operand deployment @@ -207,11 +213,33 @@ func (r *reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu trustCAConfigMap = configMap } - _, currentDeployment, err := r.ensureExternalDNSDeployment(ctx, r.config.Namespace, r.config.Image, sa, credSecret, trustCAConfigMap, externalDNS) + _, currentDeployment, err := r.ensureExternalDNSDeployment(ctx, r.config.Namespace, r.config.Image, r.config.KubeRBACProxyImage, sa, credSecret, trustCAConfigMap, externalDNS) if err != nil { return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS deployment: %w", err) } + // Ensure metrics service and service monitor for Prometheus scraping. + // If kube-rbac-proxy image is not configured, clean up any existing metrics resources. + if r.config.KubeRBACProxyImage != "" { + if err := r.ensureExternalDNSMetricsService(ctx, r.config.Namespace, externalDNS); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS metrics service: %w", err) + } + if r.config.IsOpenShift { + if err := r.ensureExternalDNSServiceMonitor(ctx, r.config.Namespace, externalDNS); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS service monitor: %w", err) + } + } + } else { + if err := r.deleteExternalDNSMetricsService(ctx, r.config.Namespace, externalDNS); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to delete externalDNS metrics service: %w", err) + } + if r.config.IsOpenShift { + if err := r.deleteExternalDNSServiceMonitor(ctx, r.config.Namespace, externalDNS); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to delete externalDNS service monitor: %w", err) + } + } + } + if err := r.updateExternalDNSStatus(ctx, externalDNS, currentDeployment, true); err != nil { return reconcile.Result{}, fmt.Errorf("failed to update externalDNS custom resource %s: %w", externalDNS.Name, err) } diff --git a/pkg/operator/controller/externaldns/deployment.go b/pkg/operator/controller/externaldns/deployment.go index 3d9c80cd9..01c16d847 100644 --- a/pkg/operator/controller/externaldns/deployment.go +++ b/pkg/operator/controller/externaldns/deployment.go @@ -77,6 +77,7 @@ var sourceStringTable = map[operatorv1beta1.ExternalDNSSourceType]string{ type deploymentConfig struct { namespace string image string + kubeRBACProxyImage string serviceAccount *corev1.ServiceAccount externalDNS *operatorv1beta1.ExternalDNS isOpenShift bool @@ -89,7 +90,7 @@ type deploymentConfig struct { // ensureExternalDNSDeployment ensures that the externalDNS deployment exists. // Returns a Boolean value indicating whether the deployment exists, a pointer to the deployment, and an error when relevant. -func (r *reconciler) ensureExternalDNSDeployment(ctx context.Context, namespace, image string, serviceAccount *corev1.ServiceAccount, credSecret *corev1.Secret, trustCAConfigMap *corev1.ConfigMap, externalDNS *operatorv1beta1.ExternalDNS) (bool, *appsv1.Deployment, error) { +func (r *reconciler) ensureExternalDNSDeployment(ctx context.Context, namespace, image, kubeRBACProxyImage string, serviceAccount *corev1.ServiceAccount, credSecret *corev1.Secret, trustCAConfigMap *corev1.ConfigMap, externalDNS *operatorv1beta1.ExternalDNS) (bool, *appsv1.Deployment, error) { nsName := types.NamespacedName{Namespace: namespace, Name: controller.ExternalDNSResourceName(externalDNS)} // build credentials secret's hash @@ -109,16 +110,17 @@ func (r *reconciler) ensureExternalDNSDeployment(ctx context.Context, namespace, } desired, err := desiredExternalDNSDeployment(&deploymentConfig{ - namespace, - image, - serviceAccount, - externalDNS, - r.config.IsOpenShift, - r.config.PlatformStatus, - credSecret.Name, - credSecretHash, - trustCAConfigMapName, - trustCAConfigMapHash, + namespace: namespace, + image: image, + kubeRBACProxyImage: kubeRBACProxyImage, + serviceAccount: serviceAccount, + externalDNS: externalDNS, + isOpenShift: r.config.IsOpenShift, + platformStatus: r.config.PlatformStatus, + secret: credSecret.Name, + secretHash: credSecretHash, + trustedCAConfigMapName: trustCAConfigMapName, + trustedCAConfigMapHash: trustCAConfigMapHash, }) if err != nil { return false, nil, fmt.Errorf("failed to build externalDNS deployment: %w", err) @@ -296,6 +298,17 @@ func desiredExternalDNSDeployment(cfg *deploymentConfig) (*appsv1.Deployment, er depl.Spec.Template.Spec.Containers = append(depl.Spec.Template.Spec.Containers, *container) } } + // Add kube-rbac-proxy sidecar(s) and metrics cert volume for secure metrics exposure. + // One sidecar per zone container, each proxying the corresponding metrics port. + if cfg.kubeRBACProxyImage != "" { + for i := 0; i < cbld.counter; i++ { + proxyContainer := kubeRBACProxyContainer(cfg.kubeRBACProxyImage, i) + depl.Spec.Template.Spec.Containers = append(depl.Spec.Template.Spec.Containers, proxyContainer) + } + certVolume := metricsCertVolume(controller.ExternalDNSMetricsSecretName(cfg.externalDNS)) + depl.Spec.Template.Spec.Volumes = append(depl.Spec.Template.Spec.Volumes, certVolume) + } + return depl, nil } @@ -418,6 +431,10 @@ func externalDNSContainersChanged(current, expected, updated *appsv1.Deployment) updated.Spec.Template.Spec.Containers[currCont.Index].SecurityContext = updatedContext changed = true } + if !equalContainerPorts(currCont.Ports, expCont.Ports) { + updated.Spec.Template.Spec.Containers[currCont.Index].Ports = expCont.Ports + changed = true + } } else { // expected container is not present - add it updated.Spec.Template.Spec.Containers = append(updated.Spec.Template.Spec.Containers, expCont.Container) @@ -701,6 +718,27 @@ func securityContextChanged(current, updated, desired *corev1.SecurityContext) ( return changed, updated } +// equalContainerPorts returns true if 2 container port slices have the same content. +func equalContainerPorts(current, expected []corev1.ContainerPort) bool { + if len(current) != len(expected) { + return false + } + currentMap := map[string]corev1.ContainerPort{} + for _, p := range current { + currentMap[p.Name] = p + } + for _, ep := range expected { + cp, found := currentMap[ep.Name] + if !found { + return false + } + if cp.ContainerPort != ep.ContainerPort || cp.Protocol != ep.Protocol { + return false + } + } + return true +} + func equalBoolPtr(current, desired *bool) bool { if desired == nil { return true diff --git a/pkg/operator/controller/externaldns/deployment_test.go b/pkg/operator/controller/externaldns/deployment_test.go index e69b99705..009049193 100644 --- a/pkg/operator/controller/externaldns/deployment_test.go +++ b/pkg/operator/controller/externaldns/deployment_test.go @@ -3845,15 +3845,15 @@ func TestDesiredExternalDNSDeployment(t *testing.T) { } }() depl, err := desiredExternalDNSDeployment(&deploymentConfig{ - test.OperandNamespace, - test.OperandImage, - serviceAccount, - tc.inputExternalDNS, - tc.inputIsOpenShift, - tc.inputPlatformStatus, - tc.inputSecretName, - testSecretHash, - tc.inputTrustedCAConfigMapName, "", + namespace: test.OperandNamespace, + image: test.OperandImage, + serviceAccount: serviceAccount, + externalDNS: tc.inputExternalDNS, + isOpenShift: tc.inputIsOpenShift, + platformStatus: tc.inputPlatformStatus, + secret: tc.inputSecretName, + secretHash: testSecretHash, + trustedCAConfigMapName: tc.inputTrustedCAConfigMapName, }) if err != nil { t.Errorf("expected no error from calling desiredExternalDNSDeployment, but received %v", err) @@ -5999,7 +5999,7 @@ func TestEnsureExternalDNSDeployment(t *testing.T) { log: zap.New(zap.UseDevMode(true)), } - gotExist, gotDepl, err := r.ensureExternalDNSDeployment(context.TODO(), test.OperandNamespace, test.OperandImage, serviceAccount, tc.credSecret, tc.trustCAConfigMap, &tc.extDNS) + gotExist, gotDepl, err := r.ensureExternalDNSDeployment(context.TODO(), test.OperandNamespace, test.OperandImage, "", serviceAccount, tc.credSecret, tc.trustCAConfigMap, &tc.extDNS) if err != nil { if !tc.errExpected { t.Fatalf("unexpected error received: %v", err) diff --git a/pkg/operator/controller/externaldns/pod.go b/pkg/operator/controller/externaldns/pod.go index be6ab44eb..126517322 100644 --- a/pkg/operator/controller/externaldns/pod.go +++ b/pkg/operator/controller/externaldns/pod.go @@ -24,6 +24,7 @@ import ( "strings" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" @@ -57,6 +58,16 @@ const ( // all capabilities in the container security context allCapabilities = "ALL" // + // kube-rbac-proxy metrics sidecar + // + kubeRBACProxyContainerName = "kube-rbac-proxy" + kubeRBACProxySecurePort = 8443 + kubeRBACProxyPortName = "https" + metricsCertVolumeName = "metrics-cert" + metricsCertMountPath = "/var/run/secrets/serving-cert" + metricsCertTLSCertFile = metricsCertMountPath + "/tls.crt" + metricsCertTLSKeyFile = metricsCertMountPath + "/tls.key" + // // AWS // awsCredentialEnvVarName = "AWS_SHARED_CREDENTIALS_FILE" @@ -686,3 +697,94 @@ func (b *externalDNSVolumeBuilder) bluecatVolumes() []corev1.Volume { func addTXTPrefixFlag(args []string) []string { return append(args, fmt.Sprintf("--txt-prefix=%s", defaultTXTRecordPrefix)) } + +// numMetricsPorts returns the number of metrics ports needed for the given ExternalDNS instance. +// This mirrors the zone container creation logic in desiredExternalDNSDeployment. +func numMetricsPorts(externalDNS *operatorv1beta1.ExternalDNS) int { + if len(externalDNS.Spec.Zones) == 0 { + if externalDNS.Spec.Provider.Type == operatorv1beta1.ProviderTypeAzure { + return 2 + } + return 1 + } + return len(externalDNS.Spec.Zones) +} + +// kubeRBACProxyPortNameForSeq returns the port name for the kube-rbac-proxy sidecar +// at the given sequence index. +func kubeRBACProxyPortNameForSeq(seq int) string { + if seq == 0 { + return kubeRBACProxyPortName + } + return fmt.Sprintf("%s-%d", kubeRBACProxyPortName, seq) +} + +// kubeRBACProxyContainer returns the kube-rbac-proxy sidecar container definition +// that proxies metrics from the ExternalDNS container's localhost metrics port at the given sequence. +func kubeRBACProxyContainer(image string, seq int) corev1.Container { + securePort := kubeRBACProxySecurePort + seq + upstreamPort := defaultMetricsStartPort + seq + portName := kubeRBACProxyPortNameForSeq(seq) + containerName := kubeRBACProxyContainerName + if seq > 0 { + containerName = fmt.Sprintf("%s-%d", kubeRBACProxyContainerName, seq) + } + return corev1.Container{ + Name: containerName, + Image: image, + Args: []string{ + fmt.Sprintf("--secure-listen-address=0.0.0.0:%d", securePort), + fmt.Sprintf("--upstream=http://127.0.0.1:%d/", upstreamPort), + "--logtostderr=true", + "--v=10", + fmt.Sprintf("--tls-cert-file=%s", metricsCertTLSCertFile), + fmt.Sprintf("--tls-private-key-file=%s", metricsCertTLSKeyFile), + "--http2-disable", + }, + Ports: []corev1.ContainerPort{ + { + Name: portName, + ContainerPort: int32(securePort), + Protocol: corev1.ProtocolTCP, + }, + }, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("100m"), + corev1.ResourceMemory: resource.MustParse("20Mi"), + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: metricsCertVolumeName, + MountPath: metricsCertMountPath, + ReadOnly: true, + }, + }, + TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError, + SecurityContext: &corev1.SecurityContext{ + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{allCapabilities}, + }, + Privileged: ptr.To[bool](false), + RunAsNonRoot: ptr.To[bool](true), + AllowPrivilegeEscalation: ptr.To[bool](false), + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + } +} + +// metricsCertVolume returns the volume for the metrics serving certificate secret. +func metricsCertVolume(secretName string) corev1.Volume { + return corev1.Volume{ + Name: metricsCertVolumeName, + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: secretName, + }, + }, + } +} + diff --git a/pkg/operator/controller/externaldns/pod_test.go b/pkg/operator/controller/externaldns/pod_test.go index 82bfafc2a..db1a1420d 100644 --- a/pkg/operator/controller/externaldns/pod_test.go +++ b/pkg/operator/controller/externaldns/pod_test.go @@ -1,10 +1,14 @@ package externaldnscontroller import ( + "fmt" "reflect" "strings" "testing" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" "github.com/openshift/external-dns-operator/api/v1beta1" @@ -260,3 +264,248 @@ func TestDomainFilters(t *testing.T) { }) } } + +func TestNumMetricsPorts(t *testing.T) { + testCases := []struct { + name string + extDNS *v1beta1.ExternalDNS + expected int + }{ + { + name: "no zones, non-Azure provider", + extDNS: &v1beta1.ExternalDNS{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Spec: v1beta1.ExternalDNSSpec{ + Provider: v1beta1.ExternalDNSProvider{Type: v1beta1.ProviderTypeAWS}, + }, + }, + expected: 1, + }, + { + name: "no zones, Azure provider", + extDNS: &v1beta1.ExternalDNS{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Spec: v1beta1.ExternalDNSSpec{ + Provider: v1beta1.ExternalDNSProvider{Type: v1beta1.ProviderTypeAzure}, + }, + }, + expected: 2, + }, + { + name: "3 zones", + extDNS: &v1beta1.ExternalDNS{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Spec: v1beta1.ExternalDNSSpec{ + Provider: v1beta1.ExternalDNSProvider{Type: v1beta1.ProviderTypeAWS}, + Zones: []string{"zone1", "zone2", "zone3"}, + }, + }, + expected: 3, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got := numMetricsPorts(tc.extDNS) + if got != tc.expected { + t.Errorf("expected %d, got %d", tc.expected, got) + } + }) + } +} + +func TestKubeRBACProxyPortNameForSeq(t *testing.T) { + testCases := []struct { + seq int + expected string + }{ + {0, "https"}, + {1, "https-1"}, + {2, "https-2"}, + } + for _, tc := range testCases { + t.Run(fmt.Sprintf("seq=%d", tc.seq), func(t *testing.T) { + got := kubeRBACProxyPortNameForSeq(tc.seq) + if got != tc.expected { + t.Errorf("expected %q, got %q", tc.expected, got) + } + }) + } +} + +func TestKubeRBACProxyContainer(t *testing.T) { + image := "quay.io/openshift/origin-kube-rbac-proxy:latest" + + t.Run("first sidecar (seq=0)", func(t *testing.T) { + c := kubeRBACProxyContainer(image, 0) + + if c.Name != "kube-rbac-proxy" { + t.Errorf("expected name %q, got %q", "kube-rbac-proxy", c.Name) + } + if c.Image != image { + t.Errorf("expected image %q, got %q", image, c.Image) + } + if len(c.Ports) != 1 { + t.Fatalf("expected 1 port, got %d", len(c.Ports)) + } + if c.Ports[0].ContainerPort != int32(kubeRBACProxySecurePort) { + t.Errorf("expected port %d, got %d", kubeRBACProxySecurePort, c.Ports[0].ContainerPort) + } + if c.Ports[0].Name != "https" { + t.Errorf("expected port name %q, got %q", "https", c.Ports[0].Name) + } + + // Verify upstream points to the right metrics port. + foundUpstream := false + for _, arg := range c.Args { + if strings.Contains(arg, fmt.Sprintf("--upstream=http://127.0.0.1:%d/", defaultMetricsStartPort)) { + foundUpstream = true + } + } + if !foundUpstream { + t.Errorf("expected upstream arg pointing to port %d, args: %v", defaultMetricsStartPort, c.Args) + } + + // Verify volume mount. + if len(c.VolumeMounts) != 1 || c.VolumeMounts[0].Name != metricsCertVolumeName { + t.Errorf("expected volume mount for %q, got %v", metricsCertVolumeName, c.VolumeMounts) + } + + // Verify resource requests. + expectedCPU := resource.MustParse("100m") + expectedMem := resource.MustParse("20Mi") + if !c.Resources.Requests.Cpu().Equal(expectedCPU) { + t.Errorf("expected CPU request %s, got %s", expectedCPU.String(), c.Resources.Requests.Cpu().String()) + } + if !c.Resources.Requests.Memory().Equal(expectedMem) { + t.Errorf("expected memory request %s, got %s", expectedMem.String(), c.Resources.Requests.Memory().String()) + } + + // Verify security context. + if c.SecurityContext == nil { + t.Fatal("expected security context to be set") + } + if *c.SecurityContext.Privileged != false { + t.Error("expected privileged=false") + } + if *c.SecurityContext.RunAsNonRoot != true { + t.Error("expected runAsNonRoot=true") + } + if *c.SecurityContext.AllowPrivilegeEscalation != false { + t.Error("expected allowPrivilegeEscalation=false") + } + }) + + t.Run("second sidecar (seq=1)", func(t *testing.T) { + c := kubeRBACProxyContainer(image, 1) + + if c.Name != "kube-rbac-proxy-1" { + t.Errorf("expected name %q, got %q", "kube-rbac-proxy-1", c.Name) + } + if c.Ports[0].ContainerPort != int32(kubeRBACProxySecurePort+1) { + t.Errorf("expected port %d, got %d", kubeRBACProxySecurePort+1, c.Ports[0].ContainerPort) + } + if c.Ports[0].Name != "https-1" { + t.Errorf("expected port name %q, got %q", "https-1", c.Ports[0].Name) + } + + foundUpstream := false + for _, arg := range c.Args { + if strings.Contains(arg, fmt.Sprintf("--upstream=http://127.0.0.1:%d/", defaultMetricsStartPort+1)) { + foundUpstream = true + } + } + if !foundUpstream { + t.Errorf("expected upstream arg pointing to port %d", defaultMetricsStartPort+1) + } + }) +} + +func TestMetricsCertVolume(t *testing.T) { + secretName := "test-metrics-cert" + vol := metricsCertVolume(secretName) + + if vol.Name != metricsCertVolumeName { + t.Errorf("expected volume name %q, got %q", metricsCertVolumeName, vol.Name) + } + if vol.Secret == nil { + t.Fatal("expected secret volume source") + } + if vol.Secret.SecretName != secretName { + t.Errorf("expected secret name %q, got %q", secretName, vol.Secret.SecretName) + } +} + +func TestEqualContainerPorts(t *testing.T) { + testCases := []struct { + name string + current []corev1.ContainerPort + expected []corev1.ContainerPort + equal bool + }{ + { + name: "both empty", + current: nil, + expected: nil, + equal: true, + }, + { + name: "identical", + current: []corev1.ContainerPort{ + {Name: "https", ContainerPort: 8443, Protocol: corev1.ProtocolTCP}, + }, + expected: []corev1.ContainerPort{ + {Name: "https", ContainerPort: 8443, Protocol: corev1.ProtocolTCP}, + }, + equal: true, + }, + { + name: "different length", + current: []corev1.ContainerPort{ + {Name: "https", ContainerPort: 8443, Protocol: corev1.ProtocolTCP}, + }, + expected: []corev1.ContainerPort{ + {Name: "https", ContainerPort: 8443, Protocol: corev1.ProtocolTCP}, + {Name: "https-1", ContainerPort: 8444, Protocol: corev1.ProtocolTCP}, + }, + equal: false, + }, + { + name: "different port number", + current: []corev1.ContainerPort{ + {Name: "https", ContainerPort: 8443, Protocol: corev1.ProtocolTCP}, + }, + expected: []corev1.ContainerPort{ + {Name: "https", ContainerPort: 9999, Protocol: corev1.ProtocolTCP}, + }, + equal: false, + }, + { + name: "different protocol", + current: []corev1.ContainerPort{ + {Name: "https", ContainerPort: 8443, Protocol: corev1.ProtocolTCP}, + }, + expected: []corev1.ContainerPort{ + {Name: "https", ContainerPort: 8443, Protocol: corev1.ProtocolUDP}, + }, + equal: false, + }, + { + name: "missing port name", + current: []corev1.ContainerPort{ + {Name: "other", ContainerPort: 8443, Protocol: corev1.ProtocolTCP}, + }, + expected: []corev1.ContainerPort{ + {Name: "https", ContainerPort: 8443, Protocol: corev1.ProtocolTCP}, + }, + equal: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got := equalContainerPorts(tc.current, tc.expected) + if got != tc.equal { + t.Errorf("expected %v, got %v", tc.equal, got) + } + }) + } +} diff --git a/pkg/operator/controller/externaldns/service.go b/pkg/operator/controller/externaldns/service.go new file mode 100644 index 000000000..345c0987a --- /dev/null +++ b/pkg/operator/controller/externaldns/service.go @@ -0,0 +1,142 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package externaldnscontroller + +import ( + "context" + "fmt" + "reflect" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + operatorv1beta1 "github.com/openshift/external-dns-operator/api/v1beta1" + controller "github.com/openshift/external-dns-operator/pkg/operator/controller" +) + +// metricsService returns the desired metrics Service for the given ExternalDNS instance. +// It creates one port per zone container to match the kube-rbac-proxy sidecars. +func metricsService(namespace string, externalDNS *operatorv1beta1.ExternalDNS) *corev1.Service { + metricsSecretName := controller.ExternalDNSMetricsSecretName(externalDNS) + numZones := numMetricsPorts(externalDNS) + + ports := make([]corev1.ServicePort, numZones) + for i := 0; i < numZones; i++ { + portName := kubeRBACProxyPortNameForSeq(i) + ports[i] = corev1.ServicePort{ + Name: portName, + Port: int32(kubeRBACProxySecurePort + i), + TargetPort: intstr.FromString(portName), + Protocol: corev1.ProtocolTCP, + } + } + + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: controller.ExternalDNSMetricsServiceName(externalDNS), + Namespace: namespace, + Labels: map[string]string{ + appNameLabel: controller.ExternalDNSBaseName, + appInstanceLabel: externalDNS.Name, + }, + Annotations: map[string]string{ + "service.beta.openshift.io/serving-cert-secret-name": metricsSecretName, + }, + }, + Spec: corev1.ServiceSpec{ + Selector: map[string]string{ + appNameLabel: controller.ExternalDNSBaseName, + appInstanceLabel: externalDNS.Name, + }, + Ports: ports, + }, + } +} + +// ensureExternalDNSMetricsService ensures that the metrics service for the operand exists. +func (r *reconciler) ensureExternalDNSMetricsService(ctx context.Context, namespace string, externalDNS *operatorv1beta1.ExternalDNS) error { + desired := metricsService(namespace, externalDNS) + + if err := controllerutil.SetControllerReference(externalDNS, desired, r.scheme); err != nil { + return fmt.Errorf("failed to set the controller reference for metrics service: %w", err) + } + + current := &corev1.Service{} + nsName := types.NamespacedName{Namespace: desired.Namespace, Name: desired.Name} + err := r.client.Get(ctx, nsName, current) + if err != nil { + if !errors.IsNotFound(err) { + return fmt.Errorf("failed to get metrics service %s: %w", nsName, err) + } + if err := r.client.Create(ctx, desired); err != nil { + return fmt.Errorf("failed to create metrics service %s/%s: %w", desired.Namespace, desired.Name, err) + } + r.log.Info("created metrics service", "namespace", desired.Namespace, "name", desired.Name) + return nil + } + + if metricsServiceChanged(current, desired) { + // Preserve ClusterIP to avoid immutable field errors. + desired.Spec.ClusterIP = current.Spec.ClusterIP + desired.ResourceVersion = current.ResourceVersion + if err := r.client.Update(ctx, desired); err != nil { + return fmt.Errorf("failed to update metrics service %s/%s: %w", desired.Namespace, desired.Name, err) + } + r.log.Info("updated metrics service", "namespace", desired.Namespace, "name", desired.Name) + } + + return nil +} + +// deleteExternalDNSMetricsService deletes the metrics service if it exists. +func (r *reconciler) deleteExternalDNSMetricsService(ctx context.Context, namespace string, externalDNS *operatorv1beta1.ExternalDNS) error { + svc := &corev1.Service{} + nsName := types.NamespacedName{Namespace: namespace, Name: controller.ExternalDNSMetricsServiceName(externalDNS)} + if err := r.client.Get(ctx, nsName, svc); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to get metrics service %s: %w", nsName, err) + } + if err := r.client.Delete(ctx, svc); err != nil { + return fmt.Errorf("failed to delete metrics service %s: %w", nsName, err) + } + r.log.Info("deleted metrics service", "namespace", namespace, "name", nsName.Name) + return nil +} + +// metricsServiceChanged returns true if the current service needs to be updated to match the desired. +func metricsServiceChanged(current, desired *corev1.Service) bool { + if !reflect.DeepEqual(current.Spec.Selector, desired.Spec.Selector) { + return true + } + if len(current.Spec.Ports) != len(desired.Spec.Ports) { + return true + } + for i := range desired.Spec.Ports { + if current.Spec.Ports[i].Name != desired.Spec.Ports[i].Name || + current.Spec.Ports[i].Port != desired.Spec.Ports[i].Port || + current.Spec.Ports[i].TargetPort != desired.Spec.Ports[i].TargetPort { + return true + } + } + return false +} diff --git a/pkg/operator/controller/externaldns/service_test.go b/pkg/operator/controller/externaldns/service_test.go new file mode 100644 index 000000000..e33541ad4 --- /dev/null +++ b/pkg/operator/controller/externaldns/service_test.go @@ -0,0 +1,171 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package externaldnscontroller + +import ( + "testing" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + operatorv1beta1 "github.com/openshift/external-dns-operator/api/v1beta1" + controller "github.com/openshift/external-dns-operator/pkg/operator/controller" + "github.com/openshift/external-dns-operator/pkg/operator/controller/utils/test" +) + +func TestMetricsService(t *testing.T) { + testCases := []struct { + name string + externalDNS *operatorv1beta1.ExternalDNS + namespace string + expectedPorts int + expectedName string + }{ + { + name: "single zone AWS", + externalDNS: testAWSExternalDNS(operatorv1beta1.SourceTypeService), + namespace: test.OperandNamespace, + expectedPorts: 1, + expectedName: controller.ExternalDNSMetricsServiceName(testAWSExternalDNS(operatorv1beta1.SourceTypeService)), + }, + { + name: "multiple zones AWS", + externalDNS: testAWSExternalDNSZones([]string{test.PublicZone, test.PrivateZone}, operatorv1beta1.SourceTypeService), + namespace: test.OperandNamespace, + expectedPorts: 2, + expectedName: controller.ExternalDNSMetricsServiceName(testAWSExternalDNSZones([]string{test.PublicZone, test.PrivateZone}, operatorv1beta1.SourceTypeService)), + }, + { + name: "Azure no zones creates 2 ports", + externalDNS: testAzureExternalDNSNoZones(operatorv1beta1.SourceTypeService), + namespace: test.OperandNamespace, + expectedPorts: 2, + expectedName: controller.ExternalDNSMetricsServiceName(testAzureExternalDNSNoZones(operatorv1beta1.SourceTypeService)), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + svc := metricsService(tc.namespace, tc.externalDNS) + + if svc.Name != tc.expectedName { + t.Errorf("expected service name %q, got %q", tc.expectedName, svc.Name) + } + if svc.Namespace != tc.namespace { + t.Errorf("expected namespace %q, got %q", tc.namespace, svc.Namespace) + } + if len(svc.Spec.Ports) != tc.expectedPorts { + t.Errorf("expected %d ports, got %d", tc.expectedPorts, len(svc.Spec.Ports)) + } + + // Verify serving cert annotation. + expectedSecretName := controller.ExternalDNSMetricsSecretName(tc.externalDNS) + if svc.Annotations["service.beta.openshift.io/serving-cert-secret-name"] != expectedSecretName { + t.Errorf("expected serving cert annotation %q, got %q", expectedSecretName, svc.Annotations["service.beta.openshift.io/serving-cert-secret-name"]) + } + + // Verify labels match selector. + if svc.Labels[appNameLabel] != controller.ExternalDNSBaseName { + t.Errorf("expected label %s=%s, got %s", appNameLabel, controller.ExternalDNSBaseName, svc.Labels[appNameLabel]) + } + + // Verify port names and numbering. + for i, port := range svc.Spec.Ports { + expectedPortName := kubeRBACProxyPortNameForSeq(i) + if port.Name != expectedPortName { + t.Errorf("port %d: expected name %q, got %q", i, expectedPortName, port.Name) + } + expectedPort := int32(kubeRBACProxySecurePort + i) + if port.Port != expectedPort { + t.Errorf("port %d: expected port %d, got %d", i, expectedPort, port.Port) + } + if port.TargetPort != intstr.FromString(expectedPortName) { + t.Errorf("port %d: expected target port %q, got %v", i, expectedPortName, port.TargetPort) + } + } + }) + } +} + +func TestMetricsServiceChanged(t *testing.T) { + extDNS := testAWSExternalDNS(operatorv1beta1.SourceTypeService) + base := metricsService(test.OperandNamespace, extDNS) + + testCases := []struct { + name string + mutate func(*corev1.Service) + changed bool + }{ + { + name: "no change", + mutate: func(s *corev1.Service) {}, + changed: false, + }, + { + name: "selector changed", + mutate: func(s *corev1.Service) { + s.Spec.Selector[appInstanceLabel] = "different" + }, + changed: true, + }, + { + name: "port count changed", + mutate: func(s *corev1.Service) { + s.Spec.Ports = append(s.Spec.Ports, corev1.ServicePort{ + Name: "extra", + Port: 9999, + }) + }, + changed: true, + }, + { + name: "port name changed", + mutate: func(s *corev1.Service) { + s.Spec.Ports[0].Name = "changed" + }, + changed: true, + }, + { + name: "port number changed", + mutate: func(s *corev1.Service) { + s.Spec.Ports[0].Port = 9999 + }, + changed: true, + }, + { + name: "target port changed", + mutate: func(s *corev1.Service) { + s.Spec.Ports[0].TargetPort = intstr.FromInt(1234) + }, + changed: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + current := base.DeepCopy() + tc.mutate(current) + desired := metricsService(test.OperandNamespace, extDNS) + + got := metricsServiceChanged(current, desired) + if got != tc.changed { + t.Errorf("expected changed=%v, got %v", tc.changed, got) + } + }) + } +} + diff --git a/pkg/operator/controller/externaldns/servicemonitor.go b/pkg/operator/controller/externaldns/servicemonitor.go new file mode 100644 index 000000000..95288e982 --- /dev/null +++ b/pkg/operator/controller/externaldns/servicemonitor.go @@ -0,0 +1,165 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package externaldnscontroller + +import ( + "context" + "fmt" + "reflect" + + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + + operatorv1beta1 "github.com/openshift/external-dns-operator/api/v1beta1" + controller "github.com/openshift/external-dns-operator/pkg/operator/controller" +) + +var serviceMonitorGVK = schema.GroupVersionKind{ + Group: "monitoring.coreos.com", + Version: "v1", + Kind: "ServiceMonitor", +} + +// ensureExternalDNSServiceMonitor ensures that the service monitor for the operand exists. +func (r *reconciler) ensureExternalDNSServiceMonitor(ctx context.Context, namespace string, externalDNS *operatorv1beta1.ExternalDNS) error { + desired := desiredServiceMonitor(namespace, externalDNS) + + // Set the controller reference so the ServiceMonitor is owned by the ExternalDNS CR. + ownerRef := metav1.NewControllerRef(externalDNS, operatorv1beta1.GroupVersion.WithKind("ExternalDNS")) + desired.SetOwnerReferences([]metav1.OwnerReference{*ownerRef}) + + current := &unstructured.Unstructured{} + current.SetGroupVersionKind(serviceMonitorGVK) + nsName := types.NamespacedName{Namespace: namespace, Name: controller.ExternalDNSServiceMonitorName(externalDNS)} + + err := r.client.Get(ctx, nsName, current) + if err != nil { + if !errors.IsNotFound(err) { + return fmt.Errorf("failed to get service monitor %s: %w", nsName, err) + } + if err := r.client.Create(ctx, desired); err != nil { + return fmt.Errorf("failed to create service monitor %s/%s: %w", namespace, desired.GetName(), err) + } + r.log.Info("created service monitor", "namespace", namespace, "name", desired.GetName()) + return nil + } + + // Update the spec if the fields we manage have drifted. + // We compare only the fields we set (endpoints, selector, namespaceSelector) + // rather than the full spec, because the API server may add defaulted fields + // that would cause reflect.DeepEqual to always detect drift. + if serviceMonitorChanged(current, desired) { + desiredSpec, _, _ := unstructured.NestedMap(desired.Object, "spec") + current.Object["spec"] = desiredSpec + if err := r.client.Update(ctx, current); err != nil { + return fmt.Errorf("failed to update service monitor %s/%s: %w", namespace, current.GetName(), err) + } + r.log.Info("updated service monitor", "namespace", namespace, "name", current.GetName()) + } + + return nil +} + +// deleteExternalDNSServiceMonitor deletes the service monitor if it exists. +func (r *reconciler) deleteExternalDNSServiceMonitor(ctx context.Context, namespace string, externalDNS *operatorv1beta1.ExternalDNS) error { + sm := &unstructured.Unstructured{} + sm.SetGroupVersionKind(serviceMonitorGVK) + nsName := types.NamespacedName{Namespace: namespace, Name: controller.ExternalDNSServiceMonitorName(externalDNS)} + if err := r.client.Get(ctx, nsName, sm); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to get service monitor %s: %w", nsName, err) + } + if err := r.client.Delete(ctx, sm); err != nil { + return fmt.Errorf("failed to delete service monitor %s: %w", nsName, err) + } + r.log.Info("deleted service monitor", "namespace", namespace, "name", nsName.Name) + return nil +} + +// desiredServiceMonitor returns the desired ServiceMonitor as an unstructured object. +// It creates one endpoint per zone container to match the kube-rbac-proxy sidecars, +// and configures TLS so that Prometheus can scrape the HTTPS endpoints. +func desiredServiceMonitor(namespace string, externalDNS *operatorv1beta1.ExternalDNS) *unstructured.Unstructured { + smName := controller.ExternalDNSServiceMonitorName(externalDNS) + serviceName := controller.ExternalDNSMetricsServiceName(externalDNS) + serverName := fmt.Sprintf("%s.%s.svc", serviceName, namespace) + + numZones := numMetricsPorts(externalDNS) + endpoints := make([]interface{}, numZones) + for i := 0; i < numZones; i++ { + endpoints[i] = map[string]interface{}{ + "interval": "30s", + "path": "/metrics", + "port": kubeRBACProxyPortNameForSeq(i), + "scheme": "https", + "bearerTokenFile": "/var/run/secrets/kubernetes.io/serviceaccount/token", + "tlsConfig": map[string]interface{}{ + "caFile": "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt", + "serverName": serverName, + }, + } + } + + sm := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "monitoring.coreos.com/v1", + "kind": "ServiceMonitor", + "metadata": map[string]interface{}{ + "name": smName, + "namespace": namespace, + "labels": map[string]interface{}{ + appNameLabel: controller.ExternalDNSBaseName, + appInstanceLabel: externalDNS.Name, + }, + }, + "spec": map[string]interface{}{ + "endpoints": endpoints, + "namespaceSelector": map[string]interface{}{ + "matchNames": []interface{}{ + namespace, + }, + }, + "selector": map[string]interface{}{ + "matchLabels": map[string]interface{}{ + appNameLabel: controller.ExternalDNSBaseName, + appInstanceLabel: externalDNS.Name, + }, + }, + }, + }, + } + sm.SetGroupVersionKind(serviceMonitorGVK) + return sm +} + +// serviceMonitorChanged returns true if the fields we manage have drifted +// between the current and desired ServiceMonitor objects. +func serviceMonitorChanged(current, desired *unstructured.Unstructured) bool { + for _, field := range []string{"endpoints", "selector", "namespaceSelector"} { + currentVal, _, _ := unstructured.NestedFieldNoCopy(current.Object, "spec", field) + desiredVal, _, _ := unstructured.NestedFieldNoCopy(desired.Object, "spec", field) + if !reflect.DeepEqual(currentVal, desiredVal) { + return true + } + } + return false +} diff --git a/pkg/operator/controller/externaldns/servicemonitor_test.go b/pkg/operator/controller/externaldns/servicemonitor_test.go new file mode 100644 index 000000000..ab0f746ae --- /dev/null +++ b/pkg/operator/controller/externaldns/servicemonitor_test.go @@ -0,0 +1,197 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package externaldnscontroller + +import ( + "fmt" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + + operatorv1beta1 "github.com/openshift/external-dns-operator/api/v1beta1" + controller "github.com/openshift/external-dns-operator/pkg/operator/controller" + "github.com/openshift/external-dns-operator/pkg/operator/controller/utils/test" +) + +func TestDesiredServiceMonitor(t *testing.T) { + testCases := []struct { + name string + externalDNS *operatorv1beta1.ExternalDNS + namespace string + expectedEndpoints int + }{ + { + name: "single zone AWS", + externalDNS: testAWSExternalDNS(operatorv1beta1.SourceTypeService), + namespace: test.OperandNamespace, + expectedEndpoints: 1, + }, + { + name: "multiple zones AWS", + externalDNS: testAWSExternalDNSZones([]string{test.PublicZone, test.PrivateZone}, operatorv1beta1.SourceTypeService), + namespace: test.OperandNamespace, + expectedEndpoints: 2, + }, + { + name: "Azure no zones creates 2 endpoints", + externalDNS: testAzureExternalDNSNoZones(operatorv1beta1.SourceTypeService), + namespace: test.OperandNamespace, + expectedEndpoints: 2, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + sm := desiredServiceMonitor(tc.namespace, tc.externalDNS) + + // Verify GVK. + if sm.GetKind() != "ServiceMonitor" { + t.Errorf("expected kind ServiceMonitor, got %s", sm.GetKind()) + } + if sm.GetAPIVersion() != "monitoring.coreos.com/v1" { + t.Errorf("expected apiVersion monitoring.coreos.com/v1, got %s", sm.GetAPIVersion()) + } + + // Verify name and namespace. + expectedName := controller.ExternalDNSServiceMonitorName(tc.externalDNS) + if sm.GetName() != expectedName { + t.Errorf("expected name %q, got %q", expectedName, sm.GetName()) + } + if sm.GetNamespace() != tc.namespace { + t.Errorf("expected namespace %q, got %q", tc.namespace, sm.GetNamespace()) + } + + // Verify endpoints count. + endpoints, found, err := unstructured.NestedSlice(sm.Object, "spec", "endpoints") + if err != nil || !found { + t.Fatalf("failed to get endpoints from service monitor: found=%v, err=%v", found, err) + } + if len(endpoints) != tc.expectedEndpoints { + t.Errorf("expected %d endpoints, got %d", tc.expectedEndpoints, len(endpoints)) + } + + // Verify each endpoint has correct fields. + serviceName := controller.ExternalDNSMetricsServiceName(tc.externalDNS) + expectedServerName := fmt.Sprintf("%s.%s.svc", serviceName, tc.namespace) + for i, ep := range endpoints { + epMap, ok := ep.(map[string]interface{}) + if !ok { + t.Fatalf("endpoint %d is not a map", i) + } + if epMap["scheme"] != "https" { + t.Errorf("endpoint %d: expected scheme https, got %v", i, epMap["scheme"]) + } + if epMap["path"] != "/metrics" { + t.Errorf("endpoint %d: expected path /metrics, got %v", i, epMap["path"]) + } + expectedPort := kubeRBACProxyPortNameForSeq(i) + if epMap["port"] != expectedPort { + t.Errorf("endpoint %d: expected port %q, got %v", i, expectedPort, epMap["port"]) + } + tlsConfig, ok := epMap["tlsConfig"].(map[string]interface{}) + if !ok { + t.Fatalf("endpoint %d: tlsConfig missing or wrong type", i) + } + if tlsConfig["serverName"] != expectedServerName { + t.Errorf("endpoint %d: expected serverName %q, got %v", i, expectedServerName, tlsConfig["serverName"]) + } + } + + // Verify selector labels. + matchLabels, found, err := unstructured.NestedStringMap(sm.Object, "spec", "selector", "matchLabels") + if err != nil || !found { + t.Fatalf("failed to get selector matchLabels: found=%v, err=%v", found, err) + } + if matchLabels[appNameLabel] != controller.ExternalDNSBaseName { + t.Errorf("expected selector label %s=%s, got %s", appNameLabel, controller.ExternalDNSBaseName, matchLabels[appNameLabel]) + } + if matchLabels[appInstanceLabel] != tc.externalDNS.Name { + t.Errorf("expected selector label %s=%s, got %s", appInstanceLabel, tc.externalDNS.Name, matchLabels[appInstanceLabel]) + } + }) + } +} + +func TestServiceMonitorChanged(t *testing.T) { + extDNS := &operatorv1beta1.ExternalDNS{ + ObjectMeta: metav1.ObjectMeta{Name: test.Name}, + Spec: operatorv1beta1.ExternalDNSSpec{ + Provider: operatorv1beta1.ExternalDNSProvider{Type: operatorv1beta1.ProviderTypeAWS}, + Zones: []string{test.PublicZone}, + }, + } + base := desiredServiceMonitor(test.OperandNamespace, extDNS) + + testCases := []struct { + name string + mutate func(*unstructured.Unstructured) + changed bool + }{ + { + name: "no change", + mutate: func(sm *unstructured.Unstructured) {}, + changed: false, + }, + { + name: "extra defaulted field in spec does not trigger change", + mutate: func(sm *unstructured.Unstructured) { + // Simulate API server adding a defaulted field we don't manage. + _ = unstructured.SetNestedField(sm.Object, "None", "spec", "targetLabels") + }, + changed: false, + }, + { + name: "endpoints changed", + mutate: func(sm *unstructured.Unstructured) { + endpoints, _, _ := unstructured.NestedSlice(sm.Object, "spec", "endpoints") + if len(endpoints) > 0 { + ep := endpoints[0].(map[string]interface{}) + ep["interval"] = "60s" + _ = unstructured.SetNestedSlice(sm.Object, endpoints, "spec", "endpoints") + } + }, + changed: true, + }, + { + name: "selector changed", + mutate: func(sm *unstructured.Unstructured) { + _ = unstructured.SetNestedField(sm.Object, "different", "spec", "selector", "matchLabels", appInstanceLabel) + }, + changed: true, + }, + { + name: "namespaceSelector changed", + mutate: func(sm *unstructured.Unstructured) { + _ = unstructured.SetNestedStringSlice(sm.Object, []string{"other-ns"}, "spec", "namespaceSelector", "matchNames") + }, + changed: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + current := base.DeepCopy() + tc.mutate(current) + + got := serviceMonitorChanged(current, desiredServiceMonitor(test.OperandNamespace, extDNS)) + if got != tc.changed { + t.Errorf("expected changed=%v, got %v", tc.changed, got) + } + }) + } +} diff --git a/pkg/operator/controller/names.go b/pkg/operator/controller/names.go index 9239dc418..2a740f1a1 100644 --- a/pkg/operator/controller/names.go +++ b/pkg/operator/controller/names.go @@ -62,6 +62,21 @@ func ExternalDNSGlobalResourceName() string { return ExternalDNSBaseName } +// ExternalDNSMetricsServiceName returns the name for the metrics service of the given ExternalDNS instance. +func ExternalDNSMetricsServiceName(externalDNS *operatorv1beta1.ExternalDNS) string { + return ExternalDNSBaseName + "-" + externalDNS.Name + "-metrics" +} + +// ExternalDNSServiceMonitorName returns the name for the service monitor of the given ExternalDNS instance. +func ExternalDNSServiceMonitorName(externalDNS *operatorv1beta1.ExternalDNS) string { + return ExternalDNSBaseName + "-" + externalDNS.Name + "-metrics-monitor" +} + +// ExternalDNSMetricsSecretName returns the name for the metrics serving cert secret of the given ExternalDNS instance. +func ExternalDNSMetricsSecretName(externalDNS *operatorv1beta1.ExternalDNS) string { + return ExternalDNSBaseName + "-" + externalDNS.Name + "-metrics-cert" +} + // ExternalDNSContainerName returns the container name unique for the given DNS zone. func ExternalDNSContainerName(zone string) string { return ExternalDNSBaseName + "-" + hashString(zone) diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 7a810e334..3f89026c8 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -118,13 +118,14 @@ func New(cliCfg *rest.Config, opCfg *operatorconfig.Config) (*Operator, error) { // Create and register the externaldns controller with the operator manager. if _, err := externaldnsctrl.New(mgr, externaldnsctrl.Config{ - Namespace: opCfg.OperandNamespace, - Image: opCfg.ExternalDNSImage, - OperatorNamespace: opCfg.OperatorNamespace, - IsOpenShift: opCfg.IsOpenShift, - PlatformStatus: opCfg.PlatformStatus, - InjectTrustedCA: opCfg.InjectTrustedCA(), - RequeuePeriod: opCfg.RequeuePeriod(), + Namespace: opCfg.OperandNamespace, + Image: opCfg.ExternalDNSImage, + KubeRBACProxyImage: opCfg.KubeRBACProxyImage, + OperatorNamespace: opCfg.OperatorNamespace, + IsOpenShift: opCfg.IsOpenShift, + PlatformStatus: opCfg.PlatformStatus, + InjectTrustedCA: opCfg.InjectTrustedCA(), + RequeuePeriod: opCfg.RequeuePeriod(), }); err != nil { return nil, fmt.Errorf("failed to create externaldns controller: %w", err) } From 5be458d2089fad7355616c7ff4ac75431791e921 Mon Sep 17 00:00:00 2001 From: Ali Syed Date: Wed, 6 May 2026 13:15:47 +0100 Subject: [PATCH 2/5] fix: Address review feedback for operand metrics exposure - Gate metrics sidecar/service/servicemonitor behind IsOpenShift (serving-cert dependency) - Add Service and ServiceMonitor watches with KubeRBACProxyImage+IsOpenShift guard - Switch ServiceMonitor ownerRef to controllerutil.SetControllerReference - Remove manual delete logic; owner references handle cascade deletion - Add kubeRBACProxyContainerNameForSeq for naming consistency - Rename metricsService to desiredMetricsService - Add labels/annotations comparison to metricsServiceChanged and serviceMonitorChanged - Sync labels in ServiceMonitor update flow - Preserve immutable Service fields (ClusterIPs, IPFamilies, IPFamilyPolicy) on update - Clarify kube-rbac-proxy flag description for ExternalDNS operand - Add manager.yaml env var and namespace label for cluster monitoring - Sync bundle manifests via make bundle Assisted-by: Claude --- ...al-dns-operator.clusterserviceversion.yaml | 13 ++++++ ...c.authorization.k8s.io_v1_clusterrole.yaml | 12 ++++++ config/manager/manager.yaml | 4 ++ config/rbac/role.yaml | 13 ++++++ main.go | 2 +- .../controller/externaldns/controller.go | 43 +++++++++++-------- pkg/operator/controller/externaldns/pod.go | 15 ++++--- .../controller/externaldns/service.go | 35 +++++++-------- .../controller/externaldns/service_test.go | 21 +++++++-- .../controller/externaldns/servicemonitor.go | 34 ++++++--------- .../externaldns/servicemonitor_test.go | 9 ++++ pkg/operator/controller/names.go | 21 +++++++-- pkg/operator/operator.go | 2 + 13 files changed, 151 insertions(+), 73 deletions(-) diff --git a/bundle/manifests/external-dns-operator.clusterserviceversion.yaml b/bundle/manifests/external-dns-operator.clusterserviceversion.yaml index 8b3c02629..7c3d41803 100644 --- a/bundle/manifests/external-dns-operator.clusterserviceversion.yaml +++ b/bundle/manifests/external-dns-operator.clusterserviceversion.yaml @@ -560,6 +560,7 @@ spec: - configmaps - secrets - serviceaccounts + - services verbs: - create - delete @@ -588,6 +589,18 @@ spec: - patch - update - watch + - apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - coordination.k8s.io resources: diff --git a/bundle/manifests/external-dns_rbac.authorization.k8s.io_v1_clusterrole.yaml b/bundle/manifests/external-dns_rbac.authorization.k8s.io_v1_clusterrole.yaml index b8d87de90..8a6e040f8 100644 --- a/bundle/manifests/external-dns_rbac.authorization.k8s.io_v1_clusterrole.yaml +++ b/bundle/manifests/external-dns_rbac.authorization.k8s.io_v1_clusterrole.yaml @@ -31,3 +31,15 @@ rules: - get - watch - list +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index e6f3c2189..691c37874 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -3,6 +3,7 @@ kind: Namespace metadata: labels: name: external-dns-operator + openshift.io/cluster-monitoring: "true" name: external-dns-operator --- apiVersion: apps/v1 @@ -36,6 +37,7 @@ spec: - --operator-namespace=$(OPERATOR_NAMESPACE) - --operand-namespace=$(OPERATOR_NAMESPACE) - --externaldns-image=$(RELATED_IMAGE_EXTERNAL_DNS) + - --kube-rbac-proxy-image=$(RELATED_IMAGE_KUBE_RBAC_PROXY) - --trusted-ca-configmap=$(TRUSTED_CA_CONFIGMAP_NAME) - --leader-elect - --webhook-disable-http2 @@ -49,6 +51,8 @@ spec: # Use "latest" floating tag to avoid problems with the prunning of older mirorred images. # Ref: https://issues.redhat.com/browse/OCPBUGS-57339. value: quay.io/external-dns-operator/external-dns:latest + - name: RELATED_IMAGE_KUBE_RBAC_PROXY + value: quay.io/openshift/origin-kube-rbac-proxy:latest - name: TRUSTED_CA_CONFIGMAP_NAME securityContext: capabilities: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index ab7f32884..c9d0407b2 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -73,6 +73,7 @@ rules: - configmaps - secrets - serviceaccounts + - services verbs: - create - delete @@ -101,3 +102,15 @@ rules: - patch - update - watch +- apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - create + - delete + - get + - list + - patch + - update + - watch diff --git a/main.go b/main.go index 6c7208bdf..40b3be8c0 100644 --- a/main.go +++ b/main.go @@ -42,7 +42,7 @@ func main() { flag.StringVar(&opCfg.OperatorNamespace, "operator-namespace", operatorconfig.DefaultOperatorNamespace, "The namespace that the operator is running in.") flag.StringVar(&opCfg.OperandNamespace, "operand-namespace", operatorconfig.DefaultOperandNamespace, "The namespace that ExternalDNS containers should run in.") flag.StringVar(&opCfg.ExternalDNSImage, "externaldns-image", operatorconfig.DefaultExternalDNSImage, "The container image used for running ExternalDNS.") - flag.StringVar(&opCfg.KubeRBACProxyImage, "kube-rbac-proxy-image", operatorconfig.DefaultKubeRBACProxyImage, "The container image used for the kube-rbac-proxy metrics sidecar.") + flag.StringVar(&opCfg.KubeRBACProxyImage, "kube-rbac-proxy-image", operatorconfig.DefaultKubeRBACProxyImage, "The container image used for the kube-rbac-proxy sidecar that exposes ExternalDNS operand metrics.") flag.StringVar(&opCfg.CertDir, "cert-dir", operatorconfig.DefaultCertDir, "The directory for keys and certificates for serving the webhook.") flag.StringVar(&opCfg.TrustedCAConfigMapName, "trusted-ca-configmap", operatorconfig.DefaultTrustedCAConfigMapName, "The name of the config map containing TLS CA(s) which should be trusted by ExternalDNS containers. PEM encoded file under \"ca-bundle.crt\" key is expected.") flag.BoolVar(&opCfg.EnableWebhook, "enable-webhook", operatorconfig.DefaultEnableWebhook, "Enable the validating webhook server. Defaults to true.") diff --git a/pkg/operator/controller/externaldns/controller.go b/pkg/operator/controller/externaldns/controller.go index 557cb8873..3bf40243a 100644 --- a/pkg/operator/controller/externaldns/controller.go +++ b/pkg/operator/controller/externaldns/controller.go @@ -25,6 +25,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -104,8 +105,18 @@ func New(mgr manager.Manager, cfg Config) (controller.Controller, error) { return nil, err } - if err := c.Watch(source.Kind[client.Object](operatorCache, &corev1.Service{}, handler.EnqueueRequestForOwner(operatorScheme, operatorRESTMapper, &operatorv1beta1.ExternalDNS{}, handler.OnlyControllerOwner()))); err != nil { - return nil, err + // Metrics resources (Service, ServiceMonitor, kube-rbac-proxy sidecar) depend on + // OpenShift's serving-cert annotation for TLS certificate generation. + if cfg.KubeRBACProxyImage != "" && cfg.IsOpenShift { + if err := c.Watch(source.Kind[client.Object](operatorCache, &corev1.Service{}, handler.EnqueueRequestForOwner(operatorScheme, operatorRESTMapper, &operatorv1beta1.ExternalDNS{}, handler.OnlyControllerOwner()))); err != nil { + return nil, err + } + + smInformer := &unstructured.Unstructured{} + smInformer.SetGroupVersionKind(serviceMonitorGVK) + if err := c.Watch(source.Kind[client.Object](operatorCache, smInformer, handler.EnqueueRequestForOwner(operatorScheme, operatorRESTMapper, &operatorv1beta1.ExternalDNS{}, handler.OnlyControllerOwner()))); err != nil { + return nil, err + } } // secret replicated by the credentials controller @@ -213,30 +224,26 @@ func (r *reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu trustCAConfigMap = configMap } - _, currentDeployment, err := r.ensureExternalDNSDeployment(ctx, r.config.Namespace, r.config.Image, r.config.KubeRBACProxyImage, sa, credSecret, trustCAConfigMap, externalDNS) + // The kube-rbac-proxy sidecar relies on OpenShift's serving-cert controller + // to generate TLS certificates. Only inject it on OpenShift clusters. + kubeRBACProxyImage := "" + if r.config.IsOpenShift { + kubeRBACProxyImage = r.config.KubeRBACProxyImage + } + + _, currentDeployment, err := r.ensureExternalDNSDeployment(ctx, r.config.Namespace, r.config.Image, kubeRBACProxyImage, sa, credSecret, trustCAConfigMap, externalDNS) if err != nil { return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS deployment: %w", err) } // Ensure metrics service and service monitor for Prometheus scraping. - // If kube-rbac-proxy image is not configured, clean up any existing metrics resources. - if r.config.KubeRBACProxyImage != "" { + // Owner references on these resources ensure cascade deletion when the ExternalDNS CR is removed. + if kubeRBACProxyImage != "" { if err := r.ensureExternalDNSMetricsService(ctx, r.config.Namespace, externalDNS); err != nil { return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS metrics service: %w", err) } - if r.config.IsOpenShift { - if err := r.ensureExternalDNSServiceMonitor(ctx, r.config.Namespace, externalDNS); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS service monitor: %w", err) - } - } - } else { - if err := r.deleteExternalDNSMetricsService(ctx, r.config.Namespace, externalDNS); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to delete externalDNS metrics service: %w", err) - } - if r.config.IsOpenShift { - if err := r.deleteExternalDNSServiceMonitor(ctx, r.config.Namespace, externalDNS); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to delete externalDNS service monitor: %w", err) - } + if err := r.ensureExternalDNSServiceMonitor(ctx, r.config.Namespace, externalDNS); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS service monitor: %w", err) } } diff --git a/pkg/operator/controller/externaldns/pod.go b/pkg/operator/controller/externaldns/pod.go index 126517322..9609a78a7 100644 --- a/pkg/operator/controller/externaldns/pod.go +++ b/pkg/operator/controller/externaldns/pod.go @@ -710,6 +710,15 @@ func numMetricsPorts(externalDNS *operatorv1beta1.ExternalDNS) int { return len(externalDNS.Spec.Zones) } +// kubeRBACProxyContainerNameForSeq returns the container name for the kube-rbac-proxy sidecar +// at the given sequence index. +func kubeRBACProxyContainerNameForSeq(seq int) string { + if seq == 0 { + return kubeRBACProxyContainerName + } + return fmt.Sprintf("%s-%d", kubeRBACProxyContainerName, seq) +} + // kubeRBACProxyPortNameForSeq returns the port name for the kube-rbac-proxy sidecar // at the given sequence index. func kubeRBACProxyPortNameForSeq(seq int) string { @@ -725,10 +734,7 @@ func kubeRBACProxyContainer(image string, seq int) corev1.Container { securePort := kubeRBACProxySecurePort + seq upstreamPort := defaultMetricsStartPort + seq portName := kubeRBACProxyPortNameForSeq(seq) - containerName := kubeRBACProxyContainerName - if seq > 0 { - containerName = fmt.Sprintf("%s-%d", kubeRBACProxyContainerName, seq) - } + containerName := kubeRBACProxyContainerNameForSeq(seq) return corev1.Container{ Name: containerName, Image: image, @@ -787,4 +793,3 @@ func metricsCertVolume(secretName string) corev1.Volume { }, } } - diff --git a/pkg/operator/controller/externaldns/service.go b/pkg/operator/controller/externaldns/service.go index 345c0987a..71745a166 100644 --- a/pkg/operator/controller/externaldns/service.go +++ b/pkg/operator/controller/externaldns/service.go @@ -32,9 +32,9 @@ import ( controller "github.com/openshift/external-dns-operator/pkg/operator/controller" ) -// metricsService returns the desired metrics Service for the given ExternalDNS instance. +// desiredMetricsService returns the desired metrics Service for the given ExternalDNS instance. // It creates one port per zone container to match the kube-rbac-proxy sidecars. -func metricsService(namespace string, externalDNS *operatorv1beta1.ExternalDNS) *corev1.Service { +func desiredMetricsService(namespace string, externalDNS *operatorv1beta1.ExternalDNS) *corev1.Service { metricsSecretName := controller.ExternalDNSMetricsSecretName(externalDNS) numZones := numMetricsPorts(externalDNS) @@ -73,7 +73,7 @@ func metricsService(namespace string, externalDNS *operatorv1beta1.ExternalDNS) // ensureExternalDNSMetricsService ensures that the metrics service for the operand exists. func (r *reconciler) ensureExternalDNSMetricsService(ctx context.Context, namespace string, externalDNS *operatorv1beta1.ExternalDNS) error { - desired := metricsService(namespace, externalDNS) + desired := desiredMetricsService(namespace, externalDNS) if err := controllerutil.SetControllerReference(externalDNS, desired, r.scheme); err != nil { return fmt.Errorf("failed to set the controller reference for metrics service: %w", err) @@ -94,8 +94,10 @@ func (r *reconciler) ensureExternalDNSMetricsService(ctx context.Context, namesp } if metricsServiceChanged(current, desired) { - // Preserve ClusterIP to avoid immutable field errors. desired.Spec.ClusterIP = current.Spec.ClusterIP + desired.Spec.ClusterIPs = current.Spec.ClusterIPs + desired.Spec.IPFamilies = current.Spec.IPFamilies + desired.Spec.IPFamilyPolicy = current.Spec.IPFamilyPolicy desired.ResourceVersion = current.ResourceVersion if err := r.client.Update(ctx, desired); err != nil { return fmt.Errorf("failed to update metrics service %s/%s: %w", desired.Namespace, desired.Name, err) @@ -106,25 +108,18 @@ func (r *reconciler) ensureExternalDNSMetricsService(ctx context.Context, namesp return nil } -// deleteExternalDNSMetricsService deletes the metrics service if it exists. -func (r *reconciler) deleteExternalDNSMetricsService(ctx context.Context, namespace string, externalDNS *operatorv1beta1.ExternalDNS) error { - svc := &corev1.Service{} - nsName := types.NamespacedName{Namespace: namespace, Name: controller.ExternalDNSMetricsServiceName(externalDNS)} - if err := r.client.Get(ctx, nsName, svc); err != nil { - if errors.IsNotFound(err) { - return nil +// metricsServiceChanged returns true if the current service needs to be updated to match the desired. +func metricsServiceChanged(current, desired *corev1.Service) bool { + for k, v := range desired.Labels { + if current.Labels[k] != v { + return true } - return fmt.Errorf("failed to get metrics service %s: %w", nsName, err) } - if err := r.client.Delete(ctx, svc); err != nil { - return fmt.Errorf("failed to delete metrics service %s: %w", nsName, err) + for k, v := range desired.Annotations { + if current.Annotations[k] != v { + return true + } } - r.log.Info("deleted metrics service", "namespace", namespace, "name", nsName.Name) - return nil -} - -// metricsServiceChanged returns true if the current service needs to be updated to match the desired. -func metricsServiceChanged(current, desired *corev1.Service) bool { if !reflect.DeepEqual(current.Spec.Selector, desired.Spec.Selector) { return true } diff --git a/pkg/operator/controller/externaldns/service_test.go b/pkg/operator/controller/externaldns/service_test.go index e33541ad4..434a65c08 100644 --- a/pkg/operator/controller/externaldns/service_test.go +++ b/pkg/operator/controller/externaldns/service_test.go @@ -60,7 +60,7 @@ func TestMetricsService(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - svc := metricsService(tc.namespace, tc.externalDNS) + svc := desiredMetricsService(tc.namespace, tc.externalDNS) if svc.Name != tc.expectedName { t.Errorf("expected service name %q, got %q", tc.expectedName, svc.Name) @@ -103,7 +103,7 @@ func TestMetricsService(t *testing.T) { func TestMetricsServiceChanged(t *testing.T) { extDNS := testAWSExternalDNS(operatorv1beta1.SourceTypeService) - base := metricsService(test.OperandNamespace, extDNS) + base := desiredMetricsService(test.OperandNamespace, extDNS) testCases := []struct { name string @@ -115,6 +115,20 @@ func TestMetricsServiceChanged(t *testing.T) { mutate: func(s *corev1.Service) {}, changed: false, }, + { + name: "label changed", + mutate: func(s *corev1.Service) { + s.Labels[appInstanceLabel] = "different" + }, + changed: true, + }, + { + name: "annotation changed", + mutate: func(s *corev1.Service) { + s.Annotations["service.beta.openshift.io/serving-cert-secret-name"] = "wrong-secret" + }, + changed: true, + }, { name: "selector changed", mutate: func(s *corev1.Service) { @@ -159,7 +173,7 @@ func TestMetricsServiceChanged(t *testing.T) { t.Run(tc.name, func(t *testing.T) { current := base.DeepCopy() tc.mutate(current) - desired := metricsService(test.OperandNamespace, extDNS) + desired := desiredMetricsService(test.OperandNamespace, extDNS) got := metricsServiceChanged(current, desired) if got != tc.changed { @@ -168,4 +182,3 @@ func TestMetricsServiceChanged(t *testing.T) { }) } } - diff --git a/pkg/operator/controller/externaldns/servicemonitor.go b/pkg/operator/controller/externaldns/servicemonitor.go index 95288e982..7d8196c32 100644 --- a/pkg/operator/controller/externaldns/servicemonitor.go +++ b/pkg/operator/controller/externaldns/servicemonitor.go @@ -22,10 +22,10 @@ import ( "reflect" "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" operatorv1beta1 "github.com/openshift/external-dns-operator/api/v1beta1" controller "github.com/openshift/external-dns-operator/pkg/operator/controller" @@ -41,9 +41,9 @@ var serviceMonitorGVK = schema.GroupVersionKind{ func (r *reconciler) ensureExternalDNSServiceMonitor(ctx context.Context, namespace string, externalDNS *operatorv1beta1.ExternalDNS) error { desired := desiredServiceMonitor(namespace, externalDNS) - // Set the controller reference so the ServiceMonitor is owned by the ExternalDNS CR. - ownerRef := metav1.NewControllerRef(externalDNS, operatorv1beta1.GroupVersion.WithKind("ExternalDNS")) - desired.SetOwnerReferences([]metav1.OwnerReference{*ownerRef}) + if err := controllerutil.SetControllerReference(externalDNS, desired, r.scheme); err != nil { + return fmt.Errorf("failed to set the controller reference for service monitor: %w", err) + } current := &unstructured.Unstructured{} current.SetGroupVersionKind(serviceMonitorGVK) @@ -68,6 +68,7 @@ func (r *reconciler) ensureExternalDNSServiceMonitor(ctx context.Context, namesp if serviceMonitorChanged(current, desired) { desiredSpec, _, _ := unstructured.NestedMap(desired.Object, "spec") current.Object["spec"] = desiredSpec + current.SetLabels(desired.GetLabels()) if err := r.client.Update(ctx, current); err != nil { return fmt.Errorf("failed to update service monitor %s/%s: %w", namespace, current.GetName(), err) } @@ -77,24 +78,6 @@ func (r *reconciler) ensureExternalDNSServiceMonitor(ctx context.Context, namesp return nil } -// deleteExternalDNSServiceMonitor deletes the service monitor if it exists. -func (r *reconciler) deleteExternalDNSServiceMonitor(ctx context.Context, namespace string, externalDNS *operatorv1beta1.ExternalDNS) error { - sm := &unstructured.Unstructured{} - sm.SetGroupVersionKind(serviceMonitorGVK) - nsName := types.NamespacedName{Namespace: namespace, Name: controller.ExternalDNSServiceMonitorName(externalDNS)} - if err := r.client.Get(ctx, nsName, sm); err != nil { - if errors.IsNotFound(err) { - return nil - } - return fmt.Errorf("failed to get service monitor %s: %w", nsName, err) - } - if err := r.client.Delete(ctx, sm); err != nil { - return fmt.Errorf("failed to delete service monitor %s: %w", nsName, err) - } - r.log.Info("deleted service monitor", "namespace", namespace, "name", nsName.Name) - return nil -} - // desiredServiceMonitor returns the desired ServiceMonitor as an unstructured object. // It creates one endpoint per zone container to match the kube-rbac-proxy sidecars, // and configures TLS so that Prometheus can scrape the HTTPS endpoints. @@ -154,6 +137,13 @@ func desiredServiceMonitor(namespace string, externalDNS *operatorv1beta1.Extern // serviceMonitorChanged returns true if the fields we manage have drifted // between the current and desired ServiceMonitor objects. func serviceMonitorChanged(current, desired *unstructured.Unstructured) bool { + desiredLabels := desired.GetLabels() + currentLabels := current.GetLabels() + for k, v := range desiredLabels { + if currentLabels[k] != v { + return true + } + } for _, field := range []string{"endpoints", "selector", "namespaceSelector"} { currentVal, _, _ := unstructured.NestedFieldNoCopy(current.Object, "spec", field) desiredVal, _, _ := unstructured.NestedFieldNoCopy(desired.Object, "spec", field) diff --git a/pkg/operator/controller/externaldns/servicemonitor_test.go b/pkg/operator/controller/externaldns/servicemonitor_test.go index ab0f746ae..2bdda81a3 100644 --- a/pkg/operator/controller/externaldns/servicemonitor_test.go +++ b/pkg/operator/controller/externaldns/servicemonitor_test.go @@ -147,6 +147,15 @@ func TestServiceMonitorChanged(t *testing.T) { mutate: func(sm *unstructured.Unstructured) {}, changed: false, }, + { + name: "label changed", + mutate: func(sm *unstructured.Unstructured) { + labels := sm.GetLabels() + labels[appInstanceLabel] = "different" + sm.SetLabels(labels) + }, + changed: true, + }, { name: "extra defaulted field in spec does not trigger change", mutate: func(sm *unstructured.Unstructured) { diff --git a/pkg/operator/controller/names.go b/pkg/operator/controller/names.go index 2a740f1a1..cdd12b53b 100644 --- a/pkg/operator/controller/names.go +++ b/pkg/operator/controller/names.go @@ -64,17 +64,17 @@ func ExternalDNSGlobalResourceName() string { // ExternalDNSMetricsServiceName returns the name for the metrics service of the given ExternalDNS instance. func ExternalDNSMetricsServiceName(externalDNS *operatorv1beta1.ExternalDNS) string { - return ExternalDNSBaseName + "-" + externalDNS.Name + "-metrics" + return truncatedName(ExternalDNSBaseName, externalDNS.Name, "-metrics") } // ExternalDNSServiceMonitorName returns the name for the service monitor of the given ExternalDNS instance. func ExternalDNSServiceMonitorName(externalDNS *operatorv1beta1.ExternalDNS) string { - return ExternalDNSBaseName + "-" + externalDNS.Name + "-metrics-monitor" + return truncatedName(ExternalDNSBaseName, externalDNS.Name, "-metrics-monitor") } // ExternalDNSMetricsSecretName returns the name for the metrics serving cert secret of the given ExternalDNS instance. func ExternalDNSMetricsSecretName(externalDNS *operatorv1beta1.ExternalDNS) string { - return ExternalDNSBaseName + "-" + externalDNS.Name + "-metrics-cert" + return truncatedName(ExternalDNSBaseName, externalDNS.Name, "-metrics-cert") } // ExternalDNSContainerName returns the container name unique for the given DNS zone. @@ -130,6 +130,21 @@ func ExternalDNSCredentialsSecretNameFromProvider(externalDNS *operatorv1beta1.E return "" } +const maxDNSLabelLength = 63 + +func truncatedName(base, name, suffix string) string { + result := base + "-" + name + suffix + if len(result) <= maxDNSLabelLength { + return result + } + h := hashString(name) + available := maxDNSLabelLength - len(base) - len(suffix) - len(h) - 2 + if available < 0 { + available = 0 + } + return base + "-" + name[:available] + "-" + h + suffix +} + func hashString(str string) string { hasher := getHasher() hasher.Write([]byte(str)) diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 3f89026c8..24d527c3d 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -51,8 +51,10 @@ type Operator struct { // +kubebuilder:rbac:groups=config.openshift.io,resources=infrastructures,verbs=get;list;watch // local role // +kubebuilder:rbac:groups="",namespace=external-dns-operator,resources=secrets;serviceaccounts;configmaps,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups="",namespace=external-dns-operator,resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="apps",namespace=external-dns-operator,resources=deployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",namespace=external-dns-operator,resources=pods,verbs=get;list;watch +// +kubebuilder:rbac:groups=monitoring.coreos.com,namespace=external-dns-operator,resources=servicemonitors,verbs=get;list;watch;create;update;patch;delete // New creates a new operator from cliCfg and opCfg. func New(cliCfg *rest.Config, opCfg *operatorconfig.Config) (*Operator, error) { From 7dd3f1c9ae06ace514db2635c7a4a90deb1e08bf Mon Sep 17 00:00:00 2001 From: Ali Syed Date: Wed, 6 May 2026 13:40:24 +0100 Subject: [PATCH 3/5] fix: Remove unnecessary IsOpenShift guard for metrics resources This operator exclusively targets OpenShift, so gating the metrics sidecar, Service, and ServiceMonitor behind IsOpenShift was redundant. They are now gated only on KubeRBACProxyImage being set. Assisted-by: Claude --- pkg/operator/controller/externaldns/controller.go | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/pkg/operator/controller/externaldns/controller.go b/pkg/operator/controller/externaldns/controller.go index 3bf40243a..468cf83c9 100644 --- a/pkg/operator/controller/externaldns/controller.go +++ b/pkg/operator/controller/externaldns/controller.go @@ -105,9 +105,7 @@ func New(mgr manager.Manager, cfg Config) (controller.Controller, error) { return nil, err } - // Metrics resources (Service, ServiceMonitor, kube-rbac-proxy sidecar) depend on - // OpenShift's serving-cert annotation for TLS certificate generation. - if cfg.KubeRBACProxyImage != "" && cfg.IsOpenShift { + if cfg.KubeRBACProxyImage != "" { if err := c.Watch(source.Kind[client.Object](operatorCache, &corev1.Service{}, handler.EnqueueRequestForOwner(operatorScheme, operatorRESTMapper, &operatorv1beta1.ExternalDNS{}, handler.OnlyControllerOwner()))); err != nil { return nil, err } @@ -224,21 +222,14 @@ func (r *reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu trustCAConfigMap = configMap } - // The kube-rbac-proxy sidecar relies on OpenShift's serving-cert controller - // to generate TLS certificates. Only inject it on OpenShift clusters. - kubeRBACProxyImage := "" - if r.config.IsOpenShift { - kubeRBACProxyImage = r.config.KubeRBACProxyImage - } - - _, currentDeployment, err := r.ensureExternalDNSDeployment(ctx, r.config.Namespace, r.config.Image, kubeRBACProxyImage, sa, credSecret, trustCAConfigMap, externalDNS) + _, currentDeployment, err := r.ensureExternalDNSDeployment(ctx, r.config.Namespace, r.config.Image, r.config.KubeRBACProxyImage, sa, credSecret, trustCAConfigMap, externalDNS) if err != nil { return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS deployment: %w", err) } // Ensure metrics service and service monitor for Prometheus scraping. // Owner references on these resources ensure cascade deletion when the ExternalDNS CR is removed. - if kubeRBACProxyImage != "" { + if r.config.KubeRBACProxyImage != "" { if err := r.ensureExternalDNSMetricsService(ctx, r.config.Namespace, externalDNS); err != nil { return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS metrics service: %w", err) } From a3a9474b3b4b86e446060b738ab3a40389c54ab2 Mon Sep 17 00:00:00 2001 From: Ali Syed Date: Thu, 7 May 2026 15:30:21 +0100 Subject: [PATCH 4/5] fix: Make kube-rbac-proxy unconditional, add metrics docs - Remove KubeRBACProxyImage guards from controller watches and reconcile; the sidecar, Service, and ServiceMonitor are now always created - Remove openshift.io/cluster-monitoring label from manager.yaml since this manifest is not in the bundle - Add operand metrics documentation to docs/openshift.md covering multi-zone metric differentiation via ports and instance labels Assisted-by: Claude --- config/manager/manager.yaml | 1 - docs/openshift.md | 15 ++++++++++ .../controller/externaldns/controller.go | 30 ++++++++----------- .../controller/externaldns/deployment.go | 2 -- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 691c37874..ed434b2c9 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -3,7 +3,6 @@ kind: Namespace metadata: labels: name: external-dns-operator - openshift.io/cluster-monitoring: "true" name: external-dns-operator --- apiVersion: apps/v1 diff --git a/docs/openshift.md b/docs/openshift.md index de59913fe..4e2bf3af8 100644 --- a/docs/openshift.md +++ b/docs/openshift.md @@ -10,3 +10,18 @@ Use the following convenience script to secure communication between the API and ```bash $ ./hack/add-serving-cert.sh --namespace external-dns-operator --service webhook-service --webhook validating-webhook-configuration --secret webhook-server-cert ``` + +## Operand metrics + +Each ExternalDNS operand deployment includes a [kube-rbac-proxy](https://github.com/brancz/kube-rbac-proxy) sidecar per zone container to expose metrics over HTTPS. A `Service` and `ServiceMonitor` are created per `ExternalDNS` CR for Prometheus discovery. + +### Multi-zone metric differentiation + +When an `ExternalDNS` instance manages multiple zones, one ExternalDNS container runs per zone — each exposing the same metric names. Metrics are kept separate (not combined or deduplicated) through the following mechanism: + +- Each zone container binds its metrics to a distinct localhost port (`:7979`, `:7980`, etc.). +- Each kube-rbac-proxy sidecar proxies one of those ports on a distinct secure port (`:9091`, `:9092`, etc.). +- The `ServiceMonitor` creates one endpoint entry per port. +- Prometheus assigns a unique `instance` label (`pod_ip:port`) to each scrape target. + +This means metrics from different zones within the same `ExternalDNS` instance are differentiated by port via the `instance` label. Metrics from different `ExternalDNS` instances are differentiated by pod or deployment name. diff --git a/pkg/operator/controller/externaldns/controller.go b/pkg/operator/controller/externaldns/controller.go index 468cf83c9..0a841222a 100644 --- a/pkg/operator/controller/externaldns/controller.go +++ b/pkg/operator/controller/externaldns/controller.go @@ -105,16 +105,14 @@ func New(mgr manager.Manager, cfg Config) (controller.Controller, error) { return nil, err } - if cfg.KubeRBACProxyImage != "" { - if err := c.Watch(source.Kind[client.Object](operatorCache, &corev1.Service{}, handler.EnqueueRequestForOwner(operatorScheme, operatorRESTMapper, &operatorv1beta1.ExternalDNS{}, handler.OnlyControllerOwner()))); err != nil { - return nil, err - } + if err := c.Watch(source.Kind[client.Object](operatorCache, &corev1.Service{}, handler.EnqueueRequestForOwner(operatorScheme, operatorRESTMapper, &operatorv1beta1.ExternalDNS{}, handler.OnlyControllerOwner()))); err != nil { + return nil, err + } - smInformer := &unstructured.Unstructured{} - smInformer.SetGroupVersionKind(serviceMonitorGVK) - if err := c.Watch(source.Kind[client.Object](operatorCache, smInformer, handler.EnqueueRequestForOwner(operatorScheme, operatorRESTMapper, &operatorv1beta1.ExternalDNS{}, handler.OnlyControllerOwner()))); err != nil { - return nil, err - } + smInformer := &unstructured.Unstructured{} + smInformer.SetGroupVersionKind(serviceMonitorGVK) + if err := c.Watch(source.Kind[client.Object](operatorCache, smInformer, handler.EnqueueRequestForOwner(operatorScheme, operatorRESTMapper, &operatorv1beta1.ExternalDNS{}, handler.OnlyControllerOwner()))); err != nil { + return nil, err } // secret replicated by the credentials controller @@ -227,15 +225,11 @@ func (r *reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS deployment: %w", err) } - // Ensure metrics service and service monitor for Prometheus scraping. - // Owner references on these resources ensure cascade deletion when the ExternalDNS CR is removed. - if r.config.KubeRBACProxyImage != "" { - if err := r.ensureExternalDNSMetricsService(ctx, r.config.Namespace, externalDNS); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS metrics service: %w", err) - } - if err := r.ensureExternalDNSServiceMonitor(ctx, r.config.Namespace, externalDNS); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS service monitor: %w", err) - } + if err := r.ensureExternalDNSMetricsService(ctx, r.config.Namespace, externalDNS); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS metrics service: %w", err) + } + if err := r.ensureExternalDNSServiceMonitor(ctx, r.config.Namespace, externalDNS); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS service monitor: %w", err) } if err := r.updateExternalDNSStatus(ctx, externalDNS, currentDeployment, true); err != nil { diff --git a/pkg/operator/controller/externaldns/deployment.go b/pkg/operator/controller/externaldns/deployment.go index 01c16d847..bbf51760e 100644 --- a/pkg/operator/controller/externaldns/deployment.go +++ b/pkg/operator/controller/externaldns/deployment.go @@ -298,8 +298,6 @@ func desiredExternalDNSDeployment(cfg *deploymentConfig) (*appsv1.Deployment, er depl.Spec.Template.Spec.Containers = append(depl.Spec.Template.Spec.Containers, *container) } } - // Add kube-rbac-proxy sidecar(s) and metrics cert volume for secure metrics exposure. - // One sidecar per zone container, each proxying the corresponding metrics port. if cfg.kubeRBACProxyImage != "" { for i := 0; i < cbld.counter; i++ { proxyContainer := kubeRBACProxyContainer(cfg.kubeRBACProxyImage, i) From 8848be6c1831fcc2dbe0860eb8021d8aeebc901c Mon Sep 17 00:00:00 2001 From: Ali Syed Date: Fri, 8 May 2026 10:04:55 +0100 Subject: [PATCH 5/5] fix: Fail fast at startup if kube-rbac-proxy image is not configured The controller unconditionally creates metrics Service and ServiceMonitor, so a missing kube-rbac-proxy image would produce broken scrape targets. Validate at operator startup to catch this misconfiguration before any reconciliation runs. Assisted-by: Claude --- pkg/operator/operator.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 24d527c3d..63b3da2c3 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -118,6 +118,10 @@ func New(cliCfg *rest.Config, opCfg *operatorconfig.Config) (*Operator, error) { return nil, fmt.Errorf("failed to fill the platform details: %w", err) } + if opCfg.KubeRBACProxyImage == "" { + return nil, fmt.Errorf("kube-rbac-proxy image is required but not configured") + } + // Create and register the externaldns controller with the operator manager. if _, err := externaldnsctrl.New(mgr, externaldnsctrl.Config{ Namespace: opCfg.OperandNamespace,