diff --git a/cmd/machine-config-controller/start.go b/cmd/machine-config-controller/start.go index 81ec8793f4..b8580e6512 100644 --- a/cmd/machine-config-controller/start.go +++ b/cmd/machine-config-controller/start.go @@ -107,6 +107,7 @@ func runStartCmd(_ *cobra.Command, _ []string) { ctrlctx.KubeNamespacedInformerFactory.Core().V1().Secrets(), ctrlctx.KubeNamespacedInformerFactory.Core().V1().ConfigMaps(), ctrlctx.ConfigInformerFactory.Config().V1().Infrastructures(), + ctrlctx.FeatureGatesHandler, ) if err != nil { klog.Fatalf("unable to start cert rotation controller: %v", err) diff --git a/pkg/controller/certrotation/certrotation_controller.go b/pkg/controller/certrotation/certrotation_controller.go index 823e347eb3..26659b3cbf 100644 --- a/pkg/controller/certrotation/certrotation_controller.go +++ b/pkg/controller/certrotation/certrotation_controller.go @@ -3,17 +3,21 @@ package certrotationcontroller import ( "bytes" "context" + "crypto/x509" "encoding/json" + "encoding/pem" "fmt" "time" "github.com/vincent-petithory/dataurl" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" coreinformersv1 "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes" @@ -24,10 +28,12 @@ import ( "k8s.io/utils/clock" configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/api/features" configclientset "github.com/openshift/client-go/config/clientset/versioned" machineclientset "github.com/openshift/client-go/machine/clientset/versioned" "github.com/openshift/library-go/pkg/controller/factory" + "github.com/openshift/library-go/pkg/crypto" "github.com/openshift/library-go/pkg/operator/certrotation" "github.com/openshift/library-go/pkg/operator/events" @@ -46,6 +52,7 @@ const ( mcsCARefresh = 8 * oneYear mcsTLSKeyExpiry = mcsCAExpiry mcsTLSKeyRefresh = mcsCARefresh + iriTLSKeyExpiry = mcsCAExpiry workQueueKey = "key" ) @@ -70,6 +77,8 @@ type CertRotationController struct { recorder events.Recorder cachesToSync []cache.InformerSynced + + featureGatesHandler ctrlcommon.FeatureGatesHandler } // New returns a new cert rotation controller. @@ -82,6 +91,7 @@ func New( mcoSecretInformer coreinformersv1.SecretInformer, mcoConfigMapInfomer coreinformersv1.ConfigMapInformer, infraInformer configinformers.InfrastructureInformer, + featureGatesHandler ctrlcommon.FeatureGatesHandler, ) (*CertRotationController, error) { recorder := events.NewLoggingEventRecorder(componentName, clock.RealClock{}) @@ -106,8 +116,9 @@ func New( hostnamesQueue: workqueue.NewTypedRateLimitingQueueWithConfig( workqueue.DefaultTypedControllerRateLimiter[string](), workqueue.TypedRateLimitingQueueConfig[string]{Name: "Hostnames"}), - infraInformer: infraInformer, - infraLister: infraInformer.Lister(), + infraInformer: infraInformer, + infraLister: infraInformer.Lister(), + featureGatesHandler: featureGatesHandler, } // The cert controller will begin creating "machine-config-server-ca" configmap & secret in the MCO namespace. @@ -331,11 +342,14 @@ func (c *CertRotationController) addConfigMap(obj interface{}) { return } - klog.Infof("configMap %s added, reconciling all user data secrets", configMap.Name) + klog.Infof("configMap %s added, reconciling user data secrets and IRI certificate", configMap.Name) go func() { c.reconcileUserDataSecrets() }() + go func() { + c.reconcileIRICertificate() + }() } func (c *CertRotationController) updateConfigMap(oldCM, newCM interface{}) { @@ -353,12 +367,15 @@ func (c *CertRotationController) updateConfigMap(oldCM, newCM interface{}) { return } - klog.Infof("configMap %s updated, reconciling all user data secrets", oldConfigMap.Name) + klog.Infof("configMap %s updated, reconciling user data secrets and IRI certificate", oldConfigMap.Name) // Reconcile all user data secrets go func() { c.reconcileUserDataSecrets() }() + go func() { + c.reconcileIRICertificate() + }() } func (c *CertRotationController) addSecret(obj interface{}) { @@ -467,3 +484,139 @@ func (c *CertRotationController) reconcileSecret(secret corev1.Secret) error { klog.Infof("Successfully modified %s secret \n", secret.Name) return nil } + +func (c *CertRotationController) reconcileIRICertificate() { + if !c.featureGatesHandler.Enabled(features.FeatureGateNoRegistryClusterInstall) { + klog.V(4).Infof("Skipping IRI certificate reconciliation: %s feature gate is not enabled", features.FeatureGateNoRegistryClusterInstall) + return + } + klog.Infof("Reconciling IRI certificate") + + // Get the MCS CA secret (fresh get, not from lister) + caSecret, err := c.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.MachineConfigServerCAName, metav1.GetOptions{}) + if err != nil { + klog.Errorf("Cannot get MCS CA secret for IRI cert reconciliation: %v", err) + return + } + + caCert := caSecret.Data[corev1.TLSCertKey] + caKey := caSecret.Data[corev1.TLSPrivateKeyKey] + if len(caCert) == 0 || len(caKey) == 0 { + klog.Errorf("MCS CA secret %s is missing cert or key data", ctrlcommon.MachineConfigServerCAName) + return + } + + // Load the CA + ca, err := crypto.GetCAFromBytes(caCert, caKey) + if err != nil { + klog.Errorf("Cannot load MCS CA for IRI cert generation: %v", err) + return + } + + // Check if the existing IRI cert is already valid under the current CA + iriSecret, err := c.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.InternalReleaseImageTLSSecretName, metav1.GetOptions{}) + secretExists := err == nil + if secretExists { + if c.isIRICertValid(iriSecret, ca) { + klog.Infof("IRI TLS certificate is still valid under the current MCS CA, skipping rotation") + return + } + } else if !errors.IsNotFound(err) { + klog.Errorf("Cannot get IRI TLS secret: %v", err) + return + } + + // Get hostnames from the dynamic serving rotation (includes api-int hostname and platform VIPs) + hostnames := c.hostnamesRotation.GetHostnames() + if len(hostnames) == 0 { + klog.Errorf("No hostnames available for IRI cert generation") + return + } + // IRI registry also serves locally on each master node, matching the installer's SANs + hostnames = append(hostnames, "localhost", "127.0.0.1", "::1") + + // Generate a new IRI TLS certificate signed by the MCS CA + certConfig, err := ca.MakeServerCert(sets.New(hostnames...), iriTLSKeyExpiry) + if err != nil { + klog.Errorf("Cannot generate IRI TLS certificate: %v", err) + return + } + + certPEM, keyPEM, err := certConfig.GetPEMBytes() + if err != nil { + klog.Errorf("Cannot get PEM bytes for IRI TLS certificate: %v", err) + return + } + + if !secretExists { + // Create new secret + newSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: ctrlcommon.InternalReleaseImageTLSSecretName, + Namespace: ctrlcommon.MCONamespace, + }, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{ + corev1.TLSCertKey: certPEM, + corev1.TLSPrivateKeyKey: keyPEM, + }, + } + if _, err := c.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Create(context.TODO(), newSecret, metav1.CreateOptions{}); err != nil { + klog.Errorf("Cannot create IRI TLS secret: %v", err) + return + } + klog.Infof("Successfully created IRI TLS secret %s", ctrlcommon.InternalReleaseImageTLSSecretName) + return + } + + // Update existing secret + updatedSecret := iriSecret.DeepCopy() + updatedSecret.Data[corev1.TLSCertKey] = certPEM + updatedSecret.Data[corev1.TLSPrivateKeyKey] = keyPEM + if _, err := c.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Update(context.TODO(), updatedSecret, metav1.UpdateOptions{}); err != nil { + klog.Errorf("Cannot update IRI TLS secret: %v", err) + return + } + klog.Infof("Successfully updated IRI TLS secret %s", ctrlcommon.InternalReleaseImageTLSSecretName) +} + +// isIRICertValid checks whether the existing IRI certificate is signed by the given CA +// and has not expired. +func (c *CertRotationController) isIRICertValid(iriSecret *corev1.Secret, ca *crypto.CA) bool { + certPEM := iriSecret.Data[corev1.TLSCertKey] + if len(certPEM) == 0 { + return false + } + + // Decode the first PEM block (the leaf certificate) + block, _ := pem.Decode(certPEM) + if block == nil { + klog.Warningf("Cannot decode PEM from existing IRI TLS certificate") + return false + } + + iriCert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + klog.Warningf("Cannot parse existing IRI TLS certificate: %v", err) + return false + } + + // Build a cert pool with the current CA to verify against + caPool := x509.NewCertPool() + for _, caCert := range ca.Config.Certs { + caPool.AddCert(caCert) + } + + // Verify the IRI cert is signed by the current CA and is not expired + _, err = iriCert.Verify(x509.VerifyOptions{ + Roots: caPool, + CurrentTime: time.Now(), + KeyUsages: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + }) + if err != nil { + klog.Infof("Existing IRI TLS certificate is not valid under current MCS CA: %v", err) + return false + } + + return true +} diff --git a/pkg/controller/certrotation/certrotation_controller_test.go b/pkg/controller/certrotation/certrotation_controller_test.go index 231a1ea672..1109570f16 100644 --- a/pkg/controller/certrotation/certrotation_controller_test.go +++ b/pkg/controller/certrotation/certrotation_controller_test.go @@ -12,10 +12,12 @@ import ( "github.com/stretchr/testify/require" configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/api/features" configinformers "github.com/openshift/client-go/config/informers/externalversions" "github.com/openshift/library-go/pkg/controller/factory" "github.com/openshift/library-go/pkg/operator/certrotation" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -111,7 +113,11 @@ func (f *fixture) newController() *CertRotationController { f.infraLister = append(f.infraLister, infra.(*configv1.Infrastructure)) } - c, err := New(f.kubeClient, f.configClient, f.machineClient, f.aroClient, f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().ConfigMaps(), f.infraInformer.Config().V1().Infrastructures()) + fgHandler := ctrlcommon.NewFeatureGatesHardcodedHandler( + []configv1.FeatureGateName{features.FeatureGateNoRegistryClusterInstall}, + nil, + ) + c, err := New(f.kubeClient, f.configClient, f.machineClient, f.aroClient, f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().Secrets(), f.k8sI.Core().V1().ConfigMaps(), f.infraInformer.Config().V1().Infrastructures(), fgHandler) require.NoError(f.t, err) c.StartInformers() @@ -349,6 +355,180 @@ func TestMCSCARotation(t *testing.T) { } } +func TestIRICertificateRotation(t *testing.T) { + tests := []struct { + name string + iriSecretAlreadyExists bool + forceRotation bool + }{ + { + // Covers the case where the IRI secret has been manually deleted + // (e.g., accidental "oc delete secret internal-release-image-tls") + // and the controller recreates it. + name: "IRI secret is created when it does not already exist", + iriSecretAlreadyExists: false, + forceRotation: false, + }, + { + name: "IRI secret is updated on CA rotation", + iriSecretAlreadyExists: true, + forceRotation: true, + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + f := newFixture(t) + maoSecret := getGoodMAOSecret("test-user-data") + f.machineObjects = append(f.machineObjects, getMachineSet("test-machine")) + f.objects = append(f.objects, maoSecret) + f.maoSecretLister = append(f.maoSecretLister, maoSecret) + f.controller = f.newController() + + // Initial sync to create CA and MCS TLS cert + f.runController() + + var originalCertData []byte + if test.iriSecretAlreadyExists { + // Create the IRI cert under the current CA + f.controller.reconcileIRICertificate() + existingSecret, err := f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.InternalReleaseImageTLSSecretName, metav1.GetOptions{}) + require.NoError(t, err, "IRI TLS secret should exist after initial reconciliation") + originalCertData = existingSecret.Data[corev1.TLSCertKey] + } else { + _, err := f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.InternalReleaseImageTLSSecretName, metav1.GetOptions{}) + require.True(t, errors.IsNotFound(err), "IRI TLS secret should not exist before reconciliation") + } + + if test.forceRotation { + t.Log("Forcing CA rotation") + secret, err := f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.MachineConfigServerCAName, metav1.GetOptions{}) + require.NoError(t, err) + newSecret := secret.DeepCopy() + newSecret.Annotations[certrotation.CertificateNotAfterAnnotation] = time.Now().Add(-time.Hour).Format(time.RFC3339) + _, err = f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Update(context.TODO(), newSecret, metav1.UpdateOptions{}) + require.NoError(t, err) + f.syncListers(t) + f.runController() + } + + // Reconcile the IRI certificate + f.controller.reconcileIRICertificate() + + // Verify the IRI TLS secret was created/updated + iriSecret, err := f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.InternalReleaseImageTLSSecretName, metav1.GetOptions{}) + require.NoError(t, err, "IRI TLS secret should exist after reconciliation") + require.Equal(t, corev1.SecretTypeTLS, iriSecret.Type, "IRI secret should be of type TLS") + + // If the IRI secret existed before, verify the cert data actually changed + if test.iriSecretAlreadyExists { + require.NotEqual(t, originalCertData, iriSecret.Data[corev1.TLSCertKey], "IRI certificate data should have changed after CA rotation") + } + + f.verifyIRICertificate(t) + }) + } + + // Verifies idempotency: if the IRI cert is already valid under the + // current CA, reconcileIRICertificate should skip regeneration. + t.Run("IRI secret is not regenerated when already valid", func(t *testing.T) { + t.Parallel() + f := newFixture(t) + maoSecret := getGoodMAOSecret("test-user-data") + f.machineObjects = append(f.machineObjects, getMachineSet("test-machine")) + f.objects = append(f.objects, maoSecret) + f.maoSecretLister = append(f.maoSecretLister, maoSecret) + f.controller = f.newController() + + // Initial sync to create CA and MCS TLS cert + f.runController() + + // First reconciliation creates the IRI cert + f.controller.reconcileIRICertificate() + + // Get the IRI secret after first reconciliation + iriSecret, err := f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.InternalReleaseImageTLSSecretName, metav1.GetOptions{}) + require.NoError(t, err, "IRI TLS secret should exist after first reconciliation") + originalResourceVersion := iriSecret.ResourceVersion + originalCertData := iriSecret.Data[corev1.TLSCertKey] + + // Second reconciliation should skip regeneration + f.controller.reconcileIRICertificate() + + // Verify the secret was not updated + iriSecret, err = f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.InternalReleaseImageTLSSecretName, metav1.GetOptions{}) + require.NoError(t, err) + require.Equal(t, originalResourceVersion, iriSecret.ResourceVersion, "IRI secret should not have been updated") + require.Equal(t, originalCertData, iriSecret.Data[corev1.TLSCertKey], "IRI certificate data should not have changed") + + t.Logf("Successfully verified IRI certificate was not regenerated when already valid") + }) +} + +// verifyIRICertificate checks that the IRI TLS certificate is signed by the current +// MCS CA and contains the expected SANs (hostnames from hostnamesRotation + localhost SANs). +func (f *fixture) verifyIRICertificate(t *testing.T) { + t.Helper() + + iriSecret, err := f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.InternalReleaseImageTLSSecretName, metav1.GetOptions{}) + require.NoError(t, err, "IRI TLS secret should exist") + + iriCertData := iriSecret.Data[corev1.TLSCertKey] + require.NotEmpty(t, iriCertData, "IRI certificate data should not be empty") + + block, _ := pem.Decode(iriCertData) + require.NotNil(t, block, "Should be able to decode IRI PEM certificate") + + iriCert, err := x509.ParseCertificate(block.Bytes) + require.NoError(t, err, "Should be able to parse IRI certificate") + + // Verify the IRI cert is signed by the MCS CA + caSecret, err := f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.MachineConfigServerCAName, metav1.GetOptions{}) + require.NoError(t, err) + caCertData := caSecret.Data[corev1.TLSCertKey] + require.NotEmpty(t, caCertData, "CA certificate data should not be empty") + + caBlock, _ := pem.Decode(caCertData) + require.NotNil(t, caBlock, "Should be able to decode CA PEM certificate") + caCert, err := x509.ParseCertificate(caBlock.Bytes) + require.NoError(t, err, "Should be able to parse CA certificate") + + err = iriCert.CheckSignatureFrom(caCert) + require.NoError(t, err, "IRI certificate should be signed by the MCS CA") + + // Verify the IRI cert has the correct SANs (hostnames from hostnamesRotation + localhost SANs) + expectedHostnames := f.controller.hostnamesRotation.GetHostnames() + require.NotEmpty(t, expectedHostnames, "Expected hostnames should not be empty") + expectedHostnames = append(expectedHostnames, "localhost", "127.0.0.1", "::1") + + for _, hostname := range expectedHostnames { + ip := net.ParseIP(hostname) + if ip != nil { + found := false + for _, certIP := range iriCert.IPAddresses { + if certIP.Equal(ip) { + found = true + break + } + } + require.True(t, found, "IP %s should be present in IRI certificate SAN IP addresses", hostname) + } else { + found := false + for _, dnsName := range iriCert.DNSNames { + if dnsName == hostname { + found = true + break + } + } + require.True(t, found, "Hostname %s should be present in IRI certificate SAN DNS names", hostname) + } + } + + t.Logf("Successfully verified IRI certificate: signed by MCS CA, correct SANs") +} + // Update the controller's indexers to capture the new secrets and configmaps func (f *fixture) syncListers(t *testing.T) { signingSecret, err := f.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.MachineConfigServerCAName, metav1.GetOptions{})