-
Notifications
You must be signed in to change notification settings - Fork 474
AGENT-1443: IRI Add certificate regeneration to MCS cert rotation controller #5721
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -3,17 +3,21 @@ package certrotationcontroller | |||||||||
| import ( | ||||||||||
| "bytes" | ||||||||||
| "context" | ||||||||||
| "crypto/x509" | ||||||||||
| "encoding/json" | ||||||||||
| "encoding/pem" | ||||||||||
| "fmt" | ||||||||||
| "time" | ||||||||||
|
|
||||||||||
| "github.com/vincent-petithory/dataurl" | ||||||||||
|
|
||||||||||
| corev1 "k8s.io/api/core/v1" | ||||||||||
| "k8s.io/apimachinery/pkg/api/errors" | ||||||||||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||||||||
| "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" | ||||||||||
| "k8s.io/apimachinery/pkg/labels" | ||||||||||
| utilruntime "k8s.io/apimachinery/pkg/util/runtime" | ||||||||||
| "k8s.io/apimachinery/pkg/util/sets" | ||||||||||
| "k8s.io/apimachinery/pkg/util/wait" | ||||||||||
| coreinformersv1 "k8s.io/client-go/informers/core/v1" | ||||||||||
| "k8s.io/client-go/kubernetes" | ||||||||||
|
|
@@ -24,10 +28,12 @@ import ( | |||||||||
| "k8s.io/utils/clock" | ||||||||||
|
|
||||||||||
| configv1 "github.com/openshift/api/config/v1" | ||||||||||
| "github.com/openshift/api/features" | ||||||||||
| configclientset "github.com/openshift/client-go/config/clientset/versioned" | ||||||||||
| machineclientset "github.com/openshift/client-go/machine/clientset/versioned" | ||||||||||
|
|
||||||||||
| "github.com/openshift/library-go/pkg/controller/factory" | ||||||||||
| "github.com/openshift/library-go/pkg/crypto" | ||||||||||
| "github.com/openshift/library-go/pkg/operator/certrotation" | ||||||||||
| "github.com/openshift/library-go/pkg/operator/events" | ||||||||||
|
|
||||||||||
|
|
@@ -46,6 +52,7 @@ const ( | |||||||||
| mcsCARefresh = 8 * oneYear | ||||||||||
| mcsTLSKeyExpiry = mcsCAExpiry | ||||||||||
| mcsTLSKeyRefresh = mcsCARefresh | ||||||||||
| iriTLSKeyExpiry = mcsCAExpiry | ||||||||||
| workQueueKey = "key" | ||||||||||
| ) | ||||||||||
|
|
||||||||||
|
|
@@ -70,6 +77,8 @@ type CertRotationController struct { | |||||||||
| recorder events.Recorder | ||||||||||
|
|
||||||||||
| cachesToSync []cache.InformerSynced | ||||||||||
|
|
||||||||||
| featureGatesHandler ctrlcommon.FeatureGatesHandler | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // New returns a new cert rotation controller. | ||||||||||
|
|
@@ -82,6 +91,7 @@ func New( | |||||||||
| mcoSecretInformer coreinformersv1.SecretInformer, | ||||||||||
| mcoConfigMapInfomer coreinformersv1.ConfigMapInformer, | ||||||||||
| infraInformer configinformers.InfrastructureInformer, | ||||||||||
| featureGatesHandler ctrlcommon.FeatureGatesHandler, | ||||||||||
| ) (*CertRotationController, error) { | ||||||||||
|
|
||||||||||
| recorder := events.NewLoggingEventRecorder(componentName, clock.RealClock{}) | ||||||||||
|
|
@@ -106,8 +116,9 @@ func New( | |||||||||
| hostnamesQueue: workqueue.NewTypedRateLimitingQueueWithConfig( | ||||||||||
| workqueue.DefaultTypedControllerRateLimiter[string](), | ||||||||||
| workqueue.TypedRateLimitingQueueConfig[string]{Name: "Hostnames"}), | ||||||||||
| infraInformer: infraInformer, | ||||||||||
| infraLister: infraInformer.Lister(), | ||||||||||
| infraInformer: infraInformer, | ||||||||||
| infraLister: infraInformer.Lister(), | ||||||||||
| featureGatesHandler: featureGatesHandler, | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // The cert controller will begin creating "machine-config-server-ca" configmap & secret in the MCO namespace. | ||||||||||
|
|
@@ -331,11 +342,14 @@ func (c *CertRotationController) addConfigMap(obj interface{}) { | |||||||||
| return | ||||||||||
| } | ||||||||||
|
|
||||||||||
| klog.Infof("configMap %s added, reconciling all user data secrets", configMap.Name) | ||||||||||
| klog.Infof("configMap %s added, reconciling user data secrets and IRI certificate", configMap.Name) | ||||||||||
|
|
||||||||||
| go func() { | ||||||||||
| c.reconcileUserDataSecrets() | ||||||||||
| }() | ||||||||||
| go func() { | ||||||||||
| c.reconcileIRICertificate() | ||||||||||
| }() | ||||||||||
| } | ||||||||||
|
|
||||||||||
| func (c *CertRotationController) updateConfigMap(oldCM, newCM interface{}) { | ||||||||||
|
|
@@ -353,12 +367,15 @@ func (c *CertRotationController) updateConfigMap(oldCM, newCM interface{}) { | |||||||||
| return | ||||||||||
| } | ||||||||||
|
|
||||||||||
| klog.Infof("configMap %s updated, reconciling all user data secrets", oldConfigMap.Name) | ||||||||||
| klog.Infof("configMap %s updated, reconciling user data secrets and IRI certificate", oldConfigMap.Name) | ||||||||||
|
|
||||||||||
| // Reconcile all user data secrets | ||||||||||
| go func() { | ||||||||||
| c.reconcileUserDataSecrets() | ||||||||||
| }() | ||||||||||
| go func() { | ||||||||||
| c.reconcileIRICertificate() | ||||||||||
| }() | ||||||||||
| } | ||||||||||
|
|
||||||||||
| func (c *CertRotationController) addSecret(obj interface{}) { | ||||||||||
|
|
@@ -467,3 +484,139 @@ func (c *CertRotationController) reconcileSecret(secret corev1.Secret) error { | |||||||||
| klog.Infof("Successfully modified %s secret \n", secret.Name) | ||||||||||
| return nil | ||||||||||
| } | ||||||||||
|
|
||||||||||
| func (c *CertRotationController) reconcileIRICertificate() { | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this internal method is very long, please evaluate to refactor it in smaller methods |
||||||||||
| if !c.featureGatesHandler.Enabled(features.FeatureGateNoRegistryClusterInstall) { | ||||||||||
| klog.V(4).Infof("Skipping IRI certificate reconciliation: %s feature gate is not enabled", features.FeatureGateNoRegistryClusterInstall) | ||||||||||
| return | ||||||||||
| } | ||||||||||
| klog.Infof("Reconciling IRI certificate") | ||||||||||
|
|
||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is also required to check for the presence of the IRI |
||||||||||
| // Get the MCS CA secret (fresh get, not from lister) | ||||||||||
| caSecret, err := c.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.MachineConfigServerCAName, metav1.GetOptions{}) | ||||||||||
| if err != nil { | ||||||||||
| klog.Errorf("Cannot get MCS CA secret for IRI cert reconciliation: %v", err) | ||||||||||
| return | ||||||||||
| } | ||||||||||
|
|
||||||||||
| caCert := caSecret.Data[corev1.TLSCertKey] | ||||||||||
| caKey := caSecret.Data[corev1.TLSPrivateKeyKey] | ||||||||||
| if len(caCert) == 0 || len(caKey) == 0 { | ||||||||||
| klog.Errorf("MCS CA secret %s is missing cert or key data", ctrlcommon.MachineConfigServerCAName) | ||||||||||
| return | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Load the CA | ||||||||||
| ca, err := crypto.GetCAFromBytes(caCert, caKey) | ||||||||||
| if err != nil { | ||||||||||
| klog.Errorf("Cannot load MCS CA for IRI cert generation: %v", err) | ||||||||||
| return | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Check if the existing IRI cert is already valid under the current CA | ||||||||||
| iriSecret, err := c.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Get(context.TODO(), ctrlcommon.InternalReleaseImageTLSSecretName, metav1.GetOptions{}) | ||||||||||
| secretExists := err == nil | ||||||||||
| if secretExists { | ||||||||||
| if c.isIRICertValid(iriSecret, ca) { | ||||||||||
| klog.Infof("IRI TLS certificate is still valid under the current MCS CA, skipping rotation") | ||||||||||
| return | ||||||||||
| } | ||||||||||
| } else if !errors.IsNotFound(err) { | ||||||||||
| klog.Errorf("Cannot get IRI TLS secret: %v", err) | ||||||||||
| return | ||||||||||
| } | ||||||||||
|
|
||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like this whole block could be simplified with the happy path, and Later you could simply check for iriSecret != nil |
||||||||||
| // Get hostnames from the dynamic serving rotation (includes api-int hostname and platform VIPs) | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are the hostnames from the infra object required for the IRI cert? I was under the impression it wasn't. It's possible the infra hostnames can get updated outside of the configmap being updated(very unlikely, but plauisble), that could result in the hostnames in the IRI cert would be stale. The MCS TLS cert handles this as it feeds of the dynamic serving rotation so I thought it would be worth mentioning: machine-config-operator/pkg/controller/certrotation/certrotation_controller.go Lines 155 to 158 in caa75db
If we do want infra hostnames changes to be accounted for, we can call
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. During the installation, when the initial IRI TLS certificate is generated (see here), we don't use hostnames, just |
||||||||||
| hostnames := c.hostnamesRotation.GetHostnames() | ||||||||||
| if len(hostnames) == 0 { | ||||||||||
| klog.Errorf("No hostnames available for IRI cert generation") | ||||||||||
| return | ||||||||||
| } | ||||||||||
| // IRI registry also serves locally on each master node, matching the installer's SANs | ||||||||||
| hostnames = append(hostnames, "localhost", "127.0.0.1", "::1") | ||||||||||
|
|
||||||||||
| // Generate a new IRI TLS certificate signed by the MCS CA | ||||||||||
| certConfig, err := ca.MakeServerCert(sets.New(hostnames...), iriTLSKeyExpiry) | ||||||||||
| if err != nil { | ||||||||||
| klog.Errorf("Cannot generate IRI TLS certificate: %v", err) | ||||||||||
| return | ||||||||||
| } | ||||||||||
|
|
||||||||||
| certPEM, keyPEM, err := certConfig.GetPEMBytes() | ||||||||||
| if err != nil { | ||||||||||
| klog.Errorf("Cannot get PEM bytes for IRI TLS certificate: %v", err) | ||||||||||
| return | ||||||||||
| } | ||||||||||
|
|
||||||||||
| if !secretExists { | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to take into account this case? It doesn't seem a valid one (at least here): if the secret does not exist for any reason, the IRI controller will be broken and won't work as well |
||||||||||
| // Create new secret | ||||||||||
| newSecret := &corev1.Secret{ | ||||||||||
| ObjectMeta: metav1.ObjectMeta{ | ||||||||||
| Name: ctrlcommon.InternalReleaseImageTLSSecretName, | ||||||||||
| Namespace: ctrlcommon.MCONamespace, | ||||||||||
| }, | ||||||||||
| Type: corev1.SecretTypeTLS, | ||||||||||
| Data: map[string][]byte{ | ||||||||||
| corev1.TLSCertKey: certPEM, | ||||||||||
| corev1.TLSPrivateKeyKey: keyPEM, | ||||||||||
| }, | ||||||||||
| } | ||||||||||
| if _, err := c.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Create(context.TODO(), newSecret, metav1.CreateOptions{}); err != nil { | ||||||||||
| klog.Errorf("Cannot create IRI TLS secret: %v", err) | ||||||||||
| return | ||||||||||
| } | ||||||||||
| klog.Infof("Successfully created IRI TLS secret %s", ctrlcommon.InternalReleaseImageTLSSecretName) | ||||||||||
| return | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Update existing secret | ||||||||||
| updatedSecret := iriSecret.DeepCopy() | ||||||||||
| updatedSecret.Data[corev1.TLSCertKey] = certPEM | ||||||||||
| updatedSecret.Data[corev1.TLSPrivateKeyKey] = keyPEM | ||||||||||
| if _, err := c.kubeClient.CoreV1().Secrets(ctrlcommon.MCONamespace).Update(context.TODO(), updatedSecret, metav1.UpdateOptions{}); err != nil { | ||||||||||
| klog.Errorf("Cannot update IRI TLS secret: %v", err) | ||||||||||
| return | ||||||||||
| } | ||||||||||
| klog.Infof("Successfully updated IRI TLS secret %s", ctrlcommon.InternalReleaseImageTLSSecretName) | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // isIRICertValid checks whether the existing IRI certificate is signed by the given CA | ||||||||||
| // and has not expired. | ||||||||||
| func (c *CertRotationController) isIRICertValid(iriSecret *corev1.Secret, ca *crypto.CA) bool { | ||||||||||
| certPEM := iriSecret.Data[corev1.TLSCertKey] | ||||||||||
| if len(certPEM) == 0 { | ||||||||||
| return false | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Decode the first PEM block (the leaf certificate) | ||||||||||
| block, _ := pem.Decode(certPEM) | ||||||||||
| if block == nil { | ||||||||||
| klog.Warningf("Cannot decode PEM from existing IRI TLS certificate") | ||||||||||
| return false | ||||||||||
| } | ||||||||||
|
|
||||||||||
| iriCert, err := x509.ParseCertificate(block.Bytes) | ||||||||||
| if err != nil { | ||||||||||
| klog.Warningf("Cannot parse existing IRI TLS certificate: %v", err) | ||||||||||
| return false | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Build a cert pool with the current CA to verify against | ||||||||||
| caPool := x509.NewCertPool() | ||||||||||
| for _, caCert := range ca.Config.Certs { | ||||||||||
| caPool.AddCert(caCert) | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Verify the IRI cert is signed by the current CA and is not expired | ||||||||||
| _, err = iriCert.Verify(x509.VerifyOptions{ | ||||||||||
| Roots: caPool, | ||||||||||
| CurrentTime: time.Now(), | ||||||||||
| KeyUsages: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, | ||||||||||
| }) | ||||||||||
| if err != nil { | ||||||||||
| klog.Infof("Existing IRI TLS certificate is not valid under current MCS CA: %v", err) | ||||||||||
| return false | ||||||||||
| } | ||||||||||
|
|
||||||||||
| return true | ||||||||||
| } | ||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I forgot about these being individual go routines in my original implementation. There is a very small chance of a race here on create or updates. Given that the CA rotates once every 8 years or so, the risk should be minimal. Even if there is a race, the fresh GETs on the CA should result in valid creates/updates.
If we want to be careful we can combine these into one thread and set up a mutex, but IMO that's not a blocker for merging this. The safest way to do this would be via a workqueue like our other controllers, but again definitely not a blocker, just me lamenting my original decisions 😄
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about serializing them? Ie: