diff --git a/internal/controller/pgupgrade/jobs.go b/internal/controller/pgupgrade/jobs.go index f3a00bb9c..51f2e0f92 100644 --- a/internal/controller/pgupgrade/jobs.go +++ b/internal/controller/pgupgrade/jobs.go @@ -25,15 +25,6 @@ import ( // Upgrade job -// pgUpgradeJob returns the ObjectMeta for the pg_upgrade Job utilized to -// upgrade from one major PostgreSQL version to another -func pgUpgradeJob(upgrade *v1beta1.PGUpgrade) metav1.ObjectMeta { - return metav1.ObjectMeta{ - Namespace: upgrade.Namespace, - Name: upgrade.Name + "-pgdata", - } -} - // upgradeCommand returns an entrypoint that prepares the filesystem for // and performs a PostgreSQL major version upgrade using pg_upgrade. func upgradeCommand(oldVersion, newVersion int, fetchKeyCommand string, availableCPUs int) []string { @@ -142,7 +133,7 @@ func (r *PGUpgradeReconciler) generateUpgradeJob( job.SetGroupVersionKind(batchv1.SchemeGroupVersion.WithKind("Job")) job.Namespace = upgrade.Namespace - job.Name = pgUpgradeJob(upgrade).Name + job.Name = naming.SafeDNSUniqueName(upgrade.Name + "-pgdata") job.Labels = Merge(upgrade.Spec.Metadata.GetLabelsOrNil(), commonLabels(pgUpgrade, upgrade), //FIXME role pgupgrade @@ -302,7 +293,7 @@ func (r *PGUpgradeReconciler) generateRemoveDataJob( job.SetGroupVersionKind(batchv1.SchemeGroupVersion.WithKind("Job")) job.Namespace = upgrade.Namespace - job.Name = upgrade.Name + "-" + sts.Name + job.Name = naming.SafeDNSUniqueName(upgrade.Name + "-" + sts.Name) job.Labels = labels.Merge(upgrade.Spec.Metadata.GetLabelsOrNil(), commonLabels(removeData, upgrade)) //FIXME role removedata diff --git a/internal/controller/pgupgrade/jobs_test.go b/internal/controller/pgupgrade/jobs_test.go index 27a71c5a9..1f1fef12d 100644 --- a/internal/controller/pgupgrade/jobs_test.go +++ b/internal/controller/pgupgrade/jobs_test.go @@ -7,6 +7,7 @@ package pgupgrade import ( "context" "os" + "regexp" "strings" "testing" @@ -240,6 +241,29 @@ spec: status: {} `)) + t.Run("LongName", func(t *testing.T) { + // Test with an intentionally long name that exceeds 63 characters + longNameUpgrade := &v1beta1.PGUpgrade{} + longNameUpgrade.Name = "very-long-upgrade-name-that-will-exceed-the-maximum-dns-label-limit" + longNameUpgrade.Namespace = "ns1" + longNameUpgrade.Spec.PostgresClusterName = "very-long-cluster-name-that-also-exceeds-limits" + longNameUpgrade.Spec.FromPostgresVersion = 14 + longNameUpgrade.Spec.ToPostgresVersion = 15 + + longJob := reconciler.generateUpgradeJob(ctx, longNameUpgrade, startup, "") + + // Verify the job name fits within DNS limits and has the correct format + assert.Assert(t, len(longJob.Name) <= 63, "job name %q exceeds 63 characters", longJob.Name) + assert.Assert(t, len(longJob.Name) == 63, "truncated job name %q should be exactly 63 characters", longJob.Name) + // Verify the name ends with a dash followed by 4 alphanumeric characters (the deterministic suffix) + // Pattern: -<4 alphanumeric chars> + assert.Assert(t, regexp.MustCompile(`-[a-zA-Z0-9]{4}$`).MatchString(longJob.Name), + "job name %q should end with -<4 alphanumeric chars>", longJob.Name) + // Verify the suffix is deterministic (same input always produces same output) + longJob2 := reconciler.generateUpgradeJob(ctx, longNameUpgrade, startup, "") + assert.Assert(t, longJob.Name == longJob2.Name, "job name should be deterministic: %q vs %q", longJob.Name, longJob2.Name) + }) + t.Run(feature.PGUpgradeCPUConcurrency+"Enabled", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ diff --git a/internal/controller/pgupgrade/pgupgrade_controller.go b/internal/controller/pgupgrade/pgupgrade_controller.go index 7887c6cf0..1e3760548 100644 --- a/internal/controller/pgupgrade/pgupgrade_controller.go +++ b/internal/controller/pgupgrade/pgupgrade_controller.go @@ -250,7 +250,17 @@ func (r *PGUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Get the status version and check the jobs to see if this upgrade has completed statusVersion := int64(world.Cluster.Status.PostgresVersion) - upgradeJob := world.Jobs[pgUpgradeJob(upgrade).Name] + + // Find the upgrade job by its role and PGUpgrade name labels + var upgradeJob *batchv1.Job + for _, job := range world.Jobs { + if job.GetLabels()[LabelRole] == pgUpgrade && + job.GetLabels()[LabelPGUpgrade] == upgrade.Name { + upgradeJob = job + break + } + } + upgradeJobComplete := upgradeJob != nil && jobCompleted(upgradeJob) upgradeJobFailed := upgradeJob != nil && @@ -259,7 +269,8 @@ func (r *PGUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( var removeDataJobsFailed bool var removeDataJobsCompleted []*batchv1.Job for _, job := range world.Jobs { - if job.GetLabels()[LabelRole] == removeData { + if job.GetLabels()[LabelRole] == removeData && + job.GetLabels()[LabelPGUpgrade] == upgrade.Name { if jobCompleted(job) { removeDataJobsCompleted = append(removeDataJobsCompleted, job) } else if jobFailed(job) { @@ -502,7 +513,7 @@ func (r *PGUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( func setStatusToProgressingIfReasonWas(reason string, upgrade *v1beta1.PGUpgrade) { progressing := meta.FindStatusCondition(upgrade.Status.Conditions, ConditionPGUpgradeProgressing) - if progressing == nil || (progressing != nil && progressing.Reason == reason) { + if progressing == nil || progressing.Reason == reason { meta.SetStatusCondition(&upgrade.Status.Conditions, metav1.Condition{ ObservedGeneration: upgrade.GetGeneration(), Type: ConditionPGUpgradeProgressing, diff --git a/internal/naming/dns.go b/internal/naming/dns.go index 76d28d435..9b7d14c6d 100644 --- a/internal/naming/dns.go +++ b/internal/naming/dns.go @@ -6,14 +6,62 @@ package naming import ( "context" + "fmt" + "hash/fnv" "net" "strings" "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/rand" ) +// maxDNSSafeLength is the maximum length for a Kubernetes DNS-1123 label (63 characters). +// This is the universal limit for resource names that get used in DNS. +// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names +const maxDNSSafeLength = 63 + +// SafeDNSName truncates a name to fit within the Kubernetes DNS-1123 label limit of 63 characters. +// It also ensures the name doesn't end with a hyphen, which is invalid for DNS labels. +// This should be used for any resource name that may be used in DNS (Pods, Services, Jobs, etc.). +func SafeDNSName(name string) string { + // Strip trailing hyphens which are invalid in DNS labels + name = strings.TrimRight(name, "-") + if len(name) <= maxDNSSafeLength { + return name + } + name = name[:maxDNSSafeLength] + // Strip any trailing hyphens created by truncation + return strings.TrimRight(name, "-") +} + +// SafeDNSUniqueName ensures the name fits within the 63-character DNS-1123 label limit. +// If the name exceeds the limit, it truncates to 58 characters and appends a 4-character +// deterministic suffix based on the input name to maintain consistency across reconciles. +// It also ensures the name doesn't end with a hyphen, which is invalid for DNS labels. +// This is useful for resources that need unique names like Jobs or Pods. +func SafeDNSUniqueName(name string) string { + // Strip trailing hyphens which are invalid in DNS labels + name = strings.TrimRight(name, "-") + if len(name) <= maxDNSSafeLength { + return name + } + + // Reserve 5 characters for the dash + 4 char suffix + prefix := name[:maxDNSSafeLength-5] + // Strip trailing hyphens from the truncated prefix + prefix = strings.TrimRight(prefix, "-") + + // Use a deterministic suffix based on the cleaned name (not random!) + // This ensures the same name always produces the same output across reconciles + hash := fnv.New32a() + hash.Write([]byte(name)) + suffix := rand.SafeEncodeString(fmt.Sprint(hash.Sum32()))[:4] + + return prefix + "-" + suffix +} + // InstancePodDNSNames returns the possible DNS names for instance. The first // name is the fully qualified domain name (FQDN). func InstancePodDNSNames(ctx context.Context, instance *appsv1.StatefulSet, dnsSuffix string) []string { diff --git a/internal/naming/dns_test.go b/internal/naming/dns_test.go index 148046cd4..c0a651199 100644 --- a/internal/naming/dns_test.go +++ b/internal/naming/dns_test.go @@ -52,6 +52,45 @@ func TestInstancePodDNSNames(t *testing.T) { assert.Assert(t, !strings.HasSuffix(names[0], "."), "not expected root, got %q", names[0]) } +func TestSafeDNSName(t *testing.T) { + assert.Equal(t, "hello", SafeDNSName("hello"), "short name should be unchanged") + assert.Equal(t, "hello", SafeDNSName("hello--"), "trailing hyphens should be stripped") + + long := strings.Repeat("a", 63) + assert.Equal(t, long, SafeDNSName(long), "exactly 63 chars should pass through") + + long = strings.Repeat("a", 70) + assert.Equal(t, strings.Repeat("a", 63), SafeDNSName(long), "names over 63 chars should be truncated") + + assert.Equal(t, strings.Repeat("a", 62), + SafeDNSName(strings.Repeat("a", 62)+"--"), + "truncation should remove any trailing hyphen") + + assert.Equal(t, "", SafeDNSName("---"), "all hyphens should become empty") + assert.Equal(t, "", SafeDNSName(""), "empty string should stay empty") +} + +func TestSafeDNSUniqueName(t *testing.T) { + assert.Equal(t, "hello", SafeDNSUniqueName("hello"), "short name should be unchanged") + assert.Equal(t, "hello", SafeDNSUniqueName("hello--"), "trailing hyphens should be stripped") + + long := strings.Repeat("a", 63) + assert.Equal(t, long, SafeDNSUniqueName(long), "exactly 63 chars should pass through") + + long = strings.Repeat("a", 70) + result := SafeDNSUniqueName(long) + assert.Assert(t, len(result) <= 63, "expected <= 63 chars, got %d", len(result)) + assert.Assert(t, !strings.HasSuffix(result, "-"), "unexpected trailing hyphen: %q", result) + + assert.Equal(t, result, SafeDNSUniqueName(long), "same input should produce same output") + + a := SafeDNSUniqueName("something-long-enough-to-trigger-truncation-aaaaaaaa") + b := SafeDNSUniqueName("something-long-enough-to-trigger-truncation-bbbbbbbb") + assert.Assert(t, a != b, "different inputs should produce different results") + + assert.Equal(t, "", SafeDNSUniqueName(""), "empty string should stay empty") +} + func TestServiceDNSNames(t *testing.T) { ctx, cancel := context.WithTimeout(t.Context(), time.Second) defer cancel()