From d3ec2fdce2df526f45954fbbb1fe143f994d4ffb Mon Sep 17 00:00:00 2001 From: yoav-katz Date: Fri, 19 Jun 2026 00:21:01 +0300 Subject: [PATCH 01/18] feat(etcd) --- cmd/postgres-operator/main.go | 9 + docs/etcd-dcs.md | 132 ++++++++ internal/patroni/config.go | 170 ++++++---- internal/patroni/config_test.go | 293 ++++++++++++++++++ internal/patroni/reconcile.go | 52 ++++ percona/controller/pgcluster/controller.go | 32 ++ percona/controller/pgcluster/patroni_etcd.go | 69 +++++ .../controller/pgcluster/patroni_etcd_test.go | 214 +++++++++++++ .../v2/perconapgcluster_types.go | 42 +++ .../v2/perconapgcluster_types_test.go | 82 +++++ .../v1beta1/patroni_types.go | 65 +++- .../v1beta1/zz_generated.deepcopy.go | 46 +++ 12 files changed, 1143 insertions(+), 63 deletions(-) create mode 100644 docs/etcd-dcs.md create mode 100644 percona/controller/pgcluster/patroni_etcd.go create mode 100644 percona/controller/pgcluster/patroni_etcd_test.go diff --git a/cmd/postgres-operator/main.go b/cmd/postgres-operator/main.go index f61123c4b..33179b680 100644 --- a/cmd/postgres-operator/main.go +++ b/cmd/postgres-operator/main.go @@ -185,6 +185,15 @@ func addControllersToManager(ctx context.Context, mgr manager.Manager) error { return err } + if err := mgr.GetFieldIndexer().IndexField( + context.Background(), + &v2.PerconaPGCluster{}, + v2.IndexFieldPatroniEtcdSecrets, + v2.PatroniEtcdSecretsIndexerFunc, + ); err != nil { + return err + } + externalEvents := make(chan event.GenericEvent) stopChan := make(chan event.DeleteEvent) diff --git a/docs/etcd-dcs.md b/docs/etcd-dcs.md new file mode 100644 index 000000000..6f2189ed6 --- /dev/null +++ b/docs/etcd-dcs.md @@ -0,0 +1,132 @@ +# External etcd DCS for Patroni + +By default, the operator uses Kubernetes Endpoints as the Patroni distributed configuration store (DCS). Clusters on managed Kubernetes platforms where the control plane API is not accessible from within the cluster, or clusters requiring an HA DCS independent of the Kubernetes API, can use an external etcd cluster instead. + +## Configuration + +Set `spec.patroni.dcs.type: etcd` and provide at least one endpoint: + +```yaml +spec: + patroni: + dcs: + type: etcd + etcd: + endpoints: + - https://etcd.etcd-cluster.svc:2379 +``` + +### Field reference + +| Field | Type | Required | Description | +|---|---|---|---| +| `dcs.type` | `kubernetes` \| `etcd` | No (default: `kubernetes`) | DCS backend. **Immutable after cluster creation.** | +| `dcs.etcd.endpoints` | `[]string` | Yes (when type is etcd) | etcd endpoint URLs including scheme and port. All endpoints must use the same scheme (`http://` or `https://`). | +| `dcs.etcd.tlsSecret` | `string` | No | Name of a Secret in the same namespace containing TLS client credentials. | +| `dcs.etcd.authSecret` | `string` | No | Name of a Secret in the same namespace containing etcd username/password. | + +## Secrets + +### TLS secret (`tlsSecret`) + +Required when the etcd cluster uses TLS. The Secret must contain exactly these keys: + +``` +ca.crt — PEM-encoded CA certificate that signed the etcd server certificate +tls.crt — PEM-encoded client certificate (for mutual TLS) +tls.key — PEM-encoded private key for the client certificate +``` + +Example: + +```bash +kubectl create secret generic etcd-tls \ + --from-file=ca.crt=ca.pem \ + --from-file=tls.crt=client.pem \ + --from-file=tls.key=client-key.pem +``` + +Reference it in the CR: + +```yaml +spec: + patroni: + dcs: + type: etcd + etcd: + endpoints: + - https://etcd.etcd-cluster.svc:2379 + tlsSecret: etcd-tls +``` + +The secret is mounted read-only at `/etc/patroni/etcd-tls/` inside each PostgreSQL pod. + +### Auth secret (`authSecret`) + +Required when the etcd cluster uses username/password authentication. The Secret must contain exactly these keys: + +``` +username — etcd username +password — etcd password +``` + +Example: + +```bash +kubectl create secret generic etcd-auth \ + --from-literal=username=patroni \ + --from-literal=password=supersecret +``` + +Reference it in the CR: + +```yaml +spec: + patroni: + dcs: + type: etcd + etcd: + endpoints: + - https://etcd.etcd-cluster.svc:2379 + authSecret: etcd-auth +``` + +Credentials are injected as `PATRONI_ETCD3_USERNAME` and `PATRONI_ETCD3_PASSWORD` environment variables in each PostgreSQL pod. They are **not** written to any ConfigMap. + +## Secret rotation + +**Rotating TLS certificates or auth credentials requires a rolling restart of all PostgreSQL pods.** Patroni loads DCS credentials and TLS mounts only at startup. It does not watch or dynamically reload environment variables or volume mounts after the process starts. + +To rotate credentials: + +1. Update the Secret with new values: + ```bash + kubectl create secret generic etcd-auth \ + --from-literal=username=patroni \ + --from-literal=password=newsecret \ + --dry-run=client -o yaml | kubectl apply -f - + ``` + +2. Trigger a rolling restart of each instance StatefulSet: + ```bash + kubectl rollout restart statefulset/-instance1 + ``` + + Repeat for each instance set in the cluster. + +## Immutability + +`spec.patroni.dcs.type` is immutable after cluster creation. The admission webhook rejects updates that attempt to change this field. Switching DCS backends requires deleting and recreating the cluster. + +## Validation + +The operator validates the etcd configuration at both admission time and during each reconcile: + +- **Admission**: CEL rules reject empty endpoints when `type: etcd`, mixed `http://`/`https://` schemes in the same endpoint list, endpoints that do not match `^https?://`, and changes to `dcs.type` after creation. +- **Reconcile**: If `tlsSecret` or `authSecret` is set but the referenced Secret does not exist or is missing a required key, the operator emits a `Warning` event on the `PerconaPGCluster` object and requeues the reconcile. No PostgreSQL pods are affected until the secret is corrected. + +Inspect events with: + +```bash +kubectl describe perconapgcluster +``` diff --git a/internal/patroni/config.go b/internal/patroni/config.go index 64752d08d..f642ba042 100644 --- a/internal/patroni/config.go +++ b/internal/patroni/config.go @@ -7,6 +7,7 @@ package patroni import ( "fmt" "maps" + "net/url" "path" "strings" @@ -36,6 +37,33 @@ const ( "# If you want to override the config, annotate this ConfigMap with " + naming.OverrideConfigAnnotation + "=true\n" ) +// etcdHosts strips the URL scheme from each endpoint and returns bare host:port strings. +func etcdHosts(endpoints []string) []string { + hosts := make([]string, 0, len(endpoints)) + for _, ep := range endpoints { + u, err := url.Parse(ep) + if err != nil || u.Host == "" { + hosts = append(hosts, ep) + continue + } + hosts = append(hosts, u.Host) + } + return hosts +} + +// etcdProtocol returns the URL scheme of the first endpoint ("http" or "https"). +// Defaults to "http" when the scheme cannot be determined. +func etcdProtocol(endpoints []string) string { + if len(endpoints) == 0 { + return "http" + } + u, err := url.Parse(endpoints[0]) + if err != nil || (u.Scheme != "http" && u.Scheme != "https") { + return "http" + } + return u.Scheme +} + // quoteShellWord ensures that s is interpreted by a shell as single word. func quoteShellWord(s string) string { // https://www.gnu.org/software/bash/manual/html_node/Quoting.html @@ -48,33 +76,11 @@ func clusterYAML( pgHBAs postgres.HBAs, pgParameters postgres.Parameters, ) (string, error) { - labels := map[string]string{naming.LabelCluster: cluster.Name} - if cluster.CompareVersion("2.9.0") >= 0 { - labels = naming.Merge(cluster.Spec.Metadata.GetLabelsOrNil(), labels) - } - root := map[string]any{ // The cluster identifier. This value cannot change during the cluster's // lifetime. "scope": naming.PatroniScope(cluster), - // Use Kubernetes Endpoints for the distributed configuration store (DCS). - // These values cannot change during the cluster's lifetime. - // - // NOTE(cbandy): It *might* be possible to *carefully* change the role and - // scope labels, but there is no way to reconfigure all instances at once. - "kubernetes": map[string]any{ - "namespace": cluster.Namespace, - "role_label": naming.LabelRole, - "scope_label": naming.LabelPatroni, - "use_endpoints": true, - - // In addition to "scope_label" above, Patroni will add the following to - // every object it creates. It will also use these as filters when doing - // any lookups. - "labels": labels, - }, - "postgresql": map[string]any{ // TODO(cbandy): "callbacks" @@ -158,6 +164,41 @@ func clusterYAML( }, } + // DCS configuration — either external etcd or the Kubernetes-native Endpoints backend. + // These values cannot change during the cluster's lifetime. + if dcs := cluster.Spec.Patroni.GetDCS(); dcs != nil && dcs.Type == v1beta1.PatroniDCSTypeEtcd && dcs.Etcd != nil { + etcd3 := map[string]any{ + "hosts": etcdHosts(dcs.Etcd.Endpoints), + "protocol": etcdProtocol(dcs.Etcd.Endpoints), + } + if dcs.Etcd.TLSSecret != "" { + etcd3["cacert"] = path.Join(configDirectory, "etcd-tls", "ca.crt") + etcd3["cert"] = path.Join(configDirectory, "etcd-tls", "tls.crt") + etcd3["key"] = path.Join(configDirectory, "etcd-tls", "tls.key") + } + root["etcd3"] = etcd3 + } else { + // Use Kubernetes Endpoints for the distributed configuration store (DCS). + // + // NOTE(cbandy): It *might* be possible to *carefully* change the role and + // scope labels, but there is no way to reconfigure all instances at once. + labels := map[string]string{naming.LabelCluster: cluster.Name} + if cluster.CompareVersion("2.9.0") >= 0 { + labels = naming.Merge(cluster.Spec.Metadata.GetLabelsOrNil(), labels) + } + root["kubernetes"] = map[string]any{ + "namespace": cluster.Namespace, + "role_label": naming.LabelRole, + "scope_label": naming.LabelPatroni, + "use_endpoints": true, + + // In addition to "scope_label" above, Patroni will add the following to + // every object it creates. It will also use these as filters when doing + // any lookups. + "labels": labels, + } + } + if !ClusterBootstrapped(cluster) { // Patroni has not yet bootstrapped. Populate the "bootstrap.dcs" field to // facilitate it. When Patroni is already bootstrapped, this field is ignored. @@ -371,7 +412,7 @@ func instanceEnvironment( // - https://github.com/zalando/patroni/blob/v2.0.2/patroni/postgresql/postmaster.py#L215-L216 variables := []corev1.EnvVar{ - // Set "name" to the v1.Pod's name. Required when using Kubernetes for DCS. + // Set "name" to the v1.Pod's name. Required by Patroni for node identity. // Patroni must be restarted when changing this value. { Name: "PATRONI_NAME", @@ -380,31 +421,38 @@ func instanceEnvironment( FieldPath: "metadata.name", }}, }, + } - // Set "kubernetes.pod_ip" to the v1.Pod's primary IP address. - // Patroni must be restarted when changing this value. - { - Name: "PATRONI_KUBERNETES_POD_IP", - ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{ - APIVersion: "v1", - FieldPath: "status.podIP", - }}, - }, + // The PATRONI_KUBERNETES_* variables are only needed for the Kubernetes DCS backend. + if dcs := cluster.Spec.Patroni.GetDCS(); dcs == nil || dcs.Type != v1beta1.PatroniDCSTypeEtcd { + variables = append(variables, + // Set "kubernetes.pod_ip" to the v1.Pod's primary IP address. + // Patroni must be restarted when changing this value. + corev1.EnvVar{ + Name: "PATRONI_KUBERNETES_POD_IP", + ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "status.podIP", + }}, + }, - // When using Endpoints for DCS, Patroni needs to replicate the leader - // ServicePort definitions. Set "kubernetes.ports" to the YAML of this - // Pod's equivalent EndpointPort definitions. - // - // This is connascent with PATRONI_POSTGRESQL_CONNECT_ADDRESS below. - // Patroni must be restarted when changing this value. - { - Name: "PATRONI_KUBERNETES_PORTS", - Value: string(portsYAML), - }, + // When using Endpoints for DCS, Patroni needs to replicate the leader + // ServicePort definitions. Set "kubernetes.ports" to the YAML of this + // Pod's equivalent EndpointPort definitions. + // + // This is connascent with PATRONI_POSTGRESQL_CONNECT_ADDRESS below. + // Patroni must be restarted when changing this value. + corev1.EnvVar{ + Name: "PATRONI_KUBERNETES_PORTS", + Value: string(portsYAML), + }, + ) + } + variables = append(variables, // Set "postgresql.connect_address" using the Pod's stable DNS name. // PostgreSQL must be restarted when changing this value. - { + corev1.EnvVar{ Name: "PATRONI_POSTGRESQL_CONNECT_ADDRESS", Value: fmt.Sprintf("%s.%s:%d", "$(PATRONI_NAME)", podSubdomain, postgresPort), }, @@ -414,28 +462,28 @@ func instanceEnvironment( // // This is connascent with PATRONI_POSTGRESQL_CONNECT_ADDRESS above. // PostgreSQL must be restarted when changing this value. - { + corev1.EnvVar{ Name: "PATRONI_POSTGRESQL_LISTEN", Value: fmt.Sprintf("*:%d", postgresPort), }, // Set "postgresql.config_dir" to PostgreSQL's $PGDATA directory. // Patroni must be restarted when changing this value. - { + corev1.EnvVar{ Name: "PATRONI_POSTGRESQL_CONFIG_DIR", Value: postgres.ConfigDirectory(cluster), }, // Set "postgresql.data_dir" to PostgreSQL's "data_directory". // Patroni must be restarted when changing this value. - { + corev1.EnvVar{ Name: "PATRONI_POSTGRESQL_DATA_DIR", Value: postgres.DataDirectory(cluster), }, // Set "restapi.connect_address" using the Pod's stable DNS name. // Patroni must be reloaded when changing this value. - { + corev1.EnvVar{ Name: "PATRONI_RESTAPI_CONNECT_ADDRESS", Value: fmt.Sprintf("%s.%s:%d", "$(PATRONI_NAME)", podSubdomain, patroniPort), }, @@ -443,17 +491,17 @@ func instanceEnvironment( // Set "restapi.listen" using the special address "*" to mean all TCP interfaces. // This is connascent with PATRONI_RESTAPI_CONNECT_ADDRESS above. // Patroni must be reloaded when changing this value. - { + corev1.EnvVar{ Name: "PATRONI_RESTAPI_LISTEN", Value: fmt.Sprintf("*:%d", patroniPort), }, // The Patroni client `patronictl` looks here for its configuration file(s). - { + corev1.EnvVar{ Name: "PATRONICTL_CONFIG_FILE", Value: configDirectory, }, - } + ) return variables } @@ -497,15 +545,6 @@ func instanceYAML( // created. That value should be injected using the downward API and the // PATRONI_NAME environment variable. - "kubernetes": map[string]any{ - // Missing here is "pod_ip" which cannot be known until the instance Pod is - // created. That value should be injected using the downward API and the - // PATRONI_KUBERNETES_POD_IP environment variable. - - // Missing here is "ports" which is is connascent with "postgresql.connect_address". - // See the PATRONI_KUBERNETES_PORTS env variable. - }, - "restapi": map[string]any{ // Missing here is "connect_address" which cannot be known until the // instance Pod is created. That value should be injected using the downward @@ -521,6 +560,19 @@ func instanceYAML( }, } + // For the Kubernetes DCS backend, include an empty kubernetes section that + // receives pod_ip and ports from the PATRONI_KUBERNETES_* environment variables. + if dcs := cluster.Spec.Patroni.GetDCS(); dcs == nil || dcs.Type != v1beta1.PatroniDCSTypeEtcd { + root["kubernetes"] = map[string]any{ + // Missing here is "pod_ip" which cannot be known until the instance Pod is + // created. That value should be injected using the downward API and the + // PATRONI_KUBERNETES_POD_IP environment variable. + + // Missing here is "ports" which is connascent with "postgresql.connect_address". + // See the PATRONI_KUBERNETES_PORTS env variable. + } + } + postgresql := map[string]any{ // TODO(cbandy): "bin_dir" diff --git a/internal/patroni/config_test.go b/internal/patroni/config_test.go index 8598ca58f..e6cfb532a 100644 --- a/internal/patroni/config_test.go +++ b/internal/patroni/config_test.go @@ -252,6 +252,227 @@ watchdog: mode: "off" `)+"\n") }) + + t.Run("etcd DCS - minimal", func(t *testing.T) { + cluster := new(v1beta1.PostgresCluster) + assert.NilError(t, cluster.Default(context.Background(), nil)) + cluster.Namespace = "some-namespace" + cluster.Name = "cluster-name" + cluster.Spec.Patroni = &v1beta1.PatroniSpec{ + DCS: &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd.etcd-cluster.svc:2379"}, + }, + }, + } + cluster.Spec.Patroni.Default() + + data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}) + assert.NilError(t, err) + + var parsed map[string]any + assert.NilError(t, yaml.Unmarshal([]byte(data), &parsed)) + + _, hasKubernetes := parsed["kubernetes"] + assert.Assert(t, !hasKubernetes, "expected no kubernetes section for etcd DCS") + + etcd3, ok := parsed["etcd3"].(map[string]any) + assert.Assert(t, ok, "expected etcd3 section") + assert.Equal(t, etcd3["protocol"], "https") + + hosts, ok := etcd3["hosts"].([]any) + assert.Assert(t, ok, "expected etcd3.hosts to be a list") + assert.Equal(t, len(hosts), 1) + assert.Equal(t, hosts[0], "etcd.etcd-cluster.svc:2379") + + _, hasCacert := etcd3["cacert"] + assert.Assert(t, !hasCacert, "expected no cacert without tlsSecret") + _, hasUsername := etcd3["username"] + assert.Assert(t, !hasUsername, "expected no username in etcd3 section") + _, hasPassword := etcd3["password"] + assert.Assert(t, !hasPassword, "expected no password in etcd3 section") + }) + + t.Run("etcd DCS - http protocol", func(t *testing.T) { + cluster := new(v1beta1.PostgresCluster) + assert.NilError(t, cluster.Default(context.Background(), nil)) + cluster.Namespace = "some-namespace" + cluster.Name = "cluster-name" + cluster.Spec.Patroni = &v1beta1.PatroniSpec{ + DCS: &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"http://etcd:2379"}, + }, + }, + } + cluster.Spec.Patroni.Default() + + data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}) + assert.NilError(t, err) + + var parsed map[string]any + assert.NilError(t, yaml.Unmarshal([]byte(data), &parsed)) + + etcd3, ok := parsed["etcd3"].(map[string]any) + assert.Assert(t, ok, "expected etcd3 section") + assert.Equal(t, etcd3["protocol"], "http") + + hosts, ok := etcd3["hosts"].([]any) + assert.Assert(t, ok, "expected etcd3.hosts to be a list") + assert.Equal(t, hosts[0], "etcd:2379") + }) + + t.Run("etcd DCS - with TLS", func(t *testing.T) { + cluster := new(v1beta1.PostgresCluster) + assert.NilError(t, cluster.Default(context.Background(), nil)) + cluster.Namespace = "some-namespace" + cluster.Name = "cluster-name" + cluster.Spec.Patroni = &v1beta1.PatroniSpec{ + DCS: &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd:2379"}, + TLSSecret: "etcd-tls-secret", + }, + }, + } + cluster.Spec.Patroni.Default() + + data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}) + assert.NilError(t, err) + + var parsed map[string]any + assert.NilError(t, yaml.Unmarshal([]byte(data), &parsed)) + + etcd3, ok := parsed["etcd3"].(map[string]any) + assert.Assert(t, ok, "expected etcd3 section") + assert.Equal(t, etcd3["cacert"], "/etc/patroni/etcd-tls/ca.crt") + assert.Equal(t, etcd3["cert"], "/etc/patroni/etcd-tls/tls.crt") + assert.Equal(t, etcd3["key"], "/etc/patroni/etcd-tls/tls.key") + }) + + t.Run("etcd DCS - multiple endpoints", func(t *testing.T) { + cluster := new(v1beta1.PostgresCluster) + assert.NilError(t, cluster.Default(context.Background(), nil)) + cluster.Namespace = "some-namespace" + cluster.Name = "cluster-name" + cluster.Spec.Patroni = &v1beta1.PatroniSpec{ + DCS: &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{ + "https://etcd0:2379", + "https://etcd1:2379", + "https://etcd2:2379", + }, + }, + }, + } + cluster.Spec.Patroni.Default() + + data, err := clusterYAML(cluster, postgres.HBAs{}, postgres.Parameters{}) + assert.NilError(t, err) + + var parsed map[string]any + assert.NilError(t, yaml.Unmarshal([]byte(data), &parsed)) + + etcd3, ok := parsed["etcd3"].(map[string]any) + assert.Assert(t, ok, "expected etcd3 section") + + hosts, ok := etcd3["hosts"].([]any) + assert.Assert(t, ok, "expected etcd3.hosts to be a list") + assert.Equal(t, len(hosts), 3) + assert.Equal(t, hosts[0], "etcd0:2379") + assert.Equal(t, hosts[1], "etcd1:2379") + assert.Equal(t, hosts[2], "etcd2:2379") + }) +} + +func TestEtcdHosts(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + endpoints []string + want []string + }{ + { + name: "strips https scheme", + endpoints: []string{"https://etcd.cluster.svc:2379"}, + want: []string{"etcd.cluster.svc:2379"}, + }, + { + name: "strips http scheme", + endpoints: []string{"http://etcd:2379"}, + want: []string{"etcd:2379"}, + }, + { + name: "multiple endpoints", + endpoints: []string{"https://etcd0:2379", "https://etcd1:2379"}, + want: []string{"etcd0:2379", "etcd1:2379"}, + }, + { + name: "bare host passthrough", + endpoints: []string{"etcd:2379"}, + want: []string{"etcd:2379"}, + }, + { + name: "empty list", + endpoints: []string{}, + want: []string{}, + }, + } { + t.Run(tt.name, func(t *testing.T) { + got := etcdHosts(tt.endpoints) + assert.Equal(t, len(got), len(tt.want)) + for i := range tt.want { + assert.Equal(t, got[i], tt.want[i]) + } + }) + } +} + +func TestEtcdProtocol(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + endpoints []string + want string + }{ + { + name: "https from first endpoint", + endpoints: []string{"https://etcd:2379"}, + want: "https", + }, + { + name: "http from first endpoint", + endpoints: []string{"http://etcd:2379"}, + want: "http", + }, + { + name: "first endpoint determines protocol", + endpoints: []string{"https://etcd0:2379", "http://etcd1:2379"}, + want: "https", + }, + { + name: "bare host defaults to http", + endpoints: []string{"etcd:2379"}, + want: "http", + }, + { + name: "empty list defaults to http", + endpoints: []string{}, + want: "http", + }, + } { + t.Run(tt.name, func(t *testing.T) { + got := etcdProtocol(tt.endpoints) + assert.Equal(t, got, tt.want) + }) + } } func TestDynamicConfiguration(t *testing.T) { @@ -1123,6 +1344,47 @@ func TestInstanceEnvironment(t *testing.T) { value: /etc/patroni `)) }) + + t.Run("etcd DCS - no PATRONI_KUBERNETES_* vars", func(t *testing.T) { + clusterEtcd := new(v1beta1.PostgresCluster) + assert.NilError(t, clusterEtcd.Default(context.Background(), nil)) + clusterEtcd.Spec.PostgresVersion = 12 + clusterEtcd.Spec.Patroni = &v1beta1.PatroniSpec{ + DCS: &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd:2379"}, + }, + }, + } + clusterEtcd.Spec.Patroni.Default() + + etcdVars := instanceEnvironment(clusterEtcd, podService, leaderService, nil) + + for _, ev := range etcdVars { + assert.Assert(t, ev.Name != "PATRONI_KUBERNETES_POD_IP", + "expected no PATRONI_KUBERNETES_POD_IP for etcd DCS") + assert.Assert(t, ev.Name != "PATRONI_KUBERNETES_PORTS", + "expected no PATRONI_KUBERNETES_PORTS for etcd DCS") + } + + var names []string + for _, ev := range etcdVars { + names = append(names, ev.Name) + } + assert.Assert(t, contains(names, "PATRONI_NAME"), "expected PATRONI_NAME") + assert.Assert(t, contains(names, "PATRONI_POSTGRESQL_CONNECT_ADDRESS"), "expected PATRONI_POSTGRESQL_CONNECT_ADDRESS") + assert.Assert(t, contains(names, "PATRONICTL_CONFIG_FILE"), "expected PATRONICTL_CONFIG_FILE") + }) +} + +func contains(slice []string, s string) bool { + for _, v := range slice { + if v == s { + return true + } + } + return false } func TestInstanceYAML(t *testing.T) { @@ -1250,6 +1512,37 @@ postgresql: restapi: {} tags: {} `, "\t\n")+"\n") + + t.Run("etcd DCS - no kubernetes section", func(t *testing.T) { + clusterEtcd := &v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + pNaming.ToCrunchyAnnotation(pNaming.AnnotationPatroniVersion): "4.0.1", + }, + }, + Spec: v1beta1.PostgresClusterSpec{ + PostgresVersion: 12, + Patroni: &v1beta1.PatroniSpec{ + DCS: &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd:2379"}, + }, + }, + }, + }, + } + etcdInstance := new(v1beta1.PostgresInstanceSetSpec) + + etcdData, err := instanceYAML(clusterEtcd, etcdInstance, nil) + assert.NilError(t, err) + + var parsed map[string]any + assert.NilError(t, yaml.Unmarshal([]byte(etcdData), &parsed)) + + _, hasKubernetes := parsed["kubernetes"] + assert.Assert(t, !hasKubernetes, "expected no kubernetes section for etcd DCS") + }) } func TestPGBackRestCreateReplicaCommand(t *testing.T) { diff --git a/internal/patroni/reconcile.go b/internal/patroni/reconcile.go index ece6cea75..12241dbcd 100644 --- a/internal/patroni/reconcile.go +++ b/internal/patroni/reconcile.go @@ -6,11 +6,13 @@ package patroni import ( "context" + "path" "strings" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" "github.com/percona/percona-postgresql-operator/v2/internal/initialize" "github.com/percona/percona-postgresql-operator/v2/internal/naming" @@ -135,6 +137,56 @@ func InstancePod(ctx context.Context, ReadOnly: true, }) + // Mount etcd TLS Secret and inject auth env vars when the etcd DCS backend is configured. + if p := inCluster.Spec.Patroni; p != nil { + if dcs := p.GetDCS(); dcs != nil && dcs.Type == v1beta1.PatroniDCSTypeEtcd && dcs.Etcd != nil { + etcd := dcs.Etcd + + if etcd.TLSSecret != "" { + etcdTLSVol := corev1.Volume{ + Name: "patroni-etcd-tls", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: etcd.TLSSecret, + DefaultMode: ptr.To(int32(0o400)), + }, + }, + } + outInstancePod.Spec.Volumes = append(outInstancePod.Spec.Volumes, etcdTLSVol) + container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ + Name: "patroni-etcd-tls", + MountPath: path.Join(configDirectory, "etcd-tls"), + ReadOnly: true, + }) + } + + if etcd.AuthSecret != "" { + // Patroni reads PATRONI_ETCD3_USERNAME and PATRONI_ETCD3_PASSWORD + // as overrides for etcd3.username and etcd3.password in the config. + container.Env = append(container.Env, + corev1.EnvVar{ + Name: "PATRONI_ETCD3_USERNAME", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: etcd.AuthSecret}, + Key: "username", + }, + }, + }, + corev1.EnvVar{ + Name: "PATRONI_ETCD3_PASSWORD", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: etcd.AuthSecret}, + Key: "password", + }, + }, + }, + ) + } + } + } + instanceProbes(inCluster, container) // K8SPG-708 diff --git a/percona/controller/pgcluster/controller.go b/percona/controller/pgcluster/controller.go index aacbb4efa..d92e9d7dd 100644 --- a/percona/controller/pgcluster/controller.go +++ b/percona/controller/pgcluster/controller.go @@ -107,6 +107,7 @@ func (r *PGClusterReconciler) SetupWithManager(ctx context.Context, mgr manager. Owns(&v1beta1.PostgresCluster{}). WatchesRawSource(source.Kind(mgr.GetCache(), &corev1.Service{}, r.watchServices())). Watches(&corev1.Secret{}, r.watchEnvFromSecrets()). + Watches(&corev1.Secret{}, r.watchPatroniEtcdSecrets()). WatchesRawSource(source.Kind(mgr.GetCache(), &corev1.Secret{}, r.watchSecrets())). WatchesRawSource(source.Kind(mgr.GetCache(), &batchv1.Job{}, r.watchBackupJobs())). WatchesRawSource(source.Kind(mgr.GetCache(), &v2.PerconaPGBackup{}, r.watchPGBackups())). @@ -194,6 +195,33 @@ func (r *PGClusterReconciler) watchEnvFromSecrets() handler.TypedEventHandler[cl }) } +func (r *PGClusterReconciler) watchPatroniEtcdSecrets() handler.TypedEventHandler[client.Object, reconcile.Request] { + return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { + log := logf.FromContext(ctx).WithName("watchPatroniEtcdSecrets") + + secret, ok := obj.(*corev1.Secret) + if !ok { + return nil + } + + var clusters v2.PerconaPGClusterList + if err := r.Client.List(ctx, &clusters, client.MatchingFields{ + v2.IndexFieldPatroniEtcdSecrets: secret.Name, + }, client.InNamespace(secret.Namespace)); err != nil { + log.Error(err, "Failed to list clusters by patroni etcd secrets index", "key", client.ObjectKeyFromObject(secret).String()) + return nil + } + + reqs := make([]reconcile.Request, 0, len(clusters.Items)) + for _, cr := range clusters.Items { + reqs = append(reqs, reconcile.Request{ + NamespacedName: client.ObjectKeyFromObject(&cr), + }) + } + return reqs + }) +} + func (r *PGClusterReconciler) watchSecrets() handler.TypedFuncs[*corev1.Secret, reconcile.Request] { return handler.TypedFuncs[*corev1.Secret, reconcile.Request]{ UpdateFunc: func(ctx context.Context, e event.TypedUpdateEvent[*corev1.Secret], q workqueue.TypedRateLimitingInterface[reconcile.Request]) { @@ -242,6 +270,10 @@ func (r *PGClusterReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, errors.Wrap(err, "validate PerconaPGCluster") } + if err := r.reconcilePatroniEtcd(ctx, cr); err != nil { + return reconcile.Result{}, errors.Wrap(err, "reconcile patroni etcd") + } + if cr.Spec.OpenShift == nil { cr.Spec.OpenShift = &r.IsOpenShift } diff --git a/percona/controller/pgcluster/patroni_etcd.go b/percona/controller/pgcluster/patroni_etcd.go new file mode 100644 index 000000000..e508bb96a --- /dev/null +++ b/percona/controller/pgcluster/patroni_etcd.go @@ -0,0 +1,69 @@ +// Copyright 2021 - 2024 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package pgcluster + +import ( + "context" + + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + v1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/upstream.pgv2.percona.com/v1beta1" +) + +// reconcilePatroniEtcd validates that secrets referenced in the etcd DCS +// configuration exist and contain the required keys. Emits Warning events and +// returns an error (triggering requeue) when any secret is missing or incomplete. +func (r *PGClusterReconciler) reconcilePatroniEtcd(ctx context.Context, cr *v2.PerconaPGCluster) error { + if cr.Spec.Patroni == nil { + return nil + } + dcs := cr.Spec.Patroni.GetDCS() + if dcs == nil || dcs.Type != v1beta1.PatroniDCSTypeEtcd || dcs.Etcd == nil { + return nil + } + etcd := dcs.Etcd + + if etcd.TLSSecret != "" { + if err := r.requireSecret(ctx, cr, etcd.TLSSecret, "EtcdTLSSecretNotFound", + []string{"ca.crt", "tls.crt", "tls.key"}); err != nil { + return err + } + } + if etcd.AuthSecret != "" { + if err := r.requireSecret(ctx, cr, etcd.AuthSecret, "EtcdAuthSecretNotFound", + []string{"username", "password"}); err != nil { + return err + } + } + return nil +} + +// requireSecret fetches the named Secret and validates that each key in +// requiredKeys is present in Secret.Data. Emits a Warning event and returns +// an error if the Secret is missing or any required key is absent. +func (r *PGClusterReconciler) requireSecret(ctx context.Context, cr *v2.PerconaPGCluster, name, reason string, requiredKeys []string) error { + secret := &corev1.Secret{} + err := r.Client.Get(ctx, types.NamespacedName{Name: name, Namespace: cr.Namespace}, secret) + if client.IgnoreNotFound(err) != nil { + return errors.Wrapf(err, "get secret %q", name) + } + if err != nil { + r.Recorder.Eventf(cr, corev1.EventTypeWarning, reason, + "Secret %q not found in namespace %q", name, cr.Namespace) + return errors.Errorf("secret %q not found", name) + } + for _, key := range requiredKeys { + if _, ok := secret.Data[key]; !ok { + r.Recorder.Eventf(cr, corev1.EventTypeWarning, reason, + "Secret %q is missing required key %q", name, key) + return errors.Errorf("secret %q missing required key %q", name, key) + } + } + return nil +} diff --git a/percona/controller/pgcluster/patroni_etcd_test.go b/percona/controller/pgcluster/patroni_etcd_test.go new file mode 100644 index 000000000..567e60f2b --- /dev/null +++ b/percona/controller/pgcluster/patroni_etcd_test.go @@ -0,0 +1,214 @@ +// Copyright 2021 - 2024 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package pgcluster + +import ( + "context" + "strings" + "testing" + + "gotest.tools/v3/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + v1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/upstream.pgv2.percona.com/v1beta1" +) + +func newEtcdTestReconciler(t *testing.T, objs ...corev1.Secret) *PGClusterReconciler { + t.Helper() + + s := scheme.Scheme + if err := v1beta1.AddToScheme(s); err != nil { + t.Fatalf("AddToScheme: %v", err) + } + if err := v2.AddToScheme(s); err != nil { + t.Fatalf("AddToScheme: %v", err) + } + + builder := fake.NewClientBuilder().WithScheme(s) + for i := range objs { + builder = builder.WithObjects(&objs[i]) + } + + return &PGClusterReconciler{ + Client: builder.Build(), + Recorder: record.NewFakeRecorder(10), + } +} + +func drainEvents(r *PGClusterReconciler) []string { + ch := r.Recorder.(*record.FakeRecorder).Events + var events []string + for { + select { + case e := <-ch: + events = append(events, e) + default: + return events + } + } +} + +func etcdCR(namespace string, dcs *v1beta1.PatroniDCS) *v2.PerconaPGCluster { + return &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cluster", Namespace: namespace}, + Spec: v2.PerconaPGClusterSpec{ + Patroni: &v1beta1.PatroniSpec{DCS: dcs}, + }, + } +} + +func TestReconcilePatroniEtcd(t *testing.T) { + t.Parallel() + ctx := context.Background() + + t.Run("nil patroni spec returns nil", func(t *testing.T) { + r := newEtcdTestReconciler(t) + cr := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster", Namespace: "ns"}, + } + assert.NilError(t, r.reconcilePatroniEtcd(ctx, cr)) + assert.Equal(t, len(drainEvents(r)), 0) + }) + + t.Run("kubernetes DCS returns nil", func(t *testing.T) { + r := newEtcdTestReconciler(t) + cr := etcdCR("ns", &v1beta1.PatroniDCS{Type: v1beta1.PatroniDCSTypeKubernetes}) + assert.NilError(t, r.reconcilePatroniEtcd(ctx, cr)) + assert.Equal(t, len(drainEvents(r)), 0) + }) + + t.Run("nil dcs returns nil", func(t *testing.T) { + r := newEtcdTestReconciler(t) + cr := etcdCR("ns", nil) + assert.NilError(t, r.reconcilePatroniEtcd(ctx, cr)) + assert.Equal(t, len(drainEvents(r)), 0) + }) + + t.Run("etcd DCS no secrets returns nil", func(t *testing.T) { + r := newEtcdTestReconciler(t) + cr := etcdCR("ns", &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd:2379"}, + }, + }) + assert.NilError(t, r.reconcilePatroniEtcd(ctx, cr)) + assert.Equal(t, len(drainEvents(r)), 0) + }) + + t.Run("tls secret missing emits warning and error", func(t *testing.T) { + r := newEtcdTestReconciler(t) + cr := etcdCR("ns", &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd:2379"}, + TLSSecret: "missing-tls-secret", + }, + }) + err := r.reconcilePatroniEtcd(ctx, cr) + assert.ErrorContains(t, err, "missing-tls-secret") + events := drainEvents(r) + assert.Equal(t, len(events), 1) + assert.Assert(t, strings.Contains(events[0], "EtcdTLSSecretNotFound")) + }) + + t.Run("tls secret missing ca.crt key emits warning and error", func(t *testing.T) { + secret := corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "tls-secret", Namespace: "ns"}, + Data: map[string][]byte{ + "tls.crt": []byte("cert"), + "tls.key": []byte("key"), + // ca.crt is intentionally absent + }, + } + r := newEtcdTestReconciler(t, secret) + cr := etcdCR("ns", &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd:2379"}, + TLSSecret: "tls-secret", + }, + }) + err := r.reconcilePatroniEtcd(ctx, cr) + assert.ErrorContains(t, err, "ca.crt") + events := drainEvents(r) + assert.Equal(t, len(events), 1) + assert.Assert(t, strings.Contains(events[0], "EtcdTLSSecretNotFound")) + }) + + t.Run("auth secret missing emits warning and error", func(t *testing.T) { + r := newEtcdTestReconciler(t) + cr := etcdCR("ns", &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd:2379"}, + AuthSecret: "missing-auth-secret", + }, + }) + err := r.reconcilePatroniEtcd(ctx, cr) + assert.ErrorContains(t, err, "missing-auth-secret") + events := drainEvents(r) + assert.Equal(t, len(events), 1) + assert.Assert(t, strings.Contains(events[0], "EtcdAuthSecretNotFound")) + }) + + t.Run("auth secret missing password key emits warning and error", func(t *testing.T) { + secret := corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "auth-secret", Namespace: "ns"}, + Data: map[string][]byte{ + "username": []byte("etcduser"), + // password is intentionally absent + }, + } + r := newEtcdTestReconciler(t, secret) + cr := etcdCR("ns", &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd:2379"}, + AuthSecret: "auth-secret", + }, + }) + err := r.reconcilePatroniEtcd(ctx, cr) + assert.ErrorContains(t, err, "password") + events := drainEvents(r) + assert.Equal(t, len(events), 1) + assert.Assert(t, strings.Contains(events[0], "EtcdAuthSecretNotFound")) + }) + + t.Run("both secrets valid returns nil", func(t *testing.T) { + tlsSecret := corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "tls-secret", Namespace: "ns"}, + Data: map[string][]byte{ + "ca.crt": []byte("ca"), + "tls.crt": []byte("cert"), + "tls.key": []byte("key"), + }, + } + authSecret := corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "auth-secret", Namespace: "ns"}, + Data: map[string][]byte{ + "username": []byte("etcduser"), + "password": []byte("etcdpass"), + }, + } + r := newEtcdTestReconciler(t, tlsSecret, authSecret) + cr := etcdCR("ns", &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd:2379"}, + TLSSecret: "tls-secret", + AuthSecret: "auth-secret", + }, + }) + assert.NilError(t, r.reconcilePatroniEtcd(ctx, cr)) + assert.Equal(t, len(drainEvents(r)), 0) + }) +} + diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go index 7231fc176..f5a703615 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go @@ -326,12 +326,29 @@ func (cr *PerconaPGCluster) Validate() error { ptr.Deref(cr.Spec.Extensions.BuiltIn.PGStatStatements, false) { return errors.New("pg_stat_monitor and pg_stat_statements cannot both be enabled") } + if err := cr.ValidatePatroniDCS(); err != nil { + return err + } if err := cr.ValidateDynamicConfiguration(); err != nil { return err } return nil } +func (cr *PerconaPGCluster) ValidatePatroniDCS() error { + if cr.Spec.Patroni == nil { + return nil + } + dcs := cr.Spec.Patroni.GetDCS() + if dcs == nil || dcs.Type != crunchyv1beta1.PatroniDCSTypeEtcd { + return nil + } + if dcs.Etcd == nil || len(dcs.Etcd.Endpoints) == 0 { + return errors.New("spec.patroni.dcs.etcd.endpoints must be non-empty when type is etcd") + } + return nil +} + func (cr *PerconaPGCluster) ValidateDynamicConfiguration() error { if cr.Spec.Patroni == nil || cr.Spec.Patroni.DynamicConfiguration == nil { return nil @@ -1356,3 +1373,28 @@ var EnvFromSecretsIndexerFunc client.IndexerFunc = func(obj client.Object) []str } return cr.EnvFromSecrets() } + +// IndexFieldPatroniEtcdSecrets is the field index name for secrets referenced +// by spec.patroni.dcs.etcd (tlsSecret and authSecret). +const IndexFieldPatroniEtcdSecrets = "pgCluster.patroniEtcdSecrets" //nolint:gosec + +// PatroniEtcdSecretsIndexerFunc returns the names of secrets referenced by +// the etcd DCS configuration so that Secret changes can trigger reconciliation. +var PatroniEtcdSecretsIndexerFunc client.IndexerFunc = func(obj client.Object) []string { + cr, ok := obj.(*PerconaPGCluster) + if !ok || cr.Spec.Patroni == nil { + return nil + } + dcs := cr.Spec.Patroni.GetDCS() + if dcs == nil || dcs.Etcd == nil { + return nil + } + var names []string + if dcs.Etcd.TLSSecret != "" { + names = append(names, dcs.Etcd.TLSSecret) + } + if dcs.Etcd.AuthSecret != "" { + names = append(names, dcs.Etcd.AuthSecret) + } + return names +} diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types_test.go b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types_test.go index 693640fb2..901b69814 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types_test.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types_test.go @@ -747,3 +747,85 @@ func TestShouldCheckStandbyLag(t *testing.T) { }) } } + +func TestValidatePatroniDCS(t *testing.T) { + // CEL-level validations (mixed schemes, items:Pattern, immutability) are + // enforced at admission and require an envtest API server; they are not + // unit-testable here. + + t.Run("nil patroni spec returns nil", func(t *testing.T) { + cr := &PerconaPGCluster{} + require.NoError(t, cr.ValidatePatroniDCS()) + }) + + t.Run("nil DCS returns nil", func(t *testing.T) { + cr := &PerconaPGCluster{ + Spec: PerconaPGClusterSpec{ + Patroni: &crunchyv1beta1.PatroniSpec{}, + }, + } + require.NoError(t, cr.ValidatePatroniDCS()) + }) + + t.Run("kubernetes DCS returns nil", func(t *testing.T) { + cr := &PerconaPGCluster{ + Spec: PerconaPGClusterSpec{ + Patroni: &crunchyv1beta1.PatroniSpec{ + DCS: &crunchyv1beta1.PatroniDCS{ + Type: crunchyv1beta1.PatroniDCSTypeKubernetes, + }, + }, + }, + } + require.NoError(t, cr.ValidatePatroniDCS()) + }) + + t.Run("etcd DCS with nil etcd field returns error", func(t *testing.T) { + cr := &PerconaPGCluster{ + Spec: PerconaPGClusterSpec{ + Patroni: &crunchyv1beta1.PatroniSpec{ + DCS: &crunchyv1beta1.PatroniDCS{ + Type: crunchyv1beta1.PatroniDCSTypeEtcd, + }, + }, + }, + } + err := cr.ValidatePatroniDCS() + require.Error(t, err) + assert.Contains(t, err.Error(), "endpoints must be non-empty") + }) + + t.Run("etcd DCS with empty endpoints returns error", func(t *testing.T) { + cr := &PerconaPGCluster{ + Spec: PerconaPGClusterSpec{ + Patroni: &crunchyv1beta1.PatroniSpec{ + DCS: &crunchyv1beta1.PatroniDCS{ + Type: crunchyv1beta1.PatroniDCSTypeEtcd, + Etcd: &crunchyv1beta1.PatroniEtcdSpec{ + Endpoints: []string{}, + }, + }, + }, + }, + } + err := cr.ValidatePatroniDCS() + require.Error(t, err) + assert.Contains(t, err.Error(), "endpoints must be non-empty") + }) + + t.Run("etcd DCS with one endpoint returns nil", func(t *testing.T) { + cr := &PerconaPGCluster{ + Spec: PerconaPGClusterSpec{ + Patroni: &crunchyv1beta1.PatroniSpec{ + DCS: &crunchyv1beta1.PatroniDCS{ + Type: crunchyv1beta1.PatroniDCSTypeEtcd, + Etcd: &crunchyv1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd.etcd-cluster.svc:2379"}, + }, + }, + }, + }, + } + require.NoError(t, cr.ValidatePatroniDCS()) + }) +} diff --git a/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go b/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go index 873d6aa7b..8c4da668a 100644 --- a/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go +++ b/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go @@ -46,16 +46,73 @@ type PatroniSpec struct { // +optional CreateReplicaMethods []CreateReplicaMethod `json:"createReplicaMethods,omitempty"` - // TODO(cbandy): Add UseConfigMaps bool, default false. - // TODO(cbandy): Allow other DCS: etcd, raft, etc? - // N.B. changing this will cause downtime. - // - https://patroni.readthedocs.io/en/latest/kubernetes.html + // DCS configures the distributed configuration store backend. + // Defaults to the Kubernetes-native backend (Endpoints). + // N.B. Changing the DCS type causes downtime; all instances must restart simultaneously. + // +optional + DCS *PatroniDCS `json:"dcs,omitempty"` // RemoveDataDirectoryOnDivergedTimelines allows controlling remove_data_directory_on_diverged_timelines in Patroni cluster config. // +optional RemoveDataDirectoryOnDivergedTimelines bool `json:"removeDataDirectoryOnDivergedTimelines,omitempty"` } +// GetDCS returns the DCS configuration, or nil if unset. +func (s *PatroniSpec) GetDCS() *PatroniDCS { + if s == nil { + return nil + } + return s.DCS +} + +// PatroniDCSType identifies which DCS backend Patroni should use. +// +kubebuilder:validation:Enum={kubernetes,etcd} +type PatroniDCSType string + +const ( + PatroniDCSTypeKubernetes PatroniDCSType = "kubernetes" + PatroniDCSTypeEtcd PatroniDCSType = "etcd" +) + +// PatroniDCS configures the Patroni distributed configuration store (DCS). +// +kubebuilder:validation:XValidation:rule="self.type != 'etcd' || (has(self.etcd) && size(self.etcd.endpoints) > 0)",message="etcd.endpoints must be non-empty when type is etcd" +// +kubebuilder:validation:XValidation:rule="!has(oldSelf.type) || oldSelf.type == self.type",message="DCS type is immutable after cluster creation" +type PatroniDCS struct { + // Type of DCS backend. Defaults to "kubernetes". + // Changing this value causes cluster downtime; all instances must restart. + // This field is immutable after cluster creation. + // +optional + // +kubebuilder:default=kubernetes + Type PatroniDCSType `json:"type,omitempty"` + + // Etcd holds settings for the external etcd DCS backend. + // Required when type is "etcd". + // +optional + Etcd *PatroniEtcdSpec `json:"etcd,omitempty"` +} + +// PatroniEtcdSpec defines connectivity to an external etcd cluster used as DCS. +// +kubebuilder:validation:XValidation:rule="self.endpoints.all(e, e.startsWith('https://')) || self.endpoints.all(e, e.startsWith('http://'))",message="all endpoints must use the same scheme (http or https)" +type PatroniEtcdSpec struct { + // Endpoints is the list of etcd endpoints including scheme and port. + // Example: ["https://etcd.etcd-cluster.svc:2379"] + // The scheme of the first endpoint determines the protocol used. + // All endpoints must use the same scheme. + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:items:Pattern=`^https?://[^/]` + Endpoints []string `json:"endpoints"` + + // TLSSecret is the name of a Secret in the same namespace with keys + // ca.crt, tls.crt, and tls.key for mutual TLS with etcd. + // +optional + TLSSecret string `json:"tlsSecret,omitempty"` + + // AuthSecret is the name of a Secret in the same namespace with keys + // username and password for etcd authentication. + // +optional + AuthSecret string `json:"authSecret,omitempty"` +} + // +kubebuilder:validation:Enum={basebackup,pgbackrest} type CreateReplicaMethod string diff --git a/pkg/apis/upstream.pgv2.percona.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/upstream.pgv2.percona.com/v1beta1/zz_generated.deepcopy.go index b3f88c929..5e9d627a2 100644 --- a/pkg/apis/upstream.pgv2.percona.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/upstream.pgv2.percona.com/v1beta1/zz_generated.deepcopy.go @@ -1634,6 +1634,47 @@ func (in *PGUpgradeStatus) DeepCopy() *PGUpgradeStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PatroniDCS) DeepCopyInto(out *PatroniDCS) { + *out = *in + if in.Etcd != nil { + in, out := &in.Etcd, &out.Etcd + *out = new(PatroniEtcdSpec) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PatroniDCS. +func (in *PatroniDCS) DeepCopy() *PatroniDCS { + if in == nil { + return nil + } + out := new(PatroniDCS) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PatroniEtcdSpec) DeepCopyInto(out *PatroniEtcdSpec) { + *out = *in + if in.Endpoints != nil { + in, out := &in.Endpoints, &out.Endpoints + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PatroniEtcdSpec. +func (in *PatroniEtcdSpec) DeepCopy() *PatroniEtcdSpec { + if in == nil { + return nil + } + out := new(PatroniEtcdSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PatroniSpec) DeepCopyInto(out *PatroniSpec) { *out = *in @@ -1663,6 +1704,11 @@ func (in *PatroniSpec) DeepCopyInto(out *PatroniSpec) { *out = make([]CreateReplicaMethod, len(*in)) copy(*out, *in) } + if in.DCS != nil { + in, out := &in.DCS, &out.DCS + *out = new(PatroniDCS) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PatroniSpec. From a07a8831125d4d49384141fe1a735ac0929071f1 Mon Sep 17 00:00:00 2001 From: yoav-katz Date: Fri, 19 Jun 2026 00:43:21 +0300 Subject: [PATCH 02/18] fix --- docs/etcd-dcs.md | 132 ----------------------------------------------- 1 file changed, 132 deletions(-) delete mode 100644 docs/etcd-dcs.md diff --git a/docs/etcd-dcs.md b/docs/etcd-dcs.md deleted file mode 100644 index 6f2189ed6..000000000 --- a/docs/etcd-dcs.md +++ /dev/null @@ -1,132 +0,0 @@ -# External etcd DCS for Patroni - -By default, the operator uses Kubernetes Endpoints as the Patroni distributed configuration store (DCS). Clusters on managed Kubernetes platforms where the control plane API is not accessible from within the cluster, or clusters requiring an HA DCS independent of the Kubernetes API, can use an external etcd cluster instead. - -## Configuration - -Set `spec.patroni.dcs.type: etcd` and provide at least one endpoint: - -```yaml -spec: - patroni: - dcs: - type: etcd - etcd: - endpoints: - - https://etcd.etcd-cluster.svc:2379 -``` - -### Field reference - -| Field | Type | Required | Description | -|---|---|---|---| -| `dcs.type` | `kubernetes` \| `etcd` | No (default: `kubernetes`) | DCS backend. **Immutable after cluster creation.** | -| `dcs.etcd.endpoints` | `[]string` | Yes (when type is etcd) | etcd endpoint URLs including scheme and port. All endpoints must use the same scheme (`http://` or `https://`). | -| `dcs.etcd.tlsSecret` | `string` | No | Name of a Secret in the same namespace containing TLS client credentials. | -| `dcs.etcd.authSecret` | `string` | No | Name of a Secret in the same namespace containing etcd username/password. | - -## Secrets - -### TLS secret (`tlsSecret`) - -Required when the etcd cluster uses TLS. The Secret must contain exactly these keys: - -``` -ca.crt — PEM-encoded CA certificate that signed the etcd server certificate -tls.crt — PEM-encoded client certificate (for mutual TLS) -tls.key — PEM-encoded private key for the client certificate -``` - -Example: - -```bash -kubectl create secret generic etcd-tls \ - --from-file=ca.crt=ca.pem \ - --from-file=tls.crt=client.pem \ - --from-file=tls.key=client-key.pem -``` - -Reference it in the CR: - -```yaml -spec: - patroni: - dcs: - type: etcd - etcd: - endpoints: - - https://etcd.etcd-cluster.svc:2379 - tlsSecret: etcd-tls -``` - -The secret is mounted read-only at `/etc/patroni/etcd-tls/` inside each PostgreSQL pod. - -### Auth secret (`authSecret`) - -Required when the etcd cluster uses username/password authentication. The Secret must contain exactly these keys: - -``` -username — etcd username -password — etcd password -``` - -Example: - -```bash -kubectl create secret generic etcd-auth \ - --from-literal=username=patroni \ - --from-literal=password=supersecret -``` - -Reference it in the CR: - -```yaml -spec: - patroni: - dcs: - type: etcd - etcd: - endpoints: - - https://etcd.etcd-cluster.svc:2379 - authSecret: etcd-auth -``` - -Credentials are injected as `PATRONI_ETCD3_USERNAME` and `PATRONI_ETCD3_PASSWORD` environment variables in each PostgreSQL pod. They are **not** written to any ConfigMap. - -## Secret rotation - -**Rotating TLS certificates or auth credentials requires a rolling restart of all PostgreSQL pods.** Patroni loads DCS credentials and TLS mounts only at startup. It does not watch or dynamically reload environment variables or volume mounts after the process starts. - -To rotate credentials: - -1. Update the Secret with new values: - ```bash - kubectl create secret generic etcd-auth \ - --from-literal=username=patroni \ - --from-literal=password=newsecret \ - --dry-run=client -o yaml | kubectl apply -f - - ``` - -2. Trigger a rolling restart of each instance StatefulSet: - ```bash - kubectl rollout restart statefulset/-instance1 - ``` - - Repeat for each instance set in the cluster. - -## Immutability - -`spec.patroni.dcs.type` is immutable after cluster creation. The admission webhook rejects updates that attempt to change this field. Switching DCS backends requires deleting and recreating the cluster. - -## Validation - -The operator validates the etcd configuration at both admission time and during each reconcile: - -- **Admission**: CEL rules reject empty endpoints when `type: etcd`, mixed `http://`/`https://` schemes in the same endpoint list, endpoints that do not match `^https?://`, and changes to `dcs.type` after creation. -- **Reconcile**: If `tlsSecret` or `authSecret` is set but the referenced Secret does not exist or is missing a required key, the operator emits a `Warning` event on the `PerconaPGCluster` object and requeues the reconcile. No PostgreSQL pods are affected until the secret is corrected. - -Inspect events with: - -```bash -kubectl describe perconapgcluster -``` From e5ecd24c3e1cc589878c275a1fa6e348d1982e4a Mon Sep 17 00:00:00 2001 From: yoav-katz Date: Fri, 19 Jun 2026 01:20:06 +0300 Subject: [PATCH 03/18] Self Code Review --- internal/patroni/config_test.go | 16 +--- internal/patroni/rbac.go | 47 ++++++----- percona/controller/pgcluster/controller.go | 82 ++++++++----------- percona/controller/pgcluster/patroni_etcd.go | 27 +++--- .../controller/pgcluster/patroni_etcd_test.go | 21 ++++- .../v2/perconapgcluster_types.go | 2 +- .../v1beta1/patroni_types.go | 1 + .../v1beta1/zz_generated.deepcopy.go | 1 - 8 files changed, 105 insertions(+), 92 deletions(-) diff --git a/internal/patroni/config_test.go b/internal/patroni/config_test.go index e6cfb532a..826484231 100644 --- a/internal/patroni/config_test.go +++ b/internal/patroni/config_test.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "path/filepath" + "slices" "strings" "testing" @@ -1372,21 +1373,12 @@ func TestInstanceEnvironment(t *testing.T) { for _, ev := range etcdVars { names = append(names, ev.Name) } - assert.Assert(t, contains(names, "PATRONI_NAME"), "expected PATRONI_NAME") - assert.Assert(t, contains(names, "PATRONI_POSTGRESQL_CONNECT_ADDRESS"), "expected PATRONI_POSTGRESQL_CONNECT_ADDRESS") - assert.Assert(t, contains(names, "PATRONICTL_CONFIG_FILE"), "expected PATRONICTL_CONFIG_FILE") + assert.Assert(t, slices.Contains(names, "PATRONI_NAME"), "expected PATRONI_NAME") + assert.Assert(t, slices.Contains(names, "PATRONI_POSTGRESQL_CONNECT_ADDRESS"), "expected PATRONI_POSTGRESQL_CONNECT_ADDRESS") + assert.Assert(t, slices.Contains(names, "PATRONICTL_CONFIG_FILE"), "expected PATRONICTL_CONFIG_FILE") }) } -func contains(slice []string, s string) bool { - for _, v := range slice { - if v == s { - return true - } - } - return false -} - func TestInstanceYAML(t *testing.T) { t.Parallel() diff --git a/internal/patroni/rbac.go b/internal/patroni/rbac.go index 2e19339ed..de6793906 100644 --- a/internal/patroni/rbac.go +++ b/internal/patroni/rbac.go @@ -34,22 +34,29 @@ import ( // Permissions returns the RBAC rules Patroni needs for cluster. func Permissions(cluster *v1beta1.PostgresCluster) []rbacv1.PolicyRule { - // TODO(cbandy): This must change when using ConfigMaps for DCS. - rules := make([]rbacv1.PolicyRule, 0, 4) - rules = append(rules, rbacv1.PolicyRule{ - APIGroups: []string{corev1.SchemeGroupVersion.Group}, - Resources: []string{"endpoints"}, - Verbs: []string{"create", "deletecollection", "get", "list", "patch", "watch"}, - }) + usingKubernetesDCS := cluster.Spec.Patroni == nil || + cluster.Spec.Patroni.GetDCS() == nil || + cluster.Spec.Patroni.GetDCS().Type != v1beta1.PatroniDCSTypeEtcd - if cluster.Spec.OpenShift != nil && *cluster.Spec.OpenShift { + if usingKubernetesDCS { + // When using Endpoints for DCS, "create", "list", "patch", and "watch" + // are required. The `patronictl scaffold` and `patronictl remove` + // commands require "deletecollection". rules = append(rules, rbacv1.PolicyRule{ APIGroups: []string{corev1.SchemeGroupVersion.Group}, - Resources: []string{"endpoints/restricted"}, - Verbs: []string{"create"}, + Resources: []string{"endpoints"}, + Verbs: []string{"create", "deletecollection", "get", "list", "patch", "watch"}, }) + + if cluster.Spec.OpenShift != nil && *cluster.Spec.OpenShift { + rules = append(rules, rbacv1.PolicyRule{ + APIGroups: []string{corev1.SchemeGroupVersion.Group}, + Resources: []string{"endpoints/restricted"}, + Verbs: []string{"create"}, + }) + } } rules = append(rules, rbacv1.PolicyRule{ @@ -58,15 +65,17 @@ func Permissions(cluster *v1beta1.PostgresCluster) []rbacv1.PolicyRule { Verbs: []string{"get", "list", "patch", "watch"}, }) - // When using Endpoints for DCS, Patroni tries to create the "{scope}-config" service. - // NOTE(cbandy): The PostgresCluster controller already creates this Service; - // it might be possible to eliminate this permission if it also created the - // Endpoints. - rules = append(rules, rbacv1.PolicyRule{ - APIGroups: []string{corev1.SchemeGroupVersion.Group}, - Resources: []string{"services"}, - Verbs: []string{"create"}, - }) + if usingKubernetesDCS { + // Patroni tries to create the "{scope}-config" service. + // NOTE(cbandy): The PostgresCluster controller already creates this + // Service; it might be possible to eliminate this permission if it + // also created the Endpoints. + rules = append(rules, rbacv1.PolicyRule{ + APIGroups: []string{corev1.SchemeGroupVersion.Group}, + Resources: []string{"services"}, + Verbs: []string{"create"}, + }) + } return rules } diff --git a/percona/controller/pgcluster/controller.go b/percona/controller/pgcluster/controller.go index d92e9d7dd..1a77dbd0c 100644 --- a/percona/controller/pgcluster/controller.go +++ b/percona/controller/pgcluster/controller.go @@ -106,8 +106,10 @@ func (r *PGClusterReconciler) SetupWithManager(ctx context.Context, mgr manager. For(&v2.PerconaPGCluster{}). Owns(&v1beta1.PostgresCluster{}). WatchesRawSource(source.Kind(mgr.GetCache(), &corev1.Service{}, r.watchServices())). - Watches(&corev1.Secret{}, r.watchEnvFromSecrets()). - Watches(&corev1.Secret{}, r.watchPatroniEtcdSecrets()). + Watches(&corev1.Secret{}, r.watchNamedSecrets( + namedSecretIndex{v2.IndexFieldEnvFromSecrets, "watchEnvFromSecrets"}, + namedSecretIndex{v2.IndexFieldPatroniEtcdSecrets, "watchPatroniEtcdSecrets"}, + )). WatchesRawSource(source.Kind(mgr.GetCache(), &corev1.Secret{}, r.watchSecrets())). WatchesRawSource(source.Kind(mgr.GetCache(), &batchv1.Job{}, r.watchBackupJobs())). WatchesRawSource(source.Kind(mgr.GetCache(), &v2.PerconaPGBackup{}, r.watchPGBackups())). @@ -168,55 +170,41 @@ func (r *PGClusterReconciler) watchPGBackups() handler.TypedFuncs[*v2.PerconaPGB } } -func (r *PGClusterReconciler) watchEnvFromSecrets() handler.TypedEventHandler[client.Object, reconcile.Request] { - return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { - log := logf.FromContext(ctx).WithName("watchEnvFromSecrets") - - secret, ok := obj.(*corev1.Secret) - if !ok { - return nil - } - - var clusters v2.PerconaPGClusterList - if err := r.Client.List(ctx, &clusters, client.MatchingFields{ - v2.IndexFieldEnvFromSecrets: secret.Name, - }, client.InNamespace(secret.Namespace)); err != nil { - log.Error(err, "Failed to list clusters by env from secrets index failed", "key", client.ObjectKeyFromObject(secret).String()) - return nil - } - - reqs := make([]reconcile.Request, 0, len(clusters.Items)) - for _, cr := range clusters.Items { - reqs = append(reqs, reconcile.Request{ - NamespacedName: client.ObjectKeyFromObject(&cr), - }) - } - return reqs - }) +// namedSecretIndex pairs a field index name with a log label for watchNamedSecrets. +type namedSecretIndex struct { + field string + logName string } -func (r *PGClusterReconciler) watchPatroniEtcdSecrets() handler.TypedEventHandler[client.Object, reconcile.Request] { +// watchNamedSecrets returns a single handler that, for each Secret event, +// queries all provided field indexes and enqueues deduplicated reconcile +// requests. Using one Watches registration avoids redundant per-event +// iterations when multiple indexes cover the same Secret GVK. +func (r *PGClusterReconciler) watchNamedSecrets(indexes ...namedSecretIndex) handler.TypedEventHandler[client.Object, reconcile.Request] { return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { - log := logf.FromContext(ctx).WithName("watchPatroniEtcdSecrets") - secret, ok := obj.(*corev1.Secret) if !ok { return nil } - var clusters v2.PerconaPGClusterList - if err := r.Client.List(ctx, &clusters, client.MatchingFields{ - v2.IndexFieldPatroniEtcdSecrets: secret.Name, - }, client.InNamespace(secret.Namespace)); err != nil { - log.Error(err, "Failed to list clusters by patroni etcd secrets index", "key", client.ObjectKeyFromObject(secret).String()) - return nil - } - - reqs := make([]reconcile.Request, 0, len(clusters.Items)) - for _, cr := range clusters.Items { - reqs = append(reqs, reconcile.Request{ - NamespacedName: client.ObjectKeyFromObject(&cr), - }) + seen := make(map[types.NamespacedName]struct{}) + var reqs []reconcile.Request + for _, idx := range indexes { + log := logf.FromContext(ctx).WithName(idx.logName) + var clusters v2.PerconaPGClusterList + if err := r.Client.List(ctx, &clusters, + client.MatchingFields{idx.field: secret.Name}, + client.InNamespace(secret.Namespace)); err != nil { + log.Error(err, "list clusters by secret index", "key", client.ObjectKeyFromObject(secret).String()) + continue + } + for _, cl := range clusters.Items { + key := client.ObjectKeyFromObject(&cl) + if _, dup := seen[key]; !dup { + seen[key] = struct{}{} + reqs = append(reqs, reconcile.Request{NamespacedName: key}) + } + } } return reqs }) @@ -270,10 +258,6 @@ func (r *PGClusterReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, errors.Wrap(err, "validate PerconaPGCluster") } - if err := r.reconcilePatroniEtcd(ctx, cr); err != nil { - return reconcile.Result{}, errors.Wrap(err, "reconcile patroni etcd") - } - if cr.Spec.OpenShift == nil { cr.Spec.OpenShift = &r.IsOpenShift } @@ -309,6 +293,10 @@ func (r *PGClusterReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, errors.Wrap(err, "ensure finalizers") } + if err := r.reconcilePatroniEtcd(ctx, cr); err != nil { + return reconcile.Result{}, errors.Wrap(err, "reconcile patroni etcd") + } + if err := r.reconcilePatroniVersionCheckPod(ctx, cr); err != nil { if errors.Is(err, errPatroniVersionCheckWait) { return reconcile.Result{ diff --git a/percona/controller/pgcluster/patroni_etcd.go b/percona/controller/pgcluster/patroni_etcd.go index e508bb96a..43c14dfe8 100644 --- a/percona/controller/pgcluster/patroni_etcd.go +++ b/percona/controller/pgcluster/patroni_etcd.go @@ -7,6 +7,7 @@ package pgcluster import ( "context" + "github.com/hashicorp/go-multierror" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" @@ -17,8 +18,9 @@ import ( ) // reconcilePatroniEtcd validates that secrets referenced in the etcd DCS -// configuration exist and contain the required keys. Emits Warning events and -// returns an error (triggering requeue) when any secret is missing or incomplete. +// configuration exist and contain the required keys. Emits Warning events for +// every problem found and returns a combined error so the operator surfaces all +// issues in a single reconcile cycle. func (r *PGClusterReconciler) reconcilePatroniEtcd(ctx context.Context, cr *v2.PerconaPGCluster) error { if cr.Spec.Patroni == nil { return nil @@ -29,38 +31,43 @@ func (r *PGClusterReconciler) reconcilePatroniEtcd(ctx context.Context, cr *v2.P } etcd := dcs.Etcd + var errs []error if etcd.TLSSecret != "" { - if err := r.requireSecret(ctx, cr, etcd.TLSSecret, "EtcdTLSSecretNotFound", + if err := r.requireSecret(ctx, cr, etcd.TLSSecret, + "EtcdTLSSecretNotFound", "EtcdTLSSecretInvalid", []string{"ca.crt", "tls.crt", "tls.key"}); err != nil { - return err + errs = append(errs, err) } } if etcd.AuthSecret != "" { - if err := r.requireSecret(ctx, cr, etcd.AuthSecret, "EtcdAuthSecretNotFound", + if err := r.requireSecret(ctx, cr, etcd.AuthSecret, + "EtcdAuthSecretNotFound", "EtcdAuthSecretInvalid", []string{"username", "password"}); err != nil { - return err + errs = append(errs, err) } } - return nil + return multierror.Append(nil, errs...).ErrorOrNil() } // requireSecret fetches the named Secret and validates that each key in // requiredKeys is present in Secret.Data. Emits a Warning event and returns // an error if the Secret is missing or any required key is absent. -func (r *PGClusterReconciler) requireSecret(ctx context.Context, cr *v2.PerconaPGCluster, name, reason string, requiredKeys []string) error { +// notFoundReason is used when the Secret does not exist; invalidReason is used +// when the Secret exists but is missing a required key. +func (r *PGClusterReconciler) requireSecret(ctx context.Context, cr *v2.PerconaPGCluster, name, notFoundReason, invalidReason string, requiredKeys []string) error { secret := &corev1.Secret{} err := r.Client.Get(ctx, types.NamespacedName{Name: name, Namespace: cr.Namespace}, secret) if client.IgnoreNotFound(err) != nil { return errors.Wrapf(err, "get secret %q", name) } if err != nil { - r.Recorder.Eventf(cr, corev1.EventTypeWarning, reason, + r.Recorder.Eventf(cr, corev1.EventTypeWarning, notFoundReason, "Secret %q not found in namespace %q", name, cr.Namespace) return errors.Errorf("secret %q not found", name) } for _, key := range requiredKeys { if _, ok := secret.Data[key]; !ok { - r.Recorder.Eventf(cr, corev1.EventTypeWarning, reason, + r.Recorder.Eventf(cr, corev1.EventTypeWarning, invalidReason, "Secret %q is missing required key %q", name, key) return errors.Errorf("secret %q missing required key %q", name, key) } diff --git a/percona/controller/pgcluster/patroni_etcd_test.go b/percona/controller/pgcluster/patroni_etcd_test.go index 567e60f2b..7af4bed72 100644 --- a/percona/controller/pgcluster/patroni_etcd_test.go +++ b/percona/controller/pgcluster/patroni_etcd_test.go @@ -140,7 +140,7 @@ func TestReconcilePatroniEtcd(t *testing.T) { assert.ErrorContains(t, err, "ca.crt") events := drainEvents(r) assert.Equal(t, len(events), 1) - assert.Assert(t, strings.Contains(events[0], "EtcdTLSSecretNotFound")) + assert.Assert(t, strings.Contains(events[0], "EtcdTLSSecretInvalid")) }) t.Run("auth secret missing emits warning and error", func(t *testing.T) { @@ -179,7 +179,7 @@ func TestReconcilePatroniEtcd(t *testing.T) { assert.ErrorContains(t, err, "password") events := drainEvents(r) assert.Equal(t, len(events), 1) - assert.Assert(t, strings.Contains(events[0], "EtcdAuthSecretNotFound")) + assert.Assert(t, strings.Contains(events[0], "EtcdAuthSecretInvalid")) }) t.Run("both secrets valid returns nil", func(t *testing.T) { @@ -210,5 +210,22 @@ func TestReconcilePatroniEtcd(t *testing.T) { assert.NilError(t, r.reconcilePatroniEtcd(ctx, cr)) assert.Equal(t, len(drainEvents(r)), 0) }) + + t.Run("both secrets missing reports both in one cycle", func(t *testing.T) { + r := newEtcdTestReconciler(t) + cr := etcdCR("ns", &v1beta1.PatroniDCS{ + Type: v1beta1.PatroniDCSTypeEtcd, + Etcd: &v1beta1.PatroniEtcdSpec{ + Endpoints: []string{"https://etcd:2379"}, + TLSSecret: "missing-tls", + AuthSecret: "missing-auth", + }, + }) + err := r.reconcilePatroniEtcd(ctx, cr) + assert.ErrorContains(t, err, "missing-tls") + assert.ErrorContains(t, err, "missing-auth") + events := drainEvents(r) + assert.Equal(t, len(events), 2) + }) } diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go index f5a703615..8cda380ec 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go @@ -1386,7 +1386,7 @@ var PatroniEtcdSecretsIndexerFunc client.IndexerFunc = func(obj client.Object) [ return nil } dcs := cr.Spec.Patroni.GetDCS() - if dcs == nil || dcs.Etcd == nil { + if dcs == nil || dcs.Type != crunchyv1beta1.PatroniDCSTypeEtcd || dcs.Etcd == nil { return nil } var names []string diff --git a/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go b/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go index 8c4da668a..dc93597b2 100644 --- a/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go +++ b/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go @@ -4,6 +4,7 @@ package v1beta1 +// +kubebuilder:validation:XValidation:rule="(has(oldSelf.dcs) ? oldSelf.dcs.type : 'kubernetes') == (has(self.dcs) ? self.dcs.type : 'kubernetes')",message="DCS type is immutable after cluster creation" type PatroniSpec struct { // Patroni dynamic configuration settings. Changes to this value will be // automatically reloaded without validation. Changes to certain PostgreSQL diff --git a/pkg/apis/upstream.pgv2.percona.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/upstream.pgv2.percona.com/v1beta1/zz_generated.deepcopy.go index 5e9d627a2..f28303fab 100644 --- a/pkg/apis/upstream.pgv2.percona.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/upstream.pgv2.percona.com/v1beta1/zz_generated.deepcopy.go @@ -1634,7 +1634,6 @@ func (in *PGUpgradeStatus) DeepCopy() *PGUpgradeStatus { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PatroniDCS) DeepCopyInto(out *PatroniDCS) { *out = *in From 241cf083fe2ab6785c51e170d08dfdff71ded034 Mon Sep 17 00:00:00 2001 From: yoav-katz Date: Fri, 19 Jun 2026 18:06:09 +0300 Subject: [PATCH 04/18] feat(e2e_tests) --- e2e-tests/tests/etcd-dcs/00-assert.yaml | 24 +++ .../tests/etcd-dcs/00-deploy-operator.yaml | 13 ++ e2e-tests/tests/etcd-dcs/01-assert.yaml | 10 ++ e2e-tests/tests/etcd-dcs/01-etcd-setup.yaml | 61 ++++++++ e2e-tests/tests/etcd-dcs/02-assert.yaml | 106 ++++++++++++++ .../tests/etcd-dcs/02-create-cluster.yaml | 19 +++ e2e-tests/tests/etcd-dcs/03-write-data.yaml | 16 ++ e2e-tests/tests/etcd-dcs/04-assert.yaml | 10 ++ .../tests/etcd-dcs/04-read-from-primary.yaml | 13 ++ .../etcd-dcs/05-check-password-leak.yaml | 11 ++ e2e-tests/tests/etcd-dcs/06-assert.yaml | 138 ++++++++++++++++++ .../etcd-dcs/07-check-patroni-config.yaml | 20 +++ .../tests/etcd-dcs/08-check-patronictl.yaml | 20 +++ .../tests/etcd-dcs/09-check-etcd-keys.yaml | 16 ++ .../etcd-dcs/10-check-no-warning-events.yaml | 20 +++ .../99-remove-cluster-gracefully.yaml | 30 ++++ 16 files changed, 527 insertions(+) create mode 100644 e2e-tests/tests/etcd-dcs/00-assert.yaml create mode 100644 e2e-tests/tests/etcd-dcs/00-deploy-operator.yaml create mode 100644 e2e-tests/tests/etcd-dcs/01-assert.yaml create mode 100644 e2e-tests/tests/etcd-dcs/01-etcd-setup.yaml create mode 100644 e2e-tests/tests/etcd-dcs/02-assert.yaml create mode 100644 e2e-tests/tests/etcd-dcs/02-create-cluster.yaml create mode 100644 e2e-tests/tests/etcd-dcs/03-write-data.yaml create mode 100644 e2e-tests/tests/etcd-dcs/04-assert.yaml create mode 100644 e2e-tests/tests/etcd-dcs/04-read-from-primary.yaml create mode 100644 e2e-tests/tests/etcd-dcs/05-check-password-leak.yaml create mode 100644 e2e-tests/tests/etcd-dcs/06-assert.yaml create mode 100644 e2e-tests/tests/etcd-dcs/07-check-patroni-config.yaml create mode 100644 e2e-tests/tests/etcd-dcs/08-check-patronictl.yaml create mode 100644 e2e-tests/tests/etcd-dcs/09-check-etcd-keys.yaml create mode 100644 e2e-tests/tests/etcd-dcs/10-check-no-warning-events.yaml create mode 100644 e2e-tests/tests/etcd-dcs/99-remove-cluster-gracefully.yaml diff --git a/e2e-tests/tests/etcd-dcs/00-assert.yaml b/e2e-tests/tests/etcd-dcs/00-assert.yaml new file mode 100644 index 000000000..ae5a062d8 --- /dev/null +++ b/e2e-tests/tests/etcd-dcs/00-assert.yaml @@ -0,0 +1,24 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: perconapgclusters.pgv2.percona.com +spec: + group: pgv2.percona.com + names: + kind: PerconaPGCluster + listKind: PerconaPGClusterList + plural: perconapgclusters + singular: perconapgcluster + scope: Namespaced +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: check-operator-deploy-status +timeout: 120 +commands: + - script: kubectl assert exist-enhanced deployment percona-postgresql-operator -n ${OPERATOR_NS:-$NAMESPACE} --field-selector status.readyReplicas=1 diff --git a/e2e-tests/tests/etcd-dcs/00-deploy-operator.yaml b/e2e-tests/tests/etcd-dcs/00-deploy-operator.yaml new file mode 100644 index 000000000..1aaca58be --- /dev/null +++ b/e2e-tests/tests/etcd-dcs/00-deploy-operator.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + init_temp_dir # do this only in the first TestStep + + deploy_operator + deploy_client diff --git a/e2e-tests/tests/etcd-dcs/01-assert.yaml b/e2e-tests/tests/etcd-dcs/01-assert.yaml new file mode 100644 index 000000000..c335c6b20 --- /dev/null +++ b/e2e-tests/tests/etcd-dcs/01-assert.yaml @@ -0,0 +1,10 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: etcd +status: + readyReplicas: 1 diff --git a/e2e-tests/tests/etcd-dcs/01-etcd-setup.yaml b/e2e-tests/tests/etcd-dcs/01-etcd-setup.yaml new file mode 100644 index 000000000..3bb80aea0 --- /dev/null +++ b/e2e-tests/tests/etcd-dcs/01-etcd-setup.yaml @@ -0,0 +1,61 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + kubectl -n "${NAMESPACE}" apply -f - < Date: Fri, 19 Jun 2026 20:27:27 +0300 Subject: [PATCH 05/18] Update run-pr.csv --- e2e-tests/run-pr.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/e2e-tests/run-pr.csv b/e2e-tests/run-pr.csv index 110dff48e..d95708ed7 100644 --- a/e2e-tests/run-pr.csv +++ b/e2e-tests/run-pr.csv @@ -33,3 +33,4 @@ telemetry-transfer upgrade-consistency upgrade-minor users +etcd-dcs \ No newline at end of file From f12b81702421408f5ccc03f60edfeea7d2081095 Mon Sep 17 00:00:00 2001 From: yoav-katz <111126160+yoav-katz@users.noreply.github.com> Date: Fri, 19 Jun 2026 20:27:47 +0300 Subject: [PATCH 06/18] Update run-release.csv --- e2e-tests/run-release.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/e2e-tests/run-release.csv b/e2e-tests/run-release.csv index 4a6fb3d52..081dc3a0b 100644 --- a/e2e-tests/run-release.csv +++ b/e2e-tests/run-release.csv @@ -38,6 +38,7 @@ telemetry-transfer upgrade-consistency upgrade-minor users +etcd-dcs migration-from-crunchy-backup-restore migration-from-crunchy-pv migration-from-crunchy-standby From 3f1de899354f8312ffd9bfad8c07b630038c7be8 Mon Sep 17 00:00:00 2001 From: yoav-katz Date: Sat, 20 Jun 2026 00:34:46 +0300 Subject: [PATCH 07/18] fix(crd) --- .../pgv2.percona.com_perconapgclusters.yaml | 62 +++++++++ .../pgv2.percona.com_perconapgclusters.yaml | 62 +++++++++ ...eam.pgv2.percona.com_postgresclusters.yaml | 62 +++++++++ deploy/crd.yaml | 124 ++++++++++++++++++ e2e-tests/tests/etcd-dcs/02-assert.yaml | 18 +-- .../tests/etcd-dcs/09-check-etcd-keys.yaml | 2 +- .../controller/postgrescluster/patroni.go | 44 ++++--- 7 files changed, 337 insertions(+), 37 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml index 594b73af5..46fb7df0e 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml @@ -22031,6 +22031,64 @@ spec: - pgbackrest type: string type: array + dcs: + description: |- + DCS configures the distributed configuration store backend. + Defaults to the Kubernetes-native backend (Endpoints). + N.B. Changing the DCS type causes downtime; all instances must restart simultaneously. + properties: + etcd: + description: |- + Etcd holds settings for the external etcd DCS backend. + Required when type is "etcd". + properties: + authSecret: + description: |- + AuthSecret is the name of a Secret in the same namespace with keys + username and password for etcd authentication. + type: string + endpoints: + description: |- + Endpoints is the list of etcd endpoints including scheme and port. + Example: ["https://etcd.etcd-cluster.svc:2379"] + The scheme of the first endpoint determines the protocol used. + All endpoints must use the same scheme. + items: + pattern: ^https?://[^/] + type: string + maxItems: 7 + minItems: 1 + type: array + tlsSecret: + description: |- + TLSSecret is the name of a Secret in the same namespace with keys + ca.crt, tls.crt, and tls.key for mutual TLS with etcd. + type: string + required: + - endpoints + type: object + x-kubernetes-validations: + - message: all endpoints must use the same scheme (http or + https) + rule: self.endpoints.all(e, e.startsWith('https://')) || + self.endpoints.all(e, e.startsWith('http://')) + type: + default: kubernetes + description: |- + Type of DCS backend. Defaults to "kubernetes". + Changing this value causes cluster downtime; all instances must restart. + This field is immutable after cluster creation. + enum: + - kubernetes + - etcd + type: string + type: object + x-kubernetes-validations: + - message: etcd.endpoints must be non-empty when type is etcd + rule: self.type != 'etcd' || (has(self.etcd) && size(self.etcd.endpoints) + > 0) + - message: DCS type is immutable after cluster creation + rule: '!has(oldSelf.type) || oldSelf.type == self.type' dynamicConfiguration: description: |- Patroni dynamic configuration settings. Changes to this value will be @@ -22101,6 +22159,10 @@ spec: minimum: 1 type: integer type: object + x-kubernetes-validations: + - message: DCS type is immutable after cluster creation + rule: '(has(oldSelf.dcs) ? oldSelf.dcs.type : ''kubernetes'') == + (has(self.dcs) ? self.dcs.type : ''kubernetes'')' pause: description: |- Whether or not the PostgreSQL cluster should be stopped. diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index 195fd6a63..8b7fb1622 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -22726,6 +22726,64 @@ spec: - pgbackrest type: string type: array + dcs: + description: |- + DCS configures the distributed configuration store backend. + Defaults to the Kubernetes-native backend (Endpoints). + N.B. Changing the DCS type causes downtime; all instances must restart simultaneously. + properties: + etcd: + description: |- + Etcd holds settings for the external etcd DCS backend. + Required when type is "etcd". + properties: + authSecret: + description: |- + AuthSecret is the name of a Secret in the same namespace with keys + username and password for etcd authentication. + type: string + endpoints: + description: |- + Endpoints is the list of etcd endpoints including scheme and port. + Example: ["https://etcd.etcd-cluster.svc:2379"] + The scheme of the first endpoint determines the protocol used. + All endpoints must use the same scheme. + items: + pattern: ^https?://[^/] + type: string + maxItems: 7 + minItems: 1 + type: array + tlsSecret: + description: |- + TLSSecret is the name of a Secret in the same namespace with keys + ca.crt, tls.crt, and tls.key for mutual TLS with etcd. + type: string + required: + - endpoints + type: object + x-kubernetes-validations: + - message: all endpoints must use the same scheme (http or + https) + rule: self.endpoints.all(e, e.startsWith('https://')) || + self.endpoints.all(e, e.startsWith('http://')) + type: + default: kubernetes + description: |- + Type of DCS backend. Defaults to "kubernetes". + Changing this value causes cluster downtime; all instances must restart. + This field is immutable after cluster creation. + enum: + - kubernetes + - etcd + type: string + type: object + x-kubernetes-validations: + - message: etcd.endpoints must be non-empty when type is etcd + rule: self.type != 'etcd' || (has(self.etcd) && size(self.etcd.endpoints) + > 0) + - message: DCS type is immutable after cluster creation + rule: '!has(oldSelf.type) || oldSelf.type == self.type' dynamicConfiguration: description: |- Patroni dynamic configuration settings. Changes to this value will be @@ -22796,6 +22854,10 @@ spec: minimum: 1 type: integer type: object + x-kubernetes-validations: + - message: DCS type is immutable after cluster creation + rule: '(has(oldSelf.dcs) ? oldSelf.dcs.type : ''kubernetes'') == + (has(self.dcs) ? self.dcs.type : ''kubernetes'')' pause: description: |- Whether or not the PostgreSQL cluster should be stopped. diff --git a/config/crd/bases/upstream.pgv2.percona.com_postgresclusters.yaml b/config/crd/bases/upstream.pgv2.percona.com_postgresclusters.yaml index 37b17f1ad..77826be98 100644 --- a/config/crd/bases/upstream.pgv2.percona.com_postgresclusters.yaml +++ b/config/crd/bases/upstream.pgv2.percona.com_postgresclusters.yaml @@ -22378,6 +22378,64 @@ spec: - pgbackrest type: string type: array + dcs: + description: |- + DCS configures the distributed configuration store backend. + Defaults to the Kubernetes-native backend (Endpoints). + N.B. Changing the DCS type causes downtime; all instances must restart simultaneously. + properties: + etcd: + description: |- + Etcd holds settings for the external etcd DCS backend. + Required when type is "etcd". + properties: + authSecret: + description: |- + AuthSecret is the name of a Secret in the same namespace with keys + username and password for etcd authentication. + type: string + endpoints: + description: |- + Endpoints is the list of etcd endpoints including scheme and port. + Example: ["https://etcd.etcd-cluster.svc:2379"] + The scheme of the first endpoint determines the protocol used. + All endpoints must use the same scheme. + items: + pattern: ^https?://[^/] + type: string + maxItems: 7 + minItems: 1 + type: array + tlsSecret: + description: |- + TLSSecret is the name of a Secret in the same namespace with keys + ca.crt, tls.crt, and tls.key for mutual TLS with etcd. + type: string + required: + - endpoints + type: object + x-kubernetes-validations: + - message: all endpoints must use the same scheme (http or + https) + rule: self.endpoints.all(e, e.startsWith('https://')) || + self.endpoints.all(e, e.startsWith('http://')) + type: + default: kubernetes + description: |- + Type of DCS backend. Defaults to "kubernetes". + Changing this value causes cluster downtime; all instances must restart. + This field is immutable after cluster creation. + enum: + - kubernetes + - etcd + type: string + type: object + x-kubernetes-validations: + - message: etcd.endpoints must be non-empty when type is etcd + rule: self.type != 'etcd' || (has(self.etcd) && size(self.etcd.endpoints) + > 0) + - message: DCS type is immutable after cluster creation + rule: '!has(oldSelf.type) || oldSelf.type == self.type' dynamicConfiguration: description: |- Patroni dynamic configuration settings. Changes to this value will be @@ -22448,6 +22506,10 @@ spec: minimum: 1 type: integer type: object + x-kubernetes-validations: + - message: DCS type is immutable after cluster creation + rule: '(has(oldSelf.dcs) ? oldSelf.dcs.type : ''kubernetes'') == + (has(self.dcs) ? self.dcs.type : ''kubernetes'')' paused: description: |- Suspends the rollout and reconciliation of changes made to the diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 505329580..5410923ca 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -23023,6 +23023,64 @@ spec: - pgbackrest type: string type: array + dcs: + description: |- + DCS configures the distributed configuration store backend. + Defaults to the Kubernetes-native backend (Endpoints). + N.B. Changing the DCS type causes downtime; all instances must restart simultaneously. + properties: + etcd: + description: |- + Etcd holds settings for the external etcd DCS backend. + Required when type is "etcd". + properties: + authSecret: + description: |- + AuthSecret is the name of a Secret in the same namespace with keys + username and password for etcd authentication. + type: string + endpoints: + description: |- + Endpoints is the list of etcd endpoints including scheme and port. + Example: ["https://etcd.etcd-cluster.svc:2379"] + The scheme of the first endpoint determines the protocol used. + All endpoints must use the same scheme. + items: + pattern: ^https?://[^/] + type: string + maxItems: 7 + minItems: 1 + type: array + tlsSecret: + description: |- + TLSSecret is the name of a Secret in the same namespace with keys + ca.crt, tls.crt, and tls.key for mutual TLS with etcd. + type: string + required: + - endpoints + type: object + x-kubernetes-validations: + - message: all endpoints must use the same scheme (http or + https) + rule: self.endpoints.all(e, e.startsWith('https://')) || + self.endpoints.all(e, e.startsWith('http://')) + type: + default: kubernetes + description: |- + Type of DCS backend. Defaults to "kubernetes". + Changing this value causes cluster downtime; all instances must restart. + This field is immutable after cluster creation. + enum: + - kubernetes + - etcd + type: string + type: object + x-kubernetes-validations: + - message: etcd.endpoints must be non-empty when type is etcd + rule: self.type != 'etcd' || (has(self.etcd) && size(self.etcd.endpoints) + > 0) + - message: DCS type is immutable after cluster creation + rule: '!has(oldSelf.type) || oldSelf.type == self.type' dynamicConfiguration: description: |- Patroni dynamic configuration settings. Changes to this value will be @@ -23093,6 +23151,10 @@ spec: minimum: 1 type: integer type: object + x-kubernetes-validations: + - message: DCS type is immutable after cluster creation + rule: '(has(oldSelf.dcs) ? oldSelf.dcs.type : ''kubernetes'') == + (has(self.dcs) ? self.dcs.type : ''kubernetes'')' pause: description: |- Whether or not the PostgreSQL cluster should be stopped. @@ -60442,6 +60504,64 @@ spec: - pgbackrest type: string type: array + dcs: + description: |- + DCS configures the distributed configuration store backend. + Defaults to the Kubernetes-native backend (Endpoints). + N.B. Changing the DCS type causes downtime; all instances must restart simultaneously. + properties: + etcd: + description: |- + Etcd holds settings for the external etcd DCS backend. + Required when type is "etcd". + properties: + authSecret: + description: |- + AuthSecret is the name of a Secret in the same namespace with keys + username and password for etcd authentication. + type: string + endpoints: + description: |- + Endpoints is the list of etcd endpoints including scheme and port. + Example: ["https://etcd.etcd-cluster.svc:2379"] + The scheme of the first endpoint determines the protocol used. + All endpoints must use the same scheme. + items: + pattern: ^https?://[^/] + type: string + maxItems: 7 + minItems: 1 + type: array + tlsSecret: + description: |- + TLSSecret is the name of a Secret in the same namespace with keys + ca.crt, tls.crt, and tls.key for mutual TLS with etcd. + type: string + required: + - endpoints + type: object + x-kubernetes-validations: + - message: all endpoints must use the same scheme (http or + https) + rule: self.endpoints.all(e, e.startsWith('https://')) || + self.endpoints.all(e, e.startsWith('http://')) + type: + default: kubernetes + description: |- + Type of DCS backend. Defaults to "kubernetes". + Changing this value causes cluster downtime; all instances must restart. + This field is immutable after cluster creation. + enum: + - kubernetes + - etcd + type: string + type: object + x-kubernetes-validations: + - message: etcd.endpoints must be non-empty when type is etcd + rule: self.type != 'etcd' || (has(self.etcd) && size(self.etcd.endpoints) + > 0) + - message: DCS type is immutable after cluster creation + rule: '!has(oldSelf.type) || oldSelf.type == self.type' dynamicConfiguration: description: |- Patroni dynamic configuration settings. Changes to this value will be @@ -60512,6 +60632,10 @@ spec: minimum: 1 type: integer type: object + x-kubernetes-validations: + - message: DCS type is immutable after cluster creation + rule: '(has(oldSelf.dcs) ? oldSelf.dcs.type : ''kubernetes'') == + (has(self.dcs) ? self.dcs.type : ''kubernetes'')' paused: description: |- Suspends the rollout and reconciliation of changes made to the diff --git a/e2e-tests/tests/etcd-dcs/02-assert.yaml b/e2e-tests/tests/etcd-dcs/02-assert.yaml index d98612be8..cd1c6bde0 100644 --- a/e2e-tests/tests/etcd-dcs/02-assert.yaml +++ b/e2e-tests/tests/etcd-dcs/02-assert.yaml @@ -1,6 +1,6 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert -timeout: 120 +timeout: 600 --- kind: StatefulSet apiVersion: apps/v1 @@ -44,22 +44,6 @@ status: updatedReplicas: 1 readyReplicas: 1 --- -kind: Job -apiVersion: batch/v1 -metadata: - labels: - postgres-operator.crunchydata.com/cluster: etcd-dcs - postgres-operator.crunchydata.com/pgbackrest: '' - postgres-operator.crunchydata.com/pgbackrest-backup: replica-create - postgres-operator.crunchydata.com/pgbackrest-repo: repo1 - ownerReferences: - - apiVersion: pgv2.percona.com/v2 - kind: PerconaPGBackup - controller: true - blockOwnerDeletion: true -status: - succeeded: 1 ---- apiVersion: upstream.pgv2.percona.com/v1beta1 kind: PostgresCluster metadata: diff --git a/e2e-tests/tests/etcd-dcs/09-check-etcd-keys.yaml b/e2e-tests/tests/etcd-dcs/09-check-etcd-keys.yaml index cd2383e6a..bdce64540 100644 --- a/e2e-tests/tests/etcd-dcs/09-check-etcd-keys.yaml +++ b/e2e-tests/tests/etcd-dcs/09-check-etcd-keys.yaml @@ -9,7 +9,7 @@ commands: source ../../functions keys=$(kubectl -n "${NAMESPACE}" exec etcd-0 -- \ - env ETCDCTL_API=3 etcdctl --endpoints=http://localhost:2379 \ + /usr/local/bin/etcdctl --endpoints=http://localhost:2379 \ get / --prefix --keys-only) echo "${keys}" diff --git a/internal/controller/postgrescluster/patroni.go b/internal/controller/postgrescluster/patroni.go index 858b6cafd..c9270c7c4 100644 --- a/internal/controller/postgrescluster/patroni.go +++ b/internal/controller/postgrescluster/patroni.go @@ -336,28 +336,34 @@ func (r *Reconciler) reconcilePatroniStatus( } } - dcs := &corev1.Endpoints{ObjectMeta: naming.PatroniDistributedConfiguration(cluster)} - err := errors.WithStack(client.IgnoreNotFound( - r.Client.Get(ctx, client.ObjectKeyFromObject(dcs), dcs))) - - if err == nil { - if dcs.Annotations["initialize"] != "" { - // After bootstrap, Patroni writes the cluster system identifier to DCS. - cluster.Status.Patroni.SystemIdentifier = dcs.Annotations["initialize"] - } else if readyInstance { - // While we typically expect a value for the initialize key to be present in the - // Endpoints above by the time the StatefulSet for any instance indicates "ready" - // (since Patroni writes this value after successful cluster bootstrap, at which time - // the initial primary should transition to "ready"), sometimes this is not the case - // and the "initialize" key is not yet present. Therefore, if a "ready" instance - // is detected in the cluster we assume this is the case, and simply log a message and - // requeue in order to try again until the expected value is found. - log.Info("detected ready instance but no initialize value") - requeue = time.Second + // When using an external etcd DCS, Patroni writes the "initialize" value to etcd rather + // than to a Kubernetes Endpoints object. Skip the Kubernetes Endpoints lookup in that case. + if patroniDCS := cluster.Spec.Patroni.GetDCS(); patroniDCS == nil || patroniDCS.Type != v1beta1.PatroniDCSTypeEtcd { + dcs := &corev1.Endpoints{ObjectMeta: naming.PatroniDistributedConfiguration(cluster)} + err := errors.WithStack(client.IgnoreNotFound( + r.Client.Get(ctx, client.ObjectKeyFromObject(dcs), dcs))) + + if err == nil { + if dcs.Annotations["initialize"] != "" { + // After bootstrap, Patroni writes the cluster system identifier to DCS. + cluster.Status.Patroni.SystemIdentifier = dcs.Annotations["initialize"] + } else if readyInstance { + // While we typically expect a value for the initialize key to be present in the + // Endpoints above by the time the StatefulSet for any instance indicates "ready" + // (since Patroni writes this value after successful cluster bootstrap, at which time + // the initial primary should transition to "ready"), sometimes this is not the case + // and the "initialize" key is not yet present. Therefore, if a "ready" instance + // is detected in the cluster we assume this is the case, and simply log a message and + // requeue in order to try again until the expected value is found. + log.Info("detected ready instance but no initialize value") + requeue = time.Second + } } + + return requeue, err } - return requeue, err + return requeue, nil } // reconcileReplicationSecret creates a secret containing the TLS From 2b0ac024a2c62c16bc5072b1f5e059c5505df9c9 Mon Sep 17 00:00:00 2001 From: yoav-katz Date: Sat, 20 Jun 2026 10:38:38 +0300 Subject: [PATCH 08/18] fix(bug) --- .../controller/postgrescluster/pgbackrest.go | 27 ++++++++++++++++ internal/patroni/api.go | 32 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index a83804d62..0c3de3450 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -2989,6 +2989,33 @@ func (r *Reconciler) reconcileStanzaCreate(ctx context.Context, } } + // When using etcd DCS, Patroni does not update pod annotations or labels + // with the member role, so IsWritable() will never return true. Fall back + // to querying patronictl in a running instance pod. + if !clusterWritable { + if dcs := postgresCluster.Spec.Patroni.GetDCS(); dcs != nil && dcs.Type == v1beta1.PatroniDCSTypeEtcd { + for _, instance := range instances.forCluster { + if terminating, known := instance.IsTerminating(); terminating || !known { + continue + } + if running, known := instance.IsRunning(naming.ContainerDatabase); !running || !known { + continue + } + candidatePod := instance.Name + "-0" + tryExec := func(ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { + return r.PodExec(ctx, postgresCluster.GetNamespace(), candidatePod, naming.ContainerDatabase, stdin, stdout, stderr, command...) + } + leaderName, err := patroni.Executor(tryExec).GetLeaderName(ctx) + if err != nil || leaderName == "" { + continue + } + clusterWritable = true + writableInstanceName = leaderName + break + } + } + } + stanzasCreated := true for _, repoStatus := range postgresCluster.Status.PGBackRest.Repos { if !repoStatus.StanzaCreated { diff --git a/internal/patroni/api.go b/internal/patroni/api.go index 5f047267c..6a3782bf6 100644 --- a/internal/patroni/api.go +++ b/internal/patroni/api.go @@ -214,3 +214,35 @@ func (exec Executor) GetTimeline(ctx context.Context) (int64, error) { return 0, err } + +// GetLeaderName returns the pod name of the current Patroni leader by calling +// "patronictl list". This is used when the Kubernetes DCS is not in use (e.g. +// when etcd is the DCS), since in that case Patroni does not update pod +// annotations or labels with the member role. +// Returns an empty string when no running leader is found. +func (exec Executor) GetLeaderName(ctx context.Context) (string, error) { + var stdout, stderr bytes.Buffer + + err := exec(ctx, nil, &stdout, &stderr, + "patronictl", "list", "--format", "json") + if err != nil { + return "", err + } + + var members []struct { + Name string `json:"Member"` + Role string `json:"Role"` + State string `json:"State"` + } + if err := json.Unmarshal(stdout.Bytes(), &members); err != nil { + return "", err + } + + for _, member := range members { + if member.Role == "Leader" && member.State == "running" { + return member.Name, nil + } + } + + return "", nil +} From 48082f54510c119afd51947ad8935a741957a8e2 Mon Sep 17 00:00:00 2001 From: yoav-katz Date: Sat, 20 Jun 2026 12:02:50 +0300 Subject: [PATCH 09/18] regenerate-crds --- config/bundle/kustomization.yaml | 2 +- config/cw-bundle/kustomization.yaml | 2 +- config/manager/cluster/kustomization.yaml | 2 +- config/manager/namespace/kustomization.yaml | 2 +- deploy/bundle.yaml | 126 +++++++++++++++++- deploy/cw-bundle.yaml | 126 +++++++++++++++++- deploy/cw-operator.yaml | 2 +- deploy/operator.yaml | 2 +- .../v1beta1/patroni_types.go | 1 + 9 files changed, 257 insertions(+), 8 deletions(-) diff --git a/config/bundle/kustomization.yaml b/config/bundle/kustomization.yaml index 90cf89956..b661e8b11 100644 --- a/config/bundle/kustomization.yaml +++ b/config/bundle/kustomization.yaml @@ -7,4 +7,4 @@ resources: images: - name: postgres-operator newName: docker.io/perconalab/percona-postgresql-operator - newTag: main + newTag: etcd-dcs diff --git a/config/cw-bundle/kustomization.yaml b/config/cw-bundle/kustomization.yaml index 440d09873..80a11374e 100644 --- a/config/cw-bundle/kustomization.yaml +++ b/config/cw-bundle/kustomization.yaml @@ -8,4 +8,4 @@ resources: images: - name: postgres-operator newName: docker.io/perconalab/percona-postgresql-operator - newTag: main + newTag: etcd-dcs diff --git a/config/manager/cluster/kustomization.yaml b/config/manager/cluster/kustomization.yaml index 6f8f4bf12..79a7e1850 100644 --- a/config/manager/cluster/kustomization.yaml +++ b/config/manager/cluster/kustomization.yaml @@ -9,4 +9,4 @@ patchesStrategicMerge: images: - name: postgres-operator newName: docker.io/perconalab/percona-postgresql-operator - newTag: main + newTag: etcd-dcs diff --git a/config/manager/namespace/kustomization.yaml b/config/manager/namespace/kustomization.yaml index a57ff786e..fc41e9668 100644 --- a/config/manager/namespace/kustomization.yaml +++ b/config/manager/namespace/kustomization.yaml @@ -10,4 +10,4 @@ patchesStrategicMerge: images: - name: postgres-operator newName: docker.io/perconalab/percona-postgresql-operator - newTag: main + newTag: etcd-dcs diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index aa9ffe309..060d98ff0 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -23023,6 +23023,64 @@ spec: - pgbackrest type: string type: array + dcs: + description: |- + DCS configures the distributed configuration store backend. + Defaults to the Kubernetes-native backend (Endpoints). + N.B. Changing the DCS type causes downtime; all instances must restart simultaneously. + properties: + etcd: + description: |- + Etcd holds settings for the external etcd DCS backend. + Required when type is "etcd". + properties: + authSecret: + description: |- + AuthSecret is the name of a Secret in the same namespace with keys + username and password for etcd authentication. + type: string + endpoints: + description: |- + Endpoints is the list of etcd endpoints including scheme and port. + Example: ["https://etcd.etcd-cluster.svc:2379"] + The scheme of the first endpoint determines the protocol used. + All endpoints must use the same scheme. + items: + pattern: ^https?://[^/] + type: string + maxItems: 7 + minItems: 1 + type: array + tlsSecret: + description: |- + TLSSecret is the name of a Secret in the same namespace with keys + ca.crt, tls.crt, and tls.key for mutual TLS with etcd. + type: string + required: + - endpoints + type: object + x-kubernetes-validations: + - message: all endpoints must use the same scheme (http or + https) + rule: self.endpoints.all(e, e.startsWith('https://')) || + self.endpoints.all(e, e.startsWith('http://')) + type: + default: kubernetes + description: |- + Type of DCS backend. Defaults to "kubernetes". + Changing this value causes cluster downtime; all instances must restart. + This field is immutable after cluster creation. + enum: + - kubernetes + - etcd + type: string + type: object + x-kubernetes-validations: + - message: etcd.endpoints must be non-empty when type is etcd + rule: self.type != 'etcd' || (has(self.etcd) && size(self.etcd.endpoints) + > 0) + - message: DCS type is immutable after cluster creation + rule: '!has(oldSelf.type) || oldSelf.type == self.type' dynamicConfiguration: description: |- Patroni dynamic configuration settings. Changes to this value will be @@ -23093,6 +23151,10 @@ spec: minimum: 1 type: integer type: object + x-kubernetes-validations: + - message: DCS type is immutable after cluster creation + rule: '(has(oldSelf.dcs) ? oldSelf.dcs.type : ''kubernetes'') == + (has(self.dcs) ? self.dcs.type : ''kubernetes'')' pause: description: |- Whether or not the PostgreSQL cluster should be stopped. @@ -60442,6 +60504,64 @@ spec: - pgbackrest type: string type: array + dcs: + description: |- + DCS configures the distributed configuration store backend. + Defaults to the Kubernetes-native backend (Endpoints). + N.B. Changing the DCS type causes downtime; all instances must restart simultaneously. + properties: + etcd: + description: |- + Etcd holds settings for the external etcd DCS backend. + Required when type is "etcd". + properties: + authSecret: + description: |- + AuthSecret is the name of a Secret in the same namespace with keys + username and password for etcd authentication. + type: string + endpoints: + description: |- + Endpoints is the list of etcd endpoints including scheme and port. + Example: ["https://etcd.etcd-cluster.svc:2379"] + The scheme of the first endpoint determines the protocol used. + All endpoints must use the same scheme. + items: + pattern: ^https?://[^/] + type: string + maxItems: 7 + minItems: 1 + type: array + tlsSecret: + description: |- + TLSSecret is the name of a Secret in the same namespace with keys + ca.crt, tls.crt, and tls.key for mutual TLS with etcd. + type: string + required: + - endpoints + type: object + x-kubernetes-validations: + - message: all endpoints must use the same scheme (http or + https) + rule: self.endpoints.all(e, e.startsWith('https://')) || + self.endpoints.all(e, e.startsWith('http://')) + type: + default: kubernetes + description: |- + Type of DCS backend. Defaults to "kubernetes". + Changing this value causes cluster downtime; all instances must restart. + This field is immutable after cluster creation. + enum: + - kubernetes + - etcd + type: string + type: object + x-kubernetes-validations: + - message: etcd.endpoints must be non-empty when type is etcd + rule: self.type != 'etcd' || (has(self.etcd) && size(self.etcd.endpoints) + > 0) + - message: DCS type is immutable after cluster creation + rule: '!has(oldSelf.type) || oldSelf.type == self.type' dynamicConfiguration: description: |- Patroni dynamic configuration settings. Changes to this value will be @@ -60512,6 +60632,10 @@ spec: minimum: 1 type: integer type: object + x-kubernetes-validations: + - message: DCS type is immutable after cluster creation + rule: '(has(oldSelf.dcs) ? oldSelf.dcs.type : ''kubernetes'') == + (has(self.dcs) ? self.dcs.type : ''kubernetes'')' paused: description: |- Suspends the rollout and reconciliation of changes made to the @@ -69560,7 +69684,7 @@ spec: value: 10s - name: PGO_FEATURE_GATES value: "" - image: docker.io/perconalab/percona-postgresql-operator:main + image: docker.io/perconalab/percona-postgresql-operator:etcd-dcs imagePullPolicy: Always livenessProbe: failureThreshold: 3 diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 0216f2dda..50277d40f 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -23023,6 +23023,64 @@ spec: - pgbackrest type: string type: array + dcs: + description: |- + DCS configures the distributed configuration store backend. + Defaults to the Kubernetes-native backend (Endpoints). + N.B. Changing the DCS type causes downtime; all instances must restart simultaneously. + properties: + etcd: + description: |- + Etcd holds settings for the external etcd DCS backend. + Required when type is "etcd". + properties: + authSecret: + description: |- + AuthSecret is the name of a Secret in the same namespace with keys + username and password for etcd authentication. + type: string + endpoints: + description: |- + Endpoints is the list of etcd endpoints including scheme and port. + Example: ["https://etcd.etcd-cluster.svc:2379"] + The scheme of the first endpoint determines the protocol used. + All endpoints must use the same scheme. + items: + pattern: ^https?://[^/] + type: string + maxItems: 7 + minItems: 1 + type: array + tlsSecret: + description: |- + TLSSecret is the name of a Secret in the same namespace with keys + ca.crt, tls.crt, and tls.key for mutual TLS with etcd. + type: string + required: + - endpoints + type: object + x-kubernetes-validations: + - message: all endpoints must use the same scheme (http or + https) + rule: self.endpoints.all(e, e.startsWith('https://')) || + self.endpoints.all(e, e.startsWith('http://')) + type: + default: kubernetes + description: |- + Type of DCS backend. Defaults to "kubernetes". + Changing this value causes cluster downtime; all instances must restart. + This field is immutable after cluster creation. + enum: + - kubernetes + - etcd + type: string + type: object + x-kubernetes-validations: + - message: etcd.endpoints must be non-empty when type is etcd + rule: self.type != 'etcd' || (has(self.etcd) && size(self.etcd.endpoints) + > 0) + - message: DCS type is immutable after cluster creation + rule: '!has(oldSelf.type) || oldSelf.type == self.type' dynamicConfiguration: description: |- Patroni dynamic configuration settings. Changes to this value will be @@ -23093,6 +23151,10 @@ spec: minimum: 1 type: integer type: object + x-kubernetes-validations: + - message: DCS type is immutable after cluster creation + rule: '(has(oldSelf.dcs) ? oldSelf.dcs.type : ''kubernetes'') == + (has(self.dcs) ? self.dcs.type : ''kubernetes'')' pause: description: |- Whether or not the PostgreSQL cluster should be stopped. @@ -60442,6 +60504,64 @@ spec: - pgbackrest type: string type: array + dcs: + description: |- + DCS configures the distributed configuration store backend. + Defaults to the Kubernetes-native backend (Endpoints). + N.B. Changing the DCS type causes downtime; all instances must restart simultaneously. + properties: + etcd: + description: |- + Etcd holds settings for the external etcd DCS backend. + Required when type is "etcd". + properties: + authSecret: + description: |- + AuthSecret is the name of a Secret in the same namespace with keys + username and password for etcd authentication. + type: string + endpoints: + description: |- + Endpoints is the list of etcd endpoints including scheme and port. + Example: ["https://etcd.etcd-cluster.svc:2379"] + The scheme of the first endpoint determines the protocol used. + All endpoints must use the same scheme. + items: + pattern: ^https?://[^/] + type: string + maxItems: 7 + minItems: 1 + type: array + tlsSecret: + description: |- + TLSSecret is the name of a Secret in the same namespace with keys + ca.crt, tls.crt, and tls.key for mutual TLS with etcd. + type: string + required: + - endpoints + type: object + x-kubernetes-validations: + - message: all endpoints must use the same scheme (http or + https) + rule: self.endpoints.all(e, e.startsWith('https://')) || + self.endpoints.all(e, e.startsWith('http://')) + type: + default: kubernetes + description: |- + Type of DCS backend. Defaults to "kubernetes". + Changing this value causes cluster downtime; all instances must restart. + This field is immutable after cluster creation. + enum: + - kubernetes + - etcd + type: string + type: object + x-kubernetes-validations: + - message: etcd.endpoints must be non-empty when type is etcd + rule: self.type != 'etcd' || (has(self.etcd) && size(self.etcd.endpoints) + > 0) + - message: DCS type is immutable after cluster creation + rule: '!has(oldSelf.type) || oldSelf.type == self.type' dynamicConfiguration: description: |- Patroni dynamic configuration settings. Changes to this value will be @@ -60512,6 +60632,10 @@ spec: minimum: 1 type: integer type: object + x-kubernetes-validations: + - message: DCS type is immutable after cluster creation + rule: '(has(oldSelf.dcs) ? oldSelf.dcs.type : ''kubernetes'') == + (has(self.dcs) ? self.dcs.type : ''kubernetes'')' paused: description: |- Suspends the rollout and reconciliation of changes made to the @@ -69558,7 +69682,7 @@ spec: value: 10s - name: PGO_FEATURE_GATES value: "" - image: docker.io/perconalab/percona-postgresql-operator:main + image: docker.io/perconalab/percona-postgresql-operator:etcd-dcs imagePullPolicy: Always livenessProbe: failureThreshold: 3 diff --git a/deploy/cw-operator.yaml b/deploy/cw-operator.yaml index 48a775866..9983fd162 100644 --- a/deploy/cw-operator.yaml +++ b/deploy/cw-operator.yaml @@ -58,7 +58,7 @@ spec: value: 10s - name: PGO_FEATURE_GATES value: "" - image: docker.io/perconalab/percona-postgresql-operator:main + image: docker.io/perconalab/percona-postgresql-operator:etcd-dcs imagePullPolicy: Always livenessProbe: failureThreshold: 3 diff --git a/deploy/operator.yaml b/deploy/operator.yaml index f7f95a088..4070d5e1e 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -61,7 +61,7 @@ spec: value: 10s - name: PGO_FEATURE_GATES value: "" - image: docker.io/perconalab/percona-postgresql-operator:main + image: docker.io/perconalab/percona-postgresql-operator:etcd-dcs imagePullPolicy: Always livenessProbe: failureThreshold: 3 diff --git a/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go b/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go index dc93597b2..c421394cb 100644 --- a/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go +++ b/pkg/apis/upstream.pgv2.percona.com/v1beta1/patroni_types.go @@ -100,6 +100,7 @@ type PatroniEtcdSpec struct { // The scheme of the first endpoint determines the protocol used. // All endpoints must use the same scheme. // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=7 // +kubebuilder:validation:items:Pattern=`^https?://[^/]` Endpoints []string `json:"endpoints"` From 70d51b517f9ec783b9ffbba87f739355331e7a06 Mon Sep 17 00:00:00 2001 From: yoav-katz Date: Sat, 20 Jun 2026 12:41:57 +0300 Subject: [PATCH 10/18] feat(e2e_tests): added check for immutable dcs after creation --- .../etcd-dcs/11-check-dcs-immutability.yaml | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 e2e-tests/tests/etcd-dcs/11-check-dcs-immutability.yaml diff --git a/e2e-tests/tests/etcd-dcs/11-check-dcs-immutability.yaml b/e2e-tests/tests/etcd-dcs/11-check-dcs-immutability.yaml new file mode 100644 index 000000000..02382a426 --- /dev/null +++ b/e2e-tests/tests/etcd-dcs/11-check-dcs-immutability.yaml @@ -0,0 +1,22 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + # Attempt to change the DCS type from etcd to kubernetes after cluster creation. + # The CRD CEL validation rule must reject this with "DCS type is immutable after cluster creation". + rejection_output=$(kubectl -n "${NAMESPACE}" patch perconapgcluster etcd-dcs \ + --type=merge \ + -p '{"spec":{"patroni":{"dcs":{"type":"kubernetes"}}}}' 2>&1 || true) + + if echo "${rejection_output}" | grep -q "DCS type is immutable after cluster creation"; then + echo "OK: DCS type change correctly rejected by the API server" + else + echo "ERROR: expected rejection with 'DCS type is immutable after cluster creation', got: ${rejection_output}" + exit 1 + fi From 05c00429eca0f5e3f22b5fffd621e12c10665b94 Mon Sep 17 00:00:00 2001 From: yoav-katz Date: Sat, 20 Jun 2026 13:49:54 +0300 Subject: [PATCH 11/18] fix(tag) --- config/bundle/kustomization.yaml | 2 +- config/cw-bundle/kustomization.yaml | 2 +- config/manager/cluster/kustomization.yaml | 2 +- config/manager/namespace/kustomization.yaml | 2 +- deploy/bundle.yaml | 2 +- deploy/cw-bundle.yaml | 2 +- deploy/cw-operator.yaml | 2 +- deploy/operator.yaml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/config/bundle/kustomization.yaml b/config/bundle/kustomization.yaml index b661e8b11..90cf89956 100644 --- a/config/bundle/kustomization.yaml +++ b/config/bundle/kustomization.yaml @@ -7,4 +7,4 @@ resources: images: - name: postgres-operator newName: docker.io/perconalab/percona-postgresql-operator - newTag: etcd-dcs + newTag: main diff --git a/config/cw-bundle/kustomization.yaml b/config/cw-bundle/kustomization.yaml index 80a11374e..440d09873 100644 --- a/config/cw-bundle/kustomization.yaml +++ b/config/cw-bundle/kustomization.yaml @@ -8,4 +8,4 @@ resources: images: - name: postgres-operator newName: docker.io/perconalab/percona-postgresql-operator - newTag: etcd-dcs + newTag: main diff --git a/config/manager/cluster/kustomization.yaml b/config/manager/cluster/kustomization.yaml index 79a7e1850..6f8f4bf12 100644 --- a/config/manager/cluster/kustomization.yaml +++ b/config/manager/cluster/kustomization.yaml @@ -9,4 +9,4 @@ patchesStrategicMerge: images: - name: postgres-operator newName: docker.io/perconalab/percona-postgresql-operator - newTag: etcd-dcs + newTag: main diff --git a/config/manager/namespace/kustomization.yaml b/config/manager/namespace/kustomization.yaml index fc41e9668..a57ff786e 100644 --- a/config/manager/namespace/kustomization.yaml +++ b/config/manager/namespace/kustomization.yaml @@ -10,4 +10,4 @@ patchesStrategicMerge: images: - name: postgres-operator newName: docker.io/perconalab/percona-postgresql-operator - newTag: etcd-dcs + newTag: main diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 060d98ff0..636b83483 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -69684,7 +69684,7 @@ spec: value: 10s - name: PGO_FEATURE_GATES value: "" - image: docker.io/perconalab/percona-postgresql-operator:etcd-dcs + image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: failureThreshold: 3 diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 50277d40f..a4a21ef09 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -69682,7 +69682,7 @@ spec: value: 10s - name: PGO_FEATURE_GATES value: "" - image: docker.io/perconalab/percona-postgresql-operator:etcd-dcs + image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: failureThreshold: 3 diff --git a/deploy/cw-operator.yaml b/deploy/cw-operator.yaml index 9983fd162..48a775866 100644 --- a/deploy/cw-operator.yaml +++ b/deploy/cw-operator.yaml @@ -58,7 +58,7 @@ spec: value: 10s - name: PGO_FEATURE_GATES value: "" - image: docker.io/perconalab/percona-postgresql-operator:etcd-dcs + image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: failureThreshold: 3 diff --git a/deploy/operator.yaml b/deploy/operator.yaml index 4070d5e1e..f7f95a088 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -61,7 +61,7 @@ spec: value: 10s - name: PGO_FEATURE_GATES value: "" - image: docker.io/perconalab/percona-postgresql-operator:etcd-dcs + image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: failureThreshold: 3 From 8f26d3cb2a7f0604ede0c999ec21295039eb7a87 Mon Sep 17 00:00:00 2001 From: yoav-katz Date: Sat, 20 Jun 2026 17:16:45 +0300 Subject: [PATCH 12/18] feat(patroni-role-change) --- build/postgres-operator/Dockerfile | 1 + build/postgres-operator/init-entrypoint.sh | 1 + .../postgres-operator/patroni-role-change.sh | 25 +++++++++++++++ e2e-tests/tests/etcd-dcs/01-etcd-setup.yaml | 2 +- .../etcd-dcs/07-check-patroni-config.yaml | 2 ++ .../tests/etcd-dcs/12-check-pod-labels.yaml | 22 +++++++++++++ .../controller/postgrescluster/pgbackrest.go | 27 ---------------- internal/patroni/api.go | 32 ------------------- internal/patroni/config.go | 6 ++++ 9 files changed, 58 insertions(+), 60 deletions(-) create mode 100755 build/postgres-operator/patroni-role-change.sh create mode 100644 e2e-tests/tests/etcd-dcs/12-check-pod-labels.yaml diff --git a/build/postgres-operator/Dockerfile b/build/postgres-operator/Dockerfile index 45c38b974..a3de6223a 100644 --- a/build/postgres-operator/Dockerfile +++ b/build/postgres-operator/Dockerfile @@ -70,6 +70,7 @@ COPY build/postgres-operator/postgres-entrypoint.sh /usr/local/bin COPY build/postgres-operator/postgres-liveness-check.sh /usr/local/bin COPY build/postgres-operator/postgres-readiness-check.sh /usr/local/bin COPY build/postgres-operator/restore-command-wrapper.sh /usr/local/bin +COPY build/postgres-operator/patroni-role-change.sh /usr/local/bin COPY hack/tools/queries /opt/crunchy/conf RUN chgrp -R 0 /opt/crunchy/conf && chmod -R g=u opt/crunchy/conf diff --git a/build/postgres-operator/init-entrypoint.sh b/build/postgres-operator/init-entrypoint.sh index 3090212fb..e7c503538 100755 --- a/build/postgres-operator/init-entrypoint.sh +++ b/build/postgres-operator/init-entrypoint.sh @@ -11,3 +11,4 @@ install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-liveness install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-readiness-check.sh" "${CRUNCHY_BINDIR}/bin/postgres-readiness-check.sh" install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/relocate-extensions.sh" "${CRUNCHY_BINDIR}/bin/relocate-extensions.sh" install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/restore-command-wrapper.sh" "${CRUNCHY_BINDIR}/bin/restore-command-wrapper.sh" +install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/patroni-role-change.sh" "${CRUNCHY_BINDIR}/bin/patroni-role-change.sh" diff --git a/build/postgres-operator/patroni-role-change.sh b/build/postgres-operator/patroni-role-change.sh new file mode 100755 index 000000000..4ecb46bb5 --- /dev/null +++ b/build/postgres-operator/patroni-role-change.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Patroni on_role_change callback. +# Called by Patroni as: