From 518d535c21ecace7aa0f88e0ccf02ea78aeee6aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Saparelli?= Date: Thu, 2 Jul 2026 15:41:44 +1200 Subject: [PATCH] feat(canopy): tear down ephemeral (verify) replicas after verification A `verify` replica's job is to prove a snapshot restores, not to serve queries. It was booting postgres and then idling forever. Add `spec.ephemeral` (default false; set true by the verify intent): once a restore reaches Active (postgres came up healthy, and for canopy replicas the RestoreVerification was reported in the switchover block), the reconciler records `status.verifiedSnapshotId` and deletes the restore, reclaiming the Deployment + PVC. The replica CR and namespace stay so canopy's worklist stays satisfied. Re-restore is gated on the verified marker: with no active restore after teardown, the reconciler compares the desired snapshot against `verifiedSnapshotId` instead of a (now absent) active restore, so it only restores again when canopy offers a newer snapshot (canopy path) or the schedule fires (legacy path). Without the marker the active-restore-deleted / desired-changed triggers would loop. The analytics intents keep ephemeral=false (long-lived query replicas). Gated entirely behind spec.ephemeral, so non-ephemeral replicas are unchanged. Adds an integration test (tests/ephemeral.rs, new CI matrix entry) that does not need stub-canopy: it drives a legacy ephemeral replica through restore -> Active -> teardown and asserts no re-restore loop. Does NOT include the requested health_details on RestoreVerification: bestool-canopy 0.4.3 has no such field yet. Deferred until the crate ships it. --- .github/workflows/integration.yml | 5 + README.md | 2 + crds.yaml | 20 ++++ src/controllers/canopy/intent.rs | 41 ++++++++ src/controllers/replica.rs | 71 ++++++++++++- src/controllers/replica/scheduling.rs | 1 + src/controllers/replica/schema_migration.rs | 1 + src/controllers/replica/tests.rs | 2 + src/controllers/restore/tests.rs | 2 + src/types/replica.rs | 18 ++++ tests/ephemeral.rs | 107 ++++++++++++++++++++ tests/helpers.rs | 3 + 12 files changed, 271 insertions(+), 2 deletions(-) create mode 100644 tests/ephemeral.rs diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 2b8a567..b3d24a2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -56,6 +56,11 @@ jobs: test-ps-all-missing needs_non_pg_snapshot: false + - name: ephemeral + namespaces: >- + test-ephemeral + needs_non_pg_snapshot: false + # canopy_integration is a scaffold in tests/canopy_integration.rs # but no stub-canopy HTTP server exists yet, so the happy-path # test times out. Re-add this matrix entry when the stub lands: diff --git a/README.md b/README.md index da9688b..a754599 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ Defines a continuously-refreshed replica of a PostgreSQL database restored from | `affinity` | `Affinity` | No | — | Pod scheduling affinity rules. | | `tolerations` | `[]Toleration` | No | `[]` | Pod tolerations. | | `readOnly` | `bool` | No | `true` | Set the restored database to read-only mode. | +| `ephemeral` | `bool` | No | `false` | Tear the restore down once it reaches `Active` (postgres came up healthy) instead of keeping it running. The replica only restores again when a newer snapshot is offered (canopy path) or the schedule next fires (legacy path). Used by the `verify` intent, whose job is just to prove the snapshot restores. | | `postgresExtraConfig` | `string` | No | — | Extra lines appended to `postgresql.conf` (e.g. `shared_preload_libraries`). | | `notifications` | `[]NotificationConfig` | No | `[]` | Notification targets called on restore events. | | `persistentSchemas` | `[]string` | No | — | List of schema names to migrate from the previous restore to the new restore on each switchover. See [Persistent schemas](#persistent-schemas) below for the migration time budget and what happens on timeout. | @@ -180,6 +181,7 @@ Additional fields for `target: graphQL`: | `nextScheduledRestore` | `Time` | When the next scheduled restore will occur. | | `latestAvailableSnapshot` | `string` | Snapshot ID of the latest available snapshot matching the filter. | | `canopyDesiredSnapshotId` | `string` | For canopy-sourced replicas: the snapshot the canopy worklist syncer wants restored. The reconciler triggers a new restore when this differs from the current one. | +| `verifiedSnapshotId` | `string` | For `ephemeral` replicas: the last snapshot that was verified and then torn down. Gates re-restore — the reconciler only restores again when the desired snapshot differs from this. | | `connectionInfo` | `ConnectionInfo` | Connection details (host, port, database, username, password secret). | | `queuePosition` | `uint32` | Position in the global restore queue. | | `notifications` | `[]NotificationStatus` | Status of each configured notification target. | diff --git a/crds.yaml b/crds.yaml index 303bb4d..d38ed30 100644 --- a/crds.yaml +++ b/crds.yaml @@ -571,6 +571,17 @@ spec: - group - type type: object + ephemeral: + default: false + description: |- + Ephemeral replica: once a restore reaches `Active` (postgres came up + healthy and, for canopy replicas, the verification was reported), + tear the restore down instead of keeping it running. The replica CR + stays; it only restores again when a new snapshot is offered (canopy + path) or the schedule next fires (legacy path). Used by the `verify` + intent, whose whole job is "prove the snapshot restores" — keeping + the database idling afterward just wastes cluster resources. + type: boolean kopiaSecretRef: description: |- Reference to a Secret containing kopia repository credentials. @@ -934,6 +945,15 @@ spec: serviceName: nullable: true type: string + verifiedSnapshotId: + description: |- + Last snapshot id an ephemeral replica (`spec.ephemeral`) verified + and then tore down. After teardown there is no `currentRestore` to + compare against, so this marker is what stops the reconciler from + immediately re-restoring the same snapshot; a restore is only + re-triggered when the desired snapshot differs from this. + nullable: true + type: string type: object required: - spec diff --git a/src/controllers/canopy/intent.rs b/src/controllers/canopy/intent.rs index b5b6387..9fb13a6 100644 --- a/src/controllers/canopy/intent.rs +++ b/src/controllers/canopy/intent.rs @@ -41,6 +41,11 @@ pub struct IntentConfig { pub service_annotations: Option>, pub switchover_grace_period: TimeSpan, pub storage_size_override: Quantity, + /// Tear the restore down once it's verified healthy rather than keeping + /// it running. Materialised into `PostgresPhysicalReplicaSpec.ephemeral`. + /// True for `verify` (throwaway snapshot check), false for the + /// analytics intents (long-lived query replicas). + pub ephemeral: bool, /// Floor on the postgres pod's `/dev/shm` sizing. Materialised into /// `PostgresPhysicalReplicaSpec.shm_size_floor` so the shared /// Deployment builder picks `max(computed_from_resources, floor)`. @@ -79,6 +84,7 @@ pub fn config_for(intent: &str) -> Option { service_annotations: None, switchover_grace_period: TimeSpan(Span::new().minutes(5)), storage_size_override: Quantity("20Gi".to_string()), + ephemeral: true, shm_size_floor: Quantity("512Mi".to_string()), }), "analytics-dev" => Some(IntentConfig { @@ -89,6 +95,7 @@ pub fn config_for(intent: &str) -> Option { service_annotations: None, switchover_grace_period: TimeSpan(Span::new().minutes(5)), storage_size_override: Quantity("50Gi".to_string()), + ephemeral: false, shm_size_floor: Quantity("2Gi".to_string()), }), "analytics-dbt" => Some(IntentConfig { @@ -105,6 +112,7 @@ pub fn config_for(intent: &str) -> Option { ])), switchover_grace_period: TimeSpan(Span::new().minutes(2)), storage_size_override: Quantity("50Gi".to_string()), + ephemeral: false, shm_size_floor: Quantity("2Gi".to_string()), }), _ => None, @@ -160,6 +168,7 @@ impl IntentConfig { affinity: None, tolerations: Vec::new(), read_only: self.read_only, + ephemeral: self.ephemeral, postgres_extra_config: None, notifications, persistent_schemas: self.persistent_schemas.clone(), @@ -213,6 +222,38 @@ mod tests { assert!(cfg.read_only); assert!(cfg.minimum_ttl.is_none()); assert!(cfg.persistent_schemas.is_none()); + assert!(cfg.ephemeral, "verify replicas are torn down after verify"); + } + + #[test] + fn only_verify_is_ephemeral() { + assert!(config_for("verify").unwrap().ephemeral); + assert!( + !config_for("analytics-dev").unwrap().ephemeral, + "analytics-dev is a long-lived query replica" + ); + assert!( + !config_for("analytics-dbt").unwrap().ephemeral, + "analytics-dbt is a long-lived query replica" + ); + } + + #[test] + fn to_replica_spec_carries_ephemeral() { + let e = entry("verify", "test"); + assert!( + config_for("verify") + .unwrap() + .to_replica_spec(&e, vec![]) + .ephemeral + ); + let e = entry("analytics-dev", "test"); + assert!( + !config_for("analytics-dev") + .unwrap() + .to_replica_spec(&e, vec![]) + .ephemeral + ); } #[test] diff --git a/src/controllers/replica.rs b/src/controllers/replica.rs index 4d9afec..379b553 100644 --- a/src/controllers/replica.rs +++ b/src/controllers/replica.rs @@ -357,6 +357,60 @@ pub async fn reconcile(replica: Arc, ctx: Arc) return Ok(Action::requeue(Duration::from_secs(10))); } + // Ephemeral teardown. For a `spec.ephemeral` replica (e.g. the `verify` + // intent) the point of the restore is to prove the snapshot comes up — + // once it's Active (the switchover block above ran and, for canopy + // replicas, reported the verification), keeping the database running + // just wastes resources. Record the verified snapshot and delete the + // restore. The replica CR stays; the should_restore logic below only + // re-triggers when a newer snapshot is offered (canopy) or the schedule + // fires (legacy), gated by `verifiedSnapshotId`. + if replica.spec.ephemeral + && let Some(active) = active_restore + && active.metadata.deletion_timestamp.is_none() + { + let active_name = active.name_any(); + let verified_snapshot = active.spec.snapshot.clone(); + info!( + replica = name, + restore = active_name, + snapshot = verified_snapshot, + "ephemeral replica: snapshot verified, tearing down restore" + ); + + // Record the marker BEFORE deleting so that even if the delete or a + // crash interleaves, the reconciler won't treat the vanished + // restore as an accidental deletion and immediately re-restore. + let replicas: Api = Api::namespaced(client.clone(), &namespace); + let patch = serde_json::json!({ + "status": { + "verifiedSnapshotId": verified_snapshot, + "currentRestore": null, + "previousRestore": null, + } + }); + replicas + .patch_status( + &name, + &PatchParams::apply("postgres-restore-operator"), + &Patch::Merge(&patch), + ) + .await?; + + if let Err(e) = restores.delete(&active_name, &Default::default()).await { + warn!( + replica = name, + restore = %active_name, + error = %e, + "failed to delete ephemeral restore; will retry" + ); + } + if let Some(promoted) = ctx.release_restore_slot(&name).await { + info!(promoted = %promoted, "promoted queued restore after ephemeral teardown"); + } + return Ok(Action::requeue(Duration::from_secs(30))); + } + // Sweep stale Active restores after grace period. // // Any Active restore for this replica that isn't the current one is a @@ -909,17 +963,30 @@ pub async fn reconcile(replica: Arc, ctx: Arc) // On the canopy path, the worklist syncer updates // `status.canopyDesiredSnapshotId` when canopy offers a newer snapshot. // Trigger a restore whenever the desired snapshot differs from what the - // active restore already carries, in addition to the usual schedule / + // replica already carries, in addition to the usual schedule / // never-restored / active-deleted triggers. minimum_ttl still gates: // the intent may declare an explicit lower bound on restore frequency // even in the face of a newer canopy snapshot. + // + // "What the replica already carries" is normally the active restore's + // snapshot, but an ephemeral replica has no active restore after it + // tears one down — there we fall back to `verifiedSnapshotId`, the + // marker recording the last snapshot we verified. Without that + // fallback the `(Some, None)` arm would re-fire forever. + let effective_current_snapshot = + active_restore.map(|r| r.spec.snapshot.clone()).or_else(|| { + replica + .status + .as_ref() + .and_then(|s| s.verified_snapshot_id.clone()) + }); let canopy_desired_changed = is_canopy && match ( replica .status .as_ref() .and_then(|s| s.canopy_desired_snapshot_id.as_ref()), - active_restore.map(|r| r.spec.snapshot.as_str()), + effective_current_snapshot.as_deref(), ) { (Some(desired), Some(current)) => desired != current, (Some(_), None) => true, diff --git a/src/controllers/replica/scheduling.rs b/src/controllers/replica/scheduling.rs index fe4957f..5576503 100644 --- a/src/controllers/replica/scheduling.rs +++ b/src/controllers/replica/scheduling.rs @@ -273,6 +273,7 @@ mod tests { affinity: None, tolerations: Vec::new(), read_only: true, + ephemeral: false, postgres_extra_config: None, notifications: Vec::new(), storage_size_maximum: k8s_openapi::apimachinery::pkg::api::resource::Quantity( diff --git a/src/controllers/replica/schema_migration.rs b/src/controllers/replica/schema_migration.rs index 3a66884..ef35f5b 100644 --- a/src/controllers/replica/schema_migration.rs +++ b/src/controllers/replica/schema_migration.rs @@ -302,6 +302,7 @@ mod tests { affinity: None, tolerations: vec![], read_only: true, + ephemeral: false, postgres_extra_config: None, notifications: vec![], storage_size_maximum: k8s_openapi::apimachinery::pkg::api::resource::Quantity( diff --git a/src/controllers/replica/tests.rs b/src/controllers/replica/tests.rs index a85572e..ecb9a89 100644 --- a/src/controllers/replica/tests.rs +++ b/src/controllers/replica/tests.rs @@ -38,6 +38,7 @@ fn make_replica( affinity: None, tolerations: vec![], read_only: true, + ephemeral: false, postgres_extra_config: None, notifications: vec![], persistent_schemas, @@ -198,6 +199,7 @@ fn snapshot_list_job_rotates_kopia_logs() { affinity: None, tolerations: vec![], read_only: true, + ephemeral: false, postgres_extra_config: None, notifications: vec![], persistent_schemas: None, diff --git a/src/controllers/restore/tests.rs b/src/controllers/restore/tests.rs index 676e2e2..0987568 100644 --- a/src/controllers/restore/tests.rs +++ b/src/controllers/restore/tests.rs @@ -37,6 +37,7 @@ fn deployment_uses_affinity_not_node_selector() { affinity: None, tolerations: vec![], read_only: true, + ephemeral: false, postgres_extra_config: None, notifications: vec![], @@ -123,6 +124,7 @@ fn test_restore_and_replica() -> (PostgresPhysicalRestore, PostgresPhysicalRepli affinity: None, tolerations: vec![], read_only: true, + ephemeral: false, postgres_extra_config: None, notifications: vec![], diff --git a/src/types/replica.rs b/src/types/replica.rs index 33524c0..89f95d3 100644 --- a/src/types/replica.rs +++ b/src/types/replica.rs @@ -118,6 +118,16 @@ pub struct PostgresPhysicalReplicaSpec { #[serde(default = "default_read_only")] pub read_only: bool, + /// Ephemeral replica: once a restore reaches `Active` (postgres came up + /// healthy and, for canopy replicas, the verification was reported), + /// tear the restore down instead of keeping it running. The replica CR + /// stays; it only restores again when a new snapshot is offered (canopy + /// path) or the schedule next fires (legacy path). Used by the `verify` + /// intent, whose whole job is "prove the snapshot restores" — keeping + /// the database idling afterward just wastes cluster resources. + #[serde(default)] + pub ephemeral: bool, + /// Extra lines appended to postgresql.conf (e.g. shared_preload_libraries) #[serde(default, skip_serializing_if = "Option::is_none")] pub postgres_extra_config: Option, @@ -313,6 +323,14 @@ pub struct PostgresPhysicalReplicaStatus { #[serde(default, skip_serializing_if = "Option::is_none")] pub canopy_desired_snapshot_id: Option, + /// Last snapshot id an ephemeral replica (`spec.ephemeral`) verified + /// and then tore down. After teardown there is no `currentRestore` to + /// compare against, so this marker is what stops the reconciler from + /// immediately re-restoring the same snapshot; a restore is only + /// re-triggered when the desired snapshot differs from this. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub verified_snapshot_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] pub connection_info: Option, diff --git a/tests/ephemeral.rs b/tests/ephemeral.rs new file mode 100644 index 0000000..bee851a --- /dev/null +++ b/tests/ephemeral.rs @@ -0,0 +1,107 @@ +use std::time::Duration; + +use kube::Api; +use kube::api::PostParams; +use postgres_restore_operator::types::PostgresPhysicalReplica; + +use helpers::*; + +mod helpers; + +/// An `ephemeral` replica should restore, come up healthy, then tear the +/// restore down — leaving no running database — and record the verified +/// snapshot so it doesn't immediately re-restore. +#[tokio::test] +#[ignore = "requires a running Kubernetes cluster with MinIO and kopia"] +async fn ephemeral_replica_tears_down_after_verify() { + let client = make_client().await; + let ns = "test-ephemeral"; + + setup_namespace(&client, ns).await; + cleanup_namespace(&client, ns, &["ephemeral-replica"]).await; + + let secrets: Api = Api::namespaced(client.clone(), ns); + let replicas: Api = Api::namespaced(client.clone(), ns); + + println!("--- creating kopia secret"); + secrets + .create( + &PostParams::default(), + &build_kopia_secret(ns, "ephemeral-kopia-creds", "test-bucket"), + ) + .await + .expect("failed to create kopia secret"); + + println!("--- creating ephemeral PostgresPhysicalReplica"); + // Long schedule so the only restore we observe is the initial one; after + // teardown it must NOT restore again within the test window. + let mut replica = build_replica( + "ephemeral-replica", + "ephemeral-kopia-creds", + ReplicaOpts { + schedule: "0 0 1 1 *".into(), // once a year + ephemeral: true, + ..Default::default() + }, + ); + replica.metadata.namespace = Some(ns.into()); + replicas + .create(&PostParams::default(), &replica) + .await + .expect("failed to create replica"); + + // Don't wait for the `Ready` phase: on an ephemeral replica it's + // transient — the switchover sets Ready and the very next reconcile + // (~tens of ms later) tears the restore down and the replica drops back + // to Pending. Polling every couple of seconds reliably misses that + // window. Wait directly for the durable end state instead: the restore + // gone, `verifiedSnapshotId` recorded, `currentRestore` cleared. + // + // The window before that end state covers the whole restore cycle + // (snapshot-list → restore Job → deployment ready → switchover → + // teardown), so allow the full phase timeout. + println!("--- waiting for the restore to be verified and torn down"); + let deadline = tokio::time::Instant::now() + PHASE_TIMEOUT; + loop { + let count = count_restores_for_replica(&client, ns, "ephemeral-replica").await; + let replica = replicas + .get("ephemeral-replica") + .await + .expect("get replica"); + let verified = replica + .status + .as_ref() + .and_then(|s| s.verified_snapshot_id.clone()); + if count == 0 && verified.is_some() { + println!("--- torn down; verifiedSnapshotId={verified:?}"); + // currentRestore must be cleared so the reconciler doesn't treat + // the vanished restore as an accidental deletion. + assert!( + replica + .status + .as_ref() + .and_then(|s| s.current_restore.as_ref()) + .is_none(), + "currentRestore must be cleared after ephemeral teardown" + ); + break; + } + if tokio::time::Instant::now() >= deadline { + panic!( + "restore was not torn down within timeout (count={count}, verified={verified:?})" + ); + } + tokio::time::sleep(POLL_INTERVAL).await; + } + + println!("--- confirming it stays torn down (no re-restore loop)"); + tokio::time::sleep(Duration::from_secs(30)).await; + let count = count_restores_for_replica(&client, ns, "ephemeral-replica").await; + assert_eq!( + count, 0, + "ephemeral replica must not re-restore the same snapshot after teardown" + ); + + println!("--- all assertions passed, cleaning up"); + cleanup_namespace(&client, ns, &["ephemeral-replica"]).await; +} diff --git a/tests/helpers.rs b/tests/helpers.rs index d6b7f72..66b5c43 100644 --- a/tests/helpers.rs +++ b/tests/helpers.rs @@ -121,6 +121,7 @@ pub struct ReplicaOpts { pub minimum_ttl: Option, pub schedule_jitter: Option, pub read_only: bool, + pub ephemeral: bool, } impl Default for ReplicaOpts { @@ -130,6 +131,7 @@ impl Default for ReplicaOpts { minimum_ttl: None, schedule_jitter: None, read_only: true, + ephemeral: false, } } } @@ -158,6 +160,7 @@ pub fn build_replica(name: &str, secret_ref: &str, opts: ReplicaOpts) -> Postgre affinity: None, tolerations: vec![], read_only: opts.read_only, + ephemeral: opts.ephemeral, postgres_extra_config: None, notifications: vec![], persistent_schemas: None,