From 518d535c21ecace7aa0f88e0ccf02ea78aeee6aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Saparelli?= <felix@bes.au>
Date: Thu, 2 Jul 2026 15:41:44 +1200
Subject: [PATCH] feat(canopy): tear down ephemeral (verify) replicas after
 verification

A `verify` replica's job is to prove a snapshot restores, not to serve
queries. It was booting postgres and then idling forever. Add
`spec.ephemeral` (default false; set true by the verify intent): once a
restore reaches Active (postgres came up healthy, and for canopy
replicas the RestoreVerification was reported in the switchover block),
the reconciler records `status.verifiedSnapshotId` and deletes the
restore, reclaiming the Deployment + PVC. The replica CR and namespace
stay so canopy's worklist stays satisfied.

Re-restore is gated on the verified marker: with no active restore after
teardown, the reconciler compares the desired snapshot against
`verifiedSnapshotId` instead of a (now absent) active restore, so it only
restores again when canopy offers a newer snapshot (canopy path) or the
schedule fires (legacy path). Without the marker the
active-restore-deleted / desired-changed triggers would loop.

The analytics intents keep ephemeral=false (long-lived query replicas).
Gated entirely behind spec.ephemeral, so non-ephemeral replicas are
unchanged.

Adds an integration test (tests/ephemeral.rs, new CI matrix entry) that
does not need stub-canopy: it drives a legacy ephemeral replica through
restore -> Active -> teardown and asserts no re-restore loop.

Does NOT include the requested health_details on RestoreVerification:
bestool-canopy 0.4.3 has no such field yet. Deferred until the crate
ships it.
---
 .github/workflows/integration.yml           |   5 +
 README.md                                   |   2 +
 crds.yaml                                   |  20 ++++
 src/controllers/canopy/intent.rs            |  41 ++++++++
 src/controllers/replica.rs                  |  71 ++++++++++++-
 src/controllers/replica/scheduling.rs       |   1 +
 src/controllers/replica/schema_migration.rs |   1 +
 src/controllers/replica/tests.rs            |   2 +
 src/controllers/restore/tests.rs            |   2 +
 src/types/replica.rs                        |  18 ++++
 tests/ephemeral.rs                          | 107 ++++++++++++++++++++
 tests/helpers.rs                            |   3 +
 12 files changed, 271 insertions(+), 2 deletions(-)
 create mode 100644 tests/ephemeral.rs

diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 2b8a567..b3d24a2 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -56,6 +56,11 @@ jobs:
               test-ps-all-missing
             needs_non_pg_snapshot: false
 
+          - name: ephemeral
+            namespaces: >-
+              test-ephemeral
+            needs_non_pg_snapshot: false
+
           # canopy_integration is a scaffold in tests/canopy_integration.rs
           # but no stub-canopy HTTP server exists yet, so the happy-path
           # test times out. Re-add this matrix entry when the stub lands:
diff --git a/README.md b/README.md
index da9688b..a754599 100644
--- a/README.md
+++ b/README.md
@@ -103,6 +103,7 @@ Defines a continuously-refreshed replica of a PostgreSQL database restored from
 | `affinity` | `Affinity` | No | — | Pod scheduling affinity rules. |
 | `tolerations` | `[]Toleration` | No | `[]` | Pod tolerations. |
 | `readOnly` | `bool` | No | `true` | Set the restored database to read-only mode. |
+| `ephemeral` | `bool` | No | `false` | Tear the restore down once it reaches `Active` (postgres came up healthy) instead of keeping it running. The replica only restores again when a newer snapshot is offered (canopy path) or the schedule next fires (legacy path). Used by the `verify` intent, whose job is just to prove the snapshot restores. |
 | `postgresExtraConfig` | `string` | No | — | Extra lines appended to `postgresql.conf` (e.g. `shared_preload_libraries`). |
 | `notifications` | `[]NotificationConfig` | No | `[]` | Notification targets called on restore events. |
 | `persistentSchemas` | `[]string` | No | — | List of schema names to migrate from the previous restore to the new restore on each switchover. See [Persistent schemas](#persistent-schemas) below for the migration time budget and what happens on timeout. |
@@ -180,6 +181,7 @@ Additional fields for `target: graphQL`:
 | `nextScheduledRestore` | `Time` | When the next scheduled restore will occur. |
 | `latestAvailableSnapshot` | `string` | Snapshot ID of the latest available snapshot matching the filter. |
 | `canopyDesiredSnapshotId` | `string` | For canopy-sourced replicas: the snapshot the canopy worklist syncer wants restored. The reconciler triggers a new restore when this differs from the current one. |
+| `verifiedSnapshotId` | `string` | For `ephemeral` replicas: the last snapshot that was verified and then torn down. Gates re-restore — the reconciler only restores again when the desired snapshot differs from this. |
 | `connectionInfo` | `ConnectionInfo` | Connection details (host, port, database, username, password secret). |
 | `queuePosition` | `uint32` | Position in the global restore queue. |
 | `notifications` | `[]NotificationStatus` | Status of each configured notification target. |
diff --git a/crds.yaml b/crds.yaml
index 303bb4d..d38ed30 100644
--- a/crds.yaml
+++ b/crds.yaml
@@ -571,6 +571,17 @@ spec:
                 - group
                 - type
                 type: object
+              ephemeral:
+                default: false
+                description: |-
+                  Ephemeral replica: once a restore reaches `Active` (postgres came up
+                  healthy and, for canopy replicas, the verification was reported),
+                  tear the restore down instead of keeping it running. The replica CR
+                  stays; it only restores again when a new snapshot is offered (canopy
+                  path) or the schedule next fires (legacy path). Used by the `verify`
+                  intent, whose whole job is "prove the snapshot restores" — keeping
+                  the database idling afterward just wastes cluster resources.
+                type: boolean
               kopiaSecretRef:
                 description: |-
                   Reference to a Secret containing kopia repository credentials.
@@ -934,6 +945,15 @@ spec:
               serviceName:
                 nullable: true
                 type: string
+              verifiedSnapshotId:
+                description: |-
+                  Last snapshot id an ephemeral replica (`spec.ephemeral`) verified
+                  and then tore down. After teardown there is no `currentRestore` to
+                  compare against, so this marker is what stops the reconciler from
+                  immediately re-restoring the same snapshot; a restore is only
+                  re-triggered when the desired snapshot differs from this.
+                nullable: true
+                type: string
             type: object
         required:
         - spec
diff --git a/src/controllers/canopy/intent.rs b/src/controllers/canopy/intent.rs
index b5b6387..9fb13a6 100644
--- a/src/controllers/canopy/intent.rs
+++ b/src/controllers/canopy/intent.rs
@@ -41,6 +41,11 @@ pub struct IntentConfig {
 	pub service_annotations: Option<BTreeMap<String, String>>,
 	pub switchover_grace_period: TimeSpan,
 	pub storage_size_override: Quantity,
+	/// Tear the restore down once it's verified healthy rather than keeping
+	/// it running. Materialised into `PostgresPhysicalReplicaSpec.ephemeral`.
+	/// True for `verify` (throwaway snapshot check), false for the
+	/// analytics intents (long-lived query replicas).
+	pub ephemeral: bool,
 	/// Floor on the postgres pod's `/dev/shm` sizing. Materialised into
 	/// `PostgresPhysicalReplicaSpec.shm_size_floor` so the shared
 	/// Deployment builder picks `max(computed_from_resources, floor)`.
@@ -79,6 +84,7 @@ pub fn config_for(intent: &str) -> Option<IntentConfig> {
 			service_annotations: None,
 			switchover_grace_period: TimeSpan(Span::new().minutes(5)),
 			storage_size_override: Quantity("20Gi".to_string()),
+			ephemeral: true,
 			shm_size_floor: Quantity("512Mi".to_string()),
 		}),
 		"analytics-dev" => Some(IntentConfig {
@@ -89,6 +95,7 @@ pub fn config_for(intent: &str) -> Option<IntentConfig> {
 			service_annotations: None,
 			switchover_grace_period: TimeSpan(Span::new().minutes(5)),
 			storage_size_override: Quantity("50Gi".to_string()),
+			ephemeral: false,
 			shm_size_floor: Quantity("2Gi".to_string()),
 		}),
 		"analytics-dbt" => Some(IntentConfig {
@@ -105,6 +112,7 @@ pub fn config_for(intent: &str) -> Option<IntentConfig> {
 			])),
 			switchover_grace_period: TimeSpan(Span::new().minutes(2)),
 			storage_size_override: Quantity("50Gi".to_string()),
+			ephemeral: false,
 			shm_size_floor: Quantity("2Gi".to_string()),
 		}),
 		_ => None,
@@ -160,6 +168,7 @@ impl IntentConfig {
 			affinity: None,
 			tolerations: Vec::new(),
 			read_only: self.read_only,
+			ephemeral: self.ephemeral,
 			postgres_extra_config: None,
 			notifications,
 			persistent_schemas: self.persistent_schemas.clone(),
@@ -213,6 +222,38 @@ mod tests {
 		assert!(cfg.read_only);
 		assert!(cfg.minimum_ttl.is_none());
 		assert!(cfg.persistent_schemas.is_none());
+		assert!(cfg.ephemeral, "verify replicas are torn down after verify");
+	}
+
+	#[test]
+	fn only_verify_is_ephemeral() {
+		assert!(config_for("verify").unwrap().ephemeral);
+		assert!(
+			!config_for("analytics-dev").unwrap().ephemeral,
+			"analytics-dev is a long-lived query replica"
+		);
+		assert!(
+			!config_for("analytics-dbt").unwrap().ephemeral,
+			"analytics-dbt is a long-lived query replica"
+		);
+	}
+
+	#[test]
+	fn to_replica_spec_carries_ephemeral() {
+		let e = entry("verify", "test");
+		assert!(
+			config_for("verify")
+				.unwrap()
+				.to_replica_spec(&e, vec![])
+				.ephemeral
+		);
+		let e = entry("analytics-dev", "test");
+		assert!(
+			!config_for("analytics-dev")
+				.unwrap()
+				.to_replica_spec(&e, vec![])
+				.ephemeral
+		);
 	}
 
 	#[test]
diff --git a/src/controllers/replica.rs b/src/controllers/replica.rs
index 4d9afec..379b553 100644
--- a/src/controllers/replica.rs
+++ b/src/controllers/replica.rs
@@ -357,6 +357,60 @@ pub async fn reconcile(replica: Arc<PostgresPhysicalReplica>, ctx: Arc<Context>)
 		return Ok(Action::requeue(Duration::from_secs(10)));
 	}
 
+	// Ephemeral teardown. For a `spec.ephemeral` replica (e.g. the `verify`
+	// intent) the point of the restore is to prove the snapshot comes up —
+	// once it's Active (the switchover block above ran and, for canopy
+	// replicas, reported the verification), keeping the database running
+	// just wastes resources. Record the verified snapshot and delete the
+	// restore. The replica CR stays; the should_restore logic below only
+	// re-triggers when a newer snapshot is offered (canopy) or the schedule
+	// fires (legacy), gated by `verifiedSnapshotId`.
+	if replica.spec.ephemeral
+		&& let Some(active) = active_restore
+		&& active.metadata.deletion_timestamp.is_none()
+	{
+		let active_name = active.name_any();
+		let verified_snapshot = active.spec.snapshot.clone();
+		info!(
+			replica = name,
+			restore = active_name,
+			snapshot = verified_snapshot,
+			"ephemeral replica: snapshot verified, tearing down restore"
+		);
+
+		// Record the marker BEFORE deleting so that even if the delete or a
+		// crash interleaves, the reconciler won't treat the vanished
+		// restore as an accidental deletion and immediately re-restore.
+		let replicas: Api<PostgresPhysicalReplica> = Api::namespaced(client.clone(), &namespace);
+		let patch = serde_json::json!({
+			"status": {
+				"verifiedSnapshotId": verified_snapshot,
+				"currentRestore": null,
+				"previousRestore": null,
+			}
+		});
+		replicas
+			.patch_status(
+				&name,
+				&PatchParams::apply("postgres-restore-operator"),
+				&Patch::Merge(&patch),
+			)
+			.await?;
+
+		if let Err(e) = restores.delete(&active_name, &Default::default()).await {
+			warn!(
+				replica = name,
+				restore = %active_name,
+				error = %e,
+				"failed to delete ephemeral restore; will retry"
+			);
+		}
+		if let Some(promoted) = ctx.release_restore_slot(&name).await {
+			info!(promoted = %promoted, "promoted queued restore after ephemeral teardown");
+		}
+		return Ok(Action::requeue(Duration::from_secs(30)));
+	}
+
 	// Sweep stale Active restores after grace period.
 	//
 	// Any Active restore for this replica that isn't the current one is a
@@ -909,17 +963,30 @@ pub async fn reconcile(replica: Arc<PostgresPhysicalReplica>, ctx: Arc<Context>)
 	// On the canopy path, the worklist syncer updates
 	// `status.canopyDesiredSnapshotId` when canopy offers a newer snapshot.
 	// Trigger a restore whenever the desired snapshot differs from what the
-	// active restore already carries, in addition to the usual schedule /
+	// replica already carries, in addition to the usual schedule /
 	// never-restored / active-deleted triggers. minimum_ttl still gates:
 	// the intent may declare an explicit lower bound on restore frequency
 	// even in the face of a newer canopy snapshot.
+	//
+	// "What the replica already carries" is normally the active restore's
+	// snapshot, but an ephemeral replica has no active restore after it
+	// tears one down — there we fall back to `verifiedSnapshotId`, the
+	// marker recording the last snapshot we verified. Without that
+	// fallback the `(Some, None)` arm would re-fire forever.
+	let effective_current_snapshot =
+		active_restore.map(|r| r.spec.snapshot.clone()).or_else(|| {
+			replica
+				.status
+				.as_ref()
+				.and_then(|s| s.verified_snapshot_id.clone())
+		});
 	let canopy_desired_changed = is_canopy
 		&& match (
 			replica
 				.status
 				.as_ref()
 				.and_then(|s| s.canopy_desired_snapshot_id.as_ref()),
-			active_restore.map(|r| r.spec.snapshot.as_str()),
+			effective_current_snapshot.as_deref(),
 		) {
 			(Some(desired), Some(current)) => desired != current,
 			(Some(_), None) => true,
diff --git a/src/controllers/replica/scheduling.rs b/src/controllers/replica/scheduling.rs
index fe4957f..5576503 100644
--- a/src/controllers/replica/scheduling.rs
+++ b/src/controllers/replica/scheduling.rs
@@ -273,6 +273,7 @@ mod tests {
 				affinity: None,
 				tolerations: Vec::new(),
 				read_only: true,
+				ephemeral: false,
 				postgres_extra_config: None,
 				notifications: Vec::new(),
 				storage_size_maximum: k8s_openapi::apimachinery::pkg::api::resource::Quantity(
diff --git a/src/controllers/replica/schema_migration.rs b/src/controllers/replica/schema_migration.rs
index 3a66884..ef35f5b 100644
--- a/src/controllers/replica/schema_migration.rs
+++ b/src/controllers/replica/schema_migration.rs
@@ -302,6 +302,7 @@ mod tests {
 				affinity: None,
 				tolerations: vec![],
 				read_only: true,
+				ephemeral: false,
 				postgres_extra_config: None,
 				notifications: vec![],
 				storage_size_maximum: k8s_openapi::apimachinery::pkg::api::resource::Quantity(
diff --git a/src/controllers/replica/tests.rs b/src/controllers/replica/tests.rs
index a85572e..ecb9a89 100644
--- a/src/controllers/replica/tests.rs
+++ b/src/controllers/replica/tests.rs
@@ -38,6 +38,7 @@ fn make_replica(
 			affinity: None,
 			tolerations: vec![],
 			read_only: true,
+			ephemeral: false,
 			postgres_extra_config: None,
 			notifications: vec![],
 			persistent_schemas,
@@ -198,6 +199,7 @@ fn snapshot_list_job_rotates_kopia_logs() {
 			affinity: None,
 			tolerations: vec![],
 			read_only: true,
+			ephemeral: false,
 			postgres_extra_config: None,
 			notifications: vec![],
 			persistent_schemas: None,
diff --git a/src/controllers/restore/tests.rs b/src/controllers/restore/tests.rs
index 676e2e2..0987568 100644
--- a/src/controllers/restore/tests.rs
+++ b/src/controllers/restore/tests.rs
@@ -37,6 +37,7 @@ fn deployment_uses_affinity_not_node_selector() {
 			affinity: None,
 			tolerations: vec![],
 			read_only: true,
+			ephemeral: false,
 			postgres_extra_config: None,
 			notifications: vec![],
 
@@ -123,6 +124,7 @@ fn test_restore_and_replica() -> (PostgresPhysicalRestore, PostgresPhysicalRepli
 			affinity: None,
 			tolerations: vec![],
 			read_only: true,
+			ephemeral: false,
 			postgres_extra_config: None,
 			notifications: vec![],
 
diff --git a/src/types/replica.rs b/src/types/replica.rs
index 33524c0..89f95d3 100644
--- a/src/types/replica.rs
+++ b/src/types/replica.rs
@@ -118,6 +118,16 @@ pub struct PostgresPhysicalReplicaSpec {
 	#[serde(default = "default_read_only")]
 	pub read_only: bool,
 
+	/// Ephemeral replica: once a restore reaches `Active` (postgres came up
+	/// healthy and, for canopy replicas, the verification was reported),
+	/// tear the restore down instead of keeping it running. The replica CR
+	/// stays; it only restores again when a new snapshot is offered (canopy
+	/// path) or the schedule next fires (legacy path). Used by the `verify`
+	/// intent, whose whole job is "prove the snapshot restores" — keeping
+	/// the database idling afterward just wastes cluster resources.
+	#[serde(default)]
+	pub ephemeral: bool,
+
 	/// Extra lines appended to postgresql.conf (e.g. shared_preload_libraries)
 	#[serde(default, skip_serializing_if = "Option::is_none")]
 	pub postgres_extra_config: Option<String>,
@@ -313,6 +323,14 @@ pub struct PostgresPhysicalReplicaStatus {
 	#[serde(default, skip_serializing_if = "Option::is_none")]
 	pub canopy_desired_snapshot_id: Option<String>,
 
+	/// Last snapshot id an ephemeral replica (`spec.ephemeral`) verified
+	/// and then tore down. After teardown there is no `currentRestore` to
+	/// compare against, so this marker is what stops the reconciler from
+	/// immediately re-restoring the same snapshot; a restore is only
+	/// re-triggered when the desired snapshot differs from this.
+	#[serde(default, skip_serializing_if = "Option::is_none")]
+	pub verified_snapshot_id: Option<String>,
+
 	#[serde(default, skip_serializing_if = "Option::is_none")]
 	pub connection_info: Option<ConnectionInfo>,
 
diff --git a/tests/ephemeral.rs b/tests/ephemeral.rs
new file mode 100644
index 0000000..bee851a
--- /dev/null
+++ b/tests/ephemeral.rs
@@ -0,0 +1,107 @@
+use std::time::Duration;
+
+use kube::Api;
+use kube::api::PostParams;
+use postgres_restore_operator::types::PostgresPhysicalReplica;
+
+use helpers::*;
+
+mod helpers;
+
+/// An `ephemeral` replica should restore, come up healthy, then tear the
+/// restore down — leaving no running database — and record the verified
+/// snapshot so it doesn't immediately re-restore.
+#[tokio::test]
+#[ignore = "requires a running Kubernetes cluster with MinIO and kopia"]
+async fn ephemeral_replica_tears_down_after_verify() {
+	let client = make_client().await;
+	let ns = "test-ephemeral";
+
+	setup_namespace(&client, ns).await;
+	cleanup_namespace(&client, ns, &["ephemeral-replica"]).await;
+
+	let secrets: Api<k8s_openapi::api::core::v1::Secret> = Api::namespaced(client.clone(), ns);
+	let replicas: Api<PostgresPhysicalReplica> = Api::namespaced(client.clone(), ns);
+
+	println!("--- creating kopia secret");
+	secrets
+		.create(
+			&PostParams::default(),
+			&build_kopia_secret(ns, "ephemeral-kopia-creds", "test-bucket"),
+		)
+		.await
+		.expect("failed to create kopia secret");
+
+	println!("--- creating ephemeral PostgresPhysicalReplica");
+	// Long schedule so the only restore we observe is the initial one; after
+	// teardown it must NOT restore again within the test window.
+	let mut replica = build_replica(
+		"ephemeral-replica",
+		"ephemeral-kopia-creds",
+		ReplicaOpts {
+			schedule: "0 0 1 1 *".into(), // once a year
+			ephemeral: true,
+			..Default::default()
+		},
+	);
+	replica.metadata.namespace = Some(ns.into());
+	replicas
+		.create(&PostParams::default(), &replica)
+		.await
+		.expect("failed to create replica");
+
+	// Don't wait for the `Ready` phase: on an ephemeral replica it's
+	// transient — the switchover sets Ready and the very next reconcile
+	// (~tens of ms later) tears the restore down and the replica drops back
+	// to Pending. Polling every couple of seconds reliably misses that
+	// window. Wait directly for the durable end state instead: the restore
+	// gone, `verifiedSnapshotId` recorded, `currentRestore` cleared.
+	//
+	// The window before that end state covers the whole restore cycle
+	// (snapshot-list → restore Job → deployment ready → switchover →
+	// teardown), so allow the full phase timeout.
+	println!("--- waiting for the restore to be verified and torn down");
+	let deadline = tokio::time::Instant::now() + PHASE_TIMEOUT;
+	loop {
+		let count = count_restores_for_replica(&client, ns, "ephemeral-replica").await;
+		let replica = replicas
+			.get("ephemeral-replica")
+			.await
+			.expect("get replica");
+		let verified = replica
+			.status
+			.as_ref()
+			.and_then(|s| s.verified_snapshot_id.clone());
+		if count == 0 && verified.is_some() {
+			println!("--- torn down; verifiedSnapshotId={verified:?}");
+			// currentRestore must be cleared so the reconciler doesn't treat
+			// the vanished restore as an accidental deletion.
+			assert!(
+				replica
+					.status
+					.as_ref()
+					.and_then(|s| s.current_restore.as_ref())
+					.is_none(),
+				"currentRestore must be cleared after ephemeral teardown"
+			);
+			break;
+		}
+		if tokio::time::Instant::now() >= deadline {
+			panic!(
+				"restore was not torn down within timeout (count={count}, verified={verified:?})"
+			);
+		}
+		tokio::time::sleep(POLL_INTERVAL).await;
+	}
+
+	println!("--- confirming it stays torn down (no re-restore loop)");
+	tokio::time::sleep(Duration::from_secs(30)).await;
+	let count = count_restores_for_replica(&client, ns, "ephemeral-replica").await;
+	assert_eq!(
+		count, 0,
+		"ephemeral replica must not re-restore the same snapshot after teardown"
+	);
+
+	println!("--- all assertions passed, cleaning up");
+	cleanup_namespace(&client, ns, &["ephemeral-replica"]).await;
+}
diff --git a/tests/helpers.rs b/tests/helpers.rs
index d6b7f72..66b5c43 100644
--- a/tests/helpers.rs
+++ b/tests/helpers.rs
@@ -121,6 +121,7 @@ pub struct ReplicaOpts {
 	pub minimum_ttl: Option<TimeSpan>,
 	pub schedule_jitter: Option<TimeSpan>,
 	pub read_only: bool,
+	pub ephemeral: bool,
 }
 
 impl Default for ReplicaOpts {
@@ -130,6 +131,7 @@ impl Default for ReplicaOpts {
 			minimum_ttl: None,
 			schedule_jitter: None,
 			read_only: true,
+			ephemeral: false,
 		}
 	}
 }
@@ -158,6 +160,7 @@ pub fn build_replica(name: &str, secret_ref: &str, opts: ReplicaOpts) -> Postgre
 			affinity: None,
 			tolerations: vec![],
 			read_only: opts.read_only,
+			ephemeral: opts.ephemeral,
 			postgres_extra_config: None,
 			notifications: vec![],
 			persistent_schemas: None,