From 09eab16e6efd5d093ed92b4142d65c45d98595b7 Mon Sep 17 00:00:00 2001
From: Chris Randles <randles.chris@gmail.com>
Date: Wed, 27 May 2026 09:43:08 -0400
Subject: [PATCH 1/8] fix+test: [healthcheck,alb] auto-probe unconfigured pool
 members

Signed-off-by: Chris Randles <randles.chris@gmail.com>
---
 integration/alb_missing_healthcheck_test.go   | 117 ++++++++++++++++++
 integration/alb_testdata_test.go              |   2 +-
 .../alb_missing_hc/missing_hc.yaml.tmpl       |  36 ++++++
 pkg/backends/alb/client.go                    |  10 +-
 pkg/backends/alb/errors/errors.go             |  11 ++
 pkg/backends/backends.go                      |  49 ++++++--
 pkg/backends/healthcheck/healthcheck.go       |   4 +-
 pkg/backends/healthcheck/options/defaults.go  |   6 +
 pkg/observability/metrics/metrics.go          |  15 +++
 9 files changed, 239 insertions(+), 11 deletions(-)
 create mode 100644 integration/alb_missing_healthcheck_test.go
 create mode 100644 integration/testdata/alb_missing_hc/missing_hc.yaml.tmpl

diff --git a/integration/alb_missing_healthcheck_test.go b/integration/alb_missing_healthcheck_test.go
new file mode 100644
index 000000000..ef1ff2645
--- /dev/null
+++ b/integration/alb_missing_healthcheck_test.go
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2018 The Trickster Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package integration
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"os"
+	"path/filepath"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/trickstercache/trickster/v2/integration/internal/portutil"
+)
+
+// TestALBPoolMemberWithoutHealthcheckNotRouted verifies that a non-virtual ALB
+// pool member configured without a `healthcheck:` block does not receive
+// fanout traffic when its upstream is unhealthy. The provider's default
+// healthcheck must be auto-applied so the pool's dispatch-time filter can
+// observe Failing status and exclude the member.
+func TestALBPoolMemberWithoutHealthcheckNotRouted(t *testing.T) {
+	healthyResp := albTestdata(t, "alb_unavail/healthy.json")
+
+	healthy := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		switch r.URL.Path {
+		case "/api/v1/status/buildinfo":
+			fmt.Fprint(w, `{"status":"success","data":{"version":"2.0"}}`)
+		case "/api/v1/query":
+			fmt.Fprint(w, `{"status":"success","data":{"resultType":"vector","result":[]}}`)
+		default:
+			fmt.Fprint(w, healthyResp)
+		}
+	}))
+	t.Cleanup(healthy.Close)
+
+	var brokenDataHits atomic.Int64
+	broken := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/api/v1/query_range" {
+			brokenDataHits.Add(1)
+		}
+		w.WriteHeader(http.StatusInternalServerError)
+	}))
+	t.Cleanup(broken.Close)
+
+	ports, release := portutil.Reserve(t, 3)
+	frontPort, metricsPort, mgmtPort := ports[0], ports[1], ports[2]
+
+	yaml := fmt.Sprintf(albTestdata(t, "alb_missing_hc/missing_hc.yaml.tmpl"),
+		frontPort, metricsPort, mgmtPort, healthy.URL, broken.URL)
+
+	cfgPath := filepath.Join(t.TempDir(), "trickster.yaml")
+	require.NoError(t, os.WriteFile(cfgPath, []byte(yaml), 0644))
+
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(cancel)
+	release()
+	go startTrickster(t, ctx, expectedStartError{}, "-config", cfgPath)
+
+	metricsAddr := fmt.Sprintf("127.0.0.1:%d", metricsPort)
+	waitForTrickster(t, metricsAddr)
+
+	// Give the auto-applied probe time to fire, trip the broken backend to
+	// Failing, and let the ALB pool refresh its healthy snapshot.
+	time.Sleep(2 * time.Second)
+	dataBaseline := brokenDataHits.Load()
+
+	frontAddr := fmt.Sprintf("127.0.0.1:%d", frontPort)
+	const reqs = 50
+	now := time.Now().Unix()
+	for i := range reqs {
+		// query_range is mergeable in TSM and therefore triggers fanout to
+		// every healthy pool member. Use unique query strings so the cache
+		// doesn't short-circuit subsequent requests.
+		q := url.Values{
+			"query": {fmt.Sprintf("up + 0*%d", i)},
+			"start": {fmt.Sprintf("%d", now-300)},
+			"end":   {fmt.Sprintf("%d", now)},
+			"step":  {"15"},
+		}
+		u := fmt.Sprintf("http://%s/alb-tsm-test/api/v1/query_range?%s",
+			frontAddr, q.Encode())
+		resp, err := http.Get(u)
+		require.NoError(t, err)
+		resp.Body.Close()
+	}
+
+	// Allow a small drain window in case any in-flight fanout slot was still
+	// dispatching when the assertion fired.
+	require.EventuallyWithT(t, func(collect *assert.CollectT) {
+		dataDelta := brokenDataHits.Load() - dataBaseline
+		assert.Equalf(collect, int64(0), dataDelta,
+			"broken backend received %d data requests via the ALB after probe transitioned it to Failing",
+			dataDelta)
+	}, 3*time.Second, 200*time.Millisecond)
+}
diff --git a/integration/alb_testdata_test.go b/integration/alb_testdata_test.go
index 308658135..d28f3b31d 100644
--- a/integration/alb_testdata_test.go
+++ b/integration/alb_testdata_test.go
@@ -21,7 +21,7 @@ import (
 	"testing"
 )
 
-//go:embed testdata/alb_cache testdata/alb_tsm_correctness testdata/alb_response_headers testdata/alb_nested testdata/alb_per_path testdata/alb_unavail testdata/alb_request_headers testdata/alb_compose
+//go:embed testdata/alb_cache testdata/alb_tsm_correctness testdata/alb_response_headers testdata/alb_nested testdata/alb_per_path testdata/alb_unavail testdata/alb_missing_hc testdata/alb_request_headers testdata/alb_compose
 var albTestdataFS embed.FS
 
 func albTestdata(t testing.TB, name string) string {
diff --git a/integration/testdata/alb_missing_hc/missing_hc.yaml.tmpl b/integration/testdata/alb_missing_hc/missing_hc.yaml.tmpl
new file mode 100644
index 000000000..dc2de70c5
--- /dev/null
+++ b/integration/testdata/alb_missing_hc/missing_hc.yaml.tmpl
@@ -0,0 +1,36 @@
+
+frontend:
+  listen_port: %d
+metrics:
+  listen_port: %d
+mgmt:
+  listen_port: %d
+logging:
+  log_level: warn
+caches:
+  mem1:
+    provider: memory
+backends:
+  prom-healthy:
+    provider: prometheus
+    origin_url: %s
+    cache_name: mem1
+    healthcheck:
+      path: /api/v1/status/buildinfo
+      query: ""
+      interval: 100ms
+      timeout: 500ms
+      failure_threshold: 1
+      recovery_threshold: 1
+  prom-broken:
+    provider: prometheus
+    origin_url: %s
+    cache_name: mem1
+  alb-tsm-test:
+    provider: alb
+    alb:
+      mechanism: tsm
+      output_format: prometheus
+      pool:
+        - prom-healthy
+        - prom-broken
diff --git a/pkg/backends/alb/client.go b/pkg/backends/alb/client.go
index ae09f6038..e3ade0789 100644
--- a/pkg/backends/alb/client.go
+++ b/pkg/backends/alb/client.go
@@ -171,7 +171,15 @@ func (c *Client) ValidateAndStartPool(clients backends.Backends, hcs healthcheck
 		}
 		hc, ok := hcs[n]
 		if !ok {
-			// virtual backends (rule, alb) have no health checks; treat as passing
+			// Only virtual backends (rule, alb) may legitimately be absent from
+			// the healthcheck registry. A non-virtual miss means the operator
+			// did not configure healthcheck on this backend and the upstream
+			// auto-apply path also declined to register a probe; fabricating a
+			// dummy Status here would silently route fanout traffic regardless
+			// of upstream health.
+			if !backends.IsVirtual(tc.Configuration().Provider) {
+				return alberr.NewErrMissingHealthCheck(c.Name(), n)
+			}
 			hc = healthcheck.NewStatus(n, "virtual", "", healthcheck.StatusPassing, time.Time{}, nil)
 		}
 		targets = append(targets, pool.NewTarget(tc.Router(), hc, tc))
diff --git a/pkg/backends/alb/errors/errors.go b/pkg/backends/alb/errors/errors.go
index 8335156ac..5755c69c0 100644
--- a/pkg/backends/alb/errors/errors.go
+++ b/pkg/backends/alb/errors/errors.go
@@ -63,3 +63,14 @@ func NewErrInvalidUserRouterCreds(albName string) error {
 			albName),
 	}
 }
+
+// NewErrMissingHealthCheck reports a non-virtual ALB pool member that has no
+// registered healthcheck Status. Without this guard the pool would route
+// traffic to the member regardless of upstream health.
+func NewErrMissingHealthCheck(albName, poolMemberName string) error {
+	return &InvalidALBOptionsError{
+		error: fmt.Errorf("alb [%s] non-virtual pool member [%s] has no health check; "+
+			"configure healthcheck options on the backend",
+			albName, poolMemberName),
+	}
+}
diff --git a/pkg/backends/backends.go b/pkg/backends/backends.go
index 28f97de06..074a50fc1 100644
--- a/pkg/backends/backends.go
+++ b/pkg/backends/backends.go
@@ -21,8 +21,12 @@ import (
 	"net/http"
 
 	"github.com/trickstercache/trickster/v2/pkg/backends/healthcheck"
+	ho "github.com/trickstercache/trickster/v2/pkg/backends/healthcheck/options"
 	bo "github.com/trickstercache/trickster/v2/pkg/backends/options"
 	"github.com/trickstercache/trickster/v2/pkg/backends/providers"
+	"github.com/trickstercache/trickster/v2/pkg/observability/logging"
+	"github.com/trickstercache/trickster/v2/pkg/observability/logging/logger"
+	"github.com/trickstercache/trickster/v2/pkg/observability/metrics"
 )
 
 // Backends represents a map of Backends keyed by Name
@@ -40,20 +44,44 @@ func (b Backends) StartHealthChecks(knownStatuses healthcheck.StatusLookup) (hea
 		}
 		if IsVirtual(bo.Provider) {
 			// Virtual backends have no upstream to probe; register a synthetic
-			// passing status so nested ALBs surface in the health page and in
-			// outer pools' availablePoolMembers (issue #996).
+			// passing status so they surface in the health page and in outer
+			// ALB pool reporting.
 			hc.RegisterVirtual(k, bo.Provider)
 			continue
 		}
 		hco := bo.HealthCheck
-		if hco == nil {
+		def := c.DefaultHealthCheckConfig()
+		if hco == nil && def == nil {
+			// no probe possible for this provider; the ALB pool will reject
+			// this backend as a non-virtual member with no Status.
 			continue
 		}
-		bo.HealthCheck = c.DefaultHealthCheckConfig()
-		if bo.HealthCheck == nil {
-			bo.HealthCheck = hco
+		if def != nil {
+			bo.HealthCheck = def
+			if hco != nil {
+				bo.HealthCheck.Overlay(hco)
+			}
 		} else {
-			bo.HealthCheck.Overlay(hco)
+			bo.HealthCheck = hco
+		}
+		var autoApplied bool
+		if bo.HealthCheck.Interval <= 0 {
+			// The operator did not configure a probe interval. A registered
+			// target with Interval == 0 never ticks, so its Status sticks at
+			// StatusUnchecked and the ALB pool's dispatch-time filter keeps
+			// routing fanout traffic regardless of upstream health. Apply a
+			// fast auto-default and surface that we did so.
+			bo.HealthCheck.Interval = ho.DefaultAutoProbeInterval
+			if bo.HealthCheck.FailureThreshold <= 0 {
+				bo.HealthCheck.FailureThreshold = 1
+			}
+			if bo.HealthCheck.RecoveryThreshold <= 0 {
+				bo.HealthCheck.RecoveryThreshold = 1
+			}
+			autoApplied = true
+			metrics.BackendsDefaultHealthCheckApplied.WithLabelValues(k, bo.Provider).Inc()
+			logger.Warn("auto-applied default healthcheck for backend",
+				logging.Pairs{"backend_name": k, "provider": bo.Provider})
 		}
 		st, err := hc.Register(k, bo.Provider, bo.HealthCheck, c.HealthCheckHTTPClient())
 		if err != nil {
@@ -63,6 +91,13 @@ func (b Backends) StartHealthChecks(knownStatuses healthcheck.StatusLookup) (hea
 			if v := oldSt.Get(); v != healthcheck.StatusInitializing {
 				st.Set(v)
 			}
+		} else if autoApplied {
+			// Override the registration-time Initializing with Unchecked so the
+			// pool's dispatch filter accepts the target during the brief window
+			// before the first auto-probe completes. Targets the operator
+			// explicitly configured keep Initializing so they're held out
+			// until the probe confirms them.
+			st.Set(healthcheck.StatusUnchecked)
 		}
 		c.SetHealthCheckProbe(st.Prober())
 	}
diff --git a/pkg/backends/healthcheck/healthcheck.go b/pkg/backends/healthcheck/healthcheck.go
index 6c70f426f..5cb2ad01a 100644
--- a/pkg/backends/healthcheck/healthcheck.go
+++ b/pkg/backends/healthcheck/healthcheck.go
@@ -32,8 +32,8 @@ type HealthChecker interface {
 	// Register a health check Target
 	Register(name string, description string, options *ho.Options, client *http.Client) (*Status, error)
 	// RegisterVirtual records a synthetic always-passing Status for a virtual
-	// backend (rule, alb) that has no upstream to probe. Without this,
-	// nested ALBs render as "nc" on the health page (issue #996).
+	// backend (rule, alb) that has no upstream to probe, so it surfaces in
+	// the health page and in outer ALB pool reporting.
 	RegisterVirtual(name, description string) *Status
 	// Remove a health check Target
 	Unregister(name string)
diff --git a/pkg/backends/healthcheck/options/defaults.go b/pkg/backends/healthcheck/options/defaults.go
index 8535bca85..5856cfc6c 100644
--- a/pkg/backends/healthcheck/options/defaults.go
+++ b/pkg/backends/healthcheck/options/defaults.go
@@ -23,4 +23,10 @@ import (
 const (
 	// DefaultHealthCheckTimeout is the default duration for health check probes to wait before timing out
 	DefaultHealthCheckTimeout = 3 * time.Second
+
+	// DefaultAutoProbeInterval is applied when StartHealthChecks auto-installs
+	// a provider's default health-check config because the operator did not
+	// configure one. The auto-applied probe must fire on a tick or the
+	// downstream pool filter will never see a status transition.
+	DefaultAutoProbeInterval = 5 * time.Second
 )
diff --git a/pkg/observability/metrics/metrics.go b/pkg/observability/metrics/metrics.go
index 010ef87c3..54ef8f0ae 100644
--- a/pkg/observability/metrics/metrics.go
+++ b/pkg/observability/metrics/metrics.go
@@ -453,6 +453,20 @@ var (
 		},
 		[]string{"backend_name"},
 	)
+
+	// BackendsDefaultHealthCheckApplied counts backends for which the provider's
+	// DefaultHealthCheckConfig was auto-installed by StartHealthChecks because
+	// the operator did not configure healthcheck options. A non-zero value
+	// flags configs that depended on an absent probe and benefit from auditing.
+	BackendsDefaultHealthCheckApplied = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace: metricNamespace,
+			Subsystem: healthSubsystem,
+			Name:      "default_config_applied_total",
+			Help:      "Count of backends that received an auto-applied default healthcheck config because none was configured.",
+		},
+		[]string{"backend_name", "provider"},
+	)
 )
 
 func init() {
@@ -478,6 +492,7 @@ func init() {
 	prometheus.MustRegister(CacheIndexPanicRecovered)
 	prometheus.MustRegister(HealthHandlerPanicRecovered)
 	prometheus.MustRegister(HealthcheckStatusNotifyPanicRecovered)
+	prometheus.MustRegister(BackendsDefaultHealthCheckApplied)
 	prometheus.MustRegister(CacheObjectOperations)
 	prometheus.MustRegister(CacheByteOperations)
 	prometheus.MustRegister(CacheEvents)

From 18f777eb49a3beb25cc1595327ede6693e9d1c57 Mon Sep 17 00:00:00 2001
From: Chris Randles <randles.chris@gmail.com>
Date: Wed, 27 May 2026 10:37:21 -0400
Subject: [PATCH 2/8] test: demonstrate dpc chunk test wallclock regression

Signed-off-by: Chris Randles <randles.chris@gmail.com>

From dc0c4c507c78b74485ad3f4ffe1024b5df6471e6 Mon Sep 17 00:00:00 2001
From: Chris Randles <randles.chris@gmail.com>
Date: Wed, 27 May 2026 10:46:10 -0400
Subject: [PATCH 3/8] test: [proxy/engines] anchor chunk-test reference time to
 wallclock

Signed-off-by: Chris Randles <randles.chris@gmail.com>
---
 .../engines/deltaproxycache_chunk_test.go     | 43 ++++++++++++++++---
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/pkg/proxy/engines/deltaproxycache_chunk_test.go b/pkg/proxy/engines/deltaproxycache_chunk_test.go
index 85c30202b..46f9f2686 100644
--- a/pkg/proxy/engines/deltaproxycache_chunk_test.go
+++ b/pkg/proxy/engines/deltaproxycache_chunk_test.go
@@ -29,6 +29,32 @@ import (
 	"github.com/trickstercache/trickster/v2/pkg/timeseries"
 )
 
+// DeltaProxyCacheRequest rejects requests whose extent ends before
+// now - (step * TimeseriesRetention) by routing them straight through
+// DoProxy (engine=HTTPProxy, status=proxy-only). With default
+// TimeseriesRetention=1024 and step=1h that window is ~42.67d, measured
+// against real wallclock. The two helpers below anchor each test's
+// reference 'now' to a recent chunk bucket so the queries stay inside
+// that window regardless of when the test runs, while preserving the
+// in-bucket vs cross-bucket topologies the tests need to exercise.
+//
+// chunkSize = step * cache/options/defaults.DefaultTimeseriesChunkFactor (420).
+
+func chunkBucketAnchor(step time.Duration) time.Time {
+	chunkSize := step * 420
+	// midpoint of the most-recently-completed chunk bucket; queries within
+	// ±200h of this point stay inside one bucket.
+	return time.Now().Add(-chunkSize).Truncate(chunkSize).Add(chunkSize / 2)
+}
+
+func chunkBucketStraddle(step time.Duration) time.Time {
+	chunkSize := step * 420
+	// 11h after a chunk-bucket boundary; the Q1 (-30h..-12h) and
+	// Q2 (-10h..-8h) extents used by the cross-bucket tests fall on
+	// opposite sides of the boundary just before 'now'.
+	return time.Now().Add(-chunkSize).Truncate(chunkSize).Add(11 * time.Hour)
+}
+
 func TestDeltaProxyCacheRequestMissThenHitChunksChunks(t *testing.T) {
 	ts, w, r, rsc, err := setupTestHarnessDPC()
 	rsc.CacheConfig.UseCacheChunking = true
@@ -608,8 +634,12 @@ func TestDeltaProxyCacheRequestRangeMissChunks(t *testing.T) {
 
 	step := time.Duration(3600) * time.Second
 
-	// fixed to keep all three queries in the same 17.5d chunk bucket; see the _CrossBucket test
-	now := time.Date(2026, 4, 15, 12, 0, 0, 0, time.UTC)
+	// anchor 'now' to the midpoint of a recently-completed 17.5d chunk bucket
+	// so all three queries land in the same bucket while staying inside the
+	// DPC eviction retention window (step * TimeseriesRetention = 42.67d at
+	// defaults). A fixed calendar date would eventually drift outside that
+	// window relative to wallclock and trip the proxy-only bypass.
+	now := chunkBucketAnchor(step)
 	end := now.Add(-time.Duration(12) * time.Hour)
 
 	extr := timeseries.Extent{Start: end.Add(-time.Duration(18) * time.Hour), End: end}
@@ -756,8 +786,11 @@ func TestDeltaProxyCacheRequestRangeMissChunks_CrossBucket(t *testing.T) {
 
 	step := time.Duration(3600) * time.Second
 
-	// kmiss write in bucket ending 2026-04-20T00:00Z; high-end read in next bucket
-	now := time.Date(2026, 4, 20, 11, 0, 0, 0, time.UTC)
+	// position 'now' 11h after a chunk-bucket boundary so the Q1 kmiss write
+	// (now-30h..now-12h) lands in the prior bucket and the Q2 high-end read
+	// (now-10h..now-8h) lands in the next bucket, exercising the cross-bucket
+	// recovery path while staying inside DPC eviction retention.
+	now := chunkBucketStraddle(step)
 	end := now.Add(-time.Duration(12) * time.Hour)
 
 	extr := timeseries.Extent{Start: end.Add(-time.Duration(18) * time.Hour), End: end}
@@ -837,7 +870,7 @@ func TestDeltaProxyCacheRequestRangeMissChunks_CrossBucketPreservesPriorChunks(t
 	o.FastForwardDisable = true
 
 	step := time.Duration(3600) * time.Second
-	now := time.Date(2026, 4, 20, 11, 0, 0, 0, time.UTC)
+	now := chunkBucketStraddle(step)
 
 	runQuery := func(start, end time.Time) *http.Response {
 		u := r.URL

From 4c61fd597d9426a0de394a2b7eea058768a104c2 Mon Sep 17 00:00:00 2001
From: Chris Randles <randles.chris@gmail.com>
Date: Wed, 27 May 2026 10:59:26 -0400
Subject: [PATCH 4/8] test: [proxy/engines] pin retention so fixed chunk
 fixtures clear wallclock gate

Signed-off-by: Chris Randles <randles.chris@gmail.com>
---
 .../engines/deltaproxycache_chunk_test.go     | 46 ++++---------------
 1 file changed, 8 insertions(+), 38 deletions(-)

diff --git a/pkg/proxy/engines/deltaproxycache_chunk_test.go b/pkg/proxy/engines/deltaproxycache_chunk_test.go
index 46f9f2686..23d442b71 100644
--- a/pkg/proxy/engines/deltaproxycache_chunk_test.go
+++ b/pkg/proxy/engines/deltaproxycache_chunk_test.go
@@ -29,32 +29,6 @@ import (
 	"github.com/trickstercache/trickster/v2/pkg/timeseries"
 )
 
-// DeltaProxyCacheRequest rejects requests whose extent ends before
-// now - (step * TimeseriesRetention) by routing them straight through
-// DoProxy (engine=HTTPProxy, status=proxy-only). With default
-// TimeseriesRetention=1024 and step=1h that window is ~42.67d, measured
-// against real wallclock. The two helpers below anchor each test's
-// reference 'now' to a recent chunk bucket so the queries stay inside
-// that window regardless of when the test runs, while preserving the
-// in-bucket vs cross-bucket topologies the tests need to exercise.
-//
-// chunkSize = step * cache/options/defaults.DefaultTimeseriesChunkFactor (420).
-
-func chunkBucketAnchor(step time.Duration) time.Time {
-	chunkSize := step * 420
-	// midpoint of the most-recently-completed chunk bucket; queries within
-	// ±200h of this point stay inside one bucket.
-	return time.Now().Add(-chunkSize).Truncate(chunkSize).Add(chunkSize / 2)
-}
-
-func chunkBucketStraddle(step time.Duration) time.Time {
-	chunkSize := step * 420
-	// 11h after a chunk-bucket boundary; the Q1 (-30h..-12h) and
-	// Q2 (-10h..-8h) extents used by the cross-bucket tests fall on
-	// opposite sides of the boundary just before 'now'.
-	return time.Now().Add(-chunkSize).Truncate(chunkSize).Add(11 * time.Hour)
-}
-
 func TestDeltaProxyCacheRequestMissThenHitChunksChunks(t *testing.T) {
 	ts, w, r, rsc, err := setupTestHarnessDPC()
 	rsc.CacheConfig.UseCacheChunking = true
@@ -631,15 +605,12 @@ func TestDeltaProxyCacheRequestRangeMissChunks(t *testing.T) {
 	rsc.CacheConfig.Provider = "test"
 
 	o.FastForwardDisable = true
+	o.TimeseriesRetention = 1 << 20 // pin against wallclock drift on fixed fixture
 
 	step := time.Duration(3600) * time.Second
 
-	// anchor 'now' to the midpoint of a recently-completed 17.5d chunk bucket
-	// so all three queries land in the same bucket while staying inside the
-	// DPC eviction retention window (step * TimeseriesRetention = 42.67d at
-	// defaults). A fixed calendar date would eventually drift outside that
-	// window relative to wallclock and trip the proxy-only bypass.
-	now := chunkBucketAnchor(step)
+	// fixed to keep all three queries in the same 17.5d chunk bucket; see the _CrossBucket test
+	now := time.Date(2026, 4, 15, 12, 0, 0, 0, time.UTC)
 	end := now.Add(-time.Duration(12) * time.Hour)
 
 	extr := timeseries.Extent{Start: end.Add(-time.Duration(18) * time.Hour), End: end}
@@ -783,14 +754,12 @@ func TestDeltaProxyCacheRequestRangeMissChunks_CrossBucket(t *testing.T) {
 	o := rsc.BackendOptions
 	rsc.CacheConfig.Provider = "test"
 	o.FastForwardDisable = true
+	o.TimeseriesRetention = 1 << 20 // pin against wallclock drift on fixed fixture
 
 	step := time.Duration(3600) * time.Second
 
-	// position 'now' 11h after a chunk-bucket boundary so the Q1 kmiss write
-	// (now-30h..now-12h) lands in the prior bucket and the Q2 high-end read
-	// (now-10h..now-8h) lands in the next bucket, exercising the cross-bucket
-	// recovery path while staying inside DPC eviction retention.
-	now := chunkBucketStraddle(step)
+	// kmiss write in bucket ending 2026-04-20T00:00Z; high-end read in next bucket
+	now := time.Date(2026, 4, 20, 11, 0, 0, 0, time.UTC)
 	end := now.Add(-time.Duration(12) * time.Hour)
 
 	extr := timeseries.Extent{Start: end.Add(-time.Duration(18) * time.Hour), End: end}
@@ -868,9 +837,10 @@ func TestDeltaProxyCacheRequestRangeMissChunks_CrossBucketPreservesPriorChunks(t
 	o := rsc.BackendOptions
 	rsc.CacheConfig.Provider = "test"
 	o.FastForwardDisable = true
+	o.TimeseriesRetention = 1 << 20 // pin against wallclock drift on fixed fixture
 
 	step := time.Duration(3600) * time.Second
-	now := chunkBucketStraddle(step)
+	now := time.Date(2026, 4, 20, 11, 0, 0, 0, time.UTC)
 
 	runQuery := func(start, end time.Time) *http.Response {
 		u := r.URL

From baba35b8e088099aa502673de935e61834969224 Mon Sep 17 00:00:00 2001
From: Chris Randles <randles.chris@gmail.com>
Date: Wed, 27 May 2026 11:23:00 -0400
Subject: [PATCH 5/8] test: [integration/engines] ignore auto-applied
 healthcheck probes in origin counter

Signed-off-by: Chris Randles <randles.chris@gmail.com>
---
 integration/engines_test.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/integration/engines_test.go b/integration/engines_test.go
index c39782cac..e2f9e62ac 100644
--- a/integration/engines_test.go
+++ b/integration/engines_test.go
@@ -70,6 +70,14 @@ func engineSetup(t *testing.T) *engineFakeOrigin {
 		srv := &httptest.Server{
 			Listener: ln,
 			Config: &http.Server{Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				// auto-applied prometheus healthcheck probes `/api/v1/query?query=up`;
+				// short-circuit them so per-test counters only see data traffic
+				if r.URL.Path == "/api/v1/query" && r.URL.Query().Get("query") == "up" {
+					w.Header().Set("Content-Type", "application/json")
+					w.WriteHeader(http.StatusOK)
+					_, _ = io.WriteString(w, `{"status":"success","data":{"resultType":"vector","result":[]}}`)
+					return
+				}
 				o.mu.Lock()
 				h := o.handler
 				o.mu.Unlock()

From 1ef6e30fdf77104e87371f61416c3ca95ede5979 Mon Sep 17 00:00:00 2001
From: Chris Randles <randles.chris@gmail.com>
Date: Wed, 27 May 2026 13:03:34 -0400
Subject: [PATCH 6/8] fix: [alb] warn + metric when healthy_floor admits
 failing members

Signed-off-by: Chris Randles <randles.chris@gmail.com>
---
 integration/alb_missing_healthcheck_test.go   | 54 +++++++++++++++++++
 .../alb_missing_hc/floor_warn.yaml.tmpl       | 52 ++++++++++++++++++
 pkg/backends/alb/client.go                    | 17 ++++++
 pkg/observability/metrics/metrics.go          | 16 ++++++
 4 files changed, 139 insertions(+)
 create mode 100644 integration/testdata/alb_missing_hc/floor_warn.yaml.tmpl

diff --git a/integration/alb_missing_healthcheck_test.go b/integration/alb_missing_healthcheck_test.go
index ef1ff2645..1d652bb1a 100644
--- a/integration/alb_missing_healthcheck_test.go
+++ b/integration/alb_missing_healthcheck_test.go
@@ -24,6 +24,7 @@ import (
 	"net/url"
 	"os"
 	"path/filepath"
+	"strings"
 	"sync/atomic"
 	"testing"
 	"time"
@@ -115,3 +116,56 @@ func TestALBPoolMemberWithoutHealthcheckNotRouted(t *testing.T) {
 			dataDelta)
 	}, 3*time.Second, 200*time.Millisecond)
 }
+
+// TestALBHealthyFloorAdmitsFailingMetric verifies the warning surface for an
+// ALB whose healthy_floor admits Failing members. An operator who lowered
+// healthy_floor below 0 to keep traffic flowing during the Initializing
+// startup window also admits members the probe has confirmed broken; the
+// `trickster_alb_pool_admits_failing` gauge surfaces that misconfiguration.
+func TestALBHealthyFloorAdmitsFailingMetric(t *testing.T) {
+	healthy := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		fmt.Fprint(w, `{"status":"success","data":{"version":"2.0"}}`)
+	}))
+	t.Cleanup(healthy.Close)
+	broken := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusInternalServerError)
+	}))
+	t.Cleanup(broken.Close)
+
+	ports, release := portutil.Reserve(t, 3)
+	frontPort, metricsPort, mgmtPort := ports[0], ports[1], ports[2]
+
+	yaml := fmt.Sprintf(albTestdata(t, "alb_missing_hc/floor_warn.yaml.tmpl"),
+		frontPort, metricsPort, mgmtPort, healthy.URL, broken.URL)
+	cfgPath := filepath.Join(t.TempDir(), "trickster.yaml")
+	require.NoError(t, os.WriteFile(cfgPath, []byte(yaml), 0644))
+
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(cancel)
+	release()
+	go startTrickster(t, ctx, expectedStartError{}, "-config", cfgPath)
+
+	metricsAddr := fmt.Sprintf("127.0.0.1:%d", metricsPort)
+	waitForTrickster(t, metricsAddr)
+
+	require.EventuallyWithT(t, func(collect *assert.CollectT) {
+		lines := checkTricksterMetrics(t, metricsAddr)
+		var admits, excludes string
+		for _, l := range lines {
+			if strings.HasPrefix(l, "trickster_alb_pool_admits_failing{") {
+				if strings.Contains(l, `backend_name="alb-admits-failing"`) {
+					admits = l
+				}
+				if strings.Contains(l, `backend_name="alb-excludes-failing"`) {
+					excludes = l
+				}
+			}
+		}
+		assert.True(collect, strings.HasSuffix(admits, " 1"),
+			"alb-admits-failing must report 1: %q", admits)
+		assert.True(collect, strings.HasSuffix(excludes, " 0"),
+			"alb-excludes-failing must report 0: %q", excludes)
+	}, 5*time.Second, 200*time.Millisecond)
+}
diff --git a/integration/testdata/alb_missing_hc/floor_warn.yaml.tmpl b/integration/testdata/alb_missing_hc/floor_warn.yaml.tmpl
new file mode 100644
index 000000000..d11121ee8
--- /dev/null
+++ b/integration/testdata/alb_missing_hc/floor_warn.yaml.tmpl
@@ -0,0 +1,52 @@
+
+frontend:
+  listen_port: %d
+metrics:
+  listen_port: %d
+mgmt:
+  listen_port: %d
+logging:
+  log_level: warn
+caches:
+  mem1:
+    provider: memory
+backends:
+  prom-healthy:
+    provider: prometheus
+    origin_url: %s
+    cache_name: mem1
+    healthcheck:
+      path: /api/v1/status/buildinfo
+      query: ""
+      interval: 100ms
+      timeout: 500ms
+      failure_threshold: 1
+      recovery_threshold: 1
+  prom-broken:
+    provider: prometheus
+    origin_url: %s
+    cache_name: mem1
+    healthcheck:
+      path: /api/v1/status/buildinfo
+      query: ""
+      interval: 100ms
+      timeout: 500ms
+      failure_threshold: 1
+      recovery_threshold: 1
+  alb-admits-failing:
+    provider: alb
+    alb:
+      mechanism: tsm
+      output_format: prometheus
+      healthy_floor: -1
+      pool:
+        - prom-healthy
+        - prom-broken
+  alb-excludes-failing:
+    provider: alb
+    alb:
+      mechanism: tsm
+      output_format: prometheus
+      pool:
+        - prom-healthy
+        - prom-broken
diff --git a/pkg/backends/alb/client.go b/pkg/backends/alb/client.go
index e3ade0789..fc68d552b 100644
--- a/pkg/backends/alb/client.go
+++ b/pkg/backends/alb/client.go
@@ -34,6 +34,9 @@ import (
 	rt "github.com/trickstercache/trickster/v2/pkg/backends/providers/registry/types"
 	"github.com/trickstercache/trickster/v2/pkg/cache"
 	"github.com/trickstercache/trickster/v2/pkg/errors"
+	"github.com/trickstercache/trickster/v2/pkg/observability/logging"
+	"github.com/trickstercache/trickster/v2/pkg/observability/logging/logger"
+	"github.com/trickstercache/trickster/v2/pkg/observability/metrics"
 	authopt "github.com/trickstercache/trickster/v2/pkg/proxy/authenticator/options"
 	authreg "github.com/trickstercache/trickster/v2/pkg/proxy/authenticator/registry"
 	"github.com/trickstercache/trickster/v2/pkg/proxy/handlers"
@@ -187,6 +190,20 @@ func (c *Client) ValidateAndStartPool(clients backends.Backends, hcs healthcheck
 	if pm, ok := c.handler.(types.PoolMechanism); ok {
 		pm.SetPool(pool.New(targets, o.HealthyFloor))
 	}
+	if o.HealthyFloor <= int(healthcheck.StatusFailing) {
+		// floor admits members whose probe has confirmed them down; operators
+		// who lowered the floor to keep traffic flowing during the startup
+		// Initializing window may not realize Failing slips in too.
+		metrics.ALBPoolAdmitsFailing.WithLabelValues(c.Name()).Set(1)
+		logger.Warn("alb healthy_floor admits members in Failing state",
+			logging.Pairs{
+				"backend_name":  c.Name(),
+				"healthy_floor": o.HealthyFloor,
+				"hint":          "set healthy_floor: 0 to exclude probed-failing members",
+			})
+	} else {
+		metrics.ALBPoolAdmitsFailing.WithLabelValues(c.Name()).Set(0)
+	}
 	return nil
 }
 
diff --git a/pkg/observability/metrics/metrics.go b/pkg/observability/metrics/metrics.go
index 54ef8f0ae..cf2f31e8f 100644
--- a/pkg/observability/metrics/metrics.go
+++ b/pkg/observability/metrics/metrics.go
@@ -467,6 +467,21 @@ var (
 		},
 		[]string{"backend_name", "provider"},
 	)
+
+	// ALBPoolAdmitsFailing flags ALB pools whose healthy_floor admits a Failing
+	// status (floor <= StatusFailing). Operators who set floor below 0 to keep
+	// traffic flowing during the Initializing window may not realize they're
+	// also admitting members the probe has confirmed broken; the gauge surfaces
+	// that misconfiguration without spamming logs.
+	ALBPoolAdmitsFailing = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: metricNamespace,
+			Subsystem: albSubsystem,
+			Name:      "pool_admits_failing",
+			Help:      "1 when an ALB pool's healthy_floor admits members in Failing state; 0 otherwise.",
+		},
+		[]string{"backend_name"},
+	)
 )
 
 func init() {
@@ -493,6 +508,7 @@ func init() {
 	prometheus.MustRegister(HealthHandlerPanicRecovered)
 	prometheus.MustRegister(HealthcheckStatusNotifyPanicRecovered)
 	prometheus.MustRegister(BackendsDefaultHealthCheckApplied)
+	prometheus.MustRegister(ALBPoolAdmitsFailing)
 	prometheus.MustRegister(CacheObjectOperations)
 	prometheus.MustRegister(CacheByteOperations)
 	prometheus.MustRegister(CacheEvents)

From 6c24f39fdfc6e1566f5261000b6db655665858a1 Mon Sep 17 00:00:00 2001
From: Chris Randles <randles.chris@gmail.com>
Date: Wed, 27 May 2026 13:36:51 -0400
Subject: [PATCH 7/8] docs: [alb,metrics] document healthy_floor pitfall and
 new admits-failing gauge

Signed-off-by: Chris Randles <randles.chris@gmail.com>
---
 docs/alb.md                         |  2 ++
 docs/metrics.md                     |  4 ++++
 pkg/backends/alb/options/options.go | 14 ++++++++------
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/docs/alb.md b/docs/alb.md
index 562aeb412..cd391db0a 100644
--- a/docs/alb.md
+++ b/docs/alb.md
@@ -447,6 +447,8 @@ Each ALB has a configurable `healthy_floor` value, which is the threshold for de
 
 Backends that do not have a [health check interval](./health#example+health+check+configuration+for+use+in+alb) configured will remain in a permanent state of `unknown`. Backends will also be in an `unknown` state from the time Trickster starts until the first of any configured automated health check is completed. Note that if an ALB is configured with `healthy_floor: 1`, any pool members that are not configured with an automated health check interval will never be included in the ALB's healthy pool, as their state is permanently `0`.
 
+Setting `healthy_floor` below `0` admits members the probe has confirmed `unavailable`, not just members in the transient `unknown` state. If your goal is to keep traffic flowing during the cold-start window before the first probes complete, lower the pool members' `recovery_threshold` so they transition out of `unknown` faster -- don't lower the floor. When `healthy_floor < 0` Trickster emits a startup warning and sets the `trickster_alb_pool_admits_failing{backend_name}` gauge to `1`.
+
 ### Example ALB Configuration Routing Only To Known Healthy Backends
 
 ```yaml
diff --git a/docs/metrics.md b/docs/metrics.md
index 917d55862..d95bf4e38 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -91,6 +91,10 @@ The following metrics are available for polling with any Trickster configuration
     * `operation` - the name of the operation being performed (read, write, etc.)
     * `status` - the result of the operation being performed
 
+* `trickster_alb_pool_admits_failing` (Gauge) - 1 when an ALB pool's `healthy_floor` admits members in the `unavailable` state, 0 otherwise. See [alb.md](./alb.md#health-based-backend-selection) for the recommended floor.
+  * labels:
+    * `backend_name` - the name of the configured ALB backend
+
 ---
 
 The following metrics are available only for Caches Types whose object lifecycle Trickster manages internally (Memory, Filesystem and bbolt):
diff --git a/pkg/backends/alb/options/options.go b/pkg/backends/alb/options/options.go
index 97023d0eb..7551fe1b5 100644
--- a/pkg/backends/alb/options/options.go
+++ b/pkg/backends/alb/options/options.go
@@ -38,12 +38,14 @@ type Options struct {
 	MechanismName string `yaml:"mechanism,omitempty"`
 	// Pool provides the list of backend names to be used by the load balancer
 	Pool []string `yaml:"pool,omitempty"`
-	// HealthyFloor is the minimum health check status value to be considered Available in the pool
-	// -1 : all pool members are Available regardless of health check status
-	//  0 (default) : pool members with status of unknown (0) or healthy (1) are Available
-	//  1 : only pool members with status of healthy (1) are Available
-	// unknown means the first hc hasn't returned yet,
-	// or (more likely) HealthCheck Interval on target backend is not set
+	// HealthyFloor is the minimum health check status value admitted to the pool.
+	// Values below 0 admit members the probe has confirmed Failing; only set
+	// this below 0 if you intend to route to known-broken upstreams.
+	// -1 : all pool members admitted, including Failing
+	//  0 (default) : Unknown (0) and Healthy (1) admitted, Failing rejected
+	//  1 : only Healthy (1) admitted
+	// Unknown means the first health check hasn't returned yet, or the target
+	// backend has no health check interval configured.
 	HealthyFloor int `yaml:"healthy_floor,omitempty"`
 	// MaxCaptureBytes overrides the backend-level max_capture_bytes for this
 	// ALB's fanout members. Set this when the ALB's expected response shape

From c7780a8391a23f5df1ebae2c33a3744114180ab4 Mon Sep 17 00:00:00 2001
From: Chris Randles <randles.chris@gmail.com>
Date: Thu, 28 May 2026 10:48:15 -0400
Subject: [PATCH 8/8] fix: [alb] validate healthy_floor and warn on degraded
 single-member dispatch

Signed-off-by: Chris Randles <randles.chris@gmail.com>
---
 docs/alb.md                                   |   2 +-
 docs/health.md                                |   2 +
 docs/metrics.md                               |   4 +
 integration/alb_missing_healthcheck_test.go   | 203 +++++++++++-------
 integration/engines_test.go                   |   8 -
 ...missing_hc.yaml.tmpl => degrade.yaml.tmpl} |  17 +-
 .../alb_missing_hc/floor_reset.yaml.tmpl      |  25 +++
 pkg/backends/alb/client.go                    |  36 +++-
 pkg/backends/alb/errors/errors.go             |  11 -
 .../alb/mech/tsm/time_series_merge.go         |  42 +++-
 pkg/backends/alb/pool/pool.go                 |   8 +
 pkg/backends/backends.go                      |  45 +---
 pkg/backends/healthcheck/options/defaults.go  |   6 -
 pkg/observability/metrics/metrics.go          |  30 +--
 14 files changed, 255 insertions(+), 184 deletions(-)
 rename integration/testdata/alb_missing_hc/{missing_hc.yaml.tmpl => degrade.yaml.tmpl} (69%)
 create mode 100644 integration/testdata/alb_missing_hc/floor_reset.yaml.tmpl

diff --git a/docs/alb.md b/docs/alb.md
index cd391db0a..c5d0a7709 100644
--- a/docs/alb.md
+++ b/docs/alb.md
@@ -445,7 +445,7 @@ A backend will report one of three possible health states to its ALBs: `unavaila
 
 Each ALB has a configurable `healthy_floor` value, which is the threshold for determining which pool members are included in the healthy pool, based on their instantaneous health state. The `healthy_floor` represents the minimum acceptable health state value for inclusion in the healthy pool. The default `healthy_floor` value is `0`, meaning Backends in a state `>= 0` (`unknown` and `available`) are included in the healthy pool. Setting `healthy_floor: 1` would include only `available` Backends, while a value of `-1` will include all backends in the configured pool, including those marked as `unavailable`.
 
-Backends that do not have a [health check interval](./health#example+health+check+configuration+for+use+in+alb) configured will remain in a permanent state of `unknown`. Backends will also be in an `unknown` state from the time Trickster starts until the first of any configured automated health check is completed. Note that if an ALB is configured with `healthy_floor: 1`, any pool members that are not configured with an automated health check interval will never be included in the ALB's healthy pool, as their state is permanently `0`.
+Backends that do not have a [health check interval](./health#example+health+check+configuration+for+use+in+alb) configured will remain in a permanent state of `unknown`. Backends will also be in an `unknown` state from the time Trickster starts until the first of any configured automated health check is completed. A pool member in a permanent `unknown` state can never reach `available`, so a `healthy_floor: 1` ALB whose members lack health checks would have an empty pool and return `502` for every request. To avoid that, Trickster resets such an ALB's effective floor to `0` at startup, emits a warning naming the ALB and the un-probed members, and sets the `trickster_alb_pool_floor_reset{backend_name}` gauge to `1`. Configure a health check interval on those members if you want `healthy_floor: 1` to apply.
 
 Setting `healthy_floor` below `0` admits members the probe has confirmed `unavailable`, not just members in the transient `unknown` state. If your goal is to keep traffic flowing during the cold-start window before the first probes complete, lower the pool members' `recovery_threshold` so they transition out of `unknown` faster -- don't lower the floor. When `healthy_floor < 0` Trickster emits a startup warning and sets the `trickster_alb_pool_admits_failing{backend_name}` gauge to `1`.
 
diff --git a/docs/health.md b/docs/health.md
index 950438ba4..c3345d739 100644
--- a/docs/health.md
+++ b/docs/health.md
@@ -102,6 +102,8 @@ backends:
       recovery_threshold: 3 # backend is healthy after 3 consecutive successes
 ```
 
+The Prometheus default probe is `/api/v1/query?query=up`. Some multi-tenant Prometheus gateways reject an unbounded `up` with `400 bad_data: "too many series found"`, which keeps the member out of any ALB pool it belongs to. Override `healthcheck.query` with a bounded expression the backend accepts (for example `query=vector(1)`) when probing such backends.
+
 ## Other Ways to Monitor Health
 
 In addition to the out-of-the-box health checks to determine up-or-down status, you may want to setup alarms and thresholds based on the metrics instrumented by Trickster. See [metrics.md](metrics.md) for collecting performance metrics about Trickster.
diff --git a/docs/metrics.md b/docs/metrics.md
index d95bf4e38..0cc9a546e 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -95,6 +95,10 @@ The following metrics are available for polling with any Trickster configuration
   * labels:
     * `backend_name` - the name of the configured ALB backend
 
+* `trickster_alb_pool_floor_reset` (Gauge) - 1 when an ALB pool's `healthy_floor` was reset to 0 at startup because pool members have no health check and could never reach the configured floor, 0 otherwise. See [alb.md](./alb.md#health-based-backend-selection).
+  * labels:
+    * `backend_name` - the name of the configured ALB backend
+
 ---
 
 The following metrics are available only for Caches Types whose object lifecycle Trickster manages internally (Memory, Filesystem and bbolt):
diff --git a/integration/alb_missing_healthcheck_test.go b/integration/alb_missing_healthcheck_test.go
index 1d652bb1a..be46d9db7 100644
--- a/integration/alb_missing_healthcheck_test.go
+++ b/integration/alb_missing_healthcheck_test.go
@@ -18,7 +18,9 @@ package integration
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
+	"io"
 	"net/http"
 	"net/http/httptest"
 	"net/url"
@@ -34,33 +36,19 @@ import (
 	"github.com/trickstercache/trickster/v2/integration/internal/portutil"
 )
 
-// TestALBPoolMemberWithoutHealthcheckNotRouted verifies that a non-virtual ALB
-// pool member configured without a `healthcheck:` block does not receive
-// fanout traffic when its upstream is unhealthy. The provider's default
-// healthcheck must be auto-applied so the pool's dispatch-time filter can
-// observe Failing status and exclude the member.
-func TestALBPoolMemberWithoutHealthcheckNotRouted(t *testing.T) {
-	healthyResp := albTestdata(t, "alb_unavail/healthy.json")
-
+// TestALBHealthyFloorAdmitsFailingMetric verifies the warning surface for an
+// ALB whose healthy_floor admits Failing members. An operator who lowered
+// healthy_floor below 0 to keep traffic flowing during the Initializing
+// startup window also admits members the probe has confirmed broken; the
+// `trickster_alb_pool_admits_failing` gauge surfaces that misconfiguration.
+func TestALBHealthyFloorAdmitsFailingMetric(t *testing.T) {
 	healthy := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "application/json")
 		w.WriteHeader(http.StatusOK)
-		switch r.URL.Path {
-		case "/api/v1/status/buildinfo":
-			fmt.Fprint(w, `{"status":"success","data":{"version":"2.0"}}`)
-		case "/api/v1/query":
-			fmt.Fprint(w, `{"status":"success","data":{"resultType":"vector","result":[]}}`)
-		default:
-			fmt.Fprint(w, healthyResp)
-		}
+		fmt.Fprint(w, `{"status":"success","data":{"version":"2.0"}}`)
 	}))
 	t.Cleanup(healthy.Close)
-
-	var brokenDataHits atomic.Int64
 	broken := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		if r.URL.Path == "/api/v1/query_range" {
-			brokenDataHits.Add(1)
-		}
 		w.WriteHeader(http.StatusInternalServerError)
 	}))
 	t.Cleanup(broken.Close)
@@ -68,9 +56,8 @@ func TestALBPoolMemberWithoutHealthcheckNotRouted(t *testing.T) {
 	ports, release := portutil.Reserve(t, 3)
 	frontPort, metricsPort, mgmtPort := ports[0], ports[1], ports[2]
 
-	yaml := fmt.Sprintf(albTestdata(t, "alb_missing_hc/missing_hc.yaml.tmpl"),
+	yaml := fmt.Sprintf(albTestdata(t, "alb_missing_hc/floor_warn.yaml.tmpl"),
 		frontPort, metricsPort, mgmtPort, healthy.URL, broken.URL)
-
 	cfgPath := filepath.Join(t.TempDir(), "trickster.yaml")
 	require.NoError(t, os.WriteFile(cfgPath, []byte(yaml), 0644))
 
@@ -82,63 +69,100 @@ func TestALBPoolMemberWithoutHealthcheckNotRouted(t *testing.T) {
 	metricsAddr := fmt.Sprintf("127.0.0.1:%d", metricsPort)
 	waitForTrickster(t, metricsAddr)
 
-	// Give the auto-applied probe time to fire, trip the broken backend to
-	// Failing, and let the ALB pool refresh its healthy snapshot.
-	time.Sleep(2 * time.Second)
-	dataBaseline := brokenDataHits.Load()
-
-	frontAddr := fmt.Sprintf("127.0.0.1:%d", frontPort)
-	const reqs = 50
-	now := time.Now().Unix()
-	for i := range reqs {
-		// query_range is mergeable in TSM and therefore triggers fanout to
-		// every healthy pool member. Use unique query strings so the cache
-		// doesn't short-circuit subsequent requests.
-		q := url.Values{
-			"query": {fmt.Sprintf("up + 0*%d", i)},
-			"start": {fmt.Sprintf("%d", now-300)},
-			"end":   {fmt.Sprintf("%d", now)},
-			"step":  {"15"},
+	require.EventuallyWithT(t, func(collect *assert.CollectT) {
+		lines := checkTricksterMetrics(t, metricsAddr)
+		var admits, excludes string
+		for _, l := range lines {
+			if strings.HasPrefix(l, "trickster_alb_pool_admits_failing{") {
+				if strings.Contains(l, `backend_name="alb-admits-failing"`) {
+					admits = l
+				}
+				if strings.Contains(l, `backend_name="alb-excludes-failing"`) {
+					excludes = l
+				}
+			}
 		}
-		u := fmt.Sprintf("http://%s/alb-tsm-test/api/v1/query_range?%s",
-			frontAddr, q.Encode())
-		resp, err := http.Get(u)
-		require.NoError(t, err)
-		resp.Body.Close()
-	}
+		assert.True(collect, strings.HasSuffix(admits, " 1"),
+			"alb-admits-failing must report 1: %q", admits)
+		assert.True(collect, strings.HasSuffix(excludes, " 0"),
+			"alb-excludes-failing must report 0: %q", excludes)
+	}, 5*time.Second, 200*time.Millisecond)
+}
+
+// TestALBHealthyFloorResetWhenMemberHasNoHealthcheck covers #1015: an ALB with
+// healthy_floor: 1 whose only member has no health check. The member is stuck
+// Unchecked and would be permanently excluded (empty pool -> 502). Trickster
+// resets the effective floor to 0, sets trickster_alb_pool_floor_reset, and
+// keeps serving 200.
+func TestALBHealthyFloorResetWhenMemberHasNoHealthcheck(t *testing.T) {
+	vector := albTestdata(t, "alb_unavail/healthy.json")
+	origin := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		fmt.Fprint(w, vector)
+	}))
+	t.Cleanup(origin.Close)
+
+	ports, release := portutil.Reserve(t, 3)
+	frontPort, metricsPort, mgmtPort := ports[0], ports[1], ports[2]
+	yaml := fmt.Sprintf(albTestdata(t, "alb_missing_hc/floor_reset.yaml.tmpl"),
+		frontPort, metricsPort, mgmtPort, origin.URL)
+	cfgPath := filepath.Join(t.TempDir(), "trickster.yaml")
+	require.NoError(t, os.WriteFile(cfgPath, []byte(yaml), 0644))
+
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(cancel)
+	release()
+	go startTrickster(t, ctx, expectedStartError{}, "-config", cfgPath)
+
+	metricsAddr := fmt.Sprintf("127.0.0.1:%d", metricsPort)
+	waitForTrickster(t, metricsAddr)
 
-	// Allow a small drain window in case any in-flight fanout slot was still
-	// dispatching when the assertion fired.
 	require.EventuallyWithT(t, func(collect *assert.CollectT) {
-		dataDelta := brokenDataHits.Load() - dataBaseline
-		assert.Equalf(collect, int64(0), dataDelta,
-			"broken backend received %d data requests via the ALB after probe transitioned it to Failing",
-			dataDelta)
-	}, 3*time.Second, 200*time.Millisecond)
+		var line string
+		for _, l := range checkTricksterMetrics(t, metricsAddr) {
+			if strings.HasPrefix(l, "trickster_alb_pool_floor_reset{") &&
+				strings.Contains(l, `backend_name="alb-floor1"`) {
+				line = l
+			}
+		}
+		assert.True(collect, strings.HasSuffix(line, " 1"),
+			"alb-floor1 floor-reset gauge must be 1: %q", line)
+	}, 5*time.Second, 200*time.Millisecond)
+
+	// member admitted under the reset floor -> 200, not an empty-pool 502
+	frontAddr := fmt.Sprintf("127.0.0.1:%d", frontPort)
+	resp, err := http.Get(fmt.Sprintf("http://%s/alb-floor1/api/v1/query?query=up", frontAddr))
+	require.NoError(t, err)
+	defer resp.Body.Close()
+	require.Equal(t, http.StatusOK, resp.StatusCode)
 }
 
-// TestALBHealthyFloorAdmitsFailingMetric verifies the warning surface for an
-// ALB whose healthy_floor admits Failing members. An operator who lowered
-// healthy_floor below 0 to keep traffic flowing during the Initializing
-// startup window also admits members the probe has confirmed broken; the
-// `trickster_alb_pool_admits_failing` gauge surfaces that misconfiguration.
-func TestALBHealthyFloorAdmitsFailingMetric(t *testing.T) {
-	healthy := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+// TestALBPoolDegradeWarnsInResponse covers thinker0's silent single-member
+// degrade: a 2-member TSM pool where one member's probe fails drops to one live
+// member. TSM still serves 200 from the survivor, but the response must carry a
+// `warnings` entry so the caller knows the merge collapsed to a single shard.
+func TestALBPoolDegradeWarnsInResponse(t *testing.T) {
+	vector := albTestdata(t, "alb_unavail/healthy.json")
+	ok := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "application/json")
 		w.WriteHeader(http.StatusOK)
-		fmt.Fprint(w, `{"status":"success","data":{"version":"2.0"}}`)
+		fmt.Fprint(w, vector)
 	}))
-	t.Cleanup(healthy.Close)
-	broken := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+	t.Cleanup(ok.Close)
+	var badData atomic.Int64
+	bad := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/api/v1/status/buildinfo" {
+			badData.Add(1)
+		}
 		w.WriteHeader(http.StatusInternalServerError)
 	}))
-	t.Cleanup(broken.Close)
+	t.Cleanup(bad.Close)
 
 	ports, release := portutil.Reserve(t, 3)
 	frontPort, metricsPort, mgmtPort := ports[0], ports[1], ports[2]
-
-	yaml := fmt.Sprintf(albTestdata(t, "alb_missing_hc/floor_warn.yaml.tmpl"),
-		frontPort, metricsPort, mgmtPort, healthy.URL, broken.URL)
+	yaml := fmt.Sprintf(albTestdata(t, "alb_missing_hc/degrade.yaml.tmpl"),
+		frontPort, metricsPort, mgmtPort, ok.URL, bad.URL)
 	cfgPath := filepath.Join(t.TempDir(), "trickster.yaml")
 	require.NoError(t, os.WriteFile(cfgPath, []byte(yaml), 0644))
 
@@ -147,25 +171,40 @@ func TestALBHealthyFloorAdmitsFailingMetric(t *testing.T) {
 	release()
 	go startTrickster(t, ctx, expectedStartError{}, "-config", cfgPath)
 
+	frontAddr := fmt.Sprintf("127.0.0.1:%d", frontPort)
 	metricsAddr := fmt.Sprintf("127.0.0.1:%d", metricsPort)
 	waitForTrickster(t, metricsAddr)
 
+	var n atomic.Int64
 	require.EventuallyWithT(t, func(collect *assert.CollectT) {
-		lines := checkTricksterMetrics(t, metricsAddr)
-		var admits, excludes string
-		for _, l := range lines {
-			if strings.HasPrefix(l, "trickster_alb_pool_admits_failing{") {
-				if strings.Contains(l, `backend_name="alb-admits-failing"`) {
-					admits = l
-				}
-				if strings.Contains(l, `backend_name="alb-excludes-failing"`) {
-					excludes = l
-				}
+		// unique query per attempt so the cache can't mask the live merge
+		q := fmt.Sprintf("up + 0*%d", n.Add(1))
+		u := fmt.Sprintf("http://%s/alb-degrade/api/v1/query?query=%s", frontAddr, url.QueryEscape(q))
+		resp, err := http.Get(u)
+		if !assert.NoError(collect, err) {
+			return
+		}
+		defer resp.Body.Close()
+		b, _ := io.ReadAll(resp.Body)
+		if !assert.Equal(collect, http.StatusOK, resp.StatusCode, "body: %s", b) {
+			return
+		}
+		var pr struct {
+			Status   string   `json:"status"`
+			Warnings []string `json:"warnings"`
+		}
+		if !assert.NoError(collect, json.Unmarshal(b, &pr)) {
+			return
+		}
+		var found bool
+		for _, wn := range pr.Warnings {
+			if strings.Contains(wn, "1 of 2 pool members") {
+				found = true
 			}
 		}
-		assert.True(collect, strings.HasSuffix(admits, " 1"),
-			"alb-admits-failing must report 1: %q", admits)
-		assert.True(collect, strings.HasSuffix(excludes, " 0"),
-			"alb-excludes-failing must report 0: %q", excludes)
-	}, 5*time.Second, 200*time.Millisecond)
+		assert.True(collect, found,
+			"expected degrade warning in response warnings: %v", pr.Warnings)
+	}, 6*time.Second, 200*time.Millisecond)
+
+	require.Zero(t, badData.Load(), "failing member must not receive data requests")
 }
diff --git a/integration/engines_test.go b/integration/engines_test.go
index e2f9e62ac..c39782cac 100644
--- a/integration/engines_test.go
+++ b/integration/engines_test.go
@@ -70,14 +70,6 @@ func engineSetup(t *testing.T) *engineFakeOrigin {
 		srv := &httptest.Server{
 			Listener: ln,
 			Config: &http.Server{Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				// auto-applied prometheus healthcheck probes `/api/v1/query?query=up`;
-				// short-circuit them so per-test counters only see data traffic
-				if r.URL.Path == "/api/v1/query" && r.URL.Query().Get("query") == "up" {
-					w.Header().Set("Content-Type", "application/json")
-					w.WriteHeader(http.StatusOK)
-					_, _ = io.WriteString(w, `{"status":"success","data":{"resultType":"vector","result":[]}}`)
-					return
-				}
 				o.mu.Lock()
 				h := o.handler
 				o.mu.Unlock()
diff --git a/integration/testdata/alb_missing_hc/missing_hc.yaml.tmpl b/integration/testdata/alb_missing_hc/degrade.yaml.tmpl
similarity index 69%
rename from integration/testdata/alb_missing_hc/missing_hc.yaml.tmpl
rename to integration/testdata/alb_missing_hc/degrade.yaml.tmpl
index dc2de70c5..eee8ba57c 100644
--- a/integration/testdata/alb_missing_hc/missing_hc.yaml.tmpl
+++ b/integration/testdata/alb_missing_hc/degrade.yaml.tmpl
@@ -11,7 +11,7 @@ caches:
   mem1:
     provider: memory
 backends:
-  prom-healthy:
+  prom-ok:
     provider: prometheus
     origin_url: %s
     cache_name: mem1
@@ -22,15 +22,22 @@ backends:
       timeout: 500ms
       failure_threshold: 1
       recovery_threshold: 1
-  prom-broken:
+  prom-bad:
     provider: prometheus
     origin_url: %s
     cache_name: mem1
-  alb-tsm-test:
+    healthcheck:
+      path: /api/v1/status/buildinfo
+      query: ""
+      interval: 100ms
+      timeout: 500ms
+      failure_threshold: 1
+      recovery_threshold: 1
+  alb-degrade:
     provider: alb
     alb:
       mechanism: tsm
       output_format: prometheus
       pool:
-        - prom-healthy
-        - prom-broken
+        - prom-ok
+        - prom-bad
diff --git a/integration/testdata/alb_missing_hc/floor_reset.yaml.tmpl b/integration/testdata/alb_missing_hc/floor_reset.yaml.tmpl
new file mode 100644
index 000000000..1497e73e7
--- /dev/null
+++ b/integration/testdata/alb_missing_hc/floor_reset.yaml.tmpl
@@ -0,0 +1,25 @@
+
+frontend:
+  listen_port: %d
+metrics:
+  listen_port: %d
+mgmt:
+  listen_port: %d
+logging:
+  log_level: warn
+caches:
+  mem1:
+    provider: memory
+backends:
+  prom-noprobe:
+    provider: prometheus
+    origin_url: %s
+    cache_name: mem1
+  alb-floor1:
+    provider: alb
+    alb:
+      mechanism: tsm
+      output_format: prometheus
+      healthy_floor: 1
+      pool:
+        - prom-noprobe
diff --git a/pkg/backends/alb/client.go b/pkg/backends/alb/client.go
index fc68d552b..dd0ef0778 100644
--- a/pkg/backends/alb/client.go
+++ b/pkg/backends/alb/client.go
@@ -19,6 +19,7 @@ package alb
 import (
 	"fmt"
 	"net/http"
+	"strings"
 	"time"
 
 	"github.com/trickstercache/trickster/v2/pkg/backends"
@@ -167,6 +168,7 @@ func (c *Client) ValidateAndStartPool(clients backends.Backends, hcs healthcheck
 		return c.validateAndStartUserRouter(clients, hcs)
 	}
 	targets := make(pool.Targets, 0, len(o.Pool))
+	var unprobed []string
 	for _, n := range o.Pool {
 		tc, ok := clients[n]
 		if !ok {
@@ -174,21 +176,35 @@ func (c *Client) ValidateAndStartPool(clients backends.Backends, hcs healthcheck
 		}
 		hc, ok := hcs[n]
 		if !ok {
-			// Only virtual backends (rule, alb) may legitimately be absent from
-			// the healthcheck registry. A non-virtual miss means the operator
-			// did not configure healthcheck on this backend and the upstream
-			// auto-apply path also declined to register a probe; fabricating a
-			// dummy Status here would silently route fanout traffic regardless
-			// of upstream health.
-			if !backends.IsVirtual(tc.Configuration().Provider) {
-				return alberr.NewErrMissingHealthCheck(c.Name(), n)
-			}
+			// virtual backends (rule, alb) have no health checks; treat as passing
 			hc = healthcheck.NewStatus(n, "virtual", "", healthcheck.StatusPassing, time.Time{}, nil)
 		}
+		if mo := tc.Configuration(); !backends.IsVirtual(mo.Provider) &&
+			(mo.HealthCheck == nil || mo.HealthCheck.Interval <= 0) {
+			unprobed = append(unprobed, n)
+		}
 		targets = append(targets, pool.NewTarget(tc.Router(), hc, tc))
 	}
+	effectiveFloor := o.HealthyFloor
+	if o.HealthyFloor >= int(healthcheck.StatusPassing) && len(unprobed) > 0 {
+		// floor requires Passing, but these members have no probe interval and
+		// can never leave Unchecked, so they would be permanently excluded --
+		// which can empty the pool entirely and 502 every request. Reset to 0
+		// so unchecked members are admitted, and surface it loudly.
+		effectiveFloor = int(healthcheck.StatusUnchecked)
+		metrics.ALBPoolFloorReset.WithLabelValues(c.Name()).Set(1)
+		logger.Warn("alb healthy_floor reset to 0: pool members have no health check",
+			logging.Pairs{
+				"backend_name":  c.Name(),
+				"healthy_floor": o.HealthyFloor,
+				"members":       strings.Join(unprobed, ","),
+				"hint":          "configure healthcheck.interval on these members, or set healthy_floor: 0",
+			})
+	} else {
+		metrics.ALBPoolFloorReset.WithLabelValues(c.Name()).Set(0)
+	}
 	if pm, ok := c.handler.(types.PoolMechanism); ok {
-		pm.SetPool(pool.New(targets, o.HealthyFloor))
+		pm.SetPool(pool.New(targets, effectiveFloor))
 	}
 	if o.HealthyFloor <= int(healthcheck.StatusFailing) {
 		// floor admits members whose probe has confirmed them down; operators
diff --git a/pkg/backends/alb/errors/errors.go b/pkg/backends/alb/errors/errors.go
index 5755c69c0..8335156ac 100644
--- a/pkg/backends/alb/errors/errors.go
+++ b/pkg/backends/alb/errors/errors.go
@@ -63,14 +63,3 @@ func NewErrInvalidUserRouterCreds(albName string) error {
 			albName),
 	}
 }
-
-// NewErrMissingHealthCheck reports a non-virtual ALB pool member that has no
-// registered healthcheck Status. Without this guard the pool would route
-// traffic to the member regardless of upstream health.
-func NewErrMissingHealthCheck(albName, poolMemberName string) error {
-	return &InvalidALBOptionsError{
-		error: fmt.Errorf("alb [%s] non-virtual pool member [%s] has no health check; "+
-			"configure healthcheck options on the backend",
-			albName, poolMemberName),
-	}
-}
diff --git a/pkg/backends/alb/mech/tsm/time_series_merge.go b/pkg/backends/alb/mech/tsm/time_series_merge.go
index 6b8cfcf5e..567798dc2 100644
--- a/pkg/backends/alb/mech/tsm/time_series_merge.go
+++ b/pkg/backends/alb/mech/tsm/time_series_merge.go
@@ -17,6 +17,7 @@
 package tsm
 
 import (
+	"fmt"
 	"math"
 	"net/http"
 	"strconv"
@@ -64,6 +65,10 @@ type handler struct {
 	// poolVersion increments on every SetPool so cached pool-derived data
 	// (stripKeys) can be invalidated without locking.
 	poolVersion atomic.Uint64
+	// degradeActive is true while a configured-multi-member pool is dispatching
+	// with only one live member. It throttles the operator WARN to once per
+	// healthy->degraded transition instead of once per request.
+	degradeActive atomic.Bool
 	// cachedStripKeys memoizes the stripKeys slice across requests as long
 	// as the pool hasn't been replaced. Hot path is a single atomic load
 	// plus a uint64 compare.
@@ -320,16 +325,41 @@ func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 		stripKeys = h.computeStripKeys(hl)
 	}
 
+	// A configured multi-member pool serving from a single live member is
+	// degraded: the merge has silently collapsed to one shard. Warn once per
+	// healthy->degraded transition (not per request) and route through the
+	// merge path so the warning reaches the response `warnings` field.
+	configured := p.ConfiguredLen()
+	degraded := configured > 1 && l == 1
+	if degraded {
+		if h.degradeActive.CompareAndSwap(false, true) {
+			bn := ""
+			if rsc.BackendOptions != nil {
+				bn = rsc.BackendOptions.Name
+			}
+			logger.Warn("alb tsm pool degraded to single live member",
+				logging.Pairs{"backend_name": bn, "configured": configured, "live": l})
+		}
+		dw := fmt.Sprintf("trickster: served from 1 of %d pool members; results may be incomplete", configured)
+		if warnMsg == "" {
+			warnMsg = dw
+		} else {
+			warnMsg += "; " + dw
+		}
+	} else {
+		h.degradeActive.Store(false)
+	}
+
 	// Single-live-member fast path: with one shard there is nothing to merge
 	// or dedup against, so the lone backend's response IS the answer. Skip
 	// only when label stripping or dual-query rewriting would still need to
 	// happen (D1 covers the strip case; weighted-avg covers the dual-query
-	// case). Otherwise the merge path's OnResult stripping handles solo pools
-	// correctly, and degraded N-pools where N-1 are unhealthy now return the
-	// surviving member's response directly instead of 502'ing on a one-shard
-	// merge that has no peer to dedup or cross-merge against.
-	// dual-query still needs merge even for one healthy target; one-shard fast path requires all three conditions
-	if l == 1 && len(stripKeys) == 0 && !needsDualQuery {
+	// case), or when the pool is degraded (configured > 1, one live) so the
+	// merge path can attach the degrade warning. Otherwise the merge path's
+	// OnResult stripping handles solo pools correctly, and genuinely
+	// single-member pools return the lone response directly instead of 502'ing
+	// on a one-shard merge that has no peer to dedup or cross-merge against.
+	if l == 1 && len(stripKeys) == 0 && !needsDualQuery && !degraded {
 		defaultHandler.ServeHTTP(w, r)
 		return
 	}
diff --git a/pkg/backends/alb/pool/pool.go b/pkg/backends/alb/pool/pool.go
index 583239758..209e87f5c 100644
--- a/pkg/backends/alb/pool/pool.go
+++ b/pkg/backends/alb/pool/pool.go
@@ -32,6 +32,10 @@ type Pool interface {
 	// between a status flip and the asynchronous healthy-list refresh, so it
 	// is the correct method for request dispatch.
 	Targets() Targets
+	// ConfiguredLen returns the number of pool members as configured, regardless
+	// of current health. Mechanisms compare this against len(Targets()) to
+	// detect a pool degraded to a subset of its configured members.
+	ConfiguredLen() int
 	// SetHealthy seeds the pool's healthy set from a handler list. Intended
 	// for tests and bootstrap paths that don't drive status updates through
 	// healthcheck subscribers.
@@ -105,6 +109,10 @@ func (p *pool) snapshot() Targets {
 	return nil
 }
 
+func (p *pool) ConfiguredLen() int {
+	return len(p.targets)
+}
+
 func (p *pool) Targets() Targets {
 	if lt := p.liveTargets.Load(); lt != nil && !p.refreshPending.Load() {
 		cached := *lt
diff --git a/pkg/backends/backends.go b/pkg/backends/backends.go
index 074a50fc1..c4bc618b6 100644
--- a/pkg/backends/backends.go
+++ b/pkg/backends/backends.go
@@ -21,12 +21,8 @@ import (
 	"net/http"
 
 	"github.com/trickstercache/trickster/v2/pkg/backends/healthcheck"
-	ho "github.com/trickstercache/trickster/v2/pkg/backends/healthcheck/options"
 	bo "github.com/trickstercache/trickster/v2/pkg/backends/options"
 	"github.com/trickstercache/trickster/v2/pkg/backends/providers"
-	"github.com/trickstercache/trickster/v2/pkg/observability/logging"
-	"github.com/trickstercache/trickster/v2/pkg/observability/logging/logger"
-	"github.com/trickstercache/trickster/v2/pkg/observability/metrics"
 )
 
 // Backends represents a map of Backends keyed by Name
@@ -50,38 +46,14 @@ func (b Backends) StartHealthChecks(knownStatuses healthcheck.StatusLookup) (hea
 			continue
 		}
 		hco := bo.HealthCheck
-		def := c.DefaultHealthCheckConfig()
-		if hco == nil && def == nil {
-			// no probe possible for this provider; the ALB pool will reject
-			// this backend as a non-virtual member with no Status.
+		if hco == nil {
 			continue
 		}
-		if def != nil {
-			bo.HealthCheck = def
-			if hco != nil {
-				bo.HealthCheck.Overlay(hco)
-			}
-		} else {
+		bo.HealthCheck = c.DefaultHealthCheckConfig()
+		if bo.HealthCheck == nil {
 			bo.HealthCheck = hco
-		}
-		var autoApplied bool
-		if bo.HealthCheck.Interval <= 0 {
-			// The operator did not configure a probe interval. A registered
-			// target with Interval == 0 never ticks, so its Status sticks at
-			// StatusUnchecked and the ALB pool's dispatch-time filter keeps
-			// routing fanout traffic regardless of upstream health. Apply a
-			// fast auto-default and surface that we did so.
-			bo.HealthCheck.Interval = ho.DefaultAutoProbeInterval
-			if bo.HealthCheck.FailureThreshold <= 0 {
-				bo.HealthCheck.FailureThreshold = 1
-			}
-			if bo.HealthCheck.RecoveryThreshold <= 0 {
-				bo.HealthCheck.RecoveryThreshold = 1
-			}
-			autoApplied = true
-			metrics.BackendsDefaultHealthCheckApplied.WithLabelValues(k, bo.Provider).Inc()
-			logger.Warn("auto-applied default healthcheck for backend",
-				logging.Pairs{"backend_name": k, "provider": bo.Provider})
+		} else {
+			bo.HealthCheck.Overlay(hco)
 		}
 		st, err := hc.Register(k, bo.Provider, bo.HealthCheck, c.HealthCheckHTTPClient())
 		if err != nil {
@@ -91,13 +63,6 @@ func (b Backends) StartHealthChecks(knownStatuses healthcheck.StatusLookup) (hea
 			if v := oldSt.Get(); v != healthcheck.StatusInitializing {
 				st.Set(v)
 			}
-		} else if autoApplied {
-			// Override the registration-time Initializing with Unchecked so the
-			// pool's dispatch filter accepts the target during the brief window
-			// before the first auto-probe completes. Targets the operator
-			// explicitly configured keep Initializing so they're held out
-			// until the probe confirms them.
-			st.Set(healthcheck.StatusUnchecked)
 		}
 		c.SetHealthCheckProbe(st.Prober())
 	}
diff --git a/pkg/backends/healthcheck/options/defaults.go b/pkg/backends/healthcheck/options/defaults.go
index 5856cfc6c..8535bca85 100644
--- a/pkg/backends/healthcheck/options/defaults.go
+++ b/pkg/backends/healthcheck/options/defaults.go
@@ -23,10 +23,4 @@ import (
 const (
 	// DefaultHealthCheckTimeout is the default duration for health check probes to wait before timing out
 	DefaultHealthCheckTimeout = 3 * time.Second
-
-	// DefaultAutoProbeInterval is applied when StartHealthChecks auto-installs
-	// a provider's default health-check config because the operator did not
-	// configure one. The auto-applied probe must fire on a tick or the
-	// downstream pool filter will never see a status transition.
-	DefaultAutoProbeInterval = 5 * time.Second
 )
diff --git a/pkg/observability/metrics/metrics.go b/pkg/observability/metrics/metrics.go
index cf2f31e8f..0e2791622 100644
--- a/pkg/observability/metrics/metrics.go
+++ b/pkg/observability/metrics/metrics.go
@@ -454,20 +454,6 @@ var (
 		[]string{"backend_name"},
 	)
 
-	// BackendsDefaultHealthCheckApplied counts backends for which the provider's
-	// DefaultHealthCheckConfig was auto-installed by StartHealthChecks because
-	// the operator did not configure healthcheck options. A non-zero value
-	// flags configs that depended on an absent probe and benefit from auditing.
-	BackendsDefaultHealthCheckApplied = prometheus.NewCounterVec(
-		prometheus.CounterOpts{
-			Namespace: metricNamespace,
-			Subsystem: healthSubsystem,
-			Name:      "default_config_applied_total",
-			Help:      "Count of backends that received an auto-applied default healthcheck config because none was configured.",
-		},
-		[]string{"backend_name", "provider"},
-	)
-
 	// ALBPoolAdmitsFailing flags ALB pools whose healthy_floor admits a Failing
 	// status (floor <= StatusFailing). Operators who set floor below 0 to keep
 	// traffic flowing during the Initializing window may not realize they're
@@ -482,6 +468,20 @@ var (
 		},
 		[]string{"backend_name"},
 	)
+
+	// ALBPoolFloorReset flags ALB pools whose healthy_floor was reset to 0 at
+	// startup because one or more pool members have no health check and could
+	// never reach the configured floor (>= Passing), which would otherwise
+	// empty the pool and 502 every request.
+	ALBPoolFloorReset = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: metricNamespace,
+			Subsystem: albSubsystem,
+			Name:      "pool_floor_reset",
+			Help:      "1 when an ALB pool's healthy_floor was reset to 0 because members lack health checks; 0 otherwise.",
+		},
+		[]string{"backend_name"},
+	)
 )
 
 func init() {
@@ -507,8 +507,8 @@ func init() {
 	prometheus.MustRegister(CacheIndexPanicRecovered)
 	prometheus.MustRegister(HealthHandlerPanicRecovered)
 	prometheus.MustRegister(HealthcheckStatusNotifyPanicRecovered)
-	prometheus.MustRegister(BackendsDefaultHealthCheckApplied)
 	prometheus.MustRegister(ALBPoolAdmitsFailing)
+	prometheus.MustRegister(ALBPoolFloorReset)
 	prometheus.MustRegister(CacheObjectOperations)
 	prometheus.MustRegister(CacheByteOperations)
 	prometheus.MustRegister(CacheEvents)