diff --git a/docs/alb.md b/docs/alb.md
index 562aeb412..c5d0a7709 100644
--- a/docs/alb.md
+++ b/docs/alb.md
@@ -445,7 +445,9 @@ A backend will report one of three possible health states to its ALBs: `unavaila
 
 Each ALB has a configurable `healthy_floor` value, which is the threshold for determining which pool members are included in the healthy pool, based on their instantaneous health state. The `healthy_floor` represents the minimum acceptable health state value for inclusion in the healthy pool. The default `healthy_floor` value is `0`, meaning Backends in a state `>= 0` (`unknown` and `available`) are included in the healthy pool. Setting `healthy_floor: 1` would include only `available` Backends, while a value of `-1` will include all backends in the configured pool, including those marked as `unavailable`.
 
-Backends that do not have a [health check interval](./health#example+health+check+configuration+for+use+in+alb) configured will remain in a permanent state of `unknown`. Backends will also be in an `unknown` state from the time Trickster starts until the first of any configured automated health check is completed. Note that if an ALB is configured with `healthy_floor: 1`, any pool members that are not configured with an automated health check interval will never be included in the ALB's healthy pool, as their state is permanently `0`.
+Backends that do not have a [health check interval](./health#example+health+check+configuration+for+use+in+alb) configured will remain in a permanent state of `unknown`. Backends will also be in an `unknown` state from the time Trickster starts until the first of any configured automated health check is completed. A pool member in a permanent `unknown` state can never reach `available`, so a `healthy_floor: 1` ALB whose members lack health checks would have an empty pool and return `502` for every request. To avoid that, Trickster resets such an ALB's effective floor to `0` at startup, emits a warning naming the ALB and the un-probed members, and sets the `trickster_alb_pool_floor_reset{backend_name}` gauge to `1`. Configure a health check interval on those members if you want `healthy_floor: 1` to apply.
+
+Setting `healthy_floor` below `0` admits members the probe has confirmed `unavailable`, not just members in the transient `unknown` state. If your goal is to keep traffic flowing during the cold-start window before the first probes complete, lower the pool members' `recovery_threshold` so they transition out of `unknown` faster -- don't lower the floor. When `healthy_floor < 0` Trickster emits a startup warning and sets the `trickster_alb_pool_admits_failing{backend_name}` gauge to `1`.
 
 ### Example ALB Configuration Routing Only To Known Healthy Backends
 
diff --git a/docs/health.md b/docs/health.md
index 950438ba4..c3345d739 100644
--- a/docs/health.md
+++ b/docs/health.md
@@ -102,6 +102,8 @@ backends:
       recovery_threshold: 3 # backend is healthy after 3 consecutive successes
 ```
 
+The Prometheus default probe is `/api/v1/query?query=up`. Some multi-tenant Prometheus gateways reject an unbounded `up` with `400 bad_data: "too many series found"`, which keeps the member out of any ALB pool it belongs to. Override `healthcheck.query` with a bounded expression the backend accepts (for example `query=vector(1)`) when probing such backends.
+
 ## Other Ways to Monitor Health
 
 In addition to the out-of-the-box health checks to determine up-or-down status, you may want to setup alarms and thresholds based on the metrics instrumented by Trickster. See [metrics.md](metrics.md) for collecting performance metrics about Trickster.
diff --git a/docs/metrics.md b/docs/metrics.md
index 917d55862..0cc9a546e 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -91,6 +91,14 @@ The following metrics are available for polling with any Trickster configuration
     * `operation` - the name of the operation being performed (read, write, etc.)
     * `status` - the result of the operation being performed
 
+* `trickster_alb_pool_admits_failing` (Gauge) - 1 when an ALB pool's `healthy_floor` admits members in the `unavailable` state, 0 otherwise. See [alb.md](./alb.md#health-based-backend-selection) for the recommended floor.
+  * labels:
+    * `backend_name` - the name of the configured ALB backend
+
+* `trickster_alb_pool_floor_reset` (Gauge) - 1 when an ALB pool's `healthy_floor` was reset to 0 at startup because pool members have no health check and could never reach the configured floor, 0 otherwise. See [alb.md](./alb.md#health-based-backend-selection).
+  * labels:
+    * `backend_name` - the name of the configured ALB backend
+
 ---
 
 The following metrics are available only for Caches Types whose object lifecycle Trickster manages internally (Memory, Filesystem and bbolt):
diff --git a/integration/alb_missing_healthcheck_test.go b/integration/alb_missing_healthcheck_test.go
new file mode 100644
index 000000000..be46d9db7
--- /dev/null
+++ b/integration/alb_missing_healthcheck_test.go
@@ -0,0 +1,210 @@
+/*
+ * Copyright 2018 The Trickster Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package integration
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/trickstercache/trickster/v2/integration/internal/portutil"
+)
+
+// TestALBHealthyFloorAdmitsFailingMetric verifies the warning surface for an
+// ALB whose healthy_floor admits Failing members. An operator who lowered
+// healthy_floor below 0 to keep traffic flowing during the Initializing
+// startup window also admits members the probe has confirmed broken; the
+// `trickster_alb_pool_admits_failing` gauge surfaces that misconfiguration.
+func TestALBHealthyFloorAdmitsFailingMetric(t *testing.T) {
+	healthy := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		fmt.Fprint(w, `{"status":"success","data":{"version":"2.0"}}`)
+	}))
+	t.Cleanup(healthy.Close)
+	broken := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusInternalServerError)
+	}))
+	t.Cleanup(broken.Close)
+
+	ports, release := portutil.Reserve(t, 3)
+	frontPort, metricsPort, mgmtPort := ports[0], ports[1], ports[2]
+
+	yaml := fmt.Sprintf(albTestdata(t, "alb_missing_hc/floor_warn.yaml.tmpl"),
+		frontPort, metricsPort, mgmtPort, healthy.URL, broken.URL)
+	cfgPath := filepath.Join(t.TempDir(), "trickster.yaml")
+	require.NoError(t, os.WriteFile(cfgPath, []byte(yaml), 0644))
+
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(cancel)
+	release()
+	go startTrickster(t, ctx, expectedStartError{}, "-config", cfgPath)
+
+	metricsAddr := fmt.Sprintf("127.0.0.1:%d", metricsPort)
+	waitForTrickster(t, metricsAddr)
+
+	require.EventuallyWithT(t, func(collect *assert.CollectT) {
+		lines := checkTricksterMetrics(t, metricsAddr)
+		var admits, excludes string
+		for _, l := range lines {
+			if strings.HasPrefix(l, "trickster_alb_pool_admits_failing{") {
+				if strings.Contains(l, `backend_name="alb-admits-failing"`) {
+					admits = l
+				}
+				if strings.Contains(l, `backend_name="alb-excludes-failing"`) {
+					excludes = l
+				}
+			}
+		}
+		assert.True(collect, strings.HasSuffix(admits, " 1"),
+			"alb-admits-failing must report 1: %q", admits)
+		assert.True(collect, strings.HasSuffix(excludes, " 0"),
+			"alb-excludes-failing must report 0: %q", excludes)
+	}, 5*time.Second, 200*time.Millisecond)
+}
+
+// TestALBHealthyFloorResetWhenMemberHasNoHealthcheck covers #1015: an ALB with
+// healthy_floor: 1 whose only member has no health check. The member is stuck
+// Unchecked and would be permanently excluded (empty pool -> 502). Trickster
+// resets the effective floor to 0, sets trickster_alb_pool_floor_reset, and
+// keeps serving 200.
+func TestALBHealthyFloorResetWhenMemberHasNoHealthcheck(t *testing.T) {
+	vector := albTestdata(t, "alb_unavail/healthy.json")
+	origin := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		fmt.Fprint(w, vector)
+	}))
+	t.Cleanup(origin.Close)
+
+	ports, release := portutil.Reserve(t, 3)
+	frontPort, metricsPort, mgmtPort := ports[0], ports[1], ports[2]
+	yaml := fmt.Sprintf(albTestdata(t, "alb_missing_hc/floor_reset.yaml.tmpl"),
+		frontPort, metricsPort, mgmtPort, origin.URL)
+	cfgPath := filepath.Join(t.TempDir(), "trickster.yaml")
+	require.NoError(t, os.WriteFile(cfgPath, []byte(yaml), 0644))
+
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(cancel)
+	release()
+	go startTrickster(t, ctx, expectedStartError{}, "-config", cfgPath)
+
+	metricsAddr := fmt.Sprintf("127.0.0.1:%d", metricsPort)
+	waitForTrickster(t, metricsAddr)
+
+	require.EventuallyWithT(t, func(collect *assert.CollectT) {
+		var line string
+		for _, l := range checkTricksterMetrics(t, metricsAddr) {
+			if strings.HasPrefix(l, "trickster_alb_pool_floor_reset{") &&
+				strings.Contains(l, `backend_name="alb-floor1"`) {
+				line = l
+			}
+		}
+		assert.True(collect, strings.HasSuffix(line, " 1"),
+			"alb-floor1 floor-reset gauge must be 1: %q", line)
+	}, 5*time.Second, 200*time.Millisecond)
+
+	// member admitted under the reset floor -> 200, not an empty-pool 502
+	frontAddr := fmt.Sprintf("127.0.0.1:%d", frontPort)
+	resp, err := http.Get(fmt.Sprintf("http://%s/alb-floor1/api/v1/query?query=up", frontAddr))
+	require.NoError(t, err)
+	defer resp.Body.Close()
+	require.Equal(t, http.StatusOK, resp.StatusCode)
+}
+
+// TestALBPoolDegradeWarnsInResponse covers thinker0's silent single-member
+// degrade: a 2-member TSM pool where one member's probe fails drops to one live
+// member. TSM still serves 200 from the survivor, but the response must carry a
+// `warnings` entry so the caller knows the merge collapsed to a single shard.
+func TestALBPoolDegradeWarnsInResponse(t *testing.T) {
+	vector := albTestdata(t, "alb_unavail/healthy.json")
+	ok := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		fmt.Fprint(w, vector)
+	}))
+	t.Cleanup(ok.Close)
+	var badData atomic.Int64
+	bad := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/api/v1/status/buildinfo" {
+			badData.Add(1)
+		}
+		w.WriteHeader(http.StatusInternalServerError)
+	}))
+	t.Cleanup(bad.Close)
+
+	ports, release := portutil.Reserve(t, 3)
+	frontPort, metricsPort, mgmtPort := ports[0], ports[1], ports[2]
+	yaml := fmt.Sprintf(albTestdata(t, "alb_missing_hc/degrade.yaml.tmpl"),
+		frontPort, metricsPort, mgmtPort, ok.URL, bad.URL)
+	cfgPath := filepath.Join(t.TempDir(), "trickster.yaml")
+	require.NoError(t, os.WriteFile(cfgPath, []byte(yaml), 0644))
+
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(cancel)
+	release()
+	go startTrickster(t, ctx, expectedStartError{}, "-config", cfgPath)
+
+	frontAddr := fmt.Sprintf("127.0.0.1:%d", frontPort)
+	metricsAddr := fmt.Sprintf("127.0.0.1:%d", metricsPort)
+	waitForTrickster(t, metricsAddr)
+
+	var n atomic.Int64
+	require.EventuallyWithT(t, func(collect *assert.CollectT) {
+		// unique query per attempt so the cache can't mask the live merge
+		q := fmt.Sprintf("up + 0*%d", n.Add(1))
+		u := fmt.Sprintf("http://%s/alb-degrade/api/v1/query?query=%s", frontAddr, url.QueryEscape(q))
+		resp, err := http.Get(u)
+		if !assert.NoError(collect, err) {
+			return
+		}
+		defer resp.Body.Close()
+		b, _ := io.ReadAll(resp.Body)
+		if !assert.Equal(collect, http.StatusOK, resp.StatusCode, "body: %s", b) {
+			return
+		}
+		var pr struct {
+			Status   string   `json:"status"`
+			Warnings []string `json:"warnings"`
+		}
+		if !assert.NoError(collect, json.Unmarshal(b, &pr)) {
+			return
+		}
+		var found bool
+		for _, wn := range pr.Warnings {
+			if strings.Contains(wn, "1 of 2 pool members") {
+				found = true
+			}
+		}
+		assert.True(collect, found,
+			"expected degrade warning in response warnings: %v", pr.Warnings)
+	}, 6*time.Second, 200*time.Millisecond)
+
+	require.Zero(t, badData.Load(), "failing member must not receive data requests")
+}
diff --git a/integration/alb_testdata_test.go b/integration/alb_testdata_test.go
index 308658135..d28f3b31d 100644
--- a/integration/alb_testdata_test.go
+++ b/integration/alb_testdata_test.go
@@ -21,7 +21,7 @@ import (
 	"testing"
 )
 
-//go:embed testdata/alb_cache testdata/alb_tsm_correctness testdata/alb_response_headers testdata/alb_nested testdata/alb_per_path testdata/alb_unavail testdata/alb_request_headers testdata/alb_compose
+//go:embed testdata/alb_cache testdata/alb_tsm_correctness testdata/alb_response_headers testdata/alb_nested testdata/alb_per_path testdata/alb_unavail testdata/alb_missing_hc testdata/alb_request_headers testdata/alb_compose
 var albTestdataFS embed.FS
 
 func albTestdata(t testing.TB, name string) string {
diff --git a/integration/testdata/alb_missing_hc/degrade.yaml.tmpl b/integration/testdata/alb_missing_hc/degrade.yaml.tmpl
new file mode 100644
index 000000000..eee8ba57c
--- /dev/null
+++ b/integration/testdata/alb_missing_hc/degrade.yaml.tmpl
@@ -0,0 +1,43 @@
+
+frontend:
+  listen_port: %d
+metrics:
+  listen_port: %d
+mgmt:
+  listen_port: %d
+logging:
+  log_level: warn
+caches:
+  mem1:
+    provider: memory
+backends:
+  prom-ok:
+    provider: prometheus
+    origin_url: %s
+    cache_name: mem1
+    healthcheck:
+      path: /api/v1/status/buildinfo
+      query: ""
+      interval: 100ms
+      timeout: 500ms
+      failure_threshold: 1
+      recovery_threshold: 1
+  prom-bad:
+    provider: prometheus
+    origin_url: %s
+    cache_name: mem1
+    healthcheck:
+      path: /api/v1/status/buildinfo
+      query: ""
+      interval: 100ms
+      timeout: 500ms
+      failure_threshold: 1
+      recovery_threshold: 1
+  alb-degrade:
+    provider: alb
+    alb:
+      mechanism: tsm
+      output_format: prometheus
+      pool:
+        - prom-ok
+        - prom-bad
diff --git a/integration/testdata/alb_missing_hc/floor_reset.yaml.tmpl b/integration/testdata/alb_missing_hc/floor_reset.yaml.tmpl
new file mode 100644
index 000000000..1497e73e7
--- /dev/null
+++ b/integration/testdata/alb_missing_hc/floor_reset.yaml.tmpl
@@ -0,0 +1,25 @@
+
+frontend:
+  listen_port: %d
+metrics:
+  listen_port: %d
+mgmt:
+  listen_port: %d
+logging:
+  log_level: warn
+caches:
+  mem1:
+    provider: memory
+backends:
+  prom-noprobe:
+    provider: prometheus
+    origin_url: %s
+    cache_name: mem1
+  alb-floor1:
+    provider: alb
+    alb:
+      mechanism: tsm
+      output_format: prometheus
+      healthy_floor: 1
+      pool:
+        - prom-noprobe
diff --git a/integration/testdata/alb_missing_hc/floor_warn.yaml.tmpl b/integration/testdata/alb_missing_hc/floor_warn.yaml.tmpl
new file mode 100644
index 000000000..d11121ee8
--- /dev/null
+++ b/integration/testdata/alb_missing_hc/floor_warn.yaml.tmpl
@@ -0,0 +1,52 @@
+
+frontend:
+  listen_port: %d
+metrics:
+  listen_port: %d
+mgmt:
+  listen_port: %d
+logging:
+  log_level: warn
+caches:
+  mem1:
+    provider: memory
+backends:
+  prom-healthy:
+    provider: prometheus
+    origin_url: %s
+    cache_name: mem1
+    healthcheck:
+      path: /api/v1/status/buildinfo
+      query: ""
+      interval: 100ms
+      timeout: 500ms
+      failure_threshold: 1
+      recovery_threshold: 1
+  prom-broken:
+    provider: prometheus
+    origin_url: %s
+    cache_name: mem1
+    healthcheck:
+      path: /api/v1/status/buildinfo
+      query: ""
+      interval: 100ms
+      timeout: 500ms
+      failure_threshold: 1
+      recovery_threshold: 1
+  alb-admits-failing:
+    provider: alb
+    alb:
+      mechanism: tsm
+      output_format: prometheus
+      healthy_floor: -1
+      pool:
+        - prom-healthy
+        - prom-broken
+  alb-excludes-failing:
+    provider: alb
+    alb:
+      mechanism: tsm
+      output_format: prometheus
+      pool:
+        - prom-healthy
+        - prom-broken
diff --git a/pkg/backends/alb/client.go b/pkg/backends/alb/client.go
index ae09f6038..dd0ef0778 100644
--- a/pkg/backends/alb/client.go
+++ b/pkg/backends/alb/client.go
@@ -19,6 +19,7 @@ package alb
 import (
 	"fmt"
 	"net/http"
+	"strings"
 	"time"
 
 	"github.com/trickstercache/trickster/v2/pkg/backends"
@@ -34,6 +35,9 @@ import (
 	rt "github.com/trickstercache/trickster/v2/pkg/backends/providers/registry/types"
 	"github.com/trickstercache/trickster/v2/pkg/cache"
 	"github.com/trickstercache/trickster/v2/pkg/errors"
+	"github.com/trickstercache/trickster/v2/pkg/observability/logging"
+	"github.com/trickstercache/trickster/v2/pkg/observability/logging/logger"
+	"github.com/trickstercache/trickster/v2/pkg/observability/metrics"
 	authopt "github.com/trickstercache/trickster/v2/pkg/proxy/authenticator/options"
 	authreg "github.com/trickstercache/trickster/v2/pkg/proxy/authenticator/registry"
 	"github.com/trickstercache/trickster/v2/pkg/proxy/handlers"
@@ -164,6 +168,7 @@ func (c *Client) ValidateAndStartPool(clients backends.Backends, hcs healthcheck
 		return c.validateAndStartUserRouter(clients, hcs)
 	}
 	targets := make(pool.Targets, 0, len(o.Pool))
+	var unprobed []string
 	for _, n := range o.Pool {
 		tc, ok := clients[n]
 		if !ok {
@@ -174,10 +179,46 @@ func (c *Client) ValidateAndStartPool(clients backends.Backends, hcs healthcheck
 			// virtual backends (rule, alb) have no health checks; treat as passing
 			hc = healthcheck.NewStatus(n, "virtual", "", healthcheck.StatusPassing, time.Time{}, nil)
 		}
+		if mo := tc.Configuration(); !backends.IsVirtual(mo.Provider) &&
+			(mo.HealthCheck == nil || mo.HealthCheck.Interval <= 0) {
+			unprobed = append(unprobed, n)
+		}
 		targets = append(targets, pool.NewTarget(tc.Router(), hc, tc))
 	}
+	effectiveFloor := o.HealthyFloor
+	if o.HealthyFloor >= int(healthcheck.StatusPassing) && len(unprobed) > 0 {
+		// floor requires Passing, but these members have no probe interval and
+		// can never leave Unchecked, so they would be permanently excluded --
+		// which can empty the pool entirely and 502 every request. Reset to 0
+		// so unchecked members are admitted, and surface it loudly.
+		effectiveFloor = int(healthcheck.StatusUnchecked)
+		metrics.ALBPoolFloorReset.WithLabelValues(c.Name()).Set(1)
+		logger.Warn("alb healthy_floor reset to 0: pool members have no health check",
+			logging.Pairs{
+				"backend_name":  c.Name(),
+				"healthy_floor": o.HealthyFloor,
+				"members":       strings.Join(unprobed, ","),
+				"hint":          "configure healthcheck.interval on these members, or set healthy_floor: 0",
+			})
+	} else {
+		metrics.ALBPoolFloorReset.WithLabelValues(c.Name()).Set(0)
+	}
 	if pm, ok := c.handler.(types.PoolMechanism); ok {
-		pm.SetPool(pool.New(targets, o.HealthyFloor))
+		pm.SetPool(pool.New(targets, effectiveFloor))
+	}
+	if o.HealthyFloor <= int(healthcheck.StatusFailing) {
+		// floor admits members whose probe has confirmed them down; operators
+		// who lowered the floor to keep traffic flowing during the startup
+		// Initializing window may not realize Failing slips in too.
+		metrics.ALBPoolAdmitsFailing.WithLabelValues(c.Name()).Set(1)
+		logger.Warn("alb healthy_floor admits members in Failing state",
+			logging.Pairs{
+				"backend_name":  c.Name(),
+				"healthy_floor": o.HealthyFloor,
+				"hint":          "set healthy_floor: 0 to exclude probed-failing members",
+			})
+	} else {
+		metrics.ALBPoolAdmitsFailing.WithLabelValues(c.Name()).Set(0)
 	}
 	return nil
 }
diff --git a/pkg/backends/alb/mech/tsm/time_series_merge.go b/pkg/backends/alb/mech/tsm/time_series_merge.go
index 6b8cfcf5e..567798dc2 100644
--- a/pkg/backends/alb/mech/tsm/time_series_merge.go
+++ b/pkg/backends/alb/mech/tsm/time_series_merge.go
@@ -17,6 +17,7 @@
 package tsm
 
 import (
+	"fmt"
 	"math"
 	"net/http"
 	"strconv"
@@ -64,6 +65,10 @@ type handler struct {
 	// poolVersion increments on every SetPool so cached pool-derived data
 	// (stripKeys) can be invalidated without locking.
 	poolVersion atomic.Uint64
+	// degradeActive is true while a configured-multi-member pool is dispatching
+	// with only one live member. It throttles the operator WARN to once per
+	// healthy->degraded transition instead of once per request.
+	degradeActive atomic.Bool
 	// cachedStripKeys memoizes the stripKeys slice across requests as long
 	// as the pool hasn't been replaced. Hot path is a single atomic load
 	// plus a uint64 compare.
@@ -320,16 +325,41 @@ func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 		stripKeys = h.computeStripKeys(hl)
 	}
 
+	// A configured multi-member pool serving from a single live member is
+	// degraded: the merge has silently collapsed to one shard. Warn once per
+	// healthy->degraded transition (not per request) and route through the
+	// merge path so the warning reaches the response `warnings` field.
+	configured := p.ConfiguredLen()
+	degraded := configured > 1 && l == 1
+	if degraded {
+		if h.degradeActive.CompareAndSwap(false, true) {
+			bn := ""
+			if rsc.BackendOptions != nil {
+				bn = rsc.BackendOptions.Name
+			}
+			logger.Warn("alb tsm pool degraded to single live member",
+				logging.Pairs{"backend_name": bn, "configured": configured, "live": l})
+		}
+		dw := fmt.Sprintf("trickster: served from 1 of %d pool members; results may be incomplete", configured)
+		if warnMsg == "" {
+			warnMsg = dw
+		} else {
+			warnMsg += "; " + dw
+		}
+	} else {
+		h.degradeActive.Store(false)
+	}
+
 	// Single-live-member fast path: with one shard there is nothing to merge
 	// or dedup against, so the lone backend's response IS the answer. Skip
 	// only when label stripping or dual-query rewriting would still need to
 	// happen (D1 covers the strip case; weighted-avg covers the dual-query
-	// case). Otherwise the merge path's OnResult stripping handles solo pools
-	// correctly, and degraded N-pools where N-1 are unhealthy now return the
-	// surviving member's response directly instead of 502'ing on a one-shard
-	// merge that has no peer to dedup or cross-merge against.
-	// dual-query still needs merge even for one healthy target; one-shard fast path requires all three conditions
-	if l == 1 && len(stripKeys) == 0 && !needsDualQuery {
+	// case), or when the pool is degraded (configured > 1, one live) so the
+	// merge path can attach the degrade warning. Otherwise the merge path's
+	// OnResult stripping handles solo pools correctly, and genuinely
+	// single-member pools return the lone response directly instead of 502'ing
+	// on a one-shard merge that has no peer to dedup or cross-merge against.
+	if l == 1 && len(stripKeys) == 0 && !needsDualQuery && !degraded {
 		defaultHandler.ServeHTTP(w, r)
 		return
 	}
diff --git a/pkg/backends/alb/options/options.go b/pkg/backends/alb/options/options.go
index 97023d0eb..7551fe1b5 100644
--- a/pkg/backends/alb/options/options.go
+++ b/pkg/backends/alb/options/options.go
@@ -38,12 +38,14 @@ type Options struct {
 	MechanismName string `yaml:"mechanism,omitempty"`
 	// Pool provides the list of backend names to be used by the load balancer
 	Pool []string `yaml:"pool,omitempty"`
-	// HealthyFloor is the minimum health check status value to be considered Available in the pool
-	// -1 : all pool members are Available regardless of health check status
-	//  0 (default) : pool members with status of unknown (0) or healthy (1) are Available
-	//  1 : only pool members with status of healthy (1) are Available
-	// unknown means the first hc hasn't returned yet,
-	// or (more likely) HealthCheck Interval on target backend is not set
+	// HealthyFloor is the minimum health check status value admitted to the pool.
+	// Values below 0 admit members the probe has confirmed Failing; only set
+	// this below 0 if you intend to route to known-broken upstreams.
+	// -1 : all pool members admitted, including Failing
+	//  0 (default) : Unknown (0) and Healthy (1) admitted, Failing rejected
+	//  1 : only Healthy (1) admitted
+	// Unknown means the first health check hasn't returned yet, or the target
+	// backend has no health check interval configured.
 	HealthyFloor int `yaml:"healthy_floor,omitempty"`
 	// MaxCaptureBytes overrides the backend-level max_capture_bytes for this
 	// ALB's fanout members. Set this when the ALB's expected response shape
diff --git a/pkg/backends/alb/pool/pool.go b/pkg/backends/alb/pool/pool.go
index 583239758..209e87f5c 100644
--- a/pkg/backends/alb/pool/pool.go
+++ b/pkg/backends/alb/pool/pool.go
@@ -32,6 +32,10 @@ type Pool interface {
 	// between a status flip and the asynchronous healthy-list refresh, so it
 	// is the correct method for request dispatch.
 	Targets() Targets
+	// ConfiguredLen returns the number of pool members as configured, regardless
+	// of current health. Mechanisms compare this against len(Targets()) to
+	// detect a pool degraded to a subset of its configured members.
+	ConfiguredLen() int
 	// SetHealthy seeds the pool's healthy set from a handler list. Intended
 	// for tests and bootstrap paths that don't drive status updates through
 	// healthcheck subscribers.
@@ -105,6 +109,10 @@ func (p *pool) snapshot() Targets {
 	return nil
 }
 
+func (p *pool) ConfiguredLen() int {
+	return len(p.targets)
+}
+
 func (p *pool) Targets() Targets {
 	if lt := p.liveTargets.Load(); lt != nil && !p.refreshPending.Load() {
 		cached := *lt
diff --git a/pkg/backends/backends.go b/pkg/backends/backends.go
index 28f97de06..c4bc618b6 100644
--- a/pkg/backends/backends.go
+++ b/pkg/backends/backends.go
@@ -40,8 +40,8 @@ func (b Backends) StartHealthChecks(knownStatuses healthcheck.StatusLookup) (hea
 		}
 		if IsVirtual(bo.Provider) {
 			// Virtual backends have no upstream to probe; register a synthetic
-			// passing status so nested ALBs surface in the health page and in
-			// outer pools' availablePoolMembers (issue #996).
+			// passing status so they surface in the health page and in outer
+			// ALB pool reporting.
 			hc.RegisterVirtual(k, bo.Provider)
 			continue
 		}
diff --git a/pkg/backends/healthcheck/healthcheck.go b/pkg/backends/healthcheck/healthcheck.go
index 6c70f426f..5cb2ad01a 100644
--- a/pkg/backends/healthcheck/healthcheck.go
+++ b/pkg/backends/healthcheck/healthcheck.go
@@ -32,8 +32,8 @@ type HealthChecker interface {
 	// Register a health check Target
 	Register(name string, description string, options *ho.Options, client *http.Client) (*Status, error)
 	// RegisterVirtual records a synthetic always-passing Status for a virtual
-	// backend (rule, alb) that has no upstream to probe. Without this,
-	// nested ALBs render as "nc" on the health page (issue #996).
+	// backend (rule, alb) that has no upstream to probe, so it surfaces in
+	// the health page and in outer ALB pool reporting.
 	RegisterVirtual(name, description string) *Status
 	// Remove a health check Target
 	Unregister(name string)
diff --git a/pkg/observability/metrics/metrics.go b/pkg/observability/metrics/metrics.go
index 010ef87c3..0e2791622 100644
--- a/pkg/observability/metrics/metrics.go
+++ b/pkg/observability/metrics/metrics.go
@@ -453,6 +453,35 @@ var (
 		},
 		[]string{"backend_name"},
 	)
+
+	// ALBPoolAdmitsFailing flags ALB pools whose healthy_floor admits a Failing
+	// status (floor <= StatusFailing). Operators who set floor below 0 to keep
+	// traffic flowing during the Initializing window may not realize they're
+	// also admitting members the probe has confirmed broken; the gauge surfaces
+	// that misconfiguration without spamming logs.
+	ALBPoolAdmitsFailing = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: metricNamespace,
+			Subsystem: albSubsystem,
+			Name:      "pool_admits_failing",
+			Help:      "1 when an ALB pool's healthy_floor admits members in Failing state; 0 otherwise.",
+		},
+		[]string{"backend_name"},
+	)
+
+	// ALBPoolFloorReset flags ALB pools whose healthy_floor was reset to 0 at
+	// startup because one or more pool members have no health check and could
+	// never reach the configured floor (>= Passing), which would otherwise
+	// empty the pool and 502 every request.
+	ALBPoolFloorReset = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: metricNamespace,
+			Subsystem: albSubsystem,
+			Name:      "pool_floor_reset",
+			Help:      "1 when an ALB pool's healthy_floor was reset to 0 because members lack health checks; 0 otherwise.",
+		},
+		[]string{"backend_name"},
+	)
 )
 
 func init() {
@@ -478,6 +507,8 @@ func init() {
 	prometheus.MustRegister(CacheIndexPanicRecovered)
 	prometheus.MustRegister(HealthHandlerPanicRecovered)
 	prometheus.MustRegister(HealthcheckStatusNotifyPanicRecovered)
+	prometheus.MustRegister(ALBPoolAdmitsFailing)
+	prometheus.MustRegister(ALBPoolFloorReset)
 	prometheus.MustRegister(CacheObjectOperations)
 	prometheus.MustRegister(CacheByteOperations)
 	prometheus.MustRegister(CacheEvents)
diff --git a/pkg/proxy/engines/deltaproxycache_chunk_test.go b/pkg/proxy/engines/deltaproxycache_chunk_test.go
index 85c30202b..23d442b71 100644
--- a/pkg/proxy/engines/deltaproxycache_chunk_test.go
+++ b/pkg/proxy/engines/deltaproxycache_chunk_test.go
@@ -605,6 +605,7 @@ func TestDeltaProxyCacheRequestRangeMissChunks(t *testing.T) {
 	rsc.CacheConfig.Provider = "test"
 
 	o.FastForwardDisable = true
+	o.TimeseriesRetention = 1 << 20 // pin against wallclock drift on fixed fixture
 
 	step := time.Duration(3600) * time.Second
 
@@ -753,6 +754,7 @@ func TestDeltaProxyCacheRequestRangeMissChunks_CrossBucket(t *testing.T) {
 	o := rsc.BackendOptions
 	rsc.CacheConfig.Provider = "test"
 	o.FastForwardDisable = true
+	o.TimeseriesRetention = 1 << 20 // pin against wallclock drift on fixed fixture
 
 	step := time.Duration(3600) * time.Second
 
@@ -835,6 +837,7 @@ func TestDeltaProxyCacheRequestRangeMissChunks_CrossBucketPreservesPriorChunks(t
 	o := rsc.BackendOptions
 	rsc.CacheConfig.Provider = "test"
 	o.FastForwardDisable = true
+	o.TimeseriesRetention = 1 << 20 // pin against wallclock drift on fixed fixture
 
 	step := time.Duration(3600) * time.Second
 	now := time.Date(2026, 4, 20, 11, 0, 0, 0, time.UTC)