diff --git a/cmd/stellar-rpc/internal/events/extract.go b/cmd/stellar-rpc/internal/events/extract.go
index b7055c4de..1c1c1b865 100644
--- a/cmd/stellar-rpc/internal/events/extract.go
+++ b/cmd/stellar-rpc/internal/events/extract.go
@@ -34,7 +34,8 @@ import (
 // (ingest.ExtractLedgerEvents — one TxProcessing walk yields hash + events
 // together). This function adds only the RPC-specific Payload shape, the
 // Stage→(TxIdx, OpIdx) cursor-sentinel mapping, EventIdx, and the cursor
-// ordering.
+// ordering — all of which live in PayloadsFromLedgerEvents; this function is just
+// the thin wrapper that reads the inputs from the view and delegates to it.
 func LCMViewToPayloads(lcm xdr.LedgerCloseMetaView) ([]Payload, error) {
 	ledgerSeq, err := lcm.LedgerSequence()
 	if err != nil {
@@ -44,11 +45,26 @@ func LCMViewToPayloads(lcm xdr.LedgerCloseMetaView) ([]Payload, error) {
 	if err != nil {
 		return nil, err
 	}
-
 	txEvents, err := ingest.ExtractLedgerEvents(lcm)
 	if err != nil {
 		return nil, err
 	}
+	return PayloadsFromLedgerEvents(txEvents, ledgerSeq, ledgerClosedAt)
+}
+
+// PayloadsFromLedgerEvents shapes an already-extracted per-transaction event
+// slice (ingest.ExtractLedgerEvents output) into cursor-ordered Payloads. It is
+// the body of LCMViewToPayloads minus the SDK walk, so a caller that already
+// holds the txEvents — the hot ingest path, which also needs the paired tx
+// hashes (txEvents[i].Hash) — can feed BOTH txhash and events from ONE
+// ExtractLedgerEvents call instead of walking TxProcessing twice. ledgerSeq and
+// ledgerClosedAt are the view's header values (cheap reads, not a walk). The
+// cursor ordering and EventIdx assignment are IDENTICAL to what LCMViewToPayloads
+// produced inline, so event IDs are unchanged across the refactor.
+func PayloadsFromLedgerEvents(
+	txEvents []ingest.LedgerTransactionEvents, ledgerSeq uint32, ledgerClosedAt int64,
+) ([]Payload, error) {
+	var err error
 	at := func(i int) (uint32, xdr.Hash) {
 		return uint32(i) + 1, xdr.Hash(txEvents[i].Hash) //nolint:gosec // 1-based, matching ingest reader's tx.Index
 	}
diff --git a/cmd/stellar-rpc/internal/fullhistory/backfill/hotsource_test.go b/cmd/stellar-rpc/internal/fullhistory/backfill/hotsource_test.go
new file mode 100644
index 000000000..fc67d74b1
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/backfill/hotsource_test.go
@@ -0,0 +1,85 @@
+package backfill
+
+import (
+	"context"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger"
+)
+
+// seedReadyHotChunk stages an on-disk "ready" hot DB for c, in call order:
+// PutHotTransient -> open store at HotChunkPath(c) -> commit ONE ledger entry at
+// seq `top` (so MaxCommittedSeq reads back `top`) -> Close -> FlipHotReady. Only
+// the ledger entry is written, as that is all the completeness gate reads. The
+// Close is hygiene, not a lock requirement: a read-only open takes no RocksDB
+// LOCK. backfillSource later opens this same on-disk DB by its Layout path.
+func seedReadyHotChunk(t *testing.T, cat *catalog.Catalog, c chunk.ID, top uint32) {
+	t.Helper()
+	require.NoError(t, cat.PutHotTransient(c))
+	store, err := rocksdb.New(rocksdb.Config{
+		Path:           cat.Layout().HotChunkPath(c),
+		ColumnFamilies: hotchunk.ColumnFamilies(),
+		Logger:         silentLogger(),
+	})
+	require.NoError(t, err)
+	h := ledger.NewWithStore(store)
+	require.NoError(t, store.Batch(func(b *rocksdb.BatchWriter) error {
+		return h.AddLedgerToBatch(b, ledger.Entry{Seq: top, Bytes: []byte("ledger")})
+	}))
+	require.NoError(t, store.Close())
+	require.NoError(t, cat.FlipHotReady(c))
+}
+
+// TestBackfillSource_HotComplete: a "ready" hot DB seeded up to c.LastLedger() is
+// chosen as the source. The config carries NO backend, so a nil error alone
+// proves the hot branch was taken; the returned closer must also close cleanly.
+func TestBackfillSource_HotComplete(t *testing.T) {
+	cat, _ := testCatalog(t)
+	cfg := testProcessConfig(t, cat) // no Backend
+
+	c := chunk.ID(0)
+	seedReadyHotChunk(t, cat, c, c.LastLedger()) // complete: maxSeq == last ledger
+
+	src, closeSrc, err := backfillSource(context.Background(), c, catalog.AllArtifacts(), cfg)
+	require.NoError(t, err, "complete hot tier is used; no bulk backend needed")
+	require.NotNil(t, src)
+	require.NoError(t, closeSrc())
+}
+
+// TestBackfillSource_HotIncompleteFallsThrough: a "ready" hot DB seeded only at
+// c.FirstLedger() is stale, so backfillSource falls past it. With no pack and no
+// backend, the fall-through surfaces as the "no bulk backend" error instead.
+func TestBackfillSource_HotIncompleteFallsThrough(t *testing.T) {
+	cat, _ := testCatalog(t)
+	cfg := testProcessConfig(t, cat) // no Backend, no frozen pack
+
+	c := chunk.ID(0)
+	seedReadyHotChunk(t, cat, c, c.FirstLedger()) // incomplete: maxSeq < last ledger
+
+	_, _, err := backfillSource(context.Background(), c, catalog.AllArtifacts(), cfg)
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "no bulk backend",
+		"an incomplete hot tier falls through; it is not itself an error")
+}
+
+// TestBackfillSource_HotReadyButDirMissing: the hot key is flipped to "ready" but
+// no DB directory is ever created, so the open fails. backfillSource must return
+// that "won't open" error rather than auto-create an empty DB or fall through.
+func TestBackfillSource_HotReadyButDirMissing(t *testing.T) {
+	cat, _ := testCatalog(t)
+	cfg := testProcessConfig(t, cat)
+
+	c := chunk.ID(0)
+	require.NoError(t, cat.PutHotTransient(c))
+	require.NoError(t, cat.FlipHotReady(c)) // ready key, NO dir on disk
+
+	_, _, err := backfillSource(context.Background(), c, catalog.AllArtifacts(), cfg)
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "won't open")
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/backfill/process.go b/cmd/stellar-rpc/internal/fullhistory/backfill/process.go
index d688737bc..3e059b688 100644
--- a/cmd/stellar-rpc/internal/fullhistory/backfill/process.go
+++ b/cmd/stellar-rpc/internal/fullhistory/backfill/process.go
@@ -17,6 +17,7 @@ import (
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/ingest"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger"
 )
 
@@ -84,11 +85,12 @@ func processChunk(ctx context.Context, chunkID chunk.ID, artifacts catalog.Artif
 
 	// Choose the source before marking "freezing": a source error (a missing pack
 	// or a coverage timeout) must not leave "freezing" debris for a chunk we then
-	// refuse to produce.
-	src, err := backfillSource(ctx, chunkID, artifacts, cfg)
+	// refuse to produce. closeSource releases any opened hot DB after the pass.
+	src, closeSource, err := backfillSource(ctx, chunkID, artifacts, cfg)
 	if err != nil {
 		return err
 	}
+	defer func() { _ = closeSource() }()
 
 	// The one-write protocol, straight-line (see catalog_protocol.go header). The
 	// // one-write: labels keep the four steps greppable without a wrapper.
@@ -101,9 +103,9 @@ func processChunk(ctx context.Context, chunkID chunk.ID, artifacts catalog.Artif
 	// one-write:create — materialize this chunk's cold artifacts from the resolved
 	// source's raw ledger iterator. WriteColdChunk is source-blind.
 	dirs := ingest.ColdDirs{
-		Ledgers: layout.LedgersRoot(),
-		Txhash:  layout.TxHashRawRoot(),
-		Events:  layout.EventsRoot(),
+		LedgerPack: layout.LedgerPackPath(chunkID),
+		TxhashBin:  layout.TxHashBinPath(chunkID),
+		EventsDir:  layout.EventsBucketDir(chunkID),
 	}
 	raw := src.RawLedgers(ctx, ledgerbackend.BoundedRange(chunkID.FirstLedger(), chunkID.LastLedger()))
 	if rerr := ingest.WriteColdChunk(
@@ -130,37 +132,53 @@ func processChunk(ctx context.Context, chunkID chunk.ID, artifacts catalog.Artif
 	return nil
 }
 
-// backfillSource picks a chunk's ledger source as a bare ledgerbackend.LedgerStream:
-//  1. the frozen local .pack, unless ledgers is itself requested (circular);
-//  2. the bulk backend (cfg.Backend), gated by a bounded waitForCoverage on its Tip.
-//
-// The local pack needs no coverage wait (it is complete) and no close (its reader
-// is opened and closed per RawLedgers call). The bulk backend is caller-owned (the
-// daemon Closes it), so backfillSource returns no closer either.
+// backfillSource picks a chunk's ledger source (+ a closer for an opened hot DB;
+// no-op otherwise), in preference order:
+//  1. a ready, COMPLETE hot tier (decision (a): maxCommittedSeq >= last ledger);
+//     incomplete-but-present is staleness that falls through (re-derivation
+//     recovers it); a "ready" DB that won't open is an ordinary restartable error
+//     (read-only open, never auto-healed);
+//  2. the frozen local .pack, unless ledgers is itself requested (circular);
+//  3. the bulk backend, gated by a bounded waitForCoverage on its Tip.
 func backfillSource(
 	ctx context.Context, chunkID chunk.ID, artifacts catalog.ArtifactSet, cfg ProcessConfig,
-) (ledgerbackend.LedgerStream, error) {
+) (ledgerbackend.LedgerStream, func() error, error) {
+	noClose := func() error { return nil }
 	cat := cfg.Catalog
 	layout := cat.Layout()
 
+	// (1) Hot branch: only when the hot key is "ready". A "transient" key (mid-op
+	// or recovery-demoted) is not a read source; an absent key falls through.
+	src, closer, used, herr := resolveHotSource(chunkID, cfg)
+	if herr != nil {
+		return nil, noClose, herr // hot-DB open failure — restartable, never auto-healed
+	}
+	if used {
+		cfg.Logger.Debugf("backfillSource: chunk %s from complete hot tier", chunkID)
+		return src, closer, nil
+	}
+
+	// (2) Frozen local .pack, only when ledgers is not requested (producing ledgers
+	// from the pack we'd write would be circular).
 	ledgersState, err := cat.State(chunkID, geometry.KindLedgers)
 	if err != nil {
-		return nil, fmt.Errorf("read ledgers state chunk %s: %w", chunkID, err)
+		return nil, noClose, fmt.Errorf("read ledgers state chunk %s: %w", chunkID, err)
 	}
 	if ledgersState == geometry.StateFrozen && !artifacts.Has(geometry.KindLedgers) {
 		packPath := layout.LedgerPackPath(chunkID)
 		if _, serr := os.Stat(packPath); serr == nil {
 			cfg.Logger.Debugf("backfillSource: chunk %s re-derived from frozen .pack", chunkID)
-			return ledger.NewPackStream(packPath), nil
+			return ledger.NewPackStream(packPath), noClose, nil
 		}
 		// frozen ⇒ file exists; a missing pack is a bug, not a re-download trigger.
-		return nil, fmt.Errorf(
+		return nil, noClose, fmt.Errorf(
 			"chunk %s ledgers is %q but pack file is missing at %s",
 			chunkID, geometry.StateFrozen, packPath)
 	}
 
+	// (3) Bulk backend — the only source for a chunk with no local copy.
 	if cfg.Backend == nil {
-		return nil, fmt.Errorf(
+		return nil, noClose, fmt.Errorf(
 			"chunk %s has no local copy and no bulk backend is configured", chunkID)
 	}
 	// The coverage wait is mandatory before reading the bulk backend: the freeze
@@ -169,8 +187,59 @@ func backfillSource(
 	if werr := waitForCoverage(
 		ctx, cfg.Backend, chunkID.LastLedger(), defaultCoveragePollInterval, defaultCoverageTimeout,
 	); werr != nil {
-		return nil, werr
+		return nil, noClose, werr
 	}
 	cfg.Logger.Debugf("backfillSource: chunk %s from bulk backend", chunkID)
-	return cfg.Backend, nil
+	return cfg.Backend, noClose, nil
+}
+
+// resolveHotSource applies the hot branch end to end: it reads the hot key and,
+// only when "ready", delegates to tryHotSource. used=true → src/closer are the hot
+// source; used=false → src/closer are nil (key not "ready", or DB incomplete) and
+// the caller falls through; err → the hot-key read failed, or tryHotSource
+// reported a "ready" DB it could not open or query. Keeps backfillSource flat.
+func resolveHotSource(
+	chunkID chunk.ID, cfg ProcessConfig,
+) (ledgerbackend.LedgerStream, func() error, bool, error) {
+	hotState, err := cfg.Catalog.HotState(chunkID)
+	if err != nil {
+		return nil, nil, false, fmt.Errorf("read hot state chunk %s: %w", chunkID, err)
+	}
+	if hotState != geometry.HotReady {
+		return nil, nil, false, nil // "transient"/absent: not a read source
+	}
+	return tryHotSource(chunkID, cfg)
+}
+
+// tryHotSource handles the hot branch under a "ready" key: it opens the chunk's
+// shared hot DB read-only (never auto-healed) straight from its Layout path.
+// used=true when present AND complete (the closer is the DB's Close); used=false
+// when empty or incomplete (the DB is closed here; caller falls through); err when
+// the open or the MaxCommittedSeq read fails — restartable, never staleness.
+func tryHotSource(chunkID chunk.ID, cfg ProcessConfig) (ledgerbackend.LedgerStream, func() error, bool, error) {
+	dir := cfg.Catalog.Layout().HotChunkPath(chunkID)
+	// Open the chunk's shared multi-CF DB READ-ONLY: the freeze reads its ledgers to
+	// re-derive the cold artifacts and must never mutate it (the read-only open
+	// replays any un-synced WAL into memtables but persists nothing). An absent or
+	// gutted "ready" DB fails the open — restartable, never auto-created.
+	hot, err := hotchunk.OpenReadOnly(dir, chunkID, cfg.Logger)
+	if err != nil {
+		return nil, nil, false, fmt.Errorf("chunk %s is ready but its hot DB won't open: %w", chunkID, err)
+	}
+	maxSeq, present, merr := hot.MaxCommittedSeq()
+	if merr != nil {
+		_ = hot.Close() // best-effort: the read error below is the one reported
+		// A read error against an opened DB: the DB opened but cannot answer its
+		// own progress. Surface it (restartable), don't treat as staleness.
+		return nil, nil, false, fmt.Errorf("chunk %s: read hot max committed seq: %w", chunkID, merr)
+	}
+	// decision (a): complete iff the single DB's maxCommittedSeq reaches the chunk's
+	// last ledger. An empty DB (present==false) cannot be complete.
+	if present && maxSeq >= chunkID.LastLedger() {
+		return hot.Source(), hot.Close, true, nil
+	}
+	// Present but incomplete: legitimate staleness — caller falls through.
+	cfg.Logger.Debugf("backfillSource: chunk %s hot tier present but incomplete; falling through", chunkID)
+	_ = hot.Close() // best-effort: staleness is not an error, so a close failure is dropped
+	return nil, nil, false, nil
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/backfill/process_test.go b/cmd/stellar-rpc/internal/fullhistory/backfill/process_test.go
index 78cf6540c..5c917c005 100644
--- a/cmd/stellar-rpc/internal/fullhistory/backfill/process_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/backfill/process_test.go
@@ -330,8 +330,9 @@ func TestBackfillSource_PrefersFrozenPackWhenLFSNotRequested(t *testing.T) {
 	cfg.Backend = bulk
 
 	set := catalog.NewArtifactSet(geometry.KindEvents, geometry.KindTxHash) // ledgers NOT requested
-	src, err := backfillSource(context.Background(), chunkID, set, cfg)
+	src, closeSrc, err := backfillSource(context.Background(), chunkID, set, cfg)
 	require.NoError(t, err)
+	defer func() { require.NoError(t, closeSrc()) }()
 	// It is a pack stream (re-derivation without download); the bulk backend was
 	// not consulted.
 	require.IsType(t, ledger.NewPackStream(""), src)
@@ -354,8 +355,9 @@ func TestBackfillSource_DoesNotUsePackWhenLFSRequested(t *testing.T) {
 
 	// ledgers IS requested — the pack branch is skipped (circular), so it goes to
 	// the bulk backend (whose tip covers the chunk, so the wait passes).
-	src, err := backfillSource(context.Background(), chunkID, catalog.AllArtifacts(), cfg)
+	src, closeSrc, err := backfillSource(context.Background(), chunkID, catalog.AllArtifacts(), cfg)
 	require.NoError(t, err)
+	defer func() { require.NoError(t, closeSrc()) }()
 	require.Same(t, bulk, src)
 }
 
@@ -369,7 +371,7 @@ func TestBackfillSource_BulkCoverageErrorAborts(t *testing.T) {
 	chunkID := chunk.ID(0)
 	cfg.Backend = &fakeBackend{t: t, gen: zeroTxLCMBytes, tipErr: errors.New("boom")}
 
-	_, err := backfillSource(context.Background(), chunkID, catalog.AllArtifacts(), cfg)
+	_, _, err := backfillSource(context.Background(), chunkID, catalog.AllArtifacts(), cfg)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "backend tip query")
 }
@@ -379,7 +381,7 @@ func TestBackfillSource_NoBackendConfigured(t *testing.T) {
 	cfg := testProcessConfig(t, cat)
 	cfg.Backend = nil
 
-	_, err := backfillSource(context.Background(), chunk.ID(0), catalog.AllArtifacts(), cfg)
+	_, _, err := backfillSource(context.Background(), chunk.ID(0), catalog.AllArtifacts(), cfg)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "no bulk backend")
 }
@@ -453,7 +455,7 @@ func writeRealPack(t *testing.T, cat *catalog.Catalog, chunkID chunk.ID) {
 	stream := &fullChunkStream{t: t, gen: zeroTxLCMBytes}
 	raw := stream.RawLedgers(context.Background(),
 		ledgerbackend.BoundedRange(chunkID.FirstLedger(), chunkID.LastLedger()))
-	dirs := ingest.ColdDirs{Ledgers: cat.Layout().LedgersRoot()}
+	dirs := ingest.ColdDirs{LedgerPack: cat.Layout().LedgerPackPath(chunkID)}
 	require.NoError(t, ingest.WriteColdChunk(
 		context.Background(), silentLogger(), chunkID, raw, dirs,
 		ingest.NopSink{}, ingest.Config{Ledgers: true}))
diff --git a/cmd/stellar-rpc/internal/fullhistory/backfill/recorder_test.go b/cmd/stellar-rpc/internal/fullhistory/backfill/recorder_test.go
index 23d440123..74261ef90 100644
--- a/cmd/stellar-rpc/internal/fullhistory/backfill/recorder_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/backfill/recorder_test.go
@@ -41,7 +41,11 @@ func (r *recordingMetrics) Prune(count int, d time.Duration) {
 	r.prune = append(r.prune, pruneRec{count, d})
 }
 
-func (*recordingMetrics) LastCommitted(uint32, uint32) {}
-func (*recordingMetrics) BackfillPass(time.Duration)   {}
+func (*recordingMetrics) LastCommitted(uint32)       {} // no-op stub
+func (*recordingMetrics) RetentionFloor(uint32)      {} // no-op stub
+func (*recordingMetrics) ChunkBoundary()             {} // no-op stub
+func (*recordingMetrics) BackfillPass(time.Duration) {} // no-op stub
+func (*recordingMetrics) LiveHotChunks(int)          {} // no-op stub
+func (*recordingMetrics) Discard(int, time.Duration) {} // no-op stub
 
 var _ observability.Metrics = (*recordingMetrics)(nil)
diff --git a/cmd/stellar-rpc/internal/fullhistory/backfill/resolve.go b/cmd/stellar-rpc/internal/fullhistory/backfill/resolve.go
index 8ea961990..6814e66c0 100644
--- a/cmd/stellar-rpc/internal/fullhistory/backfill/resolve.go
+++ b/cmd/stellar-rpc/internal/fullhistory/backfill/resolve.go
@@ -30,11 +30,6 @@ type coverageRange struct {
 	Lo, Hi chunk.ID
 }
 
-// covers reports whether this range fully contains other (other ⊆ this).
-func (r coverageRange) covers(other coverageRange) bool {
-	return r.Lo <= other.Lo && r.Hi >= other.Hi
-}
-
 // resolve diffs the desired state (every artifact of [rangeStart, rangeEnd] durable)
 // against the catalog, emitting a Plan. A pure read — recomputes from durable keys
 // every run, so a restart re-plans cleanly.
@@ -98,12 +93,11 @@ func resolveTxHashIndex(
 		Hi: min(txLayout.LastChunk(w), rangeEnd), // capped by range end
 	}
 
-	frozen, hasFrozen, err := cat.FrozenTxHashIndex(w)
+	covered, err := cat.FrozenIndexCoversRange(w, desired.Lo, desired.Hi)
 	if err != nil {
 		return IndexBuild{}, false, err
 	}
-	stored := coverageRange{Lo: frozen.Lo, Hi: frozen.Hi}
-	if hasFrozen && stored.covers(desired) {
+	if covered {
 		// Frozen coverage already spans desired, so no rebuild is due — steady state, a
 		// risen floor, or a finalized window. Any non-frozen leftover a crashed build
 		// stranded (a superseded "pruning"/"freezing" coverage or a demoted .bin) is the
diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog.go
index cd63ac64c..bbdf9f27f 100644
--- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog.go
+++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog.go
@@ -3,12 +3,13 @@ package catalog
 import (
 	"errors"
 	"fmt"
+	"slices"
 	"strconv"
 
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
 )
 
 // Catalog is the streaming daemon's view of durable state. It WRAPS
@@ -55,6 +56,20 @@ func (c *Catalog) State(chunkID chunk.ID, kind geometry.Kind) (geometry.State, e
 	return geometry.State(v), nil
 }
 
+// HotState returns the value stored under a chunk's hot-DB key, or "" with a nil
+// error when the key is absent; a store read error is returned alongside "".
+// The key's mere existence (any value) marks the chunk as owned by ingestion. Most
+// consumers branch on the value: the freeze source and last-committed derivation
+// use only "ready" (see ReadyHotChunkKeys) and openHotDBForChunk picks its
+// recovery action from it. Only the discard scan is value-blind (sweeps any state).
+func (c *Catalog) HotState(chunkID chunk.ID) (geometry.HotState, error) {
+	v, ok, err := c.get(geometry.HotChunkKey(chunkID))
+	if err != nil || !ok {
+		return "", err
+	}
+	return geometry.HotState(v), nil
+}
+
 // ---------------------------------------------------------------------------
 // Scans. Every "find work" operation iterates keys via PrefixScan; nothing
 // lists a directory. Results are returned sorted so callers need no second
@@ -84,6 +99,19 @@ func (c *Catalog) TxHashIndexKeys(w geometry.TxHashIndexID) ([]geometry.TxHashIn
 	return c.txhashIndexKeysByPrefix(geometry.TxHashIndexPrefixFor(w))
 }
 
+// HotChunkKeys returns every hot-DB chunk id, whatever its state (nil filter),
+// ascending. The highest is the live chunk — the ingestion/lifecycle boundary.
+func (c *Catalog) HotChunkKeys() ([]chunk.ID, error) {
+	return c.hotChunkKeysWith(nil)
+}
+
+// ReadyHotChunkKeys returns only the chunks whose hot-DB key equals
+// geometry.HotReady, sorted ascending. The last-committed ledger counts only these:
+// a "transient" key never advances the bound, so recovery may demote any hot key.
+func (c *Catalog) ReadyHotChunkKeys() ([]chunk.ID, error) {
+	return c.hotChunkKeysWith(func(s geometry.HotState) bool { return s == geometry.HotReady })
+}
+
 // AllTxHashIndexKeys is TxHashIndexKeys across all indexes.
 func (c *Catalog) AllTxHashIndexKeys() ([]geometry.TxHashIndexCoverage, error) {
 	return c.txhashIndexKeysByPrefix(geometry.TxHashIndexPrefix)
@@ -118,6 +146,29 @@ func (c *Catalog) FrozenTxHashIndex(w geometry.TxHashIndexID) (geometry.TxHashIn
 	return frozen, found, nil
 }
 
+// FrozenIndexCoversRange reports whether index w's UNIQUE frozen coverage spans
+// the whole inclusive [lo, hi] chunk range, i.e. frozen.Lo <= lo && hi <= frozen.Hi.
+// It reads through FrozenTxHashIndex, so INV-2 (at most one frozen coverage per
+// index) is asserted on every call and any violation is returned as the error.
+// This is the single "covered by a frozen index" predicate shared by the resolve
+// diff (backfill), the discard eligibility scan, and the watermark derivation, so
+// they cannot disagree. Returns (false, nil) when the index has no frozen coverage.
+func (c *Catalog) FrozenIndexCoversRange(w geometry.TxHashIndexID, lo, hi chunk.ID) (bool, error) {
+	frozen, ok, err := c.FrozenTxHashIndex(w)
+	if err != nil {
+		return false, err
+	}
+	return ok && frozen.Lo <= lo && hi <= frozen.Hi, nil
+}
+
+// FrozenIndexCovers reports whether chunk ch is inside the frozen coverage of its
+// OWN index window (the window is looked up via c.txhashIndex.TxHashIndexID). A
+// chunk belongs to exactly one window, so no other window can cover it — this is
+// FrozenIndexCoversRange with lo == hi == ch, and it returns the same errors.
+func (c *Catalog) FrozenIndexCovers(ch chunk.ID) (bool, error) {
+	return c.FrozenIndexCoversRange(c.txhashIndex.TxHashIndexID(ch), ch, ch)
+}
+
 // ---------------------------------------------------------------------------
 // Config pins. Written once on first start, immutable thereafter.
 // ---------------------------------------------------------------------------
@@ -171,6 +222,28 @@ func (c *Catalog) has(key string) (bool, error) {
 	return ok, err
 }
 
+// hotChunkKeysWith scans the hot-key prefix and returns the chunk ids whose value
+// satisfies keep (nil keep = every value), ascending; a malformed key is an error.
+func (c *Catalog) hotChunkKeysWith(keep func(geometry.HotState) bool) ([]chunk.ID, error) {
+	var ids []chunk.ID
+	for e, err := range c.store.PrefixScan(geometry.HotChunkPrefix) {
+		if err != nil {
+			return nil, err
+		}
+		id, ok := geometry.ParseHotChunkKey(e.Key)
+		if !ok {
+			return nil, fmt.Errorf("malformed hot key %q", e.Key)
+		}
+		if keep == nil || keep(geometry.HotState(e.Value)) {
+			ids = append(ids, id)
+		}
+	}
+	// PrefixScan yields byte-lex order == numeric under the 8-digit padding, so
+	// the slice is already ascending; sort defensively against a width change.
+	slices.Sort(ids)
+	return ids, nil
+}
+
 // txhashIndexKeysByPrefix scans coverage keys under prefix, attaching each scanned
 // value as State.
 func (c *Catalog) txhashIndexKeysByPrefix(prefix string) ([]geometry.TxHashIndexCoverage, error) {
diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol.go
index 31d7e0f86..85d3f44dd 100644
--- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol.go
+++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol.go
@@ -3,9 +3,9 @@ package catalog
 import (
 	"errors"
 
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
 )
 
 // The one write protocol — mark-then-write. Every durable artifact (per-chunk
@@ -151,3 +151,24 @@ func (c *Catalog) txhashIndexChunkKeysPresent(lo, hi chunk.ID) ([]string, error)
 	}
 	return keys, nil
 }
+
+// --- Hot-DB key bracket: the file protocol's transient/ready bracket applied to
+// the chunk's hot directory. ---
+
+// PutHotTransient writes geometry.HotTransient under the chunk's hot-DB key — the
+// open end of the bracket, written before the dir is created or a discard begins
+// removing it, so a crash mid-operation is detectable from this value alone.
+func (c *Catalog) PutHotTransient(chunkID chunk.ID) error {
+	return c.store.Put(geometry.HotChunkKey(chunkID), string(geometry.HotTransient))
+}
+
+// FlipHotReady overwrites the hot-DB key with geometry.HotReady (dir exists and is
+// usable). The caller MUST have fsynced the dir (and its parent on creation) first.
+func (c *Catalog) FlipHotReady(chunkID chunk.ID) error {
+	return c.store.Put(geometry.HotChunkKey(chunkID), string(geometry.HotReady))
+}
+
+// DeleteHotKey deletes a chunk's hot-DB key — the close end, after rmdir. Idempotent.
+func (c *Catalog) DeleteHotKey(chunkID chunk.ID) error {
+	return c.store.Delete(geometry.HotChunkKey(chunkID))
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol_test.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol_test.go
index 2bb384a34..f70e9e536 100644
--- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol_test.go
@@ -5,8 +5,8 @@ import (
 
 	"github.com/stretchr/testify/require"
 
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 )
 
 // ---------------------------------------------------------------------------
@@ -43,6 +43,29 @@ func TestCommitIndexPromoteAndDemote(t *testing.T) {
 	require.Equal(t, geometry.StateFrozen, states[geometry.TxHashIndexKey(5, 5100, 5350)])
 }
 
+// TestFrozenIndexCoversRange_AssertsUniqueness pins that the shared "covered by a
+// frozen index" predicate (#37) propagates the INV-2 assertion FrozenTxHashIndex
+// makes: with two frozen coverages planted in one window, both the range form and
+// the per-chunk form must return an error, so watermark derivation, discard
+// eligibility, and the resolve diff can never disagree about a corrupt snapshot.
+func TestFrozenIndexCoversRange_AssertsUniqueness(t *testing.T) {
+	cat, _ := testCatalog(t)
+
+	// Plant two frozen coverages in window 5, bypassing the promote/demote commit
+	// path (which never leaves two frozen) to stage the corrupt snapshot directly.
+	require.NoError(t, cat.store.Put(geometry.TxHashIndexKey(5, 5100, 5349), string(geometry.StateFrozen)))
+	require.NoError(t, cat.store.Put(geometry.TxHashIndexKey(5, 5100, 5350), string(geometry.StateFrozen)))
+
+	_, rangeErr := cat.FrozenIndexCoversRange(5, 5100, 5349)
+	require.Error(t, rangeErr, "the range predicate must surface the uniqueness violation")
+	require.Contains(t, rangeErr.Error(), "two frozen coverages")
+
+	// The per-chunk convenience form resolves a chunk to its window and inherits
+	// the same assertion.
+	_, chunkErr := cat.FrozenIndexCovers(5100)
+	require.Error(t, chunkErr, "the per-chunk predicate inherits the uniqueness assertion")
+}
+
 func TestCommitIndexTerminalDemotesTxhashKeys(t *testing.T) {
 	cat, _ := testCatalog(t)
 
diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep.go
index 9eb1825e0..02f1f4bbb 100644
--- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep.go
+++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep.go
@@ -1,8 +1,13 @@
 package catalog
 
 import (
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore"
+	"fmt"
+	"os"
+	"path/filepath"
+
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore"
 )
 
 // Key-driven sweeps — the ONLY two deletion bodies in the system, one per key
@@ -85,3 +90,35 @@ func (c *Catalog) SweepTxHashIndexKey(cov geometry.TxHashIndexCoverage) error {
 	geometry.RmdirIfEmpty(dir) // best-effort; an empty dir is not an artifact
 	return nil
 }
+
+// DiscardHotChunk retires a chunk's hot DB once its cold artifacts are durable
+// (or it fell past retention), in the same crash order as the two sweeps above:
+// mark "transient" -> remove dir (os.RemoveAll) -> fsync(parent) -> delete key.
+// The key outlives the durable removal, so a crash anywhere leaves it "transient"
+// for the next scan to finish — idempotent, and an absent key is a no-op. The caller
+// MUST have closed the chunk's hot write handle (discard runs after the freeze).
+func (c *Catalog) DiscardHotChunk(chunkID chunk.ID) error {
+	state, err := c.HotState(chunkID)
+	if err != nil {
+		return fmt.Errorf("read hot key chunk %s: %w", chunkID, err)
+	}
+	if state == "" {
+		return nil
+	}
+	if err := c.PutHotTransient(chunkID); err != nil {
+		return fmt.Errorf("mark hot transient chunk %s: %w", chunkID, err)
+	}
+	dir := c.layout.HotChunkPath(chunkID)
+	if err := os.RemoveAll(dir); err != nil {
+		return fmt.Errorf("rmdir hot dir %s: %w", dir, err)
+	}
+	// Make the removal durable BEFORE the key delete: the key outlives the dir, so
+	// a crash re-runs the discard rather than leaving a key-less dir.
+	if err := geometry.FsyncDir(filepath.Dir(dir)); err != nil {
+		return fmt.Errorf("fsync hot parent dir %s: %w", filepath.Dir(dir), err)
+	}
+	if err := c.DeleteHotKey(chunkID); err != nil {
+		return fmt.Errorf("delete hot key chunk %s: %w", chunkID, err)
+	}
+	return nil
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep_test.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep_test.go
index 2a287f61b..762e48ba8 100644
--- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep_test.go
@@ -1,11 +1,13 @@
 package catalog
 
 import (
+	"os"
 	"testing"
 
 	"github.com/stretchr/testify/require"
 
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 )
 
 // ---------------------------------------------------------------------------
@@ -104,3 +106,32 @@ func TestSweepEmptyRefsNoop(t *testing.T) {
 	cat, _ := testCatalog(t)
 	require.NoError(t, cat.SweepChunkArtifacts(nil))
 }
+
+// TestDiscardHotChunkResumesTransient mirrors the sweep siblings' crash-resume
+// coverage for the hot-DB discard: a "transient" key (a discard that crashed after
+// marking transient but before deleting the key) plus a leftover dir must be
+// finished by the next DiscardHotChunk — the dir removed and the key deleted.
+func TestDiscardHotChunkResumesTransient(t *testing.T) {
+	cat, _ := testCatalog(t)
+	c := chunk.ID(4)
+
+	// The mid-discard crash state: a "transient" key + a real leftover dir.
+	require.NoError(t, cat.PutHotTransient(c))
+	dir := cat.layout.HotChunkPath(c)
+	require.NoError(t, os.MkdirAll(dir, 0o755)) // also creates the parent dir that DiscardHotChunk fsyncs
+
+	require.NoError(t, cat.DiscardHotChunk(c)) // stands in for the next run that must finish the discard
+
+	// The resume completed it: key gone, dir gone.
+	state, err := cat.HotState(c)
+	require.NoError(t, err)
+	require.Equal(t, geometry.HotState(""), state, "transient key finished") // "" = key absent
+	require.NoDirExists(t, dir, "leftover hot dir swept")
+}
+
+// TestDiscardHotChunkAbsentKeyNoop: an absent hot key is a clean no-op (nothing
+// to finish) — DiscardHotChunk must return nil without touching disk or catalog.
+func TestDiscardHotChunkAbsentKeyNoop(t *testing.T) {
+	cat, _ := testCatalog(t)
+	require.NoError(t, cat.DiscardHotChunk(chunk.ID(9))) // this test never writes a key for chunk 9
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_test.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_test.go
index 3f3ccaeab..c9b986eb9 100644
--- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_test.go
@@ -5,8 +5,8 @@ import (
 
 	"github.com/stretchr/testify/require"
 
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 )
 
 // PinEarliestLedger writes the sole config pin; EarliestLedger reads it back.
@@ -27,6 +27,27 @@ func TestConfigPins(t *testing.T) {
 	require.Equal(t, uint32(2), el)
 }
 
+// ---------------------------------------------------------------------------
+// Scans: HotChunkKeys (value-blind) vs ReadyHotChunkKeys (ready-only).
+// ---------------------------------------------------------------------------
+
+func TestHotChunkKeysValueBlindVsReadyOnly(t *testing.T) {
+	cat, _ := testCatalog(t)
+
+	require.NoError(t, cat.PutHotTransient(3)) // transient: expected from HotChunkKeys only
+	require.NoError(t, cat.FlipHotReady(5))    // ready: expected from both scans
+	require.NoError(t, cat.PutHotTransient(9)) // transient
+	require.NoError(t, cat.FlipHotReady(12))   // ready
+
+	all, err := cat.HotChunkKeys()
+	require.NoError(t, err)
+	require.Equal(t, []chunk.ID{3, 5, 9, 12}, all, "value-blind: every hot key")
+
+	ready, err := cat.ReadyHotChunkKeys()
+	require.NoError(t, err)
+	require.Equal(t, []chunk.ID{5, 12}, ready, "ready-only excludes transient")
+}
+
 func TestChunkArtifactKeys(t *testing.T) {
 	cat, _ := testCatalog(t)
 
diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/helpers_test.go b/cmd/stellar-rpc/internal/fullhistory/catalog/helpers_test.go
index 29906238b..3f34f3291 100644
--- a/cmd/stellar-rpc/internal/fullhistory/catalog/helpers_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/catalog/helpers_test.go
@@ -13,9 +13,9 @@ import (
 
 	supportlog "github.com/stellar/go-stellar-sdk/support/log"
 
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
 )
 
 func silentLogger() *supportlog.Entry {
diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/keys_roundtrip_test.go b/cmd/stellar-rpc/internal/fullhistory/catalog/keys_roundtrip_test.go
index 03c16009f..3669aac91 100644
--- a/cmd/stellar-rpc/internal/fullhistory/catalog/keys_roundtrip_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/catalog/keys_roundtrip_test.go
@@ -5,8 +5,8 @@ import (
 
 	"github.com/stretchr/testify/require"
 
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 )
 
 // ---------------------------------------------------------------------------
diff --git a/cmd/stellar-rpc/internal/fullhistory/config.go b/cmd/stellar-rpc/internal/fullhistory/config.go
index 6ad1e2c16..3f30a5842 100644
--- a/cmd/stellar-rpc/internal/fullhistory/config.go
+++ b/cmd/stellar-rpc/internal/fullhistory/config.go
@@ -4,7 +4,6 @@ import (
 	"bytes"
 	"fmt"
 	"os"
-	"path/filepath"
 	"runtime"
 
 	"github.com/pelletier/go-toml"
@@ -88,10 +87,27 @@ type BackfillConfig struct {
 	BSB ledgerbackend.BufferedStorageBackendConfig `toml:"bsb"`
 }
 
-// IngestionConfig is [ingestion] — the live-network ingestion settings.
+// IngestionConfig is [ingestion] — the live-network ingestion (captive-core)
+// settings. The captive-core config FILE is the single source of truth for what
+// it can hold (notably NETWORK_PASSPHRASE, read back at startup); the remaining
+// keys are the things that don't live in that file — the plain history-archive
+// URLs (the file's [HISTORY.*] entries are shell commands, not the URLs the SDK's
+// archive client needs), and, optionally, the stellar-core binary path and the
+// captive-core storage directory.
 type IngestionConfig struct {
-	// Path to the CaptiveStellarCore config file. Required.
+	// CaptiveCoreConfig is the path to the CaptiveStellarCore (stellar-core) config
+	// file. Required for live ingestion. Must define NETWORK_PASSPHRASE.
 	CaptiveCoreConfig string `toml:"captive_core_config"`
+	// HistoryArchiveURLs are the plain history-archive URLs the SDK reads
+	// checkpoints from. Required for live ingestion (not derivable from the
+	// captive-core file's [HISTORY.*] get-commands).
+	HistoryArchiveURLs []string `toml:"history_archive_urls"`
+	// StellarCoreBinaryPath is the path to the stellar-core binary. Optional —
+	// defaults to the "stellar-core" found on PATH.
+	StellarCoreBinaryPath string `toml:"stellar_core_binary_path"`
+	// CaptiveCoreStoragePath is captive core's BUCKET_DIR_PATH base; optional,
+	// defaults to {default_data_dir}/captive-core.
+	CaptiveCoreStoragePath string `toml:"captive_core_storage_path"`
 }
 
 // LoggingConfig is [logging].
@@ -187,23 +203,27 @@ type Paths struct {
 
 // ResolvePaths fills every storage path, defaulting under default_data_dir.
 // Relative overrides are kept relative (resolved against the caller's working
-// dir); only the defaults are joined to the data dir.
+// dir); only the defaults are joined to the data dir. The default tree is spelled
+// ONCE, by geometry.NewLayout — production flows through here and every package's
+// test helpers through NewLayout, so a rename to the tree can't leave the two
+// disagreeing.
 func (cfg Config) ResolvePaths() Paths {
 	dataDir := cfg.Service.DefaultDataDir
-	pick := func(override, def string) string {
+	def := geometry.NewLayout(dataDir)
+	pick := func(override, defPath string) string {
 		if override != "" {
 			return override
 		}
-		return def
+		return defPath
 	}
 	return Paths{
 		DataDir:     dataDir,
-		Catalog:     pick(cfg.Storage.Catalog, filepath.Join(dataDir, "catalog", "rocksdb")),
-		Ledgers:     pick(cfg.Storage.Ledgers, filepath.Join(dataDir, "ledgers")),
-		Events:      pick(cfg.Storage.Events, filepath.Join(dataDir, "events")),
-		TxhashRaw:   pick(cfg.Storage.TxhashRaw, filepath.Join(dataDir, "txhash", "raw")),
-		TxhashIndex: pick(cfg.Storage.TxhashIndex, filepath.Join(dataDir, "txhash", "index")),
-		HotStorage:  pick(cfg.Storage.Hot, filepath.Join(dataDir, "hot")),
+		Catalog:     pick(cfg.Storage.Catalog, def.CatalogPath()),
+		Ledgers:     pick(cfg.Storage.Ledgers, def.LedgersRoot()),
+		Events:      pick(cfg.Storage.Events, def.EventsRoot()),
+		TxhashRaw:   pick(cfg.Storage.TxhashRaw, def.TxHashRawRoot()),
+		TxhashIndex: pick(cfg.Storage.TxhashIndex, def.TxHashIndexRoot()),
+		HotStorage:  pick(cfg.Storage.Hot, def.HotRoot()),
 	}
 }
 
diff --git a/cmd/stellar-rpc/internal/fullhistory/daemon.go b/cmd/stellar-rpc/internal/fullhistory/daemon.go
index 8b0dbc059..1e5937bb1 100644
--- a/cmd/stellar-rpc/internal/fullhistory/daemon.go
+++ b/cmd/stellar-rpc/internal/fullhistory/daemon.go
@@ -4,11 +4,16 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
 	"time"
 
+	"github.com/pelletier/go-toml"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/sirupsen/logrus"
 
+	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
 	supportlog "github.com/stellar/go-stellar-sdk/support/log"
 
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/daemon/interfaces"
@@ -34,6 +39,11 @@ type daemonOptions struct {
 	// frontfill-only daemon when no datastore is configured). Tests inject a fakeBackend.
 	Backend backfill.Backend
 
+	// Core opens the live captive-core ledger stream the ingestion loop reads
+	// from. nil ⇒ runDaemonWith builds a captiveCoreOpener from [ingestion] (a
+	// complete production opener). Tests inject a fake opener.
+	Core CoreOpener
+
 	// ServeReads launches the RPC read server; it must return promptly, not block.
 	// nil ⇒ the #772 no-op placeholder (reads still come from the v1 SQLite daemon).
 	ServeReads func(ctx context.Context) error
@@ -49,6 +59,11 @@ type daemonOptions struct {
 
 	// IngestSink is the per-type cold-path ingest sink; nil ⇒ a *ingest.PrometheusSink.
 	IngestSink ingest.MetricSink
+
+	// chunksPerTxhashIndex overrides the tx-hash index width (test-only). 0 ⇒ the
+	// fixed geometry.ChunksPerTxhashIndex. Tests set it to 1 so a single chunk's
+	// freeze is a terminal index (exercising the fold+prune path cheaply).
+	chunksPerTxhashIndex uint32
 }
 
 const defaultRestartBackoff = 5 * time.Second
@@ -88,7 +103,11 @@ func runDaemonWith(ctx context.Context, configPath string, opts daemonOptions) e
 	}
 	defer func() { _ = store.Close() }()
 
-	txLayout, err := geometry.NewTxHashIndexLayout(geometry.ChunksPerTxhashIndex)
+	cpi := geometry.ChunksPerTxhashIndex
+	if opts.chunksPerTxhashIndex != 0 {
+		cpi = opts.chunksPerTxhashIndex
+	}
+	txLayout, err := geometry.NewTxHashIndexLayout(cpi)
 	if err != nil {
 		return err
 	}
@@ -130,8 +149,21 @@ func runDaemonWith(ctx context.Context, configPath string, opts daemonOptions) e
 	registry := prometheus.NewRegistry()
 	metrics, sink := buildSinks(opts, registry)
 
+	// Resolve the captive-core opener: injected (tests) or built from
+	// [ingestion].captive_core_config (a complete production opener) — done after
+	// validateConfig so config errors surface first.
+	core := opts.Core
+	if core == nil {
+		built, cerr := newCaptiveCoreOpener(cfg.Ingestion, cfg.Service.DefaultDataDir, logger)
+		if cerr != nil {
+			return cerr
+		}
+		core = built
+	}
+
 	// --- Assemble the StartConfig and run the supervised run loop. ---
-	start := startConfig(cfg, cat, logger, backend, networkTip, serveReads, metrics, sink, tipBackoff, tipMaxAttempts)
+	start := startConfig(
+		cfg, cat, logger, backend, networkTip, core, serveReads, metrics, sink, tipBackoff, tipMaxAttempts)
 
 	backoff := opts.RestartBackoff
 	if backoff <= 0 {
@@ -140,10 +172,12 @@ func runDaemonWith(ctx context.Context, configPath string, opts daemonOptions) e
 	return supervise(ctx, start, logger, backoff)
 }
 
-// startConfig assembles the StartConfig run consumes.
+// startConfig assembles the StartConfig run consumes. run() builds the
+// lifecycle.Config from Exec + RetentionChunks, so backfill and the lifecycle
+// goroutine share ONE catalog, worker pool, and retention floor by construction.
 func startConfig(
 	cfg Config, cat *catalog.Catalog, logger *supportlog.Entry,
-	backend backfill.Backend, networkTip NetworkTipBackend, serveReads func(context.Context) error,
+	backend backfill.Backend, networkTip NetworkTipBackend, core CoreOpener, serveReads func(context.Context) error,
 	metrics observability.Metrics, sink ingest.MetricSink, tipBackoff time.Duration, tipMaxAttempts int,
 ) StartConfig {
 	exec := backfill.ExecConfig{
@@ -161,6 +195,7 @@ func startConfig(
 		Exec:            exec,
 		RetentionChunks: deref(cfg.Retention.RetentionChunks),
 		NetworkTip:      networkTip,
+		Core:            core,
 		ServeReads:      serveReads,
 		TipBackoff:      tipBackoff,
 		TipMaxAttempts:  tipMaxAttempts,
@@ -181,9 +216,11 @@ func buildSinks(opts daemonOptions, registry *prometheus.Registry) (observabilit
 	return metrics, sink
 }
 
-// supervise restarts run on a restartable error after a backoff ("startup is the
-// recovery path"); a clean shutdown or ctx cancel returns nil; ErrFirstStartNoTip
-// is fatal and surfaces up.
+// supervise is the daemon's clean-vs-restart decision point ("startup is the
+// recovery path"): nil or a ctx cancel is a clean shutdown, everything else is
+// warned and retried after a backoff. There is deliberately no fatal-and-exit
+// class — genuine loss presents as a crash-loop with a clear warn line. The
+// never-auto-heal guarantee lives in the must-exist open (openHotDBForChunk), not here.
 func supervise(
 	ctx context.Context, start StartConfig, logger *supportlog.Entry, backoff time.Duration,
 ) error {
@@ -195,10 +232,6 @@ func supervise(
 		if ctx.Err() != nil {
 			return nil //nolint:nilerr // ctx canceled is a clean shutdown, not a run failure
 		}
-		// Unrecoverable: a fresh start cannot heal it, so don't spin restarting.
-		if errors.Is(err, ErrFirstStartNoTip) {
-			return err
-		}
 		logger.WithError(err).Warnf("daemon run failed; restarting in %s", backoff)
 		if sleepCtx(ctx, backoff) != nil {
 			return nil //nolint:nilerr // ctx canceled mid-backoff is a clean shutdown, not a failure
@@ -207,7 +240,7 @@ func supervise(
 }
 
 // sleepCtx blocks for d or until ctx is canceled, returning ctx.Err() if canceled
-// first and nil otherwise. supervise's three-way clean/fatal/restart loop can't be
+// first and nil otherwise. supervise's clean-vs-restart loop can't be
 // a backoff.Retry, so it keeps a hand-rolled sleep — but shares this one helper
 // rather than re-rolling the timer/select (and its easy-to-forget timer.Stop).
 func sleepCtx(ctx context.Context, d time.Duration) error {
@@ -244,6 +277,106 @@ func buildBackfillBackend(
 	return backend, cleanup, nil
 }
 
+// ---------------------------------------------------------------------------
+// Production captive-core opener (the live ingestion source).
+// ---------------------------------------------------------------------------
+
+// captiveCoreOpener is the production CoreOpener. It holds a resolved
+// CaptiveCoreConfig and hands back a captive-core LedgerStream that builds a FRESH
+// core per run (each supervised restart reopens core anew) — the stream owns the
+// process lifecycle, so there is no eager prepare or explicit closer here.
+// Construction mirrors the RPC daemon's newCaptiveCore so the full-history daemon
+// runs captive core and the ledgerbackend the same way (#772 can unify them at
+// the cutover).
+type captiveCoreOpener struct {
+	config ledgerbackend.CaptiveCoreConfig // resolved once by newCaptiveCoreOpener; OpenCore copies it per run
+}
+
+// newCaptiveCoreOpener resolves the captive-core config, treating the
+// captive_core_config FILE as the single source of truth: NETWORK_PASSPHRASE is read
+// back from it. The stellar-core binary defaults to the one on PATH and the storage
+// path to {dataDir}/captive-core. Only the plain history-archive URLs (not derivable
+// from the file's [HISTORY.*] get-commands) come from [ingestion].history_archive_urls.
+// The toml params mirror the RPC daemon (strict, unified events, soroban diagnostic/meta
+// enforcement) so the ingested meta is what the events + txhash stores need.
+func newCaptiveCoreOpener(ing IngestionConfig, dataDir string, logger *supportlog.Entry) (*captiveCoreOpener, error) {
+	if ing.CaptiveCoreConfig == "" {
+		return nil, errors.New("[ingestion].captive_core_config is required for live ingestion")
+	}
+	if len(ing.HistoryArchiveURLs) == 0 {
+		return nil, errors.New("[ingestion].history_archive_urls is required for live ingestion")
+	}
+
+	// NETWORK_PASSPHRASE lives in the captive-core file; read it back so the operator
+	// configures it in one place. (go-toml v1 skips keys that have no field in peek.)
+	data, err := os.ReadFile(ing.CaptiveCoreConfig)
+	if err != nil {
+		return nil, fmt.Errorf("read captive_core_config %q: %w", ing.CaptiveCoreConfig, err)
+	}
+	var peek struct {
+		NetworkPassphrase string `toml:"NETWORK_PASSPHRASE"`
+	}
+	if perr := toml.Unmarshal(data, &peek); perr != nil {
+		return nil, fmt.Errorf("parse captive_core_config %q: %w", ing.CaptiveCoreConfig, perr)
+	}
+	if peek.NetworkPassphrase == "" {
+		return nil, fmt.Errorf("captive_core_config %q must define NETWORK_PASSPHRASE", ing.CaptiveCoreConfig)
+	}
+
+	// stellar-core binary: explicit path, else the one on PATH (RPC daemon default).
+	binaryPath := ing.StellarCoreBinaryPath
+	if binaryPath == "" {
+		found, lerr := exec.LookPath("stellar-core")
+		if lerr != nil {
+			return nil, fmt.Errorf(
+				"[ingestion].stellar_core_binary_path unset and stellar-core not found on PATH: %w", lerr)
+		}
+		binaryPath = found
+	}
+
+	storagePath := ing.CaptiveCoreStoragePath // optional override; empty falls back to a dir under dataDir
+	if storagePath == "" {
+		storagePath = filepath.Join(dataDir, "captive-core")
+	}
+
+	// Build the toml from the bytes already read, not the path — re-reading via
+	// NewCaptiveCoreTomlFromFile would parse the file twice and, worse, could
+	// observe a different NETWORK_PASSPHRASE than the one peeked above if the file
+	// changed between the two reads (surfacing as the SDK's confusing mismatch error).
+	coreToml, err := ledgerbackend.NewCaptiveCoreTomlFromData(data, ledgerbackend.CaptiveCoreTomlParams{
+		HistoryArchiveURLs:                 ing.HistoryArchiveURLs,
+		NetworkPassphrase:                  peek.NetworkPassphrase,
+		Strict:                             true,
+		EnforceSorobanDiagnosticEvents:     true,
+		EnforceSorobanTransactionMetaExtV1: true,
+		EmitUnifiedEvents:                  true,
+		CoreBinaryPath:                     binaryPath,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("invalid captive-core toml %q: %w", ing.CaptiveCoreConfig, err)
+	}
+
+	return &captiveCoreOpener{
+		config: ledgerbackend.CaptiveCoreConfig{
+			BinaryPath:         binaryPath,
+			StoragePath:        storagePath,
+			NetworkPassphrase:  peek.NetworkPassphrase,
+			HistoryArchiveURLs: ing.HistoryArchiveURLs,
+			Log:                logger.WithField("subservice", "stellar-core"),
+			Toml:               coreToml,
+			UserAgent:          "stellar-rpc-fullhistory",
+		},
+	}, nil
+}
+
+// OpenCore returns the live ingestion stream backed by captive stellar-core, bound
+// to ctx. A fresh core per run keeps supervised restarts clean; err is always nil.
+func (c *captiveCoreOpener) OpenCore(ctx context.Context) (ledgerbackend.LedgerStream, error) {
+	cfg := c.config // struct value copy: setting Context below leaves the opener's stored config untouched
+	cfg.Context = ctx
+	return ledgerbackend.NewCaptiveCoreStream(cfg, c.config.Log), nil
+}
+
 // resolveNetworkTip adapts the backfill backend to backfill's tip sampler — its Tip
 // frontier (so the tip and the freeze's coverage frontier are one source) — or the
 // not-configured placeholder for a frontfill-only daemon (nil backend).
@@ -286,6 +419,7 @@ func newLogger(cfg LoggingConfig) (*supportlog.Entry, error) {
 
 // compile-time interface checks.
 var (
+	_ CoreOpener        = (*captiveCoreOpener)(nil)
 	_ NetworkTipBackend = notConfiguredTip{}
 	_ NetworkTipBackend = backendTip{}
 )
diff --git a/cmd/stellar-rpc/internal/fullhistory/daemon_test.go b/cmd/stellar-rpc/internal/fullhistory/daemon_test.go
index d5f09bff9..ddb9bc47b 100644
--- a/cmd/stellar-rpc/internal/fullhistory/daemon_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/daemon_test.go
@@ -59,26 +59,34 @@ format = "text"
 // runDaemonWith — the full entrypoint flow against an injected backend.
 // ---------------------------------------------------------------------------
 
-// Happy path pins earliest_ledger and serves reads once. The injected backend's
-// young-network tip (inside chunk 0) ⇒ no-op backfill, no LedgerStream needed.
+// Happy path pins earliest_ledger, serves reads once, then ingests. The injected
+// backend's young-network tip (inside chunk 0) ⇒ no-op backfill, so the backend
+// needs no LedgerStream; the injected core blocks until ctx cancel (the daemon's
+// steady state), and a ctx cancel is a clean shutdown.
 func TestRunDaemon_LoadValidateWireStartCleanShutdown(t *testing.T) {
 	configPath, dataDir := writeTempConfig(t, "")
 
 	var served atomic.Int32
 	opts := daemonOptions{
 		Backend:    &fakeBackend{tip: chunk.FirstLedgerSeq + 10},
+		Core:       &fakeCore{}, // default getter blocks until ctx cancel
 		ServeReads: func(context.Context) error { served.Add(1); return nil },
 		Logger:     silentLogger(),
 	}
 
+	ctx, cancel := context.WithCancel(context.Background())
 	errCh := make(chan error, 1)
-	go func() { errCh <- runDaemonWith(context.Background(), configPath, opts) }()
+	go func() { errCh <- runDaemonWith(ctx, configPath, opts) }()
+
+	// ServeReads is called after backfill, just before the (blocking) ingestion loop.
+	require.Eventually(t, func() bool { return served.Load() == 1 }, 3*time.Second, 5*time.Millisecond)
+	cancel()
 
 	select {
 	case err := <-errCh:
-		require.NoError(t, err, "cold backfill + serve returns cleanly")
+		require.NoError(t, err, "a ctx-canceled ingestion loop is a clean shutdown")
 	case <-time.After(3 * time.Second):
-		t.Fatal("runDaemonWith did not return")
+		t.Fatal("runDaemonWith did not return after ctx cancel")
 	}
 
 	assert.Equal(t, int32(1), served.Load(), "reads served once")
@@ -106,7 +114,8 @@ func someTxBackend(t *testing.T) *fakeBackend {
 		if seq%2500 != 0 {
 			return zeroTxLCMBytes(t, seq)
 		}
-		return oneTxLCMBytes(t, seq, src)
+		raw, _ := oneTxLCMBytes(t, seq, src)
+		return raw
 	}
 	return &fakeBackend{
 		LedgerStream: &fullChunkStream{t: t, gen: gen},
@@ -116,8 +125,10 @@ func someTxBackend(t *testing.T) *fakeBackend {
 }
 
 // oneTxLCMBytes is zeroTxLCMBytes plus one tx (per-seq SeqNum ⇒ unique hash) so
-// ExtractTxHashes yields exactly one key for seq.
-func oneTxLCMBytes(t *testing.T, seq uint32, src xdr.MuxedAccount) []byte {
+// ExtractTxHashes yields exactly one key for seq. Returns the wire bytes and the
+// real, network-hashed transaction hash (the hash the daemon commits for seq), so
+// callers can assert a getTransaction-style hash→seq lookup.
+func oneTxLCMBytes(t *testing.T, seq uint32, src xdr.MuxedAccount) ([]byte, [32]byte) {
 	t.Helper()
 	envelope := xdr.TransactionEnvelope{
 		Type: xdr.EnvelopeTypeEnvelopeTypeTx,
@@ -169,7 +180,7 @@ func oneTxLCMBytes(t *testing.T, seq uint32, src xdr.MuxedAccount) []byte {
 	}
 	raw, err := lcm.MarshalBinary()
 	require.NoError(t, err)
-	return raw
+	return raw, hash
 }
 
 // #815 acceptance: one TOML boots the daemon and it backfills the complete chunk
@@ -182,23 +193,36 @@ func TestRunDaemon_BackfillMaterializesAllColdTypesAndIndex(t *testing.T) {
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
+	// ServeReads runs after backfill completes, just before the blocking ingestion
+	// loop — so it is the "backfill done" signal. The injected core then blocks until
+	// the ctx cancel below, and a ctx-canceled ingestion loop is a clean shutdown.
+	servedCh := make(chan struct{}, 1)
 	errCh := make(chan error, 1)
 	go func() {
 		errCh <- runDaemonWith(ctx, configPath, daemonOptions{
 			// Backend's tip is chunk 0's last ledger ⇒ chunk 0 complete, backfill freezes it.
 			// The network tip is derived from this same backend's Tip.
 			Backend:    someTxBackend(t),
-			ServeReads: func(context.Context) error { return nil },
+			Core:       &fakeCore{}, // default getter blocks until ctx cancel
+			ServeReads: func(context.Context) error { servedCh <- struct{}{}; return nil },
 			Logger:     silentLogger(),
 		})
 	}()
 	select {
+	case <-servedCh: // backfill complete; the daemon is now parked in ingestion
 	case err := <-errCh:
-		require.NoError(t, err, "daemon backfills to tip then exits cleanly (no-op ServeReads)")
+		t.Fatalf("daemon returned before backfill completed: %v", err)
 	case <-time.After(60 * time.Second):
 		cancel()
 		t.Fatal("runDaemonWith did not finish backfill within 60s (regressed into a hang/restart loop?)")
 	}
+	cancel() // request a clean shutdown of the parked ingestion loop
+	select {
+	case err := <-errCh:
+		require.NoError(t, err, "a ctx-canceled ingestion loop is a clean shutdown")
+	case <-time.After(10 * time.Second):
+		t.Fatal("runDaemonWith did not return after ctx cancel")
+	}
 
 	// Read the catalog back after the daemon released locks + closed its store.
 	store, err := openMetaAt(t, filepath.Join(dataDir, "catalog", "rocksdb"))
@@ -380,7 +404,7 @@ func TestSupervise_RetriesThenCleanShutdown(t *testing.T) {
 
 	var attempts atomic.Int32
 	tip := &fakeTipBackend{tips: []uint32{chunk.FirstLedgerSeq + 10}} // young: no backfill
-	start := startTestConfig(t, cat, tip, nil)
+	start := startTestConfig(t, cat, tip, &fakeCore{}, nil)
 	// An always-erroring ServeReads makes each attempt a restartable failure.
 	start.ServeReads = func(context.Context) error {
 		attempts.Add(1)
@@ -406,16 +430,33 @@ func TestSupervise_RetriesThenCleanShutdown(t *testing.T) {
 	assert.GreaterOrEqual(t, attempts.Load(), int32(2), "restarted on the transient failure")
 }
 
-// Fatal sentinels surface up, not retried (a fresh start cannot heal them).
-func TestSupervise_FatalSentinelSurfaces(t *testing.T) {
+// A first start with no reachable tip is now RESTARTABLE (previously a fatal
+// sentinel): supervise retries it on a backoff rather than surfacing it, and a
+// ctx cancel returns clean. Loss/misconfig can't be told from a transient inside
+// the process, so there is no fatal-and-exit class.
+func TestSupervise_FirstStartNoTipRetries(t *testing.T) {
 	cat, _ := testCatalog(t)
 	pinGenesis(t, cat)
-	// Unreachable tip + no local progress ⇒ fatal ErrFirstStartNoTip.
+	// Unreachable tip + no local progress: every run fails the first-start check.
 	tip := &fakeTipBackend{err: errors.New("unreachable"), errFirst: 99}
-	start := startTestConfig(t, cat, tip, nil)
+	start := startTestConfig(t, cat, tip, &fakeCore{}, nil)
+	start.TipMaxAttempts = 1 // one tip poll per run, so callCount tracks restart count
+
+	ctx, cancel := context.WithCancel(context.Background())
+	errCh := make(chan error, 1)
+	go func() { errCh <- supervise(ctx, start, silentLogger(), 5*time.Millisecond) }()
 
-	err := supervise(context.Background(), start, silentLogger(), time.Hour)
-	require.ErrorIs(t, err, ErrFirstStartNoTip, "fatal sentinel surfaces immediately, no retry")
+	require.Eventually(t, func() bool {
+		return tip.callCount() >= 2
+	}, 3*time.Second, 5*time.Millisecond, "first-start-no-tip is retried, not surfaced as fatal")
+	cancel()
+
+	select {
+	case err := <-errCh:
+		require.NoError(t, err, "ctx cancel returns clean, even though runs kept failing")
+	case <-time.After(3 * time.Second):
+		t.Fatal("supervise did not return after cancel")
+	}
 }
 
 // ---------------------------------------------------------------------------
diff --git a/cmd/stellar-rpc/internal/fullhistory/e2e_test.go b/cmd/stellar-rpc/internal/fullhistory/e2e_test.go
new file mode 100644
index 000000000..a2f754297
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/e2e_test.go
@@ -0,0 +1,520 @@
+package fullhistory
+
+// =============================================================================
+// In-process end-to-end integration of the full-history daemon.
+//
+// WHAT IS REAL HERE
+//   Everything inside the process is the real production code path:
+//     - runDaemonWith (the true daemon entrypoint): TOML load + form-validate,
+//       per-root flock, meta-store open + Catalog bind, the stateful
+//       validateConfig gate (pins the floor), and the supervised run loop.
+//     - run → backfillToTip → openHotDBForChunk → runIngestionLoop (the real
+//       atomic per-ledger WriteBatch across all CFs of the real per-chunk
+//       hotchunk RocksDB), the real boundary handoff, the real boundary signal.
+//     - lifecycle.Loop / runLifecycle: the real resolve + executePlan
+//       freeze (cold artifacts derived FROM the live hot DB), the real txhash
+//       index fold (a real streamhash .idx on disk), the real discard + prune.
+//     - The real txhash stores on both sides of a getTransaction-style hash→seq
+//       lookup: the cold ColdReader over the frozen .idx and the live hot CF.
+//
+// WHAT IS FAKED (the two EXTERNAL boundaries the daemon injects on purpose)
+//     - The ledger SOURCE. Production drives ingestion from captive
+//       stellar-core and backfill from a bulk object-store backend. Here both
+//       cross their injected interfaces (CoreOpener / backfill.Backend) and are
+//       fed synthetic-but-well-formed LedgerCloseMeta. No captive core, no
+//       object store, no network.
+//     - ServeReads is a no-op recorder (the read cutover is #772). The read PATH
+//       exercised is the txhash index lookup getTransaction will sit on.
+//
+// cpi=1 (the chunksPerTxhashIndex test seam) makes every one-chunk window
+// terminal the instant its chunk freezes, so the freeze→fold→discard→prune
+// sequence completes on a boundary tick without ingesting 1000 chunks.
+// =============================================================================
+
+import (
+	"context"
+	"fmt"
+	"iter"
+	"os"
+	"path/filepath"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
+	"github.com/stellar/go-stellar-sdk/keypair"
+	"github.com/stellar/go-stellar-sdk/xdr"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/lifecycle"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/observability"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash"
+)
+
+// e2eCore is the CoreOpener handing back a fresh e2eStream per daemon run (a
+// restart opens core anew). frames is the seq→raw backlog every stream serves;
+// the atomics aggregate observations across opens for the restart assertions.
+type e2eCore struct {
+	frames    map[uint32][]byte
+	fromSeen  atomic.Uint32
+	delivered atomic.Uint32
+	opens     atomic.Int32
+}
+
+func (c *e2eCore) OpenCore(context.Context) (ledgerbackend.LedgerStream, error) {
+	c.opens.Add(1)
+	return &e2eStream{core: c}, nil
+}
+
+// e2eStream is the FAKE captive-core LedgerStream the ingestion loop consumes: it
+// yields the backlog frames contiguously from the range's From() and, once it runs
+// past the synthetic backlog, blocks until ctx is canceled (a live tip stream ends
+// only on shutdown). It records (into its core) the FIRST seq it was asked for
+// (the range From), so the restart step can assert the daemon re-derived the
+// watermark and resumed with no gap.
+type e2eStream struct {
+	core    *e2eCore
+	sawFrom atomic.Bool
+}
+
+var _ ledgerbackend.LedgerStream = (*e2eStream)(nil)
+
+// RawLedgers streams the backlog frames from r.From(), then parks until ctx ends
+// and yields ctx.Err(). The first call on a stream records r.From() in its core.
+func (s *e2eStream) RawLedgers(
+	ctx context.Context, r ledgerbackend.Range, _ ...ledgerbackend.StreamOption,
+) iter.Seq2[[]byte, error] {
+	return func(yield func([]byte, error) bool) {
+		if s.sawFrom.CompareAndSwap(false, true) {
+			s.core.fromSeen.Store(r.From())
+		}
+		for seq := r.From(); ctx.Err() == nil; seq++ {
+			raw, ok := s.core.frames[seq]
+			if !ok {
+				// Past the synthetic backlog: a live tip blocks until shutdown so the
+				// loop does not see an error that would look like a core crash.
+				<-ctx.Done()
+				break
+			}
+			if !yield(raw, nil) {
+				return
+			}
+			// Publish only AFTER yield returned true: the consumer is done with seq and
+			// asked for more, so a waiter on delivered is not racing that frame's handling.
+			s.core.delivered.Store(seq)
+		}
+		yield(nil, ctx.Err())
+	}
+}
+
+// e2eMetrics is a concurrency-safe observability.Metrics that records the
+// lifecycle signals this test waits on.
+type e2eMetrics struct {
+	observability.NopMetrics
+
+	mu         sync.Mutex
+	boundaries int
+	freezes    int
+	discarded  int
+	pruned     int
+}
+
+func (m *e2eMetrics) ChunkBoundary() {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.boundaries++
+}
+
+func (m *e2eMetrics) Freeze(time.Duration) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.freezes++
+}
+
+func (m *e2eMetrics) Discard(count int, _ time.Duration) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.discarded += count
+}
+
+func (m *e2eMetrics) Prune(count int, _ time.Duration) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.pruned += count
+}
+
+func (m *e2eMetrics) boundaryCount() int {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	return m.boundaries
+}
+
+func (m *e2eMetrics) snapshotFreezeCount() int {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	return m.freezes
+}
+
+func (m *e2eMetrics) discardedCount() int {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	return m.discarded
+}
+
+func (m *e2eMetrics) prunedCount() int {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	return m.pruned
+}
+
+// e2eConfigPath writes a daemon TOML for an in-process E2E: genesis floor (no
+// tip needed to validate/start) and the given retention width. captive_core_config
+// is a stub path the test's injected CoreOpener replaces, never opening a real core.
+// The one-chunk index window is set via the chunksPerTxhashIndex test seam, not config.
+func e2eConfigPath(t *testing.T, dataDir string, retentionChunks uint32) string {
+	t.Helper()
+	cfgPath := filepath.Join(t.TempDir(), "daemon.toml")
+	body := fmt.Sprintf(`
+[service]
+default_data_dir = %q
+
+[retention]
+earliest_ledger = "genesis"
+retention_chunks = %d
+
+[ingestion]
+captive_core_config = "/dev/null"
+
+[logging]
+level = "error"
+format = "text"
+`, dataDir, retentionChunks)
+	require.NoError(t, os.WriteFile(cfgPath, []byte(body), 0o644))
+	return cfgPath
+}
+
+// runDaemonInBackground starts runDaemonWith on a cancellable ctx and returns a
+// cancel func plus a channel carrying its (clean-shutdown) return. A young-network
+// tip (inside chunk 0) means backfill is a no-op and first-start ingests directly
+// from genesis via the fake core.
+func runDaemonInBackground(
+	t *testing.T, cfgPath string, core *e2eCore, served *atomic.Int32, metrics observability.Metrics,
+) (context.CancelFunc, <-chan error) {
+	t.Helper()
+	ctx, cancelFn := context.WithCancel(context.Background())
+	errCh := make(chan error, 1)
+	opts := daemonOptions{
+		Backend:              &fakeBackend{tip: chunk.FirstLedgerSeq + 5}, // young: no backfill
+		Core:                 core,
+		ServeReads:           func(context.Context) error { served.Add(1); return nil },
+		Logger:               silentLogger(),
+		Metrics:              metrics,
+		RestartBackoff:       10 * time.Millisecond,
+		chunksPerTxhashIndex: 1,
+	}
+	go func() { errCh <- runDaemonWith(ctx, cfgPath, opts) }()
+	return cancelFn, errCh
+}
+
+// waitClean cancels the daemon and requires a clean (nil) shutdown.
+func waitClean(t *testing.T, cancel context.CancelFunc, done <-chan error) {
+	t.Helper()
+	cancel()
+	select {
+	case err := <-done:
+		require.NoError(t, err, "ctx cancel is a clean daemon shutdown")
+	case <-time.After(60 * time.Second):
+		// Post-cancel shutdown joins one in-flight lifecycle unit; a mid-flight
+		// freeze's Finalize fsync + index build is unpreemptible and slow under
+		// -race + contention — the same reason the boundary-cross budget is 600s.
+		t.Fatal("daemon did not shut down cleanly after ctx cancel")
+	}
+}
+
+// hotKeyExists reports whether chunk c's hot:chunk key is present (any non-empty state).
+func hotKeyExists(cat *catalog.Catalog, c chunk.ID) (bool, error) {
+	st, err := cat.HotState(c)
+	if err != nil {
+		return false, err
+	}
+	return st != geometry.HotState(""), nil
+}
+
+// hashAt builds a deterministic 32-byte hash from n (for the never-committed miss).
+func hashAt(n uint64) [32]byte {
+	var h [32]byte
+	for i := range 8 {
+		h[i] = byte(n >> (8 * i))
+	}
+	return h
+}
+
+// TestE2E_DaemonLifecycle_FirstStartIngestFreezeLookupRestartPrune drives the
+// whole daemon lifecycle in one process against the real stores and the fake
+// ledger source:
+//
+//	first start (genesis, young-network tip ⇒ direct ingest) →
+//	ingest chunks 0 AND 1 in full + cross into chunk 2 (two real boundary handoffs) →
+//	lifecycle ticks freeze chunks 0 and 1, fold their terminal txhash indexes, and
+//	  discard their hot tiers →
+//	clean shutdown →
+//	getTransaction-style hash→seq lookup resolves from the cold .idx (chunk 0)
+//	  AND from the reopened hot CF of the still-live chunk (chunk 2) →
+//	RESTART: re-derive the watermark, resume at exactly watermark+1 (no gap) →
+//	drive retention far enough to prune chunk 0, confirm a pruned read is not-found.
+//
+// Correctness is asserted at every step.
+//
+//nolint:funlen // one linear end-to-end scenario asserted step by step
+func TestE2E_DaemonLifecycle_FirstStartIngestFreezeLookupRestartPrune(t *testing.T) {
+	if testing.Short() {
+		t.Skip("e2e ingests a full 10k-ledger chunk; skipped in -short")
+	}
+
+	dataDir := t.TempDir()
+
+	const c0 = chunk.ID(0)
+	const c1 = chunk.ID(1)
+	const c2 = chunk.ID(2)
+
+	// Cross TWO chunk boundaries so chunks 0 AND 1 both freeze, leaving chunk 2 as
+	// the live (un-frozen) chunk. That layout lets a later retention_chunks=1 run
+	// prune chunk 0 (wholly below the floor) while chunk 1 survives.
+	c0First := c0.FirstLedger()
+	c1First := c1.FirstLedger()
+	c2First := c2.FirstLedger()
+
+	// One shared source account; the per-seq SeqNum makes each tx hash unique.
+	src := xdr.MustMuxedAddress(keypair.MustRandom().Address())
+	coldRaw, coldHash := oneTxLCMBytes(t, c0First, src) // → frozen cold .idx (chunk 0)
+	hotRaw, hotHash := oneTxLCMBytes(t, c2First, src)   // → live hot CF (chunk 2)
+	// Chunk 1's first ledger also carries a tx so its txhash .bin is non-empty —
+	// streamhash refuses to build a cold index over zero keys (ErrEmptyBuildSet).
+	c1Raw, _ := oneTxLCMBytes(t, c1First, src)
+
+	frames := make(map[uint32][]byte, 2*int(chunk.LedgersPerChunk)+2)
+	appendLedger := func(seq uint32) {
+		switch seq {
+		case c0First:
+			frames[seq] = coldRaw
+		case c1First:
+			frames[seq] = c1Raw
+		case c2First:
+			frames[seq] = hotRaw
+		default:
+			frames[seq] = zeroTxLCMBytes(t, seq)
+		}
+	}
+	// Chunks 0 and 1 in full (both freeze), then chunk 2's first two ledgers.
+	for seq := c0First; seq <= c1.LastLedger(); seq++ {
+		appendLedger(seq)
+	}
+	appendLedger(c2First)
+	appendLedger(c2First + 1)
+
+	core := &e2eCore{frames: frames}
+	var served atomic.Int32
+	metrics := &e2eMetrics{}
+
+	// =====================================================================
+	// STEP 1 — first start: config → lock → validate (pin genesis) → start →
+	// direct ingest across the chunk-0 AND chunk-1 boundaries, the lifecycle
+	// freezing, folding, and discarding each just-closed chunk off the doorbell.
+	// =====================================================================
+	cfgPath := e2eConfigPath(t, dataDir, 0) // retention 0 (full history) for now
+	cancel, done := runDaemonInBackground(t, cfgPath, core, &served, metrics)
+
+	// Wait until ingestion crosses BOTH boundaries and commits into chunk 2.
+	// Delivering c2First proves both boundary handoffs fired (chunks 0 and 1
+	// closed, chunk 2 opened) and seeds the live hot-CF lookup. 600s absorbs the
+	// worst-case contended -race path (per-ledger synced WriteBatches racing the
+	// freezes that re-read 10k ledgers each).
+	require.Eventually(t, func() bool {
+		return core.delivered.Load() >= c2First
+	}, 600*time.Second, 200*time.Millisecond, "ingestion must cross both boundaries into chunk 2")
+
+	require.Eventually(t, func() bool {
+		return metrics.discardedCount() >= 2
+	}, 60*time.Second, 50*time.Millisecond, "the boundary ticks must freeze+fold+discard chunks 0 and 1")
+
+	require.GreaterOrEqual(t, served.Load(), int32(1), "reads were served")
+	require.Equal(t, c0First, core.fromSeen.Load(),
+		"first start resumes the ingestion stream at genesis (watermark+1)")
+
+	// =====================================================================
+	// STEP 2 — clean shutdown. The supervised loop returns nil on ctx cancel.
+	// =====================================================================
+	waitClean(t, cancel, done)
+
+	// Bind a fresh inspection catalog on the (now lock-free) data dir for the
+	// post-shutdown reads. It MUST be closed before the restart reopens the metastore.
+	postCat, closePost := e2eReadCatalog(t, dataDir)
+	w0 := postCat.TxHashIndexLayout().TxHashIndexID(c0)
+
+	// --- Correctness: chunks 0 and 1 per-chunk cold artifacts (ledgers + events) froze. ---
+	for _, c := range []chunk.ID{c0, c1} {
+		for _, kind := range []geometry.Kind{geometry.KindLedgers, geometry.KindEvents} {
+			st, err := postCat.State(c, kind)
+			require.NoError(t, err)
+			assert.Equal(t, geometry.StateFrozen, st, "chunk %s %s is frozen", c, kind)
+		}
+		has, err := hotKeyExists(postCat, c)
+		require.NoError(t, err)
+		assert.False(t, has, "chunk %s hot key is discarded", c)
+	}
+	// The window's txhash index is a frozen, terminal coverage (the .idx the cold
+	// getTransaction read resolves against).
+	frozenCov, ok, err := postCat.FrozenTxHashIndex(w0)
+	require.NoError(t, err)
+	require.True(t, ok, "chunk 0's window has a frozen txhash coverage")
+	require.True(t, postCat.TxHashIndexLayout().IsTerminalCoverage(frozenCov), "a one-chunk (cpi=1) window is terminal")
+
+	// =====================================================================
+	// STEP 3 — getTransaction-style hash→seq lookup, cold tier.
+	// =====================================================================
+
+	// Cold .idx — the exact reader getTransaction will sit on for frozen history.
+	coldReader, err := txhash.OpenColdReader(postCat.Layout().TxHashIndexFilePath(frozenCov))
+	require.NoError(t, err)
+	gotSeq, err := coldReader.Get(coldHash)
+	require.NoError(t, err, "the chunk-0 tx hash must resolve from the frozen cold index")
+	assert.Equal(t, c0First, gotSeq, "cold lookup returns the ledger the tx was committed in")
+	// A hash that was never committed misses (not-found, not a wrong answer).
+	_, missErr := coldReader.Get(hashAt(0xE2EDEADBEEF))
+	require.ErrorIs(t, missErr, stores.ErrNotFound, "an uncommitted hash misses the cold index")
+	require.NoError(t, coldReader.Close())
+
+	// Observability: the daemon emitted the boundary + freeze phase signals.
+	assert.GreaterOrEqual(t, metrics.boundaryCount(), 1, "at least one chunk boundary was signaled")
+	assert.GreaterOrEqual(t, metrics.snapshotFreezeCount(), 1, "at least one freeze stage ran")
+
+	// =====================================================================
+	// STEP 4 — hot lookup and restart watermark.
+	// =====================================================================
+	wmBeforeRestart := mustDeriveWatermark(t, postCat)
+	require.GreaterOrEqual(t, wmBeforeRestart, c2First, "watermark advanced into chunk 2")
+
+	// Live hot CF — now the daemon has stopped, chunk 2 (still the un-frozen live
+	// chunk) is reopenable. Resolve the chunk-2 tx hash through the txhash CF — the
+	// read path getTransaction uses for live history before a chunk freezes.
+	hotState, err := postCat.HotState(c2)
+	require.NoError(t, err)
+	require.Equal(t, geometry.HotReady, hotState, "chunk 2 is the un-frozen live chunk")
+	c2lfs, err := postCat.State(c2, geometry.KindLedgers)
+	require.NoError(t, err)
+	require.Equal(t, geometry.State(""), c2lfs, "the live chunk has no cold artifacts yet")
+
+	// Retry the open: RocksDB's process-level LOCK can linger momentarily after the
+	// writer closed (the same transient a production reader retries through).
+	var liveDB *hotchunk.DB
+	require.Eventually(t, func() bool {
+		db, oerr := hotchunk.Open(postCat.Layout().HotChunkPath(c2), c2, silentLogger())
+		if oerr != nil {
+			return false
+		}
+		liveDB = db
+		return true
+	}, 10*time.Second, 50*time.Millisecond, "chunk 2's hot DB must be reopenable after shutdown")
+	hotSeq, err := liveDB.Txhash().Get(hotHash)
+	require.NoError(t, err, "the chunk-2 tx hash must resolve from the live hot CF")
+	assert.Equal(t, c2First, hotSeq, "hot lookup returns the live tx's ledger")
+	require.NoError(t, liveDB.Close()) // release before the restart reopens it as the live writer
+	prunedIdxPath := postCat.Layout().TxHashIndexFilePath(frozenCov)
+
+	// =====================================================================
+	// STEP 5 — RESTART. A fresh runDaemonWith re-opens everything, re-derives the
+	// watermark from durable state, and resumes captive core at watermark+1 with no gap.
+	// =====================================================================
+	closePost() // release the inspection metastore handle before the daemon reopens it
+	core.opens.Store(0)
+	core.fromSeen.Store(0)
+	cancel2, done2 := runDaemonInBackground(t, cfgPath, core, &served, &e2eMetrics{})
+
+	require.Eventually(t, func() bool { return core.opens.Load() >= 1 }, 30*time.Second, 20*time.Millisecond,
+		"the restarted daemon re-opened captive core")
+	require.Eventually(t, func() bool { return core.fromSeen.Load() != 0 }, 30*time.Second, 20*time.Millisecond,
+		"the restarted ingestion loop requested a resume range")
+
+	wantResume := wmBeforeRestart + 1
+	assert.Equal(t, wantResume, core.fromSeen.Load(),
+		"restart streams from the re-derived watermark+1 — the durable frontier, re-derived not stored, no gap")
+
+	waitClean(t, cancel2, done2)
+
+	// =====================================================================
+	// STEP 6 — retention prune. Re-run with retention_chunks = 1: the floor anchors
+	// at chunk 1, so chunk 0 (frozen + folded) falls WHOLLY below it and the prune
+	// scan sweeps its files + keys, while chunk 1 (the floor chunk) survives. A read
+	// of a pruned chunk-0 hash is then not-found (no coverage to resolve it).
+	// =====================================================================
+	prunedCfg := e2eConfigPath(t, dataDir, 1) // retain ~1 chunk
+	require.FileExists(t, prunedIdxPath, "chunk 0's cold index exists before the prune")
+
+	pruneMetrics := &e2eMetrics{}
+	cancel3, done3 := runDaemonInBackground(t, prunedCfg, core, &served, pruneMetrics)
+
+	// The prune scan runs on the first lifecycle tick (the at-start doorbell ring).
+	require.Eventually(t, func() bool {
+		return pruneMetrics.prunedCount() > 0
+	}, 60*time.Second, 50*time.Millisecond, "retention prune scan must sweep chunk 0")
+
+	waitClean(t, cancel3, done3)
+	pruneCat, closePrune := e2eReadCatalog(t, dataDir)
+	defer closePrune()
+
+	// Chunk 0's per-chunk artifact keys (ledgers + events) vanished.
+	ledgers, err := pruneCat.State(c0, geometry.KindLedgers)
+	require.NoError(t, err)
+	ev, err := pruneCat.State(c0, geometry.KindEvents)
+	require.NoError(t, err)
+	assert.Equal(t, geometry.State(""), ledgers, "chunk 0 ledgers key is pruned")
+	assert.Equal(t, geometry.State(""), ev, "chunk 0 events key is pruned")
+
+	// Chunk 1 (the floor chunk) is WITHIN retention and survives the prune.
+	c1lfs, err := pruneCat.State(c1, geometry.KindLedgers)
+	require.NoError(t, err)
+	assert.Equal(t, geometry.StateFrozen, c1lfs, "chunk 1 is at the retention floor and survives")
+
+	// The on-disk cold index file is gone too (prune unlinks the files, not just keys).
+	require.Eventually(t, func() bool {
+		_, statErr := os.Stat(prunedIdxPath)
+		return os.IsNotExist(statErr)
+	}, 10*time.Second, 50*time.Millisecond, "the pruned cold index file is unlinked")
+
+	// "pruned read is not-found": after prune the window has no frozen coverage
+	// (ok=false) — the read layer's "no coverage ⇒ not-found" gate.
+	_, covOK, err := pruneCat.FrozenTxHashIndex(w0)
+	require.NoError(t, err)
+	assert.False(t, covOK, "chunk 0's window coverage is pruned ⇒ a chunk-0 hash read is not-found")
+}
+
+// e2eReadCatalog binds a Catalog over a SEPARATE metastore handle on the daemon's
+// data dir, with the same one-chunk window the daemon's test seam uses, for
+// read-only inspection BETWEEN daemon runs (the metastore is RocksDB-primary, so
+// this MUST be closed via the returned close func before the next daemon run).
+func e2eReadCatalog(t *testing.T, dataDir string) (*catalog.Catalog, func()) {
+	t.Helper()
+	paths := Config{Service: ServiceConfig{DefaultDataDir: dataDir}}.WithDefaults().ResolvePaths()
+	store, err := openMetaAt(t, paths.Catalog)
+	require.NoError(t, err)
+	windows, err := geometry.NewTxHashIndexLayout(1) // matches chunksPerTxhashIndex = 1
+	require.NoError(t, err)
+	return catalog.NewCatalog(store, NewLayoutFromPaths(paths), windows), func() { _ = store.Close() }
+}
+
+// mustDeriveWatermark derives the durable watermark with the read-only hot-DB
+// refinement (passing a logger opens the highest ready hot DB by its Layout path).
+func mustDeriveWatermark(t *testing.T, cat *catalog.Catalog) uint32 {
+	t.Helper()
+	wm, err := lifecycle.LastCommittedLedger(cat, silentLogger())
+	require.NoError(t, err)
+	return wm
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith.go b/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith.go
new file mode 100644
index 000000000..cb7437de1
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith.go
@@ -0,0 +1,46 @@
+package geometry
+
+import "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+
+// Signed pre-genesis chunk arithmetic — the single home for the chunk↔ledger maps
+// that run in int64 so the pre-genesis sentinel (-1 = "nothing complete") never
+// underflows the uint32 domain. Keeping all of it here (rather than split across
+// lifecycle progress and this package) means there is one -1 convention, not two.
+
+// PreGenesisLedger is the last-committed ledger when nothing is complete
+// (FirstLedgerSeq-1) — the ledger-domain image of the -1 chunk sentinel.
+const PreGenesisLedger uint32 = chunk.FirstLedgerSeq - 1
+
+// CompleteThrough maps a signed chunk index to its "complete through" last ledger:
+// c < 0 ⇒ PreGenesisLedger; c >= 0 ⇒ chunk.ID(c).LastLedger().
+func CompleteThrough(c int64) uint32 {
+	if c < 0 {
+		return PreGenesisLedger
+	}
+	return chunk.ID(c).LastLedger() //nolint:gosec // c >= 0 and bounded by real chunk ids
+}
+
+// ChunkIDOfLedger maps a ledger to its chunk, signed so a sub-genesis ledger
+// yields -1 instead of panicking.
+func ChunkIDOfLedger(ledger uint32) int64 {
+	if ledger < chunk.FirstLedgerSeq {
+		return -1
+	}
+	return int64(chunk.IDFromLedger(ledger))
+}
+
+// LastCompleteChunkAt is the inverse of chunk.ID.LastLedger: the largest chunk
+// whose last ledger is <= ledger. Returns SIGNED int64 so any ledger before chunk
+// 0's last ledger (PreGenesisLedger included) maps to -1 ("no chunk complete")
+// rather than wrapping; casting to int64 BEFORE subtracting keeps the numerator
+// signed (a uint32 ledger+1-FirstLedgerSeq would underflow for ledger 0).
+func LastCompleteChunkAt(ledger uint32) int64 {
+	return (int64(ledger)+1-int64(chunk.FirstLedgerSeq))/int64(chunk.LedgersPerChunk) - 1
+}
+
+// ChunkFirstLedger maps a non-negative signed chunk index to its first ledger.
+// It is the signed-domain companion of chunk.ID.FirstLedger used after a
+// max(..., 0) clamp.
+func ChunkFirstLedger(c int64) uint32 {
+	return chunk.ID(c).FirstLedger() //nolint:gosec // c >= 0 (clamped) and bounded by real chunk ids
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith_test.go b/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith_test.go
new file mode 100644
index 000000000..e784494f9
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith_test.go
@@ -0,0 +1,57 @@
+package geometry
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// ---------------------------------------------------------------------------
+// CompleteThrough — sentinel-safe signed->ledger map.
+//
+// ALIASING TRAP: a guard-less impl wraps -1 to exactly PreGenesisLedger anyway
+// (MaxUint32+1 overflows to 0), so a -1-only test is blind to a dropped guard.
+// The -2/-100 rows are the load-bearing ones (they wrap to large, distinct values
+// the guard must squash).
+// ---------------------------------------------------------------------------
+
+func TestCompleteThrough(t *testing.T) {
+	tests := []struct {
+		name string
+		in   int64
+		want uint32
+	}{
+		{"pre-genesis sentinel -1 => FirstLedgerSeq-1, not MaxUint32 (aliases the wrap)", -1, PreGenesisLedger},
+		{"sentinel -2 does NOT alias the wrap (guard-less would yield 4294957297)", -2, PreGenesisLedger},
+		{"deeply negative still pre-genesis", -100, PreGenesisLedger},
+		{"chunk 0 last ledger", 0, chunk.ID(0).LastLedger()},
+		{"chunk 5 last ledger", 5, chunk.ID(5).LastLedger()},
+	}
+	require.Equal(t, uint32(1), PreGenesisLedger, "FirstLedgerSeq-1 == 1 (the doc's chunkLastLedger(-1))")
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			require.Equal(t, tc.want, CompleteThrough(tc.in))
+		})
+	}
+
+	// Assert the aliasing trap directly so the comment above can't rot: -1 wraps to
+	// PreGenesisLedger, -2 does not. Computed from chunk arithmetic, not hardcoded.
+	guardlessWrap := func(c int64) uint32 {
+		return chunk.ID(uint32(c)).LastLedger()
+	}
+	require.Equal(t, PreGenesisLedger, guardlessWrap(-1),
+		"-1 aliases PreGenesisLedger under the wrap — the coincidence this test must not rely on")
+	require.NotEqual(t, PreGenesisLedger, guardlessWrap(-2),
+		"-2 must NOT alias — proving the guard (not a coincidence) is what makes CompleteThrough(-2) safe")
+}
+
+// ChunkIDOfLedger maps a ledger to its containing chunk, signed so a sub-genesis
+// ledger yields -1 rather than panicking.
+func TestChunkIDOfLedger(t *testing.T) {
+	require.Equal(t, int64(-1), ChunkIDOfLedger(chunk.FirstLedgerSeq-1), "sub-genesis => -1 sentinel")
+	require.Equal(t, int64(0), ChunkIDOfLedger(chunk.FirstLedgerSeq), "genesis => chunk 0")
+	require.Equal(t, int64(0), ChunkIDOfLedger(chunk.ID(0).LastLedger()), "chunk 0's last ledger => chunk 0")
+	require.Equal(t, int64(1), ChunkIDOfLedger(chunk.ID(1).FirstLedger()), "chunk 1's first ledger => chunk 1")
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/keys.go b/cmd/stellar-rpc/internal/fullhistory/geometry/keys.go
index ba672301a..f8d054f3a 100644
--- a/cmd/stellar-rpc/internal/fullhistory/geometry/keys.go
+++ b/cmd/stellar-rpc/internal/fullhistory/geometry/keys.go
@@ -27,6 +27,19 @@ const (
 	StatePruning State = "pruning"
 )
 
+// HotState is a hot-DB key's value. One key per chunk brackets the chunk's hot
+// RocksDB directory; the column families inside carry no individual key.
+type HotState string
+
+const (
+	// HotTransient — a dir operation is in flight (create/delete) or recovery
+	// demoted the key. Recovery is identical either way: open wipes+recreates,
+	// discard re-runs the scan.
+	HotTransient HotState = "transient"
+	// HotReady — the dir exists and is usable.
+	HotReady HotState = "ready"
+)
+
 // Kind is a per-chunk artifact kind. Each maps to one meta-store key suffix
 // and one set of on-disk files.
 type Kind string
@@ -65,7 +78,8 @@ func (i TxHashIndexID) String() string { return fmt.Sprintf("%08d", uint32(i)) }
 
 const (
 	ChunkPrefix       = "chunk:"
-	TxHashIndexPrefix = "txhash_index:"
+	HotChunkPrefix    = "hot:chunk:"
+	TxHashIndexPrefix = "index:"
 
 	// ConfigEarliestLedger is the sole config pin key. (chunks_per_txhash_index is
 	// the fixed ChunksPerTxhashIndex constant, not a pin.)
@@ -77,7 +91,13 @@ func ChunkKey(c chunk.ID, kind Kind) string {
 	return ChunkPrefix + c.String() + ":" + string(kind)
 }
 
-// TxHashIndexKey returns the index coverage key txhash_index:{idx:08d}:{lo:08d}:{hi:08d}.
+// HotChunkKey returns the hot-DB key hot:chunk:{chunk:08d}. One key per chunk
+// brackets the hot RocksDB dir; the value is a HotState.
+func HotChunkKey(c chunk.ID) string {
+	return HotChunkPrefix + c.String()
+}
+
+// TxHashIndexKey returns the index coverage key index:{idx:08d}:{lo:08d}:{hi:08d}.
 // The coverage [lo, hi] lives in the key NAME; the value is pure lifecycle
 // state. lo > hi is a programmer error, surfaced loudly via panic.
 func TxHashIndexKey(idx TxHashIndexID, lo, hi chunk.ID) string {
@@ -87,7 +107,7 @@ func TxHashIndexKey(idx TxHashIndexID, lo, hi chunk.ID) string {
 	return TxHashIndexPrefix + idx.String() + ":" + lo.String() + ":" + hi.String()
 }
 
-// TxHashIndexPrefixFor returns the scan prefix txhash_index:{idx:08d}: that enumerates
+// TxHashIndexPrefixFor returns the scan prefix index:{idx:08d}: that enumerates
 // all coverage keys of one index.
 func TxHashIndexPrefixFor(idx TxHashIndexID) string {
 	return TxHashIndexPrefix + idx.String() + ":"
@@ -129,7 +149,21 @@ func ParseChunkKey(key string) (chunk.ID, Kind, bool) {
 	return chunk.ID(n), kind, true
 }
 
-// ParseTxHashIndexKey decodes txhash_index:{idx:08d}:{lo:08d}:{hi:08d}. State is not part
+// ParseHotChunkKey decodes hot:chunk:{chunk:08d}. ok is false for any key that
+// is not a well-formed hot-chunk key.
+func ParseHotChunkKey(key string) (chunk.ID, bool) {
+	rest, found := strings.CutPrefix(key, HotChunkPrefix)
+	if !found {
+		return 0, false
+	}
+	n, err := ParsePadded(rest)
+	if err != nil {
+		return 0, false
+	}
+	return chunk.ID(n), true
+}
+
+// ParseTxHashIndexKey decodes index:{idx:08d}:{lo:08d}:{hi:08d}. State is not part
 // of the key; callers fill TxHashIndexCoverage.State from the scanned value.
 func ParseTxHashIndexKey(key string) (TxHashIndexCoverage, bool) {
 	rest, found := strings.CutPrefix(key, TxHashIndexPrefix)
diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/keys_test.go b/cmd/stellar-rpc/internal/fullhistory/geometry/keys_test.go
index 17685323a..424ca0dff 100644
--- a/cmd/stellar-rpc/internal/fullhistory/geometry/keys_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/geometry/keys_test.go
@@ -16,7 +16,7 @@ func TestKeyConstructorsMatchSpec(t *testing.T) {
 	require.Equal(t, "chunk:00005350:ledgers", ChunkKey(5350, KindLedgers))
 	require.Equal(t, "chunk:00005350:events", ChunkKey(5350, KindEvents))
 	require.Equal(t, "chunk:00005350:txhash", ChunkKey(5350, KindTxHash))
-	require.Equal(t, "txhash_index:00000005:00005100:00005349", TxHashIndexKey(5, 5100, 5349))
+	require.Equal(t, "index:00000005:00005100:00005349", TxHashIndexKey(5, 5100, 5349))
 }
 
 func TestChunkKeyBijection(t *testing.T) {
@@ -62,12 +62,12 @@ func TestKeyToPathBijection(t *testing.T) {
 
 func TestParseRejectsMalformed(t *testing.T) {
 	bad := []string{
-		"chunk:5350:ledgers",             // not 8-digit padded
-		"chunk:00005350:bogus",           // unknown kind
-		"chunk:00005350",                 // missing kind
-		"txhash_index:00000005:00005100", // too few segments
-		"txhash_index:5:5100:5349",       // not padded
-		"unrelated:key",                  // wrong family
+		"chunk:5350:ledgers",      // not 8-digit padded
+		"chunk:00005350:bogus",    // unknown kind
+		"chunk:00005350",          // missing kind
+		"index:00000005:00005100", // too few segments
+		"index:5:5100:5349",       // not padded
+		"unrelated:key",           // wrong family
 	}
 	for _, key := range bad {
 		_, _, okChunk := ParseChunkKey(key)
@@ -77,10 +77,19 @@ func TestParseRejectsMalformed(t *testing.T) {
 	// Specific rejections.
 	_, _, ok := ParseChunkKey("chunk:00005350:bogus")
 	require.False(t, ok)
-	_, ok2 := ParseTxHashIndexKey("txhash_index:00000005:00005349:00005100") // lo > hi
+	_, ok2 := ParseTxHashIndexKey("index:00000005:00005349:00005100") // lo > hi
 	require.False(t, ok2)
 }
 
 func TestIndexKeyPanicsOnLoGreaterThanHi(t *testing.T) {
 	require.Panics(t, func() { TxHashIndexKey(5, 5349, 5100) })
 }
+
+func TestHotKeyBijection(t *testing.T) {
+	for _, id := range []chunk.ID{0, 7, 5350} {
+		key := HotChunkKey(id)
+		got, ok := ParseHotChunkKey(key)
+		require.True(t, ok)
+		require.Equal(t, id, got)
+	}
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/paths.go b/cmd/stellar-rpc/internal/fullhistory/geometry/paths.go
index 58eb6752b..4f3331dd6 100644
--- a/cmd/stellar-rpc/internal/fullhistory/geometry/paths.go
+++ b/cmd/stellar-rpc/internal/fullhistory/geometry/paths.go
@@ -82,10 +82,18 @@ func (l Layout) LedgerPackPath(c chunk.ID) string {
 	return filepath.Join(l.ledgersRoot, c.BucketID(), ledger.PackName(c))
 }
 
+// EventsBucketDir is a chunk's events cold-segment directory — the bucket dir the
+// three events files (pack, index-pack, index-hash) live under, and the single
+// path the cold events ingester writes into. Sharing it with EventsPaths keeps
+// the events tree's shape defined once.
+func (l Layout) EventsBucketDir(c chunk.ID) string {
+	return filepath.Join(l.eventsRoot, c.BucketID())
+}
+
 // EventsPaths are a chunk's three events cold-segment files. Leaves owned by
 // eventstore.*.
 func (l Layout) EventsPaths(c chunk.ID) []string {
-	dir := filepath.Join(l.eventsRoot, c.BucketID())
+	dir := l.EventsBucketDir(c)
 	return []string{
 		filepath.Join(dir, eventstore.EventsPackName(c)),
 		filepath.Join(dir, eventstore.IndexPackName(c)),
diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/txhash_index.go b/cmd/stellar-rpc/internal/fullhistory/geometry/txhash_index.go
index 14f7a99f0..b63164925 100644
--- a/cmd/stellar-rpc/internal/fullhistory/geometry/txhash_index.go
+++ b/cmd/stellar-rpc/internal/fullhistory/geometry/txhash_index.go
@@ -80,19 +80,3 @@ func (l TxHashIndexLayout) LastChunk(id TxHashIndexID) chunk.ID {
 func (l TxHashIndexLayout) IsTerminalCoverage(cov TxHashIndexCoverage) bool {
 	return cov.Hi == l.LastChunk(cov.Index)
 }
-
-// LastCompleteChunkAt is the inverse of chunk.ID.LastLedger: the largest chunk
-// whose last ledger is <= ledger. Returns SIGNED int64 so a sub-genesis ledger
-// (the sub-genesis sentinel) maps to -1 ("before the first chunk") rather than
-// wrapping; the cast-before-subtract keeps it in int64 (uint32 ledger-1 would
-// underflow for ledger 0).
-func LastCompleteChunkAt(ledger uint32) int64 {
-	return (int64(ledger)+1-int64(chunk.FirstLedgerSeq))/int64(chunk.LedgersPerChunk) - 1
-}
-
-// ChunkFirstLedger maps a non-negative signed chunk index to its first ledger.
-// It is the signed-domain companion of chunk.ID.FirstLedger used by
-// retentionFloorChunk after the max(..., 0) clamp.
-func ChunkFirstLedger(c int64) uint32 {
-	return chunk.ID(c).FirstLedger() //nolint:gosec // c >= 0 (clamped) and bounded by real chunk ids
-}
diff --git a/cmd/stellar-rpc/internal/fullhistory/helpers_test.go b/cmd/stellar-rpc/internal/fullhistory/helpers_test.go
index d7dc16241..7d6b3da31 100644
--- a/cmd/stellar-rpc/internal/fullhistory/helpers_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/helpers_test.go
@@ -93,21 +93,30 @@ func newRecordingMetrics() *recordingMetrics {
 	return &recordingMetrics{gaugesSet: map[string]int{}}
 }
 
-func (r *recordingMetrics) LastCommitted(uint32, uint32) {
+func (r *recordingMetrics) LastCommitted(uint32) {
 	r.mu.Lock()
 	defer r.mu.Unlock()
 	r.gaugesSet["last_committed"]++
 }
 
+func (r *recordingMetrics) RetentionFloor(uint32) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.gaugesSet["retention_floor"]++
+}
+
 func (r *recordingMetrics) BackfillPass(time.Duration) {
 	r.mu.Lock()
 	defer r.mu.Unlock()
 	r.backfillPasses++
 }
 
-func (*recordingMetrics) Freeze(time.Duration)     {}
-func (*recordingMetrics) Rebuild(time.Duration)    {}
-func (*recordingMetrics) Prune(int, time.Duration) {}
+func (*recordingMetrics) ChunkBoundary()             {}
+func (*recordingMetrics) Freeze(time.Duration)       {}
+func (*recordingMetrics) Rebuild(time.Duration)      {}
+func (*recordingMetrics) Prune(int, time.Duration)   {}
+func (*recordingMetrics) LiveHotChunks(int)          {}
+func (*recordingMetrics) Discard(int, time.Duration) {}
 
 var _ observability.Metrics = (*recordingMetrics)(nil)
 
diff --git a/cmd/stellar-rpc/internal/fullhistory/hotloop.go b/cmd/stellar-rpc/internal/fullhistory/hotloop.go
new file mode 100644
index 000000000..3b1ac2487
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/hotloop.go
@@ -0,0 +1,210 @@
+package fullhistory
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
+	supportlog "github.com/stellar/go-stellar-sdk/support/log"
+	"github.com/stellar/go-stellar-sdk/xdr"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/ingest"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/observability"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
+)
+
+// The hot-DB ingestion loop (decision (a)). One goroutine consumes a single
+// sequence-validated ledger stream into the per-chunk shared multi-CF hot DB,
+// committing each ledger as one atomic synced WriteBatch across all CFs. It keeps
+// NO progress variable — the last synced batch IS the last-committed ledger,
+// re-derived at startup. Its only coupling to the lifecycle is the boundary
+// signal: at each boundary it publishes the just-completed chunk id (the two
+// goroutines share no memory). Clean-shutdown vs crash is decided at the daemon
+// top level (a ctx-canceled return is clean).
+
+// openHotDBForChunk opens/recovers/creates the chunk's shared hot DB, keyed on
+// the durable hot:chunk state:
+//   - "ready": open it must-exist (create-if-missing OFF). A missing or gutted DB
+//     FAILS the open — never auto-heal into a fresh empty DB (which would silently
+//     regress the watermark). The open failure is an ordinary restartable error:
+//     a transient self-heals on the next attempt, genuine loss becomes a
+//     supervised crash-loop with the wrapped context.
+//   - "transient" or absent: wipe any leftover dir and create fresh
+//     (transient -> fsync dir+parent -> ready), so a crash mid-create can't
+//     fabricate a "ready but DB gone" open failure above.
+func openHotDBForChunk(cat *catalog.Catalog, chunkID chunk.ID, logger *supportlog.Entry) (*hotchunk.DB, error) {
+	dir := cat.Layout().HotChunkPath(chunkID)
+
+	state, err := cat.HotState(chunkID)
+	if err != nil {
+		return nil, fmt.Errorf("read hot state chunk %s: %w", chunkID, err)
+	}
+
+	if state == geometry.HotReady {
+		db, openErr := hotchunk.OpenExisting(dir, chunkID, logger)
+		if openErr != nil {
+			return nil, fmt.Errorf("chunk %s is %q but its hot DB won't open: %w", chunkID, geometry.HotReady, openErr)
+		}
+		return db, nil
+	}
+
+	// "transient" or absent: wipe any leftover dir, then create fresh under the bracket.
+	if rmErr := os.RemoveAll(dir); rmErr != nil {
+		return nil, fmt.Errorf("wipe leftover hot dir %s: %w", dir, rmErr)
+	}
+	if putErr := cat.PutHotTransient(chunkID); putErr != nil {
+		return nil, fmt.Errorf("mark hot transient chunk %s: %w", chunkID, putErr)
+	}
+
+	db, openErr := hotchunk.Open(dir, chunkID, logger)
+	if openErr != nil {
+		return nil, fmt.Errorf("create hot DB chunk %s: %w", chunkID, openErr)
+	}
+
+	// The dir + dirent must be durable BEFORE the key flips to "ready", else a
+	// crash between the flip and the dir's durability fabricates the "ready but
+	// dir missing" won't-open error above for a DB that was actually fine. FsyncNewDirs
+	// syncs the leaf then its parent dirent (the one audited barrier for a
+	// freshly created dir).
+	if syncErr := geometry.FsyncNewDirs(filepath.Dir(dir), dir); syncErr != nil {
+		_ = db.Close()
+		return nil, fmt.Errorf("fsync hot dir %s: %w", dir, syncErr)
+	}
+	if flipErr := cat.FlipHotReady(chunkID); flipErr != nil {
+		_ = db.Close()
+		return nil, fmt.Errorf("flip hot ready chunk %s: %w", chunkID, flipErr)
+	}
+	return db, nil
+}
+
+// boundaryPublisher is the ingestion loop's handoff sink: it publishes the
+// just-completed chunk id to the lifecycle at each boundary.
+// *lifecycle.BoundarySignal is the production impl; tests inject a recorder.
+type boundaryPublisher interface {
+	Publish(c chunk.ID)
+}
+
+// ingestionLoopConfig bundles the ingestion loop's dependencies. run() opens the
+// resume chunk's hot DB (HotDB) BEFORE serving reads — so a broken hot tier fails
+// startup instead of serving behind a crash-looping loop — and hands the open
+// handle in; the loop's first deferred statement takes ownership of the close, and
+// it reopens the DB itself at every boundary (Catalog + Logger).
+type ingestionLoopConfig struct {
+	Stream   ledgerbackend.LedgerStream
+	Resume   uint32
+	HotDB    *hotchunk.DB
+	Catalog  *catalog.Catalog
+	Boundary boundaryPublisher
+	Logger   *supportlog.Entry
+	Metrics  observability.Metrics
+	Sink     ingest.MetricSink
+}
+
+// runIngestionLoop is the hot tier's OWNER: the single goroutine that opens,
+// writes, closes, and hands off the per-chunk hot DBs. It consumes ONE continuous
+// sequence-validated ledger stream from Resume (the stream owns the captive-core
+// process — started on the first pull, torn down when this loop exits), commits
+// each ledger as one atomic synced WriteBatch (decision (a)), and at each chunk
+// boundary closes the just-filled DB, opens the next, and publishes the completed
+// chunk to the lifecycle. A ctx-canceled return is a clean shutdown; any other
+// error is RESTARTABLE (startup re-derives the last-committed ledger, losing nothing).
+//
+// HANDOFF FENCE: the DB is CLOSED before the next chunk's hot:chunk key is created
+// — that key is what makes THIS chunk complete to the lifecycle, which could then
+// discard a dir a still-live writer holds. Publish fires only after the next DB is
+// open. The HotService is rebuilt each boundary.
+//
+// LIVE-CHUNK EXCLUSION: this loop is the SOLE writer of a chunk's hot DB and
+// closes it before publishing the chunk complete (the fence above); the lifecycle
+// only ever opens chunks at or below the highest complete one — strictly below the
+// live chunk. Those opens are read-only, which takes no RocksDB LOCK, so
+// writer/reader separation is a construction invariant here, not a lock readers
+// rely on.
+func runIngestionLoop(ctx context.Context, cfg ingestionLoopConfig) (err error) {
+	metrics := observability.MetricsOrNop(cfg.Metrics)
+
+	// Take ownership of the resume hot DB run() opened (before serving reads) ahead
+	// of any statement that can return, so the deferred close covers every exit —
+	// no ownership gap for a transient failure to leak the handle (and its RocksDB
+	// LOCK) through. The loop is this DB's single writer and reopens it at every
+	// boundary; the defer closes whatever handle is live on any exit (the boundary
+	// handoff already closed every prior chunk's DB), and no writer races the close
+	// (the loop has stopped on every exit path).
+	hotDB := cfg.HotDB
+	defer func() {
+		if hotDB != nil {
+			if cerr := hotDB.Close(); cerr != nil && err == nil {
+				err = fmt.Errorf("close live hot DB: %w", cerr)
+			}
+		}
+	}()
+
+	// hotService binds the metrics sink to THIS hotDB instance; the boundary handoff
+	// rebuilds it for the reopened chunk DB below.
+	hotService := ingest.NewHotService(hotDB, cfg.Sink)
+
+	// One continuous stream from the resume ledger, consumed on a local sequence
+	// counter. The in-order contract is enforced at the SOURCE — captive core (and
+	// every SDK backend) validates its own output — so the loop trusts the counter
+	// rather than re-parsing each view's sequence. A stream / decode error ends the
+	// loop for the daemon to classify.
+	seq := cfg.Resume
+	for raw, verr := range cfg.Stream.RawLedgers(ctx, ledgerbackend.UnboundedRange(cfg.Resume)) {
+		if verr != nil {
+			return fmt.Errorf("ingestion stream: %w", verr)
+		}
+
+		// One atomic synced WriteBatch across all hot CFs (via hotDB.IngestLedger).
+		if ierr := hotService.Ingest(ctx, seq, xdr.LedgerCloseMetaView(raw)); ierr != nil {
+			return fmt.Errorf("ingest ledger %d: %w", seq, ierr)
+		}
+		// The ingestion loop owns the last-committed gauge: this is the TRUE
+		// committed ledger (mid-chunk included), one atomic gauge set per ledger.
+		// The tick must not touch it — its chunk-aligned value would regress it.
+		metrics.LastCommitted(seq)
+
+		// Chunk boundary: this seq is the chunk's last ledger.
+		if closed := chunk.IDFromLedger(seq); seq == closed.LastLedger() {
+			next := closed + 1
+			// Handoff fence: close the write handle BEFORE the next chunk's key is
+			// created (that key is what makes THIS chunk complete to a tick, which may
+			// then freeze and discard its hot DB — no writer may hold it then).
+			if cerr := hotDB.Close(); cerr != nil {
+				hotDB = nil // closed (failed) — do not double-close in defer
+				return fmt.Errorf("close hot DB at boundary chunk %s: %w", closed, cerr)
+			}
+			hotDB = nil // released; reopen below republishes it for the defer
+
+			nextDB, oerr := openHotDBForChunk(cfg.Catalog, next, cfg.Logger)
+			if oerr != nil {
+				return fmt.Errorf("open hot DB for chunk %s at boundary: %w", next, oerr)
+			}
+			hotDB = nextDB
+			hotService = ingest.NewHotService(hotDB, cfg.Sink)
+			// next's key (created inside openHotDBForChunk) moved the partition; only
+			// now publish the completed chunk to the lifecycle.
+			cfg.Boundary.Publish(closed)
+
+			// Boundary observability (the woken tick reports the freeze/discard/prune).
+			metrics.ChunkBoundary()
+			cfg.Logger.WithField("closed_chunk", closed.String()).
+				WithField("next_chunk", next.String()).
+				WithField("last_ledger", seq).
+				Info("streaming: ingestion chunk boundary — handed off to lifecycle")
+		}
+		seq++
+	}
+	// The unbounded production stream ends only on ctx cancellation or a source
+	// error, both surfaced as the cursor's error element above. Falling through here
+	// means the source stopped WITHOUT an error while the daemon ctx is still live —
+	// unexpected for captive core; surface it as a restartable error rather than a
+	// nil return, which supervise would read as a clean shutdown and silently stop
+	// ingesting.
+	return errors.New("ingestion stream ended unexpectedly (source stopped with no error)")
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/hotloop_test.go b/cmd/stellar-rpc/internal/fullhistory/hotloop_test.go
new file mode 100644
index 000000000..6f7b62ec1
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/hotloop_test.go
@@ -0,0 +1,416 @@
+package fullhistory
+
+import (
+	"context"
+	"errors"
+	"iter"
+	"os"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
+)
+
+// ---------------------------------------------------------------------------
+// fakeCoreStream — an injectable ledgerbackend.LedgerStream the ingestion loop
+// consumes (the design's raw captive-core stream). RawLedgers yields programmed
+// frames contiguously from the range's From(); once it runs past the last
+// programmed seq it either blocks until ctx is canceled (a live tip stream that
+// only ends on shutdown) or yields endErr (a crashed backend). It records the
+// FIRST seq it was asked for (the loop's resume point) and a per-seq consideration
+// count so a test can wait for the loop to reach the blocking pull.
+// ---------------------------------------------------------------------------
+
+type fakeCoreStream struct {
+	frames     map[uint32][]byte // seq -> raw LCM bytes
+	blockOnCtx bool              // past the last frame, block until ctx.Done
+	endErr     error             // past the last frame, yield this (when not blocking)
+	yieldErrAt uint32            // if non-zero, yield errAt at this seq instead of bytes
+	errAt      error             // the error yielded at seq yieldErrAt
+
+	calls     atomic.Int32  // seqs considered (counted before any yield or block)
+	firstSeen atomic.Uint32 // r.From() of the first RawLedgers iteration
+	sawFirst  atomic.Bool   // set once firstSeen has been recorded
+}
+
+var _ ledgerbackend.LedgerStream = (*fakeCoreStream)(nil)
+
+func (s *fakeCoreStream) RawLedgers(
+	ctx context.Context, r ledgerbackend.Range, _ ...ledgerbackend.StreamOption,
+) iter.Seq2[[]byte, error] {
+	return func(yield func([]byte, error) bool) {
+		if s.sawFirst.CompareAndSwap(false, true) {
+			s.firstSeen.Store(r.From())
+		}
+		for seq := r.From(); ; seq++ {
+			s.calls.Add(1)
+			if ctx.Err() != nil {
+				yield(nil, ctx.Err())
+				return
+			}
+			if s.yieldErrAt != 0 && seq == s.yieldErrAt {
+				yield(nil, s.errAt)
+				return
+			}
+			if raw, ok := s.frames[seq]; ok {
+				if !yield(raw, nil) {
+					return
+				}
+				continue
+			}
+			// Past the programmed frames.
+			if s.blockOnCtx {
+				<-ctx.Done()
+				yield(nil, ctx.Err())
+				return
+			}
+			if s.endErr != nil {
+				yield(nil, s.endErr)
+				return
+			}
+			yield(nil, errors.New("fakeCoreStream: no frame for seq"))
+			return
+		}
+	}
+}
+
+// streamForSeqs builds a fakeCoreStream with zero-tx LCM frames for [from,to].
+func streamForSeqs(t *testing.T, from, to uint32) *fakeCoreStream {
+	t.Helper()
+	s := &fakeCoreStream{frames: map[uint32][]byte{}}
+	for seq := from; seq <= to; seq++ {
+		s.frames[seq] = zeroTxLCMBytes(t, seq)
+	}
+	return s
+}
+
+// recordingBoundary is a test boundaryPublisher capturing the completed chunk ids
+// the loop publishes at each boundary, so a test can assert the handoff without
+// wiring a real lifecycle Loop.
+type recordingBoundary struct {
+	mu  sync.Mutex
+	ids []chunk.ID
+}
+
+func (r *recordingBoundary) Publish(c chunk.ID) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.ids = append(r.ids, c)
+}
+
+func (r *recordingBoundary) list() []chunk.ID {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	return append([]chunk.ID(nil), r.ids...)
+}
+
+// loopConfig builds an ingestionLoopConfig for a test: the stream + resume point +
+// a recording boundary, and opens the resume chunk's hot DB the way run() does now
+// (the loop takes ownership and closes it). The test must hold no other handle on
+// that dir while the loop runs (a second read-write open would contend the LOCK).
+func loopConfig(
+	t *testing.T, stream ledgerbackend.LedgerStream, cat *catalog.Catalog, resume uint32,
+) (ingestionLoopConfig, *recordingBoundary) {
+	t.Helper()
+	rec := &recordingBoundary{}
+	db, err := openHotDBForChunk(cat, chunk.IDFromLedger(resume), silentLogger())
+	require.NoError(t, err)
+	return ingestionLoopConfig{
+		Stream:   stream,
+		Resume:   resume,
+		HotDB:    db,
+		Catalog:  cat,
+		Boundary: rec,
+		Logger:   silentLogger(),
+	}, rec
+}
+
+// impliedResume is the resume point a hot DB's durable watermark implies — one past
+// its last committed ledger, or the chunk's first ledger when empty. Production no
+// longer derives this in the loop (it trusts the resume run() passes it), but tests
+// still assert that a restart's durable watermark matches what startup would derive.
+func impliedResume(t *testing.T, db *hotchunk.DB) uint32 {
+	t.Helper()
+	maxSeq, ok, err := db.MaxCommittedSeq()
+	require.NoError(t, err)
+	if !ok {
+		return db.ChunkID().FirstLedger()
+	}
+	return maxSeq + 1
+}
+
+// openLiveHotDB opens (and brackets ready) the live hot DB for a chunk via the
+// production opener (openHotDBForChunk) under cat, returning the open handle.
+func openLiveHotDB(t *testing.T, cat *catalog.Catalog, c chunk.ID) *hotchunk.DB {
+	t.Helper()
+	db, err := openHotDBForChunk(cat, c, silentLogger())
+	require.NoError(t, err)
+	return db
+}
+
+// seedWatermark commits real zero-tx LCMs for [FirstLedger, seq] into chunk c's
+// hot DB through the production IngestLedger path (the events CF requires strict
+// ledger contiguity from the chunk's first ledger), then CLOSES the handle —
+// leaving the chunk "ready" on disk with NO open handle, so loopConfig can reopen
+// it for the loop. Returns the resume point (seq+1) a boundary test drives the
+// loop from. Seeding a near-full chunk costs one synced commit per ledger, so its
+// callers run t.Parallel().
+func seedWatermark(t *testing.T, cat *catalog.Catalog, c chunk.ID, seq uint32) uint32 {
+	t.Helper()
+	db := openLiveHotDB(t, cat, c)
+	for s := c.FirstLedger(); s <= seq; s++ {
+		_, err := db.IngestLedger(s, zeroTxLCMBytes(t, s))
+		require.NoError(t, err)
+	}
+	require.NoError(t, db.Close())
+	return seq + 1
+}
+
+// ---------------------------------------------------------------------------
+// openHotDBForChunk — the bracket's open end.
+// ---------------------------------------------------------------------------
+
+// TestOpenHotTier_CreatesBracketAndDir: a fresh open writes the dir and flips
+// the key "ready"; the returned DB is empty (resume at FirstLedger).
+func TestOpenHotTier_CreatesBracketAndDir(t *testing.T) {
+	cat, _ := testCatalog(t)
+	c := chunk.ID(3)
+
+	db, err := openHotDBForChunk(cat, c, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = db.Close() })
+
+	state, err := cat.HotState(c)
+	require.NoError(t, err)
+	assert.Equal(t, geometry.HotReady, state, "open flips the key ready")
+
+	_, statErr := os.Stat(cat.Layout().HotChunkPath(c))
+	require.NoError(t, statErr, "the dir exists")
+
+	assert.Equal(t, c.FirstLedger(), impliedResume(t, db), "an empty resume DB resumes at the chunk's first ledger")
+}
+
+// TestOpenHotTier_ReadyButDirMissingFailsOpen: a "ready" key whose DB is gone
+// FAILS the must-exist open (never auto-healed into a fresh empty DB). The error
+// is ordinary/restartable — no sentinel.
+func TestOpenHotTier_ReadyButDirMissingFailsOpen(t *testing.T) {
+	cat, _ := testCatalog(t)
+	c := chunk.ID(5)
+	require.NoError(t, cat.PutHotTransient(c))
+	require.NoError(t, cat.FlipHotReady(c)) // key says ready, but no dir created
+
+	_, err := openHotDBForChunk(cat, c, silentLogger())
+	require.Error(t, err)
+}
+
+// TestOpenHotTier_TransientRecreatesFresh: a "transient" key (crashed
+// create/discard) is recovered by wiping any leftover and recreating.
+func TestOpenHotTier_TransientRecreatesFresh(t *testing.T) {
+	cat, _ := testCatalog(t)
+	c := chunk.ID(2)
+	require.NoError(t, cat.PutHotTransient(c)) // a crash left a transient key
+
+	db, err := openHotDBForChunk(cat, c, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = db.Close() })
+
+	state, err := cat.HotState(c)
+	require.NoError(t, err)
+	assert.Equal(t, geometry.HotReady, state)
+}
+
+// ---------------------------------------------------------------------------
+// runIngestionLoop — atomic landing.
+// ---------------------------------------------------------------------------
+
+// TestRunIngestionLoop_LedgerLandsAcrossAllCFs: streaming a short contiguous
+// prefix lands each ledger atomically across the ledgers, txhash, and events
+// CFs — the single watermark advances to the last committed seq, and every CF
+// is readable. The stream then errs (backend crash), which the loop returns.
+func TestRunIngestionLoop_LedgerLandsAcrossAllCFs(t *testing.T) {
+	cat, _ := testCatalog(t)
+	c := chunk.ID(0)
+	first := c.FirstLedger()
+
+	// A short contiguous prefix from the chunk's first ledger (events require
+	// strict contiguity from FirstLedger), then the stream runs dry and errs.
+	// loopConfig opens the empty chunk 0; the loop resumes at its first ledger.
+	stream := streamForSeqs(t, first, first+2)
+	stream.endErr = errors.New("backend crashed")
+	cfg, _ := loopConfig(t, stream, cat, first)
+
+	err := runIngestionLoop(context.Background(), cfg)
+	require.Error(t, err, "stream ran past the prefix and errored")
+
+	// Reopen the (loop-closed) DB and assert every CF advanced together.
+	reopened, err := hotchunk.Open(cat.Layout().HotChunkPath(c), c, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = reopened.Close() })
+
+	maxSeq, ok, err := reopened.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.True(t, ok)
+	assert.Equal(t, first+2, maxSeq, "the single watermark is the last committed seq")
+
+	raw, err := reopened.Ledgers().GetLedgerRaw(first + 2)
+	require.NoError(t, err)
+	assert.NotEmpty(t, raw)
+	assert.Equal(t, uint32(0), eventCount(t, reopened.Events()), "zero-tx ledgers carry no events")
+}
+
+// ---------------------------------------------------------------------------
+// runIngestionLoop — boundary notifications carry the completed chunk id.
+// ---------------------------------------------------------------------------
+
+// TestRunIngestionLoop_BoundaryNotifiesCompletedChunk: crossing the chunk 0 -> 1
+// boundary publishes chunk 0 to the lifecycle. The watermark is seeded just below
+// the boundary so the stream crosses it in one step.
+func TestRunIngestionLoop_BoundaryNotifiesCompletedChunk(t *testing.T) {
+	t.Parallel() // seeds a near-full chunk (one synced commit per ledger)
+	cat, _ := testCatalog(t)
+	c := chunk.ID(0)
+	c1 := c + 1
+	resume := seedWatermark(t, cat, c, c.LastLedger()-1) // == c.LastLedger()
+
+	stream := &fakeCoreStream{frames: map[uint32][]byte{
+		c.LastLedger():   zeroTxLCMBytes(t, c.LastLedger()),   // boundary 0->1
+		c1.FirstLedger(): zeroTxLCMBytes(t, c1.FirstLedger()), // a ledger in chunk 1
+	}, endErr: errors.New("end")}
+	cfg, rec := loopConfig(t, stream, cat, resume)
+
+	done := make(chan error, 1)
+	go func() {
+		done <- runIngestionLoop(context.Background(), cfg)
+	}()
+
+	select {
+	case err := <-done:
+		require.Error(t, err, "stream ran dry")
+	case <-time.After(10 * time.Second):
+		t.Fatal("ingestion loop deadlocked")
+	}
+
+	assert.Equal(t, []chunk.ID{c}, rec.list(), "the completed chunk id was published at the boundary")
+}
+
+// ---------------------------------------------------------------------------
+// runIngestionLoop — clean shutdown vs crash (classified at the daemon top
+// level: ctx-canceled return is clean, any other error is restartable).
+// ---------------------------------------------------------------------------
+
+// TestRunIngestionLoop_CtxCancelReturnsCtxErr: a ctx cancellation while the stream
+// is blocking on the tip makes RawLedgers yield ctx.Err(); the loop returns that
+// (the daemon top level classifies a ctx-canceled return as a clean shutdown).
+func TestRunIngestionLoop_CtxCancelReturnsCtxErr(t *testing.T) {
+	cat, _ := testCatalog(t)
+	c := chunk.ID(0)
+	first := c.FirstLedger()
+
+	stream := streamForSeqs(t, first, first+1)
+	stream.blockOnCtx = true // after the frames, behave like a live tip stream
+	cfg, _ := loopConfig(t, stream, cat, first)
+	ctx, cancel := context.WithCancel(context.Background())
+
+	done := make(chan error, 1)
+	go func() {
+		done <- runIngestionLoop(ctx, cfg)
+	}()
+
+	require.Eventually(t, func() bool {
+		return stream.calls.Load() >= 3 // ingested 2 frames, blocked on the 3rd
+	}, 5*time.Second, 5*time.Millisecond)
+	cancel()
+
+	select {
+	case err := <-done:
+		require.Error(t, err)
+		require.ErrorIs(t, err, context.Canceled, "the loop surfaces the ctx-canceled stream error")
+	case <-time.After(10 * time.Second):
+		t.Fatal("ingestion loop did not stop on ctx cancellation")
+	}
+}
+
+// TestRunIngestionLoop_StreamErrorReturnsError: a stream error (not a shutdown)
+// propagates as a restartable failure.
+func TestRunIngestionLoop_StreamErrorReturnsError(t *testing.T) {
+	cat, _ := testCatalog(t)
+	c := chunk.ID(0)
+	first := c.FirstLedger()
+
+	boom := errors.New("backend exploded")
+	stream := streamForSeqs(t, first, first)
+	stream.yieldErrAt = first + 1
+	stream.errAt = boom
+	cfg, _ := loopConfig(t, stream, cat, first)
+
+	err := runIngestionLoop(context.Background(), cfg)
+	require.Error(t, err)
+	require.ErrorIs(t, err, boom)
+}
+
+// ---------------------------------------------------------------------------
+// runIngestionLoop — restart resumes idempotently from the derived watermark.
+// ---------------------------------------------------------------------------
+
+// TestRunIngestionLoop_RestartResumesFromWatermark: after a first run commits a
+// prefix and exits, a second run over a FRESH open of the SAME hot dir resumes at
+// watermark+1 (asserted via the FIRST seq the stream is asked for) — the stream
+// range starts at the derived resume, and the final watermark is exactly the last
+// delivered seq.
+func TestRunIngestionLoop_RestartResumesFromWatermark(t *testing.T) {
+	cat, _ := testCatalog(t)
+	c := chunk.ID(0)
+	first := c.FirstLedger()
+
+	// First run: loopConfig opens empty chunk 0 (resumes at first), the loop commits
+	// [first, first+2], then the stream errs.
+	stream1 := streamForSeqs(t, first, first+2)
+	stream1.endErr = errors.New("end")
+	cfg1, _ := loopConfig(t, stream1, cat, first)
+	err := runIngestionLoop(context.Background(), cfg1)
+	require.Error(t, err)
+	assert.Equal(t, first, stream1.firstSeen.Load(), "first run resumed at the chunk's first ledger")
+
+	// The durable watermark now implies resume first+3 — exactly what startup would
+	// derive on restart. Close the handle before loopConfig reopens the dir.
+	db2, err := openHotDBForChunk(cat, c, silentLogger())
+	require.NoError(t, err)
+	resume := impliedResume(t, db2)
+	assert.Equal(t, first+3, resume, "restart resumes one past the durable watermark")
+	require.NoError(t, db2.Close())
+
+	// Second run resumes at the derived watermark and commits three more ledgers.
+	stream2 := streamForSeqs(t, first+3, first+5)
+	stream2.endErr = errors.New("end")
+	cfg2, _ := loopConfig(t, stream2, cat, resume)
+	err = runIngestionLoop(context.Background(), cfg2)
+	require.Error(t, err)
+	assert.Equal(t, first+3, stream2.firstSeen.Load(), "second run resumed at watermark+1")
+
+	reopened, err := hotchunk.Open(cat.Layout().HotChunkPath(c), c, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = reopened.Close() })
+	maxSeq, ok, err := reopened.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.True(t, ok)
+	assert.Equal(t, first+5, maxSeq)
+}
+
+// eventCount reads the hot events store's committed event count, failing the
+// test on the (close-only) error the Reader contract allows.
+func eventCount(t *testing.T, r interface{ EventCount() (uint32, error) }) uint32 {
+	t.Helper()
+	n, err := r.EventCount()
+	require.NoError(t, err)
+	return n
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/config.go b/cmd/stellar-rpc/internal/fullhistory/ingest/config.go
index 139f70d43..014c554c6 100644
--- a/cmd/stellar-rpc/internal/fullhistory/ingest/config.go
+++ b/cmd/stellar-rpc/internal/fullhistory/ingest/config.go
@@ -3,8 +3,7 @@ package ingest
 import "errors"
 
 // Config selects which data types the ingest drivers write. At least one of
-// Ledgers/Txhash/Events must be enabled. Per-ledger hot fan-out is always
-// parallel; that is not configurable.
+// Ledgers/Txhash/Events must be enabled.
 //
 // The view-based event path derives payloads from the LedgerCloseMetaView and
 // needs no network passphrase, so Config carries no passphrase.
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/doc.go b/cmd/stellar-rpc/internal/fullhistory/ingest/doc.go
index 5667214d9..4eeb79f70 100644
--- a/cmd/stellar-rpc/internal/fullhistory/ingest/doc.go
+++ b/cmd/stellar-rpc/internal/fullhistory/ingest/doc.go
@@ -8,12 +8,12 @@
 // Two tiers share the per-ledger extraction but differ in everything
 // else:
 //
-//   - Hot (RunHot): one chunk into the long-lived, caller-owned hot
-//     stores, from an injected ledgerbackend.LedgerStream. The stores
-//     are INJECTED and never opened or closed here, and neither is the
-//     stream; each ledger is durable before the next is pulled.
-//     Per-ledger fan-out across the enabled ingesters is concurrent
-//     (HotService).
+//   - Hot (HotService): one ledger at a time into the long-lived,
+//     caller-owned per-chunk hot DB, driven by the daemon's live
+//     ingestion loop. The DB is INJECTED and never opened or closed
+//     here. Each ledger is written as ONE atomic synced WriteBatch
+//     across all column families (decision (a) — no per-type fan-out),
+//     so a ledger is fully present or absent before the next is pulled.
 //   - Cold (WriteColdChunk): one chunk into per-chunk cold artifacts
 //     (ledger .pack, txhash .bin, events pack+index). It is
 //     SOURCE-BLIND — the caller resolves the chunk's ledger source and
@@ -58,7 +58,7 @@
 //
 // Inputs are borrowed: every Ingest receives a view over the source
 // stream's buffer, valid only until the next ledger is pulled, and
-// each ingester copies what it retains (see HotIngester). The raw
+// each ingester copies what it retains (see ColdIngester). The raw
 // ledger iterator's contract includes yielding an error on ctx
 // cancellation — the drain loop relies on it for cancellation rather
 // than polling ctx itself. Metrics flow through MetricSink (Prometheus in prod,
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go b/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go
index 417cc2d37..7c73ad0f3 100644
--- a/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go
+++ b/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go
@@ -7,80 +7,19 @@ import (
 	"iter"
 	"time"
 
-	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
 	supportlog "github.com/stellar/go-stellar-sdk/support/log"
 	"github.com/stellar/go-stellar-sdk/xdr"
 
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash"
 )
 
-// HotStores holds the long-lived, caller-owned hot stores injected into RunHot.
-// The caller (the daemon) opens and closes these; RunHot only borrows them to
-// build the per-type hot ingesters. A field left nil for an enabled data type is
-// a configuration error caught by RunHot. Every hot store is chunk-bound (each
-// instance accumulates exactly one chunk before being frozen into cold
-// artifacts), so each injected store must already be bound to the chunk being
-// ingested — RunHot rejects a mismatch up front.
-type HotStores struct {
-	Ledgers *ledger.HotStore
-	Txhash  *txhash.HotStore
-	Events  *eventstore.HotStore
-}
-
-// buildHotIngesters constructs one HotIngester per data type enabled in cfg, in
-// canonical ledgers→txhash→events order, from the injected stores. It errors if
-// an enabled type's store is nil.
-func buildHotIngesters(stores HotStores, sink MetricSink, cfg Config) ([]HotIngester, error) {
-	var ings []HotIngester
-	if cfg.Ledgers {
-		if stores.Ledgers == nil {
-			return nil, errors.New("ingest: Ledgers enabled but HotStores.Ledgers is nil")
-		}
-		ings = append(ings, NewLedgerHotIngester(stores.Ledgers, sink))
-	}
-	if cfg.Txhash {
-		if stores.Txhash == nil {
-			return nil, errors.New("ingest: Txhash enabled but HotStores.Txhash is nil")
-		}
-		ings = append(ings, NewTxhashHotIngester(stores.Txhash, sink))
-	}
-	if cfg.Events {
-		if stores.Events == nil {
-			return nil, errors.New("ingest: Events enabled but HotStores.Events is nil")
-		}
-		ings = append(ings, NewEventsHotIngester(stores.Events, sink))
-	}
-	return ings, nil
-}
-
-// errColdBuildAborted is the synthetic error recorded against an
-// already-built cold ingester's metric when a LATER constructor fails and the
-// build is rolled back. Without it, closing a fully-built ingester would emit
-// a clean (nil-err, 0-items) ColdIngest — a phantom "success" for a chunk that
-// never actually ingested anything.
-var errColdBuildAborted = errors.New("ingest: cold ingester build aborted (sibling constructor failed)")
-
-// coldAborter is implemented by the concrete cold ingesters so the
-// constructor-rollback path can mark their per-chunk metric as aborted before
-// Close emits it, turning what would be a phantom success into a recorded
-// abort. Optional: an ingester that does not implement it just gets its normal
-// Close emission.
-type coldAborter interface {
-	abortMetric(err error)
-}
-
 // closeColdAll closes every cold ingester built so far, joining each Close error
-// into err. Used when a LATER constructor fails mid-build: the already-built
-// ingesters never ingested anything, so each one's metric is first marked
-// aborted (so the deferred Close emit is not a phantom success).
+// into err. Used when a LATER constructor fails mid-build. The already-built
+// ingesters never ingested or finalized, and Close no longer emits a per-ingester
+// ColdIngest, so a rolled-back build produces no phantom-success sample — no
+// abort bookkeeping needed here.
 func closeColdAll(ings []ColdIngester, err error) error {
 	for _, ing := range ings {
-		if a, ok := ing.(coldAborter); ok {
-			a.abortMetric(errColdBuildAborted)
-		}
 		if cerr := ing.Close(); cerr != nil {
 			err = errors.Join(err, fmt.Errorf("close: %w", cerr))
 		}
@@ -88,105 +27,29 @@ func closeColdAll(ings []ColdIngester, err error) error {
 	return err
 }
 
-// RunHot feeds each ledger of chunkID (as a view) from the injected stream to a
-// HotService over the enabled hot ingesters, built from the INJECTED,
-// caller-owned stores in hotStores. Ingest errors abort fast; HotService.Ingest
-// waits for all ingesters before the loop pulls again so the borrowed view is
-// never read past its lifetime. The hot stores are NOT closed here, and neither
-// is the stream — the caller owns both lifecycles.
-func RunHot(
-	ctx context.Context,
-	logger *supportlog.Entry,
-	stream ledgerbackend.LedgerStream,
-	chunkID chunk.ID,
-	hotStores HotStores,
-	sink MetricSink,
-	cfg Config,
-) error {
-	if verr := cfg.validate(); verr != nil {
-		return verr
-	}
-	// Every hot store is chunk-bound — each instance accumulates exactly one
-	// chunk's data before being frozen into the chunk's cold artifacts — and
-	// records its chunk at open time. An injected store bound to a different
-	// chunk than we're ingesting would silently interleave two chunks' data
-	// (ledgers, txhash) or fail every per-ledger write with an out-of-range
-	// offset (events, whose LedgerOffsets are chunk-relative), so catch the
-	// mismatch up front with a clear message. Nil stores are skipped here:
-	// buildHotIngesters rejects a nil store for an enabled type with a more
-	// specific error.
-	checkBinding := func(name string, got chunk.ID) error {
-		if got != chunkID {
-			return fmt.Errorf("ingest: RunHot chunk %d but injected %s store is bound to chunk %d",
-				uint32(chunkID), name, uint32(got))
-		}
-		return nil
-	}
-	if cfg.Ledgers && hotStores.Ledgers != nil {
-		if err := checkBinding("Ledgers", hotStores.Ledgers.ChunkID()); err != nil {
-			return err
-		}
-	}
-	if cfg.Txhash && hotStores.Txhash != nil {
-		if err := checkBinding("Txhash", hotStores.Txhash.ChunkID()); err != nil {
-			return err
-		}
-	}
-	if cfg.Events && hotStores.Events != nil {
-		if err := checkBinding("Events", hotStores.Events.ChunkID()); err != nil {
-			return err
-		}
-	}
-	ings, berr := buildHotIngesters(hotStores, sink, cfg)
-	if berr != nil {
-		return berr
-	}
-	logger.Debugf("RunHot: ingesting chunk %d [%d, %d]", uint32(chunkID), chunkID.FirstLedger(), chunkID.LastLedger())
-	service := NewHotService(ings, sink)
-	raw := stream.RawLedgers(ctx, ledgerbackend.BoundedRange(chunkID.FirstLedger(), chunkID.LastLedger()))
-	return drain(ctx, raw, chunkID, service)
-}
-
-// drain pulls the chunk's raw ledgers from the iterator and feeds each (as a view)
-// to the service, then verifies the full [first,last] range was consumed. For the
-// cold path this completeness check runs before Finalize, so a short stream never
-// produces a finalized truncated artifact. The caller passes an iterator already
-// bounded to the chunk's range; cancellation is the iterator's job (RawLedgers
-// yields an error once ctx is canceled), so the loop needs no ctx poll of its own.
-func drain(ctx context.Context, ledgers iter.Seq2[[]byte, error], chunkID chunk.ID, ing HotIngester) error {
+// drain feeds each of the chunk's raw ledgers (as a borrowed view) to the service,
+// numbered by a local sequence counter, then verifies the full [first,last] range
+// was consumed — for cold this runs before Finalize, so a short stream never
+// finalizes a truncated artifact. The in-order contract is enforced at the SOURCE
+// (packStream reads positionally by key; hotLedgerStream key-checks its own
+// keyspace; the SDK backends validate their own output), so drain trusts the
+// counter rather than re-parsing every view's sequence. Cancellation is the
+// iterator's job (RawLedgers errors on a canceled ctx), so there is no ctx poll
+// here.
+func drain(ctx context.Context, ledgers iter.Seq2[[]byte, error], chunkID chunk.ID, svc *ColdService) error {
 	first, last := chunkID.FirstLedger(), chunkID.LastLedger()
 	seq := first
 	for raw, serr := range ledgers {
 		if serr != nil {
-			return fmt.Errorf("RawLedgers(%d): %w", seq, serr)
+			return fmt.Errorf("ingest: stream for chunk %d: %w", uint32(chunkID), serr)
 		}
-		// Reject a stream that runs PAST the chunk before ingesting anything
-		// out-of-chunk. Without this, an in-order overrun would only trip the
-		// post-loop count check after the extra ledgers were durably ingested
-		// (the ledger and txhash hot stores accept any sequence). All in-repo
-		// sources bound themselves; this guards custom iterators.
+		// Reject a stream that runs PAST the chunk before ingesting anything out-of-chunk.
+		// All in-repo sources bound themselves; this guards a custom iterator.
 		if seq > last {
 			return fmt.Errorf("ingest: stream for chunk %d yielded a ledger past %d (chunk overrun)",
 				uint32(chunkID), last)
 		}
-		lcm := xdr.LedgerCloseMetaView(raw)
-		// Validate the actual ledger sequence before ingesting. The final
-		// count check below only catches a short/long stream; a source that
-		// yields a duplicate or out-of-order ledger with the right total
-		// count would otherwise pass silently (e.g. on the txhash and
-		// ledger-hot paths, which key on the LCM's own seq).
-		actual, aerr := lcm.LedgerSequence()
-		if aerr != nil {
-			return fmt.Errorf("ingest: stream for chunk %d: ledger sequence at expected %d: %w",
-				uint32(chunkID), seq, aerr)
-		}
-		if actual != seq {
-			return fmt.Errorf("ingest: stream for chunk %d yielded ledger %d, expected %d",
-				uint32(chunkID), actual, seq)
-		}
-		// seq is now VALIDATED as lcm's sequence — pass it through so the
-		// ingesters consume it instead of each re-deriving it from the view.
-		if err := ing.Ingest(ctx, seq, lcm); err != nil {
+		if err := svc.Ingest(ctx, seq, xdr.LedgerCloseMetaView(raw)); err != nil {
 			return err
 		}
 		seq++
@@ -197,36 +60,40 @@ func drain(ctx context.Context, ledgers iter.Seq2[[]byte, error], chunkID chunk.
 	return nil
 }
 
-// ColdDirs is the per-type output root for one chunk's cold artifacts. An empty
-// field for an enabled type is a config error.
+// ColdDirs holds ONE chunk's RESOLVED cold-artifact destinations, derived by the
+// caller from geometry.Layout so the ingesters write exactly where the freeze
+// barrier and the sweeps resolve — the path formula lives in Layout alone, never
+// re-derived here. LedgerPack and TxhashBin are the chunk's full file paths;
+// EventsDir is its events bucket dir. An empty field for an enabled type is a
+// config error.
 type ColdDirs struct {
-	Ledgers string
-	Txhash  string
-	Events  string
+	LedgerPack string
+	TxhashBin  string
+	EventsDir  string
 }
 
-// buildColdIngesters opens one ColdIngester per enabled type under its dirs field.
+// buildColdIngesters opens one ColdIngester per enabled type at its resolved path.
 // Single definition site of the ctor table, order, and rollback.
 func buildColdIngesters(dirs ColdDirs, chunkID chunk.ID, sink MetricSink, cfg Config) ([]ColdIngester, error) {
 	ctors := []struct {
 		enabled  bool
 		dataType string
-		dir      string
+		path     string
 		open     func(string, chunk.ID, MetricSink) (ColdIngester, error)
 	}{
-		{cfg.Ledgers, dataTypeLedgers, dirs.Ledgers, NewLedgerColdIngester},
-		{cfg.Txhash, dataTypeTxhash, dirs.Txhash, NewTxhashColdIngester},
-		{cfg.Events, dataTypeEvents, dirs.Events, NewEventsColdIngester},
+		{cfg.Ledgers, dataTypeLedgers, dirs.LedgerPack, NewLedgerColdIngester},
+		{cfg.Txhash, dataTypeTxhash, dirs.TxhashBin, NewTxhashColdIngester},
+		{cfg.Events, dataTypeEvents, dirs.EventsDir, NewEventsColdIngester},
 	}
 	ings := make([]ColdIngester, 0, len(ctors))
 	for _, c := range ctors {
 		if !c.enabled {
 			continue
 		}
-		if c.dir == "" {
-			return nil, closeColdAll(ings, fmt.Errorf("ingest: %s enabled but ColdDirs.%s is empty", c.dataType, c.dataType))
+		if c.path == "" {
+			return nil, closeColdAll(ings, fmt.Errorf("ingest: %s enabled but its ColdDirs path is empty", c.dataType))
 		}
-		ing, err := c.open(c.dir, chunkID, sink)
+		ing, err := c.open(c.path, chunkID, sink)
 		if err != nil {
 			return nil, closeColdAll(ings, fmt.Errorf("open %s cold ingester: %w", c.dataType, err))
 		}
@@ -235,8 +102,8 @@ func buildColdIngesters(dirs ColdDirs, chunkID chunk.ID, sink MetricSink, cfg Co
 	return ings, nil
 }
 
-// WriteColdChunk materializes ONE chunk's cold artifacts into the roots named by
-// dirs, in a single pass, from the already-opened raw ledger iterator. It is
+// WriteColdChunk materializes ONE chunk's cold artifacts at the resolved paths
+// named by dirs, in a single pass, from the already-opened raw ledger iterator. It is
 // SOURCE-BLIND: the caller (backfill) resolves the chunk's ledger source — the
 // local frozen .pack or the bulk backend — and hands its RawLedgers iterator here,
 // so the cold materializer never learns where the bytes came from and is faked in
@@ -275,8 +142,8 @@ func WriteColdChunk(
 
 	ings, berr := buildColdIngesters(dirs, chunkID, sink, cfg)
 	if berr != nil {
-		// A constructor failure is still a chunk attempt
-		// (closeColdAll only emitted the per-ingester aborts).
+		// A constructor failure is still a chunk attempt: emit the aggregate
+		// (closeColdAll rolled back the built ingesters with no per-ingester emit).
 		sink.ColdChunkTotal(time.Since(start))
 		return berr
 	}
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/events.go b/cmd/stellar-rpc/internal/fullhistory/ingest/events.go
index 6bf9268b9..98be9f62e 100644
--- a/cmd/stellar-rpc/internal/fullhistory/ingest/events.go
+++ b/cmd/stellar-rpc/internal/fullhistory/ingest/events.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"fmt"
 	"math"
-	"path/filepath"
 	"time"
 
 	"github.com/stellar/go-stellar-sdk/xdr"
@@ -25,53 +24,6 @@ func eventPayloads(seq uint32, lcm xdr.LedgerCloseMetaView) ([]events.Payload, e
 	return payloads, nil
 }
 
-// ───────────────────────── Hot ingester ─────────────────────────
-
-// eventsHot derives []events.Payload from the view (events.LCMViewToPayloads) and
-// writes them with IngestLedgerEvents. Each call is one atomic RocksDB batch
-// (sync=true) plus an in-memory mirror update. The store is INJECTED, already
-// bound to a chunk, and owned by the caller.
-//
-// IngestLedgerEvents is called on every ledger, including ones with zero
-// payloads — LedgerOffsets.Append requires a contiguous sequence and would
-// reject the next non-empty ledger if an empty one were skipped.
-type eventsHot struct {
-	store *eventstore.HotStore
-	sink  MetricSink
-}
-
-// NewEventsHotIngester returns a HotIngester writing contract events into the
-// injected, caller-owned store (already bound to a chunk).
-func NewEventsHotIngester(store *eventstore.HotStore, sink MetricSink) HotIngester {
-	return &eventsHot{store: store, sink: orNop(sink)}
-}
-
-func (e *eventsHot) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error {
-	m := newHotMetrics(e.sink, dataTypeEvents)
-	var err error
-	defer func() { m.emit(err) }()
-
-	estart := time.Now()
-	payloads, eerr := eventPayloads(seq, lcm)
-	if eerr != nil {
-		err = eerr
-		return err
-	}
-	e.sink.IngestStage(dataTypeEvents, tierHot, stageExtract, time.Since(estart), len(payloads))
-	// IngestLedgerEvents marshals each payload into a scratch buffer that
-	// RocksDB copies synchronously, so the borrowed ContractEventBytes (aliasing
-	// the view) is safe to pass. Term indexing happens inside the store call,
-	// so the write stage here covers term derivation + the RocksDB batch.
-	wstart := time.Now()
-	if ierr := e.store.IngestLedgerEvents(seq, payloads); ierr != nil {
-		err = fmt.Errorf("IngestLedgerEvents(seq=%d, n=%d): %w", seq, len(payloads), ierr)
-		return err
-	}
-	e.sink.IngestStage(dataTypeEvents, tierHot, stageWrite, time.Since(wstart), len(payloads))
-	m.items = len(payloads)
-	return nil
-}
-
 // ───────────────────────── Cold ingester ─────────────────────────
 
 // eventsCold models the backfill path: per-ledger view → payloads → term-index
@@ -95,11 +47,11 @@ type eventsCold struct {
 	failed bool
 }
 
-// NewEventsColdIngester opens a per-chunk events.pack cold writer under coldDir
-// and returns a ColdIngester that owns it. The writer uses its zero-value
-// options; driver-level tuning is a follow-up via Config.
-func NewEventsColdIngester(coldDir string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) {
-	bucketDir := filepath.Join(coldDir, chunkID.BucketID())
+// NewEventsColdIngester opens a per-chunk events.pack cold writer in bucketDir —
+// the caller's geometry.Layout.EventsBucketDir(chunkID), so the write path is
+// Layout's single derivation — and returns a ColdIngester that owns it. The
+// writer uses its zero-value options; driver-level tuning is a follow-up via Config.
+func NewEventsColdIngester(bucketDir string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) {
 	w, err := eventstore.NewColdWriter(chunkID, bucketDir, eventstore.ColdWriterOptions{})
 	if err != nil {
 		return nil, fmt.Errorf("eventstore.NewColdWriter: %w", err)
@@ -117,11 +69,12 @@ func NewEventsColdIngester(coldDir string, chunkID chunk.ID, sink MetricSink) (C
 func (e *eventsCold) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error {
 	start := time.Now()
 	n, ierr := e.ingestSeq(seq, lcm)
+	e.metrics.observe(time.Since(start), n, ierr) // terminal on err: observe emits the per-ingester signal
 	if ierr != nil {
-		e.failed = true
+		e.failed = true // refuse a post-failure Finalize
+		return ierr
 	}
-	e.metrics.observe(time.Since(start), n, ierr)
-	return ierr
+	return nil
 }
 
 // Finalize writes the events.pack trailer (Finish) + materializes the cold
@@ -135,9 +88,9 @@ func (e *eventsCold) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMe
 func (e *eventsCold) Finalize(ctx context.Context) error {
 	start := time.Now()
 	if e.failed {
-		err := fmt.Errorf("events cold ingester for chunk %s: Finalize after failed Ingest", e.chunkID)
-		e.metrics.emit(time.Since(start), err)
-		return err
+		// Ingest already metered and latched this failure; refuse to finalize a
+		// chunk whose mirror/pack may be ahead of the offsets commit point.
+		return fmt.Errorf("events cold ingester for chunk %s: Finalize after failed Ingest", e.chunkID)
 	}
 	if err := e.writer.Finish(e.offsets); err != nil {
 		err = fmt.Errorf("events ColdWriter.Finish: %w", err)
@@ -153,32 +106,29 @@ func (e *eventsCold) Finalize(ctx context.Context) error {
 		e.metrics.emit(time.Since(start), err)
 		return err
 	}
-	e.metrics.sink.IngestStage(dataTypeEvents, tierCold, stageFinalize, time.Since(start), 0)
+	e.metrics.sink.IngestStage(dataTypeEvents, stageFinalize, time.Since(start), 0)
 	e.metrics.emit(time.Since(start), nil)
 	return nil
 }
 
-// Close drops the partial events.pack when Finalize never ran, and emits the
-// cold metrics if Finalize did not already (the failure path). The writer.Close
-// error is folded into the emitted metric so a close-time failure (e.g. ENOSPC
-// on the partial-drop) is counted in errors_total. emit is a no-op after a
-// successful Finalize. Error propagation is unchanged: the writer.Close error is
-// still returned.
+// Close drops the partial events.pack when Finalize never ran. It does NOT emit
+// the cold metric: a terminal Ingest error or Finalize already emitted it, and an
+// ingester that never got that far (a rolled-back build) must produce no phantom
+// sample. The writer.Close error is returned unchanged.
 func (e *eventsCold) Close() error {
-	cerr := e.writer.Close()
-	e.metrics.emit(0, cerr)
-	return cerr
+	return e.writer.Close()
 }
 
 // ingestSeq writes one ledger's events and returns the count written. The
-// pre-Soroban (V0) policy lives in eventPayloads, shared with the hot tier.
+// pre-Soroban (V0) policy lives in events.LCMViewToPayloads, shared with the
+// hot tier.
 func (e *eventsCold) ingestSeq(seq uint32, lcm xdr.LedgerCloseMetaView) (int, error) {
 	estart := time.Now()
 	payloads, err := eventPayloads(seq, lcm)
 	if err != nil {
 		return 0, err
 	}
-	e.metrics.sink.IngestStage(dataTypeEvents, tierCold, stageExtract, time.Since(estart), len(payloads))
+	e.metrics.sink.IngestStage(dataTypeEvents, stageExtract, time.Since(estart), len(payloads))
 
 	startID := e.offsets.TotalEvents()
 	if uint64(startID)+uint64(len(payloads)) > math.MaxUint32 {
@@ -216,7 +166,7 @@ func (e *eventsCold) ingestSeq(seq uint32, lcm xdr.LedgerCloseMetaView) (int, er
 		}
 		writeDur += time.Since(wstart)
 	}
-	e.metrics.sink.IngestStage(dataTypeEvents, tierCold, stageTermIndex, termDur, len(payloads))
+	e.metrics.sink.IngestStage(dataTypeEvents, stageTermIndex, termDur, len(payloads))
 
 	// offsets.Append LAST — it is the commit point for the ledger. Its cost folds
 	// into the write stage (rather than landing in the per-chunk total but in no
@@ -227,13 +177,9 @@ func (e *eventsCold) ingestSeq(seq uint32, lcm xdr.LedgerCloseMetaView) (int, er
 	//nolint:gosec // the overflow guard above proved startID+len(payloads) fits in uint32
 	oerr := e.offsets.Append(seq, uint32(len(payloads)))
 	writeDur += time.Since(wstart)
-	e.metrics.sink.IngestStage(dataTypeEvents, tierCold, stageWrite, writeDur, len(payloads))
+	e.metrics.sink.IngestStage(dataTypeEvents, stageWrite, writeDur, len(payloads))
 	if oerr != nil {
 		return 0, fmt.Errorf("offsets append seq %d: %w", seq, oerr)
 	}
 	return len(payloads), nil
 }
-
-// abortMetric records a synthetic abort error so a subsequent Close emit does
-// not look like a clean success. Used by the constructor-rollback path.
-func (e *eventsCold) abortMetric(err error) { e.metrics.recordErr(err) }
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go b/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go
index e98898302..033ea5f45 100644
--- a/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go
@@ -7,13 +7,13 @@ import (
 	"iter"
 	"os"
 	"path/filepath"
-	"strconv"
 	"sync"
 	"testing"
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
 	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
@@ -25,6 +25,7 @@ import (
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/events"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash"
 )
@@ -35,10 +36,10 @@ const testPassphrase = "Public Global Stellar Network ; September 2015"
 
 // ───────────────────────── test metric sink ─────────────────────────
 
-type hotCall struct {
-	dataType string
-	items    int
-	err      error
+type hotPhaseCall struct {
+	phase hotchunk.Phase
+	items int
+	err   error
 }
 
 type coldCall struct {
@@ -49,26 +50,24 @@ type coldCall struct {
 
 type stageCall struct {
 	dataType string
-	tier     string
 	stage    string
 	items    int
 }
 
 // testSink records every MetricSink call for assertions. Safe for concurrent
-// use (HotIngest fires from the per-ledger fan-out goroutines).
+// use (the hot methods fire from the per-ledger ingestion goroutine).
 type testSink struct {
 	mu              sync.Mutex
-	hotIngests      []hotCall
+	hotPhases       []hotPhaseCall
 	coldIngests     []coldCall
 	stages          []stageCall
-	hotLedgerTotals int
 	coldChunkTotals int
 }
 
-func (s *testSink) HotIngest(dataType string, _ time.Duration, items int, err error) {
+func (s *testSink) HotPhase(phase hotchunk.Phase, _ time.Duration, items int, err error) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
-	s.hotIngests = append(s.hotIngests, hotCall{dataType, items, err})
+	s.hotPhases = append(s.hotPhases, hotPhaseCall{phase, items, err})
 }
 
 func (s *testSink) ColdIngest(dataType string, _ time.Duration, items int, err error) {
@@ -77,45 +76,52 @@ func (s *testSink) ColdIngest(dataType string, _ time.Duration, items int, err e
 	s.coldIngests = append(s.coldIngests, coldCall{dataType, items, err})
 }
 
-func (s *testSink) HotLedgerTotal(time.Duration) {
-	s.mu.Lock()
-	defer s.mu.Unlock()
-	s.hotLedgerTotals++
-}
-
 func (s *testSink) ColdChunkTotal(time.Duration) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	s.coldChunkTotals++
 }
 
-func (s *testSink) IngestStage(dataType, tier, stage string, _ time.Duration, items int) {
+func (s *testSink) IngestStage(dataType, stage string, _ time.Duration, items int) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
-	s.stages = append(s.stages, stageCall{dataType, tier, stage, items})
+	s.stages = append(s.stages, stageCall{dataType, stage, items})
 }
 
-// stageCounts counts IngestStage calls keyed "dataType/tier/stage".
+// stageCounts counts cold IngestStage calls keyed "dataType/stage".
 func (s *testSink) stageCounts() map[string]int {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	m := map[string]int{}
 	for _, c := range s.stages {
-		m[c.dataType+"/"+c.tier+"/"+c.stage]++
+		m[c.dataType+"/"+c.stage]++
 	}
 	return m
 }
 
-func (s *testSink) hotDataTypes() map[string]int {
+// hotPhaseItems returns the total items reported for each hot phase, keyed by phase.
+func (s *testSink) hotPhaseItems() map[hotchunk.Phase]int {
 	s.mu.Lock()
 	defer s.mu.Unlock()
-	m := map[string]int{}
-	for _, c := range s.hotIngests {
-		m[c.dataType]++
+	m := map[hotchunk.Phase]int{}
+	for _, c := range s.hotPhases {
+		m[c.phase] += c.items
 	}
 	return m
 }
 
+// hotPhaseErr returns the first recorded phase that carried a non-nil error, or (0, false) if none.
+func (s *testSink) hotPhaseErr() (hotchunk.Phase, bool) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	for _, c := range s.hotPhases {
+		if c.err != nil {
+			return c.phase, true
+		}
+	}
+	return 0, false
+}
+
 func (s *testSink) coldDataTypes() map[string]int {
 	s.mu.Lock()
 	defer s.mu.Unlock()
@@ -191,13 +197,16 @@ func packPath(ledgersRoot string, c chunk.ID) string {
 	return filepath.Join(ledgersRoot, c.BucketID(), ledger.PackName(c))
 }
 
-// coldDirsAt derives the three per-type cold roots under one dir — the fixed
-// layout the removed RunCold used, convenient for single-tmpdir tests.
-func coldDirsAt(dir string) ColdDirs {
+// coldDirsAt resolves chunk c's three cold-artifact paths under one dir's per-type
+// roots — mirroring what geometry.Layout derives in production, so the readback
+// helpers (packPath/txhashBinPath) find what the ingesters wrote.
+//
+//nolint:unparam // chunk-general helper; every current caller uses chunk 0
+func coldDirsAt(dir string, c chunk.ID) ColdDirs {
 	return ColdDirs{
-		Ledgers: filepath.Join(dir, dataTypeLedgers),
-		Txhash:  filepath.Join(dir, dataTypeTxhash),
-		Events:  filepath.Join(dir, dataTypeEvents),
+		LedgerPack: packPath(filepath.Join(dir, dataTypeLedgers), c),
+		TxhashBin:  txhashBinPath(filepath.Join(dir, dataTypeTxhash)),
+		EventsDir:  filepath.Join(dir, dataTypeEvents, c.BucketID()),
 	}
 }
 
@@ -384,29 +393,6 @@ func marshalV0LCM(t *testing.T, seq uint32) []byte {
 	return raw
 }
 
-// seqStream is a ledgerbackend.LedgerStream that yields LCMs for an explicit
-// list of ledger sequences (in order), regardless of the requested range. It
-// models a backend that hands back a duplicate / out-of-order / wrong-but-
-// right-count sequence, exercising the drain seq guard.
-type seqStream struct {
-	t    *testing.T
-	seqs []uint32
-}
-
-var _ ledgerbackend.LedgerStream = (*seqStream)(nil)
-
-func (s *seqStream) RawLedgers(
-	_ context.Context, _ ledgerbackend.Range, _ ...ledgerbackend.StreamOption,
-) iter.Seq2[[]byte, error] {
-	return func(yield func([]byte, error) bool) {
-		for _, seq := range s.seqs {
-			if !yield(marshalLCM(s.t, seq), nil) {
-				return
-			}
-		}
-	}
-}
-
 // errAtSeqStream yields valid LCMs until it reaches errAtSeq, where it yields
 // (nil, err) — modeling a backend that fails mid-stream. Used to exercise the
 // drain RawLedgers error path.
@@ -436,68 +422,6 @@ func (s *errAtSeqStream) RawLedgers(
 
 // ───────────────────────── per-ingester unit tests ─────────────────────────
 
-// TestLedgerHotIngester_Readback ingests one ledger via the hot ledger ingester
-// (injected store) and reads the bytes back.
-func TestLedgerHotIngester_Readback(t *testing.T) {
-	seq := chunk.ID(0).FirstLedger()
-	raw := marshalLCM(t, seq)
-	dir := t.TempDir()
-	logger := testLogger()
-
-	store, err := ledger.OpenHotStore(dir, chunk.ID(0), logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, store.Close()) }()
-
-	ing := NewLedgerHotIngester(store, nil)
-	require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw)))
-
-	got, err := store.GetLedgerRaw(seq)
-	require.NoError(t, err)
-	require.Equal(t, raw, got)
-}
-
-// TestTxhashHotIngester_Lookup ingests an event/tx-bearing ledger via the hot
-// txhash ingester and looks the hash up.
-func TestTxhashHotIngester_Lookup(t *testing.T) {
-	seq := chunk.ID(0).FirstLedger()
-	raw, hash, _ := marshalLCMWithEvent(t, seq)
-	dir := t.TempDir()
-	logger := testLogger()
-
-	store, err := txhash.NewHotStore(dir, chunk.ID(0), logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, store.Close()) }()
-
-	ing := NewTxhashHotIngester(store, nil)
-	require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw)))
-
-	got, err := store.Get(hash)
-	require.NoError(t, err)
-	require.Equal(t, seq, got)
-}
-
-// TestEventsHotIngester_Query ingests an event-bearing ledger via the hot events
-// ingester and resolves the term.
-func TestEventsHotIngester_Query(t *testing.T) {
-	chunkID := chunk.ID(0)
-	seq := chunkID.FirstLedger()
-	raw, _, term := marshalLCMWithEvent(t, seq)
-	dir := t.TempDir()
-	logger := testLogger()
-
-	store, err := eventstore.OpenHotStore(dir, chunkID, logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, store.Close()) }()
-
-	ing := NewEventsHotIngester(store, nil)
-	require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw)))
-
-	bm, err := store.Lookup(context.Background(), term)
-	require.NoError(t, err)
-	require.NotNil(t, bm)
-	require.Equal(t, uint64(1), bm.GetCardinality())
-}
-
 // TestLedgerColdIngester_Readback ingests one ledger via the cold ledger
 // ingester, finalizes, and reads back through the cold reader.
 func TestLedgerColdIngester_Readback(t *testing.T) {
@@ -506,7 +430,7 @@ func TestLedgerColdIngester_Readback(t *testing.T) {
 	raw := marshalLCM(t, seq)
 	coldDir := t.TempDir()
 
-	ing, err := NewLedgerColdIngester(coldDir, chunkID, nil)
+	ing, err := NewLedgerColdIngester(packPath(coldDir, chunkID), chunkID, nil)
 	require.NoError(t, err)
 	defer func() { require.NoError(t, ing.Close()) }()
 
@@ -535,7 +459,7 @@ func TestTxhashColdIngester_Bin(t *testing.T) {
 	first := chunkID.FirstLedger()
 	coldDir := t.TempDir()
 
-	ing, err := NewTxhashColdIngester(coldDir, chunkID, nil)
+	ing, err := NewTxhashColdIngester(txhashBinPath(coldDir), chunkID, nil)
 	require.NoError(t, err)
 	defer func() { require.NoError(t, ing.Close()) }()
 
@@ -557,7 +481,7 @@ func TestEventsColdIngester_Readback(t *testing.T) {
 	first := chunkID.FirstLedger()
 	coldDir := t.TempDir()
 
-	ing, err := NewEventsColdIngester(coldDir, chunkID, nil)
+	ing, err := NewEventsColdIngester(filepath.Join(coldDir, chunkID.BucketID()), chunkID, nil)
 	require.NoError(t, err)
 	defer func() { require.NoError(t, ing.Close()) }()
 
@@ -584,28 +508,6 @@ func TestEventsColdIngester_Readback(t *testing.T) {
 
 // ───────────────────────── V0 (pre-Soroban) events handling ─────────────────────────
 
-// TestEventsHotIngester_V0AsEmpty asserts the hot events ingester treats a V0
-// LCM as a zero-event ledger (no error) rather than failing the range, and that
-// the store records the empty ledger (its event count is unchanged).
-func TestEventsHotIngester_V0AsEmpty(t *testing.T) {
-	chunkID := chunk.ID(0)
-	seq := chunkID.FirstLedger()
-	dir := t.TempDir()
-	logger := testLogger()
-
-	store, err := eventstore.OpenHotStore(dir, chunkID, logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, store.Close()) }()
-
-	ing := NewEventsHotIngester(store, nil)
-	require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(marshalV0LCM(t, seq))),
-		"V0 ledger must ingest as zero events, not error")
-
-	cnt, err := store.EventCount()
-	require.NoError(t, err)
-	require.Equal(t, uint32(0), cnt, "V0 ledger contributes no events")
-}
-
 // TestEventsColdIngester_V0KeepsOffsetsContiguous ingests a V0 ledger followed by
 // an event-bearing V2 ledger and asserts: the V0 ledger does not error, and the
 // LedgerOffsets stay contiguous (both ledgers present, the event-bearing one's
@@ -615,7 +517,7 @@ func TestEventsColdIngester_V0KeepsOffsetsContiguous(t *testing.T) {
 	first := chunkID.FirstLedger()
 	coldDir := t.TempDir()
 
-	ing, err := NewEventsColdIngester(coldDir, chunkID, nil)
+	ing, err := NewEventsColdIngester(filepath.Join(coldDir, chunkID.BucketID()), chunkID, nil)
 	require.NoError(t, err)
 	defer func() { require.NoError(t, ing.Close()) }()
 
@@ -674,7 +576,7 @@ func TestWriteColdChunk_EventlessChunk_FullyReadable(t *testing.T) {
 	// Every ledger in the chunk is a V0 (pre-Soroban) ledger → zero events.
 	require.NoError(t, WriteColdChunk(
 		context.Background(), logger, chunkID, rawChunk(fullStream(t, chunkID, marshalV0LCM), chunkID),
-		coldDirsAt(coldDir), sink, Config{Events: true},
+		coldDirsAt(coldDir, chunkID), sink, Config{Events: true},
 	))
 
 	bucketDir := filepath.Join(coldDir, dataTypeEvents, chunkID.BucketID())
@@ -706,92 +608,6 @@ func TestWriteColdChunk_EventlessChunk_FullyReadable(t *testing.T) {
 	require.Zero(t, sink.coldErrorTypes()[dataTypeEvents], "eventless chunk is not an error")
 }
 
-// ───────────────────────── HotService tests ─────────────────────────
-
-// TestHotService_AllTypes_FanOut runs HotService with all three hot ingesters
-// over event/tx-bearing ledgers and reads each store back, asserting the
-// aggregate HotLedgerTotal and per-ingester signals fired.
-func TestHotService_AllTypes_FanOut(t *testing.T) {
-	chunkID := chunk.ID(0)
-	first := chunkID.FirstLedger()
-	logger := testLogger()
-	dir := t.TempDir()
-
-	ls, err := ledger.OpenHotStore(filepath.Join(dir, "ledgers"), chunkID, logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, ls.Close()) }()
-	ts, err := txhash.NewHotStore(filepath.Join(dir, "txhash"), chunkID, logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, ts.Close()) }()
-	es, err := eventstore.OpenHotStore(filepath.Join(dir, "events"), chunkID, logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, es.Close()) }()
-
-	sink := &testSink{}
-	service := NewHotService([]HotIngester{
-		NewLedgerHotIngester(ls, sink),
-		NewTxhashHotIngester(ts, sink),
-		NewEventsHotIngester(es, sink),
-	}, sink)
-
-	rawA, hashA, termA := marshalLCMWithEvent(t, first)
-	rawB, hashB, _ := marshalLCMWithEvent(t, first+1)
-	require.NoError(t, service.Ingest(context.Background(), first, xdr.LedgerCloseMetaView(rawA)))
-	require.NoError(t, service.Ingest(context.Background(), first+1, xdr.LedgerCloseMetaView(rawB)))
-
-	// All three stores retained the data.
-	gotRawA, err := ls.GetLedgerRaw(first)
-	require.NoError(t, err)
-	require.Equal(t, rawA, gotRawA)
-	gotA, err := ts.Get(hashA)
-	require.NoError(t, err)
-	require.Equal(t, first, gotA)
-	gotB, err := ts.Get(hashB)
-	require.NoError(t, err)
-	require.Equal(t, first+1, gotB)
-	bm, err := es.Lookup(context.Background(), termA)
-	require.NoError(t, err)
-	require.Equal(t, uint64(2), bm.GetCardinality())
-
-	// Aggregate + per-ingester signals.
-	require.Equal(t, 2, sink.hotLedgerTotals, "one HotLedgerTotal per ledger")
-	dt := sink.hotDataTypes()
-	require.Equal(t, 2, dt[dataTypeLedgers])
-	require.Equal(t, 2, dt[dataTypeTxhash])
-	require.Equal(t, 2, dt[dataTypeEvents])
-
-	// Per-stage signals: each ledger fired the hot extract/write stages its
-	// data type defines (ledgers has no extract — it writes the view verbatim).
-	st := sink.stageCounts()
-	require.Equal(t, 2, st[dataTypeLedgers+"/"+tierHot+"/"+stageWrite])
-	require.Equal(t, 2, st[dataTypeTxhash+"/"+tierHot+"/"+stageExtract])
-	require.Equal(t, 2, st[dataTypeTxhash+"/"+tierHot+"/"+stageWrite])
-	require.Equal(t, 2, st[dataTypeEvents+"/"+tierHot+"/"+stageExtract])
-	require.Equal(t, 2, st[dataTypeEvents+"/"+tierHot+"/"+stageWrite])
-}
-
-// TestHotService_EnabledSubset runs HotService with only the ledger ingester and
-// asserts only that type's signals fire.
-func TestHotService_EnabledSubset(t *testing.T) {
-	seq := chunk.ID(0).FirstLedger()
-	logger := testLogger()
-	dir := t.TempDir()
-
-	ls, err := ledger.OpenHotStore(dir, chunk.ID(0), logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, ls.Close()) }()
-
-	sink := &testSink{}
-	service := NewHotService([]HotIngester{NewLedgerHotIngester(ls, sink)}, sink)
-	require.NoError(t, service.Ingest(context.Background(), seq, viewOf(t, seq)))
-
-	require.Equal(t, 1, sink.hotLedgerTotals)
-	dt := sink.hotDataTypes()
-	require.Equal(t, 1, dt[dataTypeLedgers])
-	require.Zero(t, dt[dataTypeTxhash])
-	require.Zero(t, dt[dataTypeEvents])
-}
-
 // ───────────────────────── ColdService tests ─────────────────────────
 
 // TestColdService_Success drives ledger+txhash+events cold ingesters through a
@@ -802,7 +618,8 @@ func TestColdService_Success(t *testing.T) {
 	coldDir := t.TempDir()
 	sink := &testSink{}
 
-	ings, err := buildColdIngesters(coldDirsAt(coldDir), chunkID, sink, Config{Ledgers: true, Txhash: true, Events: true})
+	ings, err := buildColdIngesters(
+		coldDirsAt(coldDir, chunkID), chunkID, sink, Config{Ledgers: true, Txhash: true, Events: true})
 	require.NoError(t, err)
 	service := NewColdService(ings, sink)
 	defer func() { require.NoError(t, service.Close()) }()
@@ -855,14 +672,14 @@ func TestColdService_Success(t *testing.T) {
 	// events now emits term_index/write for every ledger, and txhash's extract
 	// spans its whole per-ledger Ingest.
 	require.Equal(t, map[string]int{
-		dataTypeLedgers + "/" + tierCold + "/" + stageWrite:    2,
-		dataTypeLedgers + "/" + tierCold + "/" + stageFinalize: 1,
-		dataTypeTxhash + "/" + tierCold + "/" + stageExtract:   2,
-		dataTypeTxhash + "/" + tierCold + "/" + stageFinalize:  1,
-		dataTypeEvents + "/" + tierCold + "/" + stageExtract:   2,
-		dataTypeEvents + "/" + tierCold + "/" + stageTermIndex: 2,
-		dataTypeEvents + "/" + tierCold + "/" + stageWrite:     2,
-		dataTypeEvents + "/" + tierCold + "/" + stageFinalize:  1,
+		dataTypeLedgers + "/" + stageWrite:    2,
+		dataTypeLedgers + "/" + stageFinalize: 1,
+		dataTypeTxhash + "/" + stageExtract:   2,
+		dataTypeTxhash + "/" + stageFinalize:  1,
+		dataTypeEvents + "/" + stageExtract:   2,
+		dataTypeEvents + "/" + stageTermIndex: 2,
+		dataTypeEvents + "/" + stageWrite:     2,
+		dataTypeEvents + "/" + stageFinalize:  1,
 	}, sink.stageCounts())
 
 	// No double-emit: the deferred Close (after this body) must not add a second
@@ -887,22 +704,21 @@ func (f *failingCold) Ingest(context.Context, uint32, xdr.LedgerCloseMetaView) e
 func (f *failingCold) Finalize(context.Context) error { f.finalized = true; return nil }
 func (f *failingCold) Close() error                   { f.closed = true; return nil }
 
-// TestColdService_FailurePath_NoArtifact uses a real ledger cold ingester plus a
+// TestColdService_FailurePath_NoArtifact uses two real cold ingesters plus a
 // failing sibling: ColdService.Ingest returns the sibling's error, Finalize is
 // not called, the deferred Close drops the partial ledger pack, and no finalized
-// artifact remains. It also asserts the cold metrics still fire on this failure
-// path: each real ingester emits exactly one ColdIngest and the service emits one
-// aggregate ColdChunkTotal — driven from Close, since Finalize never ran.
+// artifact remains. It asserts the aggregate ColdChunkTotal still fires for the
+// attempt, but the two real ingesters emit NO per-ingester ColdIngest: each
+// ingested cleanly (no terminal error of its own) and never finalized, and Close
+// no longer emits — so a chunk abandoned by a sibling leaves no phantom sample.
 func TestColdService_FailurePath_NoArtifact(t *testing.T) {
 	chunkID := chunk.ID(0)
 	coldDir := t.TempDir()
 	sink := &testSink{}
 
-	// Two real cold ingesters (ledger + events) plus a failing sibling, so we can
-	// assert each real ingester emits its per-chunk ColdIngest from Close.
-	realLedger, err := NewLedgerColdIngester(filepath.Join(coldDir, dataTypeLedgers), chunkID, sink)
+	realLedger, err := NewLedgerColdIngester(packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID), chunkID, sink)
 	require.NoError(t, err)
-	realEvents, err := NewEventsColdIngester(filepath.Join(coldDir, dataTypeEvents), chunkID, sink)
+	realEvents, err := NewEventsColdIngester(filepath.Join(coldDir, dataTypeEvents, chunkID.BucketID()), chunkID, sink)
 	require.NoError(t, err)
 	failing := &failingCold{}
 	service := NewColdService([]ColdIngester{realLedger, realEvents, failing}, sink)
@@ -913,19 +729,16 @@ func TestColdService_FailurePath_NoArtifact(t *testing.T) {
 	require.ErrorIs(t, err, errFailingCold)
 	require.False(t, failing.finalized, "Finalize must not run on the failure path")
 
-	// Before Close, no cold metric has fired (emission is deferred to Close on the
-	// failure path).
-	require.Empty(t, sink.coldDataTypes(), "no ColdIngest before Close on failure path")
-	require.Zero(t, sink.coldChunkTotals, "no ColdChunkTotal before Close on failure path")
+	// Nothing has emitted: the real ingesters ingested cleanly (no terminal error)
+	// and never finalized; the mock sibling records nothing.
+	require.Empty(t, sink.coldDataTypes(), "no per-ingester ColdIngest on the sibling-failure path")
+	require.Zero(t, sink.coldChunkTotals, "no ColdChunkTotal before Close")
 
-	// Close drops partials and drives the deferred metric emissions.
+	// Close drops partials and emits the aggregate only.
 	require.NoError(t, service.Close())
 	require.True(t, failing.closed)
 
-	// Each real ingester emitted exactly one ColdIngest; the aggregate fired once.
-	cdt := sink.coldDataTypes()
-	require.Equal(t, 1, cdt[dataTypeLedgers], "ledger cold ingester emits once on failure path")
-	require.Equal(t, 1, cdt[dataTypeEvents], "events cold ingester emits once on failure path")
+	require.Empty(t, sink.coldDataTypes(), "a chunk abandoned by a sibling emits no per-ingester ColdIngest")
 	require.Equal(t, 1, sink.coldChunkTotals, "exactly one aggregate ColdChunkTotal")
 
 	// No finalized ledger pack must exist.
@@ -938,24 +751,27 @@ func TestColdService_FailurePath_NoArtifact(t *testing.T) {
 // so its OWN Ingest fails (recording firstErr), then Close. The failure is an
 // out-of-order seq: the per-chunk ColdWriter expects the chunk's first ledger,
 // so AppendLedger rejects a later one. Per #765 a failed cold chunk must record
-// a per-ingester error count and an aggregate duration sample. Emission happens
-// exactly once (from Close), with the accumulated error carried.
+// a per-ingester error count and an aggregate duration sample. A terminal Ingest
+// error emits the single per-ingester ColdIngest right there (Close no longer
+// emits), so the error-carrying sample is present after Ingest returns.
 func TestColdIngester_Failure_RecordsErrorMetric(t *testing.T) {
 	chunkID := chunk.ID(0)
 	coldDir := t.TempDir()
 	sink := &testSink{}
 
-	realLedger, err := NewLedgerColdIngester(filepath.Join(coldDir, dataTypeLedgers), chunkID, sink)
+	realLedger, err := NewLedgerColdIngester(packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID), chunkID, sink)
 	require.NoError(t, err)
 	service := NewColdService([]ColdIngester{realLedger}, sink)
 
 	// An out-of-order seq makes the writer's own AppendLedger fail inside the
-	// ingester's Ingest, so it records its firstErr. (drain would never feed
-	// this — the test targets the ingester's metric path directly.)
+	// ingester's Ingest, so it records its firstErr and emits the error-carrying
+	// ColdIngest. (drain would never feed this — the test targets the ingester's
+	// metric path directly.)
 	wrongSeq := chunkID.FirstLedger() + 5
 	require.Error(t, service.Ingest(context.Background(), wrongSeq, viewOf(t, wrongSeq)))
+	require.Equal(t, 1, sink.coldDataTypes()[dataTypeLedgers], "the failed Ingest emits its ColdIngest immediately")
 
-	// Finalize is skipped on this path; Close drives the single emission.
+	// Finalize is skipped on this path; Close emits nothing more.
 	require.NoError(t, service.Close())
 
 	// Exactly one ColdIngest for ledgers, carrying the error, plus one aggregate.
@@ -972,13 +788,16 @@ func TestPrometheusSink_Smoke(t *testing.T) {
 	reg := prometheus.NewRegistry()
 	require.NotPanics(t, func() {
 		sink := NewPrometheusSink(reg, "test")
-		sink.HotIngest(dataTypeLedgers, time.Millisecond, 1, nil)
-		sink.HotIngest(dataTypeEvents, time.Millisecond, 3, errFailingCold)
+		// The five hot per-ledger phases: extract/commit carry no items, the write
+		// phases carry per-type volume; the commit phase exercises the error dimension.
+		sink.HotPhase(hotchunk.PhaseExtract, time.Millisecond, 0, nil)
+		sink.HotPhase(hotchunk.PhaseLedgers, time.Millisecond, 1, nil)
+		sink.HotPhase(hotchunk.PhaseTxhash, time.Millisecond, 5, nil)
+		sink.HotPhase(hotchunk.PhaseEvents, time.Millisecond, 3, nil)
+		sink.HotPhase(hotchunk.PhaseCommit, time.Millisecond, 0, errFailingCold)
 		sink.ColdIngest(dataTypeTxhash, time.Second, 100, nil)
-		sink.HotLedgerTotal(time.Millisecond)
 		sink.ColdChunkTotal(time.Second)
-		sink.IngestStage(dataTypeEvents, tierHot, stageExtract, time.Millisecond, 3)
-		sink.IngestStage(dataTypeEvents, tierCold, stageFinalize, time.Second, 0)
+		sink.IngestStage(dataTypeEvents, stageFinalize, time.Second, 0)
 	})
 
 	mfs, err := reg.Gather()
@@ -986,78 +805,6 @@ func TestPrometheusSink_Smoke(t *testing.T) {
 	require.NotEmpty(t, mfs)
 }
 
-// ───────────────────────── hot driver tests ─────────────────────────
-
-// TestRunHot_AllTypes_Readback runs the RunHot driver with injected hot stores
-// over event/tx-bearing ledgers and asserts each hot store reads back. The short
-// stream ends early so RunHot returns the completeness error after both ledgers
-// are fully ingested.
-func TestRunHot_AllTypes_Readback(t *testing.T) {
-	chunkID := chunk.ID(0)
-	first := chunkID.FirstLedger()
-	logger := testLogger()
-	dir := t.TempDir()
-
-	ls, err := ledger.OpenHotStore(filepath.Join(dir, "ledgers"), chunkID, logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, ls.Close()) }()
-	ts, err := txhash.NewHotStore(filepath.Join(dir, "txhash"), chunkID, logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, ts.Close()) }()
-	es, err := eventstore.OpenHotStore(filepath.Join(dir, "events"), chunkID, logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, es.Close()) }()
-
-	evSeqA, evSeqB := first, first+1
-	rawA, hashA, termA := marshalLCMWithEvent(t, evSeqA)
-	rawB, hashB, _ := marshalLCMWithEvent(t, evSeqB)
-	gen := func(tt *testing.T, seq uint32) []byte {
-		switch seq {
-		case evSeqA:
-			return rawA
-		case evSeqB:
-			return rawB
-		default:
-			return marshalLCM(tt, seq)
-		}
-	}
-	stream := &fakeStream{t: t, count: 2, gen: gen}
-
-	stores := HotStores{Ledgers: ls, Txhash: ts, Events: es}
-	cfg := Config{Ledgers: true, Txhash: true, Events: true}
-
-	err = RunHot(context.Background(), logger, stream, chunkID, stores, nil, cfg)
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "ended at")
-
-	gotRawA, err := ls.GetLedgerRaw(evSeqA)
-	require.NoError(t, err)
-	require.Equal(t, rawA, gotRawA)
-
-	gotA, err := ts.Get(hashA)
-	require.NoError(t, err)
-	require.Equal(t, evSeqA, gotA)
-	gotB, err := ts.Get(hashB)
-	require.NoError(t, err)
-	require.Equal(t, evSeqB, gotB)
-
-	bm, err := es.Lookup(context.Background(), termA)
-	require.NoError(t, err)
-	require.NotNil(t, bm)
-	require.Equal(t, uint64(2), bm.GetCardinality(), "both sentinel events share the term")
-}
-
-// TestRunHot_MissingStore asserts RunHot rejects an enabled type with a nil
-// injected store.
-func TestRunHot_MissingStore(t *testing.T) {
-	chunkID := chunk.ID(0)
-	logger := testLogger()
-	err := RunHot(context.Background(), logger, &fakeStream{t: t, count: 1}, chunkID,
-		HotStores{}, nil, Config{Ledgers: true})
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "HotStores.Ledgers is nil")
-}
-
 // ───────────────────────── cold driver tests ─────────────────────────
 
 func TestWriteColdChunk_RoundTrip(t *testing.T) {
@@ -1070,7 +817,8 @@ func TestWriteColdChunk_RoundTrip(t *testing.T) {
 	sink := &testSink{}
 
 	require.NoError(t, WriteColdChunk(
-		context.Background(), logger, chunkID, rawChunk(stream, chunkID), coldDirsAt(coldDir), sink, Config{Ledgers: true},
+		context.Background(), logger, chunkID, rawChunk(stream, chunkID),
+		coldDirsAt(coldDir, chunkID), sink, Config{Ledgers: true},
 	))
 
 	path := packPath(filepath.Join(coldDir, "ledgers"), chunkID)
@@ -1099,7 +847,8 @@ func TestWriteColdChunk_ShortStream_NoArtifact(t *testing.T) {
 
 	short := &fakeStream{t: t, count: 3}
 	err := WriteColdChunk(
-		context.Background(), logger, chunkID, rawChunk(short, chunkID), coldDirsAt(coldDir), nil, Config{Ledgers: true},
+		context.Background(), logger, chunkID, rawChunk(short, chunkID),
+		coldDirsAt(coldDir, chunkID), nil, Config{Ledgers: true},
 	)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "ended at")
@@ -1128,7 +877,7 @@ func TestWriteColdChunk_TxhashCold_Bin(t *testing.T) {
 
 	require.NoError(t, WriteColdChunk(
 		context.Background(), logger, chunkID, rawChunk(fullStream(t, chunkID, gen), chunkID),
-		coldDirsAt(coldDir), nil, Config{Txhash: true},
+		coldDirsAt(coldDir, chunkID), nil, Config{Txhash: true},
 	))
 
 	entries, err := txhash.ReadColdBin(txhashBinPath(filepath.Join(coldDir, dataTypeTxhash)))
@@ -1157,7 +906,7 @@ func TestWriteColdChunk_EventsCold_Readback(t *testing.T) {
 
 	require.NoError(t, WriteColdChunk(
 		context.Background(), logger, chunkID, rawChunk(fullStream(t, chunkID, gen), chunkID),
-		coldDirsAt(coldDir), nil, Config{Events: true},
+		coldDirsAt(coldDir, chunkID), nil, Config{Events: true},
 	))
 
 	bucketDir := filepath.Join(coldDir, "events", chunkID.BucketID())
@@ -1174,76 +923,17 @@ func TestWriteColdChunk_EventsCold_Readback(t *testing.T) {
 	require.Equal(t, uint64(len(evSeqs)), bm.GetCardinality())
 }
 
-// ───────────────────────── drain seq guard (P0-1) ─────────────────────────
-
-// TestWriteColdChunk_OutOfOrderSeq_NoArtifact feeds a stream that yields a ledger out
-// of expected order (the second ledger repeats the first's seq — right total
-// count, wrong sequence). drain must reject it with the mismatch error before
-// any Finalize, and leave no cold artifact behind.
-func TestWriteColdChunk_OutOfOrderSeq_NoArtifact(t *testing.T) {
-	chunkID := chunk.ID(0)
-	first := chunkID.FirstLedger()
-	last := chunkID.LastLedger()
-	coldDir := t.TempDir()
-	logger := testLogger()
-
-	// Build a full-length seq list, then corrupt the second entry to a
-	// duplicate of the first: same count as a valid stream, wrong order.
-	seqs := make([]uint32, 0, last-first+1)
-	for s := first; s <= last; s++ {
-		seqs = append(seqs, s)
-	}
-	require.GreaterOrEqual(t, len(seqs), 2)
-	seqs[1] = seqs[0] // duplicate/out-of-order while keeping the count intact
-
-	stream := &seqStream{t: t, seqs: seqs}
-	err := WriteColdChunk(
-		context.Background(), logger, chunkID, rawChunk(stream, chunkID), coldDirsAt(coldDir), nil, Config{Ledgers: true},
-	)
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "yielded ledger")
-	require.Contains(t, err.Error(), "expected")
-
-	// No finalized artifact: the deferred Close dropped the partial pack.
-	path := packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID)
-	_, statErr := os.Stat(path)
-	require.True(t, os.IsNotExist(statErr), "expected no cold artifact at %s, stat err: %v", path, statErr)
-}
-
-// TestDrain_TxhashSeqGuard asserts the guard also fires on the txhash path,
-// where a wrong-but-right-count sequence would otherwise be silently absorbed
-// (each ledger keys on its own LCM seq).
-func TestDrain_TxhashSeqGuard(t *testing.T) {
-	chunkID := chunk.ID(0)
-	first := chunkID.FirstLedger()
-	last := chunkID.LastLedger()
-	coldDir := t.TempDir()
-	logger := testLogger()
-
-	seqs := make([]uint32, 0, last-first+1)
-	for s := first; s <= last; s++ {
-		seqs = append(seqs, s)
-	}
-	require.GreaterOrEqual(t, len(seqs), 2)
-	// Corrupt the SECOND ledger so at least one valid ledger is ingested
-	// before the guard fires.
-	seqs[1] += 100
-
-	err := WriteColdChunk(
-		context.Background(), logger, chunkID, rawChunk(&seqStream{t: t, seqs: seqs}, chunkID),
-		coldDirsAt(coldDir), nil, Config{Txhash: true},
-	)
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "yielded ledger")
-
-	binPath := txhashBinPath(filepath.Join(coldDir, dataTypeTxhash))
-	_, statErr := os.Stat(binPath)
-	require.True(t, os.IsNotExist(statErr), "expected no .bin at %s, stat err: %v", binPath, statErr)
-}
+// ───────────────────────── drain stream errors ─────────────────────────
+//
+// The per-seq order guard that the shared cursor used to run in drain has moved to
+// the SOURCE (packStream reads positionally; hotLedgerStream key-checks its keyspace,
+// see TestSource_RejectsGap; the SDK backends validate their own output), so drain
+// keeps only its overrun + completeness checks on a local counter. The tests that
+// fed an artificially mis-ordered stream to drain were deleted with the cursor.
 
 // TestWriteColdChunk_DrainStreamError_NoArtifact exercises the drain mid-stream error
 // path: the backend yields valid ledgers, then hands back (nil, err) at a seq in
-// the middle of the chunk. drain must wrap the error with RawLedgers + the seq,
+// the middle of the chunk. drain must propagate the error (wrapped with the chunk),
 // short-circuit before Finalize (so no cold artifact is committed), and the
 // deferred Close must drop the partial.
 func TestWriteColdChunk_DrainStreamError_NoArtifact(t *testing.T) {
@@ -1257,12 +947,12 @@ func TestWriteColdChunk_DrainStreamError_NoArtifact(t *testing.T) {
 	stream := &errAtSeqStream{t: t, errAtSeq: failAt, err: wantErr}
 
 	err := WriteColdChunk(
-		context.Background(), logger, chunkID, rawChunk(stream, chunkID), coldDirsAt(coldDir), nil, Config{Ledgers: true},
+		context.Background(), logger, chunkID, rawChunk(stream, chunkID),
+		coldDirsAt(coldDir, chunkID), nil, Config{Ledgers: true},
 	)
 	require.Error(t, err)
 	require.ErrorIs(t, err, wantErr, "the backend error must propagate")
-	require.Contains(t, err.Error(), "RawLedgers", "error wraps RawLedgers")
-	require.Contains(t, err.Error(), strconv.FormatUint(uint64(failAt), 10), "error names the failing seq")
+	require.Contains(t, err.Error(), "stream for chunk", "error wraps the drained chunk")
 
 	// Finalize never ran → no finalized artifact; deferred Close dropped the partial.
 	path := packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID)
@@ -1275,103 +965,60 @@ func TestWriteColdChunk_DrainStreamError_NoArtifact(t *testing.T) {
 // pkg/stores/txhash (cold_bin_test.go); these tests only cover the
 // ingester-level behavior on top of it.
 
-// ───────────────────────── HotService failure path (P1-c) ─────────────────────────
-
-// failingHot is a HotIngester whose Ingest always fails. ctxObserved records
-// whether the ingester's context was already canceled when it ran (used to
-// show errgroup sibling cancellation in the multi-ingester path).
-type failingHot struct {
-	mu          sync.Mutex
-	ran         int
-	ctxObserved error
-}
-
-var errFailingHot = errors.New("failingHot: induced ingest failure")
+// ───────────────────────── hot service emission ─────────────────────────
 
-func (f *failingHot) Ingest(ctx context.Context, _ uint32, _ xdr.LedgerCloseMetaView) error {
-	f.mu.Lock()
-	f.ran++
-	f.ctxObserved = ctx.Err()
-	f.mu.Unlock()
-	return errFailingHot
-}
-
-// blockingHot blocks until its context is canceled, then reports the cancel
-// error. Pairs with failingHot in the multi-ingester test to prove the first
-// error cancels the siblings via the errgroup context.
-type blockingHot struct {
-	canceled chan struct{}
-	once     sync.Once
+func hotTestLogger() *supportlog.Entry {
+	l := supportlog.New()
+	l.SetLevel(logrus.ErrorLevel) // only error-level (and more severe) entries are logged
+	return l
 }
 
-func (b *blockingHot) Ingest(ctx context.Context, _ uint32, _ xdr.LedgerCloseMetaView) error {
-	<-ctx.Done()
-	b.once.Do(func() { close(b.canceled) })
-	return ctx.Err()
-}
+// TestHotService_EmitsEveryPhaseOnSuccess constructs a HotService over a real hot
+// DB with a recording sink and asserts one successful ingest emits every phase
+// once, the write phases carry per-type volume (extract/commit carry none), and no
+// phase carries an error.
+func TestHotService_EmitsEveryPhaseOnSuccess(t *testing.T) {
+	db, err := hotchunk.Open(t.TempDir(), chunk.ID(0), hotTestLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = db.Close() }) // best-effort cleanup: the Close error is explicitly discarded
 
-// TestHotService_SingleIngesterFailure asserts the len==1 fast path returns the
-// ingester error and still emits exactly one HotLedgerTotal.
-func TestHotService_SingleIngesterFailure(t *testing.T) {
 	sink := &testSink{}
-	fail := &failingHot{}
-	service := NewHotService([]HotIngester{fail}, sink)
-
-	err := service.Ingest(context.Background(), chunk.ID(0).FirstLedger(), viewOf(t, chunk.ID(0).FirstLedger()))
-	require.ErrorIs(t, err, errFailingHot)
-	require.Equal(t, 1, sink.hotLedgerTotals, "HotLedgerTotal fires exactly once even on failure")
-}
+	svc := NewHotService(db, sink)
+	first := chunk.ID(0).FirstLedger()
+	raw, _, _ := marshalLCMWithEvent(t, first) // one tx, one event
+	require.NoError(t, svc.Ingest(context.Background(), first, xdr.LedgerCloseMetaView(raw)))
+
+	require.Len(t, sink.hotPhases, int(hotchunk.NumPhases), "every phase emitted once on success")
+	items := sink.hotPhaseItems()
+	assert.Equal(t, 1, items[hotchunk.PhaseLedgers], "one ledger")
+	assert.Equal(t, 1, items[hotchunk.PhaseTxhash], "one tx hash")
+	assert.Equal(t, 1, items[hotchunk.PhaseEvents], "one event")
+	assert.Zero(t, items[hotchunk.PhaseExtract], "extract carries no items")
+	assert.Zero(t, items[hotchunk.PhaseCommit], "commit carries no items")
+	_, hadErr := sink.hotPhaseErr()
+	assert.False(t, hadErr, "success path carries no phase error")
+}
+
+// TestHotService_CommitErrorLandsOnCommitPhase asserts a commit failure (a closed
+// DB) surfaces the error on the commit phase — by construction, not by a
+// separately-maintained label — and emits no items on the failure path.
+func TestHotService_CommitErrorLandsOnCommitPhase(t *testing.T) {
+	db, err := hotchunk.Open(t.TempDir(), chunk.ID(0), hotTestLogger())
+	require.NoError(t, err)
+	require.NoError(t, db.Close()) // close the DB up front so the batch commit inside Ingest fails
 
-// TestHotService_MultiIngesterFailureCancelsSiblings asserts the errgroup path
-// propagates the failing ingester's error, cancels the sibling via the group
-// context, and still emits exactly one HotLedgerTotal.
-func TestHotService_MultiIngesterFailureCancelsSiblings(t *testing.T) {
 	sink := &testSink{}
-	fail := &failingHot{}
-	block := &blockingHot{canceled: make(chan struct{})}
-	service := NewHotService([]HotIngester{fail, block}, sink)
-
-	err := service.Ingest(context.Background(), chunk.ID(0).FirstLedger(), viewOf(t, chunk.ID(0).FirstLedger()))
-	require.ErrorIs(t, err, errFailingHot)
-
-	// The blocking sibling only returns once its context is canceled, so a
-	// non-blocking Ingest return already proves cancellation propagated.
-	select {
-	case <-block.canceled:
-	case <-time.After(2 * time.Second):
-		t.Fatal("sibling ingester was not canceled by the failing ingester")
+	svc := NewHotService(db, sink)
+	first := chunk.ID(0).FirstLedger()
+	raw, _, _ := marshalLCMWithEvent(t, first)
+	require.Error(t, svc.Ingest(context.Background(), first, xdr.LedgerCloseMetaView(raw)))
+
+	phase, hadErr := sink.hotPhaseErr()
+	require.True(t, hadErr, "the failure must be reported on a phase")
+	assert.Equal(t, hotchunk.PhaseCommit, phase, "a commit failure lands on the commit phase")
+	for p, n := range sink.hotPhaseItems() {
+		assert.Zero(t, n, "no items on the failure path (phase %v)", p)
 	}
-	require.Equal(t, 1, sink.hotLedgerTotals, "HotLedgerTotal fires exactly once even on failure")
-}
-
-// TestHotIngester_Failure_RecordsErrorMetric drives a REAL hot ingester
-// (eventsHot, built via NewEventsHotIngester) with a malformed view so its own
-// Ingest fails through the production hotMetrics emit path — unlike the
-// failingHot/blockingHot stubs, which bypass hotMetrics entirely. Per #765 a
-// failed hot Ingest must record exactly one HotIngest carrying a non-nil error
-// for that data type. Mirrors the cold-side TestColdIngester_Failure_RecordsErrorMetric.
-func TestHotIngester_Failure_RecordsErrorMetric(t *testing.T) {
-	chunkID := chunk.ID(0)
-	logger := testLogger()
-	dir := t.TempDir()
-	sink := &testSink{}
-
-	store, err := eventstore.OpenHotStore(dir, chunkID, logger)
-	require.NoError(t, err)
-	defer func() { require.NoError(t, store.Close()) }()
-
-	ing := NewEventsHotIngester(store, sink)
-
-	// A truncated/garbage view makes the event extraction fail inside the real
-	// Ingest, so the deferred hotMetrics.emit reports the wrapped error.
-	bad := xdr.LedgerCloseMetaView([]byte{0x00, 0x01, 0x02})
-	require.Error(t, ing.Ingest(context.Background(), chunkID.FirstLedger(), bad))
-
-	sink.mu.Lock()
-	defer sink.mu.Unlock()
-	require.Len(t, sink.hotIngests, 1, "exactly one HotIngest recorded")
-	require.Equal(t, dataTypeEvents, sink.hotIngests[0].dataType)
-	require.Error(t, sink.hotIngests[0].err, "the recorded HotIngest carries the ingest error")
 }
 
 // ───────────────────────── cold txhash .bin content (P1-d) ─────────────────────────
@@ -1387,7 +1034,7 @@ func TestTxhashColdIngester_BinContent(t *testing.T) {
 	first := chunkID.FirstLedger()
 	coldDir := t.TempDir()
 
-	ing, err := NewTxhashColdIngester(coldDir, chunkID, nil)
+	ing, err := NewTxhashColdIngester(txhashBinPath(coldDir), chunkID, nil)
 	require.NoError(t, err)
 	defer func() { require.NoError(t, ing.Close()) }()
 
@@ -1436,53 +1083,12 @@ func TestWriteColdChunk_CanceledContext(t *testing.T) {
 	cancel()
 	rerr := WriteColdChunk(
 		ctx, logger, chunkID, rawChunk(fullStream(t, chunkID, nil), chunkID),
-		coldDirsAt(coldDir), sink, Config{Ledgers: true},
+		coldDirsAt(coldDir, chunkID), sink, Config{Ledgers: true},
 	)
 	require.ErrorIs(t, rerr, context.Canceled)
 	require.Equal(t, 1, sink.coldChunkTotals, "a canceled chunk attempt still emits one ColdChunkTotal")
 }
 
-// ───────────────────────── RunHot chunkID cross-check (P2-e) ─────────────────────────
-
-// TestRunHot_ChunkIDMismatch asserts RunHot rejects ANY injected hot store
-// bound to a different chunk than the one being ingested, with a clear
-// up-front error (rather than silently interleaving chunks on the ledger and
-// txhash paths, or a later per-ledger out-of-range on the events path). All
-// three hot stores are chunk-bound.
-func TestRunHot_ChunkIDMismatch(t *testing.T) {
-	ingestChunk := chunk.ID(1)
-	storeChunk := chunk.ID(0)
-	logger := testLogger()
-
-	run := func(t *testing.T, stores HotStores, cfg Config) {
-		t.Helper()
-		err := RunHot(context.Background(), logger, &fakeStream{t: t, count: 1}, ingestChunk,
-			stores, nil, cfg)
-		require.Error(t, err)
-		require.Contains(t, err.Error(), "bound to chunk 0")
-		require.Contains(t, err.Error(), "RunHot chunk 1")
-	}
-
-	t.Run("ledgers", func(t *testing.T) {
-		ls, err := ledger.OpenHotStore(t.TempDir(), storeChunk, logger)
-		require.NoError(t, err)
-		defer func() { require.NoError(t, ls.Close()) }()
-		run(t, HotStores{Ledgers: ls}, Config{Ledgers: true})
-	})
-	t.Run("txhash", func(t *testing.T) {
-		ts, err := txhash.NewHotStore(t.TempDir(), storeChunk, logger)
-		require.NoError(t, err)
-		defer func() { require.NoError(t, ts.Close()) }()
-		run(t, HotStores{Txhash: ts}, Config{Txhash: true})
-	})
-	t.Run("events", func(t *testing.T) {
-		es, err := eventstore.OpenHotStore(t.TempDir(), storeChunk, logger)
-		require.NoError(t, err)
-		defer func() { require.NoError(t, es.Close()) }()
-		run(t, HotStores{Events: es}, Config{Events: true})
-	})
-}
-
 // ───────────────────────── Config validate / guard negatives (P2-g) ─────────────────────────
 
 // TestWriteColdChunk_ConfigGuards covers the validate guard on the cold materializer:
@@ -1493,15 +1099,7 @@ func TestWriteColdChunk_ConfigGuards(t *testing.T) {
 	chunkID := chunk.ID(0)
 
 	err := WriteColdChunk(context.Background(), logger, chunkID,
-		rawChunk(fullStream(t, chunkID, nil), chunkID), coldDirsAt(t.TempDir()), nil, Config{})
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "enables no data types")
-}
-
-// TestRunHot_EmptyConfig asserts the hot driver also rejects an empty Config.
-func TestRunHot_EmptyConfig(t *testing.T) {
-	err := RunHot(context.Background(), testLogger(), &fakeStream{t: t, count: 1},
-		chunk.ID(0), HotStores{}, nil, Config{})
+		rawChunk(fullStream(t, chunkID, nil), chunkID), coldDirsAt(t.TempDir(), chunkID), nil, Config{})
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "enables no data types")
 }
@@ -1521,13 +1119,13 @@ func countCleanColdIngests(s *testSink) int {
 	return n
 }
 
-// TestBuildColdIngesters_RollbackNoPhantomMetric makes a LATER constructor
-// (txhash) fail by planting a regular file at the txhash per-type directory,
-// so the constructor's own MkdirAll fails. The earlier-built ledger ingester
-// is rolled back via closeColdAll, which must NOT emit a phantom success
-// ColdIngest — the recorded ledger metric (if any) must carry the abort
-// error, never a clean (nil-err, 0-items) success.
-func TestBuildColdIngesters_RollbackNoPhantomMetric(t *testing.T) {
+// TestBuildColdIngesters_RollbackOneBuilt makes a LATER constructor (txhash) fail
+// by planting a regular file at the txhash per-type directory, so the
+// constructor's own MkdirAll fails. The earlier-built ledger ingester is rolled
+// back via closeColdAll — which only closes it. Since Close no longer emits a
+// per-ingester ColdIngest, a rolled-back ingester (built, never ingested or
+// finalized) produces NO sample at all: no phantom success, no synthetic abort.
+func TestBuildColdIngesters_RollbackOneBuilt(t *testing.T) {
 	chunkID := chunk.ID(0)
 	coldDir := t.TempDir()
 	sink := &testSink{}
@@ -1537,27 +1135,19 @@ func TestBuildColdIngesters_RollbackNoPhantomMetric(t *testing.T) {
 	// fails its bucket-dir MkdirAll.
 	require.NoError(t, os.WriteFile(filepath.Join(coldDir, dataTypeTxhash), []byte("not a dir"), 0o644))
 
-	_, err := buildColdIngesters(coldDirsAt(coldDir), chunkID, sink, Config{Ledgers: true, Txhash: true})
+	_, err := buildColdIngesters(coldDirsAt(coldDir, chunkID), chunkID, sink, Config{Ledgers: true, Txhash: true})
 	require.Error(t, err, "txhash constructor must fail on the planted file")
 
-	// The ledger ingester was built then rolled back. No phantom SUCCESS metric:
-	// any recorded ledger ColdIngest must carry an error.
-	cdt := sink.coldDataTypes()
-	if cdt[dataTypeLedgers] > 0 {
-		require.Equal(t, cdt[dataTypeLedgers], sink.coldErrorTypes()[dataTypeLedgers],
-			"rolled-back ledger ingester must not emit a phantom success ColdIngest")
-	}
-	// And the success-only assertion: there must be zero clean (nil-err) cold
-	// ingest signals recorded.
-	require.Zero(t, countCleanColdIngests(sink), "no clean ColdIngest on the rollback path")
+	// The ledger ingester was built then rolled back with no Ingest/Finalize, so
+	// it emits nothing.
+	require.Empty(t, sink.coldDataTypes(), "a rolled-back ingester emits no per-ingester ColdIngest")
 }
 
-// TestBuildColdIngesters_RollbackLaterFailure_TxhashAborts makes the LAST
-// constructor (events) fail AFTER both the ledger AND txhash ingesters were
-// already built, so closeColdAll rolls back two ingesters. It asserts the txhash
-// ingester (which DOES implement abortMetric) emits an error-carrying — not a
-// clean-success — ColdIngest, complementing the ledger-only abort coverage above.
-func TestBuildColdIngesters_RollbackLaterFailure_TxhashAborts(t *testing.T) {
+// TestBuildColdIngesters_RollbackTwoBuilt makes the LAST constructor (events)
+// fail AFTER both the ledger AND txhash ingesters were already built, so
+// closeColdAll rolls back two ingesters. Same invariant at greater rollback
+// depth: neither rolled-back ingester emits a per-ingester ColdIngest.
+func TestBuildColdIngesters_RollbackTwoBuilt(t *testing.T) {
 	chunkID := chunk.ID(0)
 	coldDir := t.TempDir()
 	sink := &testSink{}
@@ -1568,19 +1158,13 @@ func TestBuildColdIngesters_RollbackLaterFailure_TxhashAborts(t *testing.T) {
 	packPath := filepath.Join(coldDir, dataTypeEvents, chunkID.BucketID(), eventstore.EventsPackName(chunkID))
 	require.NoError(t, os.MkdirAll(packPath, 0o755))
 
-	_, err := buildColdIngesters(coldDirsAt(coldDir), chunkID, sink,
+	_, err := buildColdIngesters(coldDirsAt(coldDir, chunkID), chunkID, sink,
 		Config{Ledgers: true, Txhash: true, Events: true})
 	require.Error(t, err, "events constructor must fail on the planted directory")
 
-	// The txhash ingester was built then rolled back: its recorded ColdIngest must
-	// carry the abort error, never a clean success.
-	cdt := sink.coldDataTypes()
-	require.Equal(t, 1, cdt[dataTypeTxhash], "rolled-back txhash ingester emits one ColdIngest")
-	require.Equal(t, 1, sink.coldErrorTypes()[dataTypeTxhash],
-		"the rolled-back txhash ColdIngest must carry the abort error")
-
-	// No phantom clean success on the rollback path for any ingester.
-	require.Zero(t, countCleanColdIngests(sink), "no clean ColdIngest on the rollback path")
+	// Both the ledger and txhash ingesters were built then rolled back with no
+	// Ingest/Finalize, so neither emits a per-ingester ColdIngest.
+	require.Empty(t, sink.coldDataTypes(), "rolled-back ingesters emit no per-ingester ColdIngest")
 }
 
 // TestWriteColdChunk_ConstructorFailure_EmitsAggregate drives a constructor failure
@@ -1598,7 +1182,7 @@ func TestWriteColdChunk_ConstructorFailure_EmitsAggregate(t *testing.T) {
 
 	err := WriteColdChunk(
 		context.Background(), logger, chunkID, rawChunk(fullStream(t, chunkID, nil), chunkID),
-		coldDirsAt(coldDir), sink, Config{Ledgers: true},
+		coldDirsAt(coldDir, chunkID), sink, Config{Ledgers: true},
 	)
 	require.Error(t, err)
 	require.Equal(t, 1, sink.coldChunkTotals,
@@ -1619,7 +1203,7 @@ func TestEventsCold_FinishThenIndexFails_LeavesInertPack(t *testing.T) {
 	first := chunkID.FirstLedger()
 	coldDir := t.TempDir()
 
-	ing, err := NewEventsColdIngester(coldDir, chunkID, nil)
+	ing, err := NewEventsColdIngester(filepath.Join(coldDir, chunkID.BucketID()), chunkID, nil)
 	require.NoError(t, err)
 
 	// Ingest one event-bearing ledger so the mirror is non-empty (an empty
@@ -1656,7 +1240,7 @@ func TestEventsCold_FinalizeAfterFailedIngest_Refuses(t *testing.T) {
 	chunkID := chunk.ID(0)
 	coldDir := t.TempDir()
 
-	ing, err := NewEventsColdIngester(coldDir, chunkID, nil)
+	ing, err := NewEventsColdIngester(filepath.Join(coldDir, chunkID.BucketID()), chunkID, nil)
 	require.NoError(t, err)
 	defer func() { require.NoError(t, ing.Close()) }()
 
@@ -1721,13 +1305,16 @@ func TestColdService_Finalize_FirstErrorStopsRemaining(t *testing.T) {
 // ───────────────────────── drain overrun guard ─────────────────────────
 
 // countingIngester counts Ingest calls; used to prove the overrun guard fires
-// BEFORE the out-of-chunk ledger is handed to the ingesters.
+// BEFORE the out-of-chunk ledger is handed to the ingesters. It fakes the
+// ColdIngester seam: a ColdService wraps it, and that service is what drain consumes.
 type countingIngester struct{ ingested int }
 
 func (c *countingIngester) Ingest(context.Context, uint32, xdr.LedgerCloseMetaView) error {
 	c.ingested++
 	return nil
 }
+func (*countingIngester) Finalize(context.Context) error { return nil } // no-op: only Ingest calls are counted
+func (*countingIngester) Close() error                   { return nil } // no-op: the fake holds no resources
 
 // TestDrain_OverrunPastChunk asserts a stream that keeps yielding in order
 // PAST the chunk's last ledger is rejected before the overrun ledger is
@@ -1739,8 +1326,9 @@ func TestDrain_OverrunPastChunk(t *testing.T) {
 	// One ledger past the chunk, still in order.
 	stream := &fakeStream{t: t, count: ledgersInChunk + 1}
 	counter := &countingIngester{}
+	service := NewColdService([]ColdIngester{counter}, nil)
 
-	err := drain(context.Background(), rawChunk(stream, chunkID), chunkID, counter)
+	err := drain(context.Background(), rawChunk(stream, chunkID), chunkID, service)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "overrun")
 	require.Equal(t, int(ledgersInChunk), counter.ingested,
@@ -1777,11 +1365,11 @@ func TestWriteColdChunk_LazySourceFirstReadError(t *testing.T) {
 	wantErr := errors.New("induced lazy-source failure (bad config / missing object)")
 	err := WriteColdChunk(
 		context.Background(), logger, chunkID, rawChunk(lazyErrStream{err: wantErr}, chunkID),
-		coldDirsAt(coldDir), sink, Config{Ledgers: true},
+		coldDirsAt(coldDir, chunkID), sink, Config{Ledgers: true},
 	)
 	require.Error(t, err)
 	require.ErrorIs(t, err, wantErr)
-	require.Contains(t, err.Error(), "RawLedgers", "the error surfaces from drain's stream pull")
+	require.Contains(t, err.Error(), "stream for chunk", "the error surfaces from drain's stream pull")
 
 	// Finalize never committed → no finalized pack (Close dropped the partial).
 	path := packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID)
@@ -1801,7 +1389,7 @@ func TestWriteColdChunk_EmptyStream(t *testing.T) {
 
 	err := WriteColdChunk(
 		context.Background(), logger, chunkID, rawChunk(&fakeStream{t: t, count: 0}, chunkID),
-		coldDirsAt(coldDir), sink, Config{Ledgers: true},
+		coldDirsAt(coldDir, chunkID), sink, Config{Ledgers: true},
 	)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "ended at", "the completeness check rejects the empty stream")
@@ -1823,7 +1411,7 @@ func TestColdService_FinalizeAbort_KeepsEarlierArtifact(t *testing.T) {
 	coldDir := t.TempDir()
 	sink := &testSink{}
 
-	realLedger, err := NewLedgerColdIngester(filepath.Join(coldDir, dataTypeLedgers), chunkID, sink)
+	realLedger, err := NewLedgerColdIngester(packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID), chunkID, sink)
 	require.NoError(t, err)
 	failErr := errors.New("induced finalize failure")
 	failing := &finalizeErrCold{err: failErr}
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/ingester.go b/cmd/stellar-rpc/internal/fullhistory/ingest/ingester.go
index d59453293..ad312520d 100644
--- a/cmd/stellar-rpc/internal/fullhistory/ingest/ingester.go
+++ b/cmd/stellar-rpc/internal/fullhistory/ingest/ingester.go
@@ -6,29 +6,6 @@ import (
 	"github.com/stellar/go-stellar-sdk/xdr"
 )
 
-// HotIngester ingests one data type for one ledger into a long-lived hot store.
-//
-// Ownership: the hot store is INJECTED into the ingester's constructor and owned
-// by the caller (the daemon). The ingester does NOT open the store and does NOT
-// close it — Close is intentionally absent from this interface.
-//
-// Input: seq is the DRIVER-VALIDATED ledger sequence of lcm — the drain loop
-// has already read it off the view and checked it against the chunk's expected
-// position (duplicate / out-of-order / overrun), so ingesters consume it
-// directly instead of each re-deriving and re-error-handling it. lcm is a
-// zero-copy xdr.LedgerCloseMetaView (a []byte alias over the source stream's
-// BORROWED buffer), valid only for the current iteration step; an ingester
-// must copy any bytes it retains. The hot fan-out (HotService) waits for all
-// ingesters to finish a ledger before the source pulls the next one, so
-// synchronous consumption inside Ingest is safe.
-//
-// Concurrency: distinct HotIngester instances are run concurrently for the same
-// ledger (HotService fans out via errgroup); each instance touches only its own
-// store plus the read-only view.
-type HotIngester interface {
-	Ingest(ctx context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error
-}
-
 // ColdIngester ingests one data type for one chunk into a per-chunk cold writer.
 //
 // Ownership: the ingester OPENS its own per-chunk writer in its constructor and
@@ -44,7 +21,10 @@ type HotIngester interface {
 // artifact; implementations are encouraged to latch the failure and refuse
 // (eventsCold does).
 //
-// Input: same driver-validated-seq and borrowed-view contract as HotIngester.
+// Input: seq is the ledger sequence of lcm on drain's contiguous counter (the
+// in-order contract is enforced at the source), and lcm is a zero-copy
+// xdr.LedgerCloseMetaView over the source stream's BORROWED buffer, valid only for
+// the current iteration step — an implementation must copy any bytes it retains.
 // ColdService drives the per-ledger Ingest calls sequentially, so each view is
 // fully consumed before the next.
 type ColdIngester interface {
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/ledgers.go b/cmd/stellar-rpc/internal/fullhistory/ingest/ledgers.go
index f9bab63af..5acf01b91 100644
--- a/cmd/stellar-rpc/internal/fullhistory/ingest/ledgers.go
+++ b/cmd/stellar-rpc/internal/fullhistory/ingest/ledgers.go
@@ -13,42 +13,6 @@ import (
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger"
 )
 
-// ───────────────────────── Hot ingester ─────────────────────────
-
-// ledgerHot writes raw ledger bytes verbatim into a long-lived ledger.HotStore.
-// AddLedgers fsyncs once per call, so each ledger is durable before Ingest
-// returns. The store is INJECTED and owned by the caller — ledgerHot never
-// opens or closes it.
-type ledgerHot struct {
-	store *ledger.HotStore
-	sink  MetricSink
-}
-
-// NewLedgerHotIngester returns a HotIngester writing raw ledger bytes into the
-// injected, caller-owned store.
-func NewLedgerHotIngester(store *ledger.HotStore, sink MetricSink) HotIngester {
-	return &ledgerHot{store: store, sink: orNop(sink)}
-}
-
-func (h *ledgerHot) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error {
-	m := newHotMetrics(h.sink, dataTypeLedgers)
-	var err error
-	defer func() { m.emit(err) }()
-
-	// ledger.HotStore.AddLedgers copies the bytes into its RocksDB batch
-	// synchronously, so aliasing the borrowed view buffer here is safe.
-	wstart := time.Now()
-	if aerr := h.store.AddLedgers(ledger.Entry{Seq: seq, Bytes: []byte(lcm)}); aerr != nil {
-		err = fmt.Errorf("AddLedgers(seq=%d): %w", seq, aerr)
-		return err
-	}
-	h.sink.IngestStage(dataTypeLedgers, tierHot, stageWrite, time.Since(wstart), 1)
-	// Set AFTER the store call so a failed write reports items=0, matching
-	// the MetricSink "items written" contract and the other hot ingesters.
-	m.items = 1
-	return nil
-}
-
 // ───────────────────────── Cold ingester ─────────────────────────
 
 // ledgerCold writes raw ledger bytes into a per-chunk ledger.ColdWriter (one
@@ -61,31 +25,28 @@ type ledgerCold struct {
 	appended bool
 }
 
-// NewLedgerColdIngester opens a per-chunk cold ledger writer under coldDir and
-// returns a ColdIngester that owns it. The writer uses its zero-value options;
-// driver-level tuning is a follow-up via Config.
-func NewLedgerColdIngester(coldDir string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) {
-	// The chunk's pack lives under its %05d bucket subdirectory; ledger.PackName
-	// owns the per-chunk filename so the naming convention has a single owner
-	// shared with the cold-ledger read path (ledger.NewPackStream).
-	path := filepath.Join(coldDir, chunkID.BucketID(), ledger.PackName(chunkID))
-	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
-		return nil, fmt.Errorf("mkdir %s: %w", filepath.Dir(path), err)
+// NewLedgerColdIngester creates packPath's parent directory, opens a per-chunk
+// cold ledger writer at packPath (the caller's geometry.Layout.LedgerPackPath(chunkID),
+// so Layout stays the single path derivation), and returns a ColdIngester that owns it.
+// The writer uses its zero-value options; driver-level tuning is a follow-up via Config.
+func NewLedgerColdIngester(packPath string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) {
+	if err := os.MkdirAll(filepath.Dir(packPath), 0o755); err != nil {
+		return nil, fmt.Errorf("mkdir %s: %w", filepath.Dir(packPath), err)
 	}
-	w, err := ledger.NewColdWriter(path, chunkID.FirstLedger(), ledger.ColdWriterOptions{})
+	w, err := ledger.NewColdWriter(packPath, chunkID.FirstLedger(), ledger.ColdWriterOptions{})
 	if err != nil {
-		return nil, fmt.Errorf("ledger.NewColdWriter %s: %w", path, err)
+		return nil, fmt.Errorf("ledger.NewColdWriter %s: %w", packPath, err)
 	}
-	return &ledgerCold{path: path, writer: w, metrics: newColdMetrics(sink, dataTypeLedgers)}, nil
+	return &ledgerCold{path: packPath, writer: w, metrics: newColdMetrics(sink, dataTypeLedgers)}, nil
 }
 
 func (c *ledgerCold) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error {
 	start := time.Now()
 	if err := c.writer.AppendLedger(seq, []byte(lcm)); err != nil {
-		c.metrics.observe(time.Since(start), 0, err)
+		c.metrics.observe(time.Since(start), 0, err) // terminal: observe emits the per-ingester signal
 		return fmt.Errorf("AppendLedger(seq=%d): %w", seq, err)
 	}
-	c.metrics.sink.IngestStage(dataTypeLedgers, tierCold, stageWrite, time.Since(start), 1)
+	c.metrics.sink.IngestStage(dataTypeLedgers, stageWrite, time.Since(start), 1) // stage sample: 1 ledger written
 	c.appended = true
 	c.metrics.observe(time.Since(start), 1, nil)
 	return nil
 }
@@ -105,23 +66,15 @@ func (c *ledgerCold) Finalize(_ context.Context) error {
 		c.metrics.emit(time.Since(start), err)
 		return err
 	}
-	c.metrics.sink.IngestStage(dataTypeLedgers, tierCold, stageFinalize, time.Since(start), 0)
+	c.metrics.sink.IngestStage(dataTypeLedgers, stageFinalize, time.Since(start), 0)
 	c.metrics.emit(time.Since(start), nil)
 	return nil
 }
 
-// Close drops the partial pack when Finalize never ran, and emits the cold
-// metrics if Finalize did not already (the failure path). The writer.Close
-// error is folded into the emitted metric so a close-time failure is counted in
-// errors_total. emit is a no-op after a successful Finalize, so this never
-// double-counts. Error propagation is unchanged: the writer.Close error is
-// still returned.
+// Close drops the partial pack when Finalize never ran. It does NOT emit the cold
+// metric: a terminal Ingest error or Finalize already emitted it, and an ingester
+// that never got that far (a rolled-back build) must produce no phantom sample.
+// The writer.Close error is returned unchanged; it is not recorded in any metric.
 func (c *ledgerCold) Close() error {
-	cerr := c.writer.Close()
-	c.metrics.emit(0, cerr)
-	return cerr
+	return c.writer.Close()
 }
-
-// abortMetric records a synthetic abort error so a subsequent Close emit does
-// not look like a clean success. Used by the constructor-rollback path.
-func (c *ledgerCold) abortMetric(err error) { c.metrics.recordErr(err) }
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/metrics.go b/cmd/stellar-rpc/internal/fullhistory/ingest/metrics.go
index 22ab631dc..8b9952e6b 100644
--- a/cmd/stellar-rpc/internal/fullhistory/ingest/metrics.go
+++ b/cmd/stellar-rpc/internal/fullhistory/ingest/metrics.go
@@ -4,23 +4,20 @@ import (
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
 )
 
 // Data-type labels reported to a MetricSink. These match the per-type
-// subdirectory names used on disk.
+// subdirectory names used on disk. (The hot tier keys its per-ledger phases by
+// hotchunk.Phase, not by data type — see MetricSink.HotPhase.)
 const (
 	dataTypeLedgers = "ledgers"
 	dataTypeTxhash  = "txhash"
 	dataTypeEvents  = "events"
 )
 
-// Tier labels reported to a MetricSink.
-const (
-	tierHot  = "hot"
-	tierCold = "cold"
-)
-
-// Stage labels reported via MetricSink.IngestStage. These sit at the seams
+// Cold stage labels reported via MetricSink.IngestStage. These sit at the seams
 // the rpc-hack bench collectors measured (per-stage extract / term-index /
 // store-write samples plus a per-chunk finish), so a CSV sink can reproduce
 // those reports from production ingesters without re-instrumenting.
@@ -31,53 +28,66 @@ const (
 	stageFinalize  = "finalize"   // per-chunk commit (pack trailer, index build, .bin write)
 )
 
+// coldStagePairs lists the (data_type, stage) pairs the cold ingesters actually
+// emit (eight, not the 3×4 cross-product) so a sink can pre-resolve exactly these
+// and register no unfeedable series; extend it with any new cold IngestStage call.
+//
+//nolint:gochecknoglobals // fixed label set, read-only
+var coldStagePairs = []struct{ dataType, stage string }{
+	{dataTypeLedgers, stageWrite},
+	{dataTypeLedgers, stageFinalize},
+	{dataTypeTxhash, stageExtract},
+	{dataTypeTxhash, stageFinalize},
+	{dataTypeEvents, stageExtract},
+	{dataTypeEvents, stageTermIndex},
+	{dataTypeEvents, stageWrite},
+	{dataTypeEvents, stageFinalize},
+}
+
 // MetricSink receives ingest timing and volume signals. Ingesters report their
 // own per-call latency / item counts / errors (they know the item count); the
 // per-tier services report aggregate per-ledger (hot) and per-chunk (cold)
 // wall-clock. A sink lets the same ingesters/services feed Prometheus in prod,
 // a CSV recorder in benchmarks, or a test recorder — interchangeably.
 //
-// Implementations must be safe for concurrent use across ALL methods, not just
-// HotIngest: the hot fan-out calls HotIngest/HotLedgerTotal from per-ledger
-// goroutines, and a caller may freeze several chunks concurrently (each its own
-// WriteColdChunk), so the cold methods (ColdIngest, ColdChunkTotal) can likewise
-// be called from several goroutines at once.
+// Implementations must be safe for concurrent use across ALL methods: the live
+// hot ingestion loop reports HotPhase from its own goroutine while the lifecycle
+// may freeze several chunks concurrently (each its own WriteColdChunk), so the
+// cold methods (ColdIngest, ColdChunkTotal, IngestStage) can likewise be called
+// from several goroutines at once.
 type MetricSink interface {
-	// HotIngest reports one hot ingester's per-ledger Ingest: dataType is the
-	// data-type label, d the wall-clock, items the number of items written
-	// (events, txhashes, or 1 for a ledger), err the Ingest error (nil on
-	// success).
-	HotIngest(dataType string, d time.Duration, items int, err error)
+	// HotPhase reports ONE phase of one hot ledger ingest — the single hot-tier
+	// signal family. d is that phase's wall-clock. items is its item count on the
+	// success path: 0 for the extract/commit phases, the per-type write volume for
+	// the write phases. err is non-nil only on the phase that failed, so a decode
+	// failure lands on PhaseExtract and a commit failure on PhaseCommit by
+	// construction. The per-ledger total is the sum of the phase durations; the
+	// caller emits phases [0, Failed] on error and all phases on success.
+	HotPhase(phase hotchunk.Phase, d time.Duration, items int, err error)
 	// ColdIngest reports one cold ingester's per-chunk total: the summed Ingest
 	// wall-clock plus its Finalize, items the total items written for the chunk,
 	// err the first error (nil on success).
 	ColdIngest(dataType string, d time.Duration, items int, err error)
-	// HotLedgerTotal reports the per-ledger wall-clock across all hot ingesters
-	// (the HotService.Ingest fan-out duration).
-	HotLedgerTotal(d time.Duration)
 	// ColdChunkTotal reports the per-chunk wall-clock across all cold ingesters'
 	// ingests plus their Finalizes (the ColdService lifetime).
 	ColdChunkTotal(d time.Duration)
-	// IngestStage reports one ingester's per-stage wall-clock INSIDE an
+	// IngestStage reports one COLD ingester's per-stage wall-clock inside an
 	// Ingest/Finalize call: stage is one of the stage* constants (extract,
-	// term_index, write, finalize), tier "hot" or "cold", items the stage's
-	// natural item count (0 where none applies). The whole-call HotIngest /
-	// ColdIngest signals above cannot be decomposed by a sink after the
-	// fact, so the per-stage granularity the bench reports need is exposed
-	// as its own signal — a sink that doesn't want it (production
-	// Prometheus, optionally) can no-op it.
-	IngestStage(dataType, tier, stage string, d time.Duration, items int)
+	// term_index, write, finalize), items the stage's natural item count (0 where
+	// none applies). The whole-call ColdIngest signal cannot be decomposed by a
+	// sink after the fact, so the per-stage granularity the bench reports need is
+	// exposed as its own signal — a sink that doesn't want it can no-op it.
+	IngestStage(dataType, stage string, d time.Duration, items int)
 }
 
 // NopSink is a MetricSink that discards everything. It is the default when a
 // caller passes a nil sink to a service or ingester.
 type NopSink struct{}
 
-func (NopSink) HotIngest(string, time.Duration, int, error)            {}
-func (NopSink) ColdIngest(string, time.Duration, int, error)           {}
-func (NopSink) HotLedgerTotal(time.Duration)                           {}
-func (NopSink) ColdChunkTotal(time.Duration)                           {}
-func (NopSink) IngestStage(string, string, string, time.Duration, int) {}
+func (NopSink) HotPhase(hotchunk.Phase, time.Duration, int, error) {}
+func (NopSink) ColdIngest(string, time.Duration, int, error)       {}
+func (NopSink) ColdChunkTotal(time.Duration)                       {}
+func (NopSink) IngestStage(string, string, time.Duration, int)     {}
 
 // orNop returns sink, or NopSink{} when sink is nil, so call sites never
 // nil-check before reporting.
@@ -88,48 +98,18 @@ func orNop(sink MetricSink) MetricSink {
 	return sink
 }
 
-// hotMetrics emits a single HotIngest signal for one hot ingester's per-ledger
-// Ingest. The ingester sets items as it learns the count, then a single deferred
-// emit reports the wall-clock since start, the final item count, and the WRAPPED
-// error captured from the named return — so every Ingest has exactly one emit
-// site regardless of which return path it takes.
-//
-// Usage:
-//
-//	func (h *fooHot) Ingest(...) (err error) {
-//	    m := newHotMetrics(h.sink, dataTypeFoo)
-//	    defer func() { m.emit(err) }()
-//	    ...
-//	    m.items = len(things)
-//	    return nil
-//	}
-type hotMetrics struct {
-	sink     MetricSink
-	dataType string
-	start    time.Time
-	items    int
-}
-
-func newHotMetrics(sink MetricSink, dataType string) hotMetrics {
-	return hotMetrics{sink: orNop(sink), dataType: dataType, start: time.Now()}
-}
-
-// emit reports the single HotIngest signal: the wall-clock since construction,
-// the accumulated item count, and the (wrapped) error from the named return.
-func (m *hotMetrics) emit(err error) {
-	m.sink.HotIngest(m.dataType, time.Since(m.start), m.items, err)
-}
-
 // coldMetrics is the per-chunk metric accumulator shared by all three cold
 // ingesters. Each ingester accumulates Ingest wall-clock (accum), item count
 // (items), and the FIRST error it saw (firstErr) across the chunk, then emits a
-// single ColdIngest signal — in Finalize if reached, otherwise in Close (the
-// failure path). The emitted flag guards against a double-emit: a successful
-// Finalize emits and sets emitted=true so the deferred Close is a no-op, while a
-// chunk that errors before Finalize emits exactly once from Close.
+// single ColdIngest signal on a TERMINAL step only: Finalize (success or error),
+// or an Ingest error (which abandons the chunk). Close NEVER emits — an ingester
+// that was built but never ingested/finalized (e.g. a sibling constructor failed
+// and the build rolled back) produces NO phantom sample. The emitted flag guards
+// against a double-emit so the guarantee holds even if a defensive caller drives
+// the terminal steps redundantly.
 //
-// This guarantees: failed chunk → one ColdIngest with the error recorded;
-// success → exactly one ColdIngest per ingester; never both.
+// This guarantees: a chunk that ingested and then failed/finalized → exactly one
+// ColdIngest (error recorded on failure); a rolled-back ingester → none.
 type coldMetrics struct {
 	sink     MetricSink
 	dataType string
@@ -143,29 +123,25 @@ func newColdMetrics(sink MetricSink, dataType string) coldMetrics {
 	return coldMetrics{sink: orNop(sink), dataType: dataType}
 }
 
-// recordErr folds err into firstErr WITHOUT emitting. Used on the
-// constructor-rollback path so the subsequent Close emit carries the abort
-// error instead of looking like a clean (nil-err, 0-items) success.
-func (m *coldMetrics) recordErr(err error) {
-	if err != nil {
-		m.firstErr = errOrFirst(m.firstErr, err)
-	}
-}
-
-// observe records one Ingest's wall-clock and (on error) the first error.
+// observe accumulates one Ingest's wall-clock and item count and, on error,
+// records the first error. An Ingest error is TERMINAL by the ColdIngester
+// contract (the chunk is abandoned and the ingester is never reused), so observe
+// emits the single per-ingester ColdIngest itself — callers just observe-and-return.
 func (m *coldMetrics) observe(d time.Duration, items int, err error) {
 	m.accum += d
 	m.items += items
 	if err != nil {
 		m.firstErr = errOrFirst(m.firstErr, err)
+		m.emit(0, nil) // err is already folded into firstErr; 0 adds no extra duration
 	}
 }
 
 // emit reports the single ColdIngest signal for this ingester, adding extra to
 // the accumulated Ingest time (e.g. the Finalize wall-clock) and folding err
 // (if non-nil) into firstErr before reporting. It is a no-op after the first
-// call, so calling it from both Finalize (success) and Close (deferred cleanup)
-// emits exactly once. Pass a nil err when there is no stage error to record.
+// call, so a redundant terminal-step call emits exactly once. Pass a nil err
+// when the error is already recorded (an Ingest failure observes it) or there is
+// none.
 func (m *coldMetrics) emit(extra time.Duration, err error) {
 	if err != nil {
 		m.firstErr = errOrFirst(m.firstErr, err)
@@ -200,16 +176,9 @@ var (
 	coldStageBuckets = prometheus.ExponentialBuckets(0.001, 4, 12)
 )
 
-// ingestStages is the construction-time stage label set used to pre-resolve
-// the per-(data_type, stage) children.
-//
-//nolint:gochecknoglobals // fixed label set, read-only
-var ingestStages = []string{stageExtract, stageTermIndex, stageWrite, stageFinalize}
-
-// ingestCollectors bundles the pre-resolved per-(data_type, tier) children.
-// The label space is fixed at construction (three data types × two tiers), so
-// resolving the children once removes the per-emit label-map allocation and
-// hashed vector lookups from the hot per-ledger path.
+// ingestCollectors bundles the pre-resolved per-cold-data-type children. The
+// label space is fixed at construction, so resolving the children once removes
+// the per-emit label-map allocation and hashed vector lookup.
 type ingestCollectors struct {
 	duration prometheus.Observer
 	items    prometheus.Counter
@@ -234,26 +203,22 @@ func (c ingestCollectors) observe(d time.Duration, items int, err error) {
 // passing it into the ingest drivers) is a follow-up — there is no full-history
 // ingest daemon startup path yet. This type only provides the registerable sink.
 type PrometheusSink struct {
-	// Pre-resolved per-ingester children, keyed by data type, one map per
-	// tier (the duration histograms have per-tier buckets).
-	hot  map[string]ingestCollectors
+	// Hot per-ledger phases — the single hot signal family, one set of children per
+	// hotchunk.Phase, indexed by the phase value into a fixed-size ARRAY (not a map),
+	// so an out-of-table phase is a bounds panic at the index rather than a silent
+	// nil-map emit. The per-ledger total is the sum of hotPhaseDur; commit errors are
+	// hotPhaseErrs[PhaseCommit]; decode errors hotPhaseErrs[PhaseExtract].
+	hotPhaseDur   [hotchunk.NumPhases]prometheus.Observer
+	hotPhaseItems [hotchunk.NumPhases]prometheus.Counter
+	hotPhaseErrs  [hotchunk.NumPhases]prometheus.Counter
+	// Pre-resolved per-cold-ingester children, keyed by data type. Producers draw
+	// their data_type from the same constant set the map is built from, so a lookup
+	// can never miss — indexed directly, no on-the-fly vector fallback.
 	cold map[string]ingestCollectors
-	// The vectors behind the resolved children, kept for the (unexpected)
-	// case of a data type outside the construction-time set — resolved on
-	// the fly so no signal is ever silently dropped.
-	hotDuration  *prometheus.HistogramVec
-	coldDuration *prometheus.HistogramVec
-	ingestItems  *prometheus.CounterVec
-	ingestErrors *prometheus.CounterVec
-	// Per-stage durations (IngestStage), pre-resolved per
-	// (data_type, stage) with per-tier buckets, keyed "dataType/stage".
-	hotStage     map[string]prometheus.Observer
-	coldStage    map[string]prometheus.Observer
-	hotStageVec  *prometheus.HistogramVec
-	coldStageVec *prometheus.HistogramVec
-	// Aggregate per-tier wall-clock: hot per-ledger fan-out, cold per-chunk
-	// service lifetime. Separate histograms so each tier gets fitting buckets.
-	hotLedgerTotal prometheus.Observer
+	// Per-cold-stage durations, pre-resolved for the eight real (data_type, stage)
+	// pairs only (coldStagePairs), keyed "dataType/stage".
+	coldStage map[string]prometheus.Observer
+	// Aggregate per-chunk cold wall-clock (ColdService lifetime).
 	coldChunkTotal prometheus.Observer
 }
 
@@ -261,12 +226,24 @@ type PrometheusSink struct {
 // registry under namespace + the fullhistory_ingest subsystem. namespace is the
 // daemon convention value (interfaces.PrometheusNamespace).
 func NewPrometheusSink(registry *prometheus.Registry, namespace string) *PrometheusSink {
-	hotDuration := prometheus.NewHistogramVec(prometheus.HistogramOpts{
+	hotPhaseDurVec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
 		Namespace: namespace, Subsystem: metricsSubsystem,
-		Name:    "hot_ingest_duration_seconds",
-		Help:    "per-ingester hot Ingest wall-clock (per ledger)",
+		Name:    "hot_phase_duration_seconds",
+		Help:    "per-ledger phase wall-clock (extract/ledgers/txhash/events/commit; phases sum to the per-ledger total)",
 		Buckets: hotBuckets,
-	}, []string{"data_type"})
+	}, []string{"phase"})
+
+	hotPhaseItemsVec := prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: namespace, Subsystem: metricsSubsystem,
+		Name: "hot_phase_items_total",
+		Help: "items written per hot phase (the write phases carry per-type volume; extract/commit are 0)",
+	}, []string{"phase"})
+
+	hotPhaseErrsVec := prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: namespace, Subsystem: metricsSubsystem,
+		Name: "hot_phase_errors_total",
+		Help: "hot ledger failures by the phase that failed (decode->extract, commit->commit, by construction)",
+	}, []string{"phase"})
 
 	coldDuration := prometheus.NewHistogramVec(prometheus.HistogramOpts{
 		Namespace: namespace, Subsystem: metricsSubsystem,
@@ -275,24 +252,17 @@ func NewPrometheusSink(registry *prometheus.Registry, namespace string) *Prometh
 		Buckets: coldBuckets,
 	}, []string{"data_type"})
 
-	ingestItems := prometheus.NewCounterVec(prometheus.CounterOpts{
-		Namespace: namespace, Subsystem: metricsSubsystem,
-		Name: "items_total",
-		Help: "items written per ingester (events, txhashes, or ledgers)",
-	}, []string{"data_type", "tier"})
-
-	ingestErrors := prometheus.NewCounterVec(prometheus.CounterOpts{
+	coldItems := prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: namespace, Subsystem: metricsSubsystem,
-		Name: "errors_total",
-		Help: "ingester Ingest/Finalize errors",
-	}, []string{"data_type", "tier"})
+		Name: "cold_items_total",
+		Help: "items written per cold ingester (events, txhashes, or ledgers)",
+	}, []string{"data_type"})
 
-	hotLedgerTotal := prometheus.NewHistogram(prometheus.HistogramOpts{
+	coldErrors := prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: namespace, Subsystem: metricsSubsystem,
-		Name:    "hot_ledger_duration_seconds",
-		Help:    "aggregate per-ledger wall-clock across all hot ingesters (HotService fan-out)",
-		Buckets: hotBuckets,
-	})
+		Name: "cold_errors_total",
+		Help: "cold ingester Ingest/Finalize errors",
+	}, []string{"data_type"})
 
 	coldChunkTotal := prometheus.NewHistogram(prometheus.HistogramOpts{
 		Namespace: namespace, Subsystem: metricsSubsystem,
@@ -301,13 +271,6 @@ func NewPrometheusSink(registry *prometheus.Registry, namespace string) *Prometh
 		Buckets: coldBuckets,
 	})
 
-	hotStageVec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
-		Namespace: namespace, Subsystem: metricsSubsystem,
-		Name:    "hot_stage_duration_seconds",
-		Help:    "per-stage wall-clock inside a hot Ingest (extract, write; ledgers emits write only)",
-		Buckets: hotBuckets,
-	}, []string{"data_type", "stage"})
-
 	coldStageVec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
 		Namespace: namespace, Subsystem: metricsSubsystem,
 		Name: "cold_stage_duration_seconds",
@@ -316,92 +279,55 @@ func NewPrometheusSink(registry *prometheus.Registry, namespace string) *Prometh
 		Buckets: coldStageBuckets,
 	}, []string{"data_type", "stage"})
 
-	registry.MustRegister(hotDuration, coldDuration, ingestItems, ingestErrors,
-		hotLedgerTotal, coldChunkTotal, hotStageVec, coldStageVec)
+	registry.MustRegister(hotPhaseDurVec, hotPhaseItemsVec, hotPhaseErrsVec,
+		coldDuration, coldItems, coldErrors, coldChunkTotal, coldStageVec)
 
-	hot := make(map[string]ingestCollectors, 3)
-	cold := make(map[string]ingestCollectors, 3)
-	hotStage := make(map[string]prometheus.Observer, 3*len(ingestStages))
-	coldStage := make(map[string]prometheus.Observer, 3*len(ingestStages))
+	sink := &PrometheusSink{
+		cold:           make(map[string]ingestCollectors, 3),
+		coldStage:      make(map[string]prometheus.Observer, len(coldStagePairs)),
+		coldChunkTotal: coldChunkTotal,
+	}
+	// Hot phases: one child per phase, indexed by the phase value.
+	for p := range hotchunk.NumPhases {
+		sink.hotPhaseDur[p] = hotPhaseDurVec.WithLabelValues(p.String())
+		sink.hotPhaseItems[p] = hotPhaseItemsVec.WithLabelValues(p.String())
+		sink.hotPhaseErrs[p] = hotPhaseErrsVec.WithLabelValues(p.String())
+	}
 	for _, dataType := range []string{dataTypeLedgers, dataTypeTxhash, dataTypeEvents} {
-		hot[dataType] = ingestCollectors{
-			duration: hotDuration.WithLabelValues(dataType),
-			items:    ingestItems.WithLabelValues(dataType, tierHot),
-			errors:   ingestErrors.WithLabelValues(dataType, tierHot),
-		}
-		cold[dataType] = ingestCollectors{
+		sink.cold[dataType] = ingestCollectors{
 			duration: coldDuration.WithLabelValues(dataType),
-			items:    ingestItems.WithLabelValues(dataType, tierCold),
-			errors:   ingestErrors.WithLabelValues(dataType, tierCold),
-		}
-		for _, stage := range ingestStages {
-			hotStage[dataType+"/"+stage] = hotStageVec.WithLabelValues(dataType, stage)
-			coldStage[dataType+"/"+stage] = coldStageVec.WithLabelValues(dataType, stage)
+			items:    coldItems.WithLabelValues(dataType),
+			errors:   coldErrors.WithLabelValues(dataType),
 		}
 	}
-
-	return &PrometheusSink{
-		hot:            hot,
-		cold:           cold,
-		hotDuration:    hotDuration,
-		coldDuration:   coldDuration,
-		ingestItems:    ingestItems,
-		ingestErrors:   ingestErrors,
-		hotStage:       hotStage,
-		coldStage:      coldStage,
-		hotStageVec:    hotStageVec,
-		coldStageVec:   coldStageVec,
-		hotLedgerTotal: hotLedgerTotal,
-		coldChunkTotal: coldChunkTotal,
+	// Cold stages: only the eight real (data_type, stage) pairs.
+	for _, k := range coldStagePairs {
+		sink.coldStage[k.dataType+"/"+k.stage] = coldStageVec.WithLabelValues(k.dataType, k.stage)
 	}
+	return sink
 }
 
-func (p *PrometheusSink) HotIngest(dataType string, d time.Duration, items int, err error) {
-	c, ok := p.hot[dataType]
-	if !ok {
-		c = ingestCollectors{
-			duration: p.hotDuration.WithLabelValues(dataType),
-			items:    p.ingestItems.WithLabelValues(dataType, tierHot),
-			errors:   p.ingestErrors.WithLabelValues(dataType, tierHot),
-		}
+func (p *PrometheusSink) HotPhase(phase hotchunk.Phase, d time.Duration, items int, err error) {
+	p.hotPhaseDur[phase].Observe(d.Seconds())
+	if items > 0 {
+		p.hotPhaseItems[phase].Add(float64(items))
 	}
-	c.observe(d, items, err)
-}
-
-func (p *PrometheusSink) ColdIngest(dataType string, d time.Duration, items int, err error) {
-	c, ok := p.cold[dataType]
-	if !ok {
-		c = ingestCollectors{
-			duration: p.coldDuration.WithLabelValues(dataType),
-			items:    p.ingestItems.WithLabelValues(dataType, tierCold),
-			errors:   p.ingestErrors.WithLabelValues(dataType, tierCold),
-		}
+	if err != nil {
+		p.hotPhaseErrs[phase].Inc()
 	}
-	c.observe(d, items, err)
 }
 
-func (p *PrometheusSink) HotLedgerTotal(d time.Duration) {
-	p.hotLedgerTotal.Observe(d.Seconds())
+func (p *PrometheusSink) ColdIngest(dataType string, d time.Duration, items int, err error) {
+	p.cold[dataType].observe(d, items, err)
 }
 
 func (p *PrometheusSink) ColdChunkTotal(d time.Duration) {
 	p.coldChunkTotal.Observe(d.Seconds())
 }
 
-// IngestStage records the per-stage duration into the tier's stage histogram.
-// The per-stage item counts are not exported to Prometheus (the per-Ingest
-// items_total already carries volume); they exist on the interface for the
-// CSV bench sink.
-func (p *PrometheusSink) IngestStage(dataType, tier, stage string, d time.Duration, _ int) {
-	resolved, vec := p.hotStage, p.hotStageVec
-	if tier == tierCold {
-		resolved, vec = p.coldStage, p.coldStageVec
-	}
-	o, ok := resolved[dataType+"/"+stage]
-	if !ok {
-		// Unexpected (data_type, stage) outside the construction-time set —
-		// resolve on the fly so no signal is silently dropped.
-		o = vec.WithLabelValues(dataType, stage)
-	}
-	o.Observe(d.Seconds())
+// IngestStage records the per-stage cold duration. The per-stage item counts are
+// not exported to Prometheus (cold_items_total already carries volume); they exist
+// on the interface for the CSV bench sink.
+func (p *PrometheusSink) IngestStage(dataType, stage string, d time.Duration, _ int) {
+	p.coldStage[dataType+"/"+stage].Observe(d.Seconds())
 }
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/service.go b/cmd/stellar-rpc/internal/fullhistory/ingest/service.go
index 1d5430f06..ec0c317d7 100644
--- a/cmd/stellar-rpc/internal/fullhistory/ingest/service.go
+++ b/cmd/stellar-rpc/internal/fullhistory/ingest/service.go
@@ -6,9 +6,9 @@ import (
 	"fmt"
 	"time"
 
-	"golang.org/x/sync/errgroup"
-
 	"github.com/stellar/go-stellar-sdk/xdr"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
 )
 
 // errOrFirst returns prev if it is non-nil, else cur. Used to retain the FIRST
@@ -21,49 +21,48 @@ func errOrFirst(prev, cur error) error {
 	return cur
 }
 
-// HotService fans one ledger out to a set of HotIngesters concurrently, waiting
-// for all to finish before returning (so the borrowed view is safe to release),
-// and emits the aggregate per-ledger wall-clock via the sink.
+// HotService commits one ledger to the shared per-chunk hot DB as ONE atomic
+// synced WriteBatch across all hot CFs (decision (a)) and emits the single hot
+// signal family: one HotPhase per hotchunk.Phase. No fan-out — the three types are
+// CFs of one RocksDB committing in one WriteBatch (hotchunk.DB.IngestLedger).
 type HotService struct {
-	ingesters []HotIngester
-	sink      MetricSink
+	db   *hotchunk.DB
+	sink MetricSink
 }
 
-// NewHotService builds a HotService over the enabled hot ingesters. A nil sink
-// defaults to NopSink.
-func NewHotService(ingesters []HotIngester, sink MetricSink) *HotService {
-	return &HotService{ingesters: ingesters, sink: orNop(sink)}
+// NewHotService builds a HotService that writes ledgers, txhash, and events into
+// the shared per-chunk DB. A nil sink defaults to NopSink.
+func NewHotService(db *hotchunk.DB, sink MetricSink) *HotService {
+	return &HotService{db: db, sink: orNop(sink)}
 }
 
-// Ingest runs every hot ingester on lcm concurrently and waits for all of them.
-// seq is the driver-validated sequence of lcm, passed through unchanged. The
-// first ingester error is returned; the production HotIngester.Ingest
-// implementations do not check ctx.Err(), so the siblings run to completion
-// regardless (g.Wait still returns the first error). The single-ingester config
-// skips the errgroup entirely. HotLedgerTotal is emitted with the fan-out
-// wall-clock regardless of success.
-func (s *HotService) Ingest(ctx context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error {
-	start := time.Now()
-	switch len(s.ingesters) {
-	case 0:
-		// No hot ingesters enabled for this tier: nothing to do.
-		s.sink.HotLedgerTotal(time.Since(start))
-		return nil
-	case 1:
-		// Single ingester: call directly, skipping the errgroup overhead.
-		err := s.ingesters[0].Ingest(ctx, seq, lcm)
-		s.sink.HotLedgerTotal(time.Since(start))
-		return err
-	default:
-		// Two or more: concurrent fan-out, waiting for all.
-		g, gctx := errgroup.WithContext(ctx)
-		for _, ing := range s.ingesters {
-			g.Go(func() error { return ing.Ingest(gctx, seq, lcm) })
+// Ingest commits lcm to the shared hot DB in one atomic synced WriteBatch
+// (decision (a)) and emits one HotPhase per phase from the ledger report. Each
+// phase carries its own wall-clock (the phases partition the per-ledger total),
+// the write phases carry per-type item volume on success, and the outcome lands on
+// the phase that failed BY CONSTRUCTION — a decode failure on PhaseExtract, a
+// commit failure on PhaseCommit — so there is no mislabeled batch-scoped error.
+// On failure only phases [0, Failed] ran, so only those are emitted (and with zero
+// items — nothing landed durably); on success every phase is emitted.
+func (s *HotService) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error {
+	rep, err := s.db.IngestLedger(seq, lcm)
+
+	last := hotchunk.NumPhases - 1
+	if err != nil {
+		last = rep.Failed
+	}
+	for p := hotchunk.Phase(0); p <= last; p++ {
+		items := rep.Phases[p].Items
+		var perr error
+		if err != nil {
+			items = 0 // the failure path committed nothing durably
+			if p == rep.Failed {
+				perr = err
+			}
 		}
-		err := g.Wait()
-		s.sink.HotLedgerTotal(time.Since(start))
-		return err
+		s.sink.HotPhase(p, rep.Phases[p].Dur, items, perr)
 	}
+	return err
 }
 
 // ColdService drives a set of ColdIngesters for one chunk: sequential per-ledger
@@ -124,11 +123,12 @@ func (s *ColdService) Finalize(ctx context.Context) error {
 }
 
 // Close closes every cold ingester, joining each Close error, and emits the
-// aggregate ColdChunkTotal if Finalize never reached it (the failure path). Each
-// ingester's own Close in turn emits that ingester's per-chunk ColdIngest if its
-// Finalize never ran, so a failed chunk still produces one per-ingester signal
-// and one aggregate. Idempotent: on the failure path a writer's Close drops its
-// partial file; after a successful Finalize all emissions are no-ops.
+// aggregate ColdChunkTotal if Finalize never reached it (the failure path). A
+// per-ingester ColdIngest is emitted only from a TERMINAL step (a failed Ingest,
+// via coldMetrics.observe, or Finalize) — never from Close, so an ingester rolled
+// back before any work produces no per-ingester sample (only the aggregate here).
+// Idempotent: on the failure path a writer's Close drops its partial file; after
+// a successful Finalize this is a no-op for the aggregate.
 func (s *ColdService) Close() error {
 	var err error
 	for _, ing := range s.ingesters {
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/txhash.go b/cmd/stellar-rpc/internal/fullhistory/ingest/txhash.go
index b80f77de5..7d98b0a70 100644
--- a/cmd/stellar-rpc/internal/fullhistory/ingest/txhash.go
+++ b/cmd/stellar-rpc/internal/fullhistory/ingest/txhash.go
@@ -16,51 +16,6 @@ import (
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash"
 )
 
-// ───────────────────────── Hot ingester ─────────────────────────
-
-// txhashHot extracts the ledger's transaction hashes via the SDK
-// (sdkingest.ExtractTxHashes — apply order, hashes copied off the view) and
-// writes (txhash, seq) tuples in one AddEntries call (one fsync per ledger).
-// The store is INJECTED and owned by the caller.
-type txhashHot struct {
-	store *txhash.HotStore
-	sink  MetricSink
-}
-
-// NewTxhashHotIngester returns a HotIngester writing (txhash, seq) tuples into
-// the injected, caller-owned store.
-func NewTxhashHotIngester(store *txhash.HotStore, sink MetricSink) HotIngester {
-	return &txhashHot{store: store, sink: orNop(sink)}
-}
-
-func (t *txhashHot) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error {
-	m := newHotMetrics(t.sink, dataTypeTxhash)
-	var err error
-	defer func() { m.emit(err) }()
-
-	estart := time.Now()
-	hashes, eerr := sdkingest.ExtractTxHashes(lcm)
-	if eerr != nil {
-		err = fmt.Errorf("ExtractTxHashes seq %d: %w", seq, eerr)
-		return err
-	}
-	t.sink.IngestStage(dataTypeTxhash, tierHot, stageExtract, time.Since(estart), len(hashes))
-	if len(hashes) > 0 {
-		entries := make([]txhash.Entry, len(hashes))
-		for i, h := range hashes {
-			entries[i] = txhash.Entry{Hash: [32]byte(h), LedgerSeq: seq}
-		}
-		wstart := time.Now()
-		if aerr := t.store.AddEntries(entries); aerr != nil {
-			err = fmt.Errorf("AddEntries(seq=%d, n=%d): %w", seq, len(entries), aerr)
-			return err
-		}
-		t.sink.IngestStage(dataTypeTxhash, tierHot, stageWrite, time.Since(wstart), len(entries))
-	}
-	m.items = len(hashes)
-	return nil
-}
-
 // ───────────────────────── Cold ingester ─────────────────────────
 
 // txhashCold accumulates (txhash[:ColdKeySize], seq) tuples per ledger; at
@@ -78,19 +33,18 @@ type txhashCold struct {
 }
 
 // NewTxhashColdIngester returns a ColdIngester that accumulates a per-chunk
-// sorted .bin under coldDir's bucket subdirectory, written at Finalize
-// (overwriting any prior attempt's file — see the package doc's artifact
-// model).
-func NewTxhashColdIngester(coldDir string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) {
-	bucketDir := filepath.Join(coldDir, chunkID.BucketID())
-	if err := os.MkdirAll(bucketDir, 0o755); err != nil {
-		return nil, fmt.Errorf("mkdir %s: %w", bucketDir, err)
+// sorted .bin at binPath, written at Finalize (overwriting any prior attempt's
+// file — see the package doc's artifact model). binPath is the caller's
+// geometry.Layout.TxHashBinPath(chunkID), so Layout is the path's only derivation.
+func NewTxhashColdIngester(binPath string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) {
+	if err := os.MkdirAll(filepath.Dir(binPath), 0o755); err != nil {
+		return nil, fmt.Errorf("mkdir %s: %w", filepath.Dir(binPath), err)
 	}
 	// The initial cap (64Ki entries, ~1.3 MB) deliberately starts well below a
 	// typical pubnet chunk's tx count (~3M): empty/sparse chunks stay cheap,
 	// and a busy chunk just pays a few amortized growths.
 	return &txhashCold{
-		binPath: filepath.Join(bucketDir, txhash.ColdBinName(chunkID)),
+		binPath: binPath,
 		chunkID: chunkID,
 		entries: make([]txhash.ColdEntry, 0, 1<<16),
 		metrics: newColdMetrics(sink, dataTypeTxhash),
@@ -105,7 +59,7 @@ func (t *txhashCold) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMe
 	// chunk that intermediate would be hundreds of MB of transient garbage.
 	hashes, err := sdkingest.ExtractTxHashes(lcm)
 	if err != nil {
-		t.metrics.observe(time.Since(start), 0, err)
+		t.metrics.observe(time.Since(start), 0, err) // terminal: observe emits the per-ingester signal
 		return fmt.Errorf("ExtractTxHashes seq %d: %w", seq, err)
 	}
 	for i := range hashes {
@@ -122,7 +76,7 @@ func (t *txhashCold) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMe
 	// write is the finalize stage; there is no separate cold write stage for
 	// txhash.)
 	d := time.Since(start)
-	t.metrics.sink.IngestStage(dataTypeTxhash, tierCold, stageExtract, d, len(hashes))
+	t.metrics.sink.IngestStage(dataTypeTxhash, stageExtract, d, len(hashes))
 	t.metrics.observe(d, len(hashes), nil)
 	return nil
 }
@@ -139,20 +93,15 @@ func (t *txhashCold) Finalize(_ context.Context) error {
 	})
 	err := txhash.WriteColdBin(t.binPath, t.entries)
 	if err == nil {
-		t.metrics.sink.IngestStage(dataTypeTxhash, tierCold, stageFinalize, time.Since(start), len(t.entries))
+		t.metrics.sink.IngestStage(dataTypeTxhash, stageFinalize, time.Since(start), len(t.entries))
 	}
 	t.metrics.emit(time.Since(start), err)
 	return err
 }
 
-// Close emits the cold metrics if Finalize never ran (the failure path); emit is
-// a no-op after Finalize. There is no open file handle to release (the .bin is
-// written in Finalize).
+// Close is a no-op: there is no open file handle to release (the .bin is written
+// in Finalize), and the cold metric is emitted on a terminal Ingest error or in
+// Finalize — never here, so a rolled-back build produces no phantom sample.
 func (t *txhashCold) Close() error {
-	t.metrics.emit(0, nil)
 	return nil
 }
-
-// abortMetric records a synthetic abort error so a subsequent Close emit does
-// not look like a clean success. Used by the constructor-rollback path.
-func (t *txhashCold) abortMetric(err error) { t.metrics.recordErr(err) }
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/discard_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/discard_test.go
new file mode 100644
index 000000000..fd6253929
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/discard_test.go
@@ -0,0 +1,30 @@
+package lifecycle
+
+import (
+	"os"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// TestDiscardHotTier_RemovesDirAndKey retires the bracket: after DiscardHotChunk
+// the hot key is deleted and the dir is gone. A second discard is a no-op.
+func TestDiscardHotTier_RemovesDirAndKey(t *testing.T) {
+	cat, _ := testCatalog(t)
+	c := chunk.ID(4)
+	db := openLiveHotDB(t, cat, c)
+	require.NoError(t, db.Close())
+
+	require.NoError(t, cat.DiscardHotChunk(c))
+
+	has, err := hotKeyExists(cat, c)
+	require.NoError(t, err)
+	assert.False(t, has, "the hot key is deleted")
+	_, statErr := os.Stat(cat.Layout().HotChunkPath(c))
+	assert.True(t, os.IsNotExist(statErr), "the dir is removed")
+
+	require.NoError(t, cat.DiscardHotChunk(c), "second discard is a no-op")
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/eligibility.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/eligibility.go
new file mode 100644
index 000000000..4db9c212c
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/eligibility.go
@@ -0,0 +1,157 @@
+package lifecycle
+
+import (
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// The discard and prune eligibility scans. Each returns zero-arg op closures the
+// tick calls in order. Both are PURE READS — eligibility comes from durable keys
+// alone, so re-running against the same snapshot yields nothing (quiescence).
+
+// eligibleDiscardOps returns a discard closure per hot DB the cold artifacts now
+// fully serve (or that fell past retention). Per chunk: below the floor → discard;
+// complete (last <= through), nothing pending, and the index covers it → discard;
+// otherwise (live, or frozen awaiting coverage) → leave alone.
+// catalog.DiscardHotChunk is idempotent, so a crash between freeze and discard
+// self-heals next tick.
+func eligibleDiscardOps(cat *catalog.Catalog, gate RetentionFloor, through uint32) ([]func() error, error) {
+	hot, err := cat.HotChunkKeys()
+	if err != nil {
+		return nil, err
+	}
+
+	var ops []func() error
+	for _, c := range hot {
+		last := c.LastLedger()
+		switch {
+		case gate.Excludes(c):
+			ops = append(ops, func() error { return cat.DiscardHotChunk(c) })
+		case last <= through:
+			// Coverage is read once here and passed into pendingArtifacts — the
+			// discard requires covers independently, so the whole predicate is
+			// ledgers-frozen && events-frozen && covers.
+			covers, cerr := cat.FrozenIndexCovers(c)
+			if cerr != nil {
+				return nil, cerr
+			}
+			pending, perr := pendingArtifacts(c, cat, covers)
+			if perr != nil {
+				return nil, perr
+			}
+			if pending.Empty() && covers {
+				ops = append(ops, func() error { return cat.DiscardHotChunk(c) })
+			}
+			// else: frozen awaiting coverage, or still producing — leave alone.
+		}
+		// default (last > through): the live chunk or above — ingestion's, not ours.
+	}
+	return ops, nil
+}
+
+// pendingArtifacts lists which outputs chunk c still needs: ledgers and events must
+// be frozen; txhash/.bin is exempt when the window's index already covers the
+// chunk (covers, computed by the caller — after finalization the chunk:c:txhash
+// key is demoted/swept, so regenerating the .bin would orphan it).
+func pendingArtifacts(c chunk.ID, cat *catalog.Catalog, covers bool) (catalog.ArtifactSet, error) {
+	var need catalog.ArtifactSet
+	for _, kind := range []geometry.Kind{geometry.KindLedgers, geometry.KindEvents} {
+		state, err := cat.State(c, kind)
+		if err != nil {
+			return need, err
+		}
+		if state != geometry.StateFrozen {
+			need = need.Add(kind)
+		}
+	}
+	txState, err := cat.State(c, geometry.KindTxHash)
+	if err != nil {
+		return need, err
+	}
+	if txState != geometry.StateFrozen && !covers {
+		need = need.Add(geometry.KindTxHash)
+	}
+	return need, nil
+}
+
+// eligiblePruneOps is the system's only file-deleter, key-driven, covering both
+// key families. It returns sweep closures (SweepTxHashIndexKey per index key, one
+// batched SweepChunkArtifacts for the chunk family). "Below the floor" is the
+// gate predicate shared with the discard scan and read path, so prune deletes
+// exactly what the reader has stopped admitting.
+// The second return is the total number of artifacts the ops will sweep (one per
+// index-key op plus every ref in the single batched chunk sweep), so the caller
+// meters Prune in artifacts — the same unit the Phase 1 sweep reports — rather
+// than in op closures (the chunk family collapses N artifacts into one op).
+func eligiblePruneOps(cat *catalog.Catalog, gate RetentionFloor) ([]func() error, int, error) {
+	var ops []func() error
+	artifacts := 0
+
+	// Index family: transient debris from any window, plus frozen keys below the floor.
+	idxKeys, err := cat.AllTxHashIndexKeys()
+	if err != nil {
+		return nil, 0, err
+	}
+	for _, cov := range idxKeys {
+		switch {
+		case cov.State == geometry.StateFreezing || cov.State == geometry.StatePruning:
+			// Transient debris (a crashed build or unfinished demotion). Safe only
+			// because no build is in flight when this scan runs (it follows
+			// executePlan's return, and backfill finishes before the loop starts).
+			ops = append(ops, func() error { return cat.SweepTxHashIndexKey(cov) })
+			artifacts++
+		case gate.Excludes(cat.TxHashIndexLayout().LastChunk(cov.Index)):
+			// Frozen index key below the floor; the sweep demotes it first.
+			ops = append(ops, func() error { return cat.SweepTxHashIndexKey(cov) })
+			artifacts++
+		}
+	}
+
+	// Chunk family: swept in one batch.
+	refs, err := cat.ChunkArtifactKeys()
+	if err != nil {
+		return nil, 0, err
+	}
+	var sweep []catalog.ArtifactRef
+	for _, ref := range refs {
+		switch {
+		case gate.Excludes(ref.Chunk):
+			// Past retention: any state goes.
+			sweep = append(sweep, ref)
+		case ref.State == geometry.StatePruning:
+			// In-retention .bin demoted by its window's terminal commit batch.
+			sweep = append(sweep, ref)
+		case ref.Kind == geometry.KindTxHash:
+			// A frozen/freezing chunk:c:txhash inside a FINALIZED window: re-derived
+			// (or left mid-write) by a widening backfill that crashed before its
+			// terminal rebuild, then abandoned when retention narrowed. The terminal
+			// .idx provably covers the chunk and is never re-materialized, so it's
+			// redundant.
+			redundant, rerr := txhashRedundantInFinalizedWindow(cat, ref.Chunk)
+			if rerr != nil {
+				return nil, 0, rerr
+			}
+			if redundant {
+				sweep = append(sweep, ref)
+			}
+		}
+	}
+	if len(sweep) > 0 {
+		ops = append(ops, func() error { return cat.SweepChunkArtifacts(sweep) })
+		artifacts += len(sweep)
+	}
+	return ops, artifacts, nil
+}
+
+// txhashRedundantInFinalizedWindow reports whether c's window has a TERMINAL
+// frozen index coverage (Hi == the window's last chunk) — the branch that makes
+// INV-2's no-leftover-txhash-keys clause self-healing, not merely auditable.
+func txhashRedundantInFinalizedWindow(cat *catalog.Catalog, c chunk.ID) (bool, error) {
+	w := cat.TxHashIndexLayout().TxHashIndexID(c)
+	fk, ok, err := cat.FrozenTxHashIndex(w)
+	if err != nil {
+		return false, err
+	}
+	return ok && cat.TxHashIndexLayout().IsTerminalCoverage(fk), nil
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/helpers_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/helpers_test.go
new file mode 100644
index 000000000..09bc2bad1
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/helpers_test.go
@@ -0,0 +1,156 @@
+package lifecycle
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/require"
+
+	supportlog "github.com/stellar/go-stellar-sdk/support/log"
+	"github.com/stellar/go-stellar-sdk/xdr"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore"
+)
+
+// This file provides the shared test scaffolding the lifecycle tests need. The
+// catalog/fixture helpers are copied verbatim from the root fullhistory package's
+// helpers_test.go (which still serves the root tests). The hot-tier helpers
+// (openHotDBForChunk / openLiveHotDB) create the SAME on-disk "ready" hot DBs the
+// real daemon does, so the lifecycle tick freezes and the watermark refinement
+// read the genuine hot DBs by path (the way production does after #22).
+
+// testCPI is the tx-hash index width (in chunks per index) tests build layouts
+// with; it equals the production constant so on-disk geometry reads back identically.
+const testCPI = geometry.ChunksPerTxhashIndex
+
+func silentLogger() *supportlog.Entry {
+	var buf bytes.Buffer
+	log := supportlog.New()
+	log.SetLevel(logrus.DebugLevel)
+	log.SetOutput(&buf)
+	return log
+}
+
+// newTestCatalog wires a Catalog to a real metastore living in a temp dir, with
+// tx-hash indexes cpi chunks wide. It returns the catalog plus the artifact
+// root; the store's Close is registered with t.Cleanup.
+func newTestCatalog(t *testing.T, cpi uint32) (*catalog.Catalog, string) {
+	t.Helper()
+	metaRoot := t.TempDir()
+	artifactRoot := t.TempDir()
+
+	ms, err := metastore.New(filepath.Join(metaRoot, "rocksdb"), silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = ms.Close() })
+
+	txLayout, err := geometry.NewTxHashIndexLayout(cpi)
+	require.NoError(t, err)
+
+	return catalog.NewCatalog(ms, geometry.NewLayout(artifactRoot), txLayout), artifactRoot
+}
+
+// testCatalog is newTestCatalog at the default (wide) index width, testCPI, for
+// tests that do not care about index geometry. It returns the catalog and the
+// artifact root.
+func testCatalog(t *testing.T) (*catalog.Catalog, string) {
+	t.Helper()
+	return newTestCatalog(t, testCPI)
+}
+
+// smallTxHashIndexCatalog is newTestCatalog with a caller-chosen index width of
+// cpi chunks. A narrow width means a "terminal" (full-index) build needs only a
+// few chunks, which keeps such tests cheap. It returns the catalog and the
+// artifact root.
+func smallTxHashIndexCatalog(t *testing.T, cpi uint32) (*catalog.Catalog, string) {
+	t.Helper()
+	return newTestCatalog(t, cpi)
+}
+
+// freezeKinds takes the given per-chunk kinds to "frozen" via the one-write protocol (mark freezing, then flip).
+func freezeKinds(t *testing.T, cat *catalog.Catalog, chunkID chunk.ID, kinds ...geometry.Kind) {
+	t.Helper()
+	require.NoError(t, cat.MarkChunkFreezing(chunkID, kinds...))
+	require.NoError(t, cat.FlipChunkFrozen(chunkID, kinds...))
+}
+
+// freezeCoverage records index w's coverage [lo, hi] as freezing, then commits it as frozen.
+func freezeCoverage(t *testing.T, cat *catalog.Catalog, w geometry.TxHashIndexID, lo, hi chunk.ID) {
+	t.Helper()
+	pending, markErr := cat.MarkTxHashIndexFreezing(w, lo, hi)
+	require.NoError(t, markErr)
+	require.NoError(t, cat.CommitTxHashIndex(pending))
+}
+
+// zeroTxLCMBytes marshals a minimal valid V2 LedgerCloseMeta for seq that carries
+// no transactions; zero-tx keeps a full 10k-ledger chunk pass cheap.
+func zeroTxLCMBytes(t *testing.T, seq uint32) []byte {
+	t.Helper()
+	header := xdr.LedgerHeader{
+		ScpValue:  xdr.StellarValue{CloseTime: xdr.TimePoint(0)},
+		LedgerSeq: xdr.Uint32(seq),
+	}
+	emptyTxSet := xdr.GeneralizedTransactionSet{
+		V:       1,
+		V1TxSet: &xdr.TransactionSetV1{Phases: nil},
+	}
+	meta := xdr.LedgerCloseMeta{
+		V: 2,
+		V2: &xdr.LedgerCloseMetaV2{
+			LedgerHeader: xdr.LedgerHeaderHistoryEntry{Header: header},
+			TxSet:        emptyTxSet,
+			TxProcessing: nil,
+		},
+	}
+	wire, err := meta.MarshalBinary()
+	require.NoError(t, err)
+	return wire
+}
+
+// ---------------------------------------------------------------------------
+// Hot-tier test scaffolding: a test-local equivalent of the root package's hot
+// DB opener (hotloop.go's openHotDBForChunk). It uses only the public
+// hotchunk/catalog APIs the production code uses, so a lifecycle test creates the
+// SAME on-disk "ready" hot DB the real daemon would — which the freeze and the
+// watermark refinement then open by Layout path, exactly as production does.
+// ---------------------------------------------------------------------------
+
+// openHotDBForChunk is the test stand-in for the production hot DB opener,
+// reduced to its create branch: wipe any leftover dir, mark the hot:chunk key
+// transient, create the DB, then flip the key to ready. The returned open handle
+// belongs to the caller. Crash-recovery and fsync are deliberately omitted (those
+// edges are covered by the root hotloop_test.go opener tests).
+func openHotDBForChunk(cat *catalog.Catalog, chunkID chunk.ID, logger *supportlog.Entry) (*hotchunk.DB, error) {
+	hotDir := cat.Layout().HotChunkPath(chunkID)
+	if rmErr := os.RemoveAll(hotDir); rmErr != nil {
+		return nil, fmt.Errorf("wipe leftover hot dir %s: %w", hotDir, rmErr)
+	}
+	if putErr := cat.PutHotTransient(chunkID); putErr != nil {
+		return nil, fmt.Errorf("mark hot transient chunk %s: %w", chunkID, putErr)
+	}
+	handle, openErr := hotchunk.Open(hotDir, chunkID, logger)
+	if openErr != nil {
+		return nil, fmt.Errorf("create hot DB chunk %s: %w", chunkID, openErr)
+	}
+	if flipErr := cat.FlipHotReady(chunkID); flipErr != nil {
+		_ = handle.Close()
+		return nil, fmt.Errorf("flip hot ready chunk %s: %w", chunkID, flipErr)
+	}
+	return handle, nil
+}
+
+// openLiveHotDB is the t-aware wrapper over openHotDBForChunk: it opens chunk c's
+// hot DB with a silent logger, fails the test on error, and returns the handle.
+func openLiveHotDB(t *testing.T, cat *catalog.Catalog, c chunk.ID) *hotchunk.DB {
+	t.Helper()
+	handle, openErr := openHotDBForChunk(cat, c, silentLogger())
+	require.NoError(t, openErr)
+	return handle
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/hotkeys_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/hotkeys_test.go
new file mode 100644
index 000000000..e0fb16c79
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/hotkeys_test.go
@@ -0,0 +1,54 @@
+package lifecycle
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// writeArtifact writes a placeholder artifact file (mode 0o644) at path, creating parent
+// dirs (mode 0o755), so a test can assert presence/absence around the catalog protocol.
+func writeArtifact(t *testing.T, path string) {
+	t.Helper()
+	require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
+	require.NoError(t, os.WriteFile(path, []byte("artifact"), 0o644))
+}
+
+// hotKeyExists reports whether chunk c has a hot:chunk key (any value). The
+// catalog's key existence read is unexported; this is the lifecycle-package test
+// shim over the public HotState ("" ⇒ absent).
+func hotKeyExists(cat *catalog.Catalog, c chunk.ID) (bool, error) {
+	s, err := cat.HotState(c)
+	return s != "", err
+}
+
+func TestRoundTripHotKeys(t *testing.T) {
+	cat, _ := testCatalog(t)
+
+	state, err := cat.HotState(7)
+	require.NoError(t, err)
+	require.Equal(t, geometry.HotState(""), state)
+
+	require.NoError(t, cat.PutHotTransient(7))
+	state, err = cat.HotState(7)
+	require.NoError(t, err)
+	require.Equal(t, geometry.HotTransient, state)
+
+	require.NoError(t, cat.FlipHotReady(7))
+	state, err = cat.HotState(7)
+	require.NoError(t, err)
+	require.Equal(t, geometry.HotReady, state)
+
+	require.NoError(t, cat.DeleteHotKey(7))
+	state, err = cat.HotState(7)
+	require.NoError(t, err)
+	require.Equal(t, geometry.HotState(""), state)
+	// Idempotent on a missing key.
+	require.NoError(t, cat.DeleteHotKey(7))
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle.go
new file mode 100644
index 000000000..3046a034c
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle.go
@@ -0,0 +1,228 @@
+package lifecycle
+
+import (
+	"context"
+	"fmt"
+	"sync/atomic"
+	"time"
+
+	"github.com/cenkalti/backoff/v4"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/backfill"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/observability"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// The lifecycle tick runs three stages in order: (1) plan-and-execute (the same
+// resolve+executePlan as backfill, over [floor, lastChunk]); (2) discard scan;
+// (3) prune scan. The tick is a pure function of the catalog — the two goroutines
+// share no state.
+//
+// The retention floor has two roles with OPPOSITE safe directions (design
+// "Lifecycle"): as a RETENTION boundary erring low is harmless (an extra chunk
+// lingers, or a read returns not-found via the missing-file rule); as a
+// PRODUCTION boundary erring low would in principle plan a build below existing
+// storage — but producibility is enforced lazily per chunk in resolve, so the
+// plan simply spans [floor, lastChunk] and extending the bottom is backfill's job.
+
+// Config bundles the tick/loop dependencies. It composes the scheduler's
+// ExecConfig (shared postconditions + worker pool with backfill) plus the
+// retention and op-retry knobs.
+type Config struct {
+	backfill.ExecConfig
+
+	// RetentionChunks bounds the sliding retention floor's width. 0 disables the
+	// sliding floor (the fixed earliest-ledger floor alone applies).
+	RetentionChunks uint32
+
+	// OpRetryAttempts is the TOTAL number of tries per discard/prune op and
+	// OpRetryBackoff the fixed pause between tries (see runOps). Non-positive
+	// values fall back to defaults in WithLifecycleDefaults.
+	OpRetryAttempts int
+	OpRetryBackoff  time.Duration
+}
+
+const (
+	defaultOpRetryAttempts = 3               // total tries per op: 1 initial + 2 retries
+	defaultOpRetryBackoff  = 5 * time.Second // fixed pause between tries of one op
+)
+
+// WithLifecycleDefaults returns a copy of cfg with the embedded ExecConfig defaults applied
+// and non-positive op-retry knobs defaulted. Called once at startup before launching the loop.
+func (cfg Config) WithLifecycleDefaults() Config {
+	cfg.ExecConfig = cfg.WithDefaults()
+	if cfg.OpRetryBackoff <= 0 {
+		cfg.OpRetryBackoff = defaultOpRetryBackoff
+	}
+	if cfg.OpRetryAttempts <= 0 {
+		cfg.OpRetryAttempts = defaultOpRetryAttempts
+	}
+	return cfg
+}
+
+// runOps executes ops sequentially. A failing op is retried in place, up to a
+// bounded number of tries separated by a fixed pause, before its error is
+// returned. The discard/prune ops are idempotent file deletions, so a transient
+// failure (a busy file, a slow fsync) is exactly the retryable kind — retrying
+// here avoids canceling ingestion through the shared errgroup and forcing a
+// whole-daemon restart (which relaunches captive core). ctx is checked before
+// every op and also aborts the backoff, so a shutdown mid-scan stops promptly;
+// the ctx error surfaces up through Loop for supervise to classify as clean.
+func runOps(ctx context.Context, cfg Config, ops []func() error) error {
+	// A zero-value Config (no WithLifecycleDefaults, e.g. a test harness) still gets one try per op.
+	tries := max(cfg.OpRetryAttempts, 1)
+	//nolint:gosec // tries >= 1, so tries-1 >= 0
+	retries := uint64(tries - 1)
+	for i := range ops {
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			return ctxErr
+		}
+		// A fresh policy per op: 1 initial try + retries more, fixed pause.
+		policy := backoff.WithMaxRetries(backoff.NewConstantBackOff(cfg.OpRetryBackoff), retries)
+		if err := backoff.Retry(ops[i], backoff.WithContext(policy, ctx)); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// runLifecycle runs one tick over the three stages for just-completed chunk
+// lastChunk. through = lastChunk.LastLedger() is the single snapshot every stage
+// shares, so a boundary committing mid-tick can't make stages contradict (it's
+// next tick's work). Plan range is [floor, lastChunk] (start raised to storage);
+// discard/prune key off through.
+//
+// It returns the first stage error WITHOUT classifying it: Loop propagates it to
+// run's errgroup and supervise decides clean-vs-restart (a canceled ctx surfaces
+// as a ctx error supervise treats as a clean shutdown).
+func runLifecycle(ctx context.Context, cfg Config, cat *catalog.Catalog, lastChunk chunk.ID) error {
+	metrics := observability.MetricsOrNop(cfg.Metrics)
+	logger := cfg.Logger
+
+	// The one snapshot every stage shares. earliest and the retention gate are read
+	// and computed ONCE here (not re-derived per scan), then passed to both scans.
+	through := lastChunk.LastLedger()
+
+	earliest, _, err := cat.EarliestLedger()
+	if err != nil {
+		return fmt.Errorf("read earliest ledger: %w", err)
+	}
+	floorLedger := EffectiveRetentionFloor(through, cfg.RetentionChunks, earliest)
+	gate := RetentionFloorAt(floorLedger)
+
+	// Retention-floor gauge only. The last-committed gauge is owned by the ingestion
+	// loop (which holds the true, possibly mid-chunk value); re-emitting it here from
+	// the chunk-aligned `through` would regress it on every tick.
+	metrics.RetentionFloor(floorLedger)
+	logger.WithField("through", through).
+		WithField("floor_chunk", gate.FirstChunk().String()).
+		Debug("streaming: lifecycle tick — derived snapshot")
+
+	// Stage 1 — plan-and-execute (freeze + index fold) over [floor, lastChunk], via
+	// the same entry point backfill uses (resolve → executePlan → Freeze metric,
+	// recorded internally). A canceled ctx makes RunBackfill return ctx.Err(), which
+	// propagates up for supervise to treat as a clean shutdown. lastChunk is always
+	// a completed chunk (boundary fence + post-backfill seed), so the only guard
+	// needed is the empty-range check (floor above lastChunk when retention outran
+	// production). An empty range emits no Freeze sample — the Discard/Prune samples
+	// below carry empty-tick visibility.
+	if start := gate.FirstChunk(); start <= lastChunk {
+		if eerr := backfill.RunBackfill(ctx, cfg.ExecConfig, start, lastChunk); eerr != nil {
+			return fmt.Errorf("run backfill [%s,%s]: %w", start, lastChunk, eerr)
+		}
+	}
+
+	// Stage 2 — discard scan: collect the eligible ops, then run them with bounded per-op retry.
+	discardStart := time.Now()
+	discardOps, err := eligibleDiscardOps(cat, gate, through)
+	if err != nil {
+		return fmt.Errorf("eligible discard ops: %w", err)
+	}
+	if err := runOps(ctx, cfg, discardOps); err != nil {
+		return fmt.Errorf("discard op: %w", err)
+	}
+	metrics.Discard(len(discardOps), time.Since(discardStart))
+	if len(discardOps) > 0 {
+		logger.WithField("discarded", len(discardOps)).Info("streaming: lifecycle discard stage complete")
+	}
+
+	// Live hot-chunk gauge, re-read from the catalog after the discard stage ran.
+	hot, err := cat.HotChunkKeys()
+	if err != nil {
+		return fmt.Errorf("read hot chunk keys: %w", err)
+	}
+	metrics.LiveHotChunks(len(hot))
+
+	// Stage 3 — prune scan: same shape; the metric counts artifacts, which can differ from the op count.
+	pruneStart := time.Now()
+	pruneOps, prunedArtifacts, err := eligiblePruneOps(cat, gate)
+	if err != nil {
+		return fmt.Errorf("eligible prune ops: %w", err)
+	}
+	if err := runOps(ctx, cfg, pruneOps); err != nil {
+		return fmt.Errorf("prune op: %w", err)
+	}
+	metrics.Prune(prunedArtifacts, time.Since(pruneStart))
+	if prunedArtifacts > 0 {
+		logger.WithField("pruned", prunedArtifacts).Info("streaming: lifecycle prune stage complete")
+	}
+	return nil
+}
+
+// BoundarySignal couples ingestion (the producer) to the lifecycle Loop (the
+// consumer): ingestion stores the latest completed chunk id and pings a
+// 1-buffered wake; the Loop blocks on the wake, then reads the latest id. A
+// latest-CELL (not a queue) means a slow lifecycle can never fall behind — one
+// tick over [floor, latest] subsumes every skipped boundary — so there is no
+// bounded buffer to overflow and thus no "fell behind" fatal path. Safe for one
+// producer and one consumer.
+type BoundarySignal struct {
+	latest atomic.Uint32 // id of the most recently published completed chunk
+	wake   chan struct{} // 1-buffered wake-up the Loop blocks on
+}
+
+// NewBoundarySignal returns a ready signal: a 1-buffered wake channel and an empty (zero) latest cell.
+func NewBoundarySignal() *BoundarySignal {
+	return &BoundarySignal{wake: make(chan struct{}, 1)}
+}
+
+// Publish records c as the latest completed chunk and wakes the Loop. The wake is
+// non-blocking: a pending wake already covers this boundary (the Loop will read
+// the newest latest when it runs), so a full buffer is dropped, never blocked on.
+func (s *BoundarySignal) Publish(c chunk.ID) {
+	s.latest.Store(uint32(c)) // store BEFORE the wake so a woken Loop always finds a value
+	select {
+	case s.wake <- struct{}{}:
+	default: // a wake is already pending; the Loop will read this newer value when it runs
+	}
+}
+
+// latestChunk returns the most recently published completed chunk id. Read before
+// any Publish it would yield the cell's zero value (chunk 0); but a wake is only
+// ever sent by Publish, AFTER it stores the cell, so a read that follows a wake needs no "published" flag.
+func (s *BoundarySignal) latestChunk() chunk.ID {
+	return chunk.ID(s.latest.Load())
+}
+
+// Loop is the lifecycle goroutine, driven by boundary events rather than a timer.
+// Each iteration waits for either a wake on sig or ctx.Done(); on a wake it reads
+// the latest completed chunk id and runs one tick over [floor, lastChunk] (which
+// subsumes every boundary skipped while it was busy), so it never blocks past shutdown.
+//
+// It returns the first tick error to its caller (run() joins it with ingestion in
+// an errgroup, so supervise decides clean-vs-restart). A cancellation observed at
+// the select returns nil; a cancellation mid-tick returns the tick's wrapped ctx
+// error — both are clean, since supervise keys off the daemon ctx, not this return.
+func Loop(ctx context.Context, cfg Config, cat *catalog.Catalog, sig *BoundarySignal) error {
+	for {
+		select {
+		case <-sig.wake:
+			if tickErr := runLifecycle(ctx, cfg, cat, sig.latestChunk()); tickErr != nil {
+				return tickErr
+			}
+		case <-ctx.Done():
+			return nil
+		}
+	}
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_arith_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_arith_test.go
new file mode 100644
index 000000000..84d230749
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_arith_test.go
@@ -0,0 +1,94 @@
+package lifecycle
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// ---------------------------------------------------------------------------
+// Arithmetic: geometry.LastCompleteChunkAt, EffectiveRetentionFloor.
+// ---------------------------------------------------------------------------
+
+func TestLastCompleteChunkAt(t *testing.T) {
+	cases := []struct {
+		desc string
+		in   uint32
+		want int64
+	}{
+		{"below first chunk's last ledger => sentinel -1", chunk.ID(0).LastLedger() - 1, -1},
+		{"genesis sentinel (FirstLedgerSeq-1) => -1", chunk.FirstLedgerSeq - 1, -1},
+		{"ledger 0 does not underflow => -1", 0, -1},
+		{"chunk 0's last ledger => 0", chunk.ID(0).LastLedger(), 0},
+		{"chunk 0's last ledger + 1 (into chunk 1) => still 0", chunk.ID(0).LastLedger() + 1, 0},
+		{"chunk 5's last ledger => 5", chunk.ID(5).LastLedger(), 5},
+		{"the doc's example 10_001 => 0", 10_001, 0},
+	}
+	for _, c := range cases {
+		t.Run(c.desc, func(t *testing.T) {
+			require.Equal(t, c.want, geometry.LastCompleteChunkAt(c.in))
+		})
+	}
+}
+
+func TestEffectiveRetentionFloor(t *testing.T) {
+	genesis := uint32(chunk.FirstLedgerSeq)
+	tests := []struct {
+		name            string
+		upperBound      uint32
+		retentionChunks uint32
+		earliest        uint32
+		want            uint32
+	}{
+		{
+			name:            "no sliding (retention 0): earliest floor wins",
+			upperBound:      chunk.ID(100).LastLedger(),
+			retentionChunks: 0,
+			earliest:        chunk.ID(10).FirstLedger(),
+			want:            chunk.ID(10).FirstLedger(),
+		},
+		{
+			name:            "no sliding, no earliest pin: genesis",
+			upperBound:      chunk.ID(100).LastLedger(),
+			retentionChunks: 0,
+			earliest:        0, // 0 = no earliest-ledger pin
+			want:            genesis,
+		},
+		{
+			name:            "sliding floor leads when above earliest",
+			upperBound:      chunk.ID(100).LastLedger(), // last complete chunk = 100
+			retentionChunks: 10,                         // floor chunk = 100-10+1 = 91
+			earliest:        0,
+			want:            chunk.ID(91).FirstLedger(),
+		},
+		{
+			name:            "earliest floor leads when above the sliding floor",
+			upperBound:      chunk.ID(100).LastLedger(),
+			retentionChunks: 10,                         // sliding floor chunk = 91
+			earliest:        chunk.ID(95).FirstLedger(), // higher
+			want:            chunk.ID(95).FirstLedger(),
+		},
+		{
+			name:            "retention wider than history clamps to chunk 0, never wraps",
+			upperBound:      chunk.ID(3).LastLedger(),
+			retentionChunks: 1000, // sliding chunk = 3-1000+1 < 0 => clamp to chunk 0
+			earliest:        0,
+			want:            chunk.ID(0).FirstLedger(),
+		},
+		{
+			name:            "young store (upperBound below first chunk) clamps to chunk 0",
+			upperBound:      chunk.FirstLedgerSeq + 5, // a few ledgers past genesis: no complete chunk yet
+			retentionChunks: 5,
+			earliest:        0,
+			want:            chunk.ID(0).FirstLedger(),
+		},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			require.Equal(t, tc.want, EffectiveRetentionFloor(tc.upperBound, tc.retentionChunks, tc.earliest))
+		})
+	}
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_helpers_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_helpers_test.go
new file mode 100644
index 000000000..72c8471f6
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_helpers_test.go
@@ -0,0 +1,192 @@
+package lifecycle
+
+import (
+	"context"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/go-stellar-sdk/keypair"
+	"github.com/stellar/go-stellar-sdk/network"
+	"github.com/stellar/go-stellar-sdk/xdr"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/backfill"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// lifecyclePassphrase is the (public-network) passphrase the one-tx fixture hashes
+// against (any stable value works; the index only needs deterministic hashes).
+const lifecyclePassphrase = network.PublicNetworkPassphrase
+
+// oneTxLCMRand builds the wire bytes of a V2 LedgerCloseMeta carrying ONE
+// transaction for seq (random source account, so the tx hash differs per call).
+// A chunk ingested with at least one such ledger yields a NON-empty txhash .bin —
+// streamhash refuses to build a cold index over zero keys (txhash.ErrEmptyBuildSet),
+// so a fully zero-tx chunk cannot exercise the real index fold. Mirrors
+// ingest_test's buildLCMReturningHashes, trimmed to one tx.
+func oneTxLCMRand(t *testing.T, seq uint32) []byte {
+	t.Helper()
+	envelope := xdr.TransactionEnvelope{
+		Type: xdr.EnvelopeTypeEnvelopeTypeTx,
+		V1: &xdr.TransactionV1Envelope{
+			Tx: xdr.Transaction{
+				SourceAccount: xdr.MustMuxedAddress(keypair.MustRandom().Address()),
+				Ext:           xdr.TransactionExt{V: 1, SorobanData: &xdr.SorobanTransactionData{}},
+			},
+		},
+	}
+	hash, err := network.HashTransactionInEnvelope(envelope, lifecyclePassphrase)
+	require.NoError(t, err)
+
+	comp := []xdr.TxSetComponent{{
+		Type: xdr.TxSetComponentTypeTxsetCompTxsMaybeDiscountedFee,
+		TxsMaybeDiscountedFee: &xdr.TxSetComponentTxsMaybeDiscountedFee{
+			Txs: []xdr.TransactionEnvelope{envelope},
+		},
+	}}
+	opResults := []xdr.OperationResult{}
+	lcm := xdr.LedgerCloseMeta{
+		V: 2,
+		V2: &xdr.LedgerCloseMetaV2{
+			LedgerHeader: xdr.LedgerHeaderHistoryEntry{
+				Header: xdr.LedgerHeader{
+					ScpValue:  xdr.StellarValue{CloseTime: xdr.TimePoint(0)},
+					LedgerSeq: xdr.Uint32(seq),
+				},
+			},
+			TxSet: xdr.GeneralizedTransactionSet{
+				V:       1,
+				V1TxSet: &xdr.TransactionSetV1{Phases: []xdr.TransactionPhase{{V: 0, V0Components: &comp}}},
+			},
+			TxProcessing: []xdr.TransactionResultMetaV1{{
+				TxApplyProcessing: xdr.TransactionMeta{
+					V:  4,
+					V4: &xdr.TransactionMetaV4{Operations: []xdr.OperationMetaV2{}},
+				},
+				Result: xdr.TransactionResultPair{
+					TransactionHash: hash,
+					Result: xdr.TransactionResult{
+						FeeCharged: 100,
+						Result:     xdr.TransactionResultResult{Code: xdr.TransactionResultCodeTxSuccess, Results: &opResults},
+					},
+				},
+			}},
+		},
+	}
+	raw, err := lcm.MarshalBinary()
+	require.NoError(t, err)
+	return raw
+}
+
+// ingestFullHotChunk creates a "ready" hot DB for chunk c and ingests every
+// ledger in the chunk (all CFs, contiguous from FirstLedger), then closes the
+// write handle — the post-boundary state the lifecycle freezes from. The hot
+// key is left "ready" and the dir is on disk, as the boundary handoff leaves it.
+func ingestFullHotChunk(t *testing.T, cat *catalog.Catalog, c chunk.ID) {
+	t.Helper()
+	db := openLiveHotDB(t, cat, c)
+	for seq := c.FirstLedger(); seq <= c.LastLedger(); seq++ {
+		// The first ledger carries one tx so the chunk's txhash .bin is non-empty
+		// (streamhash refuses a zero-key index); the rest stay zero-tx for speed.
+		var raw []byte
+		if seq == c.FirstLedger() {
+			raw = oneTxLCMRand(t, seq)
+		} else {
+			raw = zeroTxLCMBytes(t, seq)
+		}
+		_, err := db.IngestLedger(seq, xdr.LedgerCloseMetaView(raw))
+		require.NoError(t, err)
+	}
+	require.NoError(t, db.Close()) // release the write handle (boundary handoff)
+}
+
+// lifecycleTestConfig wires a Config over the real production primitives: the
+// given catalog, a silent logger, two workers and a zero-value ProcessConfig.
+// The freeze reads the hot tier by opening the chunk's real on-disk DB (created
+// by ingestFullHotChunk) straight from its Layout path — the same open production
+// does after #22. The op-retry knobs stay zero, so runOps tries each op once. A
+// tick failure surfaces as runLifecycle's returned error (no Fatalf), so tests
+// assert on that error rather than a recorder.
+func lifecycleTestConfig(t *testing.T, cat *catalog.Catalog, retentionChunks uint32) Config {
+	t.Helper()
+	execCfg := backfill.ExecConfig{
+		Catalog: cat,
+		Logger:  silentLogger(),
+		Workers: 2,
+		Process: backfill.ProcessConfig{},
+	}
+	return Config{ExecConfig: execCfg, RetentionChunks: retentionChunks}
+}
+
+// lastCompleteChunkAtID adapts geometry.LastCompleteChunkAt to a chunk.ID: a
+// non-negative result converts with ok=true, a negative one yields (0, false).
+// Was a production helper until #25 (the tick now plans [floor, lastChunk]
+// without it); it lives here for the tick-mirroring helpers.
+func lastCompleteChunkAtID(ledger uint32) (chunk.ID, bool) {
+	if idx := geometry.LastCompleteChunkAt(ledger); idx >= 0 {
+		return chunk.ID(idx), true
+	}
+	return 0, false
+}
+
+// runTickForCatalog drives one lifecycle tick the way ingestion would: it
+// derives the complete-through ledger from the catalog, maps it to the highest
+// complete chunk (the chunk id ingestion hands over at a boundary), and passes
+// that chunk as lastChunk, returning the tick's error. On a young network with no
+// complete chunk it runs no tick and returns nil — mirroring production, where
+// the boundary/seed guard upstream never triggers the Loop in that state. A
+// failure to derive the through ledger fails the test.
+func runTickForCatalog(ctx context.Context, t *testing.T, cfg Config, cat *catalog.Catalog) error {
+	t.Helper()
+	through, err := deriveCompleteThrough(cat)
+	require.NoError(t, err)
+	if last, ok := lastCompleteChunkAtID(through); ok {
+		return runLifecycle(ctx, cfg, cat, last)
+	}
+	return nil
+}
+
+// makeReadyHotDirNoData opens a real (empty) hot DB for c through the test
+// opener and closes it straight away, so the dir exists on disk and the key is
+// "ready" — the state a discard scan inspects without needing a full ingest. Any
+// open or close error fails the test.
+func makeReadyHotDirNoData(t *testing.T, cat *catalog.Catalog, c chunk.ID) {
+	t.Helper()
+	handle := openLiveHotDB(t, cat, c)
+	require.NoError(t, handle.Close())
+}
+
+// gateFor reads the catalog's earliest ledger and builds the retention gate the eligibility
+// scans take, from the same (through, retention, earliest) snapshot runLifecycle uses.
+func gateFor(t *testing.T, cfg Config, cat *catalog.Catalog, through uint32) RetentionFloor {
+	t.Helper()
+	earliestPin, _, readErr := cat.EarliestLedger()
+	require.NoError(t, readErr)
+	return NewRetentionFloor(through, cfg.RetentionChunks, earliestPin)
+}
+
+// assertQuiescent replays the tick's three derivations (plan, discard scan,
+// prune scan) against the SAME through snapshot and asserts that none of them
+// schedules any work — the quiescence postcondition.
+func assertQuiescent(t *testing.T, cfg Config, cat *catalog.Catalog, through uint32) {
+	t.Helper()
+	gate := gateFor(t, cfg, cat, through)
+	start := gate.FirstChunk()
+	rangeEnd, ok := lastCompleteChunkAtID(through)
+	if ok && start <= rangeEnd {
+		// At quiescence resolve finds an empty plan, so RunBackfill (resolve +
+		// executePlan) is a no-op that returns nil — even with no Backend wired,
+		// since an empty plan never reaches backfillSource.
+		perr := backfill.RunBackfill(context.Background(), cfg.ExecConfig, start, rangeEnd)
+		assert.NoError(t, perr, "re-running backfill schedules no work at quiescence")
+	}
+	dops, err := eligibleDiscardOps(cat, gate, through)
+	require.NoError(t, err)
+	assert.Empty(t, dops, "re-scan finds no discard work at quiescence")
+	pops, _, err := eligiblePruneOps(cat, gate)
+	require.NoError(t, err)
+	assert.Empty(t, pops, "re-scan finds no prune work at quiescence")
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_loop_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_loop_test.go
new file mode 100644
index 000000000..d8702a11e
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_loop_test.go
@@ -0,0 +1,114 @@
+package lifecycle
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// ---------------------------------------------------------------------------
+// Loop: selects on BOTH ctx.Done and the boundary signal's wake; reads the
+// most-recent published chunk id from the latest-cell.
+// ---------------------------------------------------------------------------
+
+// TestLifecycleLoop_RunsTickPerNotifyThenStopsOnCtx: a boundary signal (a completed
+// chunk id) runs a tick; a ctx cancellation returns the loop. The loop never
+// blocks forever and never fatals on shutdown.
+func TestLifecycleLoop_RunsTickPerNotifyThenStopsOnCtx(t *testing.T) {
+	cat, _ := smallTxHashIndexCatalog(t, 1)
+	cfg := lifecycleTestConfig(t, cat, 0)
+
+	// Make the tick observable WITHOUT a slow full ingest: chunk 0 is already
+	// fully frozen and folded into its (terminal, cpi=1) window, with a leftover
+	// "ready" hot DB on disk. The plan stage is a no-op; the discard scan retires
+	// chunk 0's hot DB. A live chunk 1 keeps chunk 0 below the partition.
+	freezeKinds(t, cat, 0, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash)
+	freezeCoverage(t, cat, cat.TxHashIndexLayout().TxHashIndexID(0), 0, 0) // terminal coverage of chunk 0
+	makeReadyHotDirNoData(t, cat, 0)
+	live := openLiveHotDB(t, cat, 1)
+	t.Cleanup(func() { _ = live.Close() })
+
+	sig := NewBoundarySignal()
+	ctx, cancel := context.WithCancel(context.Background())
+	done := make(chan error, 1) // buffered: the Loop goroutine never blocks on its send
+	go func() { done <- Loop(ctx, cfg, cat, sig) }()
+
+	sig.Publish(chunk.ID(0)) // ingestion hands over the just-completed chunk 0
+	require.Eventually(t, func() bool {
+		has, err := hotKeyExists(cat, 0)
+		return err == nil && !has
+	}, 10*time.Second, 20*time.Millisecond, "the signal ran a tick that discarded chunk 0")
+
+	cancel()
+	select {
+	case err := <-done:
+		require.NoError(t, err, "a ctx-canceled Loop is a clean return")
+	case <-time.After(5 * time.Second):
+		t.Fatal("the loop did not return on ctx cancellation")
+	}
+}
+
+// TestLifecycleLoop_DrainsToMostRecent: the latest-cell coalesces rapid
+// boundaries — publishing 0 then 1 lands a tick over the most-recent (chunk 1)
+// that subsumes chunk 0. With chunks 0 and 1 both frozen+covered and a live chunk
+// 2, both are discarded (whether that takes one coalesced tick or two).
+func TestLifecycleLoop_DrainsToMostRecent(t *testing.T) {
+	cat, _ := smallTxHashIndexCatalog(t, 1)
+	cfg := lifecycleTestConfig(t, cat, 0)
+
+	for c := chunk.ID(0); c <= 1; c++ {
+		freezeKinds(t, cat, c, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash)
+		freezeCoverage(t, cat, cat.TxHashIndexLayout().TxHashIndexID(c), c, c)
+		makeReadyHotDirNoData(t, cat, c)
+	}
+	live := openLiveHotDB(t, cat, 2)
+	t.Cleanup(func() { _ = live.Close() })
+
+	sig := NewBoundarySignal()
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel() // backstop if an assertion fails before the explicit cancel below
+	done := make(chan error, 1)
+	go func() { done <- Loop(ctx, cfg, cat, sig) }()
+
+	sig.Publish(chunk.ID(0))
+	sig.Publish(chunk.ID(1)) // latest-cell coalesces: a tick over [floor, 1] discards both
+	require.Eventually(t, func() bool {
+		h0, e0 := hotKeyExists(cat, 0)
+		h1, e1 := hotKeyExists(cat, 1)
+		return e0 == nil && e1 == nil && !h0 && !h1
+	}, 10*time.Second, 20*time.Millisecond, "one drained tick discarded both completed chunks")
+
+	cancel()
+	select {
+	case err := <-done:
+		require.NoError(t, err, "a ctx-canceled Loop is a clean return")
+	case <-time.After(5 * time.Second):
+		t.Fatal("the loop did not return on ctx cancellation")
+	}
+}
+
+// TestLifecycleLoop_ReturnsImmediatelyOnAlreadyCancelledCtx: an already-canceled
+// ctx makes the loop return without running any tick (never blocks on the
+// channel forever).
+func TestLifecycleLoop_ReturnsImmediatelyOnAlreadyCancelledCtx(t *testing.T) {
+	cat, _ := smallTxHashIndexCatalog(t, 1)
+	cfg := lifecycleTestConfig(t, cat, 0)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // canceled before Loop is ever started
+
+	sig := NewBoundarySignal() // never published to
+	done := make(chan error, 1)
+	go func() { done <- Loop(ctx, cfg, cat, sig) }()
+	select {
+	case err := <-done:
+		require.NoError(t, err, "an already-canceled ctx is a clean return")
+	case <-time.After(5 * time.Second):
+		t.Fatal("the loop blocked instead of observing the canceled ctx")
+	}
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_test.go
new file mode 100644
index 000000000..3d3398d11
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_test.go
@@ -0,0 +1,209 @@
+package lifecycle
+
+import (
+	"context"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// ---------------------------------------------------------------------------
+// End-to-end tick harness: real catalog + real hotchunk DBs.
+// ---------------------------------------------------------------------------
+
+// TestRunLifecycleTick_BoundaryFreezesFoldsDiscards is the "one boundary, end to
+// end" walk: chunk 0 just closed (its full hot DB is on disk, ready), chunk 1 is
+// the new live chunk. One tick must:
+//   - freeze chunk 0's cold artifacts FROM its hot DB (via processChunk's hot
+//     branch),
+//   - fold chunk 0 into its window's index (terminal coverage, cpi=1),
+//   - discard chunk 0's hot DB (cold artifacts now fully serve it),
+//   - leave the live chunk 1 untouched.
+//
+// Then re-running the tick is a no-op (quiescence).
+func TestRunLifecycleTick_BoundaryFreezesFoldsDiscards(t *testing.T) {
+	// full-chunk ingest on an isolated TempDir/catalog; overlaps the other heavy
+	// tests to fit the gate's go-test timeout.
+	t.Parallel()
+	cat, _ := smallTxHashIndexCatalog(t, 1) // window w == chunk w; a one-chunk window finalizes immediately
+	cfg := lifecycleTestConfig(t, cat, 0)
+
+	// Chunk 0: just-closed, full hot DB on disk. Chunk 1: the new live chunk.
+	ingestFullHotChunk(t, cat, 0)
+	live := openLiveHotDB(t, cat, 1) // the live chunk's hot DB (held open by "ingestion")
+	t.Cleanup(func() { _ = live.Close() })
+
+	require.NoError(t, runTickForCatalog(context.Background(), t, cfg, cat), "a healthy tick never fails")
+
+	// Chunk 0's cold ledgers and events are frozen (txhash is checked via the index below).
+	for _, kind := range []geometry.Kind{geometry.KindLedgers, geometry.KindEvents} {
+		state, err := cat.State(0, kind)
+		require.NoError(t, err)
+		assert.Equal(t, geometry.StateFrozen, state, "chunk 0 %s frozen", kind)
+	}
+	// The window's index is terminal and covers chunk 0.
+	covered, err := cat.FrozenIndexCovers(0)
+	require.NoError(t, err)
+	assert.True(t, covered, "the window index folded chunk 0 in")
+	fk, ok, err := cat.FrozenTxHashIndex(cat.TxHashIndexLayout().TxHashIndexID(0))
+	require.NoError(t, err)
+	require.True(t, ok)
+	assert.True(t, cat.TxHashIndexLayout().IsTerminalCoverage(fk), "a one-chunk window is terminal")
+
+	// Chunk 0's hot DB is discarded (cold artifacts fully serve it).
+	has, err := hotKeyExists(cat, 0)
+	require.NoError(t, err)
+	assert.False(t, has, "chunk 0's hot key is gone")
+
+	// The live chunk 1 is untouched: its hot key still "ready", no cold artifacts.
+	hotState, err := cat.HotState(1)
+	require.NoError(t, err)
+	assert.Equal(t, geometry.HotReady, hotState, "the live chunk's hot key is untouched")
+	lfs1, err := cat.State(1, geometry.KindLedgers)
+	require.NoError(t, err)
+	assert.Equal(t, geometry.State(""), lfs1, "the live chunk is not frozen")
+
+	// Quiescence: re-running the tick produces no work.
+	through, err := deriveCompleteThrough(cat)
+	require.NoError(t, err)
+	assertQuiescent(t, cfg, cat, through)
+}
+
+// TestRunLifecycleTick_DiscardGatedOnIndexCoverage: a complete chunk whose cold
+// ledgers+events are frozen but whose window index does NOT yet cover it keeps its
+// hot DB (it still serves tx lookups). Only once a frozen coverage includes it does
+// the discard fire. cpi=2 so a single chunk does NOT finalize the window.
+func TestRunLifecycleTick_DiscardGatedOnIndexCoverage(t *testing.T) {
+	cat, _ := smallTxHashIndexCatalog(t, 2) // window 0 = chunks [0,1]
+	cfg := lifecycleTestConfig(t, cat, 0)
+
+	// Pre-freeze chunk 0's ledgers+events+txhash directly (no hot dependence), and
+	// leave it with a "ready" hot DB on disk. The window is NOT finalized (cpi=2,
+	// only chunk 0 present), so no terminal coverage exists.
+	freezeKinds(t, cat, 0, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash)
+	makeReadyHotDirNoData(t, cat, 0)
+	// A live chunk 1 above it so chunk 0 is below the partition boundary.
+	require.NoError(t, cat.PutHotTransient(1))
+
+	through := chunk.ID(0).LastLedger() // chunk 0 complete via cold
+	// txhash is frozen, ledgers/events frozen, but the window has no FROZEN coverage
+	// yet => indexCovers(0) is false => NOT discarded (still needed for lookups via
+	// its .bin/hot DB until the index folds it in).
+	ops, err := eligibleDiscardOps(cat, gateFor(t, cfg, cat, through), through)
+	require.NoError(t, err)
+	require.Empty(t, ops, "no index coverage yet: the hot DB stays")
+
+	// Now give the window's index a frozen coverage over chunk 0 (terminal needs
+	// chunk 1's .bin too, so build a non-terminal-but-covering coverage [0,0]).
+	freezeCoverage(t, cat, 0, 0, 0)
+	covered, err := cat.FrozenIndexCovers(0)
+	require.NoError(t, err)
+	require.True(t, covered)
+
+	ops, err = eligibleDiscardOps(cat, gateFor(t, cfg, cat, through), through)
+	require.NoError(t, err)
+	require.Len(t, ops, 1, "covered + nothing pending => discard eligible")
+	require.NoError(t, ops[0]())
+
+	has, err := hotKeyExists(cat, 0)
+	require.NoError(t, err)
+	assert.False(t, has, "the now-covered chunk's hot DB is discarded")
+}
+
+// TestRunLifecycleTick_PastFloorPrune: a chunk wholly below the effective
+// retention floor has its artifact files and hot DB swept, regardless of state.
+func TestRunLifecycleTick_PastFloorPrune(t *testing.T) {
+	cat, _ := smallTxHashIndexCatalog(t, 1)
+	cfg := lifecycleTestConfig(t, cat, 2) // retain ~2 chunks
+
+	// CompleteThrough will be chunk 5's last ledger (positional: live chunk 6).
+	// floor = geometry.LastCompleteChunkAt(through)-retention+1 = 5-2+1 = chunk 4's first
+	// ledger. So chunks 0..3 are wholly past the floor and must be swept.
+	for c := chunk.ID(0); c <= 5; c++ {
+		freezeKinds(t, cat, c, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash)
+		writeArtifact(t, cat.Layout().LedgerPackPath(c))
+		freezeCoverage(t, cat, cat.TxHashIndexLayout().TxHashIndexID(c), c, c) // each one-chunk window terminal
+	}
+	// A past-floor hot DB too (chunk 1).
+	makeReadyHotDirNoData(t, cat, 1)
+	live := openLiveHotDB(t, cat, 6) // live chunk
+	t.Cleanup(func() { _ = live.Close() })
+
+	through, err := deriveCompleteThrough(cat)
+	require.NoError(t, err)
+	require.Equal(t, chunk.ID(5).LastLedger(), through)
+	floor := EffectiveRetentionFloor(through, cfg.RetentionChunks, 0) // 0 = earliest pin, presumably (confirm)
+	require.Equal(t, chunk.ID(4).FirstLedger(), floor, "floor anchors 2 chunks back")
+
+	require.NoError(t, runTickForCatalog(context.Background(), t, cfg, cat), "prune tick never fails")
+
+	// Chunks 0..3 (wholly below the floor) are gone: keys and files.
+	for c := chunk.ID(0); c <= 3; c++ {
+		ledgers, serr := cat.State(c, geometry.KindLedgers)
+		require.NoError(t, serr)
+		assert.Equal(t, geometry.State(""), ledgers, "chunk %s ledgers key swept", c)
+		assert.NoFileExists(t, cat.Layout().LedgerPackPath(c), "chunk %s pack swept", c)
+		has, herr := hotKeyExists(cat, c)
+		require.NoError(t, herr)
+		assert.False(t, has, "chunk %s hot key swept", c)
+	}
+	// Chunk 4 (the floor chunk) and 5 are within retention and survive.
+	for c := chunk.ID(4); c <= 5; c++ {
+		ledgers, serr := cat.State(c, geometry.KindLedgers)
+		require.NoError(t, serr)
+		assert.Equal(t, geometry.StateFrozen, ledgers, "chunk %s in retention survives", c)
+	}
+
+	assertQuiescent(t, cfg, cat, through)
+}
+
+// TestRunLifecycleTick_PrunesTransientIndexDebris: a "freezing" index key (a
+// crashed build attempt) is swept regardless of window, even within retention.
+func TestRunLifecycleTick_PrunesTransientIndexDebris(t *testing.T) {
+	cat, _ := smallTxHashIndexCatalog(t, 2)
+	cfg := lifecycleTestConfig(t, cat, 0)
+
+	// A crashed build left a "freezing" coverage key (no commit).
+	_, err := cat.MarkTxHashIndexFreezing(0, 0, 0)
+	require.NoError(t, err)
+
+	through, err := deriveCompleteThrough(cat)
+	require.NoError(t, err)
+	ops, artifacts, err := eligiblePruneOps(cat, gateFor(t, cfg, cat, through))
+	require.NoError(t, err)
+	require.Len(t, ops, 1, "the freezing debris is swept")
+	require.Equal(t, 1, artifacts, "one index artifact swept")
+	require.NoError(t, ops[0]()) // run the single prune op the scan returned
+
+	covs, err := cat.AllTxHashIndexKeys()
+	require.NoError(t, err)
+	require.Empty(t, covs, "the freezing index key is gone")
+}
+
+// ---------------------------------------------------------------------------
+// ERROR PLUMBING: a failing tick RETURNS its error (no Fatalf / os.Exit).
+// supervise — not the tick — classifies ctx-cancel-is-clean vs restart (tested at
+// the daemon level: TestRunDaemon_LoadValidateWireStartCleanShutdown, TestSupervise_*).
+// ---------------------------------------------------------------------------
+
+// TestRunLifecycleTick_FailureReturnsError: when a plan op fails, runLifecycle
+// returns the error (only non-nil is asserted) rather than aborting the process
+// — so Loop can propagate it up through the errgroup to supervise. The chunk-0
+// build is GENUINELY unproducible: chunk 0 sits below a READY live chunk 1 (so it
+// counts as complete and the plan range [0,0] is non-empty), has no frozen
+// artifacts, and its hot key is "transient" (not a ready read source). With no
+// bulk Backend configured, backfillSource has no source for chunk 0 and
+// RunBackfill fails; MaxRetries defaults to 0, so it fails fast.
+func TestRunLifecycleTick_FailureReturnsError(t *testing.T) {
+	cat, _ := smallTxHashIndexCatalog(t, 1)
+	cfg := lifecycleTestConfig(t, cat, 0)      // hot tier read by path, no Backend
+	readyHot(t, cat, 1)                        // ready live chunk => through = chunk 0 last ledger
+	require.NoError(t, cat.PutHotTransient(0)) // chunk 0 below live, no frozen artifacts, not a ready source
+
+	err := runLifecycle(context.Background(), cfg, cat, 0) // plan range [0,0], the failing build
+	require.Error(t, err, "a genuine op failure surfaces up the call stack")
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress.go
new file mode 100644
index 000000000..d25e62224
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress.go
@@ -0,0 +1,163 @@
+package lifecycle
+
+import (
+	"fmt"
+
+	supportlog "github.com/stellar/go-stellar-sdk/support/log"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
+)
+
+// Progress is derived, never stored. "Highest complete chunk" arithmetic runs in
+// int64 (-1 = "nothing complete") to avoid uint32 wraparound on the pre-genesis
+// sentinel; geometry.CompleteThrough is the chokepoint (the signed chunk↔ledger
+// maps live in geometry so there is one -1 convention across the daemon).
+
+// LastCommittedLedger is the single highest-durably-committed-ledger derivation.
+// It maxes three terms, each in the signed domain so a fresh/young store never
+// underflows to MaxUint32:
+//
+//   - COLD — highest chunk with all artifacts durable (highestDurableChunk; -1 on
+//     a fresh start). Leads at startup before any hot key exists.
+//   - HOT — only when hot > cold, over "ready" keys: one read-only MaxCommittedSeq
+//     read of the highest ready hot DB (empty DB ⇒ positional CompleteThrough(hot-1)).
+//     The read-only open takes no RocksDB LOCK, so it never contends with a writer;
+//     in practice it runs before ingestion opens the live chunk anyway.
+//   - FLOOR — EarliestLedger()-1, computed as int64(earliest)-1 and clamped at 0
+//     so a zero pin cannot underflow; an absent pin contributes no term at all.
+//
+// logger is required (hotchunk.OpenReadOnly needs it); there is no logger-less
+// mode — the tick derives the frontier the same way startup does.
+func LastCommittedLedger(cat *catalog.Catalog, logger *supportlog.Entry) (uint32, error) {
+	cold, err := highestDurableChunk(cat)
+	if err != nil {
+		return 0, err
+	}
+	through := geometry.CompleteThrough(cold)
+
+	hot, err := highestReadyChunkSigned(cat)
+	if err != nil {
+		return 0, err
+	}
+	if hot > cold {
+		// One refinement read of the highest ready hot DB; loss detected lazily on
+		// this open (no eager scan over every ready key).
+		refined, rerr := refineWithHotDB(cat, logger, hot)
+		if rerr != nil {
+			return 0, rerr
+		}
+		through = max(through, refined)
+	}
+
+	earliest, ok, err := cat.EarliestLedger()
+	if err != nil {
+		return 0, err
+	}
+	if ok {
+		// Widen to int64 before the -1, then clamp at 0, so a zero pin cannot wrap.
+		floor := max(int64(earliest)-1, 0)
+		through = max(through, uint32(floor)) //nolint:gosec // floor in [0, MaxUint32), fits uint32
+	}
+
+	return through, nil
+}
+
+// refineWithHotDB reads the committed frontier out of the highest ready hot
+// chunk: it opens that chunk's DB read-only at its Layout path and returns
+// MaxCommittedSeq, or the positional CompleteThrough(live-1) when the DB holds
+// no committed ledger. An open failure (e.g. a "ready" key whose dir/DB is gone)
+// comes back as an ordinary, restartable error. The read-only open is relied on
+// to never recreate a missing DB and to replay any crash-left synced WAL.
+func refineWithHotDB(cat *catalog.Catalog, logger *supportlog.Entry, live int64) (uint32, error) {
+	liveID := chunk.ID(live) //nolint:gosec // live > cold >= -1, so live >= 0
+	dbPath := cat.Layout().HotChunkPath(liveID)
+	db, err := hotchunk.OpenReadOnly(dbPath, liveID, logger)
+	if err != nil {
+		return 0, fmt.Errorf("chunk %s is %q but its hot DB won't open: %w", liveID, geometry.HotReady, err)
+	}
+	defer func() { _ = db.Close() }()
+	top, found, seqErr := db.MaxCommittedSeq()
+	switch {
+	case seqErr != nil:
+		return 0, fmt.Errorf("chunk %s: read hot max committed seq: %w", liveID, seqErr)
+	case !found:
+		// Nothing committed yet: fall back to the positional term below this chunk.
+		return geometry.CompleteThrough(live - 1), nil
+	}
+	return top, nil
+}
+
+// highestReadyChunkSigned reports the largest "ready" hot chunk id, widened to
+// int64 so that -1 can stand for "no ready chunk" and callers can form live-1
+// for chunk 0 without a uint32 underflow.
+func highestReadyChunkSigned(cat *catalog.Catalog) (int64, error) {
+	keys, err := cat.ReadyHotChunkKeys()
+	if err != nil {
+		return 0, err
+	}
+	// Keys are sorted ascending, so the tail element is the highest.
+	if n := len(keys); n > 0 {
+		return int64(keys[n-1]), nil
+	}
+	return -1, nil
+}
+
+// highestDurableChunk scans the catalog's chunk artifact keys and returns the
+// largest chunk id whose artifacts are all durable: ledgers AND events frozen,
+// plus txhash either frozen or covered by a frozen index. It returns -1 when no
+// chunk qualifies (fresh start); a partially-frozen chunk is left to backfill.
+func highestDurableChunk(cat *catalog.Catalog) (int64, error) {
+	// One bit per frozen artifact kind; a chunk's mask accumulates across refs.
+	const (
+		ledgersBit uint8 = 1 << iota
+		eventsBit
+		txhashBit
+		coreBits = ledgersBit | eventsBit
+	)
+
+	artifacts, err := cat.ChunkArtifactKeys()
+	if err != nil {
+		return 0, err
+	}
+
+	frozenMask := make(map[chunk.ID]uint8)
+	for _, a := range artifacts {
+		if a.State != geometry.StateFrozen {
+			continue
+		}
+		switch a.Kind {
+		case geometry.KindLedgers:
+			frozenMask[a.Chunk] |= ledgersBit
+		case geometry.KindEvents:
+			frozenMask[a.Chunk] |= eventsBit
+		case geometry.KindTxHash:
+			frozenMask[a.Chunk] |= txhashBit
+		}
+	}
+
+	best := int64(-1)
+	for id, mask := range frozenMask {
+		// Ledgers and events must both be frozen; there is no substitute for them.
+		if mask&coreBits != coreBits {
+			continue
+		}
+		// A missing txhash artifact is acceptable only when a frozen index coverage
+		// stands in for it (the .bin may have been demoted). Going through the shared
+		// catalog predicate — which asserts INV-2, one frozen coverage per window —
+		// keeps watermark derivation, discard eligibility, and resolve in agreement.
+		if mask&txhashBit == 0 {
+			covered, cerr := cat.FrozenIndexCovers(id)
+			if cerr != nil {
+				return 0, cerr
+			}
+			if !covered {
+				continue
+			}
+		}
+		best = max(best, int64(id))
+	}
+	return best, nil
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_realdb_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_realdb_test.go
new file mode 100644
index 000000000..394816e2b
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_realdb_test.go
@@ -0,0 +1,144 @@
+package lifecycle
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger"
+)
+
+// seedLedgersCF reopens a CLOSED chunk hot DB raw and commits sparse ledgers-CF
+// entries in one batch via the production AddLedgerToBatch. These fixtures need
+// arbitrary frontier heights without the events CF's contiguity requirement, so
+// they write the one CF the watermark refinement reads (MaxCommittedSeq only
+// looks at the ledgers CF's last key; the payload bytes are never decoded).
+func seedLedgersCF(t *testing.T, cat *catalog.Catalog, c chunk.ID, entries ...ledger.Entry) {
+	t.Helper()
+	store, err := rocksdb.New(rocksdb.Config{
+		Path:           cat.Layout().HotChunkPath(c),
+		ColumnFamilies: hotchunk.ColumnFamilies(),
+		Logger:         silentLogger(),
+	})
+	require.NoError(t, err)
+	h := ledger.NewWithStore(store)
+	require.NoError(t, store.Batch(func(b *rocksdb.BatchWriter) error {
+		for _, e := range entries {
+			if berr := h.AddLedgerToBatch(b, e); berr != nil {
+				return berr
+			}
+		}
+		return nil
+	}))
+	require.NoError(t, store.Close()) // leave the DB closed again, as it was found
+}
+
+// seedReadyLiveDB brackets a "ready" hot DB for chunk c (via the production
+// opener) and commits a single ledgers-CF entry at seq `top` so MaxCommittedSeq
+// reads back `top`. top==0 leaves the DB empty (present=false). It closes the DB
+// as hygiene — a read-only reopen takes no RocksDB LOCK, so this isn't required
+// for the refinement to open, but it keeps the fixtures single-handle.
+func seedReadyLiveDB(t *testing.T, cat *catalog.Catalog, c chunk.ID, top uint32) {
+	t.Helper()
+	db := openLiveHotDB(t, cat, c) // ready key + real dir + empty DB
+	require.NoError(t, db.Close())
+	if top > 0 { // top == 0 is the "leave it empty" request, so nothing is seeded
+		seedLedgersCF(t, cat, c, ledger.Entry{Seq: top, Bytes: []byte("ledger")})
+	}
+}
+
+// TestDeriveWatermark_RealHotDB_RefinementIsNotStale exercises the watermark
+// refinement against a REAL per-chunk hotchunk DB opened read-only by its Layout
+// path (the same open production does). It proves the single-DB MaxCommittedSeq
+// refinement reads the actual committed ledger frontier (the ledgers CF's last
+// key) and is not a stale/constant value: the bound rises to exactly the highest
+// seq committed to the live chunk's real DB.
+func TestDeriveWatermark_RealHotDB_RefinementIsNotStale(t *testing.T) {
+	cat, _ := testCatalog(t)
+
+	live := chunk.ID(5)
+	// Production bracket: creates the hot dir, opens the SINGLE shared multi-CF
+	// DB, flips the hot key "ready". This is exactly what ingestion does.
+	db := openLiveHotDB(t, cat, live)
+	// Close the live writer before seeding — hygiene (the refinement's read-only
+	// reopen takes no RocksDB LOCK), keeping the fixture single-handle.
+	require.NoError(t, db.Close())
+
+	// Commit two real ledgers into the ledgers CF (the CF MaxCommittedSeq reads).
+	first := live.FirstLedger()
+	committedTop := first + 200 // sparse on purpose: nothing is written in between
+	seedLedgersCF(t, cat, live,
+		ledger.Entry{Seq: first, Bytes: []byte("ledger-A")},
+		ledger.Entry{Seq: committedTop, Bytes: []byte("ledger-B")},
+	)
+
+	// Sanity: positional baseline (live chunk 5 ⇒ everything below 5) is chunk 4's
+	// last ledger, strictly below the committed top — so the assertion below can
+	// only pass if the refinement actually read the real DB.
+	baseline := geometry.CompleteThrough(int64(live) - 1)
+	require.Equal(t, chunk.ID(4).LastLedger(), baseline)
+	require.Greater(t, committedTop, baseline, "fixture must put the real frontier above the baseline")
+
+	got, err := deriveWatermark(cat, silentLogger())
+	require.NoError(t, err)
+	require.Equal(t, committedTop, got,
+		"watermark must equal the REAL ledgers-CF last key, not the positional baseline")
+}
+
+// TestDeriveWatermark_RealHotDB_OpensHighestReady proves the refinement opens the
+// HIGHEST ready chunk (the live chunk), not just any ready chunk. Two ready chunks
+// have independent real hot DBs with DIFFERENT committed frontiers; the watermark
+// must reflect the higher chunk's DB. Only opening the real per-chunk DB by its
+// Layout path distinguishes the two — an "open ready[0] instead of ready[len-1]"
+// regression would land on the wrong frontier.
+func TestDeriveWatermark_RealHotDB_OpensHighestReady(t *testing.T) {
+	cat, _ := testCatalog(t)
+
+	lower, higher := chunk.ID(4), chunk.ID(7)
+
+	// Lower ready chunk: a real DB committed near the TOP of chunk 4. If the
+	// refinement wrongly opened the lower chunk, the bound would land here.
+	lowDB := openLiveHotDB(t, cat, lower)
+	require.NoError(t, lowDB.Close())
+	lowTop := lower.FirstLedger() + 9000
+	seedLedgersCF(t, cat, lower, ledger.Entry{Seq: lowTop, Bytes: []byte("low")})
+
+	// Higher ready chunk (the live chunk): committed mid-chunk 7.
+	highDB := openLiveHotDB(t, cat, higher)
+	require.NoError(t, highDB.Close())
+	highMid := higher.FirstLedger() + 1234
+	seedLedgersCF(t, cat, higher, ledger.Entry{Seq: highMid, Bytes: []byte("high")})
+
+	// The two frontiers must be unambiguous: chunk 7 mid-seq is far above chunk 4's
+	// top, so reading the wrong chunk yields a strictly different (lower) answer.
+	require.Greater(t, highMid, lowTop)
+
+	got, err := deriveWatermark(cat, silentLogger())
+	require.NoError(t, err)
+	require.Equal(t, highMid, got,
+		"refinement must open the HIGHEST ready chunk (7), reading its committed mid-seq")
+}
+
+// TestDeriveWatermark_RealHotDB_EmptyLiveFallsBack is the count-only-ready case
+// against a real DB: a "ready" live chunk whose real hot DB has NO committed
+// ledger (MaxCommittedSeq present=false) must fall back to the positional term,
+// not fabricate a frontier. Read through a real read-only open by Layout path.
+func TestDeriveWatermark_RealHotDB_EmptyLiveFallsBack(t *testing.T) {
+	cat, _ := testCatalog(t)
+	makeChunkDurable(t, cat, 0) // cold term => chunk 0 last ledger
+
+	live := chunk.ID(3)
+	db := openLiveHotDB(t, cat, live) // ready key + real dir, but NOTHING committed
+	require.NoError(t, db.Close())
+
+	// A read-only open of the empty ledgers CF: present=false, no refinement.
+	got, err := deriveWatermark(cat, silentLogger())
+	require.NoError(t, err)
+	require.Equal(t, chunk.ID(2).LastLedger(), got,
+		"empty live DB ⇒ positional baseline (max ready 3 - 1 = chunk 2), no fabricated frontier")
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_shim_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_shim_test.go
new file mode 100644
index 000000000..271b0c282
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_shim_test.go
@@ -0,0 +1,22 @@
+package lifecycle
+
+import (
+	supportlog "github.com/stellar/go-stellar-sdk/support/log"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+)
+
+// Test-only aliases for the single progress derivation, LastCommittedLedger; both
+// forward to it unchanged. There is no logger-less mode: when a "ready" hot key
+// leads the cold term the derivation always opens that DB read-only, so both
+// aliases pass a real logger. deriveCompleteThrough names the cold/floor/
+// positional-selection intent (its callers seed no ready-above-cold hot key, or an
+// empty real hot DB whose refinement falls back to the positional term);
+// deriveWatermark names the refinement-value intent. Production uses LastCommittedLedger.
+func deriveCompleteThrough(cat *catalog.Catalog) (uint32, error) {
+	return LastCommittedLedger(cat, silentLogger())
+}
+
+func deriveWatermark(cat *catalog.Catalog, logger *supportlog.Entry) (uint32, error) {
+	return LastCommittedLedger(cat, logger) // same derivation; the caller supplies the logger
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_test.go
new file mode 100644
index 000000000..c4bd5f50d
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_test.go
@@ -0,0 +1,233 @@
+package lifecycle
+
+import (
+	"os"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// ---------------------------------------------------------------------------
+// progress derivation test helpers.
+// ---------------------------------------------------------------------------
+
+// makeChunkDurable calls freezeKinds for chunk c's ledgers, events and txhash kinds —
+// the fully-durable state highestDurableChunk counts.
+func makeChunkDurable(t *testing.T, cat *catalog.Catalog, c chunk.ID) {
+	t.Helper()
+	freezeKinds(t, cat, c, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash)
+}
+
+// makeHotDir creates (MkdirAll) the on-disk hot dir for chunk c, with no DB inside it.
+// The refinement opens only the HIGHEST ready chunk, so a lower ready key needs only
+// its dir present (readyHot pairs this with the key); the highest ready chunk in a
+// positional-term test needs a real empty DB via seedReadyLiveDB.
+func makeHotDir(t *testing.T, cat *catalog.Catalog, c chunk.ID) {
+	t.Helper()
+	require.NoError(t, os.MkdirAll(cat.Layout().HotChunkPath(c), 0o755))
+}
+
+// readyHot puts chunk c's hot key as transient, flips it to "ready", then creates its dir —
+// the production pairing deriveWatermark expects (a ready key whose dir is missing is loss).
+func readyHot(t *testing.T, cat *catalog.Catalog, c chunk.ID) {
+	t.Helper()
+	require.NoError(t, cat.PutHotTransient(c))
+	require.NoError(t, cat.FlipHotReady(c))
+	makeHotDir(t, cat, c)
+}
+
+// ---------------------------------------------------------------------------
+// LastCommittedLedger — chunk-granularity bound; a leading ready hot DB is still
+// opened. (CompleteThrough / ChunkIDOfLedger arithmetic is tested in geometry.)
+// ---------------------------------------------------------------------------
+
+func TestLastCommittedLedger(t *testing.T) {
+	t.Run("fresh store => pre-genesis sentinel, never MaxUint32", func(t *testing.T) {
+		// Every term is -1; the signed domain must yield FirstLedgerSeq-1, not wrap.
+		cat, _ := testCatalog(t)
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, geometry.PreGenesisLedger, got)
+	})
+
+	t.Run("cold term leads: highest fully-durable chunk", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		makeChunkDurable(t, cat, 0)
+		makeChunkDurable(t, cat, 1)
+		makeChunkDurable(t, cat, 2)
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, chunk.ID(2).LastLedger(), got)
+	})
+
+	t.Run("incompletely-frozen tip degrades the bound (ledgers frozen, events freezing)", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		makeChunkDurable(t, cat, 0)
+		makeChunkDurable(t, cat, 1)
+		// Chunk 2 mid-freeze (events only "freezing") must NOT count: bound stays at 1.
+		freezeKinds(t, cat, 2, geometry.KindLedgers, geometry.KindTxHash)
+		require.NoError(t, cat.MarkChunkFreezing(2, geometry.KindEvents))
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, chunk.ID(1).LastLedger(), got)
+	})
+
+	t.Run("txhash satisfied by a frozen index coverage (post-finalization demote)", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		// Chunk 7: txhash demoted but a frozen index coverage spans it ⇒ still durable.
+		freezeKinds(t, cat, 7, geometry.KindLedgers, geometry.KindEvents)
+		freezeCoverage(t, cat, cat.TxHashIndexLayout().TxHashIndexID(7), 0, 999) // window 0 covers chunk 7
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, chunk.ID(7).LastLedger(), got)
+	})
+
+	t.Run("chunk NOT covered by any frozen index and no frozen txhash does not count", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		makeChunkDurable(t, cat, 0)
+		// Chunk 1: ledgers+events frozen, no txhash, no covering index.
+		freezeKinds(t, cat, 1, geometry.KindLedgers, geometry.KindEvents)
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, chunk.ID(0).LastLedger(), got, "chunk 1 not durable; bound stays at chunk 0")
+	})
+
+	t.Run("positional term leads in steady state: everything below the live chunk", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		// No cold artifacts yet (steady state: chunks complete before cold exists).
+		// Ready hot keys 3,4,5 => live chunk is 5 => everything below 5 complete. Only
+		// the highest (5) is opened; empty DB ⇒ positional fallback CompleteThrough(4).
+		readyHot(t, cat, 3)
+		readyHot(t, cat, 4)
+		seedReadyLiveDB(t, cat, 5, 0)
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, chunk.ID(4).LastLedger(), got, "max ready (5) - 1 = chunk 4's last ledger")
+	})
+
+	t.Run("transient hot key does NOT advance the positional term", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		seedReadyLiveDB(t, cat, 3, 0) // highest ready, empty DB ⇒ positional CompleteThrough(2)
+		// A transient key above the highest ready one must be excluded.
+		require.NoError(t, cat.PutHotTransient(9))
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, chunk.ID(2).LastLedger(), got, "max READY (3) - 1, ignoring transient 9")
+	})
+
+	t.Run("live chunk 0 => positional term is pre-genesis, NOT MaxUint32", func(t *testing.T) {
+		// The exact uint32-underflow trap: max ready = 0, so 0-1 must be the
+		// pre-genesis sentinel, not ID(4294967295).LastLedger().
+		cat, _ := testCatalog(t)
+		seedReadyLiveDB(t, cat, 0, 0) // ready chunk 0, empty DB ⇒ positional fallback
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, geometry.PreGenesisLedger, got)
+	})
+
+	t.Run("earliest pin floor leads when above cold/positional terms", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		// Floor pinned mid-chain, no chunks durable, no hot keys.
+		const floor = 50000
+		require.NoError(t, cat.PinEarliestLedger(floor))
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, uint32(floor-1), got)
+	})
+
+	t.Run("earliest pin == genesis (2) does not underflow", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		require.NoError(t, cat.PinEarliestLedger(chunk.FirstLedgerSeq))
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, geometry.PreGenesisLedger, got, "earliest 2 - 1 = 1, not MaxUint32")
+	})
+
+	t.Run("max of all three terms", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		makeChunkDurable(t, cat, 0)   // cold => chunk 0 last ledger
+		seedReadyLiveDB(t, cat, 4, 0) // positional (empty DB) => chunk 3 last ledger, the max (pin 2 below => floor 1)
+		require.NoError(t, cat.PinEarliestLedger(2))
+		got, err := deriveCompleteThrough(cat)
+		require.NoError(t, err)
+		require.Equal(t, chunk.ID(3).LastLedger(), got)
+	})
+}
+
+// ---------------------------------------------------------------------------
+// deriveWatermark — deriveCompleteThrough + one read-only refinement of the
+// highest ready hot DB, opened lazily by its Layout path. These read REAL
+// per-chunk hot DBs; the sub-chunk-precision / opens-highest / empty-fallback
+// value cases are covered against real DBs in progress_realdb_test.go.
+// ---------------------------------------------------------------------------
+
+func TestDeriveWatermark(t *testing.T) {
+	t.Run("no ready hot keys => equals deriveCompleteThrough, no open", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		makeChunkDurable(t, cat, 0)
+		// No ready key above the cold term ⇒ the hot>cold gate skips the open entirely.
+		got, err := deriveWatermark(cat, silentLogger())
+		require.NoError(t, err)
+		require.Equal(t, chunk.ID(0).LastLedger(), got)
+	})
+
+	t.Run("boundary-crash under-count recovered by refinement", func(t *testing.T) {
+		// Live chunk crashed at a boundary and was demoted to "transient": the
+		// highest READY key is the just-completed predecessor (chunk 4), whose
+		// completion no key advertises (positional term = chunk 3). The refinement
+		// opens chunk 4's real DB and reads its full committed seq = chunk 4's last
+		// ledger, recovering the frontier the positional term under-counted.
+		cat, _ := testCatalog(t)
+		chunk4Last := chunk.ID(4).LastLedger()
+		seedReadyLiveDB(t, cat, 4, chunk4Last)
+		require.NoError(t, cat.PutHotTransient(5)) // the crashed live chunk
+		// The positional term alone (highest ready 4, minus 1) under-counts to chunk 3; only the
+		// refinement recovers chunk 4. NOTE(review): this check is geometry-only — it never reads cat.
+		require.Equal(t, chunk.ID(3).LastLedger(), geometry.CompleteThrough(3),
+			"positional term alone under-counts to chunk 3")
+
+		got, err := deriveWatermark(cat, silentLogger())
+		require.NoError(t, err)
+		require.Equal(t, chunk4Last, got, "refinement recovers the chunk-4 frontier")
+	})
+
+	t.Run("LAZY loss (item R2-6): only the highest ready chunk is opened; a lower"+
+		" ready key's missing dir is NOT eagerly flagged", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		// Two ready keys; the LOWER one's dir is missing. Under the design's lazy
+		// detection (no eager all-ready-keys scan) only the HIGHEST ready chunk is
+		// opened, so the lower key's missing dir is not surfaced here — it surfaces
+		// later, when ingestion/discard reaches that chunk via openHotDBForChunk.
+		require.NoError(t, cat.PutHotTransient(2))
+		require.NoError(t, cat.FlipHotReady(2)) // ready key 2, NO dir (not opened here)
+		highSeq := chunk.ID(5).FirstLedger() + 10
+		seedReadyLiveDB(t, cat, 5, highSeq) // highest ready key 5 WITH real DB (opened)
+		got, err := deriveWatermark(cat, silentLogger())
+		require.NoError(t, err)
+		require.Equal(t, highSeq, got, "refined to the highest ready chunk's seq")
+	})
+
+	t.Run("errors: a ready HIGHEST chunk whose dir is missing (lazy detection on open)", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		// The highest ready chunk's dir is missing: the one open the derivation
+		// performs surfaces an ordinary (restartable) error — the read-only open
+		// never auto-heals it into a fresh empty DB.
+		require.NoError(t, cat.PutHotTransient(5))
+		require.NoError(t, cat.FlipHotReady(5)) // ready key 5, NO dir
+		_, err := deriveWatermark(cat, silentLogger())
+		require.Error(t, err)
+		require.Contains(t, err.Error(), "00000005")
+	})
+
+	t.Run("live chunk 0 ready, empty DB => pre-genesis, no underflow", func(t *testing.T) {
+		cat, _ := testCatalog(t)
+		seedReadyLiveDB(t, cat, 0, 0) // ready + real dir, nothing committed
+		got, err := deriveWatermark(cat, silentLogger())
+		require.NoError(t, err)
+		require.Equal(t, geometry.PreGenesisLedger, got)
+	})
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/retention.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/retention.go
new file mode 100644
index 000000000..c96b65678
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/retention.go
@@ -0,0 +1,54 @@
+package lifecycle
+
+import (
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+)
+
+// RetentionFloor is the lowest chunk still within retention; anything below is
+// eligible for discard/prune. It is the reader-side retention contract (design
+// "Reader retention contract", gettx §8.2 / §8.5): availability is decided by
+// retention, not the on-disk file set, so prune/sweep can unlink a chunk the
+// instant it passes the floor without coordinating with the index lifecycle. The
+// floor may err LOW harmlessly (a wrongly-retained chunk still hits the reader's
+// missing-file rule), so it anchors on the live CompleteThrough; widening history
+// is backfill's job, not the floor's.
+type RetentionFloor struct {
+	chunk chunk.ID // lowest in-retention chunk; chunks strictly below it are excluded
+}
+
+// NewRetentionFloor pins the floor for one (through, retentionChunks, earliest) snapshot
+// via EffectiveRetentionFloor. A shortened retentionChunks raises the floor at once.
+func NewRetentionFloor(through, retentionChunks, earliest uint32) RetentionFloor {
+	return RetentionFloorAt(EffectiveRetentionFloor(through, retentionChunks, earliest))
+}
+
+// RetentionFloorAt pins the floor from an already-computed floor ledger (mapped to its
+// chunk via chunk.IDFromLedger), so the tick derives EffectiveRetentionFloor once and
+// shares it between the gauge and the gate rather than recomputing it per scan.
+func RetentionFloorAt(floorLedger uint32) RetentionFloor {
+	return RetentionFloor{chunk: chunk.IDFromLedger(floorLedger)}
+}
+
+// Excludes reports whether chunk c is strictly below the floor (past retention); the floor
+// chunk itself is kept. The scans use it on a chunk directly and, since an index is below the
+// floor exactly when its last chunk is, as Excludes(layout.LastChunk(idx)) for a whole index.
+func (f RetentionFloor) Excludes(c chunk.ID) bool { return c < f.chunk }
+
+// FirstChunk returns the lowest in-retention chunk — the single floor→chunk boundary
+// definition shared by prune (the gate), the lifecycle plan range, and startup
+// backfill, so the three can never disagree on where retention begins.
+func (f RetentionFloor) FirstChunk() chunk.ID { return f.chunk }
+
+// EffectiveRetentionFloor is the lower-bound ledger of the retention window: the HIGHER
+// of the chunk-aligned sliding floor (retentionChunks back from the last complete chunk at
+// upperBound; genesis when retentionChunks is 0) and the fixed earliest_ledger, used as given.
+// slidingChunk is signed so a young store / large retentionChunks clamps to chunk 0 (no underflow).
+func EffectiveRetentionFloor(upperBound, retentionChunks, earliest uint32) uint32 {
+	sliding := uint32(chunk.FirstLedgerSeq) // GenesisLedger; stays here when retentionChunks == 0
+	if retentionChunks > 0 {
+		slidingChunk := geometry.LastCompleteChunkAt(upperBound) - int64(retentionChunks) + 1
+		sliding = geometry.ChunkFirstLedger(max(slidingChunk, 0))
+	}
+	return max(sliding, earliest)
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/retention_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/retention_test.go
similarity index 56%
rename from cmd/stellar-rpc/internal/fullhistory/retention_test.go
rename to cmd/stellar-rpc/internal/fullhistory/lifecycle/retention_test.go
index e3defe955..7ced429e6 100644
--- a/cmd/stellar-rpc/internal/fullhistory/retention_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/retention_test.go
@@ -1,4 +1,4 @@
-package fullhistory
+package lifecycle
 
 import (
 	"testing"
@@ -6,8 +6,8 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 )
 
 // ---------------------------------------------------------------------------
@@ -18,7 +18,7 @@ import (
 // ---------------------------------------------------------------------------
 
 // through = chunk 100's last ledger, retain 10 chunks ⇒ floor = chunk 91
-// (retentionFloorChunk: 100-10+1 = 91). Anything below chunk 91 is excluded.
+// (EffectiveRetentionFloor: 100-10+1 = 91). Anything below chunk 91 is excluded.
 func TestRetentionFloor_ExcludesBelow(t *testing.T) {
 	floor := NewRetentionFloor(chunk.ID(100).LastLedger(), 10, 0)
 
@@ -44,8 +44,8 @@ func TestRetentionFloor_ShorteningRaisesFloorImmediately(t *testing.T) {
 // A whole tx-hash index is below the floor exactly when its last chunk is, so
 // callers test Excludes(layout.LastChunk(idx)) — no index-specific method needed.
 func TestRetentionFloor_ExcludesIndexByLastChunk(t *testing.T) {
-	layout, err := geometry.NewTxHashIndexLayout(4) // indexes: 0=[0,3], 1=[4,7], 2=[8,11]
-	require.NoError(t, err)
+	cat, _ := smallTxHashIndexCatalog(t, 4) // indexes: 0=[0,3], 1=[4,7], 2=[8,11]
+	layout := cat.TxHashIndexLayout()
 
 	// through = chunk 11's last ledger, retain 4 chunks ⇒ floor = chunk 8
 	// (11-4+1 = 8). Index 2 ([8,11]) starts at the floor.
@@ -94,3 +94,71 @@ func TestRetentionFloor_YoungStoreClampsToGenesis(t *testing.T) {
 	floor := NewRetentionFloor(chunk.ID(3).LastLedger(), 1000, 0)
 	assert.False(t, floor.Excludes(0), "chunk 0 is at the clamped floor, not below it")
 }
+
+// ---------------------------------------------------------------------------
+// Scenario: a window STRADDLING the floor serves in-range seqs and not-found
+// below. A finalized window's frozen .idx covers [lo, hi] including chunks the
+// floor has since risen past; the gate masks those below-floor chunks. This is
+// the stale-.idx case gettransaction §8.5 tolerates because the reader gate
+// makes below-floor reads not-found regardless of what the .idx resolves.
+// ---------------------------------------------------------------------------
+
+func TestReaderRetention_WindowStraddlingFloorServesInRangeNotBelow(t *testing.T) {
+	cat, _ := smallTxHashIndexCatalog(t, 4) // window 0 = chunks [0,3]
+	wins := cat.TxHashIndexLayout()
+
+	// Window 0 was finalized at terminal coverage [0,3] when the floor sat at
+	// genesis. Its frozen .idx hashes chunks 0..3 — a static, stale-lo artifact.
+	for c := chunk.ID(0); c <= 3; c++ {
+		freezeKinds(t, cat, c, geometry.KindLedgers, geometry.KindEvents)
+	}
+	freezeCoverage(t, cat, 0, 0, 3)
+	fk, ok, err := cat.FrozenTxHashIndex(0)
+	require.NoError(t, err)
+	require.True(t, ok)
+	require.True(t, wins.IsTerminalCoverage(fk), "window 0 is finalized")
+
+	// The floor later rose to chunk 2 (its first ledger). Window 0 now STRADDLES
+	// the floor: chunks 0,1 below it, chunks 2,3 in range. The .idx still claims
+	// lo=0, but the reader gate is the source of truth.
+	through := chunk.ID(3).LastLedger()
+	// Pick retentionChunks so the sliding floor lands on chunk 2:
+	// geometry.LastCompleteChunkAt(through)=3, floor chunk = 3-retention+1 = 2 ⇒ retention=2.
+	floor := NewRetentionFloor(through, 2, 0)
+
+	// (The seq-level reader masking — a below-floor read is not-found even though
+	// the stale .idx still hashes chunks 0,1 — returns with the read path, #772;
+	// RetentionFloor here exposes only the chunk-granularity prune predicate.)
+
+	// The straddling window's frozen .idx is NOT swept: the window is not wholly
+	// below the floor (its last chunk, 3, is in range), so only its below-floor
+	// chunk artifacts (chunks 0,1) are pruned.
+	assert.False(t, floor.Excludes(wins.LastChunk(0)),
+		"a straddling window is not wholly below the floor — its .idx is kept")
+	cfg := lifecycleTestConfig(t, cat, 2) // NOTE(review): 2 is presumably retentionChunks, matching floor above — confirm
+	pops, _, err := eligiblePruneOps(cat, gateFor(t, cfg, cat, through))
+	require.NoError(t, err)
+	for _, op := range pops {
+		require.NoError(t, op())
+	}
+
+	// The window's frozen .idx coverage survives the prune (index family).
+	survives, ok, err := cat.FrozenTxHashIndex(0)
+	require.NoError(t, err)
+	require.True(t, ok, "the straddling window keeps its frozen coverage")
+	require.Equal(t, fk.Key, survives.Key)
+
+	// The below-floor chunks 0,1 ARE pruned (chunk family); the in-range chunks
+	// 2,3 survive — exactly the data the gate admits.
+	for c := chunk.ID(0); c <= 1; c++ {
+		ledgers, serr := cat.State(c, geometry.KindLedgers)
+		require.NoError(t, serr)
+		assert.Equal(t, geometry.State(""), ledgers, "below-floor chunk %s pruned", c)
+	}
+	for c := chunk.ID(2); c <= 3; c++ {
+		ledgers, serr := cat.State(c, geometry.KindLedgers)
+		require.NoError(t, serr)
+		assert.Equal(t, geometry.StateFrozen, ledgers, "in-range chunk %s survives", c)
+	}
+	assertQuiescent(t, cfg, cat, through)
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/runops_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/runops_test.go
new file mode 100644
index 000000000..a3f3eb8b1
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/runops_test.go
@@ -0,0 +1,56 @@
+package lifecycle
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+// runOps retries a failed (idempotent) op a bounded number of times on a fixed
+// pause before giving up, so a transient sweep failure doesn't cancel ingestion
+// and force a whole-daemon restart.
+
+func TestRunOps_RetriesTransientThenSucceeds(t *testing.T) {
+	cfg := Config{OpRetryAttempts: 3, OpRetryBackoff: time.Millisecond}
+	calls := 0
+	op := func() error {
+		calls++
+		if calls < 3 { // the first two calls fail; the third succeeds
+			return errors.New("busy file")
+		}
+		return nil
+	}
+	require.NoError(t, runOps(context.Background(), cfg, []func() error{op}))
+	require.Equal(t, 3, calls, "two transient failures retried, third try succeeds")
+}
+
+func TestRunOps_GivesUpAfterAttempts(t *testing.T) {
+	cfg := Config{OpRetryAttempts: 2, OpRetryBackoff: time.Millisecond}
+	calls := 0
+	op := func() error { calls++; return errors.New("permanent") } // never succeeds
+	require.Error(t, runOps(context.Background(), cfg, []func() error{op}))
+	require.Equal(t, 2, calls, "attempts total tries (1 initial + 1 retry), then gives up")
+}
+
+func TestRunOps_CtxCancelStopsBeforeOp(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // canceled up front: runOps must see it before the first attempt
+	cfg := Config{OpRetryAttempts: 3, OpRetryBackoff: time.Hour}
+	calls := 0
+	op := func() error { calls++; return errors.New("x") }
+	require.ErrorIs(t, runOps(ctx, cfg, []func() error{op}), context.Canceled)
+	require.Zero(t, calls, "a canceled ctx stops before running the op")
+}
+
+// TestRunOps_ZeroConfigRunsOnce: a zero-value Config (no WithLifecycleDefaults) runs
+// each op exactly once — no retry, no panic on the zero backoff — and still returns the
+// op's error, so a test harness that builds Config directly keeps the pre-retry behavior.
+func TestRunOps_ZeroConfigRunsOnce(t *testing.T) {
+	calls := 0
+	op := func() error { calls++; return errors.New("boom") }
+	require.Error(t, runOps(context.Background(), Config{}, []func() error{op}))
+	require.Equal(t, 1, calls, "zero-config = single attempt")
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/observability/observability.go b/cmd/stellar-rpc/internal/fullhistory/observability/observability.go
index d6972fe10..cb5e0c0ef 100644
--- a/cmd/stellar-rpc/internal/fullhistory/observability/observability.go
+++ b/cmd/stellar-rpc/internal/fullhistory/observability/observability.go
@@ -10,9 +10,26 @@ import (
 // per-phase wall-clock timings; distinct from the per-data-type ingest.MetricSink.
 // All methods must be safe for concurrent use.
 type Metrics interface {
-	// LastCommitted sets the derived last-committed ledger and the effective
-	// retention floor (the two advance together each backfill pass).
-	LastCommitted(lastCommitted, retentionFloor uint32)
+	// LastCommitted sets the derived last-committed ledger gauge. Owned by the two
+	// call sites that know the TRUE value: startup/backfill (as history advances)
+	// and the ingestion loop (one atomic gauge set per committed ledger). The tick
+	// must NOT set it — its chunk-aligned lastChunk.LastLedger() would regress the
+	// gauge below a mid-chunk refined watermark on every restart.
+	LastCommitted(lastCommitted uint32)
+
+	// RetentionFloor sets the effective retention floor gauge (lowest in-window
+	// ledger). Owned by startup/backfill and the lifecycle tick; the floor depends
+	// only on the last complete chunk, so it does not regress in the tick's window.
+	RetentionFloor(retentionFloor uint32)
+
+	// ChunkBoundary counts one ingestion chunk-boundary handoff. The closed chunk
+	// id is logged at the call site; this metric is a plain counter.
+	ChunkBoundary()
+
+	// LiveHotChunks sets the count of hot-chunk DBs currently on disk (the
+	// hot:chunk key count). Reported by every lifecycle tick after the discard
+	// stage so the gauge tracks the live + awaiting-discard set.
+	LiveHotChunks(count int)
 
 	// BackfillPass records one completed backfill pass's wall-clock.
 	BackfillPass(d time.Duration)
@@ -20,6 +37,8 @@ type Metrics interface {
 	Freeze(d time.Duration)
 	// Rebuild records one index rebuild's wall-clock.
 	Rebuild(d time.Duration)
+	// Discard counts the hot DBs a tick retired and records the stage wall-clock.
+	Discard(count int, d time.Duration)
 	// Prune counts swept artifacts and records the sweep's wall-clock.
 	Prune(count int, d time.Duration)
 }
@@ -27,11 +46,15 @@ type Metrics interface {
 // NopMetrics discards every signal — the default when a config carries no Metrics.
 type NopMetrics struct{}
 
-func (NopMetrics) LastCommitted(uint32, uint32) {}
-func (NopMetrics) BackfillPass(time.Duration)   {}
-func (NopMetrics) Freeze(time.Duration)         {}
-func (NopMetrics) Rebuild(time.Duration)        {}
-func (NopMetrics) Prune(int, time.Duration)     {}
+func (NopMetrics) LastCommitted(uint32)       {}
+func (NopMetrics) RetentionFloor(uint32)      {}
+func (NopMetrics) ChunkBoundary()             {}
+func (NopMetrics) LiveHotChunks(int)          {}
+func (NopMetrics) BackfillPass(time.Duration) {}
+func (NopMetrics) Freeze(time.Duration)       {}
+func (NopMetrics) Rebuild(time.Duration)      {}
+func (NopMetrics) Discard(int, time.Duration) {}
+func (NopMetrics) Prune(int, time.Duration)   {}
 
 // MetricsOrNop returns m, or NopMetrics{} when nil, so call sites never nil-check.
 func MetricsOrNop(m Metrics) Metrics {
@@ -56,9 +79,12 @@ type PrometheusMetrics struct {
 	// Gauges — absolute, last-write-wins.
 	lastCommitted  prometheus.Gauge
 	retentionFloor prometheus.Gauge
+	liveHotChunks  prometheus.Gauge
 
-	// Counter — monotonic tally.
-	pruned prometheus.Counter
+	// Counters — monotonic tallies.
+	chunkBoundaries prometheus.Counter
+	discarded       prometheus.Counter
+	pruned          prometheus.Counter
 
 	// Durations — per-phase wall-clock histogram, keyed by phase label.
 	phaseDuration *prometheus.HistogramVec
@@ -69,6 +95,7 @@ const (
 	phaseBackfillPass = "backfill_pass"
 	phaseFreeze       = "freeze"
 	phaseRebuild      = "rebuild"
+	phaseDiscard      = "discard"
 	phasePrune        = "prune"
 )
 
@@ -79,14 +106,19 @@ func NewPrometheusMetrics(registry *prometheus.Registry, namespace string) *Prom
 			Namespace: namespace, Subsystem: subsystem, Name: name, Help: help,
 		})
 	}
+	counter := func(name, help string) prometheus.Counter {
+		return prometheus.NewCounter(prometheus.CounterOpts{
+			Namespace: namespace, Subsystem: subsystem, Name: name, Help: help,
+		})
+	}
 
 	m := &PrometheusMetrics{
-		lastCommitted:  gauge("last_committed_ledger", "highest ledger durably committed"),
-		retentionFloor: gauge("retention_floor_ledger", "effective retention floor — lowest in-window ledger"),
-		pruned: prometheus.NewCounter(prometheus.CounterOpts{
-			Namespace: namespace, Subsystem: subsystem,
-			Name: "pruned_ops_total", Help: "artifacts swept after an index build",
-		}),
+		lastCommitted:   gauge("last_committed_ledger", "highest ledger durably committed"),
+		retentionFloor:  gauge("retention_floor_ledger", "effective retention floor — lowest in-window ledger"),
+		liveHotChunks:   gauge("live_hot_chunks", "count of hot-chunk DBs currently on disk"),
+		chunkBoundaries: counter("chunk_boundaries_total", "ingestion chunk-boundary handoffs"),
+		discarded:       counter("discarded_hot_chunks_total", "hot DBs retired by the discard stage"),
+		pruned:          counter("pruned_artifacts_total", "artifacts swept by the prune stage (below the retention floor)"),
 		phaseDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
 			Namespace: namespace, Subsystem: subsystem,
 			Name: "phase_duration_seconds", Help: "wall-clock of a daemon phase action",
@@ -94,15 +126,26 @@ func NewPrometheusMetrics(registry *prometheus.Registry, namespace string) *Prom
 		}, []string{"phase"}),
 	}
 
-	registry.MustRegister(m.lastCommitted, m.retentionFloor, m.pruned, m.phaseDuration)
+	registry.MustRegister(
+		m.lastCommitted, m.retentionFloor, m.liveHotChunks,
+		m.chunkBoundaries, m.discarded, m.pruned,
+		m.phaseDuration,
+	)
 	return m
 }
 
-func (m *PrometheusMetrics) LastCommitted(lastCommitted, retentionFloor uint32) {
+func (m *PrometheusMetrics) LastCommitted(lastCommitted uint32) {
 	m.lastCommitted.Set(float64(lastCommitted))
+}
+
+func (m *PrometheusMetrics) RetentionFloor(retentionFloor uint32) {
 	m.retentionFloor.Set(float64(retentionFloor))
 }
 
+func (m *PrometheusMetrics) ChunkBoundary() { m.chunkBoundaries.Inc() } // counter += 1 per call
+
+func (m *PrometheusMetrics) LiveHotChunks(count int) { m.liveHotChunks.Set(float64(count)) } // gauge: last write wins
+
 func (m *PrometheusMetrics) BackfillPass(d time.Duration) {
 	m.phaseDuration.WithLabelValues(phaseBackfillPass).Observe(d.Seconds())
 }
@@ -115,6 +158,13 @@ func (m *PrometheusMetrics) Rebuild(d time.Duration) {
 	m.phaseDuration.WithLabelValues(phaseRebuild).Observe(d.Seconds())
 }
 
+func (m *PrometheusMetrics) Discard(count int, d time.Duration) {
+	if count > 0 { // Counter.Add panics on a negative value; a zero Add would be a no-op
+		m.discarded.Add(float64(count))
+	}
+	m.phaseDuration.WithLabelValues(phaseDiscard).Observe(d.Seconds()) // observed even when count is 0
+}
+
 func (m *PrometheusMetrics) Prune(count int, d time.Duration) {
 	if count > 0 {
 		m.pruned.Add(float64(count))
diff --git a/cmd/stellar-rpc/internal/fullhistory/observability/observability_test.go b/cmd/stellar-rpc/internal/fullhistory/observability/observability_test.go
index 6ebe0310a..336dfcff5 100644
--- a/cmd/stellar-rpc/internal/fullhistory/observability/observability_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/observability/observability_test.go
@@ -17,10 +17,14 @@ import (
 func TestMetricsOrNop_NilNeverPanics(t *testing.T) {
 	m := MetricsOrNop(nil)
 	require.NotNil(t, m)
-	m.LastCommitted(5, 2)
+	m.LastCommitted(5)
+	m.RetentionFloor(2)
+	m.ChunkBoundary()
+	m.LiveHotChunks(3)
 	m.BackfillPass(time.Second)
 	m.Freeze(time.Second)
 	m.Rebuild(time.Second)
+	m.Discard(1, time.Second)
 	m.Prune(2, time.Second)
 }
 
@@ -34,10 +38,15 @@ func TestPrometheusMetrics_RegistersAndRecords(t *testing.T) {
 	reg := prometheus.NewRegistry()
 	m := NewPrometheusMetrics(reg, "test_ns")
 
-	m.LastCommitted(58, 12)
+	m.LastCommitted(58)
+	m.RetentionFloor(12)
+	m.LiveHotChunks(4)
+	m.ChunkBoundary()
+	m.ChunkBoundary()
 	m.BackfillPass(250 * time.Millisecond)
 	m.Freeze(100 * time.Millisecond)
 	m.Rebuild(50 * time.Millisecond)
+	m.Discard(3, 20*time.Millisecond)
 	m.Prune(2, 5*time.Millisecond)
 
 	families, err := reg.Gather()
@@ -61,10 +70,13 @@ func TestPrometheusMetrics_RegistersAndRecords(t *testing.T) {
 
 	assert.InDelta(t, float64(58), values["test_ns_fullhistory_streaming_last_committed_ledger"], 0)
 	assert.InDelta(t, float64(12), values["test_ns_fullhistory_streaming_retention_floor_ledger"], 0)
-	assert.InDelta(t, float64(2), values["test_ns_fullhistory_streaming_pruned_ops_total"], 0)
+	assert.InDelta(t, float64(4), values["test_ns_fullhistory_streaming_live_hot_chunks"], 0)
+	assert.InDelta(t, float64(2), values["test_ns_fullhistory_streaming_chunk_boundaries_total"], 0)
+	assert.InDelta(t, float64(3), values["test_ns_fullhistory_streaming_discarded_hot_chunks_total"], 0)
+	assert.InDelta(t, float64(2), values["test_ns_fullhistory_streaming_pruned_artifacts_total"], 0)
 
-	// Phase-duration histogram saw backfill_pass + freeze + rebuild + prune = 4 observations.
-	assert.Equal(t, uint64(4), counts["test_ns_fullhistory_streaming_phase_duration_seconds"])
+	// Phase-duration histogram saw backfill_pass + freeze + rebuild + discard + prune = 5 observations.
+	assert.Equal(t, uint64(5), counts["test_ns_fullhistory_streaming_phase_duration_seconds"])
 }
 
 // Double-registration on the same registry panics (one sink per registry).
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb.go b/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb.go
index f172587e6..0f1b56552 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb.go
@@ -58,6 +58,22 @@ type Config struct {
 	// "inherit the pinned defaults"; see CFOptions docstring for
 	// the per-knob inherit/override semantics.
 	PerCFOptions map[string]CFOptions
+
+	// ReadOnly opens the store read-only (dir never created, no writes, no
+	// flush-on-close). An un-flushed WAL IS recovered into in-memory memtables
+	// on open (RocksDB OpenForReadOnly semantics; nothing is persisted), so
+	// reads see every synced write, not just SST/MANIFEST state. Used by the
+	// freeze source.
+	ReadOnly bool
+
+	// MustExist opens read-WRITE but with create-if-missing OFF, so opening a
+	// missing or gutted DB fails instead of silently fabricating a fresh empty
+	// one. It is the "never auto-heal" open used for a hot DB under a "ready"
+	// key, i.e. a DB the filesystem should already hold. (RocksDB's env layer may
+	// still leave a stub leaf dir with a LOG file behind on the failed open;
+	// correctness holds — every retry still fails on the missing CURRENT — but no
+	// usable DB is created.) Ignored when ReadOnly is set (read-only never creates).
+	MustExist bool
 }
 
 // Store is the Layer-1 RocksDB handle. Concrete struct: one impl,
@@ -292,26 +308,12 @@ func (s *Store) Iterate(cf string, prefix []byte) iter.Seq2[Entry, error] {
 	}
 }
 
-// FirstKey returns the smallest key in cf. If cf has no keys this is not
-// an error: it returns (nil, false, nil), so callers detect emptiness via
-// ok. (cf == "" selects the default column family; an unregistered cf name
-// returns ErrCFNotFound.)
-// Cheap: a single boundary seek (no scan).
-func (s *Store) FirstKey(cf string) ([]byte, bool, error) {
-	return s.edgeKey(cf, false)
-}
-
 // LastKey returns the largest key in cf. If cf has no keys this is not an
 // error: it returns (nil, false, nil), so callers detect emptiness via ok.
 // (cf == "" selects the default column family; an unregistered cf name
 // returns ErrCFNotFound.)
 // Cheap: a single boundary seek (no scan).
 func (s *Store) LastKey(cf string) ([]byte, bool, error) {
-	return s.edgeKey(cf, true)
-}
-
-//nolint:funcorder // helper grouped with FirstKey/LastKey for readability
-func (s *Store) edgeKey(cf string, last bool) ([]byte, bool, error) {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
 
@@ -325,11 +327,7 @@ func (s *Store) edgeKey(cf string, last bool) ([]byte, bool, error) {
 
 	it := s.db.NewIteratorCF(s.ro, cfh)
 	defer it.Close()
-	if last {
-		it.SeekToLast()
-	} else {
-		it.SeekToFirst()
-	}
+	it.SeekToLast()
 	if !it.Valid() {
 		// Empty CF (it.Err() is nil) or a mid-seek RocksDB error.
 		return nil, false, it.Err()
@@ -425,8 +423,13 @@ func (s *Store) Close() error {
 		return nil
 	}
 
-	if err := s.doFlush(); err != nil {
-		s.cfg.Logger.WithError(err).Warnf("rocksdb: graceful close Flush failed at %s; next Open will replay WAL", s.cfg.Path)
+	// A read-only store has nothing to flush (and the RocksDB read-only handle
+	// would reject it); only a writable store flushes its memtable on close.
+	if !s.cfg.ReadOnly {
+		if err := s.doFlush(); err != nil {
+			s.cfg.Logger.WithError(err).Warnf(
+				"rocksdb: graceful close Flush failed at %s; next Open will replay WAL", s.cfg.Path)
+		}
 	}
 
 	for _, cfh := range s.cfHandles {
@@ -494,14 +497,20 @@ func (s *Store) constructAndOpen() error {
 	if err != nil {
 		return fmt.Errorf("rocksdb: canonicalize path %s: %w", s.cfg.Path, err)
 	}
-	if err := os.MkdirAll(abs, dirPerm); err != nil {
-		return fmt.Errorf("rocksdb: mkdir %s: %w", abs, err)
+	// Read-only and must-exist opens require a pre-existing DB; neither creates
+	// the directory. Only a plain read-write open (create-if-missing) does.
+	if !s.cfg.ReadOnly && !s.cfg.MustExist {
+		if err := os.MkdirAll(abs, dirPerm); err != nil {
+			return fmt.Errorf("rocksdb: mkdir %s: %w", abs, err)
+		}
+	}
 
 	cfNames := resolveCFNames(s.cfg)
 	opts := grocksdb.NewDefaultOptions()
-	opts.SetCreateIfMissing(true)
-	opts.SetCreateIfMissingColumnFamilies(true)
+	if !s.cfg.ReadOnly && !s.cfg.MustExist {
+		opts.SetCreateIfMissing(true)
+		opts.SetCreateIfMissingColumnFamilies(true)
+	}
 
 	cfOpts := make([]*grocksdb.Options, len(cfNames))
 	for i := range cfOpts {
@@ -511,7 +520,18 @@ func (s *Store) constructAndOpen() error {
 	s.applyTuning(opts, cfNames, cfOpts)
 
 	start := time.Now()
-	db, cfHandles, err := grocksdb.OpenDbColumnFamilies(opts, abs, cfNames, cfOpts)
+	var (
+		db        *grocksdb.DB
+		cfHandles []*grocksdb.ColumnFamilyHandle
+	)
+	if s.cfg.ReadOnly {
+		// errorIfWalFileExists=false: a cleanly-closed DB has no WAL; if a crash ever
+		// left one, the open recovers it into in-memory memtables (see Config.ReadOnly)
+		// rather than failing, so reads still see every synced write.
+		db, cfHandles, err = grocksdb.OpenDbForReadOnlyColumnFamilies(opts, abs, cfNames, cfOpts, false)
+	} else {
+		db, cfHandles, err = grocksdb.OpenDbColumnFamilies(opts, abs, cfNames, cfOpts)
+	}
 	elapsed := time.Since(start)
 	if err != nil {
 		opts.Destroy()
@@ -548,7 +568,7 @@ func (s *Store) constructAndOpen() error {
 	// WAL on + per-write Sync on — non-negotiable across every
 	// fullhistory store, so pinned here on the shared wo rather
 	// than exposed via Tuning. The streaming ingestion contract
-	// requires "AddEntries returned nil" to mean "durable on disk";
+	// requires "the ledger batch committed" to mean "durable on disk";
 	// one fsync per Put/Batch regardless of size.
 	s.wo.DisableWAL(false)
 	s.wo.SetSync(true)
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb_test.go
index 999803b75..f1a726875 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb_test.go
@@ -48,6 +48,47 @@ func openTestStore(t *testing.T, cfNames []string) *Store {
 	return s
 }
 
+// TestNew_MustExist_EmptyReadyDBReopens pins that a must-exist read-write open of
+// an already-created but EMPTY DB succeeds: the mode refuses only to CREATE, it
+// never requires committed data. This is the "ready" hot-chunk reopen path (an
+// ingester that crashed before committing its first ledger must still reopen).
+func TestNew_MustExist_EmptyReadyDBReopens(t *testing.T) {
+	path := t.TempDir()
+	cf := []string{"c0"}
+
+	// Create an empty DB the normal way (create-if-missing), then close it.
+	s, err := New(Config{Path: path, ColumnFamilies: cf, Logger: silentLogger()})
+	require.NoError(t, err)
+	require.NoError(t, s.Close())
+
+	// Reopen must-exist: succeeds against the existing empty DB.
+	reopened, err := New(Config{Path: path, ColumnFamilies: cf, Logger: silentLogger(), MustExist: true})
+	require.NoError(t, err, "must-exist reopen of an empty ready DB succeeds")
+	require.NoError(t, reopened.Close())
+}
+
+// TestNew_MustExist_GuttedDirFailsOpen pins that a must-exist open of a directory
+// that exists but holds no valid RocksDB (no CURRENT) FAILS. The daemon depends on
+// this: a "ready" hot key whose DB was wiped must never silently auto-heal into a
+// fresh empty DB, which would regress the watermark.
+func TestNew_MustExist_GuttedDirFailsOpen(t *testing.T) {
+	path := t.TempDir()
+	cf := []string{"c0"}
+
+	// Create a real DB, close it, then gut the dir (remove every file, keep the dir).
+	s, err := New(Config{Path: path, ColumnFamilies: cf, Logger: silentLogger()})
+	require.NoError(t, err)
+	require.NoError(t, s.Close())
+	entries, err := os.ReadDir(path)
+	require.NoError(t, err)
+	for _, e := range entries {
+		require.NoError(t, os.RemoveAll(filepath.Join(path, e.Name())))
+	}
+
+	_, err = New(Config{Path: path, ColumnFamilies: cf, Logger: silentLogger(), MustExist: true})
+	require.Error(t, err, "must-exist open of a gutted dir (no CURRENT) fails, never auto-heals")
+}
+
 func TestMain(m *testing.M) {
 	if os.Getenv("ROCKSDB_LOCK_PROBE") == "1" {
 		_, err := New(Config{
@@ -141,27 +182,19 @@ func TestStore_PutGet_DefaultCF(t *testing.T) {
 	assert.False(t, found3)
 }
 
-func TestStore_FirstLastKey(t *testing.T) {
+func TestStore_LastKey(t *testing.T) {
 	s := openTestStore(t, nil)
 
-	// Empty default CF: ok=false, no error, at both ends.
-	_, ok, err := s.FirstKey("")
-	require.NoError(t, err)
-	require.False(t, ok)
-	_, ok, err = s.LastKey("")
+	// Empty default CF: ok=false, no error.
+	_, ok, err := s.LastKey("")
 	require.NoError(t, err)
 	require.False(t, ok)
 
 	// EncodeUint32 is big-endian, so byte-lex key order is numeric order:
-	// insert out of order and expect the min/max back.
+	// insert out of order and expect the max back.
 	for _, n := range []uint32{500, 1, 9999, 42} {
 		require.NoError(t, s.Put("", EncodeUint32(n), []byte{byte(n)}))
 	}
-	first, ok, err := s.FirstKey("")
-	require.NoError(t, err)
-	require.True(t, ok)
-	require.Equal(t, uint32(1), DecodeUint32(first))
-
 	last, ok, err := s.LastKey("")
 	require.NoError(t, err)
 	require.True(t, ok)
@@ -169,28 +202,21 @@ func TestStore_FirstLastKey(t *testing.T) {
 
 	// Unknown CF surfaces ErrCFNotFound (distinct from ok=false on an
 	// empty-but-configured CF).
-	_, _, err = s.FirstKey("not-configured")
-	require.ErrorIs(t, err, ErrCFNotFound)
 	_, _, err = s.LastKey("not-configured")
 	require.ErrorIs(t, err, ErrCFNotFound)
 
-	// Non-default CF: FirstKey/LastKey resolve the requested CF
-	// independently of the default CF.
+	// Non-default CF: LastKey resolves the requested CF independently of the default.
 	const altCF = "alt"
 	sAlt := openTestStore(t, []string{altCF})
 	for _, n := range []uint32{7, 3, 8} {
 		require.NoError(t, sAlt.Put(altCF, EncodeUint32(n), []byte{byte(n)}))
 	}
-	first, ok, err = sAlt.FirstKey(altCF)
-	require.NoError(t, err)
-	require.True(t, ok)
-	require.Equal(t, uint32(3), DecodeUint32(first))
 	last, ok, err = sAlt.LastKey(altCF)
 	require.NoError(t, err)
 	require.True(t, ok)
 	require.Equal(t, uint32(8), DecodeUint32(last))
 	// The default CF of the same store is untouched → ok=false.
-	_, ok, err = sAlt.FirstKey("")
+	_, ok, err = sAlt.LastKey("")
 	require.NoError(t, err)
 	require.False(t, ok)
 }
@@ -373,7 +399,6 @@ func TestStore_OpsAfterCloseFailWithErrStoreClosed(t *testing.T) {
 	}{
 		{"Put", func() error { return s.Put(defaultCFName, []byte("k"), []byte("v")) }},
 		{"Get", func() error { _, _, err := s.Get(defaultCFName, []byte("k")); return err }},
-		{"FirstKey", func() error { _, _, err := s.FirstKey(defaultCFName); return err }},
 		{"LastKey", func() error { _, _, err := s.LastKey(defaultCFName); return err }},
 		{"Delete", func() error { return s.Delete(defaultCFName, []byte("k")) }},
 		{"Iterate", func() error {
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/cold_index.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/cold_index.go
index b730b9986..70ad7fb78 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/cold_index.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/cold_index.go
@@ -50,15 +50,9 @@ import (
 // hit a slow (*Bitmap).lazyOR path at query time and K≥12 regresses
 // catastrophically.
 //
-// Two callers produce bitmaps:
-//
-//   - Cold backfill builds a Bitmaps single-threaded via per-event
-//     events.TermsFor + Bitmaps.AddTo, hands it directly to this
-//     function.
-//   - The live-chunk freeze path calls hotStore.Index().Snapshot() to
-//     materialize a uniquely-owned Bitmaps from the concurrent live
-//     mirror; that Snapshot Clones each bitmap so this function may
-//     mutate them freely.
+// Both cold backfill and the live-chunk freeze build a Bitmaps single-threaded by
+// re-deriving terms from raw LCMs (per-event events.TermsFor + Bitmaps.AddTo) and
+// hand it directly here.
 //
 // index.hash is the MPHF serialized via buildMPHF.
 //
@@ -133,9 +127,9 @@ func WriteColdIndex(ctx context.Context, chunkID chunk.ID, bitmaps events.Bitmap
 		}
 		var fp [IndexRecordFingerprintLen]byte
 		copy(fp[:], term[:IndexRecordFingerprintLen])
-		// Mutate in place — bitmaps is uniquely owned by the caller
-		// (built single-threaded for cold backfill, or Cloned via
-		// ConcurrentBitmaps.Snapshot for the live-chunk freeze path).
+		// Mutate in place — bitmaps is uniquely owned by the caller, built
+		// single-threaded either way: cold backfill from the .pack, or the freeze
+		// from the read-only hot DB.
 		bitmap.RunOptimize()
 		entries = append(entries, indexEntry{slot: slot, fp: fp, bitmap: bitmap})
 	}
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store.go
index 0b95fc8ef..ea905d0a3 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store.go
@@ -7,24 +7,15 @@ import (
 	"fmt"
 	"iter"
 	"math"
-	"os"
-	"path/filepath"
 
 	"github.com/RoaringBitmap/roaring/v2"
 	"github.com/linxGnu/grocksdb"
 
-	supportlog "github.com/stellar/go-stellar-sdk/support/log"
-
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/events"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
 )
 
-// HotDirName is the subdirectory under EventsFullHistoryDataDir that
-// contains one DB per active hot chunk (the current_hot_chunk plus
-// any chunk currently being frozen).
-const HotDirName = "hot"
-
 // Column-family names used inside one chunk's hot RocksDB DB. The
 // per-Chunk DB directory encodes the chunk ID, so the CF names
 // themselves carry no chunk suffix.
@@ -34,22 +25,6 @@ const (
 	OffsetsCF = "events_offsets"
 )
 
-// HotChunkDir returns the on-disk path of chunkID's per-Chunk hot DB
-// rooted at dataDir.
-func HotChunkDir(dataDir string, chunkID chunk.ID) string {
-	return filepath.Join(dataDir, HotDirName, chunkID.String())
-}
-
-// RemoveHotChunkDir deletes chunkID's hot DB directory. Idempotent —
-// returns nil when the directory is already absent.
-//
-// The caller MUST close chunkID's HotStore before calling this;
-// otherwise RocksDB's LOCK file is still held and the on-disk state
-// will be inconsistent.
-func RemoveHotChunkDir(dataDir string, chunkID chunk.ID) error {
-	return os.RemoveAll(HotChunkDir(dataDir, chunkID))
-}
-
 // Per-CF tuning for the hot store, passed via rocksdb.Config.PerCFOptions:
 //
 //   - DataCF holds XDR-encoded event payloads: compressible (zstd
@@ -79,26 +54,13 @@ func hotStoreCFOptions() map[string]rocksdb.CFOptions {
 	}
 }
 
-// openHotChunk opens (or creates) chunkID's per-Chunk hot RocksDB DB
-// at HotChunkDir(dataDir, chunkID). The three per-Chunk CFs are
-// configured at New so they auto-create on a fresh DB and are
-// rediscovered on a reopen.
-//
-// Unexported: OpenHotStore is the only caller and is the public way
-// to open a per-Chunk hot DB (since the warmup step is mandatory
-// before the store is usable).
-func openHotChunk(dataDir string, chunkID chunk.ID, logger *supportlog.Entry) (*rocksdb.Store, error) {
-	store, err := rocksdb.New(rocksdb.Config{
-		Path:           HotChunkDir(dataDir, chunkID),
-		ColumnFamilies: []string{DataCF, IndexCF, OffsetsCF},
-		Logger:         logger,
-		PerCFOptions:   hotStoreCFOptions(),
-	})
-	if err != nil {
-		return nil, fmt.Errorf("events: open hot chunk %s: %w", chunkID, err)
-	}
-	return store, nil
-}
+// CFNames returns the three CFs this facade owns. Exported so the hotchunk
+// shared-DB opener can register them alongside the other CFs (decision (a)).
+func CFNames() []string { return []string{DataCF, IndexCF, OffsetsCF} }
+
+// CFOptions returns this facade's per-CF options. Exported so the hotchunk
+// opener merges them into the shared per-chunk DB's PerCFOptions.
+func CFOptions() map[string]rocksdb.CFOptions { return hotStoreCFOptions() }
 
 const (
 	dataKeyLen   = 4      // event_id (chunk encoded by per-Chunk DB directory)
@@ -107,47 +69,35 @@ const (
 	offsetValLen = 4      // per-ledger event count (uint32 BE)
 )
 
-// ErrLedgerOutOfRange is returned by IngestLedgerEvents when the
+// ErrLedgerOutOfRange is returned by IngestLedgerToBatch when the
 // supplied ledger sequence falls outside the chunk's [FirstLedger,
 // LastLedger] window.
 var ErrLedgerOutOfRange = errors.New("events: ledger outside chunk range")
 
-// ErrLedgerOutOfOrder is returned by IngestLedgerEvents when the
+// ErrLedgerOutOfOrder is returned by IngestLedgerToBatch when the
 // supplied ledger sequence is not the next-expected one. Catches
 // duplicate ingest of an already-committed ledger as well as gaps
 // (skipping ahead). Both would silently corrupt the per-ledger
 // offset chain if not rejected up front.
 var ErrLedgerOutOfOrder = errors.New("events: ledger out of order")
 
-// HotStore wraps one chunk's hot RocksDB DB plus the in-memory term
-// mirror and ledger-offset cache that feed the query path. Reads and
-// writes share the same struct; every HotStore owns its chunkStore
-// exclusively and Close releases it.
+// HotStore wraps one chunk's hot RocksDB DB plus the in-memory term mirror and
+// ledger-offset cache that feed the query path.
 //
-// Atomicity model: the per-Chunk DB is the source of truth.
-// IngestLedgerEvents commits data + index + offsets to chunkStore in one
-// atomic batch and then updates the in-memory mirrors. Warmup on next
-// startup reconstructs the mirrors from the chunk's on-disk CFs.
+// Atomicity: the per-Chunk DB is the source of truth. IngestLedgerToBatch queues
+// data + index + offsets into one atomic batch, then (post-commit) the apply
+// hook updates the in-memory mirrors; warmup reconstructs them from the on-disk
+// CFs on next startup.
 //
-// Concurrency model:
+// Concurrency:
 //
-//   - Writes (IngestLedgerEvents) follow a single-writer contract —
-//     the orchestrator drives ingest from one goroutine per chunk.
-//     The in-memory mirror and offsets have their own concurrency
-//     primitives for the single-writer-vs-multi-reader pattern.
-//   - Reads (Lookup, FetchEvents, All) take NO HotStore-level lock.
-//     They fast-path-guard via h.chunkStore.IsClosed() and rely on
-//     the in-memory primitives' internal locks (for the mirror) and
-//     RocksDB's own thread-safety (for chunkStore).
-//   - Metadata accessors split by Close behavior:
-//     ChunkID, NextEventID, Index — infallible, return their cached
-//     value forever (usable for post-Close logging).
-//     EventCount, Offsets — return ErrClosed after Close, matching
-//     the ColdReader and Reader-interface contract.
-//   - Close delegates to chunkStore.Close, which is itself idempotent
-//     via rocksdb.Store's own atomic.Bool + CompareAndSwap. The
-//     in-memory mirror has no separate close step — it is dropped
-//     implicitly when HotStore is GC'd.
+//   - Writes (IngestLedgerToBatch) are single-writer (one goroutine per chunk).
+//   - Reads (Lookup, FetchEvents, All) take NO HotStore-level lock — they guard
+//     via chunkStore.IsClosed() and rely on the mirror's internal locks and
+//     RocksDB's thread-safety.
+//   - Metadata split after the caller-owned store is closed: ChunkID is
+//     infallible (cached, usable post-close); EventCount and
+//     Offsets return ErrClosed after close (Reader-interface contract).
 type HotStore struct {
 	chunkStore *rocksdb.Store
 	chunkID    chunk.ID
@@ -158,60 +108,31 @@ type HotStore struct {
 // Compile-time guard: *HotStore satisfies Reader.
 var _ Reader = (*HotStore)(nil)
 
-// OpenHotStore opens (or creates) chunkID's hot DB at
-// HotChunkDir(dataDir, chunkID), warms up the in-memory mirror and
-// offsets from disk, and returns a ready-to-use HotStore. The
-// returned store owns its chunkStore; Close releases it.
-func OpenHotStore(
-	dataDir string,
-	chunkID chunk.ID,
-	logger *supportlog.Entry,
-) (*HotStore, error) {
-	if dataDir == "" {
-		return nil, errors.New("events: OpenHotStore requires a data dir")
-	}
-	if logger == nil {
-		return nil, errors.New("events: OpenHotStore requires a logger")
-	}
-
-	chunkStore, err := openHotChunk(dataDir, chunkID, logger)
+// NewWithStore wraps an ALREADY-OPEN rocksdb.Store as an events HotStore on the
+// three events CFs (CFNames()), running the mandatory warmup to rebuild the
+// in-memory mirror + offsets. The store is owned by the caller — in production,
+// hotchunk.DB composes this facade over the shared per-chunk DB and closes that
+// DB once. The store must have CFNames() registered + CFOptions() applied.
+// A warmup failure returns the error WITHOUT closing the caller-owned store.
+func NewWithStore(store *rocksdb.Store, chunkID chunk.ID) (*HotStore, error) {
+	mirror, offsets, err := warmup(store, chunkID)
 	if err != nil {
-		return nil, err
-	}
-	mirror, offsets, err := warmup(chunkStore, chunkID)
-	if err != nil {
-		_ = chunkStore.Close()
 		return nil, fmt.Errorf("events: warmup chunk %s: %w", chunkID, err)
 	}
 	return &HotStore{
-		chunkStore: chunkStore,
+		chunkStore: store,
 		chunkID:    chunkID,
 		mirror:     mirror,
 		offsets:    offsets,
 	}, nil
 }
 
-// Close releases the underlying chunk store. Idempotent — delegates
-// to chunkStore.Close, which is itself idempotent via its own
-// atomic.Bool + CompareAndSwap. The in-memory mirror is dropped
-// implicitly when HotStore is GC'd.
-//
-// Concurrency: must not be called concurrently with in-flight read
-// methods on the same HotStore (Lookup, FetchEvents, All). Callers
-// drain those reads before invoking Close. The single-writer ingest
-// contract means there is no concurrent IngestLedgerEvents call to
-// race with either; chunkStore's IsClosed check inside
-// IngestLedgerEvents fast-fails any post-Close ingest attempt.
-func (h *HotStore) Close() error {
-	return h.chunkStore.Close()
-}
-
 // ChunkID returns the chunk this store serves.
 func (h *HotStore) ChunkID() chunk.ID { return h.chunkID }
 
 // EventCount is the total number of events committed to this Chunk
-// so far. Equal to the next event-id IngestLedgerEvents would assign.
-// Returns (0, ErrClosed) after Close. The Reader interface signature
+// so far. Equal to the next event-id IngestLedgerToBatch would assign.
+// Returns (0, ErrClosed) after the caller-owned store is closed. The Reader interface signature
 // is fallible to accommodate ColdReader's lazy metadata load; on the
 // hot side the value is always live and the error is only ErrClosed.
 func (h *HotStore) EventCount() (uint32, error) {
@@ -221,20 +142,13 @@ func (h *HotStore) EventCount() (uint32, error) {
 	return h.offsets.TotalEvents(), nil
 }
 
-// NextEventID is the next chunk-relative event ID IngestLedgerEvents
-// will assign. Returns the same value as EventCount on the hot side
-// and is exposed under both names for the ingest-side and reader-side
-// mental models. Infallible at the type level (hot-only API, not on
-// the Reader interface).
-func (h *HotStore) NextEventID() uint32 { return h.offsets.TotalEvents() }
-
 // Offsets returns a point-in-time view of the ledger-offset cache.
 // The coordinator uses this to stitch a multi-ledger query range
 // into chunk-relative event-id ranges (see Reader.Offsets).
 //
 // Implementation: returns a *LedgerOffsets sharing the live
 // backing array, capped at the count visible at call time
-// (~24-byte allocation per Query). Concurrent IngestLedgerEvents
+// (~24-byte allocation per Query). A concurrent IngestLedgerToBatch
 // may extend the backing past the cap, but the returned view's
 // slice stays bounded to what was visible when Offsets returned.
 // Callers (Query) take the view once at entry and pass it through
@@ -244,7 +158,7 @@ func (h *HotStore) NextEventID() uint32 { return h.offsets.TotalEvents() }
 // with the live backing array. Calling Append on the view would
 // silently fork it from the live data; the contract is read-only.
 //
-// Returns (nil, ErrClosed) after Close.
+// Returns (nil, ErrClosed) after the caller-owned store is closed.
 func (h *HotStore) Offsets() (*events.LedgerOffsets, error) {
 	if h.chunkStore.IsClosed() {
 		return nil, ErrClosed
@@ -252,13 +166,6 @@ func (h *HotStore) Offsets() (*events.LedgerOffsets, error) {
 	return h.offsets.View(), nil
 }
 
-// Index returns the in-memory term mirror. Used by the freezer to
-// snapshot every (events.TermKey, bitmap) pair into WriteColdIndex
-// without rebuilding from RocksDB. Callers should typically call
-// h.Index().Snapshot() to get a uniquely owned Bitmaps for
-// serialization.
-func (h *HotStore) Index() *events.ConcurrentBitmaps { return h.mirror }
-
 // Lookup returns the bitmap of event IDs in this Chunk that match
 // the given term. The returned bitmap is an immutable snapshot of
 // the live mirror — writers publish new pointers via atomic.Store
@@ -266,7 +173,7 @@ func (h *HotStore) Index() *events.ConcurrentBitmaps { return h.mirror }
 // bitmap. Callers MUST NOT mutate it themselves. See Reader.Lookup
 // and ConcurrentBitmaps.Get for the full contract. Returns
 // (nil, ErrTermNotFound) when the term has no matching events.
-// Returns (nil, ErrClosed) after Close.
+// Returns (nil, ErrClosed) after the caller-owned store is closed.
 //
 // ctx is checked as a fast guard but the hot path does no blocking
 // I/O — the bitmap comes from the in-memory mirror.
@@ -336,7 +243,7 @@ func (h *HotStore) LookupKeys(ctx context.Context, keys []events.TermKey) ([]*ro
 // RocksDB also has them. A miss indicates corruption or a
 // writer/reader mismatch, not a normal not-found case.
 //
-// After Close, returns ErrClosed.
+// After the caller-owned store is closed, returns ErrClosed.
 func (h *HotStore) FetchEvents(ctx context.Context, eventIDs []uint32) ([]events.Payload, error) {
 	if h.chunkStore.IsClosed() {
 		return nil, ErrClosed
@@ -392,7 +299,7 @@ func (h *HotStore) FetchEvents(ctx context.Context, eventIDs []uint32) ([]events
 // Yielded Payloads are borrowed: ContractEventBytes aliases the iteration
 // buffer and is valid only until the next step — clone to retain.
 //
-// After Close, yields (zero Payload, ErrClosed) and stops.
+// After the caller-owned store is closed, yields (zero Payload, ErrClosed) and stops.
 // ctx is checked at entry and between iterator steps —
 // rocksdb.Store.IterateRange does not itself accept a ctx, so a
 // very slow Next() can block past a cancellation until the next
@@ -400,11 +307,11 @@ func (h *HotStore) FetchEvents(ctx context.Context, eventIDs []uint32) ([]events
 //
 // Out-of-range arguments yield an error and stop:
 //   - count == 0 is a natural no-op (no yields).
-//   - start+count > NextEventID (overflow-safe via uint64) yields a
-//     wrapped out-of-bounds error.
+//   - start+count > the committed event count (overflow-safe via uint64)
+//     yields a wrapped out-of-bounds error.
 //   - A short scan (fewer DataCF rows than count) yields a wrapped
 //     error after the partial stream — the CF should be dense in
-//     [0, NextEventID), so a hole indicates corruption.
+//     [0, committed count), so a hole indicates corruption.
 func (h *HotStore) FetchRange(ctx context.Context, start, count uint32) iter.Seq2[events.Payload, error] {
 	return func(yield func(events.Payload, error) bool) {
 		if h.chunkStore.IsClosed() {
@@ -418,7 +325,7 @@ func (h *HotStore) FetchRange(ctx context.Context, start, count uint32) iter.Seq
 		if count == 0 {
 			return
 		}
-		if err := validateFetchRange(start, count, h.NextEventID(), h.chunkID); err != nil {
+		if err := validateFetchRange(start, count, h.offsets.TotalEvents(), h.chunkID); err != nil {
 			yield(events.Payload{}, err)
 			return
 		}
@@ -463,16 +370,16 @@ func (h *HotStore) FetchRange(ctx context.Context, start, count uint32) iter.Seq
 // ColdWriter without buffering. Thin wrapper over FetchRange; its
 // yielded Payloads are likewise borrowed (valid only for the step).
 //
-// NextEventID is read inside the returned closure body, so a
+// The committed event count is read inside the returned closure body, so a
 // concurrent ingest between r.All(ctx) returning the Seq2 and the
 // consumer's first range step is included in the snapshot.
 //
-// After Close, yields (zero Payload, ErrClosed) and stops.
+// After the caller-owned store is closed, yields (zero Payload, ErrClosed) and stops.
 func (h *HotStore) All(ctx context.Context) iter.Seq2[events.Payload, error] {
 	return func(yield func(events.Payload, error) bool) {
 		// FetchRange stops iterating after yielding an error; we
 		// just forward whatever it yields and exit on the same step.
-		for p, err := range h.FetchRange(ctx, 0, h.NextEventID()) {
+		for p, err := range h.FetchRange(ctx, 0, h.offsets.TotalEvents()) {
 			if !yield(p, err) {
 				return
 			}
@@ -480,138 +387,104 @@ func (h *HotStore) All(ctx context.Context) iter.Seq2[events.Payload, error] {
 	}
 }
 
-// IngestLedgerEvents commits one ledger's events to the chunk store
-// atomically and then updates the in-memory mirrors.
+// IngestLedgerToBatch validates one ledger's events, marshals them, and queues
+// their CF Puts into the SHARED batch b. On success it returns the apply hook the
+// caller must run only AFTER b commits (decision (a)); on error the hook is nil.
+// Validation + term derivation happen before any Put, and on any error Store.Batch
+// discards the whole WriteBatch, so a rejected ledger never leaves committed rows.
 //
 // payloads is produced by events.LCMViewToPayloads, which emits each ledger's
-// events in ascending getEvents cursor order — write order here IS the
-// cursor contract (event IDs are assigned by arrival position). Terms are
-// derived internally via events.TermsForBytes on each payload's
-// ContractEventBytes.
-//
-// Sequence validation is performed up front, before any RocksDB
-// write or mirror mutation:
-//
-//   - ledgerSeq must lie within [chunkID.FirstLedger(),
-//     chunkID.LastLedger()] — out-of-range returns ErrLedgerOutOfRange.
-//   - ledgerSeq == the next expected ledger (StartLedger + LedgerCount)
-//     is appended normally.
-//   - ledgerSeq < expected (an already-ingested ledger) is an idempotent
-//     no-op returning nil, so a restarted ingester can blindly re-deliver
-//     the in-flight ledger; the re-delivered events are not re-verified.
-//   - ledgerSeq > expected (a gap) returns ErrLedgerOutOfOrder.
-//
-// A rejected call (out-of-range or gap) completes its checks before
-// marshaling, leaving the chunk store and in-memory mirrors untouched.
-//
-// Post-batch atomicity: once the RocksDB batch commits, the in-memory
-// mirror + offsets updates are infallible by construction. Any
-// failure there panics rather than returning an error, because a
-// returned error would leave on-disk state ahead of in-memory state
-// with no clean recovery short of close + reopen.
-//
-//nolint:cyclop // sequential pipeline: validate -> marshal -> batch -> mirror updates
-func (h *HotStore) IngestLedgerEvents(ledgerSeq uint32, payloads []events.Payload) error {
-	if h.chunkStore.IsClosed() {
-		return ErrClosed
-	}
-
-	// Validate ledger sequence BEFORE any disk write or mirror mutation.
-	// Failing the offsets.Append check after the RocksDB batch has
-	// committed would leave events orphaned under a bad ledger key.
+// events in ascending getEvents cursor order — write order here IS the cursor
+// contract (event IDs are assigned by arrival position). Terms are derived via
+// events.TermsForBytes on each payload's ContractEventBytes.
+//
+// Sequence validation, before any Put or mirror mutation:
+//
+//   - ledgerSeq must lie within [chunkID.FirstLedger(), chunkID.LastLedger()] —
+//     out-of-range returns ErrLedgerOutOfRange.
+//   - ledgerSeq must equal the next expected ledger (StartLedger + LedgerCount).
+//     Under decision (a) resume is always MaxCommittedSeq+1, so any other ledger
+//     signals a mis-sequenced source (the ingestion loop's seq guard should have
+//     caught it) and is rejected with ErrLedgerOutOfOrder, never silently tolerated.
+//
+// Post-batch atomicity: once the batch commits, the apply hook's in-memory
+// mirror + offsets updates are infallible by construction. Any failure there
+// panics rather than returning an error, because a returned error would leave
+// on-disk state ahead of in-memory state with no clean recovery short of
+// close + reopen.
+func (h *HotStore) IngestLedgerToBatch(
+	b *rocksdb.BatchWriter, ledgerSeq uint32, payloads []events.Payload,
+) (func(), error) {
+	// Validate BEFORE any Put. On error Store.Batch discards the whole WriteBatch,
+	// so a mid-loop failure never orphans rows — no separate staging buffer needed.
 	if ledgerSeq < h.chunkID.FirstLedger() || ledgerSeq > h.chunkID.LastLedger() {
-		return fmt.Errorf("%w: ledger %d not in chunk %s [%d, %d]",
+		return nil, fmt.Errorf("%w: ledger %d not in chunk %s [%d, %d]",
 			ErrLedgerOutOfRange, ledgerSeq, h.chunkID,
 			h.chunkID.FirstLedger(), h.chunkID.LastLedger())
 	}
 	expected := h.offsets.StartLedger() + uint32(h.offsets.LedgerCount()) //nolint:gosec
-	if ledgerSeq < expected {
-		// Already ingested: idempotent retry no-op. A restarted ingester
-		// can blindly re-deliver an already-committed ledger; drop it
-		// rather than erroring or double-appending. The re-delivered
-		// events are not re-verified, so a re-delivery carrying different
-		// events for an already-ingested ledger is silently ignored.
-		return nil
-	}
-	if ledgerSeq > expected {
-		return fmt.Errorf("%w: expected ledger %d, got %d",
+	if ledgerSeq != expected {
+		return nil, fmt.Errorf("%w: expected ledger %d, got %d",
 			ErrLedgerOutOfOrder, expected, ledgerSeq)
 	}
 
-	// Pre-derive term keys per payload so the post-commit mirror
-	// update doesn't re-hash. Surfacing TermsForBytes errors here
-	// (pre-batch) cleanly rejects the ledger commit without touching disk —
-	// a decode failure on stellar-core-validated XDR is a corruption
-	// signal worth aborting on.
+	// Derive term keys per payload up front (a TermsForBytes error rejects the
+	// ledger without any Put) and retain them for the post-commit mirror update.
 	termKeys := make([][]events.TermKey, len(payloads))
 	for i := range payloads {
 		keys, err := events.TermsForBytes(payloads[i].ContractEventBytes)
 		if err != nil {
-			return fmt.Errorf("events: derive terms for payload %d in ledger %d: %w", i, ledgerSeq, err)
+			return nil, fmt.Errorf("derive terms for payload %d in ledger %d: %w", i, ledgerSeq, err)
 		}
 		termKeys[i] = keys
 	}
 
 	startID := h.offsets.TotalEvents()
 	if uint64(startID)+uint64(len(payloads)) > math.MaxUint32 {
-		return fmt.Errorf("events: chunk %s would overflow uint32 event-id space at ledger %d",
+		return nil, fmt.Errorf("chunk %s would overflow uint32 event-id space at ledger %d",
 			h.chunkID, ledgerSeq)
 	}
 
-	// Atomic batch on the per-Chunk DB. Each payload is marshaled into one
-	// reused scratch buffer: BatchWriter.Put copies the value into the write
-	// batch synchronously, so the scratch is free to reuse on the next
-	// iteration — no per-payload allocation. A marshal error returns from
-	// the callback, which aborts the batch so nothing commits.
+	// Marshal + queue each event directly into b. BatchWriter.Put copies the value
+	// synchronously, so ONE reused scratch buffer serves every event; the caller
+	// opens exactly one batch per ledger, so no blob needs to outlive this call.
 	var scratch []byte
-	err := h.chunkStore.Batch(func(b *rocksdb.BatchWriter) error {
-		for i := range payloads {
-			eventID := startID + uint32(i)
-			blob, err := payloads[i].MarshalInto(scratch[:0])
-			if err != nil {
-				return fmt.Errorf("events: marshal payload %d for ledger %d: %w", i, ledgerSeq, err)
-			}
-			scratch = blob
-			b.Put(DataCF, encodeDataKey(eventID), blob)
-			for _, key := range termKeys[i] {
-				b.Put(IndexCF, encodeIndexKey(key, eventID), nil)
-			}
+	for i := range payloads {
+		blob, err := payloads[i].MarshalInto(scratch[:0])
+		if err != nil {
+			return nil, fmt.Errorf("marshal payload %d for ledger %d: %w", i, ledgerSeq, err)
 		}
-		// On-disk shape matches the in-memory API: per-ledger event
-		// count, not cumulative. Warmup replays directly via
-		// offsets.Append(eventCount) — no delta arithmetic.
-		//nolint:gosec // bounds-checked above
-		eventCount := uint32(len(payloads))
-		b.Put(OffsetsCF, encodeOffsetKey(ledgerSeq), encodeLedgerEventCount(eventCount))
-		return nil
-	})
-	if err != nil {
-		return fmt.Errorf("events: commit ledger %d to chunk %s: %w", ledgerSeq, h.chunkID, err)
-	}
-
-	// Phase 3: the batch is durable — apply it to the in-memory cache.
-	// Infallible given the validation above (ledgerSeq == expected and
-	// in-chunk, single writer): mirror.AddTo cannot fail and offsets.Append
-	// appends at the already-validated next slot, so the only
-	// non-completion is a crash, after which warmup rebuilds the cache from
-	// disk.
-	//
-	// Ordering invariant: mirror BEFORE offsets. A concurrent Query
-	// that captures offsets via h.offsets.Snapshot() then later calls
-	// mirror.Get for the same key sees either the previous state
-	// (offsets count N-1, mirror without ledger-N events) or a
-	// consistent later one (offsets count ≥N, mirror with ledger-N
-	// events). Reversing the order would let a reader observe an
-	// offsets count that includes IDs the mirror hasn't published
-	// yet — Query would then ask FetchEvents for IDs not yet
-	// indexed; the bitmap intersection would simply miss them, with
-	// no error surface.
-	//
-	// Batch by key so each ConcurrentBitmaps.AddTo call clones at most
-	// once per (key, ledger), not once per (key, event). For popular
-	// terms that receive many events in one ledger this turns N COW
-	// clones into 1. Initial capacity 64 ≈ a few × unique-terms per
-	// typical ledger; the map grows correctly past that.
+		scratch = blob
+		eventID := startID + uint32(i)
+		b.Put(DataCF, encodeDataKey(eventID), blob)
+		for _, key := range termKeys[i] {
+			b.Put(IndexCF, encodeIndexKey(key, eventID), nil)
+		}
+	}
+	//nolint:gosec // len bounded by the overflow guard above
+	b.Put(OffsetsCF, encodeOffsetKey(ledgerSeq), encodeLedgerEventCount(uint32(len(payloads))))
+
+	return func() { h.applyLedger(startID, termKeys) }, nil
+}
+
+// index returns the in-memory term mirror. Test-only write hook: no production
+// path reads it. Kept unexported until #772 decides whether the v2 read path
+// hooks into it.
+func (h *HotStore) index() *events.ConcurrentBitmaps { return h.mirror }
+
+// applyLedger updates the mirror + offsets for a ledger whose rows are durable.
+// Infallible by construction (IngestLedgerToBatch validated seq under the
+// single-writer contract); the only non-completion is a crash, after which warmup
+// rebuilds the mirror + offsets from the on-disk CFs.
+//
+// Ordering invariant: mirror BEFORE offsets. A concurrent Query that snapshots
+// offsets then reads the mirror must see either the prior state or a consistent
+// later one. Reversing it would let a reader see an offsets count including IDs
+// the mirror hasn't published — FetchEvents would then miss them, silently.
+func (h *HotStore) applyLedger(startID uint32, termKeys [][]events.TermKey) {
+	// Batch by key so each AddTo clones at most once per (key, ledger), not per
+	// (key, event) — turns N COW clones into 1 for popular terms. Cap 64 ≈ a few
+	// × unique-terms per ledger; the map grows past that.
 	perKeyIDs := make(map[events.TermKey][]uint32, 64)
 	for i, keys := range termKeys {
 		eventID := startID + uint32(i)
@@ -622,14 +495,13 @@ func (h *HotStore) IngestLedgerEvents(ledgerSeq uint32, payloads []events.Payloa
 	for key, ids := range perKeyIDs {
 		h.mirror.AddTo(key, ids...)
 	}
-	//nolint:gosec // len bounded by the overflow check above
-	h.offsets.Append(uint32(len(payloads)))
-	return nil
+	//nolint:gosec // len bounded by IngestLedgerToBatch's overflow guard
+	h.offsets.Append(uint32(len(termKeys)))
 }
 
 // ──────────────────────────────────────────────────────────────────
 // Warmup — reconstructs the in-memory mirror + offsets from the
-// per-Chunk DB's on-disk CFs. Called only by OpenHotStore.
+// per-Chunk DB's on-disk CFs. Called by NewWithStore.
 // ──────────────────────────────────────────────────────────────────
 
 // warmup rebuilds the in-memory mirrors for chunkID by prefix-scanning
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store_test.go
index ea5d3ce7d..bda698c7f 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store_test.go
@@ -7,6 +7,7 @@ import (
 	"errors"
 	"fmt"
 	"iter"
+	"path/filepath"
 	"sync"
 	"testing"
 
@@ -19,6 +20,7 @@ import (
 
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/events"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
 )
 
 // silentLogger returns a logger whose output is buffered into an
@@ -37,6 +39,7 @@ func silentLogger() *supportlog.Entry {
 type hotStoreHarness struct {
 	dataDir string
 	store   *HotStore
+	raw     *rocksdb.Store
 }
 
 // openHotStoreForTest opens a fresh per-Chunk hot DB for chunkID
@@ -48,11 +51,39 @@ func openHotStoreForTest(t *testing.T, chunkID chunk.ID) *hotStoreHarness {
 	t.Helper()
 	dir := t.TempDir()
 
-	hot, err := OpenHotStore(dir, chunkID, silentLogger())
+	hot, raw := openHotStoreForTestAt(t, dir, chunkID)
+	return &hotStoreHarness{dataDir: dir, store: hot, raw: raw}
+}
+
+func openHotStoreForTestAt(t *testing.T, dir string, chunkID chunk.ID) (*HotStore, *rocksdb.Store) {
+	t.Helper()
+	hot, raw, err := tryOpenHotStoreForTest(t, dir, chunkID)
 	require.NoError(t, err)
-	t.Cleanup(func() { _ = hot.Close() })
+	return hot, raw
+}
+
+func tryOpenHotStoreForTest(t *testing.T, dir string, chunkID chunk.ID) (*HotStore, *rocksdb.Store, error) {
+	t.Helper()
+	raw := openRawHotChunkForTest(t, dir, chunkID)
+	hot, err := NewWithStore(raw, chunkID)
+	if err != nil {
+		_ = raw.Close()
+		return nil, nil, err
+	}
+	t.Cleanup(func() { _ = raw.Close() })
+	return hot, raw, nil
+}
 
-	return &hotStoreHarness{dataDir: dir, store: hot}
+func openRawHotChunkForTest(t *testing.T, dir string, chunkID chunk.ID) *rocksdb.Store {
+	t.Helper()
+	raw, err := rocksdb.New(rocksdb.Config{
+		Path:           filepath.Join(dir, chunkID.String()),
+		ColumnFamilies: CFNames(),
+		Logger:         silentLogger(),
+		PerCFOptions:   CFOptions(),
+	})
+	require.NoError(t, err)
+	return raw
 }
 
 func makePayload(symbol string) (events.Payload, []events.TermKey) {
@@ -105,23 +136,12 @@ func dataSym(t *testing.T, p events.Payload) string {
 	return string(*eventOf(p).Body.V0.Data.Sym)
 }
 
-func TestOpenHotStore_RequiresDataDirAndLogger(t *testing.T) {
-	dir := t.TempDir()
-
-	_, err := OpenHotStore("", 0, silentLogger())
-	require.Error(t, err, "missing dataDir")
-
-	_, err = OpenHotStore(dir, 0, nil)
-	require.Error(t, err, "missing logger")
-}
-
 func TestHotStore_FreshChunkHasEmptyState(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	h := openHotStoreForTest(t, chunkID)
 
 	assert.Equal(t, chunkID, h.store.ChunkID())
 	assert.Equal(t, uint32(0), mustEventCount(t, h.store))
-	assert.Equal(t, uint32(0), h.store.NextEventID())
 	assert.Equal(t, chunkID.FirstLedger(), mustOffsets(t, h.store).StartLedger())
 }
 
@@ -130,7 +150,7 @@ func TestHotStore_IngestLedgerWritesAllCFs(t *testing.T) {
 	h := openHotStoreForTest(t, chunkID)
 
 	p, keys := makePayload("transfer")
-	require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p}))
+	require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p}))
 
 	// events_data row exists.
 	got, found, err := h.store.chunkStore.Get(DataCF, encodeDataKey(0))
@@ -159,7 +179,7 @@ func TestHotStore_IngestLedgerWritesAllCFs(t *testing.T) {
 	require.NotNil(t, bm)
 	assert.True(t, bm.Contains(0))
 
-	assert.Equal(t, uint32(1), h.store.NextEventID())
+	assert.Equal(t, uint32(1), mustEventCount(t, h.store))
 }
 
 func TestHotStore_EventIDsAreMonotonic(t *testing.T) {
@@ -169,24 +189,24 @@ func TestHotStore_EventIDsAreMonotonic(t *testing.T) {
 
 	p1, _ := makePayload("a")
 	p2, _ := makePayload("b")
-	require.NoError(t, h.store.IngestLedgerEvents(first, []events.Payload{p1, p2}))
+	require.NoError(t, ingestLedgerEvents(h.store, first, []events.Payload{p1, p2}))
 
 	p3, _ := makePayload("c")
-	require.NoError(t, h.store.IngestLedgerEvents(first+1, []events.Payload{p3}))
+	require.NoError(t, ingestLedgerEvents(h.store, first+1, []events.Payload{p3}))
 
 	for id := range uint32(3) {
 		_, found, err := h.store.chunkStore.Get(DataCF, encodeDataKey(id))
 		require.NoError(t, err)
 		assert.True(t, found, "missing event id %d", id)
 	}
-	assert.Equal(t, uint32(3), h.store.NextEventID())
+	assert.Equal(t, uint32(3), mustEventCount(t, h.store))
 }
 
 func TestHotStore_EmptyLedgerStillWritesOffsetsAndState(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	h := openHotStoreForTest(t, chunkID)
 
-	require.NoError(t, h.store.IngestLedgerEvents(2, nil))
+	require.NoError(t, ingestLedgerEvents(h.store, 2, nil))
 
 	val, found, err := h.store.chunkStore.Get(OffsetsCF, encodeOffsetKey(2))
 	require.NoError(t, err)
@@ -209,7 +229,7 @@ func TestHotStore_LookupReturnsImmutableSnapshot(t *testing.T) {
 	// Promote to dense mode so we exercise the bm.Load path (sparse
 	// mode allocates a fresh bitmap per Get).
 	for i := range uint32(70) {
-		require.NoError(t, h.store.IngestLedgerEvents(2+i, []events.Payload{p}))
+		require.NoError(t, ingestLedgerEvents(h.store, 2+i, []events.Payload{p}))
 	}
 
 	first, err := h.store.Lookup(context.Background(), keys[0])
@@ -218,7 +238,7 @@ func TestHotStore_LookupReturnsImmutableSnapshot(t *testing.T) {
 
 	// New ingest publishes a new snapshot. The old pointer must
 	// remain unchanged (it's the previous snapshot).
-	require.NoError(t, h.store.IngestLedgerEvents(72, []events.Payload{p}))
+	require.NoError(t, ingestLedgerEvents(h.store, 72, []events.Payload{p}))
 
 	assert.Equal(t, cardBefore, first.GetCardinality(),
 		"prior Lookup result must be an immutable snapshot — later IngestLedgerEvents must not mutate it")
@@ -235,9 +255,9 @@ func TestHotStore_FetchEventsRoundTrip(t *testing.T) {
 
 	p1, _ := makePayload("a")
 	p2, _ := makePayload("b")
-	require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p1, p2}))
+	require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p1, p2}))
 	p3, _ := makePayload("c")
-	require.NoError(t, h.store.IngestLedgerEvents(3, []events.Payload{p3}))
+	require.NoError(t, ingestLedgerEvents(h.store, 3, []events.Payload{p3}))
 
 	fetched, err := h.store.FetchEvents(context.Background(), []uint32{0, 1, 2})
 	require.NoError(t, err)
@@ -251,7 +271,7 @@ func TestHotStore_FetchEventsErrorsOnMissingID(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	h := openHotStoreForTest(t, chunkID)
 	p, _ := makePayload("only")
-	require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p}))
+	require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p}))
 
 	_, err := h.store.FetchEvents(context.Background(), []uint32{99})
 	assert.Error(t, err)
@@ -272,7 +292,7 @@ func TestHotStore_FetchEventsLargeBatch(t *testing.T) {
 		p, _ := makePayload(fmt.Sprintf("evt-%03d", i))
 		payloads[i] = p
 	}
-	require.NoError(t, h.store.IngestLedgerEvents(2, payloads))
+	require.NoError(t, ingestLedgerEvents(h.store, 2, payloads))
 
 	ids := make([]uint32, n)
 	for i := range n {
@@ -297,7 +317,7 @@ func TestHotStore_FetchEventsHonorsContext(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	h := openHotStoreForTest(t, chunkID)
 	p, _ := makePayload("only")
-	require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p}))
+	require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p}))
 
 	ctx, cancel := context.WithCancel(context.Background())
 	cancel()
@@ -320,7 +340,7 @@ func TestHotStore_FetchEventsRejectsUnsortedInput(t *testing.T) {
 	p2.LedgerSequence = 2
 	p3, _ := makePayload("c")
 	p3.LedgerSequence = 2
-	require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p1, p2, p3}))
+	require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p1, p2, p3}))
 
 	_, err := h.store.FetchEvents(context.Background(), []uint32{2, 0})
 	require.ErrorIs(t, err, ErrUnsortedEventIDs, "out-of-order input must error")
@@ -336,10 +356,10 @@ func TestHotStore_AllStreamsInEventIDOrder(t *testing.T) {
 	p1.LedgerSequence = 2
 	p2, _ := makePayload("b")
 	p2.LedgerSequence = 2
-	require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p1, p2}))
+	require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p1, p2}))
 	p3, _ := makePayload("c")
 	p3.LedgerSequence = 3
-	require.NoError(t, h.store.IngestLedgerEvents(3, []events.Payload{p3}))
+	require.NoError(t, ingestLedgerEvents(h.store, 3, []events.Payload{p3}))
 
 	got := make([]string, 0, 3)
 	gotLedgers := make([]uint32, 0, 3)
@@ -364,8 +384,8 @@ func TestHotStore_AllEmptyChunkYieldsNothing(t *testing.T) {
 
 func TestHotStore_CloseRejectsWrites(t *testing.T) {
 	h := openHotStoreForTest(t, 0)
-	require.NoError(t, h.store.Close())
-	err := h.store.IngestLedgerEvents(2, nil)
+	require.NoError(t, h.raw.Close())
+	err := ingestLedgerEvents(h.store, 2, nil)
 	assert.ErrorIs(t, err, ErrClosed)
 }
 
@@ -379,8 +399,8 @@ func TestHotStore_PostCloseReadsError(t *testing.T) {
 	h := openHotStoreForTest(t, chunkID)
 
 	p, keys := makePayload("seed")
-	require.NoError(t, h.store.IngestLedgerEvents(chunkID.FirstLedger(), []events.Payload{p}))
-	require.NoError(t, h.store.Close())
+	require.NoError(t, ingestLedgerEvents(h.store, chunkID.FirstLedger(), []events.Payload{p}))
+	require.NoError(t, h.raw.Close())
 
 	// Lookup must error rather than silently returning the cached bitmap.
 	bm, err := h.store.Lookup(context.Background(), keys[0])
@@ -403,45 +423,45 @@ func TestHotStore_PostCloseReadsError(t *testing.T) {
 	require.ErrorIs(t, err, ErrClosed)
 }
 
-// TestHotStore_IngestLedgerEvents_DuplicateLedgerIsNoOp pins the
-// idempotency contract: re-ingesting an already-committed ledger is a
-// no-op (returns nil) that leaves state untouched — it neither advances
-// eventID/offsets nor writes the re-delivered payload, and the original
-// ledger's events remain intact. A restarted ingester can blindly
-// re-deliver the in-flight ledger.
-func TestHotStore_IngestLedgerEvents_DuplicateLedgerIsNoOp(t *testing.T) {
+// TestHotStore_IngestLedgerEvents_DuplicateLedgerErrors pins the sequencing
+// contract after the staging collapse (#30): re-ingesting an already-committed
+// ledger is NOT a silent no-op — it is a mis-sequencing error (ErrLedgerOutOfOrder)
+// that leaves state untouched (Store.Batch discards the WriteBatch on the error).
+// Under decision (a) the ingestion loop always resumes at MaxCommittedSeq+1 and
+// the shared cursor validates contiguity, so a duplicate can only mean a broken
+// source — an error, never silent tolerance.
+func TestHotStore_IngestLedgerEvents_DuplicateLedgerErrors(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	h := openHotStoreForTest(t, chunkID)
 	first := chunkID.FirstLedger()
 
 	p1, _ := makePayload("a")
-	require.NoError(t, h.store.IngestLedgerEvents(first, []events.Payload{p1}))
+	require.NoError(t, ingestLedgerEvents(h.store, first, []events.Payload{p1}))
 
 	countBefore := mustEventCount(t, h.store)
-	nextBefore := h.store.NextEventID()
 
-	// Re-ingesting the same ledger is an idempotent no-op.
+	// Re-ingesting the same ledger errors (expected is now first+1).
 	p2, _ := makePayload("b")
-	require.NoError(t, h.store.IngestLedgerEvents(first, []events.Payload{p2}))
+	err := ingestLedgerEvents(h.store, first, []events.Payload{p2})
+	require.ErrorIs(t, err, ErrLedgerOutOfOrder, "a re-delivered committed ledger must error, not no-op")
 
-	assert.Equal(t, countBefore, mustEventCount(t, h.store), "EventCount must not advance on duplicate ingest")
-	assert.Equal(t, nextBefore, h.store.NextEventID(), "NextEventID must not advance on duplicate ingest")
+	assert.Equal(t, countBefore, mustEventCount(t, h.store), "event count must not advance on the rejected ingest")
 
-	// The original ledger's event is untouched (not overwritten by p2).
+	// The original ledger's event is untouched, and the rejected batch committed
+	// nothing (Store.Batch discards the WriteBatch on the callback error).
 	got, err := h.store.FetchEvents(context.Background(), []uint32{0})
 	require.NoError(t, err)
 	require.Len(t, got, 1)
-	assert.Equal(t, "a", dataSym(t, got[0]), "original event must survive the no-op")
+	assert.Equal(t, "a", dataSym(t, got[0]), "original event must survive the rejected re-ingest")
 
-	// The dropped payload must not reach the mirror. makePayload emits
+	// The rejected payload must not reach the mirror. makePayload emits
 	// [contractID, topic0, ...]; contractID is shared across symbols
-	// (hardcoded 0xab), so we check topic0 (index 1), which is
-	// symbol-specific.
+	// (hardcoded 0xab), so we check topic0 (index 1), which is symbol-specific.
 	_, secondKeys := makePayload("b")
 	require.GreaterOrEqual(t, len(secondKeys), 2, "test fixture expected to have a topic0 term")
 	bm, lookupErr := h.store.Lookup(context.Background(), secondKeys[1])
 	require.ErrorIs(t, lookupErr, ErrTermNotFound,
-		"the no-op'd payload's topic0 term must not appear in the mirror")
+		"the rejected payload's topic0 term must not appear in the mirror")
 	assert.Nil(t, bm)
 }
 
@@ -453,18 +473,16 @@ func TestHotStore_IngestLedgerEvents_RejectsLedgerGap(t *testing.T) {
 	first := chunkID.FirstLedger()
 
 	p1, _ := makePayload("a")
-	require.NoError(t, h.store.IngestLedgerEvents(first, []events.Payload{p1}))
+	require.NoError(t, ingestLedgerEvents(h.store, first, []events.Payload{p1}))
 
 	countBefore := mustEventCount(t, h.store)
-	nextBefore := h.store.NextEventID()
 
 	// Skip first+1; jump directly to first+2.
 	p2, _ := makePayload("c")
-	err := h.store.IngestLedgerEvents(first+2, []events.Payload{p2})
+	err := ingestLedgerEvents(h.store, first+2, []events.Payload{p2})
 	require.ErrorIs(t, err, ErrLedgerOutOfOrder)
 
 	assert.Equal(t, countBefore, mustEventCount(t, h.store))
-	assert.Equal(t, nextBefore, h.store.NextEventID())
 }
 
 // TestHotStore_IngestLedgerEvents_RejectsOutOfRangeLedger pins the
@@ -476,22 +494,21 @@ func TestHotStore_IngestLedgerEvents_RejectsOutOfRangeLedger(t *testing.T) {
 	p, _ := makePayload("a")
 
 	// Below range (chunk 0's FirstLedger is FirstLedgerSeq == 2).
-	err := h.store.IngestLedgerEvents(1, []events.Payload{p})
+	err := ingestLedgerEvents(h.store, 1, []events.Payload{p})
 	require.ErrorIs(t, err, ErrLedgerOutOfRange, "ledger below chunk range")
 
 	// Above range — well past chunk 0's LastLedger.
-	err = h.store.IngestLedgerEvents(chunkID.LastLedger()+1, []events.Payload{p})
+	err = ingestLedgerEvents(h.store, chunkID.LastLedger()+1, []events.Payload{p})
 	require.ErrorIs(t, err, ErrLedgerOutOfRange, "ledger above chunk range")
 
 	// State must be unchanged after both rejections.
 	assert.Equal(t, uint32(0), mustEventCount(t, h.store))
-	assert.Equal(t, uint32(0), h.store.NextEventID())
 }
 
 func TestHotStore_CloseIsIdempotent(t *testing.T) {
 	h := openHotStoreForTest(t, 0)
-	require.NoError(t, h.store.Close())
-	assert.NoError(t, h.store.Close())
+	require.NoError(t, h.raw.Close())
+	assert.NoError(t, h.raw.Close())
 }
 
 func TestHotStore_ReopenRecoversState(t *testing.T) {
@@ -501,21 +518,18 @@ func TestHotStore_ReopenRecoversState(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID)
 	p1, _ := makePayload("before")
-	require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1}))
-	require.NoError(t, hot1.Close())
+	require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1}))
+	require.NoError(t, raw1.Close())
 
-	hot2, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = hot2.Close() })
+	hot2, _ := openHotStoreForTestAt(t, dir, chunkID)
 
-	assert.Equal(t, uint32(1), hot2.NextEventID(), "warmup recovered offsets")
+	assert.Equal(t, uint32(1), mustEventCount(t, hot2), "warmup recovered offsets")
 
 	p2, _ := makePayload("after")
-	require.NoError(t, hot2.IngestLedgerEvents(3, []events.Payload{p2}))
-	assert.Equal(t, uint32(2), hot2.NextEventID())
+	require.NoError(t, ingestLedgerEvents(hot2, 3, []events.Payload{p2}))
+	assert.Equal(t, uint32(2), mustEventCount(t, hot2))
 }
 
 func TestHotStore_SatisfiesReader(t *testing.T) {
@@ -542,7 +556,7 @@ func TestHotStore_ConcurrentIngestAndLookup(t *testing.T) {
 	go func() {
 		defer wg.Done()
 		for i := range uint32(N) {
-			if err := h.store.IngestLedgerEvents(2+i, []events.Payload{p}); err != nil {
+			if err := ingestLedgerEvents(h.store, 2+i, []events.Payload{p}); err != nil {
 				t.Errorf("ingest %d: %v", i, err)
 				return
 			}
@@ -560,7 +574,7 @@ func TestHotStore_ConcurrentIngestAndLookup(t *testing.T) {
 		}
 	}()
 	wg.Wait()
-	assert.Equal(t, uint32(N), h.store.NextEventID())
+	assert.Equal(t, uint32(N), mustEventCount(t, h.store))
 }
 
 // fetchRangePayloads fully drains FetchRange into a slice for tests
@@ -608,7 +622,7 @@ func TestHotStore_FetchRangeMidRange(t *testing.T) {
 		p, _ := makePayload(fmt.Sprintf("evt-%d", i))
 		payloads[i] = p
 	}
-	require.NoError(t, h.store.IngestLedgerEvents(first, payloads))
+	require.NoError(t, ingestLedgerEvents(h.store, first, payloads))
 
 	got, err := fetchRangePayloads(t, h.store, 1, 3)
 	require.NoError(t, err)
@@ -630,7 +644,7 @@ func TestHotStore_FetchRangeOutOfBoundsErrors(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	h := openHotStoreForTest(t, chunkID)
 	p, _ := makePayload("only")
-	require.NoError(t, h.store.IngestLedgerEvents(chunkID.FirstLedger(), []events.Payload{p}))
+	require.NoError(t, ingestLedgerEvents(h.store, chunkID.FirstLedger(), []events.Payload{p}))
 
 	_, err := fetchRangePayloads(t, h.store, 0, 2) // count > EventCount
 	require.Error(t, err)
@@ -641,7 +655,7 @@ func TestHotStore_FetchRangeOutOfBoundsErrors(t *testing.T) {
 func TestHotStore_FetchRangePostCloseYieldsErrClosed(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	h := openHotStoreForTest(t, chunkID)
-	require.NoError(t, h.store.Close())
+	require.NoError(t, h.raw.Close())
 
 	require.ErrorIs(t, firstIterError(h.store.FetchRange(context.Background(), 0, 1)), ErrClosed)
 }
@@ -655,7 +669,7 @@ func TestHotStore_AllMatchesFetchRange(t *testing.T) {
 		p, _ := makePayload(fmt.Sprintf("e%d", i))
 		payloads[i] = p
 	}
-	require.NoError(t, h.store.IngestLedgerEvents(first, payloads))
+	require.NoError(t, ingestLedgerEvents(h.store, first, payloads))
 
 	allSyms := make([]string, 0, len(payloads))
 	for p, err := range h.store.All(context.Background()) {
@@ -689,3 +703,24 @@ func mustOffsets(t *testing.T, r Reader) *events.LedgerOffsets {
 	require.NotNil(t, o)
 	return o
 }
+
+// ingestLedgerEvents mirrors the production write shape for tests: it commits one
+// ledger via IngestLedgerToBatch in a test-owned batch, then runs the apply hook.
+// It returns ErrClosed itself when the store is closed, before opening a batch.
+func ingestLedgerEvents(h *HotStore, ledgerSeq uint32, payloads []events.Payload) error {
+	if h.chunkStore.IsClosed() {
+		return ErrClosed
+	}
+	var apply func()
+	if err := h.chunkStore.Batch(func(b *rocksdb.BatchWriter) error {
+		a, aerr := h.IngestLedgerToBatch(b, ledgerSeq, payloads)
+		apply = a
+		return aerr
+	}); err != nil {
+		return err
+	}
+	if apply != nil {
+		apply()
+	}
+	return nil
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_warmup_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_warmup_test.go
index bae6da25a..5dd71349e 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_warmup_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_warmup_test.go
@@ -13,11 +13,11 @@ import (
 )
 
 // These tests exercise the (unexported) warmup() function indirectly
-// through OpenHotStore, which is the only production caller. They
+// through NewWithStore over an explicitly opened RocksDB store. They
 // document the "fresh chunk → empty caches", "ingested chunk →
 // reconstructed caches" contract.
 
-func TestWarmup_FreshChunkProducesEmptyMirrorsViaOpenHotStore(t *testing.T) {
+func TestWarmup_FreshChunkProducesEmptyMirrorsViaNewWithStore(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	h := openHotStoreForTest(t, chunkID)
 
@@ -37,11 +37,10 @@ func TestWarmup_RebuildsMirrorFromIngestedRows(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID)
 	p1, _ := makePayload("alpha")
 	p2, _ := makePayload("beta")
-	require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2}))
+	require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2}))
 
 	// Snapshot the mirror state before close. Snapshot returns a
 	// uniquely-owned Bitmaps the test can iterate freely.
@@ -49,12 +48,10 @@ func TestWarmup_RebuildsMirrorFromIngestedRows(t *testing.T) {
 	for term, bm := range hot1.mirror.Snapshot() {
 		expected[term] = bm.GetCardinality()
 	}
-	require.NoError(t, hot1.Close())
+	require.NoError(t, raw1.Close())
 
 	// Reopen — warmup replays events_index into a fresh mirror.
-	hot2, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = hot2.Close() })
+	hot2, _ := openHotStoreForTestAt(t, dir, chunkID)
 
 	got := make(map[events.TermKey]uint64)
 	for term, bm := range hot2.mirror.Snapshot() {
@@ -67,17 +64,14 @@ func TestWarmup_RestoresEventIDsForRepeatedTerm(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID)
 	p1, _ := makePayload("shared")
 	p2, _ := makePayload("shared")
 	p3, _ := makePayload("shared")
-	require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2, p3}))
-	require.NoError(t, hot1.Close())
+	require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2, p3}))
+	require.NoError(t, raw1.Close())
 
-	hot2, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = hot2.Close() })
+	hot2, _ := openHotStoreForTestAt(t, dir, chunkID)
 
 	contractTermKey := events.ComputeTermKey(eventOf(p1).ContractId[:], events.FieldContractID)
 	bm, err := hot2.Lookup(context.Background(), contractTermKey)
@@ -93,18 +87,15 @@ func TestWarmup_OffsetsReconstructedAcrossLedgers(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID)
 	p1, _ := makePayload("a")
 	p2, _ := makePayload("b")
-	require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2}))
+	require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2}))
 	p3, _ := makePayload("c")
-	require.NoError(t, hot1.IngestLedgerEvents(3, []events.Payload{p3}))
-	require.NoError(t, hot1.Close())
+	require.NoError(t, ingestLedgerEvents(hot1, 3, []events.Payload{p3}))
+	require.NoError(t, raw1.Close())
 
-	hot2, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = hot2.Close() })
+	hot2, _ := openHotStoreForTestAt(t, dir, chunkID)
 
 	assert.Equal(t, uint32(3), mustEventCount(t, hot2))
 
@@ -127,8 +118,7 @@ func TestWarmup_OffsetsReconstructedAcrossLedgers(t *testing.T) {
 //nolint:unparam // chunkID kept as a param for call-site clarity; today every caller uses 0
 func corruptHotChunk(t *testing.T, dir string, chunkID chunk.ID, mutate func(raw *rocksdb.Store)) {
 	t.Helper()
-	raw, err := openHotChunk(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	raw := openRawHotChunkForTest(t, dir, chunkID)
 	defer func() { require.NoError(t, raw.Close()) }() // release LOCK even if mutate fails
 	mutate(raw)
 }
@@ -137,12 +127,11 @@ func TestWarmup_RejectsDataEventBeyondOffsets(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID)
 	p1, _ := makePayload("a")
 	p2, _ := makePayload("b")
-	require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2})) // total = 2
-	require.NoError(t, hot1.Close())
+	require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2})) // total = 2
+	require.NoError(t, raw1.Close())
 
 	// An orphan data row well beyond total (id 7, total = 2): proves the
 	// check catches any id >= total, not just one past the boundary.
@@ -150,7 +139,7 @@ func TestWarmup_RejectsDataEventBeyondOffsets(t *testing.T) {
 		require.NoError(t, raw.Put(DataCF, encodeDataKey(7), []byte("orphan")))
 	})
 
-	_, err = OpenHotStore(dir, chunkID, silentLogger())
+	_, _, err := tryOpenHotStoreForTest(t, dir, chunkID)
 	// Branch-specific substring: every corruption shares "corrupt chunk",
 	// so assert the data-orphan message to prove this branch fired.
 	require.ErrorContains(t, err, "data present at id >= committed count")
@@ -160,13 +149,12 @@ func TestWarmup_RejectsOffsetsGap(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID)
 	for _, seq := range []uint32{2, 3, 4} {
 		p, _ := makePayload("x")
-		require.NoError(t, hot1.IngestLedgerEvents(seq, []events.Payload{p}))
+		require.NoError(t, ingestLedgerEvents(hot1, seq, []events.Payload{p}))
 	}
-	require.NoError(t, hot1.Close())
+	require.NoError(t, raw1.Close())
 
 	// Drop ledger 3's offset row: warmup then iterates 2, 4 and must
 	// reject the gap. This is the sequence check that moved out of
@@ -175,7 +163,7 @@ func TestWarmup_RejectsOffsetsGap(t *testing.T) {
 		require.NoError(t, raw.Delete(OffsetsCF, encodeOffsetKey(3)))
 	})
 
-	_, err = OpenHotStore(dir, chunkID, silentLogger())
+	_, _, err := tryOpenHotStoreForTest(t, dir, chunkID)
 	require.ErrorContains(t, err, "expected ledger 3, got 4")
 }
 
@@ -183,13 +171,12 @@ func TestWarmup_RejectsOffsetsOverflow(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID)
 	for _, seq := range []uint32{2, 3} {
 		p, _ := makePayload("x")
-		require.NoError(t, hot1.IngestLedgerEvents(seq, []events.Payload{p}))
+		require.NoError(t, ingestLedgerEvents(hot1, seq, []events.Payload{p}))
 	}
-	require.NoError(t, hot1.Close())
+	require.NoError(t, raw1.Close())
 
 	// Overwrite the offset rows with counts that sum past uint32: warmup
 	// must reject the cumulative overflow rather than silently wrapping.
@@ -198,7 +185,7 @@ func TestWarmup_RejectsOffsetsOverflow(t *testing.T) {
 		require.NoError(t, raw.Put(OffsetsCF, encodeOffsetKey(3), encodeLedgerEventCount(2_000_000_000)))
 	})
 
-	_, err = OpenHotStore(dir, chunkID, silentLogger())
+	_, _, err := tryOpenHotStoreForTest(t, dir, chunkID)
 	require.ErrorContains(t, err, "cumulative event count overflow")
 }
 
@@ -206,9 +193,8 @@ func TestWarmup_RejectsOrphanInEmptyChunk(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
-	require.NoError(t, hot1.Close()) // total = 0, nothing committed
+	_, raw1 := openHotStoreForTestAt(t, dir, chunkID)
+	require.NoError(t, raw1.Close()) // total = 0, nothing committed
 
 	// A data row in a chunk that committed nothing: total == 0, so the
 	// tail Get is skipped and the orphan scan must fire from id 0.
@@ -216,7 +202,7 @@ func TestWarmup_RejectsOrphanInEmptyChunk(t *testing.T) {
 		require.NoError(t, raw.Put(DataCF, encodeDataKey(0), []byte("orphan")))
 	})
 
-	_, err = OpenHotStore(dir, chunkID, silentLogger())
+	_, _, err := tryOpenHotStoreForTest(t, dir, chunkID)
 	require.ErrorContains(t, err, "data present at id >= committed count 0")
 }
 
@@ -224,12 +210,11 @@ func TestWarmup_RejectsMissingTailDataEvent(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID)
 	p1, _ := makePayload("a")
 	p2, _ := makePayload("b")
-	require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2})) // total = 2
-	require.NoError(t, hot1.Close())
+	require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2})) // total = 2
+	require.NoError(t, raw1.Close())
 
 	// Drop the last data row (event id total-1 == 1) while offsets still
 	// count 2.
@@ -237,7 +222,7 @@ func TestWarmup_RejectsMissingTailDataEvent(t *testing.T) {
 		require.NoError(t, raw.Delete(DataCF, encodeDataKey(1)))
 	})
 
-	_, err = OpenHotStore(dir, chunkID, silentLogger())
+	_, _, err := tryOpenHotStoreForTest(t, dir, chunkID)
 	require.ErrorContains(t, err, "missing from data")
 }
 
@@ -245,12 +230,11 @@ func TestWarmup_RejectsIndexBeyondCommitted(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID)
 	p1, _ := makePayload("a")
 	p2, _ := makePayload("b")
-	require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2})) // total = 2
-	require.NoError(t, hot1.Close())
+	require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2})) // total = 2
+	require.NoError(t, raw1.Close())
 
 	// An index row at exactly total (id 2): the tightest "beyond
 	// committed" case, pinning the > (not >=) bound — valid ids are 0..1.
@@ -260,7 +244,7 @@ func TestWarmup_RejectsIndexBeyondCommitted(t *testing.T) {
 		require.NoError(t, raw.Put(IndexCF, encodeIndexKey(term, 2), nil))
 	})
 
-	_, err = OpenHotStore(dir, chunkID, silentLogger())
+	_, _, err := tryOpenHotStoreForTest(t, dir, chunkID)
 	require.ErrorContains(t, err, "index references event 2 but only 2 committed")
 }
 
@@ -268,16 +252,13 @@ func TestWarmup_OffsetsHandleEmptyTrailingLedger(t *testing.T) {
 	const chunkID = chunk.ID(0)
 	dir := t.TempDir()
 
-	hot1, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
+	hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID)
 	p, _ := makePayload("only")
-	require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p}))
-	require.NoError(t, hot1.IngestLedgerEvents(3, nil))
-	require.NoError(t, hot1.Close())
+	require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p}))
+	require.NoError(t, ingestLedgerEvents(hot1, 3, nil))
+	require.NoError(t, raw1.Close())
 
-	hot2, err := OpenHotStore(dir, chunkID, silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = hot2.Close() })
+	hot2, _ := openHotStoreForTestAt(t, dir, chunkID)
 
 	assert.Equal(t, uint32(1), mustEventCount(t, hot2))
 	assert.Equal(t, 2, mustOffsets(t, hot2).LedgerCount())
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/query_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/query_test.go
index 88b48c48a..d3d7fd08d 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/query_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/query_test.go
@@ -104,7 +104,7 @@ func newQueryFixture(t *testing.T) *queryFixture {
 	require.NoError(t, err)
 
 	first := chunkID.FirstLedger()
-	require.NoError(t, fx.store.IngestLedgerEvents(first, []events.Payload{
+	require.NoError(t, ingestLedgerEvents(fx.store, first, []events.Payload{
 		payloadFor(t, fx.contractA, "evt-a-ab", fx.t0a, fx.t0b),
 		payloadFor(t, fx.contractA, "evt-a-ac", fx.t0a, fx.t0c),
 		payloadFor(t, fx.contractB, "evt-b-ab", fx.t0a, fx.t0b),
@@ -354,7 +354,7 @@ func TestQuery_ManyFiltersAtCallerCap(t *testing.T) {
 		contracts[i][0] = byte(i + 1)
 		payloads[i] = payloadFor(t, contracts[i], fmt.Sprintf("evt-%02d", i))
 	}
-	require.NoError(t, h.store.IngestLedgerEvents(first, payloads))
+	require.NoError(t, ingestLedgerEvents(h.store, first, payloads))
 
 	filters := make([]Filter, n)
 	for i := range n {
@@ -377,7 +377,7 @@ func newMultiLedgerQueryFixture(t *testing.T) *queryFixture {
 	t.Helper()
 	fx := newQueryFixture(t)
 	first := chunk.ID(0).FirstLedger()
-	require.NoError(t, fx.store.IngestLedgerEvents(first+1, []events.Payload{
+	require.NoError(t, ingestLedgerEvents(fx.store, first+1, []events.Payload{
 		payloadFor(t, fx.contractA, "evt-extra-0", fx.t0a),
 		payloadFor(t, fx.contractA, "evt-extra-1", fx.t0a),
 	}))
@@ -544,7 +544,7 @@ func TestQuery_PostFilterRejectsTermHashCollision(t *testing.T) {
 	// ConcurrentBitmaps.AddTo is the writer-side API the ingest path uses
 	// to register (term, eventID) pairs. No concurrent ingest is running
 	// in this test, so the single-writer contract is satisfied.
-	fx.store.Index().AddTo(gammaKey, 4)
+	fx.store.index().AddTo(gammaKey, 4)
 
 	after, err := fx.store.Lookup(context.Background(), gammaKey)
 	require.NoError(t, err)
@@ -609,7 +609,7 @@ func TestQuery_ChunkWithLedgersButZeroEvents(t *testing.T) {
 
 	// Ingest three empty ledgers — recorded in offsets, no events.
 	for i := range uint32(3) {
-		require.NoError(t, h.store.IngestLedgerEvents(first+i, nil))
+		require.NoError(t, ingestLedgerEvents(h.store, first+i, nil))
 	}
 	require.Equal(t, uint32(0), mustEventCount(t, h.store))
 
@@ -704,8 +704,8 @@ func TestQuery_EmptyLeadingLedgerRangeStaysEmpty(t *testing.T) {
 	// real events. After ingest the chunk's offsets read:
 	//   [first]   → [0, 0)   (empty)
 	//   [first+1] → [0, 5)   (5 events)
-	require.NoError(t, h.store.IngestLedgerEvents(first, nil))
-	require.NoError(t, h.store.IngestLedgerEvents(first+1, []events.Payload{
+	require.NoError(t, ingestLedgerEvents(h.store, first, nil))
+	require.NoError(t, ingestLedgerEvents(h.store, first+1, []events.Payload{
 		makeSimplePayload(t, "evt-0"),
 		makeSimplePayload(t, "evt-1"),
 		makeSimplePayload(t, "evt-2"),
@@ -804,7 +804,7 @@ func makeSimplePayload(t *testing.T, dataSymbol string) events.Payload {
 // Walks the hot store one ledger at a time using its Offsets snapshot
 // (which tracks the ingest-time ledger sequence) rather than reading
 // LedgerSequence off each Payload — the test fixture's payloadFor
-// builder doesn't set Payload.LedgerSequence, and HotStore.IngestLedgerEvents
+// builder doesn't set Payload.LedgerSequence, and IngestLedgerToBatch
 // stores them verbatim, so the per-event field is the zero value and
 // can't be used to recover ledger boundaries.
 //
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/reader.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/reader.go
index 77f306d4c..41b5ad63e 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/reader.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/reader.go
@@ -83,8 +83,8 @@ type Reader interface {
 	//
 	// Implementations:
 	//   - HotStore allocates a fresh Snapshot from the live
-	//     ConcurrentLedgerOffsets per call. Concurrent
-	//     IngestLedgerEvents may extend the underlying state after
+	//     ConcurrentLedgerOffsets per call. A concurrent
+	//     IngestLedgerToBatch may extend the underlying state after
 	//     Offsets returns, but the returned snapshot reflects what
 	//     was visible at call time. Callers (Query) take the
 	//     snapshot once at entry and pass it through their helpers.
@@ -183,12 +183,6 @@ type Reader interface {
 	// Each events.Payload carries its LedgerSequence, so consumers can
 	// track ledger boundaries without separate signaling.
 	All(ctx context.Context) iter.Seq2[events.Payload, error]
-
-	// Close releases any resources the Reader holds. Idempotent.
-	// After Close, Lookup / FetchEvents / FetchRange / All return
-	// ErrClosed. Metadata accessors (ChunkID, EventCount, Offsets)
-	// survive Close — see each impl's docstring for details.
-	Close() error
 }
 
 // validateSortedEventIDs returns a wrapped ErrUnsortedEventIDs if
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk.go
new file mode 100644
index 000000000..477b31ecf
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk.go
@@ -0,0 +1,388 @@
+// Package hotchunk implements decision (a): the per-chunk hot tier is ONE
+// RocksDB holding the union of every hot data type's CFs (ledger + 3 events + 1
+// txhash), and each ledger commits as ONE atomic synced WriteBatch
+// across ALL of them — so a ledger is fully present or fully absent, with a
+// SINGLE per-chunk last-committed ledger (max committed seq, from the ledgers CF's last key)
+// and no per-store frontiers / min-of-three. The three typed facades
+// (ledger/txhash/eventstore HotStore) are composed over the shared store via
+// NewWithStore; their write paths queue Puts into the one shared batch.
+package hotchunk
+
+import (
+	"context"
+	"fmt"
+	"iter"
+	"slices"
+	"time"
+
+	sdkingest "github.com/stellar/go-stellar-sdk/ingest"
+	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
+	supportlog "github.com/stellar/go-stellar-sdk/support/log"
+	"github.com/stellar/go-stellar-sdk/xdr"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/events"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash"
+)
+
+// DB is one chunk's hot tier: a single multi-CF rocksdb.Store plus the three
+// typed facades composed over it. It owns the store (Close closes it once); the
+// facades wrap it without owning it.
+//
+// Concurrency: ingestion is single-writer; IngestLedger is not safe to call
+// concurrently with itself. Reads via the facades follow each facade's own
+// contract and are safe alongside the single writer.
+type DB struct {
+	store   *rocksdb.Store // shared store; the handle Close releases
+	chunkID chunk.ID       // chunk this DB was opened for (see ChunkID)
+
+	ledger *ledger.HotStore     // built by ledger.NewWithStore(store)
+	txhash *txhash.HotStore     // built by txhash.NewWithStore(store)
+	events *eventstore.HotStore // built by eventstore.NewWithStore(store, chunkID)
+}
+
+// ColumnFamilies is the full CF list for the shared per-chunk DB, assembled from
+// each facade's CFNames() in ledger, eventstore, txhash order so no caller
+// (tests included) hand-stitches the union. Names do not collide across facades.
+func ColumnFamilies() []string {
+	names := ledger.CFNames()
+	return slices.Concat(names, eventstore.CFNames(), txhash.CFNames())
+}
+
+// config builds the shared store's rocksdb.Config: events' per-CF options (ZSTD
+// on DataCF, tuned block sizes) plus the txhash workload's Tuning. Tuning's per-CF
+// fields apply to every CF — a benign over-application (ledger/events CFs just gain
+// a bloom + larger write buffer); the per-CF overrides keep events distinct.
+func config(path string, logger *supportlog.Entry, readOnly, mustExist bool) rocksdb.Config {
+	cfg := rocksdb.Config{
+		Path:      path,
+		Logger:    logger,
+		ReadOnly:  readOnly,
+		MustExist: mustExist,
+	}
+	cfg.ColumnFamilies = ColumnFamilies()
+	cfg.Tuning = txhash.Tuning()
+	cfg.PerCFOptions = eventstore.CFOptions()
+	return cfg
+}
+
+// Open opens (or creates) the chunk's shared multi-CF hot DB read-WRITE
+// (ingestion's handle for a NEW chunk) and composes the three facades over it. On
+// any facade-construction failure the shared store is closed before returning.
+func Open(path string, chunkID chunk.ID, logger *supportlog.Entry) (*DB, error) {
+	return open(path, chunkID, logger, false, false)
+}
+
+// OpenExisting opens an EXISTING hot DB read-WRITE with create-if-missing OFF —
+// ingestion's handle for a chunk whose "ready" key promises the DB already exists.
+// A missing or gutted DB fails the open instead of silently fabricating a fresh
+// empty one (the "never auto-heal" rule); the caller treats that failure as an
+// ordinary restartable error.
+func OpenExisting(path string, chunkID chunk.ID, logger *supportlog.Entry) (*DB, error) {
+	return open(path, chunkID, logger, false, true)
+}
+
+// OpenReadOnly opens an EXISTING hot DB read-only — the freeze source's view AND
+// the startup watermark refiner's. RocksDB's read-only open replays the
+// synced-but-unflushed WAL into in-memory memtables (persisting nothing), so a
+// reader sees every synced write even after an ungraceful crash — the watermark
+// refinement DEPENDS on that replay to read a correct MaxCommittedSeq. (An
+// unsynced tail is exactly what a crash loses, and is not recovered.) Composing
+// the facades only reads.
+func OpenReadOnly(path string, chunkID chunk.ID, logger *supportlog.Entry) (*DB, error) {
+	return open(path, chunkID, logger, true, false)
+}
+
+// open is the shared body of Open/OpenExisting/OpenReadOnly: validate inputs,
+// open the store with the given mode flags, then compose the three facades.
+func open(path string, chunkID chunk.ID, logger *supportlog.Entry, readOnly, mustExist bool) (*DB, error) {
+	if path == "" || logger == nil {
+		return nil, stores.ErrInvalidConfig
+	}
+	shared, err := rocksdb.New(config(path, logger, readOnly, mustExist))
+	if err != nil {
+		return nil, fmt.Errorf("open chunk %s: %w", chunkID, err)
+	}
+	eventsFacade, err := eventstore.NewWithStore(shared, chunkID)
+	if err != nil {
+		// Best-effort release; the composition error is the one reported.
+		_ = shared.Close()
+		return nil, fmt.Errorf("compose events facade for chunk %s: %w", chunkID, err)
+	}
+	db := &DB{
+		store:   shared,
+		chunkID: chunkID,
+		events:  eventsFacade,
+	}
+	db.ledger = ledger.NewWithStore(shared)
+	db.txhash = txhash.NewWithStore(shared)
+	return db, nil
+}
+
+// ChunkID returns the chunk this DB is bound to: the chunkID it was opened with.
+func (d *DB) ChunkID() chunk.ID { return d.chunkID }
+
+// Ledgers returns the ledger facade over the shared store (Source and MaxCommittedSeq read through it).
+func (d *DB) Ledgers() *ledger.HotStore { return d.ledger }
+
+// Txhash returns the txhash read/write facade over the shared store.
+// The write side is what IngestLedger queues tx hashes through; the read side
+// has no production caller yet — it's the intended hot read seam for the v2
+// cutover (#772), exercised by tests until then.
+func (d *DB) Txhash() *txhash.HotStore { return d.txhash }
+
+// Events returns the events read/write facade over the shared store.
+// IngestLedger drives its write side; its read side is the #772 seam, as for Txhash.
+func (d *DB) Events() *eventstore.HotStore { return d.events }
+
+// Source returns a ledgerbackend.LedgerStream over the ledger facade (the ledgers
+// CF) for the cold writer (backfill's WriteColdChunk) to drain, so a just-closed chunk
+// freezes straight from its hot DB without a refetch. The freeze opens the DB read-only.
+func (d *DB) Source() ledgerbackend.LedgerStream {
+	return &hotLedgerStream{store: d.ledger}
+}
+
+// Close releases the shared store — the one resource DB owns; the facades wrap it
+// without owning it. Idempotent. Not safe alongside in-flight reads/writes.
+func (d *DB) Close() error { return d.store.Close() }
+
+// MaxCommittedSeq returns the single authoritative per-chunk last-committed ledger: the
+// highest seq durably committed, read via the ledger facade's LastSeq (the ledgers
+// CF's last key). Under decision (a) it pins EVERY CF's frontier. ok=false on an empty DB.
+func (d *DB) MaxCommittedSeq() (uint32, bool, error) {
+	return d.ledger.LastSeq()
+}
+
+// Phase enumerates the ordered phases of one IngestLedger call. It is a typed
+// index into a fixed-size array (LedgerReport.Phases), so an out-of-table phase is
+// unrepresentable — no string label to mistype and no map lookup to nil-panic in a
+// sink. The phases partition the per-ledger wall-clock:
+//   - PhaseExtract: the shared ExtractLedgerEvents walk + txhash-entry build +
+//     event shaping (all pre-batch — every decode failure lands here by construction);
+//   - PhaseLedgers/PhaseTxhash/PhaseEvents: each facade's queue-into-batch step;
+//   - PhaseCommit: the RocksDB batch write (WAL append + fsync + memtable) = the
+//     whole Batch call minus the three queue steps — the fsync wait pprof can't see.
+type Phase uint8
+
+const (
+	PhaseExtract Phase = iota // pre-batch: extract walk, txhash entries, event shaping
+	PhaseLedgers              // queue the ledger row into the batch
+	PhaseTxhash               // queue the tx-hash rows into the batch
+	PhaseEvents               // queue the events rows into the batch
+	PhaseCommit               // the batch write itself, net of the three queue steps
+	// NumPhases is the array size; it is not itself a phase.
+	NumPhases
+)
+
+// String is the metric label for a phase. Labels live in a table indexed by the
+// phase; any value outside the table (NumPhases included) reports "unknown".
+func (p Phase) String() string {
+	// NumPhases is the array size, so every real phase has exactly one slot.
+	labels := [NumPhases]string{
+		PhaseExtract: "extract",
+		PhaseLedgers: "ledgers",
+		PhaseTxhash:  "txhash",
+		PhaseEvents:  "events",
+		PhaseCommit:  "commit",
+	}
+	// Phase is a uint8, so only the upper bound needs checking.
+	if p >= NumPhases {
+		return "unknown"
+	}
+	return labels[p]
+}
+
+// PhaseSample is one phase's wall-clock and item count (Items is 0 where a phase
+// handles no per-type volume — extract and commit).
+type PhaseSample struct {
+	Dur   time.Duration // wall-clock spent in the phase (partial when the phase failed)
+	Items int           // item count; IngestLedger sets it for the three write phases only
+}
+
+// LedgerReport is the single result of IngestLedger: the per-phase samples, plus
+// the phase that failed when the call returns a non-nil error. Phases that never
+// ran keep a zero Dur (write-phase Items are pre-filled once extraction succeeds);
+// the caller emits phases up to and including Failed on error, all on success.
+type LedgerReport struct {
+	Phases [NumPhases]PhaseSample
+	// Failed is meaningful only on a non-nil error (the zero value is PhaseExtract).
+	Failed Phase
+}
+
+// IngestLedger commits ONE ledger as a SINGLE atomic synced WriteBatch across all
+// hot CFs (decision (a)): queue ledgers, txhash, and events rows into one
+// BatchWriter, commit once, and only then apply the events in-memory mirror/offsets
+// update. It returns the per-phase LedgerReport; on error, rep.Failed names the phase.
+//
+// lcm is a borrowed zero-copy view; every extractor copies what it retains, so
+// the view need not outlive this call. Store.Batch's lifecycle RLock + checkOpen
+// is the authoritative closed-store guard, so there is no separate pre-check here.
+func (d *DB) IngestLedger(seq uint32, lcm xdr.LedgerCloseMetaView) (LedgerReport, error) {
+	var rep LedgerReport
+
+	// Pre-extract anything that can fail BEFORE opening the batch, so a decode
+	// error rejects the ledger without a half-built batch.
+	//
+	// ONE TxProcessing walk feeds BOTH hot data types: ExtractLedgerEvents yields,
+	// per transaction in apply order, the tx hash AND its contract events. txhash
+	// reads each element's Hash and events shapes the same slice
+	// (PayloadsFromLedgerEvents), so the two share one walk instead of the two
+	// (ExtractTxHashes + LCMViewToPayloads-internal ExtractLedgerEvents) they used
+	// to each run — halving per-ledger extraction. Shaping the already-extracted
+	// slice (not re-walking) keeps the event-ID assignment order identical to
+	// LCMViewToPayloads. The atomic batch below serializes only the commit; the
+	// extractors are independent and could run concurrently into the same batch if
+	// catch-up profiling ever demands it — sequential is right at live cadence.
+	// Every failure below stamps the failed phase's PARTIAL duration before
+	// returning — a phase that blocked and then failed is signal (mirrors
+	// RunBackfill's "reported even on failure"), so the error is never emitted with
+	// a zero-duration sample.
+	extractStart := time.Now()
+	txEvents, err := sdkingest.ExtractLedgerEvents(lcm)
+	if err != nil {
+		rep.Phases[PhaseExtract].Dur = time.Since(extractStart)
+		rep.Failed = PhaseExtract
+		return rep, fmt.Errorf("extract ledger events seq %d: %w", seq, err)
+	}
+	txEntries := make([]txhash.Entry, len(txEvents))
+	for i := range txEvents {
+		txEntries[i] = txhash.Entry{Hash: txEvents[i].Hash, LedgerSeq: seq}
+	}
+
+	closedAt, err := lcm.LedgerCloseTime()
+	if err != nil {
+		rep.Phases[PhaseExtract].Dur = time.Since(extractStart)
+		rep.Failed = PhaseExtract
+		return rep, fmt.Errorf("ledger close time seq %d: %w", seq, err)
+	}
+	// A pre-Soroban ledger yields zero payloads, no error.
+	payloads, err := events.PayloadsFromLedgerEvents(txEvents, seq, closedAt)
+	if err != nil {
+		rep.Phases[PhaseExtract].Dur = time.Since(extractStart)
+		rep.Failed = PhaseExtract
+		return rep, fmt.Errorf("shape events seq %d: %w", seq, err)
+	}
+	rep.Phases[PhaseExtract].Dur = time.Since(extractStart)
+	// Per-type write volume lives on the write phases (emitted on success).
+	rep.Phases[PhaseLedgers].Items = 1
+	rep.Phases[PhaseTxhash].Items = len(txEntries)
+	rep.Phases[PhaseEvents].Items = len(payloads)
+
+	// The events facade validates + marshals inside the batch callback (so a
+	// rejected ledger never leaves committed rows) and returns the post-commit
+	// apply hook, expected non-nil here: under decision (a) resume is always
+	// MaxCommittedSeq+1, so seq is never a duplicate. Queue steps are timed one by one.
+	var applyEvents func()
+	// A batch error not attributed to a specific queue step below is the commit
+	// itself (the RocksDB write); a queue-step error narrows Failed to its phase.
+	failed := PhaseCommit
+	batchStart := time.Now()
+	cerr := d.store.Batch(func(b *rocksdb.BatchWriter) error {
+		ls := time.Now()
+		if err := d.ledger.AddLedgerToBatch(b, ledger.Entry{Seq: seq, Bytes: []byte(lcm)}); err != nil {
+			rep.Phases[PhaseLedgers].Dur = time.Since(ls)
+			failed = PhaseLedgers
+			return fmt.Errorf("queue ledger seq %d: %w", seq, err)
+		}
+		rep.Phases[PhaseLedgers].Dur = time.Since(ls)
+
+		ts := time.Now()
+		if len(txEntries) > 0 {
+			if err := d.txhash.AddEntriesToBatch(b, txEntries); err != nil {
+				rep.Phases[PhaseTxhash].Dur = time.Since(ts)
+				failed = PhaseTxhash
+				return fmt.Errorf("queue tx hashes seq %d: %w", seq, err)
+			}
+		}
+		rep.Phases[PhaseTxhash].Dur = time.Since(ts)
+
+		es := time.Now()
+		apply, err := d.events.IngestLedgerToBatch(b, seq, payloads)
+		if err != nil {
+			rep.Phases[PhaseEvents].Dur = time.Since(es)
+			failed = PhaseEvents
+			return fmt.Errorf("queue events seq %d: %w", seq, err)
+		}
+		rep.Phases[PhaseEvents].Dur = time.Since(es)
+		applyEvents = apply
+		return nil
+	})
+	// Commit is the whole Batch call minus the three queue steps: the RocksDB write
+	// (WAL append + fsync + memtable). Stamp it whether the batch succeeded or the
+	// commit itself failed (all queue steps ran) — a slow-then-failed commit is
+	// signal. A queue-step failure already stamped its own partial above.
+	if failed == PhaseCommit {
+		rep.Phases[PhaseCommit].Dur = time.Since(batchStart) -
+			rep.Phases[PhaseLedgers].Dur - rep.Phases[PhaseTxhash].Dur - rep.Phases[PhaseEvents].Dur
+	}
+	if cerr != nil {
+		rep.Failed = failed
+		return rep, fmt.Errorf("commit ledger %d to chunk %s: %w", seq, d.chunkID, cerr)
+	}
+
+	// Batch is durable — now and only now apply the events mirror/offsets update.
+	if applyEvents != nil { // a nil hook leaves nothing to apply; calling it panics
+		applyEvents()
+	}
+	return rep, nil
+}
+
+// hotLedgerStream is a ledgerbackend.LedgerStream over a ledger.HotStore, so the
+// source-blind cold pipeline freezes a just-closed chunk from its hot DB.
+type hotLedgerStream struct {
+	store *ledger.HotStore // ledger facade RawLedgers iterates; DB.Source passes d.ledger
+}
+
+var _ ledgerbackend.LedgerStream = (*hotLedgerStream)(nil) // compile-time interface check
+
+// RawLedgers yields the range's wire bytes from the hot store. IterateLedgers hands
+// out BORROWED buffers (valid only until the next step); the drain loop finishes with
+// each before asking for the next, so the borrow is safe. ctx cancellation is
+// checked between ledgers, as the LedgerStream contract drain relies on.
+//
+// The in-order contract is enforced at the source (so the shared cursor could be
+// deleted) from the key-derived seq, no XDR parse: the hot store is the SOLE writer
+// of recent history, so a gap in its keyspace is a real defect. An unbounded range
+// self-bounds at the committed frontier (LastSeq), mirroring packStream, so
+// callers can pass UnboundedRange(from).
+func (st *hotLedgerStream) RawLedgers(
+	ctx context.Context, r ledgerbackend.Range, _ ...ledgerbackend.StreamOption,
+) iter.Seq2[[]byte, error] {
+	return func(yield func([]byte, error) bool) {
+		from, upper := r.From(), r.To()
+		if !r.Bounded() {
+			frontier, found, err := st.store.LastSeq()
+			switch {
+			case err != nil:
+				yield(nil, fmt.Errorf("hotLedgerStream: read committed frontier: %w", err))
+				return
+			case !found:
+				return // empty store: nothing to yield
+			}
+			upper = frontier
+		}
+		next := from
+		for entry, iterErr := range st.store.IterateLedgers(from, upper) {
+			if cancelled := ctx.Err(); cancelled != nil {
+				yield(nil, cancelled)
+				return
+			}
+			switch {
+			case iterErr != nil:
+				yield(nil, iterErr)
+				return
+			case entry.Seq != next:
+				yield(nil, fmt.Errorf("hotLedgerStream: gap at seq %d, expected %d", entry.Seq, next))
+				return
+			}
+			if !yield(entry.Bytes, nil) {
+				return
+			}
+			next++
+		}
+	}
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk_test.go
new file mode 100644
index 000000000..7485979e8
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk_test.go
@@ -0,0 +1,582 @@
+package hotchunk
+
+import (
+	"context"
+	"testing"
+
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
+	"github.com/stellar/go-stellar-sdk/keypair"
+	"github.com/stellar/go-stellar-sdk/network"
+	supportlog "github.com/stellar/go-stellar-sdk/support/log"
+	"github.com/stellar/go-stellar-sdk/xdr"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/events"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash"
+)
+
+const testPassphrase = "Public Global Stellar Network ; September 2015"
+
+func silentLogger() *supportlog.Entry {
+	quiet := supportlog.New()
+	quiet.SetLevel(logrus.ErrorLevel) // only error-level (and above) entries are emitted
+	return quiet
+}
+
+// openTestDB returns a writable hot DB for chunk 0 in a per-test temp dir.
+func openTestDB(t *testing.T) *DB {
+	t.Helper()
+	hot, openErr := Open(t.TempDir(), chunk.ID(0), silentLogger())
+	require.NoError(t, openErr)
+	t.Cleanup(func() { _ = hot.Close() }) // closed at test end; close error ignored
+	return hot
+}
+
+// assertWriteItems checks the per-phase item counts a LedgerReport carries for
+// the three write phases. It expects exactly 1 ledger item and 1 event item;
+// the txhash count is the caller's expectation (one per applied tx).
+func assertWriteItems(t *testing.T, rep LedgerReport, txhash int) {
+	t.Helper()
+	phases := rep.Phases
+	assert.Equal(t, 1, phases[PhaseLedgers].Items, "ledgers items")
+	assert.Equal(t, txhash, phases[PhaseTxhash].Items, "txhash items")
+	assert.Equal(t, 1, phases[PhaseEvents].Items, "events items")
+}
+
+func TestOpen_ValidatesInputs(t *testing.T) {
+	_, emptyPathErr := Open("", chunk.ID(0), silentLogger()) // empty path
+	require.ErrorIs(t, emptyPathErr, stores.ErrInvalidConfig)
+
+	_, nilLoggerErr := Open(t.TempDir(), chunk.ID(0), nil) // nil logger
+	require.ErrorIs(t, nilLoggerErr, stores.ErrInvalidConfig)
+}
+
+func TestColumnFamilies_UnionIsNonColliding(t *testing.T) {
+	union := ColumnFamilies()
+	// The union is exactly as long as the sum of what each facade reports owning.
+	require.Len(t, union, len(ledger.CFNames())+len(eventstore.CFNames())+len(txhash.CFNames()))
+	present := make(map[string]bool, len(union))
+	for _, name := range union {
+		require.False(t, present[name], "CF name %q collides across facades", name)
+		present[name] = true
+	}
+	require.Contains(t, present, ledger.LedgersCF)
+	for _, name := range eventstore.CFNames() {
+		require.Contains(t, present, name)
+	}
+	for _, name := range txhash.CFNames() {
+		require.Contains(t, present, name)
+	}
+}
+
+// TestIngestLedger_AllCFsAdvanceTogether is the core decision-(a) happy path:
+// each IngestLedger call writes the ledger, its tx hash, and its event into the
+// ONE shared DB. After two consecutive ledgers every CF is readable and
+// MaxCommittedSeq reports exactly the last ingested seq.
+func TestIngestLedger_AllCFsAdvanceTogether(t *testing.T) {
+	chunkID := chunk.ID(0)
+	first := chunkID.FirstLedger()
+	db := openTestDB(t)
+
+	// Empty DB: MaxCommittedSeq reports ok=false (no watermark yet).
+	_, ok, err := db.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.False(t, ok)
+
+	rawA, hashA, termA := lcmWithEvent(t, first)
+	rawB, hashB, _ := lcmWithEvent(t, first+1)
+
+	repA, err := db.IngestLedger(first, xdr.LedgerCloseMetaView(rawA))
+	require.NoError(t, err)
+	assertWriteItems(t, repA, 1)
+
+	repB, err := db.IngestLedger(first+1, xdr.LedgerCloseMetaView(rawB))
+	require.NoError(t, err)
+	assertWriteItems(t, repB, 1)
+
+	// ledgers CF: the stored bytes for the first ledger round-trip exactly.
+	gotA, err := db.Ledgers().GetLedgerRaw(first)
+	require.NoError(t, err)
+	assert.Equal(t, rawA, gotA)
+	// txhash CFs: each hash resolves to the ledger that carried it.
+	seqA, err := db.Txhash().Get(hashA)
+	require.NoError(t, err)
+	assert.Equal(t, first, seqA)
+	seqB, err := db.Txhash().Get(hashB)
+	require.NoError(t, err)
+	assert.Equal(t, first+1, seqB)
+	// events CFs: the term's bitmap has cardinality 2 and 2 events are counted.
+	bm, err := db.Events().Lookup(context.Background(), termA)
+	require.NoError(t, err)
+	require.NotNil(t, bm)
+	assert.Equal(t, uint64(2), bm.GetCardinality(), "both ledgers share the event term")
+	assert.Equal(t, uint32(2), eventCount(t, db.Events()))
+
+	// The single authoritative watermark equals the last committed seq.
+	maxSeq, ok, err := db.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.True(t, ok)
+	assert.Equal(t, first+1, maxSeq)
+}
+
+// TestIngestLedger_RejectedLedgerPersistsNothingAcrossAnyCF pins cross-CF
+// atomicity (decision (a)): when IngestLedger fails with
+// eventstore.ErrLedgerOutOfRange — here for a seq one past the chunk's last
+// ledger — no CF may keep any part of that ledger: no ledgers row, no txhash
+// entry, no event term, no event count. MaxCommittedSeq must still report an
+// empty store afterwards.
+func TestIngestLedger_RejectedLedgerPersistsNothingAcrossAnyCF(t *testing.T) {
+	chunkID := chunk.ID(0)
+	db := openTestDB(t)
+
+	// A ledger seq ABOVE the chunk's range: the events facade rejects it
+	// (ErrLedgerOutOfRange) from inside the batch callback, aborting the write.
+	badSeq := chunkID.LastLedger() + 1
+	raw, hash, term := lcmWithEvent(t, badSeq)
+
+	_, err := db.IngestLedger(badSeq, xdr.LedgerCloseMetaView(raw))
+	require.Error(t, err)
+	require.ErrorIs(t, err, eventstore.ErrLedgerOutOfRange)
+
+	// NOTHING persisted, across every CF:
+	// ledgers CF — reading badSeq reports ErrNotFound.
+	_, gerr := db.Ledgers().GetLedgerRaw(badSeq)
+	require.ErrorIs(t, gerr, stores.ErrNotFound)
+	// txhash CFs — looking up the tx hash reports ErrNotFound.
+	_, gerr = db.Txhash().Get(hash)
+	require.ErrorIs(t, gerr, stores.ErrNotFound)
+	// events CFs — the term is unknown and the event count is zero.
+	_, lerr := db.Events().Lookup(context.Background(), term)
+	require.ErrorIs(t, lerr, eventstore.ErrTermNotFound)
+	assert.Equal(t, uint32(0), eventCount(t, db.Events()))
+
+	// The single watermark is still empty — nothing committed.
+	_, ok, err := db.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.False(t, ok, "a rejected ledger must not advance the watermark")
+}
+
+// TestIngestLedger_MidBatchCommitFailurePersistsNothing drives a failed ledger
+// write by calling IngestLedger on an already-closed DB, then reopens the same
+// directory and asserts the failed ledger left nothing in any CF while the
+// previously committed ledger is intact and the watermark is unchanged.
+func TestIngestLedger_MidBatchCommitFailurePersistsNothing(t *testing.T) {
+	chunkID := chunk.ID(0)
+	first := chunkID.FirstLedger()
+	dir := t.TempDir()
+
+	db, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = db.Close() }) // release the handle even if a require below aborts early
+	// Commit one good ledger so there is a known watermark, then close the DB.
+	rawGood, hashGood, _ := lcmWithEvent(t, first)
+	_, err = db.IngestLedger(first, xdr.LedgerCloseMetaView(rawGood))
+	require.NoError(t, err)
+	require.NoError(t, db.Close())
+
+	// Reopen and confirm the watermark survived the clean close.
+	db2, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = db2.Close() })
+
+	maxSeq, ok, err := db2.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.True(t, ok)
+	require.Equal(t, first, maxSeq, "the committed ledger is durable across reopen")
+
+	// Now close the DB and attempt to ingest the NEXT ledger into the closed
+	// store: the write must fail with the closed-store sentinel specifically.
+	require.NoError(t, db2.Close())
+	rawNext, hashNext, _ := lcmWithEvent(t, first+1)
+	_, err = db2.IngestLedger(first+1, xdr.LedgerCloseMetaView(rawNext))
+	require.ErrorIs(t, err, rocksdb.ErrStoreClosed)
+
+	// Reopen a third time: the failed ledger left NO trace in any CF, and the
+	// watermark is still the last good seq.
+	db3, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = db3.Close() })
+
+	maxSeq, ok, err = db3.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.True(t, ok)
+	assert.Equal(t, first, maxSeq, "the failed ledger did not advance the watermark")
+
+	// The events CF advanced for exactly the one good ledger — the failed
+	// ledger's event was not committed (warmup reconstructed the offsets from
+	// disk, which hold only the good ledger).
+	assert.Equal(t, uint32(1), eventCount(t, db3.Events()),
+		"the failed ledger's event must not be committed to the events CFs")
+
+	// The good ledger's data is intact; the failed ledger's is wholly absent
+	// across the ledgers and txhash CFs.
+	_, gerr := db3.Ledgers().GetLedgerRaw(first + 1)
+	require.ErrorIs(t, gerr, stores.ErrNotFound)
+	_, gerr = db3.Txhash().Get(hashNext)
+	require.ErrorIs(t, gerr, stores.ErrNotFound)
+
+	gotGood, err := db3.Ledgers().GetLedgerRaw(first)
+	require.NoError(t, err)
+	assert.Equal(t, rawGood, gotGood)
+	_, err = db3.Txhash().Get(hashGood)
+	require.NoError(t, err)
+}
+
+// TestSharedBatch_DirectRocksAbortAcrossCFs is the lower-level atomicity proof:
+// it queues one Put into each of three CFs (ledgers, txhash, events data) inside
+// a single Store.Batch callback and then returns an error from that callback.
+// Batch must surface that error and none of the queued rows may be readable —
+// the cross-CF, single-WriteBatch atomicity the IngestLedger path relies on.
+func TestSharedBatch_DirectRocksAbortAcrossCFs(t *testing.T) {
+	db := openTestDB(t)
+
+	var hash [32]byte
+	hash[0] = 0xa0
+	sentinelErr := assert.AnError
+
+	err := storeOf(db).Batch(func(b *rocksdb.BatchWriter) error {
+		b.Put(ledger.LedgersCF, rocksdb.EncodeUint32(2), []byte("ledger-row"))
+		b.Put(txhash.CFNames()[0], hash[:], rocksdb.EncodeUint32(2))
+		b.Put(eventstore.DataCF, []byte{0, 0, 0, 0}, []byte("event-row"))
+		return sentinelErr // abort: nothing should commit
+	})
+	require.ErrorIs(t, err, sentinelErr)
+
+	// Ledgers and txhash reads miss; watermark empty. NOTE(review): the DataCF row is not probed here.
+	_, gerr := db.Ledgers().GetLedgerRaw(2)
+	require.ErrorIs(t, gerr, stores.ErrNotFound)
+	_, gerr = db.Txhash().Get(hash)
+	require.ErrorIs(t, gerr, stores.ErrNotFound)
+	_, ok, derr := db.MaxCommittedSeq()
+	require.NoError(t, derr)
+	require.False(t, ok)
+}
+
+// storeOf returns the DB's underlying shared rocksdb.Store so tests can drive
+// Store.Batch directly; being in-package, it reads the unexported field.
+func storeOf(db *DB) *rocksdb.Store { return db.store }
+
+// TestSource_SelfBoundsUnboundedRange ingests three consecutive zero-tx ledgers
+// and then streams UnboundedRange(first) from the freeze source
+// (hotLedgerStream). The stream must yield exactly those three sequences, in
+// order, and then end on its own — no caller-computed upper bound is passed.
+func TestSource_SelfBoundsUnboundedRange(t *testing.T) {
+	db := openTestDB(t)
+	first := chunk.ID(0).FirstLedger()
+	for offset := range uint32(3) {
+		seq := first + offset
+		_, err := db.IngestLedger(seq, xdr.LedgerCloseMetaView(zeroTxLCM(t, seq)))
+		require.NoError(t, err)
+	}
+	var streamed []uint32
+	for raw, err := range db.Source().RawLedgers(context.Background(), ledgerbackend.UnboundedRange(first)) {
+		require.NoError(t, err)
+		seq, seqErr := xdr.LedgerCloseMetaView(raw).LedgerSequence()
+		require.NoError(t, seqErr)
+		streamed = append(streamed, seq)
+	}
+	require.Equal(t, []uint32{first, first + 1, first + 2}, streamed, "self-bounds at the frontier, in order")
+}
+
+// TestSource_RejectsGap pins the source-side in-order guard: when the ledgers
+// CF holds first and first+2 but not first+1, streaming the bounded range must
+// end with an error whose message mentions "gap" rather than skip silently.
+func TestSource_RejectsGap(t *testing.T) {
+	db := openTestDB(t)
+	first := chunk.ID(0).FirstLedger()
+	// Write the two rows straight into the ledgers CF through one Store.Batch,
+	// bypassing IngestLedger's contiguity so the source-level guard is exercised.
+	seedErr := storeOf(db).Batch(func(b *rocksdb.BatchWriter) error {
+		for _, seq := range []uint32{first, first + 2} {
+			if err := db.Ledgers().AddLedgerToBatch(b, ledger.Entry{Seq: seq, Bytes: []byte("x")}); err != nil {
+				return err
+			}
+		}
+		return nil
+	})
+	require.NoError(t, seedErr)
+	var gapErr error
+	for _, err := range db.Source().RawLedgers(context.Background(), ledgerbackend.BoundedRange(first, first+2)) {
+		if err != nil {
+			gapErr = err
+			break
+		}
+	}
+	require.Error(t, gapErr)
+	require.Contains(t, gapErr.Error(), "gap")
+}
+
+// TestIngestLedger_WritesEveryHotType ingests one single-tx, single-event ledger
+// and checks all three hot data types landed: raw ledger, tx hash, event term.
+func TestIngestLedger_WritesEveryHotType(t *testing.T) {
+	chunkID := chunk.ID(0)
+	first := chunkID.FirstLedger()
+	db := openTestDB(t)
+
+	raw, hash, term := lcmWithEvent(t, first)
+	rep, err := db.IngestLedger(first, xdr.LedgerCloseMetaView(raw))
+	require.NoError(t, err)
+	assertWriteItems(t, rep, 1) // report: 1 ledger, 1 tx hash, 1 event
+
+	got, err := db.Ledgers().GetLedgerRaw(first)
+	require.NoError(t, err)
+	assert.Equal(t, raw, got) // ledger bytes read back equal what was ingested
+
+	seq, err := db.Txhash().Get(hash)
+	require.NoError(t, err)
+	assert.Equal(t, first, seq) // the tx hash resolves to the ingested ledger
+	bm, err := db.Events().Lookup(context.Background(), term)
+	require.NoError(t, err)
+	require.NotNil(t, bm)
+	assert.Equal(t, uint64(1), bm.GetCardinality()) // exactly one entry under the term
+}
+
+// TestIngestLedger_EventlessTxStillIndexesHash pins the post-merge txhash
+// completeness invariant: after #18 folded the txhash and events walks into one
+// ExtractLedgerEvents pass, txhash coverage rests entirely on that walk yielding
+// an element per APPLIED tx — hash included — even for an event-less transaction
+// (the common classic-only case). The other hotchunk tests use zero-tx or
+// one-tx-one-event ledgers, so nothing else pins it: an SDK change that dropped
+// event-less txs from the walk would silently gut the txhash index for them.
+func TestIngestLedger_EventlessTxStillIndexesHash(t *testing.T) {
+	chunkID := chunk.ID(0)
+	first := chunkID.FirstLedger()
+	db := openTestDB(t)
+
+	// Two applied txs in one ledger: one carries a contract event, one carries none.
+	eventful := xdr.TransactionMeta{V: 4, V4: &xdr.TransactionMetaV4{
+		Operations: []xdr.OperationMetaV2{{Events: []xdr.ContractEvent{buildContractEvent("eventful")}}},
+	}}
+	eventless := xdr.TransactionMeta{V: 4, V4: &xdr.TransactionMetaV4{
+		Operations: []xdr.OperationMetaV2{{}}, // one op, no events
+	}}
+	lcm, hashes := buildLCM(t, first, []xdr.TransactionMeta{eventful, eventless})
+	require.Len(t, hashes, 2)
+	raw, err := lcm.MarshalBinary()
+	require.NoError(t, err)
+
+	rep, err := db.IngestLedger(first, xdr.LedgerCloseMetaView(raw))
+	require.NoError(t, err)
+	assertWriteItems(t, rep, 2) // both hashes indexed (event-less included); one event
+
+	// Both hashes — the event-less tx's included — resolve to this ledger.
+	for _, h := range hashes {
+		seq, gerr := db.Txhash().Get(h)
+		require.NoError(t, gerr, "event-less tx hash must still be indexed")
+		assert.Equal(t, first, seq)
+	}
+	// The events store counts exactly one event: the eventful tx's.
+	assert.Equal(t, uint32(1), eventCount(t, db.Events()))
+}
+
+// TestReopen_RecoversEventsMirror ingests one single-event ledger, closes the
+// DB, reopens the same directory, and asserts the reopened events store reports
+// an event count of 1 — i.e. the count is recovered from what is on disk.
+func TestReopen_RecoversEventsMirror(t *testing.T) {
+	chunkID := chunk.ID(0)
+	first := chunkID.FirstLedger()
+	dir := t.TempDir()
+
+	db, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	raw, _, _ := lcmWithEvent(t, first)
+	_, err = db.IngestLedger(first, xdr.LedgerCloseMetaView(raw))
+	require.NoError(t, err)
+	require.NoError(t, db.Close()) // clean close before the reopen
+
+	db2, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = db2.Close() })
+	assert.Equal(t, uint32(1), eventCount(t, db2.Events()), "warmup recovered the events offsets")
+}
+
+// TestOpenReadOnly_ReadsCommittedAndRejectsWrites pins the read-only handle
+// (OpenReadOnly): after a writer ingests two ledgers and closes cleanly, the
+// read-only handle reports the writer's last seq as MaxCommittedSeq, and an
+// IngestLedger call through it returns an error.
+func TestOpenReadOnly_ReadsCommittedAndRejectsWrites(t *testing.T) {
+	chunkID := chunk.ID(0)
+	first := chunkID.FirstLedger()
+	dir := t.TempDir()
+
+	// Writer: ingest two ledgers, then close (flushes the WAL into SST).
+	db, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	for _, seq := range []uint32{first, first + 1} {
+		_, ierr := db.IngestLedger(seq, xdr.LedgerCloseMetaView(zeroTxLCM(t, seq)))
+		require.NoError(t, ierr)
+	}
+	require.NoError(t, db.Close())
+
+	// Reader: a read-only open sees the committed watermark; its Close must succeed.
+	ro, err := OpenReadOnly(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { require.NoError(t, ro.Close()) })
+
+	seq, ok, err := ro.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.True(t, ok)
+	assert.Equal(t, first+1, seq, "read-only handle sees the committed data")
+
+	// A write through the read-only handle must fail (any error is accepted here).
+	_, err = ro.IngestLedger(first+2, xdr.LedgerCloseMetaView(zeroTxLCM(t, first+2)))
+	require.Error(t, err, "read-only DB must reject writes")
+}
+
+// TestIngestLedger_ClosedDBFails confirms IngestLedger on a closed shared DB
+// fails with rocksdb.ErrStoreClosed. The closed-store guard is Store.Batch's
+// authoritative lifecycle RLock + checkOpen (the per-facade pre-checks were
+// dropped in #30), which is why that sentinel is the one surfaced.
+func TestIngestLedger_ClosedDBFails(t *testing.T) {
+	chunkID := chunk.ID(0)
+	db, err := Open(t.TempDir(), chunkID, silentLogger())
+	require.NoError(t, err)
+	require.NoError(t, db.Close())
+
+	raw := zeroTxLCM(t, chunkID.FirstLedger())
+	_, err = db.IngestLedger(chunkID.FirstLedger(), xdr.LedgerCloseMetaView(raw))
+	require.ErrorIs(t, err, rocksdb.ErrStoreClosed)
+}
+
+// ──────────────────────────── LCM fixtures ────────────────────────────
+
+// lcmWithEvent marshals a V2 LCM for seq holding a single transaction whose one
+// operation emits one contract event (topic "hotchunk_test"). It returns the LCM
+// wire bytes, the transaction hash, and the first term key of that event.
+func lcmWithEvent(t *testing.T, seq uint32) ([]byte, [32]byte, events.TermKey) {
+	t.Helper()
+	event := buildContractEvent("hotchunk_test")
+	opMeta := xdr.OperationMetaV2{Events: []xdr.ContractEvent{event}}
+	txMeta := xdr.TransactionMeta{
+		V:  4,
+		V4: &xdr.TransactionMetaV4{Operations: []xdr.OperationMetaV2{opMeta}},
+	}
+	closeMeta, txHash := buildLCMWithTx(t, seq, txMeta)
+	wire, marshalErr := closeMeta.MarshalBinary()
+	require.NoError(t, marshalErr)
+	eventWire, marshalErr := event.MarshalBinary()
+	require.NoError(t, marshalErr)
+	terms, termErr := events.TermsForBytes(eventWire)
+	require.NoError(t, termErr)
+	require.NotEmpty(t, terms)
+	return wire, txHash, terms[0]
+}
+
+func zeroTxLCM(t *testing.T, seq uint32) []byte {
+	t.Helper()
+	empty, _ := buildLCM(t, seq, nil) // nil metas: an LCM with no transactions
+	wire, marshalErr := empty.MarshalBinary()
+	require.NoError(t, marshalErr)
+	return wire
+}
+
+func buildContractEvent(topic string) xdr.ContractEvent {
+	var contractID xdr.ContractId
+	contractID[0] = 0xab
+	contractID[1] = 0xcd
+	sym := xdr.ScSymbol(topic)
+	return xdr.ContractEvent{
+		ContractId: &contractID,
+		Type:       xdr.ContractEventTypeContract,
+		Body: xdr.ContractEventBody{
+			V: 0,
+			V0: &xdr.ContractEventV0{
+				Topics: []xdr.ScVal{{Type: xdr.ScValTypeScvSymbol, Sym: &sym}},
+				Data:   xdr.ScVal{Type: xdr.ScValTypeScvSymbol, Sym: &sym},
+			},
+		},
+	}
+}
+
+func successResult() xdr.TransactionResult {
+	// Results points at an empty, non-nil slice: a txSUCCESS outcome that
+	// carries zero operation results.
+	noOps := make([]xdr.OperationResult, 0)
+	outcome := xdr.TransactionResultResult{
+		Code:    xdr.TransactionResultCodeTxSuccess,
+		Results: &noOps,
+	}
+	return xdr.TransactionResult{FeeCharged: 100, Result: outcome}
+}
+
+func buildLCMWithTx(t *testing.T, seq uint32, meta xdr.TransactionMeta) (xdr.LedgerCloseMeta, [32]byte) {
+	t.Helper()
+	closeMeta, txHashes := buildLCM(t, seq, []xdr.TransactionMeta{meta})
+	require.Len(t, txHashes, 1) // one meta in, so exactly one hash is expected back
+	return closeMeta, txHashes[0]
+}
+
+// buildLCM assembles a V2 LedgerCloseMeta for ledger seq (close time 0) holding
+// one successful transaction per entry of txMetas, in order. Every transaction
+// gets a random source account and its own tx-set phase; its TxProcessing entry
+// pairs the envelope hash (under testPassphrase) with txMetas[i]. It returns the
+// LCM and the transaction hashes in the same order; nil txMetas yields no txs.
+func buildLCM(t *testing.T, seq uint32, txMetas []xdr.TransactionMeta) (xdr.LedgerCloseMeta, [][32]byte) {
+	t.Helper()
+	count := len(txMetas)
+	phases := make([]xdr.TransactionPhase, 0, count)
+	applied := make([]xdr.TransactionResultMetaV1, 0, count)
+	hashes := make([][32]byte, 0, count)
+
+	for i := range txMetas {
+		// A freshly generated source account per transaction.
+		source := xdr.MustMuxedAddress(keypair.MustRandom().Address())
+		tx := xdr.Transaction{
+			SourceAccount: source,
+			Ext: xdr.TransactionExt{
+				V:           1,
+				SorobanData: &xdr.SorobanTransactionData{},
+			},
+		}
+		envelope := xdr.TransactionEnvelope{
+			Type: xdr.EnvelopeTypeEnvelopeTypeTx,
+			V1:   &xdr.TransactionV1Envelope{Tx: tx},
+		}
+		txHash, hashErr := network.HashTransactionInEnvelope(envelope, testPassphrase)
+		require.NoError(t, hashErr)
+		hashes = append(hashes, txHash)
+
+		pair := xdr.TransactionResultPair{TransactionHash: txHash, Result: successResult()}
+		applied = append(applied, xdr.TransactionResultMetaV1{TxApplyProcessing: txMetas[i], Result: pair})
+		// Each tx is placed in its own single-component V0 phase of the tx set.
+		component := xdr.TxSetComponent{
+			Type: xdr.TxSetComponentTypeTxsetCompTxsMaybeDiscountedFee,
+			TxsMaybeDiscountedFee: &xdr.TxSetComponentTxsMaybeDiscountedFee{
+				Txs: []xdr.TransactionEnvelope{envelope},
+			},
+		}
+		components := []xdr.TxSetComponent{component}
+		phases = append(phases, xdr.TransactionPhase{V: 0, V0Components: &components})
+	}
+
+	header := xdr.LedgerHeader{
+		ScpValue:  xdr.StellarValue{CloseTime: xdr.TimePoint(0)},
+		LedgerSeq: xdr.Uint32(seq),
+	}
+	v2 := xdr.LedgerCloseMetaV2{
+		LedgerHeader: xdr.LedgerHeaderHistoryEntry{Header: header},
+		TxSet: xdr.GeneralizedTransactionSet{
+			V:       1,
+			V1TxSet: &xdr.TransactionSetV1{Phases: phases},
+		},
+		TxProcessing: applied,
+	}
+	return xdr.LedgerCloseMeta{V: 2, V2: &v2}, hashes
+}
+
+// eventCount returns r.EventCount(), failing the test immediately if the call
+// reports an error (per the Reader contract, only a closed store should).
+func eventCount(t *testing.T, r interface{ EventCount() (uint32, error) }) uint32 {
+	t.Helper()
+	count, countErr := r.EventCount()
+	require.NoError(t, countErr)
+	return count
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store.go
index 2ba7afd4f..b860a93cb 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store.go
@@ -9,144 +9,74 @@ import (
 	"iter"
 	"sync"
 
-	supportlog "github.com/stellar/go-stellar-sdk/support/log"
-
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/zstd"
 )
 
-// Entry — one (sequence, uncompressed ledger bytes) pair. Both
-// hot and cold stores compress on write and decompress on read,
-// so callers always pass and receive raw ledger bytes here.
+// LedgersCF is the column family the hot ledger data lives in. It is registered
+// in the shared per-chunk multi-CF DB (decision (a)).
+const LedgersCF = "ledgers"
+
+// CFNames returns the CFs this facade owns, so the hotchunk shared-DB opener
+// assembles the union the same way it does for txhash and eventstore (every
+// facade exports CFNames()).
+func CFNames() []string { return []string{LedgersCF} }
+
+// Entry — one (sequence, uncompressed ledger bytes) pair. Compression is
+// internal to the store, so callers pass and receive raw bytes here.
 type Entry struct {
 	Seq   uint32
 	Bytes []byte
 }
 
-// HotStore — RocksDB-backed hot ledger store. Default-CF only;
-// keys are 4-byte big-endian sequences; values are zstd-compressed
-// ledger bytes. Compression is internal: callers see raw bytes on
-// the boundary.
+// HotStore — RocksDB-backed hot ledger store. Keys are 4-byte BE sequences;
+// values are zstd-compressed (internal). It accumulates one chunk's ledgers
+// before freezing; it does not itself range-check writes (the driver's drain loop
+// already validates every sequence against the chunk).
 //
-// Like every hot store, a HotStore instance is chunk-bound: it
-// accumulates exactly one chunk's ledgers before being frozen into
-// the chunk's cold artifacts. The binding is recorded at open time
-// (ChunkID) so the ingest driver can reject a store bound to a
-// different chunk than it is ingesting; the store does not itself
-// range-check writes (the driver's drain loop already validates
-// every sequence against the chunk).
-//
-// Concurrency: all methods, including Close, are safe for concurrent
-// use. rocksdb.Store.Close CAS-marks the store closed and then drains
-// in-flight ops (each holds an RLock for its duration) before releasing
-// resources; a read/write racing Close either completes first or
-// observes the closed store and returns stores.ErrStoreClosed. Close is
-// idempotent. HotStore adds no unguarded state of its own — the
-// compressor pool and decompressor are both concurrent-safe.
+// Concurrency: all methods are safe for concurrent use, including use alongside
+// the caller-owned rocksdb.Store.Close. A read/write racing Close either completes
+// first or observes the closed store and returns stores.ErrStoreClosed. HotStore
+// adds no unguarded state of its own — the compressor pool and decompressor are
+// both concurrent-safe.
 type HotStore struct {
-	store   *rocksdb.Store
-	chunkID chunk.ID
-	dec     *zstd.Decompressor
-	// compPool — per-store pool of zstd.Compressors. Each
-	// concurrent AddLedgers borrows one for the duration of its
-	// Encode call; the pool's GC finalizer (set inside
-	// zstd.NewCompressor) frees the C context when the compressor
-	// is dropped between GC cycles.
+	store *rocksdb.Store
+	dec   *zstd.Decompressor
+	// compPool — per-store pool of zstd.Compressors; each concurrent
+	// AddLedgerToBatch borrows one for its Encode call.
 	compPool sync.Pool
 }
 
-// OpenHotStore validates inputs and returns an open HotStore bound
-// to chunkID (see the HotStore doc on chunk binding). path and
-// logger are both required; logger is forwarded to the
-// pkg/rocksdb wrapper (rocksdb writes the on-open state line and
-// the close-time Flush warning through it). HotStore itself does
-// not emit any logs — the cold store, by contrast, takes no
-// logger because packfile is silent. Rides on RocksDB defaults —
-// no explicit block cache (RocksDB's per-CF default plus OS page
-// cache cover range scans), no bloom filter (callers know in
-// advance which sequences this store holds, so it is never asked
-// for a key it doesn't have), no WAL cap (graceful Close flushes
-// the memtable; ungraceful WAL replay at this scale is sub-second).
-// Re-tune only with a workload measurement.
-func OpenHotStore(path string, chunkID chunk.ID, logger *supportlog.Entry) (*HotStore, error) {
-	if path == "" {
-		return nil, stores.ErrInvalidConfig
-	}
-	if logger == nil {
-		return nil, stores.ErrInvalidConfig
-	}
-	store, err := rocksdb.New(rocksdb.Config{
-		Path:   path,
-		Logger: logger,
-	})
-	if err != nil {
-		return nil, err
-	}
+// NewWithStore wraps an ALREADY-OPEN rocksdb.Store as a ledger HotStore on
+// LedgersCF. The store is owned by the caller — in production, hotchunk.DB
+// composes this facade over the shared multi-CF DB and closes that DB once. The
+// store must have LedgersCF registered.
+func NewWithStore(store *rocksdb.Store) *HotStore {
 	return &HotStore{
-		store:   store,
-		chunkID: chunkID,
-		dec:     zstd.NewDecompressor(),
+		store: store,
+		dec:   zstd.NewDecompressor(),
 		compPool: sync.Pool{
 			New: func() any { return zstd.NewCompressor() },
 		},
-	}, nil
+	}
 }
 
-// Close releases the underlying RocksDB store. Idempotent —
-// delegates to rocksdb.Store.Close. Must not be called concurrently
-// with in-flight reads/writes on this HotStore.
-func (h *HotStore) Close() error { return h.store.Close() }
-
-// ChunkID returns the chunk this store is bound to (constructor-supplied;
-// never reads the store).
-func (h *HotStore) ChunkID() chunk.ID { return h.chunkID }
-
-// AddLedgers writes (seq, raw-bytes) entries to rocksdb. Bytes is
-// the uncompressed ledger payload; AddLedgers compresses each
-// entry with zstd before write. Variadic so callers can pass
-// individual entries (h.AddLedgers(e)), a literal batch
-// (h.AddLedgers(e1, e2, e3)), or a slice (h.AddLedgers(entries...)).
-// Zero entries is a no-op; one entry uses Store.Put; multiple
-// entries use Store.Batch (one atomic write, one fsync — versus N
-// fsyncs for N Put calls).
-func (h *HotStore) AddLedgers(entries ...Entry) error {
-	if h.store.IsClosed() {
-		return stores.ErrStoreClosed
-	}
-	if len(entries) == 0 {
-		return nil
-	}
+// AddLedgerToBatch compresses one ledger and queues its Put into b on LedgersCF
+// — the building block hotchunk uses to fold the ledger write into the one
+// shared per-ledger WriteBatch (decision (a)). Does not commit (caller owns the
+// batch). Compresses into a fresh buffer BatchWriter.Put copies, so e.Bytes need
+// not outlive this call. The caller runs inside Store.Batch, whose lifecycle
+// RLock + checkOpen is the authoritative closed-store guard, so this adds none.
+func (h *HotStore) AddLedgerToBatch(b *rocksdb.BatchWriter, e Entry) error {
 	c, _ := h.compPool.Get().(*zstd.Compressor)
 	defer h.compPool.Put(c)
-
-	if len(entries) == 1 {
-		e := entries[0]
-		compressed, err := c.Encode(nil, e.Bytes)
-		if err != nil {
-			return err
-		}
-		return translateRocksErr(h.store.Put("", rocksdb.EncodeUint32(e.Seq), compressed))
-	}
-	// Multi-entry path: compress each into its own fresh slice so
-	// the batch can hold them all simultaneously (the compressor's
-	// internal buffer would otherwise be overwritten on the next
-	// Encode call).
-	compressed := make([][]byte, len(entries))
-	for i, e := range entries {
-		out, err := c.Encode(nil, e.Bytes)
-		if err != nil {
-			return err
-		}
-		compressed[i] = out
+	compressed, err := c.Encode(nil, e.Bytes)
+	if err != nil {
+		return err
 	}
-	return translateRocksErr(h.store.Batch(func(b *rocksdb.BatchWriter) error {
-		for i, e := range entries {
-			b.Put("", rocksdb.EncodeUint32(e.Seq), compressed[i])
-		}
-		return nil
-	}))
+	b.Put(LedgersCF, rocksdb.EncodeUint32(e.Seq), compressed)
+	return nil
 }
 
 // GetLedgerRaw decodes the ledger stored under seq into a fresh,
@@ -155,7 +85,7 @@ func (h *HotStore) AddLedgers(entries ...Entry) error {
 // should prefer IterateLedgers, which yields borrows without the
 // per-ledger decode allocation.
 func (h *HotStore) GetLedgerRaw(seq uint32) ([]byte, error) {
-	v, found, err := h.store.Get("", rocksdb.EncodeUint32(seq))
+	v, found, err := h.store.Get(LedgersCF, rocksdb.EncodeUint32(seq))
 	if err != nil {
 		return nil, translateRocksErr(err)
 	}
@@ -169,22 +99,12 @@ func (h *HotStore) GetLedgerRaw(seq uint32) ([]byte, error) {
 	return out, nil
 }
 
-// FirstSeq returns the lowest ledger sequence in the store, or ok=false
-// if the store is empty. Cheap (a single RocksDB boundary seek): lets a
-// caller learn the store's ledger range without an external chunk hint.
-func (h *HotStore) FirstSeq() (uint32, bool, error) { return h.edgeSeq(false) }
-
 // LastSeq returns the highest ledger sequence in the store, or ok=false
-// if the store is empty.
-func (h *HotStore) LastSeq() (uint32, bool, error) { return h.edgeSeq(true) }
-
-//nolint:funcorder // helper grouped with FirstSeq/LastSeq for readability
-func (h *HotStore) edgeSeq(last bool) (uint32, bool, error) {
-	edge := h.store.FirstKey
-	if last {
-		edge = h.store.LastKey
-	}
-	k, ok, err := edge("")
+// if the store is empty. This is the chunk's authoritative last-committed
+// ledger (hotchunk.DB.MaxCommittedSeq reads it). Cheap — a single RocksDB
+// boundary seek on the last key.
+func (h *HotStore) LastSeq() (uint32, bool, error) {
+	k, ok, err := h.store.LastKey(LedgersCF)
 	if err != nil {
 		return 0, false, translateRocksErr(err)
 	}
@@ -213,7 +133,7 @@ func (h *HotStore) IterateLedgers(start, end uint32) iter.Seq2[Entry, error] {
 		// it past the loop body. The read benches consume each ledger in-scope,
 		// so this avoids a per-ledger decode allocation.
 		var scratch []byte
-		for e, err := range h.store.IterateRange("", rocksdb.EncodeUint32(start), rocksdb.EncodeUint32(end)) {
+		for e, err := range h.store.IterateRange(LedgersCF, rocksdb.EncodeUint32(start), rocksdb.EncodeUint32(end)) {
 			if err != nil {
 				yield(Entry{}, translateRocksErr(err))
 				return
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store_test.go
index 4a7f89ecd..fc53cab30 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store_test.go
@@ -17,7 +17,7 @@ import (
 	supportlog "github.com/stellar/go-stellar-sdk/support/log"
 	"github.com/stellar/go-stellar-sdk/xdr"
 
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores"
 )
 
@@ -31,41 +31,20 @@ func silentLogger() *supportlog.Entry {
 
 func openTestHotStore(t *testing.T) *HotStore {
 	t.Helper()
-	h, err := OpenHotStore(t.TempDir(), chunk.ID(0), silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = h.Close() })
+	h, _ := openTestHotStoreAt(t, t.TempDir())
 	return h
 }
 
-func TestOpenHotStore_ValidatesInputs(t *testing.T) {
-	_, err := OpenHotStore("", chunk.ID(0), silentLogger())
-	require.ErrorIs(t, err, stores.ErrInvalidConfig)
-
-	_, err = OpenHotStore(t.TempDir(), chunk.ID(0), nil)
-	require.ErrorIs(t, err, stores.ErrInvalidConfig)
-}
-
-func TestOpenHotStore_RecordsChunkBinding(t *testing.T) {
-	h, err := OpenHotStore(t.TempDir(), chunk.ID(7), silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = h.Close() })
-	require.Equal(t, chunk.ID(7), h.ChunkID())
-}
-
-func TestOpenHotStore_CreatesMissingDirectory(t *testing.T) {
-	path := filepath.Join(t.TempDir(), "subdir-never-created")
-	h, err := OpenHotStore(path, chunk.ID(0), silentLogger())
+func openTestHotStoreAt(t *testing.T, path string) (*HotStore, *rocksdb.Store) {
+	t.Helper()
+	store, err := rocksdb.New(rocksdb.Config{
+		Path:           path,
+		ColumnFamilies: []string{LedgersCF},
+		Logger:         silentLogger(),
+	})
 	require.NoError(t, err)
-	require.NotNil(t, h)
-	t.Cleanup(func() { _ = h.Close() })
-}
-
-func TestHotStore_CloseIsIdempotent(t *testing.T) {
-	h, err := OpenHotStore(t.TempDir(), chunk.ID(0), silentLogger())
-	require.NoError(t, err)
-
-	require.NoError(t, h.Close())
-	require.NoError(t, h.Close())
+	t.Cleanup(func() { _ = store.Close() })
+	return NewWithStore(store), store
 }
 
 func TestHotStore_AddGetRoundTripVerbatim(t *testing.T) {
@@ -77,20 +56,20 @@ func TestHotStore_AddGetRoundTripVerbatim(t *testing.T) {
 
 	// Single-entry write.
 	payload := []byte("arbitrary opaque bytes the store has no opinion about")
-	require.NoError(t, h.AddLedgers(Entry{Seq: 42, Bytes: payload}))
+	require.NoError(t, addLedgers(h, Entry{Seq: 42, Bytes: payload}))
 	got, err := h.GetLedgerRaw(42)
 	require.NoError(t, err)
 	assert.Equal(t, payload, got)
 
 	// Overwrite.
 	updated := []byte("different bytes")
-	require.NoError(t, h.AddLedgers(Entry{Seq: 42, Bytes: updated}))
+	require.NoError(t, addLedgers(h, Entry{Seq: 42, Bytes: updated}))
 	got, err = h.GetLedgerRaw(42)
 	require.NoError(t, err)
 	assert.Equal(t, updated, got)
 
 	// Zero entries — no-op, no error.
-	require.NoError(t, h.AddLedgers())
+	require.NoError(t, addLedgers(h))
 }
 
 // TestHotStore_AddLedgersIdempotentRetry mirrors the events store's retry
@@ -103,46 +82,34 @@ func TestHotStore_AddLedgersIdempotentRetry(t *testing.T) {
 	h := openTestHotStore(t)
 	payload := []byte("ledger payload")
 
-	require.NoError(t, h.AddLedgers(Entry{Seq: 7, Bytes: payload}))
-	require.NoError(t, h.AddLedgers(Entry{Seq: 7, Bytes: payload})) // retry
+	require.NoError(t, addLedgers(h, Entry{Seq: 7, Bytes: payload}))
+	require.NoError(t, addLedgers(h, Entry{Seq: 7, Bytes: payload})) // retry
 
 	got, err := h.GetLedgerRaw(7)
 	require.NoError(t, err)
 	assert.Equal(t, payload, got)
 
 	// Still a single entry — the retry overwrote rather than appended.
-	first, ok, err := h.FirstSeq()
-	require.NoError(t, err)
-	require.True(t, ok)
-	assert.Equal(t, uint32(7), first)
 	last, ok, err := h.LastSeq()
 	require.NoError(t, err)
 	require.True(t, ok)
 	assert.Equal(t, uint32(7), last)
 }
 
-func TestHotStore_FirstLastSeq(t *testing.T) {
+func TestHotStore_LastSeq(t *testing.T) {
 	h := openTestHotStore(t)
 
 	// Empty store: ok=false, no error.
-	_, ok, err := h.FirstSeq()
-	require.NoError(t, err)
-	require.False(t, ok)
-	_, ok, err = h.LastSeq()
+	_, ok, err := h.LastSeq()
 	require.NoError(t, err)
 	require.False(t, ok)
 
-	// Insert seqs out of order; FirstSeq/LastSeq report the min/max present.
-	require.NoError(t, h.AddLedgers(
+	// Insert seqs out of order; LastSeq reports the max present.
+	require.NoError(t, addLedgers(h,
 		Entry{Seq: 105, Bytes: []byte("c")},
 		Entry{Seq: 100, Bytes: []byte("a")},
 		Entry{Seq: 103, Bytes: []byte("b")},
 	))
-	first, ok, err := h.FirstSeq()
-	require.NoError(t, err)
-	require.True(t, ok)
-	assert.Equal(t, uint32(100), first)
-
 	last, ok, err := h.LastSeq()
 	require.NoError(t, err)
 	require.True(t, ok)
@@ -157,7 +124,7 @@ func TestHotStore_AddLedgersMultipleEntries(t *testing.T) {
 		{Seq: 101, Bytes: []byte("ledger 101 payload")},
 		{Seq: 102, Bytes: []byte("ledger 102 payload")},
 	}
-	require.NoError(t, h.AddLedgers(entries...))
+	require.NoError(t, addLedgers(h, entries...))
 	for _, e := range entries {
 		got, err := h.GetLedgerRaw(e.Seq)
 		require.NoError(t, err)
@@ -168,7 +135,7 @@ func TestHotStore_AddLedgersMultipleEntries(t *testing.T) {
 func TestHotStore_IterateLedgers(t *testing.T) {
 	h := openTestHotStore(t)
 	for _, seq := range []uint32{10, 20, 30, 40, 50} {
-		require.NoError(t, h.AddLedgers(Entry{Seq: seq, Bytes: []byte("v")}))
+		require.NoError(t, addLedgers(h, Entry{Seq: seq, Bytes: []byte("v")}))
 	}
 
 	// Full window.
@@ -219,7 +186,7 @@ func TestHotStore_IterateLedgersVisibleGap(t *testing.T) {
 	h := openTestHotStore(t)
 	// Non-contiguous keyspace: missing 30.
 	for _, seq := range []uint32{10, 20, 40, 50} {
-		require.NoError(t, h.AddLedgers(Entry{Seq: seq, Bytes: []byte("v")}))
+		require.NoError(t, addLedgers(h, Entry{Seq: seq, Bytes: []byte("v")}))
 	}
 
 	var seen []uint32
@@ -239,14 +206,11 @@ func TestHotStore_GracefulCloseAndReopen(t *testing.T) {
 		{Seq: 15, Bytes: []byte("payload-15")},
 	}
 
-	first, err := OpenHotStore(path, chunk.ID(0), silentLogger())
-	require.NoError(t, err)
-	require.NoError(t, first.AddLedgers(seeded...))
-	require.NoError(t, first.Close())
+	first, firstStore := openTestHotStoreAt(t, path)
+	require.NoError(t, addLedgers(first, seeded...))
+	require.NoError(t, firstStore.Close())
 
-	second, err := OpenHotStore(path, chunk.ID(0), silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = second.Close() })
+	second, _ := openTestHotStoreAt(t, path)
 
 	for _, want := range seeded {
 		got, err := second.GetLedgerRaw(want.Seq)
@@ -256,12 +220,11 @@ func TestHotStore_GracefulCloseAndReopen(t *testing.T) {
 }
 
 func TestHotStore_PostCloseOps(t *testing.T) {
-	h, err := OpenHotStore(t.TempDir(), chunk.ID(0), silentLogger())
-	require.NoError(t, err)
-	require.NoError(t, h.Close())
+	h, store := openTestHotStoreAt(t, t.TempDir())
+	require.NoError(t, store.Close())
 
-	require.ErrorIs(t, h.AddLedgers(Entry{Seq: 1, Bytes: []byte("v")}), stores.ErrStoreClosed)
-	_, err = h.GetLedgerRaw(1)
+	require.ErrorIs(t, addLedgers(h, Entry{Seq: 1, Bytes: []byte("v")}), stores.ErrStoreClosed)
+	_, err := h.GetLedgerRaw(1)
 	require.ErrorIs(t, err, stores.ErrStoreClosed)
 	var iterErr error
 	for _, e := range h.IterateLedgers(0, 100) {
@@ -269,7 +232,7 @@ func TestHotStore_PostCloseOps(t *testing.T) {
 	}
 	require.ErrorIs(t, iterErr, stores.ErrStoreClosed)
 
-	require.ErrorIs(t, h.AddLedgers(), stores.ErrStoreClosed)
+	require.ErrorIs(t, addLedgers(h), stores.ErrStoreClosed)
 
 	iterErr = nil
 	for _, e := range h.IterateLedgers(100, 50) {
@@ -279,9 +242,9 @@ func TestHotStore_PostCloseOps(t *testing.T) {
 }
 
 func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) {
-	h := openTestHotStore(t)
+	h, store := openTestHotStoreAt(t, t.TempDir())
 	for i := range uint32(50) {
-		require.NoError(t, h.AddLedgers(Entry{Seq: i, Bytes: []byte("v")}))
+		require.NoError(t, addLedgers(h, Entry{Seq: i, Bytes: []byte("v")}))
 	}
 
 	var wg sync.WaitGroup
@@ -290,7 +253,7 @@ func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) {
 	for w := range workers {
 		wg.Go(func() {
 			for i := uint32(0); !stop.Load(); i++ {
-				_ = h.AddLedgers(Entry{Seq: uint32(w)*1_000_000 + i, Bytes: []byte("v")})
+				_ = addLedgers(h, Entry{Seq: uint32(w)*1_000_000 + i, Bytes: []byte("v")})
 			}
 		})
 		wg.Go(func() {
@@ -310,11 +273,11 @@ func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) {
 	}
 
 	time.Sleep(50 * time.Millisecond)
-	require.NoError(t, h.Close())
+	require.NoError(t, store.Close())
 	stop.Store(true)
 	wg.Wait()
 
-	require.ErrorIs(t, h.AddLedgers(Entry{Seq: 1, Bytes: []byte("v")}), stores.ErrStoreClosed)
+	require.ErrorIs(t, addLedgers(h, Entry{Seq: 1, Bytes: []byte("v")}), stores.ErrStoreClosed)
 }
 
 // TestHotStore_AddLedgersEmptyBytes pins behavior on zero-length
@@ -322,7 +285,7 @@ func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) {
 // and read back as empty.
 func TestHotStore_AddLedgersEmptyBytes(t *testing.T) {
 	h := openTestHotStore(t)
-	require.NoError(t, h.AddLedgers(Entry{Seq: 1, Bytes: nil}))
+	require.NoError(t, addLedgers(h, Entry{Seq: 1, Bytes: nil}))
 	got, err := h.GetLedgerRaw(1)
 	require.NoError(t, err)
 	assert.Empty(t, got)
@@ -345,7 +308,7 @@ func TestHotToColdMigration(t *testing.T) {
 		b, err := lcm.MarshalBinary()
 		require.NoError(t, err)
 		raws[i] = b
-		require.NoError(t, hot.AddLedgers(Entry{Seq: firstSeq + uint32(i), Bytes: b}))
+		require.NoError(t, addLedgers(hot, Entry{Seq: firstSeq + uint32(i), Bytes: b}))
 	}
 
 	// Stream hot → cold. No re-encoding step on the caller side.
@@ -380,7 +343,7 @@ func TestHotStore_XDRRoundTrip(t *testing.T) {
 	require.NoError(t, err)
 
 	h := openTestHotStore(t)
-	require.NoError(t, h.AddLedgers(Entry{Seq: ledgerSeq, Bytes: raw}))
+	require.NoError(t, addLedgers(h, Entry{Seq: ledgerSeq, Bytes: raw}))
 
 	gotRaw, err := h.GetLedgerRaw(ledgerSeq)
 	require.NoError(t, err)
@@ -476,3 +439,16 @@ func makeRandomLedgerCloseMeta(
 	lcm.V1.LedgerHeader.Header.LedgerSeq = xdr.Uint32(ledgerSeq)
 	return lcm, hashes
 }
+
+// addLedgers commits entries through AddLedgerToBatch in one batch — the
+// production write shape, reduced to a test seeding call.
+func addLedgers(h *HotStore, entries ...Entry) error {
+	return translateRocksErr(h.store.Batch(func(b *rocksdb.BatchWriter) error {
+		for _, e := range entries {
+			if err := h.AddLedgerToBatch(b, e); err != nil {
+				return err
+			}
+		}
+		return nil
+	}))
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go
index 18bfa4420..1c1ed81fe 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go
@@ -1,29 +1,16 @@
-// Package txhash holds the hot transaction-hash store (RocksDB-backed,
-// 16-CF nibble-routed) and its value types. A future cold reader
-// (RecSplit-backed) will live alongside the HotStore in this package.
+// Package txhash holds the hot transaction-hash store (RocksDB-backed, a single
+// txhash CF) and its value types. A future cold reader (RecSplit-backed) will
+// live alongside the HotStore in this package.
 package txhash
 
 import (
-	supportlog "github.com/stellar/go-stellar-sdk/support/log"
-
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores"
 )
 
-// 16 CFs — one per high-nibble bucket of byte 0 of the txhash.
-// Same routing the cold RecSplit index uses.
-const numCFs = 16
-
-// cfNameByNibble is the precomputed (cf-0..cf-f) table indexed by
-// hash[0]>>4. Single source of truth used by both cfNames (open-time
-// CF list) and cfNameForTxHash (hot path).
-//
-//nolint:gochecknoglobals
-var cfNameByNibble = [16]string{
-	"cf-0", "cf-1", "cf-2", "cf-3", "cf-4", "cf-5", "cf-6", "cf-7",
-	"cf-8", "cf-9", "cf-a", "cf-b", "cf-c", "cf-d", "cf-e", "cf-f",
-}
+// txhashCF is the single column family holding every (txhash → ledgerSeq)
+// entry for the chunk, per the design's hot-tier spec (one `txhash` CF).
+const txhashCF = "txhash"
 
 // Entry — one (txhash → ledgerSeq) mapping.
 type Entry struct {
@@ -31,65 +18,40 @@ type Entry struct {
 	LedgerSeq uint32
 }
 
-// HotStore — RocksDB-backed hot transaction-hash store. 16 CFs named
-// cf-0..cf-f; each hash routes to cf-{txhash[0]>>4}; ledgerSeq
-// encoded big-endian. Routing, CF names, and encoding are internal.
+// HotStore — RocksDB-backed hot transaction-hash store. A single txhash CF
+// holding the full 32-byte hash as key and the big-endian ledgerSeq as value.
+// The CF name and encoding are internal.
 //
 // Like every hot store, a HotStore instance is chunk-bound: it
 // accumulates exactly one chunk's (txhash → seq) tuples before being
-// frozen into the chunk's cold .bin artifact. The binding is recorded
-// at open time (ChunkID) so the ingest driver can reject a store
-// bound to a different chunk than it is ingesting; the store does not
-// itself range-check writes (the driver's drain loop already
-// validates every ledger sequence against the chunk).
+// frozen into the chunk's cold .bin artifact. The store does not itself
+// range-check writes (the driver's drain loop already validates every ledger
+// sequence against the chunk).
 type HotStore struct {
-	store   *rocksdb.Store
-	chunkID chunk.ID
-}
-
-// NewHotStore validates inputs and returns an open HotStore bound to
-// chunkID (see the HotStore doc on chunk binding).
-func NewHotStore(path string, chunkID chunk.ID, logger *supportlog.Entry) (*HotStore, error) {
-	if path == "" {
-		return nil, rocksdb.ErrInvalidConfig
-	}
-	if logger == nil {
-		return nil, rocksdb.ErrInvalidConfig
-	}
-	store, err := rocksdb.New(rocksdb.Config{
-		Path:           path,
-		ColumnFamilies: cfNames(),
-		Logger:         logger,
-		Tuning:         tuning(),
-	})
-	if err != nil {
-		return nil, err
-	}
-	return &HotStore{store: store, chunkID: chunkID}, nil
+	store *rocksdb.Store
 }
 
-func cfNames() []string {
-	out := make([]string, numCFs)
-	copy(out, cfNameByNibble[:])
-	return out
+// NewWithStore wraps an ALREADY-OPEN rocksdb.Store as a txhash HotStore on the
+// single txhash CF (CFNames()). The store is owned by the caller — in production,
+// hotchunk.DB composes this facade over the shared per-chunk DB and closes that DB
+// once. The store must have CFNames() registered; NewWithStore does not check this.
+func NewWithStore(store *rocksdb.Store) *HotStore {
+	return &HotStore{store: store}
 }
 
-func cfNameForTxHash(hash [32]byte) string {
-	return cfNameByNibble[hash[0]>>4]
-}
+// CFNames returns the single txhash CF name this facade owns. Exported so
+// the hotchunk shared-DB opener can register it alongside the other CFs.
+func CFNames() []string { return []string{txhashCF} }
 
-// tuning — the hot txhash workload is write-once / point-lookup over
-// 16 CFs; the cross-knob interactions below are non-obvious enough
-// that they get an explicit per-stanza rationale. The other facades
-// ride on RocksDB defaults by contrast — only this workload earned
-// the calibration.
-func tuning() rocksdb.Tuning {
+// Tuning returns this facade's RocksDB tuning, applied to the shared per-chunk
+// DB by the hotchunk opener. The hot txhash workload is write-once /
+// point-lookup; the cross-knob interactions below are non-obvious enough that
+// they get an explicit per-stanza rationale. NOTE(review): "the other facades
+// ride on RocksDB defaults" predates the shared DB — confirm it still holds.
+func Tuning() rocksdb.Tuning {
 	return rocksdb.Tuning{
-		// Per-CF memtable budget × 16 CFs (64 MB × 16 = 1024 MB)
-		// matches the MaxTotalWalSizeMB cap below. Memtable-fill
-		// cadence and WAL-cap cadence align under uniform writes;
-		// either trigger fires at roughly the same time and produces
-		// ~64 MB SSTs.
+		// 64 MB memtable so one flush produces one ~64 MB SST under
+		// uniform writes.
 		WriteBufferMB:        64,
 		MaxWriteBufferNumber: 2,
 
@@ -117,8 +79,7 @@ func tuning() rocksdb.Tuning {
 		TargetFileSizeMB:       64,
 		MaxBytesForLevelBaseMB: 256,
 
-		// High background-job budget for the periodic memtable
-		// flushes across 16 CFs.
+		// Background-job budget for the periodic memtable flushes.
 		MaxBackgroundJobs: 8,
 		MaxOpenFiles:      10_000,
 
@@ -131,47 +92,29 @@ func tuning() rocksdb.Tuning {
 		BlockCacheMB:          512,
 		BloomFilterBitsPerKey: 12,
 
-		// 1 GB WAL cap matches the natural memtable budget above.
-		// Graceful Close auto-Flushes (see rocksdb.Store.Close), so
-		// this cap only bounds ungraceful-shutdown recovery (kernel
-		// panic, power loss, OOM kill).
+		// 1 GB WAL cap. Graceful Close auto-Flushes (see
+		// rocksdb.Store.Close), so this cap only bounds ungraceful-shutdown
+		// recovery (kernel panic, power loss, OOM kill).
 		MaxTotalWalSizeMB: 1024,
 	}
 }
 
-func (h *HotStore) Close() error { return h.store.Close() }
-
-// ChunkID returns the chunk this store is bound to (constructor-supplied;
-// never reads the store).
-func (h *HotStore) ChunkID() chunk.ID { return h.chunkID }
-
-// AddEntries writes a batch of (txhash → ledgerSeq) atomically
-// across however many CFs the hashes' nibbles cover. One fsync per
-// call.
-func (h *HotStore) AddEntries(entries []Entry) error {
-	if h.store.IsClosed() {
-		return rocksdb.ErrStoreClosed
-	}
-	switch len(entries) {
-	case 0:
-		return nil
-	case 1:
-		e := entries[0]
-		return h.store.Put(cfNameForTxHash(e.Hash), e.Hash[:], rocksdb.EncodeUint32(e.LedgerSeq))
-	default:
-		return h.store.Batch(func(b *rocksdb.BatchWriter) error {
-			for _, e := range entries {
-				b.Put(cfNameForTxHash(e.Hash), e.Hash[:], rocksdb.EncodeUint32(e.LedgerSeq))
-			}
-			return nil
-		})
+// AddEntriesToBatch queues each (txhash → ledgerSeq) Put into b on the txhash
+// CF — the building block hotchunk uses to fold the tx-hash writes into the one
+// shared per-ledger WriteBatch (decision (a)). It never commits (the caller owns
+// the batch) and adds no closed-store guard: the caller runs inside Store.Batch,
+// whose lifecycle RLock + checkOpen is the authoritative one. Always returns nil.
+func (h *HotStore) AddEntriesToBatch(b *rocksdb.BatchWriter, entries []Entry) error {
+	for _, e := range entries {
+		b.Put(txhashCF, e.Hash[:], rocksdb.EncodeUint32(e.LedgerSeq))
 	}
+	return nil
 }
 
 // Get returns the ledger sequence the hash was committed in, or
-// (0, stores.ErrNotFound) on miss. Only the routed CF is queried.
+// (0, stores.ErrNotFound) on miss.
 func (h *HotStore) Get(hash [32]byte) (uint32, error) {
-	v, found, err := h.store.Get(cfNameForTxHash(hash), hash[:])
+	v, found, err := h.store.Get(txhashCF, hash[:])
 	if err != nil {
 		return 0, err
 	}
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store_test.go
index c600d6141..7e0a117e0 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store_test.go
@@ -2,7 +2,6 @@ package txhash
 
 import (
 	"bytes"
-	"path/filepath"
 	"sync"
 	"sync/atomic"
 	"testing"
@@ -14,7 +13,6 @@ import (
 
 	supportlog "github.com/stellar/go-stellar-sdk/support/log"
 
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores"
 )
@@ -27,6 +25,8 @@ func silentLogger() *supportlog.Entry {
 	return log
 }
 
+// txhashFor builds a distinct 32-byte hash from a (high-nibble, tag) pair —
+// a convenient generator of many distinct keys for the single txhash CF.
 func txhashFor(nibble, tag byte) [32]byte {
 	var h [32]byte
 	h[0] = nibble << 4
@@ -39,41 +39,21 @@ func txhashFor(nibble, tag byte) [32]byte {
 
 func openTestHotStore(t *testing.T) *HotStore {
 	t.Helper()
-	s, err := NewHotStore(t.TempDir(), chunk.ID(0), silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = s.Close() })
+	s, _ := openTestHotStoreAt(t, t.TempDir())
 	return s
 }
 
-func TestNewHotStore_ValidatesInputs(t *testing.T) {
-	_, err := NewHotStore("", chunk.ID(0), silentLogger())
-	require.ErrorIs(t, err, rocksdb.ErrInvalidConfig)
-
-	_, err = NewHotStore(t.TempDir(), chunk.ID(0), nil)
-	require.ErrorIs(t, err, rocksdb.ErrInvalidConfig)
-}
-
-func TestNewHotStore_RecordsChunkBinding(t *testing.T) {
-	s, err := NewHotStore(t.TempDir(), chunk.ID(7), silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = s.Close() })
-	require.Equal(t, chunk.ID(7), s.ChunkID())
-}
-
-func TestNewHotStore_CreatesMissingDirectory(t *testing.T) {
-	path := filepath.Join(t.TempDir(), "subdir-never-created")
-	s, err := NewHotStore(path, chunk.ID(0), silentLogger())
-	require.NoError(t, err)
-	require.NotNil(t, s)
-	t.Cleanup(func() { _ = s.Close() })
-}
-
-func TestHotStore_CloseIsIdempotent(t *testing.T) {
-	s, err := NewHotStore(t.TempDir(), chunk.ID(0), silentLogger())
+func openTestHotStoreAt(t *testing.T, path string) (*HotStore, *rocksdb.Store) {
+	t.Helper()
+	store, err := rocksdb.New(rocksdb.Config{
+		Path:           path,
+		ColumnFamilies: CFNames(),
+		Logger:         silentLogger(),
+		Tuning:         Tuning(),
+	})
 	require.NoError(t, err)
-
-	require.NoError(t, s.Close())
-	require.NoError(t, s.Close())
+	t.Cleanup(func() { _ = store.Close() })
+	return NewWithStore(store), store
 }
 
 func TestHotStore_AddGetRoundTrip(t *testing.T) {
@@ -86,42 +66,43 @@ func TestHotStore_AddGetRoundTrip(t *testing.T) {
 	require.ErrorIs(t, err, stores.ErrNotFound)
 
 	// Single-entry AddEntries.
-	require.NoError(t, s.AddEntries([]Entry{{Hash: h, LedgerSeq: 12345}}))
+	require.NoError(t, addEntries(s, []Entry{{Hash: h, LedgerSeq: 12345}}))
 	got, err := s.Get(h)
 	require.NoError(t, err)
 	assert.Equal(t, uint32(12345), got)
 
 	// Overwrite via a second AddEntries.
-	require.NoError(t, s.AddEntries([]Entry{{Hash: h, LedgerSeq: 67890}}))
+	require.NoError(t, addEntries(s, []Entry{{Hash: h, LedgerSeq: 67890}}))
 	got, err = s.Get(h)
 	require.NoError(t, err)
 	assert.Equal(t, uint32(67890), got)
 
 	// Empty slice — no-op, no error.
-	require.NoError(t, s.AddEntries(nil))
-	require.NoError(t, s.AddEntries([]Entry{}))
+	require.NoError(t, addEntries(s, nil))
+	require.NoError(t, addEntries(s, []Entry{}))
 }
 
-func TestHotStore_NibbleRoutingAcrossAllCFs(t *testing.T) {
+func TestHotStore_ManyDistinctKeys(t *testing.T) {
 	s := openTestHotStore(t)
 
-	entries := make([]Entry, numCFs)
-	for n := range numCFs {
-		entries[n] = Entry{
-			Hash:      txhashFor(byte(n), 1),
-			LedgerSeq: uint32(n) * 100,
+	const n = 16
+	entries := make([]Entry, n)
+	for i := range n {
+		entries[i] = Entry{
+			Hash:      txhashFor(byte(i), 1),
+			LedgerSeq: uint32(i) * 100,
 		}
 	}
-	require.NoError(t, s.AddEntries(entries))
+	require.NoError(t, addEntries(s, entries))
 
-	for n := range numCFs {
-		got, err := s.Get(entries[n].Hash)
-		require.NoError(t, err, "nibble %x", n)
-		assert.Equal(t, uint32(n)*100, got, "nibble %x", n)
+	for i := range n {
+		got, err := s.Get(entries[i].Hash)
+		require.NoError(t, err, "key %d", i)
+		assert.Equal(t, uint32(i)*100, got, "key %d", i)
 	}
 }
 
-func TestHotStore_AddEntriesMultipleSpansCFs(t *testing.T) {
+func TestHotStore_AddEntriesMultiple(t *testing.T) {
 	s := openTestHotStore(t)
 
 	entries := []Entry{
@@ -131,7 +112,7 @@ func TestHotStore_AddEntriesMultipleSpansCFs(t *testing.T) {
 		{Hash: txhashFor(0xc, 1), LedgerSeq: 40},
 		{Hash: txhashFor(0xf, 1), LedgerSeq: 50},
 	}
-	require.NoError(t, s.AddEntries(entries))
+	require.NoError(t, addEntries(s, entries))
 
 	for _, e := range entries {
 		got, err := s.Get(e.Hash)
@@ -144,7 +125,7 @@ func TestHotStore_AddEntriesMultipleSpansCFs(t *testing.T) {
 	for i, e := range entries {
 		updated[i] = Entry{Hash: e.Hash, LedgerSeq: e.LedgerSeq + 1000}
 	}
-	require.NoError(t, s.AddEntries(updated))
+	require.NoError(t, addEntries(s, updated))
 	for _, e := range updated {
 		got, err := s.Get(e.Hash)
 		require.NoError(t, err)
@@ -153,36 +134,32 @@ func TestHotStore_AddEntriesMultipleSpansCFs(t *testing.T) {
 }
 
 func TestHotStore_PostCloseOps(t *testing.T) {
-	s, err := NewHotStore(t.TempDir(), chunk.ID(0), silentLogger())
-	require.NoError(t, err)
-	require.NoError(t, s.Close())
+	s, store := openTestHotStoreAt(t, t.TempDir())
+	require.NoError(t, store.Close())
 
 	h := txhashFor(0x5, 1)
-	require.ErrorIs(t, s.AddEntries([]Entry{{Hash: h, LedgerSeq: 1}}), rocksdb.ErrStoreClosed)
-	_, err = s.Get(h)
+	require.ErrorIs(t, addEntries(s, []Entry{{Hash: h, LedgerSeq: 1}}), rocksdb.ErrStoreClosed)
+	_, err := s.Get(h)
 	require.ErrorIs(t, err, rocksdb.ErrStoreClosed)
 
-	require.ErrorIs(t, s.AddEntries(nil), rocksdb.ErrStoreClosed)
-	require.ErrorIs(t, s.AddEntries([]Entry{}), rocksdb.ErrStoreClosed)
+	require.ErrorIs(t, addEntries(s, nil), rocksdb.ErrStoreClosed)
+	require.ErrorIs(t, addEntries(s, []Entry{}), rocksdb.ErrStoreClosed)
 }
 
 func TestHotStore_GracefulCloseAndReopenRoundTrips(t *testing.T) {
 	path := t.TempDir()
 
-	first, err := NewHotStore(path, chunk.ID(0), silentLogger())
-	require.NoError(t, err)
-	for n := range numCFs {
-		require.NoError(t, first.AddEntries([]Entry{
+	first, firstStore := openTestHotStoreAt(t, path)
+	for n := range 16 {
+		require.NoError(t, addEntries(first, []Entry{
 			{Hash: txhashFor(byte(n), 1), LedgerSeq: uint32(n) + 1},
 		}))
 	}
-	require.NoError(t, first.Close())
+	require.NoError(t, firstStore.Close())
 
-	second, err := NewHotStore(path, chunk.ID(0), silentLogger())
-	require.NoError(t, err)
-	t.Cleanup(func() { _ = second.Close() })
+	second, _ := openTestHotStoreAt(t, path)
 
-	for n := range numCFs {
+	for n := range 16 {
 		got, err := second.Get(txhashFor(byte(n), 1))
 		require.NoError(t, err)
 		assert.Equal(t, uint32(n)+1, got)
@@ -190,13 +167,13 @@ func TestHotStore_GracefulCloseAndReopenRoundTrips(t *testing.T) {
 }
 
 func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) {
-	s := openTestHotStore(t)
-	// Pre-populate one entry per nibble.
-	pre := make([]Entry, numCFs)
-	for n := range numCFs {
+	s, store := openTestHotStoreAt(t, t.TempDir())
+	// Pre-populate a spread of distinct keys.
+	pre := make([]Entry, 16)
+	for n := range 16 {
 		pre[n] = Entry{Hash: txhashFor(byte(n), 1), LedgerSeq: uint32(n)}
 	}
-	require.NoError(t, s.AddEntries(pre))
+	require.NoError(t, addEntries(s, pre))
 
 	var wg sync.WaitGroup
 	var stop atomic.Bool
@@ -204,52 +181,31 @@ func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) {
 	for w := range workers {
 		wg.Go(func() {
 			for i := byte(0); !stop.Load(); i++ {
-				_ = s.AddEntries([]Entry{
-					{Hash: txhashFor(i%numCFs, byte(w+5)), LedgerSeq: uint32(i)},
+				_ = addEntries(s, []Entry{
+					{Hash: txhashFor(i%16, byte(w+5)), LedgerSeq: uint32(i)},
 				})
 			}
 		})
 		wg.Go(func() {
 			for i := byte(0); !stop.Load(); i++ {
-				_, _ = s.Get(txhashFor(i%numCFs, 1))
+				_, _ = s.Get(txhashFor(i%16, 1))
 			}
 		})
 	}
 
 	time.Sleep(50 * time.Millisecond)
-	require.NoError(t, s.Close())
+	require.NoError(t, store.Close())
 	stop.Store(true)
 	wg.Wait()
 
 	postClose := []Entry{{Hash: txhashFor(0x1, 1), LedgerSeq: 1}}
-	require.ErrorIs(t, s.AddEntries(postClose), rocksdb.ErrStoreClosed)
+	require.ErrorIs(t, addEntries(s, postClose), rocksdb.ErrStoreClosed)
 }
 
-func TestCFNameForTxHash_AllHighNibbles(t *testing.T) {
-	cases := []struct {
-		topByte byte
-		want    string
-	}{
-		{0x00, "cf-0"},
-		{0x10, "cf-1"},
-		{0x20, "cf-2"},
-		{0x30, "cf-3"},
-		{0x40, "cf-4"},
-		{0x50, "cf-5"},
-		{0x60, "cf-6"},
-		{0x70, "cf-7"},
-		{0x80, "cf-8"},
-		{0x90, "cf-9"},
-		{0xa0, "cf-a"},
-		{0xb0, "cf-b"},
-		{0xc0, "cf-c"},
-		{0xd0, "cf-d"},
-		{0xe0, "cf-e"},
-		{0xf0, "cf-f"},
-	}
-	for _, c := range cases {
-		var h [32]byte
-		h[0] = c.topByte
-		assert.Equal(t, c.want, cfNameForTxHash(h))
-	}
+// addEntries commits entries through AddEntriesToBatch in one batch — the
+// production write shape, reduced to a test seeding call.
+func addEntries(h *HotStore, entries []Entry) error {
+	return h.store.Batch(func(b *rocksdb.BatchWriter) error {
+		return h.AddEntriesToBatch(b, entries)
+	})
 }
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/read_assembly_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/read_assembly_test.go
index d358b4adc..840c9697f 100644
--- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/read_assembly_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/read_assembly_test.go
@@ -346,7 +346,7 @@ func TestTxReader_HotAndColdFederation(t *testing.T) {
 	flHot := buildLedgers(t, []uint32{hotSeq}, 1)
 	hotStore := openTestHotStore(t)
 	for h, seq := range flHot.byHash {
-		require.NoError(t, hotStore.AddEntries([]Entry{{Hash: h, LedgerSeq: seq}}))
+		require.NoError(t, addEntries(hotStore, []Entry{{Hash: h, LedgerSeq: seq}}))
 	}
 
 	coldSeq := chunk.ID(5).FirstLedger()
diff --git a/cmd/stellar-rpc/internal/fullhistory/progress.go b/cmd/stellar-rpc/internal/fullhistory/progress.go
deleted file mode 100644
index 2ab6ba375..000000000
--- a/cmd/stellar-rpc/internal/fullhistory/progress.go
+++ /dev/null
@@ -1,121 +0,0 @@
-package fullhistory
-
-import (
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
-)
-
-// Progress is derived, never stored: every consumer recomputes from durable keys.
-// "Highest complete chunk" arithmetic runs in int64 (-1 = "nothing complete") to
-// avoid uint32 wraparound on the pre-genesis sentinel.
-
-// lastCommittedLedger derives the highest durably committed ledger: the max of the
-// floor term (EarliestLedger()-1) and the cold term (the highest fully-durable
-// chunk's last ledger). Computed signed so a fresh/unpinned store doesn't underflow,
-// then floored at the pre-genesis base (FirstLedgerSeq-1) — the "ingest from
-// genesis, nothing committed" base.
-func lastCommittedLedger(cat *catalog.Catalog) (uint32, error) {
-	cold, err := highestDurableChunk(cat)
-	if err != nil {
-		return 0, err
-	}
-	earliest, ok, err := cat.EarliestLedger()
-	if err != nil {
-		return 0, err
-	}
-
-	through := int64(chunk.FirstLedgerSeq) - 1 // pre-genesis base
-	if ok {
-		through = max(through, int64(earliest)-1)
-	}
-	if cold >= 0 {
-		through = max(through, int64(chunk.ID(cold).LastLedger())) //nolint:gosec // cold >= 0, a real chunk id
-	}
-	return uint32(through), nil // through >= FirstLedgerSeq-1 >= 0
-}
-
-// highestDurableChunk returns the highest chunk id with all artifacts durable
-// (ledgers frozen AND events frozen AND (txhash frozen OR covered by a frozen
-// index)), or -1 on a fresh start. A partially-frozen tip chunk is excluded —
-// counting it would open reads over a partial artifact; backfill repairs it.
-func highestDurableChunk(cat *catalog.Catalog) (int64, error) {
-	refs, err := cat.ChunkArtifactKeys()
-	if err != nil {
-		return 0, err
-	}
-
-	// Frozen per-kind state per chunk.
-	type kinds struct{ ledgers, events, txhash bool }
-	frozen := map[chunk.ID]*kinds{}
-	for _, ref := range refs {
-		if ref.State != geometry.StateFrozen {
-			continue
-		}
-		k := frozen[ref.Chunk]
-		if k == nil {
-			k = &kinds{}
-			frozen[ref.Chunk] = k
-		}
-		switch ref.Kind {
-		case geometry.KindLedgers:
-			k.ledgers = true
-		case geometry.KindEvents:
-			k.events = true
-		case geometry.KindTxHash:
-			k.txhash = true
-		}
-	}
-
-	// A frozen index coverage satisfies a chunk's txhash even after its .bin was demoted.
-	covered, err := frozenCoverageContains(cat)
-	if err != nil {
-		return 0, err
-	}
-
-	highest := int64(-1)
-	for c, k := range frozen {
-		if !k.ledgers || !k.events {
-			continue
-		}
-		if !k.txhash && !covered(c) {
-			continue
-		}
-		if id := int64(c); id > highest {
-			highest = id
-		}
-	}
-	return highest, nil
-}
-
-// frozenCoverageContains returns a predicate reporting whether a chunk falls in
-// some frozen index coverage [Lo, Hi]; coverages are read once up front.
-func frozenCoverageContains(cat *catalog.Catalog) (func(chunk.ID) bool, error) {
-	covs, err := cat.AllTxHashIndexKeys()
-	if err != nil {
-		return nil, err
-	}
-	var frozen []geometry.TxHashIndexCoverage
-	for _, cov := range covs {
-		if cov.State == geometry.StateFrozen {
-			frozen = append(frozen, cov)
-		}
-	}
-	return func(c chunk.ID) bool {
-		for _, cov := range frozen {
-			if cov.Lo <= c && c <= cov.Hi {
-				return true
-			}
-		}
-		return false
-	}, nil
-}
-
-// chunkIDOfLedger maps a ledger to its chunk, signed so a sub-genesis ledger
-// yields -1 instead of panicking like chunk.IDFromLedger.
-func chunkIDOfLedger(ledger uint32) int64 {
-	if ledger < chunk.FirstLedgerSeq {
-		return -1
-	}
-	return int64(chunk.IDFromLedger(ledger))
-}
diff --git a/cmd/stellar-rpc/internal/fullhistory/progress_test.go b/cmd/stellar-rpc/internal/fullhistory/progress_test.go
deleted file mode 100644
index 6fc469049..000000000
--- a/cmd/stellar-rpc/internal/fullhistory/progress_test.go
+++ /dev/null
@@ -1,105 +0,0 @@
-package fullhistory
-
-import (
-	"testing"
-
-	"github.com/stretchr/testify/require"
-
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
-)
-
-// ---------------------------------------------------------------------------
-// progress derivation test helpers.
-// ---------------------------------------------------------------------------
-
-// makeChunkDurable freezes ledgers+events+txhash for a chunk — the durable state
-// highestDurableChunk counts.
-func makeChunkDurable(t *testing.T, cat *catalog.Catalog, c chunk.ID) {
-	t.Helper()
-	freezeKinds(t, cat, c, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash)
-}
-
-// ---------------------------------------------------------------------------
-// lastCommittedLedger — chunk-granularity bound, pure catalog read.
-// ---------------------------------------------------------------------------
-
-func TestLastCommittedLedger(t *testing.T) {
-	t.Run("fresh store => pre-genesis sentinel, never MaxUint32", func(t *testing.T) {
-		// Every term is -1; the signed domain must yield FirstLedgerSeq-1, not wrap.
-		cat, _ := testCatalog(t)
-		got, err := lastCommittedLedger(cat)
-		require.NoError(t, err)
-		require.Equal(t, preGenesisLedger, got)
-	})
-
-	t.Run("cold term leads: highest fully-durable chunk", func(t *testing.T) {
-		cat, _ := testCatalog(t)
-		makeChunkDurable(t, cat, 0)
-		makeChunkDurable(t, cat, 1)
-		makeChunkDurable(t, cat, 2)
-		got, err := lastCommittedLedger(cat)
-		require.NoError(t, err)
-		require.Equal(t, chunk.ID(2).LastLedger(), got)
-	})
-
-	t.Run("incompletely-frozen tip degrades the bound (ledgers frozen, events freezing)", func(t *testing.T) {
-		cat, _ := testCatalog(t)
-		makeChunkDurable(t, cat, 0)
-		makeChunkDurable(t, cat, 1)
-		// Chunk 2 mid-freeze (events only "freezing") must NOT count: bound stays at 1.
-		freezeKinds(t, cat, 2, geometry.KindLedgers, geometry.KindTxHash)
-		require.NoError(t, cat.MarkChunkFreezing(2, geometry.KindEvents))
-		got, err := lastCommittedLedger(cat)
-		require.NoError(t, err)
-		require.Equal(t, chunk.ID(1).LastLedger(), got)
-	})
-
-	t.Run("txhash satisfied by a frozen index coverage (post-finalization demote)", func(t *testing.T) {
-		cat, _ := testCatalog(t)
-		// Chunk 7: txhash demoted but a frozen index coverage spans it ⇒ still durable.
-		freezeKinds(t, cat, 7, geometry.KindLedgers, geometry.KindEvents)
-		freezeCoverage(t, cat, cat.TxHashIndexLayout().TxHashIndexID(7), 0, 999) // window 0 covers chunk 7
-		got, err := lastCommittedLedger(cat)
-		require.NoError(t, err)
-		require.Equal(t, chunk.ID(7).LastLedger(), got)
-	})
-
-	t.Run("chunk NOT covered by any frozen index and no frozen txhash does not count", func(t *testing.T) {
-		cat, _ := testCatalog(t)
-		makeChunkDurable(t, cat, 0)
-		// Chunk 1: ledgers+events frozen, no txhash, no covering index.
-		freezeKinds(t, cat, 1, geometry.KindLedgers, geometry.KindEvents)
-		got, err := lastCommittedLedger(cat)
-		require.NoError(t, err)
-		require.Equal(t, chunk.ID(0).LastLedger(), got, "chunk 1 not durable; bound stays at chunk 0")
-	})
-
-	t.Run("earliest pin floor leads when above the cold term", func(t *testing.T) {
-		cat, _ := testCatalog(t)
-		// Floor pinned mid-chain, no chunks durable, no hot keys.
-		const floor = 50000
-		require.NoError(t, cat.PinEarliestLedger(floor))
-		got, err := lastCommittedLedger(cat)
-		require.NoError(t, err)
-		require.Equal(t, uint32(floor-1), got)
-	})
-
-	t.Run("earliest pin == genesis (2) does not underflow", func(t *testing.T) {
-		cat, _ := testCatalog(t)
-		require.NoError(t, cat.PinEarliestLedger(chunk.FirstLedgerSeq))
-		got, err := lastCommittedLedger(cat)
-		require.NoError(t, err)
-		require.Equal(t, preGenesisLedger, got, "earliest 2 - 1 = 1, not MaxUint32")
-	})
-
-	t.Run("max of the cold term and the earliest floor", func(t *testing.T) {
-		cat, _ := testCatalog(t)
-		makeChunkDurable(t, cat, 3) // cold => chunk 3 last ledger (the higher term)
-		require.NoError(t, cat.PinEarliestLedger(2))
-		got, err := lastCommittedLedger(cat)
-		require.NoError(t, err)
-		require.Equal(t, chunk.ID(3).LastLedger(), got)
-	})
-}
diff --git a/cmd/stellar-rpc/internal/fullhistory/retention.go b/cmd/stellar-rpc/internal/fullhistory/retention.go
deleted file mode 100644
index 2b0462390..000000000
--- a/cmd/stellar-rpc/internal/fullhistory/retention.go
+++ /dev/null
@@ -1,48 +0,0 @@
-package fullhistory
-
-import (
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
-	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
-)
-
-// RetentionFloor is the lowest chunk still within retention; any chunk below it
-// is eligible for discard/prune. It is the reader-side retention contract
-// (design "Reader retention contract", gettx §8.2 / §8.5): availability is
-// decided by retention, not the on-disk file set, which lets prune/sweep unlink
-// a chunk the instant it passes the floor without coordinating with the index
-// lifecycle (a stale .idx pointing at a pruned .pack is masked). The floor may
-// err LOW harmlessly — a wrongly-retained chunk still hits the reader's
-// missing-file rule — so it anchors on the same live completeThrough the prune
-// scan uses; widening history is backfill's job, not the floor's.
-type RetentionFloor struct {
-	chunk chunk.ID // lowest in-retention chunk
-}
-
-// NewRetentionFloor pins the floor for one (through, retentionChunks, earliest)
-// snapshot. A shortened retentionChunks raises the floor at once — no per-chunk
-// state to migrate.
-func NewRetentionFloor(through, retentionChunks, earliest uint32) RetentionFloor {
-	return RetentionFloor{chunk: retentionFloorChunk(through, retentionChunks, earliest)}
-}
-
-// Excludes reports whether chunk c is below the floor — past retention, eligible
-// for discard/prune. The discard and prune scans (eligibility.go) use it on a
-// chunk directly and, since an index is below the floor exactly when its last
-// chunk is, as Excludes(layout.LastChunk(idx)) for a whole tx-hash index. (The
-// reader's seq-level admit predicate and the ledger-seq floor for §8.2 coverage
-// filtering return with the read path, #772.)
-func (f RetentionFloor) Excludes(c chunk.ID) bool { return c < f.chunk }
-
-// retentionFloorChunk is the retention window's lower bound as a chunk id (the
-// design's retentionFloorChunk): the HIGHER of the sliding floor (retentionChunks
-// back from the last complete chunk) and the fixed earliest_ledger. slidingChunk is
-// signed so a young store / large retentionChunks clamps to chunk 0 instead of
-// underflowing. Both terms are chunk-first-ledgers, so IDFromLedger is exact.
-func retentionFloorChunk(upperBound, retentionChunks, earliest uint32) chunk.ID {
-	sliding := uint32(chunk.FirstLedgerSeq) // GenesisLedger
-	if retentionChunks > 0 {
-		slidingChunk := geometry.LastCompleteChunkAt(upperBound) - int64(retentionChunks) + 1
-		sliding = geometry.ChunkFirstLedger(max(slidingChunk, 0))
-	}
-	return chunk.IDFromLedger(max(sliding, earliest))
-}
diff --git a/cmd/stellar-rpc/internal/fullhistory/startup.go b/cmd/stellar-rpc/internal/fullhistory/startup.go
index 34e49dd46..a38d16c20 100644
--- a/cmd/stellar-rpc/internal/fullhistory/startup.go
+++ b/cmd/stellar-rpc/internal/fullhistory/startup.go
@@ -7,16 +7,26 @@ import (
 	"time"
 
 	"github.com/cenkalti/backoff/v4"
+	"golang.org/x/sync/errgroup"
+
+	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
 
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/backfill"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/lifecycle"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/observability"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 )
 
-// run is the daemon's startup: backfill to the tip, then serve reads (injected).
-// Returns nil only on clean shutdown; any other return is restartable
-// (ErrFirstStartNoTip on a first start with no reachable backend).
+// run is the daemon's startup, in two steps: (1) BACKFILL to the tip, then
+// (2) SERVE + INGEST — open the resume chunk's hot DB, construct the ingestion
+// stream (injected; captive core starts on the loop's first pull), begin serving
+// reads (injected), then run the ingestion and lifecycle loops as a joined
+// errgroup pair (whichever returns first cancels the other; g.Wait surfaces the
+// first error). A canceled parent ctx classifies as a clean shutdown; any other
+// return is a restartable error the supervisor warns on and retries with backoff
+// (a first start with no reachable backend, a backfill/ingest/lifecycle failure,
+// or a "ready" hot DB that won't open — none are auto-healed, all re-attempted).
 func run(ctx context.Context, cfg StartConfig) error {
 	if err := cfg.validate(); err != nil {
 		return err
@@ -36,14 +46,19 @@ func run(ctx context.Context, cfg StartConfig) error {
 			"(validateConfig pins it before run; not done here)")
 	}
 
-	// Derived, never stored: highest durably-committed ledger, clamped by earliest-1.
-	lastCommitted, err := lastCommittedLedger(cat)
+	// Derived, never stored: highest durably-committed ledger (frozen cold artifacts
+	// vs the highest ready hot DB's max committed seq), clamped by earliest-1. Passing
+	// the logger refines with one read-only open of the highest ready hot DB before
+	// ingestion opens a writer; a read-only open replays any synced WAL from an
+	// ungraceful crash into memtables, so MaxCommittedSeq is correct.
+	lastCommitted, err := lifecycle.LastCommittedLedger(cat, logger)
 	if err != nil {
 		return fmt.Errorf("startup derive last-committed: %w", err)
 	}
 
 	metrics := observability.MetricsOrNop(cfg.Exec.Metrics)
-	metrics.LastCommitted(lastCommitted, retentionFloorChunk(lastCommitted, cfg.RetentionChunks, earliest).FirstLedger())
+	metrics.LastCommitted(lastCommitted)
+	metrics.RetentionFloor(lifecycle.EffectiveRetentionFloor(lastCommitted, cfg.RetentionChunks, earliest))
 	logger.WithField("last_committed", lastCommitted).
 		WithField("earliest", earliest).
 		WithField("pinned", pinned).
@@ -56,17 +71,100 @@ func run(ctx context.Context, cfg StartConfig) error {
 	}
 
 	logger.WithField("last_committed", lastCommitted).
-		Info("backfill complete — handing off to the read server")
+		WithField("resume_chunk", chunk.IDFromLedger(lastCommitted+1).String()).
+		Info("backfill complete — opening resume hot tier and ingesting")
+
+	// Step 2: serve + ingest. resumeLedger is one past the last-committed ledger —
+	// the live chunk's next un-committed ledger.
+	resumeLedger := lastCommitted + 1
+
+	// Open the resume chunk's hot DB BEFORE serving reads, so a broken hot tier (a
+	// "ready" key whose DB won't open) fails startup instead of serving behind a
+	// crash-looping ingestion loop. run() owns the close only until the loop takes
+	// over: loopOwnsDB flips true at the errgroup launch, after which the loop's
+	// deferred close owns it (and g.Wait joins before run returns, so there is no
+	// window where neither owns it). Restarts re-enter run() from the top, so this
+	// stays the single initial-open site; the loop still reopens at each boundary.
+	hotDB, err := openHotDBForChunk(cat, chunk.IDFromLedger(resumeLedger), logger)
+	if err != nil {
+		return fmt.Errorf("startup open resume hot tier for ledger %d: %w", resumeLedger, err)
+	}
+	loopOwnsDB := false
+	defer func() {
+		if !loopOwnsDB {
+			_ = hotDB.Close() // an error before the loop took ownership
+		}
+	}()
+
+	// The live ingestion stream. It owns the captive-core process (started on the
+	// loop's first pull, torn down when the loop exits), so there is no eager
+	// prepare and no closer to defer — the loop's ctx-scoped iteration is the
+	// teardown. OpenCore only constructs, so a start failure surfaces as the loop's
+	// first stream error for the daemon to classify (and restart). (Eager core start
+	// before serve would need a LedgerStream.Start hook the SDK deliberately omits.)
+	stream, err := cfg.Core.OpenCore(ctx)
+	if err != nil {
+		return fmt.Errorf("startup open ingestion stream: %w", err)
+	}
+
+	// The lifecycle goroutine runs one tick per boundary signal; ingestion Publishes
+	// the just-completed chunk id into a latest-cell. It shares NO in-memory state
+	// with ingestion — all derived from durable keys.
+	boundary := lifecycle.NewBoundarySignal()
+
+	// Seed the first tick with the last complete chunk at the resume point so it
+	// fires at once. Skipped on a young network where no chunk is complete.
+	if seed := geometry.LastCompleteChunkAt(lastCommitted); seed >= 0 {
+		boundary.Publish(chunk.ID(seed)) //nolint:gosec // seed >= 0
+	}
+
+	// The lifecycle config draws on the SAME Exec wiring backfill uses, so the two
+	// share one catalog/pool by construction.
+	lifecycleCfg := lifecycle.Config{
+		ExecConfig:      cfg.Exec,
+		RetentionChunks: cfg.RetentionChunks,
+	}.WithLifecycleDefaults()
 
-	// Step 2: serve (injected). Its error is restartable.
+	// Begin serving reads (injected) BEFORE launching the loops; it must return
+	// promptly (launch, not block).
 	if err := cfg.ServeReads(ctx); err != nil {
 		return fmt.Errorf("startup serve reads: %w", err)
 	}
-	// TODO(#772): production ServeReads is a no-op until the cutover, so an immediate
-	// clean exit after backfill is expected, not a misconfig.
-	logger.WithField("last_committed", lastCommitted).
-		Info("read server returned — cold-only daemon shutting down cleanly")
-	return nil
+
+	// Ingestion and the lifecycle run as a joined pair under errgroup.WithContext:
+	// gctx cancels as soon as EITHER returns — and WithContext records the returning
+	// goroutine's error BEFORE canceling, so g.Wait surfaces the real cause, not the
+	// sibling's induced context-canceled. g.Wait joins both before run returns,
+	// restoring the single-lifecycle-goroutine invariant across supervisor restarts.
+	// supervise is the one clean-vs-restart decision point; a canceled parent ctx
+	// classifies as clean.
+	g, gctx := errgroup.WithContext(ctx)
+	// The loop's deferred close now owns hotDB; g.Wait joins it before run returns.
+	loopOwnsDB = true
+	g.Go(func() error {
+		err := runIngestionLoop(gctx, ingestionLoopConfig{
+			Stream:   stream,
+			Resume:   resumeLedger,
+			HotDB:    hotDB,
+			Catalog:  cat,
+			Boundary: boundary,
+			Logger:   logger,
+			Metrics:  metrics,
+			Sink:     cfg.Exec.Process.Sink,
+		})
+		if err == nil {
+			// WithContext cancels gctx (unblocking the lifecycle sibling in g.Wait)
+			// ONLY on a non-nil return. runIngestionLoop upholds that — every exit is
+			// an error, including a clean stream end — but guard it so a future nil
+			// return degrades to a supervised restart, never a silent g.Wait hang.
+			return errors.New("ingestion loop returned nil unexpectedly")
+		}
+		return err
+	})
+	g.Go(func() error {
+		return lifecycle.Loop(gctx, lifecycleCfg, cat, boundary)
+	})
+	return g.Wait()
 }
 
 // backfillToTip runs the backfill loop, returning lastCommitted as backfill makes
@@ -93,8 +191,12 @@ func backfillToTip(ctx context.Context, cfg StartConfig, lastCommitted, earliest
 		tip, err := networkTip(ctx, cfg.NetworkTip, cfg.TipBackoff, cfg.TipMaxAttempts)
 		if err != nil {
 			if lastCommitted < earliest {
-				// First start, no reachable backend: FATAL — never serve incomplete history.
-				return 0, fmt.Errorf("%w: %w", ErrFirstStartNoTip, err)
+				// First start, no reachable backend: error out — the daemon must never
+				// serve incomplete history. Restartable: the property is enforced by
+				// returning an error at all (each restart re-checks lastCommitted <
+				// earliest), not by the exit shape, so a datastore mid-outage or a young
+				// lake below genesis self-heals on a later restart.
+				return 0, fmt.Errorf("network tip unavailable and no local history to serve: %w", err)
 			}
 			// Restart with local progress: serve what's below lastCommitted, skip backfill.
 			tip = lastCommitted
@@ -103,16 +205,18 @@ func backfillToTip(ctx context.Context, cfg StartConfig, lastCommitted, earliest
 		// max() guards a lagging bulk tip: the tip alone could regress the floor below
 		// pruning or drop a complete last-committed chunk.
 		anchor := max(tip, lastCommitted)
-		rangeStart := retentionFloorChunk(anchor, retentionChunks, earliest)
+		rangeStart := chunk.IDFromLedger(lifecycle.EffectiveRetentionFloor(anchor, retentionChunks, earliest))
 
 		// Same anchor for rangeEnd: a complete last-committed chunk above a lagging tip
 		// still folds in; chunks beyond the tip are durable and self-skip.
 		rangeEndSigned := geometry.LastCompleteChunkAt(anchor)
 
 		// Mid-chunk resume exclusion: a mid-chunk last-committed within one chunk of the tip
-		// leaves the partial resume chunk to ingestion. Signed so genesis reads as a boundary.
+		// leaves the partial resume chunk to ingestion. Under the mid-chunk precondition
+		// (guarded here) the last COMPLETE chunk is exactly one short of the live chunk,
+		// so LastCompleteChunkAt names it directly — same vocabulary as rangeEndSigned above.
 		if withinOneChunkOfTip(tip, lastCommitted) && lastCommittedMidChunk(lastCommitted) {
-			rangeEndSigned = chunkIDOfLedger(lastCommitted) - 1 // one short of the live chunk
+			rangeEndSigned = geometry.LastCompleteChunkAt(lastCommitted)
 		}
 
 		// Break on an empty or non-advancing range.
@@ -139,7 +243,8 @@ func backfillToTip(ctx context.Context, cfg StartConfig, lastCommitted, earliest
 
 		metrics.BackfillPass(passDuration)
 		// Refresh the derived gauges as last-committed advances and the floor rises with it.
-		metrics.LastCommitted(lastCommitted, retentionFloorChunk(lastCommitted, retentionChunks, earliest).FirstLedger())
+		metrics.LastCommitted(lastCommitted)
+		metrics.RetentionFloor(lifecycle.EffectiveRetentionFloor(lastCommitted, retentionChunks, earliest))
 		logger.WithField("range_lo", rangeStart.String()).
 			WithField("range_hi", rangeEnd.String()).
 			WithField("last_committed", lastCommitted).
@@ -156,40 +261,49 @@ func withinOneChunkOfTip(tip, lastCommitted uint32) bool {
 }
 
 // lastCommittedMidChunk reports whether lastCommitted falls strictly inside a chunk.
-// The only sub-genesis value it sees is the fresh-start sentinel preGenesisLedger,
-// where chunkIDOfLedger yields -1 and chunk.ID(-1).LastLedger() wraps (MaxUint32+1
-// overflows to 0) back to exactly preGenesisLedger — so the comparison reports a
-// boundary (false) without a special case.
+// The pre-genesis sentinel (preGenesisLedger) reads as a boundary, never mid-chunk.
 func lastCommittedMidChunk(lastCommitted uint32) bool {
-	c := chunkIDOfLedger(lastCommitted)
-	//nolint:gosec // c is -1 (wraps to preGenesisLedger) or a real chunk id
-	return lastCommitted != chunk.ID(c).LastLedger()
+	c := geometry.ChunkIDOfLedger(lastCommitted)
+	return lastCommitted != geometry.CompleteThrough(c)
 }
 
-// ErrFirstStartNoTip is the first-start FATAL: no local progress and no reachable
-// tip. A sentinel so the supervisor owns the restart and tests can assert it.
-var ErrFirstStartNoTip = errors.New("network tip unavailable and no local history to serve")
-
 // ---------------------------------------------------------------------------
 // Injected external boundaries (so startup is testable with fakes).
 // ---------------------------------------------------------------------------
 
 // NetworkTipBackend samples the bulk backend's current network tip during backfill.
+// It is consulted only during backfill; once ingestion runs, captive core is the tip.
 type NetworkTipBackend interface {
 	NetworkTip(ctx context.Context) (uint32, error)
 }
 
+// CoreOpener hands back the live ingestion stream the loop consumes. The stream
+// OWNS its source's lifecycle (started on the first RawLedgers pull over the
+// unbounded range from the loop's resume ledger, torn down when the loop exits),
+// so there is no resume arg, no PrepareRange, and no closer for the caller to
+// sequence. Production returns a captive-core stream; tests pass a fake
+// LedgerStream.
+type CoreOpener interface {
+	OpenCore(ctx context.Context) (ledgerbackend.LedgerStream, error)
+}
+
 // StartConfig is run's resolved dependency bundle.
 type StartConfig struct {
 	// Exec drives backfill's RunBackfill; its Catalog/Logger are the shared ones.
 	Exec backfill.ExecConfig
 
-	// RetentionChunks is the backfill floor's width; 0 ⇒ the earliest-ledger floor only.
+	// RetentionChunks bounds the sliding retention floor's width — the backfill
+	// floor's width too (0 ⇒ the earliest-ledger floor only). run() assembles the
+	// lifecycle.Config from Exec + this, so the lifecycle and backfill can never
+	// diverge on the catalog/pool (the invariant is structural, not by comment).
 	RetentionChunks uint32
 
 	// NetworkTip samples the bulk backend's tip during backfill. Required.
 	NetworkTip NetworkTipBackend
 
+	// Core constructs the live ingestion stream (core starts on first pull). Required.
+	Core CoreOpener
+
 	// ServeReads begins serving reads; it must return promptly, not block. Required.
 	ServeReads func(ctx context.Context) error
 
@@ -207,7 +321,9 @@ const (
 	defaultTipMaxAttempts = 5
 )
 
-// withDefaults fills the tip-backoff defaults and the embedded ExecConfig defaults.
+// withDefaults fills the tip-backoff defaults and the embedded Exec defaults
+// (Workers -> GOMAXPROCS). The lifecycle.Config is assembled from Exec +
+// RetentionChunks in run().
 func (cfg StartConfig) withDefaults() StartConfig {
 	cfg.Exec = cfg.Exec.WithDefaults()
 	if cfg.TipBackoff <= 0 {
@@ -229,6 +345,9 @@ func (cfg StartConfig) validate() error {
 	if cfg.NetworkTip == nil {
 		return errors.New("nil StartConfig.NetworkTip")
 	}
+	if cfg.Core == nil {
+		return errors.New("nil StartConfig.Core")
+	}
 	if cfg.ServeReads == nil {
 		return errors.New("nil StartConfig.ServeReads")
 	}
diff --git a/cmd/stellar-rpc/internal/fullhistory/startup_test.go b/cmd/stellar-rpc/internal/fullhistory/startup_test.go
index 5f2e65c94..a9ba55b12 100644
--- a/cmd/stellar-rpc/internal/fullhistory/startup_test.go
+++ b/cmd/stellar-rpc/internal/fullhistory/startup_test.go
@@ -11,8 +11,11 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
+	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
+
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/backfill"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry"
 	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
 )
 
@@ -75,16 +78,25 @@ func (r *recordingPlan) snapshot() [][2]chunk.ID {
 	return out
 }
 
-// startTestConfig builds a cold StartConfig over a real catalog with faked
-// boundaries; a non-nil recordPlan wires the runBackfill seam to record passes.
+// startTestConfig builds a StartConfig over a real catalog with faked boundaries.
+// core may be nil only for tests calling backfillToTip directly (a nil *fakeCore in
+// Core is a non-nil interface, so validate would NOT reject it); run() tests pass a
+// fakeCore. A non-nil recordPlan wires the runBackfill seam to record passes without cold I/O.
 func startTestConfig(
-	t *testing.T, cat *catalog.Catalog, tip *fakeTipBackend, recordPlan *recordingPlan,
+	t *testing.T, cat *catalog.Catalog, tip *fakeTipBackend, core *fakeCore, recordPlan *recordingPlan,
 ) StartConfig {
 	t.Helper()
+	exec := backfill.ExecConfig{
+		Catalog: cat,
+		Logger:  silentLogger(),
+		Workers: 2,
+		Process: backfill.ProcessConfig{},
+	}
 	cfg := StartConfig{
-		Exec:            backfill.ExecConfig{Catalog: cat, Logger: silentLogger(), Workers: 2},
+		Exec:            exec,
 		RetentionChunks: 0,
 		NetworkTip:      tip,
+		Core:            core,
 		ServeReads:      func(context.Context) error { return nil },
 		TipBackoff:      time.Millisecond,
 		TipMaxAttempts:  3,
@@ -98,6 +110,37 @@ func startTestConfig(
 	return cfg
 }
 
+// fakeCore is a CoreOpener handing back a programmed LedgerStream. The loop opens
+// the stream at its resume ledger via RawLedgers(UnboundedRange(resume)), so the
+// resume the loop started from is the stream's recorded firstSeen (resumeSeen()).
+type fakeCore struct {
+	stream      *fakeCoreStream // programmed; nil → default block-on-ctx stream
+	openErr     error
+	openedCount atomic.Int32
+}
+
+func (c *fakeCore) OpenCore(context.Context) (ledgerbackend.LedgerStream, error) {
+	c.openedCount.Add(1)
+	if c.openErr != nil {
+		return nil, c.openErr
+	}
+	if c.stream == nil {
+		// Default: a live stream that blocks until ctx is canceled (the daemon's
+		// steady state). Tests that need a finite stream set c.stream.
+		c.stream = &fakeCoreStream{frames: map[uint32][]byte{}, blockOnCtx: true}
+	}
+	return c.stream, nil
+}
+
+// resumeSeen returns the resume ledger the loop opened the stream at (the range's
+// From()), 0 before the loop has pulled.
+func (c *fakeCore) resumeSeen() uint32 {
+	if c.stream == nil {
+		return 0
+	}
+	return c.stream.firstSeen.Load()
+}
+
 // pinGenesis pins earliest_ledger to genesis (as validateConfig does for a
 // "genesis" floor) so the first-start predicate classifies correctly.
 func pinGenesis(t *testing.T, cat *catalog.Catalog) {
@@ -145,17 +188,17 @@ func TestNetworkTip_CtxCancelAbortsWait(t *testing.T) {
 // backfillToTip — backfill loop edge cases.
 // ---------------------------------------------------------------------------
 
-// First start (genesis, no local history) with the tip absent is fatal.
-func TestBackfill_FirstStartTipAbsentFatal(t *testing.T) {
+// First start (genesis, no local history) with the tip absent errors out
+// (restartable — no sentinel; the supervisor retries).
+func TestBackfill_FirstStartTipAbsentErrors(t *testing.T) {
 	cat, _ := testCatalog(t)
 	pinGenesis(t, cat)
 	tip := &fakeTipBackend{err: errors.New("backend unreachable"), errFirst: 99}
-	cfg := startTestConfig(t, cat, tip, &recordingPlan{})
+	cfg := startTestConfig(t, cat, tip, nil, &recordingPlan{})
 
 	// Empty catalog ⇒ lastCommitted=1 < earliest=2 ⇒ first start with no progress.
 	_, err := backfillToTip(context.Background(), cfg, preGenesisLedger, chunk.FirstLedgerSeq)
 	require.Error(t, err)
-	require.ErrorIs(t, err, ErrFirstStartNoTip)
 }
 
 // First start (genesis) with the tip present computes range [chunk 0,
@@ -167,7 +210,7 @@ func TestBackfill_FirstStartTipPresentComputesRange(t *testing.T) {
 	tipLedger := chunk.ID(3).FirstLedger() + 100
 	rec := &recordingPlan{}
 	tip := &fakeTipBackend{tips: []uint32{tipLedger}}
-	cfg := startTestConfig(t, cat, tip, rec)
+	cfg := startTestConfig(t, cat, tip, nil, rec)
 
 	last, err := backfillToTip(context.Background(), cfg, preGenesisLedger, chunk.FirstLedgerSeq)
 	require.NoError(t, err)
@@ -186,7 +229,7 @@ func TestBackfill_YoungNetworkNoOp(t *testing.T) {
 	// Tip inside chunk 0 (no chunk has fully closed yet).
 	tip := &fakeTipBackend{tips: []uint32{chunk.FirstLedgerSeq + 50}}
 	rec := &recordingPlan{}
-	cfg := startTestConfig(t, cat, tip, rec)
+	cfg := startTestConfig(t, cat, tip, nil, rec)
 
 	last, err := backfillToTip(context.Background(), cfg, preGenesisLedger, chunk.FirstLedgerSeq)
 	require.NoError(t, err)
@@ -203,7 +246,7 @@ func TestBackfill_SteadyRestartNoOp(t *testing.T) {
 	tipLedger := chunk.ID(3).FirstLedger() + 10 // last complete chunk == 2
 	rec := &recordingPlan{}
 	tip := &fakeTipBackend{tips: []uint32{tipLedger}}
-	cfg := startTestConfig(t, cat, tip, rec)
+	cfg := startTestConfig(t, cat, tip, nil, rec)
 
 	last, err := backfillToTip(context.Background(), cfg, lastCommitted, chunk.FirstLedgerSeq)
 	require.NoError(t, err)
@@ -224,7 +267,7 @@ func TestBackfill_MidChunkResumeExclusion(t *testing.T) {
 	tipLedger := chunk.ID(5).LastLedger() // within one chunk, chunk 5 complete-at-tip
 	rec := &recordingPlan{}
 	tip := &fakeTipBackend{tips: []uint32{tipLedger}}
-	cfg := startTestConfig(t, cat, tip, rec)
+	cfg := startTestConfig(t, cat, tip, nil, rec)
 
 	last, err := backfillToTip(context.Background(), cfg, lastCommitted, chunk.FirstLedgerSeq)
 	require.NoError(t, err)
@@ -251,7 +294,7 @@ func TestBackfill_LongDowntimeRePass(t *testing.T) {
 		chunk.ID(6).FirstLedger() + 1, // last complete 5
 	}}
 	rec := &recordingPlan{}
-	cfg := startTestConfig(t, cat, tip, rec)
+	cfg := startTestConfig(t, cat, tip, nil, rec)
 
 	last, err := backfillToTip(context.Background(), cfg, preGenesisLedger, chunk.FirstLedgerSeq)
 	require.NoError(t, err)
@@ -274,7 +317,7 @@ func TestBackfill_RestartTipUnreachableDegrades(t *testing.T) {
 	lastCommitted := chunk.ID(2).LastLedger() // local progress exists
 	tip := &fakeTipBackend{err: errors.New("backend down"), errFirst: 99}
 	rec := &recordingPlan{}
-	cfg := startTestConfig(t, cat, tip, rec)
+	cfg := startTestConfig(t, cat, tip, nil, rec)
 
 	last, err := backfillToTip(context.Background(), cfg, lastCommitted, chunk.FirstLedgerSeq)
 	require.NoError(t, err, "local progress means no fatal")
@@ -295,7 +338,7 @@ func TestBackfill_LaggingBulkTipFoldsLastCommittedChunk(t *testing.T) {
 	tipLedger := chunk.ID(3).FirstLedger() + 10 // lagging bulk tip in chunk 3 (last complete 2)
 	rec := &recordingPlan{}
 	tip := &fakeTipBackend{tips: []uint32{tipLedger}}
-	cfg := startTestConfig(t, cat, tip, rec)
+	cfg := startTestConfig(t, cat, tip, nil, rec)
 
 	last, err := backfillToTip(context.Background(), cfg, lastCommitted, chunk.FirstLedgerSeq)
 	require.NoError(t, err)
@@ -308,57 +351,128 @@ func TestBackfill_LaggingBulkTipFoldsLastCommittedChunk(t *testing.T) {
 }
 
 // ---------------------------------------------------------------------------
-// run — the backfill + serve flow.
+// run — the backfill + serve + ingest flow.
 // ---------------------------------------------------------------------------
 
-// A young-network first start does no backfill then serves reads once.
-func TestRun_FirstStartBackfillThenServe(t *testing.T) {
+// A young-network first start does no backfill, opens the resume hot DB, starts
+// the (blocking) fake core, serves reads, and runs the ingestion loop — which
+// surfaces the ctx-canceled stream error on a clean shutdown (the daemon top
+// level classifies it as clean). The resume ledger is genesis (watermark+1).
+func TestRun_FirstStartServeIngestCleanShutdown(t *testing.T) {
 	cat, _ := testCatalog(t)
 	pinGenesis(t, cat)
 
 	served := atomic.Int32{}
+	core := &fakeCore{stream: &fakeCoreStream{frames: map[uint32][]byte{}, blockOnCtx: true}}
 	tip := &fakeTipBackend{tips: []uint32{chunk.FirstLedgerSeq + 10}} // young: no backfill
-	cfg := startTestConfig(t, cat, tip, nil)
+	cfg := startTestConfig(t, cat, tip, core, nil)
 	cfg.ServeReads = func(context.Context) error { served.Add(1); return nil }
 
-	require.NoError(t, run(context.Background(), cfg))
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel() // stop run even if an assertion below aborts the test before cancel()
+	errCh := make(chan error, 1)
+	go func() { errCh <- run(ctx, cfg) }()
+
+	// Reads served ⇒ the hot DB is open and core has started; request a clean shutdown from there.
+	require.Eventually(t, func() bool { return served.Load() == 1 }, 2*time.Second, 5*time.Millisecond)
+	cancel()
+
+	select {
+	case err := <-errCh:
+		require.ErrorIs(t, err, context.Canceled, "clean shutdown surfaces the ctx-canceled error")
+	case <-time.After(3 * time.Second):
+		t.Fatal("run did not return after ctx cancel")
+	}
+
 	require.Equal(t, int32(1), served.Load(), "reads were served exactly once")
+	require.Equal(t, int32(1), core.openedCount.Load(), "captive core started once")
+	require.Equal(t, uint32(chunk.FirstLedgerSeq), core.resumeSeen(),
+		"resume ledger is genesis on a fresh start (watermark+1)")
+
+	// The resume chunk's hot key is "ready" (opened, boundary never crossed).
+	state, err := cat.HotState(chunk.IDFromLedger(chunk.FirstLedgerSeq))
+	require.NoError(t, err)
+	assert.Equal(t, geometry.HotReady, state)
 }
 
-// run surfaces a ServeReads error wrapped, as a restartable failure.
+// A ServeReads error is surfaced wrapped as a restartable failure (NOT clean).
+// run() opens the resume hot DB and starts core BEFORE serving, so when serving
+// then fails run() itself must close the DB (the loop never took ownership);
+// otherwise a restart could not reopen it — checked by the reopen below.
 func TestRun_ServeReadsErrorSurfaces(t *testing.T) {
 	cat, _ := testCatalog(t)
 	pinGenesis(t, cat)
+	core := &fakeCore{stream: &fakeCoreStream{frames: map[uint32][]byte{}, blockOnCtx: true}}
 	tip := &fakeTipBackend{tips: []uint32{chunk.FirstLedgerSeq + 10}}
-	cfg := startTestConfig(t, cat, tip, nil)
+	cfg := startTestConfig(t, cat, tip, core, nil)
 	cfg.ServeReads = func(context.Context) error { return errors.New("rpc bind failed") }
 
 	err := run(context.Background(), cfg)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "serve reads")
+	require.NotErrorIs(t, err, context.Canceled, "a ServeReads error is restartable, not a clean shutdown")
+	require.Equal(t, int32(1), core.openedCount.Load(), "core was started before serving")
+
+	// run() opened the resume hot DB before serving; since the loop never took
+	// ownership, run() must have closed it (LOCK released) for this reopen to succeed.
+	db, err := openHotDBForChunk(cat, chunk.IDFromLedger(chunk.FirstLedgerSeq), silentLogger())
+	require.NoError(t, err, "the resume hot DB is reopenable — run released its LOCK")
+	require.NoError(t, db.Close())
+}
+
+// The resume hot DB and core are opened BEFORE reads are served (the design's
+// fail-fast order): by the time ServeReads runs, the resume chunk's hot key is
+// already "ready" and core has started — so a broken hot tier / core fails startup
+// instead of serving behind a crash-looping loop. ServeReads only samples the state
+// and then errors to avoid the blocking loop; every assertion runs after run() returns.
+func TestRun_OpensHotDBAndCoreBeforeServe(t *testing.T) {
+	cat, _ := testCatalog(t)
+	pinGenesis(t, cat)
+	resumeChunk := chunk.IDFromLedger(chunk.FirstLedgerSeq) // fresh start ⇒ resume at genesis
+	core := &fakeCore{stream: &fakeCoreStream{frames: map[uint32][]byte{}, blockOnCtx: true}}
+	tip := &fakeTipBackend{tips: []uint32{chunk.FirstLedgerSeq + 10}} // young ⇒ no backfill
+	cfg := startTestConfig(t, cat, tip, core, nil)
+
+	var stateAtServe geometry.HotState
+	var stateErr error
+	var coreAtServe int32
+	cfg.ServeReads = func(context.Context) error {
+		stateAtServe, stateErr = cat.HotState(resumeChunk)
+		coreAtServe = core.openedCount.Load()
+		return errors.New("stop before the blocking loop")
+	}
+
+	err := run(context.Background(), cfg)
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "serve reads")
+	require.NoError(t, stateErr, "HotState lookup made inside ServeReads")
+	assert.Equal(t, geometry.HotReady, stateAtServe, "resume hot DB is open+ready before serve")
+	assert.Equal(t, int32(1), coreAtServe, "core is opened before serve")
 }
 
-// run fatals with ErrFirstStartNoTip on a first start with an
-// unavailable tip; reads are never served.
-func TestRun_FirstStartNoTipFatal(t *testing.T) {
+// run errors on a first start with an unavailable tip (restartable, no sentinel);
+// reads are never served and core is never opened (i.e. ingestion never starts).
+func TestRun_FirstStartNoTipErrors(t *testing.T) {
 	cat, _ := testCatalog(t)
 	pinGenesis(t, cat)
 	served := atomic.Int32{}
+	core := &fakeCore{} // no stream configured: run is expected to fail before opening core
 	tip := &fakeTipBackend{err: errors.New("unreachable"), errFirst: 99}
-	cfg := startTestConfig(t, cat, tip, nil)
+	cfg := startTestConfig(t, cat, tip, core, nil)
 	cfg.ServeReads = func(context.Context) error { served.Add(1); return nil }
 
 	err := run(context.Background(), cfg)
-	require.ErrorIs(t, err, ErrFirstStartNoTip)
-	require.Zero(t, served.Load(), "reads are never served when backfill fatals")
+	require.Error(t, err)
+	require.Zero(t, served.Load(), "reads are never served when backfill errors")
+	require.Zero(t, core.openedCount.Load(), "core never starts when backfill errors")
 }
 
-// run surfaces a missing earliest_ledger pin loudly (a wiring error,
-// not a first start to mis-classify).
+// run surfaces a missing earliest_ledger pin loudly (a wiring error, not a first
+// start to mis-classify).
 func TestRun_RequiresEarliestPin(t *testing.T) {
 	cat, _ := testCatalog(t)
 	// No pinGenesis.
-	cfg := startTestConfig(t, cat, &fakeTipBackend{tips: []uint32{50_000}}, nil)
+	cfg := startTestConfig(t, cat, &fakeTipBackend{tips: []uint32{50_000}}, &fakeCore{}, nil)
 	err := run(context.Background(), cfg)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "earliest_ledger pinned")
@@ -367,13 +481,18 @@ func TestRun_RequiresEarliestPin(t *testing.T) {
 // run validates its injected boundaries.
 func TestRun_ValidatesConfig(t *testing.T) {
 	cat, _ := testCatalog(t)
-	base := startTestConfig(t, cat, &fakeTipBackend{tips: []uint32{50_000}}, nil)
+	base := startTestConfig(t, cat, &fakeTipBackend{tips: []uint32{50_000}}, &fakeCore{}, nil)
 
 	t.Run("nil NetworkTip", func(t *testing.T) {
 		cfg := base
 		cfg.NetworkTip = nil
 		require.Error(t, run(context.Background(), cfg))
 	})
+	t.Run("nil Core", func(t *testing.T) {
+		cfg := base
+		cfg.Core = nil
+		require.Error(t, run(context.Background(), cfg))
+	})
 	t.Run("nil ServeReads", func(t *testing.T) {
 		cfg := base
 		cfg.ServeReads = nil
@@ -436,7 +555,7 @@ func TestBackfill_ReportsPassAndProgress(t *testing.T) {
 	rp := &recordingPlan{}
 	tipLedger := chunk.ID(3).LastLedger() + 5
 	tip := &fakeTipBackend{tips: []uint32{tipLedger}}
-	start := startTestConfig(t, cat, tip, rp)
+	start := startTestConfig(t, cat, tip, nil, rp)
 	metrics := newRecordingMetrics()
 	start.Exec.Metrics = metrics