diff --git a/design-docs/full-history-design-explorer.html b/design-docs/full-history-design-explorer.html
new file mode 100644
index 000000000..e16290f23
--- /dev/null
+++ b/design-docs/full-history-design-explorer.html
@@ -0,0 +1,1864 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Full-History Design — Interactive Explorer</title>
+<style>
+:root { /* design tokens shared by every rule in this stylesheet */
+  --bg: #0d1117; /* page background (body) */
+  --panel: #161c26; /* widget/card surface; also TOC hover and active background */
+  --panel2: #1c2430; /* inset surface: buttons, bars, inputs, state pills, step rows */
+  --line: #2a3445; /* default border/divider colour */
+  --text: #d8dee9; /* body text */
+  --muted: #8b95a7; /* secondary text: hints, captions, spec refs */
+  --head: #f0f4fa; /* headings and emphasised labels */
+  --accent: #58a6ff; /* links, kicker, active TOC entry, selection outlines */
+  --freezing: #e3b341; /* "freezing" state chip, warn notes, .bin tick, meta band stroke */
+  --frozen: #58a6ff; /* "frozen"/cold artifacts; same hex as --accent */
+  --pruning: #f47067; /* "pruning" state chip, danger notes, crash box, failed chain node */
+  --absent: #6e7787; /* "absent" state chip, default state-pill border, gone badge */
+  --ready: #3fb98f; /* "ready" state chip, live chunk, final badge, ok file dot */
+  --transient: #b083f0; /* "transient" state chip, lifecycle actor tag */
+  --hot: #2dd4bf; /* hot tier: source boxes, hot chunk, bar marker, ingestion actor tag */
+  --mono: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace; /* monospace stack for code and readouts */
+}
+* { box-sizing: border-box; }
+@media (prefers-reduced-motion: no-preference) { html { scroll-behavior: smooth; } } /* smooth anchor scrolling only when the user has not requested reduced motion */
+body {
+  margin: 0; background: var(--bg); color: var(--text);
+  font: 16px/1.6 -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+}
+a { color: var(--accent); text-decoration: none; }
+a:hover { text-decoration: underline; }
+code, .mono { font-family: var(--mono); font-size: 0.88em; }
+code { background: #232c3b; padding: 1px 5px; border-radius: 4px; color: #cfe3ff; white-space: nowrap; }
+h1, h2, h3, h4 { color: var(--head); line-height: 1.25; }
+h2 { font-size: 1.6rem; margin: 0 0 4px; padding-top: 8px; }
+h3 { font-size: 1.12rem; margin: 26px 0 8px; }
+p { margin: 10px 0; }
+.kicker { color: var(--accent); font-size: 0.78rem; font-weight: 700; letter-spacing: 0.14em; text-transform: uppercase; margin-bottom: 2px; }
+.lead { color: var(--muted); }
+.spec-ref { font-size: 0.76rem; color: var(--muted); margin: 0 0 6px; }
+.spec-ref a { color: var(--muted); text-decoration: underline; text-underline-offset: 2px; }
+.spec-ref a:hover { color: var(--accent); }
+.layout { display: flex; max-width: 1280px; margin: 0 auto; }
+nav#toc {
+  position: sticky; top: 0; align-self: flex-start; flex: 0 0 230px;
+  height: 100vh; overflow-y: auto; padding: 28px 10px 28px 22px;
+  border-right: 1px solid var(--line); font-size: 0.86rem;
+}
+nav#toc .toc-title { color: var(--head); font-weight: 700; margin-bottom: 10px; font-size: 0.95rem; }
+nav#toc a { display: block; color: var(--muted); padding: 4px 10px; border-left: 2px solid transparent; border-radius: 0 6px 6px 0; }
+nav#toc a:hover { color: var(--text); text-decoration: none; background: var(--panel); }
+nav#toc a.active { color: var(--accent); border-left-color: var(--accent); background: var(--panel); }
+main { flex: 1; min-width: 0; padding: 28px 36px 120px; }
+header.doc-head { margin-bottom: 18px; }
+header.doc-head h1 { font-size: 2.1rem; margin: 4px 0 6px; }
+header.doc-head .sub { color: var(--muted); max-width: 760px; }
+section { margin-top: 56px; scroll-margin-top: 18px; }
+section > p, section > ul { max-width: 800px; }
+ul { padding-left: 22px; }
+li { margin: 5px 0; }
+.widget {
+  background: var(--panel); border: 1px solid var(--line); border-radius: 12px;
+  padding: 18px 20px; margin: 18px 0;
+}
+.widget .w-title { font-weight: 700; color: var(--head); margin-bottom: 4px; }
+.widget .w-hint { color: var(--muted); font-size: 0.84rem; margin-bottom: 14px; }
+.btn {
+  background: var(--panel2); color: var(--text); border: 1px solid var(--line);
+  border-radius: 8px; padding: 6px 14px; font-size: 0.88rem; cursor: pointer;
+}
+.btn:hover { border-color: var(--accent); color: var(--head); }
+.btn.primary { background: #16314f; }
+.btn.on { background: #16314f; border-color: var(--accent); color: #cfe3ff; }
+.btn:disabled { opacity: 0.4; cursor: default; }
+.btnrow { display: flex; flex-wrap: wrap; gap: 8px; margin: 10px 0; }
+.kv { font-family: var(--mono); font-size: 0.78rem; padding: 1px 8px; border-radius: 999px; border: 1px solid; white-space: nowrap; }
+.st-freezing { color: var(--freezing); border-color: var(--freezing); background: rgba(227,179,65,.08); }
+.st-frozen   { color: var(--frozen);   border-color: var(--frozen);   background: rgba(88,166,255,.08); }
+.st-pruning  { color: var(--pruning);  border-color: var(--pruning);  background: rgba(244,112,103,.08); }
+.st-absent   { color: var(--absent);   border-color: var(--absent);   background: rgba(110,119,135,.08); }
+.st-ready    { color: var(--ready);    border-color: var(--ready);    background: rgba(63,185,143,.08); }
+.st-transient{ color: var(--transient);border-color: var(--transient);background: rgba(176,131,240,.08); }
+.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 14px; margin: 16px 0; }
+.card { background: var(--panel); border: 1px solid var(--line); border-radius: 12px; padding: 16px 18px; }
+.card h4 { margin: 0 0 6px; font-size: 1rem; }
+.card p { margin: 0; color: var(--muted); font-size: 0.9rem; }
+.card .tag { float: right; font-size: 0.7rem; color: var(--accent); border: 1px solid var(--accent); border-radius: 999px; padding: 0 8px; opacity: .8; }
+table.t { border-collapse: collapse; width: 100%; font-size: 0.88rem; margin: 12px 0; }
+table.t th { text-align: left; color: var(--muted); font-weight: 600; border-bottom: 1px solid var(--line); padding: 6px 10px; }
+table.t td { border-bottom: 1px solid #202938; padding: 7px 10px; vertical-align: top; }
+table.t tr:last-child td { border-bottom: 0; }
+.note {
+  border-left: 3px solid var(--accent); background: rgba(88,166,255,.06);
+  padding: 10px 14px; border-radius: 0 8px 8px 0; margin: 14px 0; font-size: 0.92rem; max-width: 800px;
+}
+.note.warn { border-left-color: var(--freezing); background: rgba(227,179,65,.06); }
+.note.danger { border-left-color: var(--pruning); background: rgba(244,112,103,.06); }
+.legend { display: flex; flex-wrap: wrap; gap: 14px; font-size: 0.8rem; color: var(--muted); margin: 10px 0 0; }
+.legend .li { display: flex; align-items: center; gap: 6px; }
+.dot { width: 12px; height: 12px; border-radius: 3px; display: inline-block; }
+/* geometry */
+.geo-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 18px; }
+.geo-readout { font-family: var(--mono); font-size: 0.86rem; }
+.geo-readout div { padding: 3px 0; }
+.geo-readout b { color: var(--head); font-weight: 600; }
+.geo-readout .dim { color: var(--muted); }
+.bar { position: relative; height: 34px; background: var(--panel2); border: 1px solid var(--line); border-radius: 7px; margin: 6px 0 14px; overflow: hidden; }
+.bar .fill { position: absolute; top: 0; bottom: 0; left: 0; background: rgba(88,166,255,.18); }
+.bar .marker { position: absolute; top: 0; bottom: 0; width: 2px; background: var(--hot); }
+.bar .lbl { position: absolute; inset: 0; display: flex; align-items: center; justify-content: center; font-family: var(--mono); font-size: 0.74rem; color: var(--muted); pointer-events: none; }
+.bar-cap { font-size: 0.78rem; color: var(--muted); display: flex; justify-content: space-between; font-family: var(--mono); }
+input[type=range] { width: 100%; accent-color: var(--accent); }
+input[type=number], input[type=text] {
+  background: var(--panel2); border: 1px solid var(--line); color: var(--text);
+  border-radius: 8px; padding: 6px 10px; font-family: var(--mono); font-size: 0.9rem; width: 160px;
+}
+/* pipeline svg */
+.pipe-wrap { overflow-x: auto; }
+svg.pipe { width: 100%; min-width: 720px; height: auto; display: block; }
+svg.pipe rect.box { fill: var(--panel2); stroke: var(--line); rx: 9; }
+svg.pipe rect.box.src { stroke: var(--hot); }
+svg.pipe rect.box.cold { stroke: var(--frozen); }
+svg.pipe rect.box.meta { stroke: var(--freezing); fill: rgba(227,179,65,.05); }
+svg.pipe text { fill: var(--text); font: 13px -apple-system, sans-serif; }
+svg.pipe text.small { fill: var(--muted); font-size: 11px; }
+svg.pipe text.h { fill: var(--head); font-weight: 600; }
+svg.pipe path.arrow { stroke: #5a6a85; stroke-width: 1.6; fill: none; marker-end: url(#arr); }
+/* state machines */
+.sm { display: flex; align-items: center; flex-wrap: wrap; gap: 4px; margin: 10px 0 6px; }
+.sm .state {
+  font-family: var(--mono); font-size: 0.82rem; padding: 6px 13px; border-radius: 999px;
+  border: 1.5px solid var(--absent); color: var(--text); cursor: pointer; background: var(--panel2);
+}
+.sm .state:hover { filter: brightness(1.25); }
+.sm .state.sel { outline: 2px solid var(--accent); outline-offset: 2px; }
+.sm .arr { color: var(--muted); padding: 0 2px; }
+.sm-detail { background: var(--panel2); border: 1px solid var(--line); border-radius: 8px; padding: 10px 14px; font-size: 0.88rem; color: var(--muted); min-height: 44px; }
+.sm-detail b { color: var(--head); }
+/* step widgets (crash sim, boundary) */
+.steps-grid { display: grid; grid-template-columns: minmax(280px, 5fr) minmax(300px, 6fr); gap: 16px; align-items: start; }
+.step-list { display: flex; flex-direction: column; gap: 6px; }
+.step {
+  border: 1px solid var(--line); border-radius: 9px; padding: 8px 12px; cursor: pointer;
+  background: var(--panel2); font-size: 0.88rem; display: flex; gap: 10px; align-items: baseline;
+}
+.step:hover { border-color: var(--accent); }
+.step.sel { border-color: var(--accent); background: #16314f33; }
+.step.done { opacity: 0.92; }
+.step .n {
+  flex: 0 0 22px; height: 22px; border-radius: 50%; background: var(--panel);
+  border: 1px solid var(--line); color: var(--muted); font-size: 0.72rem;
+  display: inline-flex; align-items: center; justify-content: center; align-self: center;
+}
+.step.sel .n { border-color: var(--accent); color: var(--accent); }
+.step .actor { font-size: 0.68rem; text-transform: uppercase; letter-spacing: .08em; border-radius: 4px; padding: 0 6px; align-self: center; }
+.actor.ingestion { color: var(--hot); border: 1px solid var(--hot); }
+.actor.lifecycle { color: var(--transient); border: 1px solid var(--transient); }
+.state-panel { background: var(--panel2); border: 1px solid var(--line); border-radius: 10px; padding: 14px 16px; position: sticky; top: 16px; }
+.state-panel h5 { margin: 0 0 8px; color: var(--head); font-size: 0.86rem; text-transform: uppercase; letter-spacing: .06em; }
+.state-panel .grp { margin-bottom: 14px; }
+.mrow { display: flex; justify-content: space-between; gap: 10px; font-family: var(--mono); font-size: 0.78rem; padding: 3px 6px; border-radius: 6px; }
+.mrow .k { color: var(--text); overflow-wrap: anywhere; }
+.mrow.chg { background: rgba(88,166,255,.12); outline: 1px solid rgba(88,166,255,.4); }
+.mrow.gone { opacity: .45; text-decoration: line-through; }
+.frow { display: flex; align-items: baseline; gap: 8px; font-family: var(--mono); font-size: 0.78rem; padding: 3px 6px; border-radius: 6px; }
+.frow.chg { background: rgba(88,166,255,.12); outline: 1px solid rgba(88,166,255,.4); }
+.frow.gone { opacity: .45; text-decoration: line-through; }
+.fdot { width: 9px; height: 9px; border-radius: 50%; flex: 0 0 9px; position: relative; top: 0px; }
+.fdot.ok { background: var(--ready); }
+.fdot.partial { background: var(--freezing); }
+.fdot.dir { background: var(--hot); }
+.crash-box { border: 1px dashed var(--pruning); border-radius: 9px; padding: 10px 14px; margin-top: 12px; font-size: 0.86rem; }
+.crash-box .t { color: var(--pruning); font-weight: 700; font-size: 0.8rem; letter-spacing: .04em; }
+.step-desc { font-size: 0.9rem; color: var(--text); margin-bottom: 12px; }
+.tabrow { display: flex; gap: 8px; flex-wrap: wrap; margin-bottom: 14px; }
+/* rolling sim */
+.roll-controls { display: flex; flex-wrap: wrap; gap: 10px; align-items: center; margin-bottom: 12px; }
+.win-group { border: 1px solid var(--line); border-radius: 10px; padding: 10px 12px; margin: 10px 0; background: var(--panel2); }
+.win-head { display: flex; flex-wrap: wrap; justify-content: space-between; gap: 6px; font-family: var(--mono); font-size: 0.78rem; color: var(--muted); margin-bottom: 8px; }
+.win-head .fname { color: var(--frozen); }
+.chunk-row { display: flex; gap: 5px; flex-wrap: wrap; }
+.chunk {
+  width: 30px; height: 30px; border-radius: 6px; border: 1px solid var(--line);
+  background: #11161f; position: relative; font-size: 0; cursor: default;
+}
+.chunk.live { border-color: var(--ready); background: rgba(63,185,143,.25); animation: pulse 1.4s ease-in-out infinite; }
+.chunk.hot { border-color: var(--hot); background: rgba(45,212,191,.18); }
+.chunk.frozen { border-color: var(--frozen); background: rgba(88,166,255,.22); }
+.chunk.pruned { border-color: #222a38; background: transparent; }
+.chunk .bin-tick { position: absolute; right: 2px; bottom: 2px; width: 7px; height: 7px; border-radius: 2px; background: var(--freezing); }
+.chunk .cov { position: absolute; left: 0; right: 0; bottom: -7px; height: 3px; border-radius: 2px; background: var(--frozen); }
+@keyframes pulse { 0%,100% { box-shadow: 0 0 0 0 rgba(63,185,143,.35);} 50% { box-shadow: 0 0 0 5px rgba(63,185,143,0);} } /* halo pulse used by .chunk.live; disabled for users who request reduced motion */ @media (prefers-reduced-motion: reduce) { .chunk.live { animation: none; } }
+.roll-log { font-family: var(--mono); font-size: 0.76rem; color: var(--muted); background: #11161f; border: 1px solid var(--line); border-radius: 8px; padding: 10px 12px; margin-top: 12px; max-height: 170px; overflow-y: auto; }
+.roll-log .run { color: var(--head); }
+.roll-log div { padding: 1px 0; }
+.badge { font-size: 0.68rem; border-radius: 999px; padding: 1px 8px; border: 1px solid; }
+.badge.final { color: var(--ready); border-color: var(--ready); }
+.badge.current { color: var(--hot); border-color: var(--hot); }
+.badge.gone { color: var(--absent); border-color: var(--absent); }
+/* resolver */
+.rs-win { margin: 14px 0; }
+.rs-track { position: relative; height: 48px; background: #11161f; border: 1px solid var(--line); border-radius: 8px; }
+.rs-seg { position: absolute; height: 14px; border-radius: 4px; font-size: 0.66rem; font-family: var(--mono); display: flex; align-items: center; justify-content: center; color: #061018; overflow: hidden; white-space: nowrap; }
+.rs-seg.stored { top: 6px; background: var(--frozen); }
+.rs-seg.desired { top: 27px; background: transparent; border: 1.5px dashed var(--freezing); color: var(--freezing); }
+.rs-cap { display: flex; justify-content: space-between; font-family: var(--mono); font-size: 0.72rem; color: var(--muted); margin-top: 3px; }
+.rs-winlabel { font-family: var(--mono); font-size: 0.8rem; color: var(--head); margin-bottom: 4px; display: flex; gap: 10px; align-items: center;}
+.plan-box { background: #11161f; border: 1px solid var(--line); border-radius: 10px; padding: 12px 16px; font-family: var(--mono); font-size: 0.8rem; margin-top: 12px; }
+.plan-box .empty { color: var(--ready); }
+.plan-box .cb { color: var(--hot); }
+.plan-box .ib { color: var(--frozen); }
+.plan-box .src { color: var(--muted); }
+.plan-box div { padding: 2px 0; }
+/* reader chain */
+.chain { display: flex; flex-direction: column; gap: 0; margin-top: 12px; max-width: 660px; }
+.chain .nd { border: 1px solid var(--line); background: var(--panel2); border-radius: 9px; padding: 8px 14px; font-size: 0.87rem; display: flex; gap: 10px; align-items: baseline; }
+.chain .nd .ic { font-family: var(--mono); }
+.chain .nd.ok { border-color: var(--ready); }
+.chain .nd.fail { border-color: var(--pruning); }
+.chain .nd.retry { border-color: var(--freezing); }
+.chain .nd.dim { opacity: .38; }
+.chain .lnk { width: 2px; height: 14px; background: var(--line); margin-left: 26px; }
+.chain .nd small { color: var(--muted); display: block; }
+/* details/accordion */
+details.inv { background: var(--panel); border: 1px solid var(--line); border-radius: 10px; padding: 0; margin: 10px 0; }
+details.inv summary { cursor: pointer; padding: 12px 16px; font-weight: 600; color: var(--head); list-style: none; display: flex; gap: 10px; align-items: baseline; }
+details.inv summary::-webkit-details-marker { display: none; }
+details.inv summary .chev { color: var(--muted); transition: transform .15s; }
+details.inv[open] summary .chev { transform: rotate(90deg); }
+details.inv .body { padding: 0 16px 14px; color: var(--muted); font-size: 0.92rem; }
+details.inv .body b { color: var(--text); }
+.invchip { font-size: 0.7rem; font-family: var(--mono); border: 1px solid var(--accent); color: var(--accent); border-radius: 5px; padding: 0 6px; }
+/* misc */
+.two-col { display: grid; grid-template-columns: 1fr 1fr; gap: 18px; }
+.dirtree { font-family: var(--mono); font-size: 0.82rem; line-height: 1.75; background: #11161f; border: 1px solid var(--line); border-radius: 10px; padding: 14px 18px; overflow-x: auto; }
+.dirtree .c { color: var(--muted); }
+.dirtree .hl { color: var(--hot); }
+.dirtree .cold { color: var(--frozen); }
+.dirtree .tr { color: var(--freezing); }
+.partition { display: flex; gap: 0; border: 1px solid var(--line); border-radius: 12px; overflow: hidden; margin: 16px 0; min-height: 130px; }
+.partition .half { flex: 1; padding: 14px 18px; }
+.partition .half h4 { margin: 0 0 6px; font-size: 0.95rem; }
+.partition .half p { font-size: 0.84rem; color: var(--muted); margin: 4px 0; }
+.partition .lc { border-right: 2px dashed var(--line); background: rgba(45,212,191,.04); }
+.partition .rc { background: rgba(176,131,240,.04); }
+footer { margin-top: 80px; color: var(--muted); font-size: 0.82rem; border-top: 1px solid var(--line); padding-top: 16px; }
+@media (max-width: 980px) {
+  nav#toc { display: none; }
+  .geo-grid, .steps-grid, .two-col { grid-template-columns: 1fr; }
+  main { padding: 20px 18px 80px; }
+}
+</style>
+</head>
+<body>
+<div class="layout">
+<nav id="toc">
+  <div class="toc-title">Full-History Design</div>
+  <a href="#big-picture">The big picture</a>
+  <a href="#geometry">Geometry</a>
+  <a href="#guarantees">The four guarantees</a>
+  <a href="#data-model">Data model</a>
+  <a href="#lifecycles">Artifact lifecycles</a>
+  <a href="#write-protocol">One write protocol</a>
+  <a href="#derived-progress">Derived progress</a>
+  <a href="#rolling-index">The rolling index</a>
+  <a href="#boundary">A boundary, end to end</a>
+  <a href="#resolver">Backfill &amp; the resolver</a>
+  <a href="#startup">Startup</a>
+  <a href="#concurrency">Concurrency</a>
+  <a href="#reader">Reader contract</a>
+  <a href="#correctness">Correctness</a>
+</nav>
+<main>
+
+<header class="doc-head">
+  <div class="kicker">Full-History RPC · Interactive Design Explorer</div>
+  <h1>The Full-History Streaming Design</h1>
+  <div class="sub">
+    How the full-history daemon backfills old history, ingests live ledgers, freezes immutable history,
+    and serves transactions by hash — explained with interactive models you can poke at.
+    Companion to the <a href="full-history-streaming-workflow.md">streaming workflow</a> and
+    <a href="gettransaction-full-history-design.md">getTransaction</a> design docs; the markdown remains the
+    normative spec. Each section links to the doc that owns it.
+  </div>
+</header>
+
+<!-- ================================================================ -->
+<section id="big-picture">
+  <h2>The big picture</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#overview">streaming — Overview</a> · <a href="full-history-streaming-workflow.md#daemon-flow">How the daemon runs</a></div>
+  <p>
+    Full-history RPC runs as <b>one daemon in one mode</b>. There is no separate backfill command and no
+    explicit step for the operator: on startup the daemon figures out how far behind the network tip it is
+    and backfills to it automatically, then serves live ledgers as they're produced.
+  </p>
+  <div class="cards">
+    <div class="card"><span class="tag">startup</span><h4>1 · Backfill</h4>
+      <p>Runs bulk backfill as a subroutine: any chunk inside the retention window that isn't already
+      frozen is pulled from the configured LedgerBackend (BSB by default) — skipping the tip chunk that
+      captive core is actively ingesting. Covers first-ever start, downtime gaps, and retention widening.</p></div>
+    <div class="card"><span class="tag">steady state</span><h4>2 · Ingest</h4>
+      <p>Streams live ledgers from <code>CaptiveStellarCore</code> into <b>one hot RocksDB per chunk</b> —
+      ledgers, tx hashes, and events as column families, written as <b>one atomic synced WriteBatch per
+      ledger</b>. A ledger is either fully in the hot DB or absent.</p></div>
+    <div class="card"><span class="tag">steady state</span><h4>3 · Freeze &amp; prune</h4>
+      <p>A background goroutine wakes on each chunk boundary and performs one <b>run</b>: freeze the completed
+      chunk to immutable files, rebuild the current tx-hash index to fold it in, discard hot DBs the cold
+      artifacts now serve, and prune everything superseded or past retention.</p></div>
+  </div>
+
+  <div class="widget">
+    <div class="w-title">Data flow</div>
+    <div class="w-hint">Two sources feed one set of artifacts. Whatever produced the bytes, the artifacts — and the catalog keys that record them — are identical.</div>
+    <div class="pipe-wrap">
+    <svg class="pipe" viewBox="0 0 880 400" role="img" aria-label="Data flow diagram">
+      <defs>
+        <marker id="arr" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="7" markerHeight="7" orient="auto-start-reverse">
+          <path d="M0,0 L10,5 L0,10 z" fill="#5a6a85"></path>
+        </marker>
+      </defs>
+      <!-- sources -->
+      <rect class="box src" x="16" y="44" width="178" height="56" rx="9"></rect>
+      <text class="h" x="105" y="68" text-anchor="middle">CaptiveStellarCore</text>
+      <text class="small" x="105" y="86" text-anchor="middle">live ledgers at the tip</text>
+
+      <rect class="box src" x="16" y="252" width="178" height="56" rx="9"></rect>
+      <text class="h" x="105" y="276" text-anchor="middle">Object store (BSB)</text>
+      <text class="small" x="105" y="294" text-anchor="middle">or any conformant backend</text>
+
+      <!-- hot tier -->
+      <rect class="box" x="262" y="26" width="220" height="96" rx="9"></rect>
+      <text class="h" x="372" y="50" text-anchor="middle">Hot RocksDB · one per chunk</text>
+      <text class="small" x="372" y="70" text-anchor="middle">column families:</text>
+      <text class="small" x="372" y="88" text-anchor="middle">ledgers · txhash · events</text>
+      <text class="small" x="372" y="112" text-anchor="middle" style="fill:#2dd4bf">serves reads for the live chunk</text>
+
+      <!-- processChunk -->
+      <rect class="box" x="262" y="244" width="220" height="72" rx="9"></rect>
+      <text class="h" x="372" y="272" text-anchor="middle">processChunk</text>
+      <text class="small" x="372" y="292" text-anchor="middle">one streaming pass over 10,000 LCMs</text>
+
+      <!-- cold artifacts -->
+      <rect class="box cold" x="556" y="22" width="150" height="40" rx="9"></rect>
+      <text x="631" y="47" text-anchor="middle" class="mono">{chunk}.pack</text>
+      <rect class="box cold" x="556" y="74" width="150" height="40" rx="9"></rect>
+      <text x="631" y="99" text-anchor="middle">events segment</text>
+      <rect class="box cold" x="556" y="126" width="150" height="40" rx="9"></rect>
+      <text x="631" y="151" text-anchor="middle" class="mono">{chunk}.bin</text>
+      <text class="small" x="631" y="186" text-anchor="middle">per-chunk, write-once</text>
+
+      <rect class="box cold" x="556" y="218" width="290" height="56" rx="9"></rect>
+      <text class="h" x="701" y="242" text-anchor="middle">per-window .idx (streamhash MPHF)</text>
+      <text class="small" x="701" y="260" text-anchor="middle">rebuilt from .bin files on every chunk boundary</text>
+
+      <!-- arrows -->
+      <path class="arrow" d="M194,72 L258,72" style="stroke:#5a6a85"></path>
+      <text class="small" x="226" y="62" text-anchor="middle">stream</text>
+      <path class="arrow" d="M194,280 L258,280" style="stroke:#5a6a85"></path>
+      <text class="small" x="226" y="270" text-anchor="middle">backfill</text>
+      <path class="arrow" d="M372,126 L372,240" style="stroke:#5a6a85"></path>
+      <text class="small" x="382" y="180">freeze at the chunk</text>
+      <text class="small" x="382" y="196">boundary (hot branch)</text>
+      <path class="arrow" d="M486,262 C516,240 524,120 552,52" style="stroke:#5a6a85"></path>
+      <path class="arrow" d="M486,270 C516,260 528,150 552,100" style="stroke:#5a6a85"></path>
+      <path class="arrow" d="M486,278 C512,272 530,190 552,150" style="stroke:#5a6a85"></path>
+      <path class="arrow" d="M631,170 L631,214" style="stroke:#5a6a85"></path>
+      <text class="small" x="641" y="196">k-way merge</text>
+
+      <!-- meta band -->
+      <rect class="box meta" x="16" y="346" width="830" height="42" rx="9"></rect>
+      <text x="431" y="372" text-anchor="middle" class="small" style="fill:#e3b341">
+        catalog RocksDB — catalogs every file and directory above: mark-then-write keys, synced WAL, no directory is ever listed to find work
+      </text>
+    </svg>
+    </div>
+  </div>
+
+  <div class="note">
+    <b>The one-sentence summary:</b> data is born hot (one RocksDB per chunk), becomes cold and immutable
+    at the chunk boundary (<code>.pack</code> / events segment / <code>.bin</code> → rolled into a per-window
+    <code>.idx</code>), and every transition is recorded in a catalog key <em>before</em> the bytes move —
+    so a crash at any instant is recoverable from keys alone.
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="geometry">
+  <h2>Geometry</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#geometry">streaming — Geometry</a> · <a href="gettransaction-full-history-design.md#4-geometry">transactions — §4 Geometry</a></div>
+  <p>
+    The chain starts at ledger 2 (<code>GENESIS_LEDGER</code>). Two units organize all storage:
+  </p>
+  <ul>
+    <li><b>Chunk</b> — 10,000 ledgers (hardcoded). The atomic unit of ingestion, freezing, and crash recovery.</li>
+    <li><b>Window</b> — 1,000 chunks = 10,000,000 ledgers (hardcoded). The unit of
+      the rolling tx-hash index.</li>
+  </ul>
+
+  <div class="widget"><!-- Geometry explorer: the number field, preset buttons, and slider each supply a ledger seq; the ids and data-geo values are presumably read by the page script (not visible here) -->
+    <div class="w-title">Geometry explorer</div>
+    <div class="w-hint">Drag the slider or type any ledger sequence to see where it lives. All ids are zero-padded <code>%08d</code>; file buckets group 1000 chunks (<code>%05d</code>).</div>
+    <div style="display:flex; gap:14px; align-items:center; flex-wrap:wrap; margin-bottom:8px;">
+      <label class="mono" for="geo-seq" style="color:var(--muted); font-size:.84rem;">ledger seq</label>
+      <input type="number" id="geo-seq" min="2" max="60000001" value="53510001">
+      <div class="btnrow" style="margin:0">
+        <button class="btn" type="button" data-geo="2">genesis</button>
+        <button class="btn" type="button" data-geo="10000001">window 0's last ledger</button>
+        <button class="btn" type="button" data-geo="53510001">the doc's example</button>
+        <button class="btn" type="button" data-geo="20000002">window 2's first ledger</button>
+      </div>
+    </div>
+    <input type="range" id="geo-slider" min="2" max="60000001" value="53510001" step="1" aria-label="ledger seq">
+    <div class="geo-grid" style="margin-top:14px">
+      <div>
+        <div style="font-size:.78rem;color:var(--muted);margin-bottom:2px">WINDOW <span class="mono" id="geo-win-label"></span> — 1000 chunks · 10,000,000 ledgers</div>
+        <div class="bar" id="geo-win-bar"><div class="fill"></div><div class="marker"></div><div class="lbl"></div></div>
+        <div class="bar-cap"><span id="geo-win-lo"></span><span id="geo-win-hi"></span></div>
+        <div style="font-size:.78rem;color:var(--muted);margin:10px 0 2px">CHUNK <span class="mono" id="geo-chunk-label"></span> — 10,000 ledgers</div>
+        <div class="bar" id="geo-chunk-bar"><div class="fill"></div><div class="marker"></div><div class="lbl"></div></div>
+        <div class="bar-cap"><span id="geo-chunk-lo"></span><span id="geo-chunk-hi"></span></div>
+      </div>
+      <div class="geo-readout" id="geo-readout"></div>
+    </div>
+  </div>
+
+  <div class="note">
+    The file-bucket size (fixed at 1,000 chunks) and the window size (also 1,000 chunks) coincide
+    numerically — but they are different concepts: buckets are purely a filesystem concern and never
+    appear in catalog keys; windows define the tx-hash index layout.
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="guarantees">
+  <h2>The four guarantees</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#invariants">streaming — Invariants (INV-1…4)</a></div>
+  <p>
+    The daemon is built around four guarantees over its data. Everything else in the design — the write
+    protocol, the derived last committed ledger, the key-driven sweeps — exists to maintain these through any crash at
+    any instant.
+  </p>
+  <div class="cards">
+    <div class="card"><h4>Retention is complete</h4>
+      <p>No gaps within the retention window — for every ledger from the retention floor up to
+      the last committed ledger, all data derived from it (transactions,
+      events) is present on disk and can serve any request that falls entirely inside the window.</p></div>
+    <div class="card"><h4>Cold is canonical, hot is transient</h4>
+      <p>Frozen chunks and finalized indexes live in immutable cold artifacts. A chunk's hot DB is discarded
+      once every cold artifact derived from it is durable <em>and</em> the rolling index covers it — so a tx
+      lookup always has exactly one home: the hot DB until coverage, the <code>.idx</code> after.</p></div>
+    <div class="card"><h4>The catalog catalogs what's on disk</h4>
+      <p>Disk content is exactly what the catalog specifies — every file is named by a catalog key and
+      every key in a final state has its file. File and key writes/deletes are ordered to preserve this
+      across crashes.</p></div>
+    <div class="card"><h4>Storage tracks retention</h4>
+      <p>Disk usage scales with <code>retention_chunks</code>, not with uptime — files and keys for ledger
+      ranges below the effective retention floor are pruned as the floor advances.</p></div>
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="data-model">
+  <h2>Data model</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#data-model">streaming — Data model</a> · <a href="gettransaction-full-history-design.md#5-hot-tier">transactions — §5–6 tx tiers &amp; artifacts</a></div>
+  <p>
+    Durable state lives in two places: the <b>catalog RocksDB</b> (state markers and config pins) and the
+    <b>filesystem</b> (immutable files, plus one per-chunk hot RocksDB holding in-progress data during
+    ingestion).
+  </p>
+
+  <h3>On disk</h3>
+  <div class="dirtree">{default_data_dir}/<br>
+├── meta/rocksdb/ <span class="c">                                 ← catalog (WAL always on)</span><br>
+├── <span class="hl">hot/{chunk:08d}/</span> <span class="c">                              ← per-chunk hot RocksDB (transient)</span><br>
+├── <span class="cold">ledgers/{bucket:05d}/{chunk:08d}.pack</span><br>
+├── <span class="cold">events/{bucket:05d}/{chunk:08d}-events.pack</span> <span class="c">   (+ -index.pack, -index.hash)</span><br>
+└── txhash/<br>
+&nbsp;&nbsp;&nbsp;&nbsp;├── <span class="tr">raw/{bucket:05d}/{chunk:08d}.bin</span> <span class="c">          ← transient until window finalization (or retention pruning)</span><br>
+&nbsp;&nbsp;&nbsp;&nbsp;└── <span class="cold">index/{window:08d}/{lo:08d}-{hi:08d}.idx</span> <span class="c">  ← one frozen file per window, coverage-named</span>
+  </div>
+  <div class="legend">
+    <span class="li"><span class="dot" style="background:var(--hot)"></span> hot / transient-per-chunk</span>
+    <span class="li"><span class="dot" style="background:var(--frozen)"></span> cold, persists until retention pruning</span>
+    <span class="li"><span class="dot" style="background:var(--freezing)"></span> transient rebuild input</span>
+  </div>
+
+  <p style="margin-top:18px">
+    The <code>.bin</code> is the interesting transient: it is the input to <code>buildTxhashIndex</code>,
+    retained while its chunk is still within the window's live <code>[lo, hi]</code> coverage (at each
+    boundary, the rebuild reads every in-coverage <code>.bin</code>). When the window finalizes, the terminal build's
+    commit batch demotes its inputs to <span class="kv st-pruning">"pruning"</span> and the sweep removes
+    them — and under retention narrower than a window, a chunk drops below the floor before its window
+    completes, so retention pruning removes its <code>.bin</code> instead.
+  </p>
+
+  <h3>The chunk hot DB</h3>
+  <p>
+    One RocksDB per chunk at <code>hot/{chunk:08d}/</code>, holding everything for that chunk not yet
+    materialized to cold artifacts. The data types are column families of one instance — they share the
+    instance's WAL, so each ledger commits as <b>one atomic WriteBatch across all CFs</b>.
+  </p>
+  <table class="t">
+    <tr><th>Column family</th><th>Holds</th><th>Serves</th></tr>
+    <tr><td><code>ledgers</code></td><td>compressed LCMs, keyed by seq</td><td><code>getLedger</code> for the live chunk; the source <code>processChunk</code> reads at freeze</td></tr>
+    <tr><td><code>txhash</code></td><td>tx hash → seq</td><td><code>getTransaction</code> for the live chunk</td></tr>
+    <tr><td>events CFs</td><td>live events (schema per the events doc)</td><td><code>getEvents</code> for the live chunk</td></tr>
+  </table>
+
+  <h3>Catalog keys</h3>
+  <p>Three groups: artifact state (per-chunk files and per-window index coverages), hot DB state, and
+  config pins. Lifecycle states are shared by every artifact key in the system:</p>
+  <div class="btnrow" style="margin: 10px 0 14px">
+    <span class="kv st-freezing">"freezing"</span><span style="color:var(--muted);font-size:.84rem"> = file being written (or crashed mid-write) — delete or re-derive</span>
+  </div>
+  <div class="btnrow" style="margin: 0 0 14px">
+    <span class="kv st-frozen">"frozen"</span><span style="color:var(--muted);font-size:.84rem"> = fsynced and durable — truth; the only state readers resolve</span>
+  </div>
+  <div class="btnrow" style="margin: 0 0 14px">
+    <span class="kv st-pruning">"pruning"</span><span style="color:var(--muted);font-size:.84rem"> = queued for removal — finish the delete</span>
+  </div>
+  <table class="t">
+    <tr><th>Key</th><th>Meaning</th></tr>
+    <tr><td><code>chunk:{c}:ledgers</code></td><td>Per-chunk <code>.pack</code> file state.</td></tr>
+    <tr><td><code>chunk:{c}:txhash</code></td><td>Per-chunk <code>.bin</code> file state. Transient — removed at window finalization, or by retention pruning if its chunk ages out first.</td></tr>
+    <tr><td><code>chunk:{c}:events</code></td><td>Per-chunk events cold segment state.</td></tr>
+    <tr><td><code>index:{w}:{lo}:{hi}</code></td><td>One key per index <b>coverage</b>. The key <em>name</em> carries the coverage and maps 1:1 to the file <code>{lo}-{hi}.idx</code>; the <em>value</em> is pure lifecycle state. At most one coverage per window is <span class="kv st-frozen">"frozen"</span> at any moment.</td></tr>
+    <tr><td><code>hot:chunk:{c}</code></td><td><span class="kv st-ready">"ready"</span> = dir exists and is usable; <span class="kv st-transient">"transient"</span> = a directory operation (create <em>or</em> delete) is in flight — the recovery is the same either way, which is why one value suffices.</td></tr>
+    <tr><td><code>config:earliest_ledger</code></td><td>Written on first start, immutable thereafter (startup aborts on mismatch).</td></tr>
+  </table>
+  <div class="note">
+    <b>Key names carry identity; values carry only lifecycle.</b> An index key's filename is derived from
+    its name by a fixed bijection — resolving a key to its file never reads the value or lists a directory.
+    Every file on disk, including a crashed attempt's partial, is reachable from its key alone.
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="lifecycles">
+  <h2>Artifact lifecycles</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#catalog-keys">streaming — artifact lifecycles</a> · <a href="gettransaction-full-history-design.md#63-coverage-and-the-live-index">transactions — §6.3 coverage &amp; the live index</a></div>
+  <p>
+    Three state machines cover every durable thing in the system. Click any state to see what it means and
+    what recovery does if a crash leaves the system there.
+  </p>
+  <div class="widget">
+    <div class="w-title">Per-chunk artifacts — <span class="mono" style="font-size:.85em">.pack, events segment, .bin</span></div>
+    <div class="sm" data-machine="chunk"></div>
+    <div class="sm-detail" data-detail="chunk">Click a state above.</div>
+  </div>
+  <div class="widget">
+    <div class="w-title">Index coverage — <span class="mono" style="font-size:.85em">{lo}-{hi}.idx</span></div>
+    <div class="w-hint">The one logically-mutable cold artifact: "mutation" happens by freezing the next coverage and demoting the old one in a single atomic batch. The frozen file readers resolve is immutable until unlinked.</div>
+    <div class="sm" data-machine="index"></div>
+    <div class="sm-detail" data-detail="index">Click a state above.</div>
+  </div>
+  <div class="widget">
+    <div class="w-title">Hot DB — <span class="mono" style="font-size:.85em">hot/{chunk:08d}/</span></div>
+    <div class="sm" data-machine="hot"></div>
+    <div class="sm-detail" data-detail="hot">Click a state above.</div>
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="write-protocol">
+  <h2>One write protocol</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#one-write-protocol">streaming — One write protocol</a> · <a href="gettransaction-full-history-design.md#72-the-rebuild">transactions — §7.2 the rebuild</a></div>
+  <p>
+    Every durable artifact — per-chunk files and index coverages alike — uses the same protocol,
+    <b>mark-then-write</b>: put <span class="kv st-freezing">"freezing"</span> <em>before</em> any I/O;
+    write the file; fsync the file and its dirent(s); flip the key to
+    <span class="kv st-frozen">"frozen"</span>. The pre-mark guarantees <em>every file on disk has a
+    key</em>, so all cleanup is key-driven. Deletion mirrors it: demote, unlink the
+    <em>file before the key</em>, with an <code>fsyncDir</code> barrier between — giving the complementary
+    guarantee, <em>key absent ⟹ file gone</em>.
+  </p>
+  <div class="widget">
+    <div class="w-title">Crash simulator</div>
+    <div class="w-hint">Pick a protocol, then click any step. The right panel shows the durable state after that step completes — and what happens if the process dies right there.</div>
+    <div class="tabrow" id="proto-tabs"></div>
+    <div class="steps-grid">
+      <div class="step-list" id="proto-steps"></div>
+      <div class="state-panel" id="proto-state"></div>
+    </div>
+  </div>
+  <div class="note">
+    <b>Why the dirent fsyncs matter:</b> step 3 fsyncs the directory entries, not just the file, so a file's
+    (or a freshly created directory's) existence on disk is durable before its key flips to
+    <span class="kv st-frozen">"frozen"</span> — which is why a write that creates its parent directory also
+    barriers the grandparent.
+  </div>
+  <div class="note warn">
+    <b>A crashed index build is deleted, not salvaged:</b> a rebuild re-derives byte-identical output (the merge
+    is a deterministic function of the coverage), so a partial
+    <span class="kv st-freezing">"freezing"</span> file is just re-derived from scratch.
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="derived-progress">
+  <h2>Progress is derived, never stored</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#catalog-keys">streaming — Catalog keys</a></div>
+  <p>
+    There is no stored progress value. The hot DB's synced per-ledger WriteBatch <em>is</em> the durable commit;
+    recording it again in the catalog would create a second copy of the same fact. Instead, startup
+    recomputes the exact last committed ledger from the catalog, and during operation ingestion hands the
+    lifecycle each chunk as it completes. The recomputation leans on one <b>key-creation invariant</b>: a
+    <code>hot:chunk</code> key is created only after every ledger below its chunk has durably committed — so
+    everything below the highest hot key is complete, and a single read of the live hot DB pins the exact
+    ledger inside it.
+  </p>
+  <div class="widget">
+    <div class="w-title">lastCommittedLedger — two terms, take the higher</div>
+    <div class="w-hint">The <b>COLD term</b> is the last ledger of the highest fully-frozen chunk; the <b>HOT term</b> reads the live hot DB for the exact ledger inside it. The higher chunk wins — HOT in steady state, COLD only when no hot chunk sits above the frozen ones. Pick a state startup might find:</div>
+    <div class="btnrow" id="derived-presets"></div>
+    <div id="derived-viz"></div>
+  </div>
+  <div class="note">
+    Postcondition-driven backfill is what makes a recomputed last committed ledger safe: backfill converges
+    whole <em>ranges</em>, so recomputation can never skip a hole. And because nothing is stored, a lost hot
+    volume drops the recomputed answer to the last frozen boundary on its own (surgical recovery).
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="rolling-index">
+  <h2>The rolling tx-hash index</h2>
+  <div class="spec-ref">Normative spec: <a href="gettransaction-full-history-design.md#7-the-rolling-rebuild">transactions — §7 The rolling rebuild</a> · <a href="full-history-streaming-workflow.md#backfill">streaming — rule 3 summary</a></div>
+  <p>
+    The current window's index is <b>re-derived from scratch on every chunk boundary</b> to absorb the chunk
+    that just froze, growing until its window is complete. Only the window the network tip is in is ever
+    rebuilt; a completed window's index is finalized (its <code>.bin</code> inputs swept) and never touched
+    again. The rebuild is cheap relative to the cadence: a full-window streamhash build is ≈1 minute against
+    a chunk boundary every ~14 hours at mainnet rates.
+  </p>
+  <div class="widget">
+    <div class="w-title">Rolling-window simulator</div>
+    <div class="w-hint">
+      Scaled down to <b>8 chunks per window</b> so you can watch it roll (real default: 1000). Each step is
+      one chunk boundary: the live chunk freezes, the window's coverage advances by one atomic
+      promote-and-demote, the hot DB is discarded once covered. Enable retention to watch the floor chase the
+      tip and <code>lo</code> rise.
+    </div>
+    <div class="roll-controls">
+      <button class="btn primary" id="roll-step">Advance one boundary ▸</button>
+      <button class="btn" id="roll-auto">Auto-play</button>
+      <button class="btn" id="roll-reset">Reset</button>
+      <label style="font-size:.85rem;color:var(--muted);display:flex;align-items:center;gap:6px">
+        <input type="checkbox" id="roll-retention"> retention = 12 chunks
+      </label>
+    </div>
+    <div id="roll-windows"></div>
+    <div class="legend">
+      <span class="li"><span class="dot" style="background:rgba(63,185,143,.5);border:1px solid var(--ready)"></span> live chunk (hot DB, being written)</span>
+      <span class="li"><span class="dot" style="background:rgba(45,212,191,.4);border:1px solid var(--hot)"></span> hot DB awaiting coverage</span>
+      <span class="li"><span class="dot" style="background:rgba(88,166,255,.4);border:1px solid var(--frozen)"></span> frozen (.pack + events durable)</span>
+      <span class="li"><span class="dot" style="background:var(--freezing)"></span> .bin present (rebuild input)</span>
+      <span class="li"><span class="dot" style="background:var(--frozen);height:4px;border-radius:2px"></span> index coverage [lo, hi]</span>
+      <span class="li"><span class="dot" style="background:transparent;border:1px solid #222a38"></span> pruned (past retention)</span>
+    </div>
+    <div class="roll-log" id="roll-log"></div>
+  </div>
+  <div class="note">
+    <b>Why per-chunk <code>.bin</code> files make this affordable:</b> <code>processChunk</code> sorts each
+    chunk's ~3M entries in memory before writing, so the rebuild feeds streamhash sorted keys — its fast,
+    low-memory sorted-builder mode. Transient <code>.bin</code> disk is bounded by the windows actually in
+    flight (floor: one dense window ≈ 60 GB), because a finalized window's inputs are deleted as soon as its
+    final index is built.
+  </div>
+  <div class="note warn">
+    <b>Provisioning note:</b> old and new coverage files coexist from the start of a rebuild's write until
+    the eager sweep's unlink, so the window dir transiently holds ~2× the index size (~25 GB at the end of a
+    dense full window), and the window-end rebuild writes ~12.5 GB in ~1 minute (~200 MB/s burst) — trivial
+    on instance NVMe, worth provisioning for on throughput-capped volumes like EBS gp3.
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="boundary">
+  <h2>A chunk boundary, end to end</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#lifecycle">streaming — One boundary, end to end</a></div>
+  <p>
+    The micro view: ledger <span class="mono">53,510,001</span> closes chunk 5350 (window 5, floor pinned at
+    chunk 5100 by <code>earliest_ledger</code>, frozen index covering chunks 5100–5349). Step through every
+    write the boundary performs — watch the catalog, the filesystem, and where reads are served at each
+    instant.
+  </p>
+  <div class="widget">
+    <div class="btnrow">
+      <button class="btn" id="bd-prev">◂ Back</button>
+      <button class="btn primary" id="bd-next">Next ▸</button>
+      <button class="btn" id="bd-reset">Restart</button>
+      <span id="bd-pos" style="color:var(--muted);font-size:.85rem;align-self:center"></span>
+    </div>
+    <div class="steps-grid">
+      <div>
+        <div class="step-list" id="bd-steps"></div>
+      </div>
+      <div class="state-panel" id="bd-state"></div>
+    </div>
+  </div>
+  <div class="note">
+    Every arrow in this walkthrough is the one write protocol or its exit sweep. At the end of the run a
+    re-plan and re-scan find <em>nothing</em> to do — that <em>settled</em> state is what makes the
+    <a href="#correctness">invariant audits</a> meaningful on a live daemon.
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="resolver">
+  <h2>Backfill &amp; the resolver</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#postcondition-driven-planning">streaming — Postcondition-driven planning</a></div>
+  <p>
+    Backfill has a contract — <em>given a range, ensure every artifact derived from every ledger in it is
+    durable and servable</em> — and resolves what's missing <b>before scheduling anything</b>, so a restart
+    re-plans from what is on disk instead of redoing finished work. Each artifact kind contributes one rule
+    that compares its <b>postcondition</b> against the catalog and emits the difference as tasks:
+  </p>
+  <ul>
+    <li><b><code>ledgers</code> / <code>events</code></b> (per-chunk): needed for chunk <em>c</em> iff the key isn't <span class="kv st-frozen">"frozen"</span>.</li>
+    <li><b><code>txhash</code></b> (per-window): compare the <b>stored</b> coverage (from the window's unique frozen
+      index key) with the <b>desired</b> coverage <code>[max(window_start, floor), min(window_last, range_end)]</code>.
+      Desired ⊆ stored → schedule <em>nothing</em>. Desired exceeds stored → request <code>.bin</code>
+      production for every chunk in the desired range (already-frozen ones self-skip; previously-covered ones
+      re-derive from local <code>.pack</code>) and emit one
+      <code>buildTxhashIndex(w, desired_lo, desired_hi)</code>.</li>
+  </ul>
+  <p>
+    The plan is <b>just a value</b> — pure data recomputed from durable keys on every run, so a restart
+    re-plans from what is actually on disk with nothing to resume and nothing to reconcile. And the
+    comparison can trust <span class="kv st-frozen">"frozen"</span> blindly: input keys are demoted in the
+    same synced write that freezes the terminal coverage, and files are only ever deleted by sweeps under
+    non-frozen keys — no crash can leave a frozen key whose file is gone.
+  </p>
+
+  <div class="widget">
+    <div class="w-title">Resolver playground</div>
+    <div class="w-hint">Six situations the daemon actually encounters. Solid bar = stored coverage (the frozen index key); dashed bar = desired coverage. The plan below is what <code>resolve()</code> emits.</div>
+    <div class="btnrow" id="rs-buttons"></div>
+    <div id="rs-context" style="font-size:.9rem;color:var(--muted);margin:10px 0"></div>
+    <div id="rs-viz"></div>
+    <div class="plan-box" id="rs-plan"></div>
+    <div class="note" id="rs-note" style="margin-bottom:0"></div>
+  </div>
+
+  <h3>The execution model</h3>
+  <p>
+    <code>executePlan</code> is map/reduce without the shuffle or the job tracker: chunk builds are the maps,
+    index builds are the per-window reduces, and completion is recorded as <em>the artifacts themselves</em>.
+    Dependencies are simple, and nothing is persisted:
+  </p>
+  <ul>
+    <li>The dependency structure is two strata with one edge type — an index build waits on the chunk builds
+      inside its coverage — expressed directly with done-channels. Thousands of goroutines may exist, parked
+      on a single worker semaphore (<code>cfg.Workers</code>, the only concurrency knob); at most
+      <code>Workers</code> tasks execute at any instant.</li>
+    <li>Done-channels signal <em>success</em>: a chunk build closes its channel only once its <code>.bin</code>
+      is frozen, so an index build proceeds only when every input it needs exists. A chunk build that exhausts
+      its retries leaves its channel open and returns an error, which cancels the group context; any dependent
+      waiting on it unblocks through the <code>&lt;-gctx.Done()</code> case and bails — the daemon aborts and a
+      restart re-resolves from durable keys.</li>
+    <li><code>resolve</code> re-plans from the artifact keys on every run, so completed work never repeats and
+      interrupted work needs no reconciliation.</li>
+  </ul>
+  <p>
+    The same <code>resolve</code> + <code>executePlan</code> pair is the lifecycle run's first stage — one
+    scheduler, two callers, so the two regimes can never disagree about what "done" looks like.
+    <code>processChunk</code>'s source selection (<code>backfillSource</code>) is also shared: a ready,
+    complete hot DB beats the local <code>.pack</code> beats the backfill backend — which is exactly what lets
+    the lifecycle's freeze be ordinary plan execution rather than a special path.
+  </p>
+
+  <div class="widget">
+    <div class="w-title">executePlan — bounded concurrency &amp; the done-channel barrier</div>
+    <div class="w-hint">
+      Five chunk builds feeding one window's index build, run under <b>2 worker slots</b>. Step the scheduler:
+      chunk builds fill the slots, each closes its done-channel on success, and the index build stays parked
+      until every input it needs is frozen. Toggle a failure to watch a build leave its channel open, cancel
+      the group, and bail its dependents.
+    </div>
+    <div class="btnrow">
+      <button class="btn primary" id="ep-step">Step ▸</button>
+      <button class="btn" id="ep-auto">Auto-play</button>
+      <button class="btn" id="ep-reset">Reset</button>
+      <label style="font-size:.85rem;color:var(--muted);display:flex;align-items:center;gap:6px">
+        <input type="checkbox" id="ep-fail"> chunk 5348 exhausts its retries
+      </label>
+    </div>
+    <div id="ep-sem" class="ep-sem"></div>
+    <div class="ep-graph-wrap"><svg id="ep-svg" class="ep-svg" viewBox="0 0 740 220" role="img" aria-label="executePlan dependency graph"></svg></div>
+    <div class="note" id="ep-note" style="margin-bottom:0"></div>
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="startup">
+  <h2>Startup: the backfill loop</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#startup">streaming — Startup</a></div>
+  <p>
+    Before it serves anything, the daemon runs backfill in a loop until on-disk coverage reaches the last
+    <em>complete</em> chunk at the network tip. Each pass re-reads the tip, plans <code>[floor, last complete
+    chunk]</code>, and executes it; if the tip advanced while the pass ran, another pass picks up the chunks it
+    moved past. The <b>partial chunk still forming at the tip is never backfilled</b> — its ledgers are already
+    in the live hot DB, and hot-DB ingestion finishes it. When the loop exits, the daemon opens the resume
+    chunk's hot DB, seeds the lifecycle, and starts serving.
+  </p>
+  <div class="widget">
+    <div class="w-title">Startup walkthrough</div>
+    <div class="w-hint">
+      Three situations, each running the real <code>startStreaming</code> loop arithmetic
+      (<code>lastCompleteChunkAt</code>, the near-tip/mid-chunk trim, the <code>rangeEnd ≤ backfilledThrough</code>
+      exit). Each card is one loop pass; the last is the hand-off to serve&nbsp;+&nbsp;ingest.
+    </div>
+    <div class="btnrow" id="su-buttons"></div>
+    <div id="su-context" style="font-size:.9rem;color:var(--muted);margin:10px 0"></div>
+    <div id="su-passes"></div>
+  </div>
+  <div class="note">
+    The loop reads durable keys only, so it is its own crash recovery: a restart re-plans from what is on disk,
+    redoing no finished chunk and skipping no unfinished one. The same <code>resolve</code> +
+    <code>executePlan</code> pair runs here and in every lifecycle run — startup just drives the
+    <em>bottom</em> of storage down to the floor, where the running lifecycle never reaches.
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="concurrency">
+  <h2>Concurrency: two writers, one fence</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#concurrency-model">streaming — Concurrency model</a></div>
+  <p>
+    Two writers; readers only read. Their domains <b>partition at the live chunk</b>, and the partition
+    itself is encoded in the catalog — the lifecycle's derivation treats the highest hot key as the live
+    chunk and touches only what lies below it.
+  </p>
+  <div class="partition">
+    <div class="half lc">
+      <h4 style="color:var(--hot)">Ingestion loop — owns the live chunk</h4>
+      <p>The only writer of the live chunk's hot DB, and the creator of each chunk's
+      <code>hot:chunk:{c}</code> key. One synced WriteBatch per ledger; no progress variable at all.</p>
+    </div>
+    <div class="half rc">
+      <h4 style="color:var(--transient)">Lifecycle goroutine — owns everything below</h4>
+      <p>Handed-off hot DBs (freeze + discard), all <code>chunk:*</code> and <code>index:*</code> keys, and
+      the deletion side of <code>hot:chunk:*</code>. The run's plan stage fans out to the bounded worker
+      pool — every worker operating strictly below the live chunk.</p>
+    </div>
+  </div>
+  <p>
+    The <b>handoff fence is the boundary's write order</b>: the ingestion loop closes its write handle and
+    opens the next chunk — which moves the partition, since the closed chunk now lies below the new live
+    chunk — <em>before</em> it hands the completed chunk to the lifecycle on the channel. So by the time the
+    lifecycle freezes and discards it, no writer holds it.
+  </p>
+  <p>
+    The only connection between the goroutines is the <b>channel</b>: a buffered channel of depth
+    <code>lifecycleQueueDepth</code> that carries the chunk ingestion just completed. The lifecycle drains it to the
+    highest value each wake and plans up to that chunk, folding a backlog of boundaries into one run. The
+    value sets only the run's <em>range</em>; the work is still gated by durable keys — <code>resolve</code>
+    and the scans decide what to build, discard, and prune. A send onto a full buffer means the lifecycle has
+    fallen <code>lifecycleQueueDepth</code> boundaries behind ingestion — a fatal "freeze can't keep up,"
+    never a silent drop (the depth sits well above the at-most-one signal a healthy daemon holds).
+  </p>
+</section>
+
+<!-- ================================================================ -->
+<section id="reader">
+  <h2>The reader contract</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#reader-contract">streaming — Reader contract</a> · <a href="gettransaction-full-history-design.md#8-query-path">transactions — §8 Query path</a></div>
+  <p>
+    A read resolves data through <b>two rules</b>, and the rest of the design relies on both:
+  </p>
+  <ol>
+    <li><b>Only <span class="kv st-ready">"ready"</span> and <span class="kv st-frozen">"frozen"</span> are
+      visible.</b> A read resolves a chunk only from a <span class="kv st-ready">"ready"</span> hot DB or a
+      <span class="kv st-frozen">"frozen"</span> cold file — never a key in a transient state
+      (<span class="kv st-freezing">"freezing"</span>, <span class="kv st-pruning">"pruning"</span>,
+      <span class="kv st-transient">"transient"</span>). So a reader never sees a half-written file, crash
+      debris, or an in-flight sweep.</li>
+    <li><b>Below the floor is <em>not-found</em>.</b> A read for any seq below the retention floor returns
+      not-found regardless of what's still on disk — the contract that lets pruning unlink files
+      <em>unilaterally</em> (a stale <code>.idx</code> may resolve a hash to a <code>.pack</code> that's been
+      deleted, but the below-floor read is not-found anyway).</li>
+  </ol>
+  <p>
+    Together these make retention the single source of truth for "is this available?". <b>Everything else
+    about serving a read</b> — how the reader picks the tier, probes the right window, and stays correct while
+    a sweep unlinks a file mid-read — is the <b>query-routing design's</b> concern, out of scope here (and in
+    the streaming doc). The explorer below illustrates the cold-tier <code>getTransaction</code> probe from the
+    transactions design, for reference:
+  </p>
+  <div class="widget">
+    <div class="w-title">Read-path explorer <span style="font-weight:400;color:var(--muted);font-size:.8rem">· query-routing, out of scope of the streaming doc</span></div>
+    <div class="w-hint">Three cold lookups over a multi-window retention. The chain shows each per-window probe.</div>
+    <div class="btnrow" id="rd-buttons"></div>
+    <div class="chain" id="rd-chain"></div>
+    <div class="note" id="rd-note" style="margin-bottom:0"></div>
+  </div>
+</section>
+
+<!-- ================================================================ -->
+<section id="correctness">
+  <h2>Correctness</h2>
+  <div class="spec-ref">Normative spec: <a href="full-history-streaming-workflow.md#correctness">streaming — Correctness</a></div>
+  <p>
+    <b>settled</b> means the run's plan is empty and both scans produce empty op lists — the state the
+    system returns to between boundaries, and the state in which the invariants below are auditable on a
+    live daemon. From <em>any</em> storage state — partial-completion crashes, operator actions, surgical
+    recovery — startup (backfill + the first run) drives the system to settled satisfying all four.
+  </p>
+
+  <details class="inv" open>
+    <summary><span class="chev">▸</span><span class="invchip">INV-1</span> Read correctness</summary>
+    <div class="body">Any data request whose ledger scope falls entirely within the retention window returns
+    correct results — content matches what a conformant LedgerBackend would produce, no partial state
+    visible, no in-retention range unreachable. <b>Audit:</b> issue reads, or re-derive artifacts via a
+    conformant backend and byte-compare. One transient exception: when surgical recovery demotes hot data down
+    to the live chunk, the last committed ledger rewinds and the floor regresses with it, briefly admitting a
+    few already-pruned bottom chunks — those reads fail <em>soft</em> (not-found, never wrong data) until the
+    floor re-advances.</div>
+  </details>
+  <details class="inv">
+    <summary><span class="chev">▸</span><span class="invchip">INV-2</span> Single canonical state</summary>
+    <div class="body">At most one <span class="kv st-frozen">"frozen"</span> index key per window —
+    <b>at all times</b>, settled or not (the commit batch promotes and demotes in one write). At
+    settled: no key anywhere is <span class="kv st-freezing">"freezing"</span> or
+    <span class="kv st-pruning">"pruning"</span>; no hot DB persists for a chunk cold artifacts fully serve;
+    no <code>chunk:c:txhash</code> key survives in a finalized window. Two transients are tolerated even at
+    settled: a hot DB's <span class="kv st-transient">"transient"</span> bracket around an in-flight directory
+    op, and — after surgical recovery — a partially-frozen chunk <em>above</em> the last committed ledger
+    (no read can observe it). <b>Audit:</b> walk catalog keys, cross-check forbidden co-existence.</div>
+  </details>
+  <details class="inv">
+    <summary><span class="chev">▸</span><span class="invchip">INV-3</span> Disk matches catalog</summary>
+    <div class="body">At settled, the set of artifact files and hot DB directories on disk equals exactly
+    the set the catalog specifies — no orphan files, no dangling keys, no duplicate artifacts. A
+    non-key-named file in an index window dir is a real bug, not mid-run debris. <b>Audit:</b> walk the
+    filesystem against the catalog, both directions.</div>
+  </details>
+  <details class="inv">
+    <summary><span class="chev">▸</span><span class="invchip">INV-4</span> Retention bound</summary>
+    <div class="body">At settled, no file or catalog key maps to a ledger range strictly below the
+    effective retention floor — except a frozen index key whose window straddles the floor, which keeps the
+    <code>lo</code> it was built with (its below-floor coverage is never served; the reader gate returns
+    not-found). <b>Audit:</b> walk catalog keys, compare ledger ranges to the floor.</div>
+  </details>
+
+  <div class="note">
+    None of the invariants reference the phase scans that maintain them — so a bug in any scan shows up as a
+    <em>real invariant violation</em>, not as something the buggy code silently considers acceptable. An
+    <code>audit</code> admin command can implement the walks directly.
+  </div>
+
+  <div class="note warn">
+    <b>Surgical recovery (tainted data).</b> The operator never touches the filesystem — recovery is one
+    atomic catalog batch that <em>demotes</em> keys. Tainted cold artifacts go to
+    <span class="kv st-freezing">"freezing"</span>, and backfill re-derives them. For the hot tier, demote
+    <b>every <code>hot:chunk</code> at or above the lowest tainted chunk — the live chunk always included</b> —
+    to <span class="kv st-transient">"transient"</span>. Why the whole tail, not just the tainted chunk: the
+    hot tier is repaired only by re-ingestion, which replays <em>forward</em> from the last committed ledger
+    (the highest <span class="kv st-ready">"ready"</span> hot chunk). To replay a tainted hot chunk the
+    watermark must first fall <em>below</em> it — and since it's the max over all
+    <span class="kv st-ready">"ready"</span> keys, that means demoting every hot DB at or above the lowest
+    tainted one. Then captive core re-ingests the tail forward; the untainted chunks swept up in the demotion
+    are re-derived byte-identically. (A lost hot volume is the same recovery, triggered by loss rather than
+    taint.)
+  </div>
+
+  <h3>What a bug looks like</h3>
+  <p>Common bugs land as concrete, detectable violations:</p>
+  <table class="t">
+    <tr><th>Symptom</th><th>Violates</th><th>Detected by</th></tr>
+    <tr><td>A key flips <span class="kv st-frozen">"frozen"</span> before fsync; key's <code>{lo,hi}</code> doesn't match the file; a frozen file mutated post-freeze</td><td><span class="invchip">INV-1</span></td><td>re-derive via a conformant backend, byte-compare</td></tr>
+    <tr><td>Pruning too aggressive — an in-retention read returns wrong/missing results</td><td><span class="invchip">INV-1</span></td><td>issue reads</td></tr>
+    <tr><td>Two frozen index keys in one window (promotion and demotion landed as separate writes)</td><td><span class="invchip">INV-2</span></td><td>walk <code>index:*</code>, count "frozen" per window</td></tr>
+    <tr><td>A <span class="kv st-freezing">"freezing"</span>/<span class="kv st-pruning">"pruning"</span> key survives at settled</td><td><span class="invchip">INV-2</span></td><td>walk keys for transient values at settled</td></tr>
+    <tr><td>A hot DB persists for a chunk cold artifacts fully serve</td><td><span class="invchip">INV-2</span></td><td>walk <code>hot:chunk:*</code> against coverage</td></tr>
+    <tr><td>Finalization demotions don't complete — <code>.bin</code> keys outlive their terminal index</td><td><span class="invchip">INV-2</span></td><td>walk <code>chunk:c:txhash</code> in finalized windows</td></tr>
+    <tr><td>A file on disk without its key (orphan — invisible to every key-driven scan)</td><td><span class="invchip">INV-3</span></td><td>walk filesystem against catalog</td></tr>
+    <tr><td>A key without its file (dangling)</td><td><span class="invchip">INV-3</span></td><td>walk catalog against filesystem</td></tr>
+    <tr><td>Duplicate cold artifacts for the same logical data</td><td><span class="invchip">INV-3</span></td><td>walk filesystem against key-specified paths</td></tr>
+    <tr><td>Files or keys remain below the retention floor</td><td><span class="invchip">INV-4</span></td><td>walk keys against the floor</td></tr>
+  </table>
+
+  <h3>Why convergence works</h3>
+  <p>Three properties shared by the resolver and the scans, plus backfill's postcondition contract:</p>
+  <ul>
+    <li><b>Eligibility from durable state alone</b> — every decision derives from catalog keys; nothing depends on in-memory history.</li>
+    <li><b>Idempotent ops</b> — re-running any half-finished op is safe; re-materialization overwrites at canonical paths, sweeps re-run until the key is gone.</li>
+    <li><b>Everything re-derived on every notification</b> — there is no persisted plan to drift.</li>
+  </ul>
+  <p>
+    Runtime op failure aborts the daemon (after bounded retries) rather than deferring silently — safe
+    because <b>startup is the recovery path</b>: every state a run can leave behind is one that startup is
+    built to converge from.
+  </p>
+</section>
+
+<footer>
+  Interactive companion to <a href="full-history-streaming-workflow.md"><code>full-history-streaming-workflow.md</code></a>
+  (the daemon: backfill, ingestion, lifecycle, invariants) and
+  <a href="gettransaction-full-history-design.md"><code>gettransaction-full-history-design.md</code></a>
+  (the tx-by-hash subsystem: formats, the rolling index, the read path) —
+  the markdown is the normative spec; numbers here (chunk = 10,000 ledgers, window default = 1000 chunks,
+  build ≈ 1 min) come from those docs and the <code>bench-fullhistory</code> measurements they cite.
+  Re-synced to the current docs 2026-06-18. Self-contained; no external dependencies.
+</footer>
+
+<style>
+/* dimmed chunk tile — presumably the rolling-window simulator's not-yet-reached chunks; confirm against the markup that emits .chunk */
+.chunk.future { opacity: .2; }
+/* derived-progress widget: COLD/HOT term boxes (.dterm/.tbox) and per-chunk cards (.dchunk);
+   the .livec / .hotc / .lost modifiers come from each preset's `cls` field */
+.dterm { display:flex; flex-wrap:wrap; gap:10px; margin-top:14px; }
+.dterm .tbox { flex:1; min-width:220px; background:#11161f; border:1px solid var(--line); border-radius:9px; padding:10px 14px; font-size:.84rem; }
+.dterm .tbox .h { font-size:.72rem; text-transform:uppercase; letter-spacing:.07em; color:var(--muted); margin-bottom:4px; }
+.dterm .tbox .v { font-family:var(--mono); color:var(--head); }
+.dchunks { display:flex; gap:8px; flex-wrap:wrap; margin-top:8px; }
+.dchunk { border:1px solid var(--line); border-radius:9px; background:var(--panel2); padding:8px 12px; min-width:118px; font-size:.78rem; }
+.dchunk .id { font-family:var(--mono); color:var(--head); margin-bottom:4px; }
+.dchunk .b { display:block; color:var(--muted); }
+.dchunk.livec { border-color:var(--ready); }
+.dchunk.hotc { border-color:var(--hot); }
+.dchunk.lost { border-color:var(--pruning); }
+/* executePlan dependency graph */
+.ep-sem { display:flex; flex-wrap:wrap; align-items:center; gap:8px; margin:6px 0 12px; }
+.ep-sem-label { color:var(--muted); font-family:var(--mono); font-size:.76rem; }
+.ep-slot { font-family:var(--mono); font-size:.78rem; padding:4px 12px; border-radius:7px; border:1px solid var(--line); background:#11161f; color:var(--muted); min-width:98px; text-align:center; }
+.ep-slot.busy { border-color:var(--ready); color:var(--ready); background:rgba(63,185,143,.1); }
+.ep-sem-q { color:var(--muted); font-size:.76rem; margin-left:4px; font-family:var(--mono); }
+.ep-graph-wrap { overflow-x:auto; }
+.ep-svg { width:100%; min-width:560px; height:auto; display:block; margin:4px 0 6px; }
+/* startup backfill loop */
+.su-pass { border:1px solid var(--line); border-left:3px solid var(--accent); border-radius:0 9px 9px 0; background:#11161f; padding:10px 14px; margin:8px 0; }
+.su-pass.brk { border-left-color:var(--muted); }
+.su-pass.serve { border-left-color:var(--ready); background:rgba(63,185,143,.05); }
+.su-h { font-weight:700; color:var(--head); font-size:.9rem; margin-bottom:4px; }
+.su-row { font-size:.84rem; color:var(--text); font-family:var(--mono); padding:2px 0; line-height:1.45; }
+.su-row.dim { color:var(--muted); }
+.su-row.trim { color:var(--freezing); }
+.su-row.break { color:var(--muted); }
+.su-row .ok { color:var(--ready); }
+.su-row b { color:var(--head); }
+</style>
+<script>
+(function(){
+"use strict";
+const $ = (s, r) => (r||document).querySelector(s);
+const $$ = (s, r) => Array.from((r||document).querySelectorAll(s));
+const fmt = n => n.toLocaleString("en-US");
+const pad = (n, w) => String(n).padStart(w, "0");
+const p8 = n => pad(n, 8), p5 = n => pad(n, 5);
+const esc = s => s.replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;");
+
+/* ---------------- scroll spy ---------------- */
+{
+  const links = $$("#toc a");
+  const map = new Map(links.map(a => [a.getAttribute("href").slice(1), a]));
+  const vis = new Set();
+  const obs = new IntersectionObserver(es => {
+    es.forEach(e => e.isIntersecting ? vis.add(e.target.id) : vis.delete(e.target.id));
+    const order = $$("main section").map(s => s.id).filter(id => vis.has(id));
+    links.forEach(a => a.classList.remove("active"));
+    if (order.length) map.get(order[0])?.classList.add("active");
+  }, { rootMargin: "-10% 0px -55% 0px" });
+  $$("main section").forEach(s => obs.observe(s));
+}
+
+/* ---------------- geometry ---------------- */
+{
+  const LPC = 10000, CPI = 1000;
+  const seqIn = $("#geo-seq"), slider = $("#geo-slider");
+  function render(seq) {
+    seq = Math.max(2, Math.min(60000001, Math.floor(seq) || 2));
+    seqIn.value = seq; slider.value = seq;
+    const chunk = Math.floor((seq - 2) / LPC);
+    const cFirst = chunk * LPC + 2, cLast = (chunk + 1) * LPC + 1;
+    const win = Math.floor(chunk / CPI);
+    const wcLo = win * CPI, wcHi = (win + 1) * CPI - 1;
+    const wFirst = wcLo * LPC + 2, wLast = (wcHi + 1) * LPC + 1;
+    const bucket = Math.floor(chunk / 1000);
+    $("#geo-win-label").textContent = p8(win);
+    $("#geo-chunk-label").textContent = p8(chunk);
+    $("#geo-win-lo").textContent = "chunk " + fmt(wcLo);
+    $("#geo-win-hi").textContent = "chunk " + fmt(wcHi);
+    $("#geo-chunk-lo").textContent = "seq " + fmt(cFirst);
+    $("#geo-chunk-hi").textContent = "seq " + fmt(cLast);
+    const wb = $("#geo-win-bar"), cb = $("#geo-chunk-bar");
+    const wfrac = (chunk - wcLo + (seq - cFirst) / LPC) / CPI;
+    const cfrac = (seq - cFirst) / LPC;
+    wb.querySelector(".fill").style.width = (wfrac * 100) + "%";
+    wb.querySelector(".marker").style.left = (wfrac * 100) + "%";
+    wb.querySelector(".lbl").textContent = "chunk " + fmt(chunk) + " — " + fmt(chunk - wcLo + 1) + " of 1000 in window " + win;
+    cb.querySelector(".fill").style.width = (cfrac * 100) + "%";
+    cb.querySelector(".marker").style.left = (cfrac * 100) + "%";
+    cb.querySelector(".lbl").textContent = "ledger " + fmt(seq - cFirst + 1) + " of 10,000";
+    $("#geo-readout").innerHTML = [
+      '<div><span class="dim">chunkID(seq) = floor((seq − 2) / 10,000) =</span> <b>' + fmt(chunk) + "</b></div>",
+      '<div><span class="dim">chunk spans</span> <b>' + fmt(cFirst) + " – " + fmt(cLast) + "</b></div>",
+      '<div><span class="dim">indexID(chunk) = chunk / 1000 =</span> <b>' + fmt(win) + "</b></div>",
+      '<div><span class="dim">window spans chunks</span> <b>' + fmt(wcLo) + "–" + fmt(wcHi) + '</b> <span class="dim">= ledgers</span> <b>' + fmt(wFirst) + " – " + fmt(wLast) + "</b></div>",
+      "<div style='margin-top:10px'><span class='dim'>pack:&nbsp;&nbsp;</span>ledgers/" + p5(bucket) + "/" + p8(chunk) + ".pack</div>",
+      "<div><span class='dim'>events:</span>events/" + p5(bucket) + "/" + p8(chunk) + "-events.pack</div>",
+      "<div><span class='dim'>bin:&nbsp;&nbsp;&nbsp;</span>txhash/raw/" + p5(bucket) + "/" + p8(chunk) + ".bin <span class='dim'>(transient)</span></div>",
+      "<div><span class='dim'>idx:&nbsp;&nbsp;&nbsp;</span>txhash/index/" + p8(win) + "/{lo}-{hi}.idx</div>",
+    ].join("");
+  }
+  seqIn.addEventListener("input", () => render(+seqIn.value));
+  slider.addEventListener("input", () => render(+slider.value));
+  $$("button[data-geo]").forEach(b => b.addEventListener("click", () => render(+b.dataset.geo)));
+  render(53510001);
+}
+
+/* ---------------- lifecycle state machines ---------------- */
+{
+  const MACHINES = {
+    chunk: { states: ["absent","ingesting","freezing","frozen","pruning","absent"], cls: {ingesting:"st-ready"},
+      detail: {
+        absent: "<b>Absent</b> — no artifact key, no immutable file. Either never produced, or fully swept (the lifecycle is a loop: pruning returns here).",
+        ingesting: "<b>Ingesting</b> — the chunk's hot DB holds the data being written; the artifact key is absent; the immutable file doesn't exist yet. Not a key state — this is the phase before <code>processChunk</code> runs.",
+        freezing: "<b>Freezing</b> — <code>processChunk</code> put <span class='kv st-freezing'>\"freezing\"</span> and is materializing the file, which may be partial on disk. <b>Crash here:</b> detectable from the key value alone — within retention the resolver schedules re-materialization (overwrite at the canonical path); past retention the prune scan deletes the partial file.",
+        frozen: "<b>Frozen</b> — the file is fsynced at its canonical path. Trusted blindly by readers and the resolver: flips happen only after fsync, and files are deleted only under non-frozen keys. Once all three artifacts are frozen <em>and</em> the rolling index covers the chunk, the hot DB is discarded.",
+        pruning: "<b>Pruning</b> — retention (or a terminal commit, for <code>.bin</code>s) is deleting the file; it may or may not still exist. <b>Crash here:</b> the key outlives the durable unlink, so the sweep simply re-runs: unlink → <code>fsyncDir</code> → delete key.",
+      }},
+    index: { states: ["absent","freezing","frozen","pruning","absent"],
+      detail: {
+        absent: "<b>Absent</b> — no coverage key under this window (a young window before its first build, or a fully retired one).",
+        freezing: "<b>Freezing</b> — the key was put (with its coverage in the name) <em>before</em> any I/O; the file may be partial or absent. A crashed attempt parks here. If its coverage is built again, the build re-marks the key and rewrites the file wholesale; one the prune scan observes was <em>not</em> retried — its coverage is no longer desired — so: <b>delete file and key</b> (a rebuild re-derives identical bytes).",
+        frozen: "<b>Frozen</b> — file and dirent fsynced, commit batch landed. The window's unique frozen coverage <em>is</em> the live index — readers resolve it with no tie-break and no value parsing.",
+        pruning: "<b>Pruning</b> — a newer coverage superseded this one (demoted in its commit batch), or retention is removing the window. The standard sweep finishes the removal; the eager sweep inside <code>buildThenSweep</code> usually gets there first.",
+      }},
+    hot: { states: ["absent","transient (creating)","ready","transient (deleting)","absent"], cls: {"transient (creating)":"st-transient","transient (deleting)":"st-transient",ready:"st-ready"},
+      detail: {
+        absent: "<b>Absent</b> — no <code>hot:chunk</code> key, no directory on disk.",
+        "transient (creating)": "<b>Transient</b> — a directory operation is in flight. <b>Crash here:</b> a possibly-partial dir; the open path wipes and recreates. One value covers both directions because the recovery is identical either way — that's why no code path ever needs to know <em>which</em> operation was interrupted.",
+        ready: "<b>Ready</b> — dir exists and is usable. Contents run up to the last committed ledger's position in the chunk. <b>A \"ready\" key whose dir is missing means the hot volume was lost</b> — startup fails when the resume-point derivation tries to open it (never silently wiped, since a missing dir could also be an unmounted volume). Recovery is surgical recovery: demote the orphaned key to \"transient\" and re-ingest forward.",
+        "transient (deleting)": "<b>Transient</b> — discard is rmdir'ing the dir. <b>Crash here:</b> the discard scan re-runs. Same recovery as the creating direction.",
+      }},
+  };
+  const STCLS = { absent:"st-absent", freezing:"st-freezing", frozen:"st-frozen", pruning:"st-pruning", ready:"st-ready" };
+  Object.entries(MACHINES).forEach(([name, m]) => {
+    const host = $('.sm[data-machine="'+name+'"]');
+    const det = $('.sm-detail[data-detail="'+name+'"]');
+    m.states.forEach((st, i) => {
+      if (i) { const a = document.createElement("span"); a.className = "arr"; a.textContent = "→"; host.appendChild(a); }
+      const b = document.createElement("button");
+      const cls = (m.cls && m.cls[st]) || STCLS[st.split(" ")[0]] || "st-absent";
+      b.className = "state " + cls; b.textContent = st;
+      b.addEventListener("click", () => {
+        $$(".state", host).forEach(x => x.classList.remove("sel"));
+        b.classList.add("sel");
+        det.innerHTML = m.detail[st];
+      });
+      host.appendChild(b);
+    });
+  });
+}
+
+/* ---------------- crash simulator ---------------- */
+{
+  const F_OK = "ok", F_PART = "partial", F_DIR = "dir";
+  const PROTOS = [
+    { id:"freeze", label:"Freeze a chunk's artifacts", intro:"processChunk(5350, {ledgers, events, txhash}) — sourced from the chunk's complete hot DB.",
+      steps: [
+        { t:"Mark — one synced batch, before any I/O",
+          d:"chunk:00005350:{ledgers, events, txhash} ← \"freezing\". The pre-write mark is what guarantees <em>any file on disk has its key set</em> — every later scan is key-driven; nothing ever lists a directory to find work.",
+          meta:[["chunk:00005350:ledgers","freezing"],["chunk:00005350:events","freezing"],["chunk:00005350:txhash","freezing"]],
+          files:[["hot/00005350/",F_DIR,"complete hot DB — the source"]],
+          crash:"Keys read \"freezing\", no cold files exist. The resolver classifies on frozen state only → it schedules re-materialization, and the writer overwrites at the canonical path. If the chunk has meanwhile fallen past retention, the prune scan sweeps the keys instead." },
+        { t:"Write — one streaming pass over 10,000 LCMs",
+          d:"Only the requested extractors run. Writes the .pack and events segment as it streams; collects ~3M txhash entries and sorts them in memory (≈60 MB) before writing the .bin — sorted per-chunk runs are what make every-boundary index rebuilds a single streaming merge.",
+          meta:[["chunk:00005350:ledgers","freezing"],["chunk:00005350:events","freezing"],["chunk:00005350:txhash","freezing"]],
+          files:[["hot/00005350/",F_DIR,"source"],["ledgers/00005/00005350.pack",F_PART,"being written"],["events/00005/00005350-events.pack",F_PART,"being written"],["txhash/raw/00005/00005350.bin",F_PART,"being written"]],
+          crash:"Partial files under \"freezing\" keys. Harmless: readers resolve only \"frozen\" keys, and every partial is reachable from its key. Recovery re-materializes — files are (re)created at canonical paths, overwriting wholesale." },
+        { t:"FsyncAll — files, parent dirents, grandparent if new bucket",
+          d:"Each file, its parent dirent, and the grandparent dirent when the bucket dir was just created (every 1000th chunk). Fsyncing the directory entries — not just the file — is what makes a file's existence on disk durable before its key flips to \"frozen\".",
+          meta:[["chunk:00005350:ledgers","freezing"],["chunk:00005350:events","freezing"],["chunk:00005350:txhash","freezing"]],
+          files:[["hot/00005350/",F_DIR,"source"],["ledgers/00005/00005350.pack",F_OK,"durable"],["events/00005/00005350-events.pack",F_OK,"durable"],["txhash/raw/00005/00005350.bin",F_OK,"durable"]],
+          crash:"Files are durable but the keys never flipped — still \"freezing\", so still re-materialized. The rewrite is byte-identical (deterministic LCM bytes from any conformant backend), so salvage is never attempted." },
+        { t:"Flip — one synced batch to \"frozen\"",
+          d:"All three keys flip together. From here the artifacts are truth: the resolver self-skips them, readers may resolve them, and only a sweep (which demotes first) can ever touch the files.",
+          meta:[["chunk:00005350:ledgers","frozen"],["chunk:00005350:events","frozen"],["chunk:00005350:txhash","frozen"]],
+          files:[["hot/00005350/",F_DIR,"stays until the index covers 5350"],["ledgers/00005/00005350.pack",F_OK,"durable"],["events/00005/00005350-events.pack",F_OK,"durable"],["txhash/raw/00005/00005350.bin",F_OK,"durable"]],
+          crash:"Nothing to recover — the work is done and idempotency makes any re-run a no-op. The hot DB keeps serving tx lookups until the rolling index covers the chunk." },
+      ]},
+    { id:"rebuild", label:"Rebuild the index", intro:"buildTxhashIndex(w5, 5100, 5350) — the boundary rebuild, widening coverage [5100,5349] → [5100,5350].",
+      steps: [
+        { t:"Skip check",
+          d:"If the window's unique frozen key already covers exactly [lo, hi] → return. This also covers re-scheduled builds of finalized windows, which must not demand .bin inputs the sweep has deleted. (Every input .bin exists by the time this runs: an index build only starts once its in-coverage chunk builds have closed their done-channels on success.)",
+          meta:[["index:00000005:00005100:00005349","frozen"]],
+          files:[["txhash/index/00000005/00005100-00005349.idx",F_OK,"the live index"]],
+          crash:"Nothing has happened yet." },
+        { t:"Mark the new coverage",
+          d:"index:00000005:00005100:00005350 ← \"freezing\". The coverage is the whole identity. Re-marking a crashed attempt's key is an idempotent overwrite; the file is rewritten wholesale either way.",
+          meta:[["index:00000005:00005100:00005349","frozen"],["index:00000005:00005100:00005350","freezing"]],
+          files:[["txhash/index/00000005/00005100-00005349.idx",F_OK,"the live index — readers unaffected"]],
+          crash:"The predecessor stays frozen — readers never notice. The new key is \"freezing\" debris: the next build of this coverage re-marks and overwrites it; if the coverage is no longer desired, the prune scan deletes file and key unread." },
+        { t:"Write the coverage's file",
+          d:"K-way merge of the sorted .bin files for chunks [5100, 5350] through streamhash's SortedBuilder → 00005100-00005350.idx, created or truncated wholesale. Fsync the file and the window dir. The file readers hold is never a writer's target: a file is writable only under a key that has never been \"frozen\" in this run.",
+          meta:[["index:00000005:00005100:00005349","frozen"],["index:00000005:00005100:00005350","freezing"]],
+          files:[["txhash/index/00000005/00005100-00005349.idx",F_OK,"the live index — readers unaffected"],["txhash/index/00000005/00005100-00005350.idx",F_OK,"written + fsynced, not yet committed"]],
+          crash:"Same story as the mark step: predecessor frozen, new coverage is debris (partial or even complete — irrelevant; deleted unread or rewritten wholesale). The two files coexisting is why the window dir transiently holds ~2× the index size." },
+        { t:"Commit — one atomic synced batch",
+          d:"{[5100,5350] → \"frozen\", [5100,5349] → \"pruning\"} in a single write. A terminal build (hi = the window's last chunk — derived, marked nowhere) adds every chunk:c:txhash in [lo, hi] → \"pruning\": the batch <em>is</em> the entire finalization protocol. Note the batch only ever demotes keys — no file is unlinked here, ever.",
+          meta:[["index:00000005:00005100:00005349","pruning"],["index:00000005:00005100:00005350","frozen"]],
+          files:[["txhash/index/00000005/00005100-00005349.idx",F_OK,"superseded, awaiting sweep"],["txhash/index/00000005/00005100-00005350.idx",F_OK,"the live index"]],
+          crash:"The new coverage is live; the demotions are ordinary sweep work the next run finishes. At no crash instant are two coverages frozen, or none, or a frozen chunk:c:txhash key whose .bin is gone." },
+        { t:"Eager sweep (buildThenSweep)",
+          d:"Right after the commit, in both regimes: unlink 00005100-00005349.idx → fsyncDir → delete its key (plus the demoted .bins, for a terminal build). Window-local — concurrent windows' sweeps touch disjoint keys. The run's prune scan is the crash backstop.",
+          meta:[["index:00000005:00005100:00005350","frozen"]],
+          files:[["txhash/index/00000005/00005100-00005350.idx",F_OK,"the live index"]],
+          crash:"A crash mid-sweep leaves the \"pruning\" key in place (the key outlives the durable unlink), and the next prune scan finishes. The eager site is what bounds transient .bin disk to the windows actually in flight." },
+      ]},
+    { id:"sweep", label:"Sweep (delete an artifact)", intro:"sweepChunkArtifacts / sweepIndexKey — the system's only two deletion bodies, one per key family, identical internal shape.",
+      steps: [
+        { t:"Demote first — never unlink under a \"frozen\" key",
+          d:"If the key still reads \"frozen\", put \"pruning\" (synced) before touching the file. A crash mid-sweep must never leave a frozen key whose file is gone — \"frozen\" must stay blindly trustable.",
+          meta:[["chunk:00004777:ledgers","pruning"]],
+          files:[["ledgers/00004/00004777.pack",F_OK,"past retention, queued"]],
+          crash:"Key reads \"pruning\", file intact. The next scan re-runs the sweep from the top." },
+        { t:"Unlink the file(s)",
+          d:"Idempotent on already-gone paths — a re-run after a crash deletes nothing twice and errors on nothing missing.",
+          meta:[["chunk:00004777:ledgers","pruning"]],
+          files:[],
+          crash:"Key still present, unlink may or may not have hit the disk. Either way the re-run converges." },
+        { t:"fsyncDir the parent",
+          d:"Makes the unlink durable <em>before</em> the key delete commits, so the key always outlives the file — a crash anywhere leaves the key in place and the sweep re-runs from the top.",
+          meta:[["chunk:00004777:ledgers","pruning"]],
+          files:[],
+          crash:"Same as above — the key is the sentinel that keeps the sweep re-runnable." },
+        { t:"Delete the key (batched)",
+          d:"Gives the exit-side counterpart of mark-then-write: <b>key absent ⟹ file gone</b>. Many sweeps batch their fsyncs and key-deletes when sweeping en masse.",
+          meta:[],
+          files:[],
+          crash:"Done. Absent key + absent file is the lifecycle's terminal state — indistinguishable from \"never existed\", which is exactly the point." },
+      ]},
+  ];
+  const tabs = $("#proto-tabs"), list = $("#proto-steps"), panel = $("#proto-state");
+  let cur = PROTOS[0], sel = 0;
+  function stChip(v) { return '<span class="kv st-'+v+'">"'+v+'"</span>'; }
+  function renderPanel() {
+    const s = cur.steps[sel];
+    let h = '<div class="step-desc">' + s.d + "</div>";
+    h += '<h5>Catalog — after this step</h5><div class="grp">';
+    h += s.meta.length ? s.meta.map(([k,v]) => '<div class="mrow"><span class="k">'+k+'</span>'+stChip(v)+"</div>").join("") : '<div class="mrow"><span class="k" style="color:var(--muted)">— no keys —</span></div>';
+    h += "</div><h5>Filesystem</h5><div class='grp'>";
+    h += s.files.length ? s.files.map(([p,st,note]) => '<div class="frow"><span class="fdot '+st+'"></span><span>'+p+(note?' <span style="color:var(--muted)">· '+note+"</span>":"")+"</span></div>").join("") : '<div class="frow"><span style="color:var(--muted)">— nothing —</span></div>';
+    h += "</div>";
+    h += '<div class="crash-box"><div class="t">💥 IF THE PROCESS DIES HERE</div><div style="margin-top:4px">'+s.crash+"</div></div>";
+    panel.innerHTML = h;
+  }
+  function renderSteps() {
+    list.innerHTML = "";
+    const intro = document.createElement("div");
+    intro.style.cssText = "font-size:.84rem;color:var(--muted);margin-bottom:6px;font-family:var(--mono)";
+    intro.textContent = cur.intro;
+    list.appendChild(intro);
+    cur.steps.forEach((s, i) => {
+      const d = document.createElement("div");
+      d.className = "step" + (i === sel ? " sel" : "");
+      d.innerHTML = '<span class="n">'+(i+1)+"</span><span>"+s.t+"</span>";
+      d.addEventListener("click", () => { sel = i; renderSteps(); renderPanel(); });
+      list.appendChild(d);
+    });
+  }
+  PROTOS.forEach((p, i) => {
+    const b = document.createElement("button");
+    b.className = "btn" + (i === 0 ? " on" : ""); b.textContent = p.label;
+    b.addEventListener("click", () => {
+      $$(".btn", tabs).forEach(x => x.classList.remove("on")); b.classList.add("on");
+      cur = p; sel = 0; renderSteps(); renderPanel();
+    });
+    tabs.appendChild(b);
+  });
+  renderSteps(); renderPanel();
+}
+
+/* ---------------- derived progress ---------------- */
+{
+  const PRESETS = [
+    { label:"Steady state",
+      chunks:[
+        {id:5348, cls:"", b:["❄ artifacts frozen","covered by .idx"]},
+        {id:5349, cls:"", b:["❄ artifacts frozen","covered by .idx"]},
+        {id:5350, cls:"hotc", b:["hot:chunk = \"ready\"","complete; freeze in flight"]},
+        {id:5351, cls:"livec", b:["hot:chunk = \"ready\"","LIVE — mid-chunk"]},
+      ],
+      cold:"end of chunk 5349 = 53,500,001", pos:"read live hot DB 5351 → maxCommittedSeq = 53,514,200",
+      wm:"max(53,500,001, 53,514,200) = 53,514,200 → resume at 53,514,201",
+      note:"The <b>HOT term leads</b>: the live chunk's hot DB holds the frontier, read once. The key-creation invariant guarantees everything below the highest hot key (5351) is already committed, so that single read is the only sub-chunk work — and it is safe because this runs at startup, before ingestion writes the live DB." },
+    { label:"Startup, after backfill",
+      chunks:[
+        {id:5348, cls:"", b:["❄ artifacts frozen"]},
+        {id:5349, cls:"", b:["❄ artifacts frozen"]},
+        {id:5350, cls:"", b:["❄ artifacts frozen"]},
+        {id:5351, cls:"", b:["— nothing yet —"]},
+      ],
+      cold:"end of chunk 5350 = 53,510,001", pos:"no hot keys — HOT term absent",
+      wm:"max(53,510,001, —) = 53,510,001 → resume at 53,510,002",
+      note:"Here the resume hot DB isn't open yet, so there are no hot keys and the <b>COLD term carries it</b>. <code>highestDurableChunk</code> counts a chunk only when <code>pendingArtifacts()</code> is empty — not merely \"ledgers frozen\": a crash mid-freeze can leave ledgers frozen while events is still \"freezing\", and counting that chunk would let reads open over a partial artifact." },
+    { label:"Boundary-crash corner",
+      chunks:[
+        {id:5348, cls:"", b:["❄ artifacts frozen"]},
+        {id:5349, cls:"", b:["❄ artifacts frozen"]},
+        {id:5350, cls:"hotc", b:["hot:chunk = \"ready\"","complete; handle closed","crash before 5351's key"]},
+        {id:5351, cls:"", b:["key never created"]},
+      ],
+      cold:"end of chunk 5349 = 53,500,001", pos:"read live hot DB 5350 → maxCommittedSeq = 53,510,001",
+      wm:"max(53,500,001, 53,510,001) = 53,510,001 → resume at 53,510,002 (chunk 5351)",
+      note:"A crash <em>between</em> closing 5350's handle and creating 5351's key leaves 5350 as the highest hot key, complete. <b>This is why HOT reads the DB, not the keys</b>: the keys alone can't tell a complete 5350 from a still-filling one, so they'd stop at 5349. Reading 5350's hot DB recovers the exact frontier — end of 5350 — and resume picks up at 5351." },
+    { label:"Lost hot volume",
+      chunks:[
+        {id:5348, cls:"", b:["❄ artifacts frozen"]},
+        {id:5349, cls:"", b:["❄ artifacts frozen"]},
+        {id:5350, cls:"lost", b:["hot:chunk = \"ready\"","✖ dir MISSING"]},
+        {id:5351, cls:"lost", b:["hot:chunk = \"ready\"","✖ dir MISSING"]},
+      ],
+      cold:"end of chunk 5349", pos:"derivation opens the highest \"ready\" hot chunk (5351) → dir is gone",
+      wm:"startup FAILS: the resume-point derivation can't open the missing hot DB",
+      note:"The hot volume is gone (e.g. ephemeral NVMe died) while the catalog survives, so its hot:chunk keys still read \"ready\" with no dir on disk. Startup fails the moment the derivation tries to open the highest such chunk. Recovery is <b>surgical recovery</b> (scenario 3) — <b>key demotion, never file surgery</b>: demote the orphaned hot keys to \"transient\" and restart. lastCommittedLedger counts only \"ready\" keys, so it lands at the last frozen boundary (end of 5349) on its own, and captive core re-ingests the lost tail forward. There is no stored watermark to edit." },
+  ];
+  const host = $("#derived-presets"), viz = $("#derived-viz");
+  function render(p) {
+    let h = '<div class="dchunks">' + p.chunks.map(c =>
+      '<div class="dchunk '+c.cls+'"><div class="id">chunk '+fmt(c.id)+"</div>"+c.b.map(x=>'<span class="b">'+x+"</span>").join("")+"</div>").join("") + "</div>";
+    h += '<div class="dterm">';
+    h += '<div class="tbox"><div class="h">COLD term</div><div class="v">'+p.cold+"</div></div>";
+    h += '<div class="tbox"><div class="h">HOT term</div><div class="v">'+p.pos+"</div></div>";
+    h += '<div class="tbox" style="flex-basis:100%"><div class="h">lastCommittedLedger = max(COLD, HOT)</div><div class="v">'+p.wm+"</div></div>";
+    h += "</div>";
+    h += '<div class="note" style="margin-bottom:0">'+p.note+"</div>";
+    viz.innerHTML = h;
+  }
+  PRESETS.forEach((p, i) => {
+    const b = document.createElement("button");
+    b.className = "btn" + (i===0?" on":""); b.textContent = p.label;
+    b.addEventListener("click", () => { $$(".btn",host).forEach(x=>x.classList.remove("on")); b.classList.add("on"); render(p); });
+    host.appendChild(b);
+  });
+  render(PRESETS[0]);
+}
+
+/* ---------------- rolling-window simulator ---------------- */
+{
+  const W = 8, RET = 12;
+  const winsHost = $("#roll-windows"), logHost = $("#roll-log");
+  const retBox = $("#roll-retention");
+  let S, timer = null;
+  function reset() {
+    S = { tip: 0, chunks: new Map(), cov: new Map(), finalized: new Set(), floor: 0, run: 0 };
+    S.chunks.set(0, { frozen:false, bin:false, hot:true, pruned:false });
+    logHost.innerHTML = "<div>Press “Advance one boundary”. Chunk 0 is live — its hot DB is being written.</div>";
+    render();
+  }
+  function covName(w) {
+    const c = S.cov.get(w);
+    return c ? p8(c.lo)+"-"+p8(c.hi)+".idx" : null;
+  }
+  function step() { // advance one chunk boundary: ingestion rolls the tip, then one lifecycle run (freeze → index → discard → prune) is simulated, logged and repainted
+    const lines = []; // log lines for this run, in stage order
+    const c = S.tip; // the chunk that just completed
+    const ch = S.chunks.get(c);
+    // ingestion: boundary — the old tip closes and chunk c+1 becomes the live hot chunk
+    S.tip = c+1;
+    S.chunks.set(S.tip, { frozen:false, bin:false, hot:true, pruned:false });
+    lines.push("boundary: chunk "+c+"'s last ledger commits → handle closed → hot DB "+S.tip+" opened → notify()");
+    // lifecycle run
+    S.run++;
+    const floorC = retBox.checked ? Math.max(0, c - RET + 1) : 0; // floor keeps chunks c-RET+1 … c; 0 when retention is off
+    S.floor = floorC;
+    // plan: ChunkBuild c — mark the completed chunk frozen with its .bin present
+    ch.frozen = true; ch.bin = true;
+    lines.push("plan: ChunkBuild "+c+" — .pack / events / .bin frozen (source: its hot DB)");
+    // IndexBuild for c's window — the new coverage replaces whatever the window had before
+    const w = Math.floor(c / W);
+    const lo = Math.max(w*W, floorC), hi = c; // from the window start (or the floor, if higher) up to the chunk just frozen
+    const prev = S.cov.get(w);
+    S.cov.set(w, { lo, hi });
+    let m = "plan: IndexBuild w"+w+" → "+p8(lo)+"-"+p8(hi)+".idx frozen";
+    if (prev) m += " · predecessor "+p8(prev.lo)+"-"+p8(prev.hi)+".idx demoted → eagerly swept";
+    lines.push(m);
+    if (hi === (w+1)*W - 1) { // c is the window's last chunk → terminal build: finalize and drop the window's .bin flags
+      S.finalized.add(w);
+      for (let i = w*W; i <= hi; i++) { const x = S.chunks.get(i); if (x) x.bin = false; }
+      lines.push("terminal build: window "+w+" FINALIZED — its .bin inputs demoted in the same commit batch, swept");
+    }
+    // discard — the completed chunk's hot DB is retired within the same run
+    ch.hot = false;
+    lines.push("discard: hot DB "+c+" retired — cold artifacts + index coverage now fully serve it");
+    // prune — only when the retention checkbox is ticked
+    if (retBox.checked) {
+      let pruned = []; // ids pruned in this run, in Map insertion (ascending) order
+      for (const [id, x] of S.chunks) {
+        if (id < floorC && !x.pruned && id < S.tip) { x.pruned = true; x.bin = false; x.hot = false; pruned.push(id); }
+      }
+      if (pruned.length) lines.push("prune: chunk"+(pruned.length>1?"s":"")+" "+pruned[0]+(pruned.length>1?"–"+pruned[pruned.length-1]:"")+" swept (below floor "+floorC+")");
+      for (const [wid, cv] of S.cov) {
+        if ((wid+1)*W - 1 < floorC) { S.cov.delete(wid); S.finalized.delete(wid); lines.push("prune: window "+wid+"'s .idx swept (wholly past the floor)"); } // window's last chunk is below the floor
+      }
+    }
+    const blk = document.createElement("div");
+    blk.innerHTML = '<div class="run">— run '+S.run+" —</div>" + lines.map(l=>"<div>"+esc(l)+"</div>").join("");
+    logHost.prepend(blk); // newest run first
+    while (logHost.children.length > 12) logHost.lastChild.remove(); // keep at most 12 log entries
+    render();
+  }
+  function render() {
+    const curW = Math.floor(S.tip / W);
+    const loW = Math.max(0, Math.floor(S.floor / W), curW - 3);
+    let h = "";
+    for (let w = loW; w <= curW; w++) {
+      const cv = S.cov.get(w);
+      const badge = S.finalized.has(w) ? '<span class="badge final">finalized</span>'
+        : (w === curW ? '<span class="badge current">current window</span>'
+        : (cv ? "" : '<span class="badge gone">pruned</span>'));
+      const fname = cv ? '<span class="fname">index/'+p8(w)+"/"+covName(w)+'</span>' : '<span style="color:var(--absent)">no frozen coverage</span>';
+      h += '<div class="win-group"><div class="win-head"><span>window '+p8(w)+" · chunks "+(w*W)+"–"+((w+1)*W-1)+" "+badge+"</span><span>"+fname
+        + (retBox.checked ? ' <span style="color:var(--pruning)">· floor@'+S.floor+"</span>" : "") + "</span></div>";
+      h += '<div class="chunk-row">';
+      for (let id = w*W; id <= (w+1)*W-1; id++) {
+        const x = S.chunks.get(id);
+        let cls = "chunk", tip = "chunk "+id;
+        if (!x) { cls += " future"; tip += " — not yet reached"; }
+        else if (id === S.tip) { cls += " live"; tip += " — LIVE: hot DB being written"; }
+        else if (x.pruned) { cls += " pruned"; tip += " — pruned (below floor)"; }
+        else if (x.frozen && x.hot) { cls += " hot"; tip += " — frozen, hot DB awaiting coverage"; }
+        else if (x.frozen) { cls += " frozen"; tip += " — frozen (.pack + events)"+(x.bin?", .bin retained for rebuilds":", .bin swept (window finalized)"); }
+        else if (x.hot) { cls += " live"; tip += " — hot"; }
+        const covered = cv && !((x&&x.pruned)) && id >= cv.lo && id <= cv.hi;
+        h += '<div class="'+cls+'" title="'+tip+'">'+((x&&x.bin)?'<span class="bin-tick" title=".bin present"></span>':"")+(covered?'<span class="cov"></span>':"")+"</div>";
+      }
+      h += "</div></div>";
+    }
+    winsHost.innerHTML = h;
+  }
+  $("#roll-step").addEventListener("click", step);
+  $("#roll-reset").addEventListener("click", () => { if (timer) { clearInterval(timer); timer=null; $("#roll-auto").classList.remove("on"); $("#roll-auto").textContent="Auto-play"; } reset(); });
+  $("#roll-auto").addEventListener("click", function() {
+    if (timer) { clearInterval(timer); timer = null; this.classList.remove("on"); this.textContent = "Auto-play"; }
+    else { timer = setInterval(step, 850); this.classList.add("on"); this.textContent = "Pause"; }
+  });
+  retBox.addEventListener("change", render);
+  reset();
+}
+
+/* ---------------- boundary walkthrough ---------------- */
+{
+  const BASE = { // the state the walkthrough starts from; every snapshot is derived from a clone of it
+    meta: [ // catalog rows: k = key, v = state (also used as the "st-<v>" CSS class); dim rows render at reduced opacity
+      { k:"hot:chunk:00005350", v:"ready" },
+      { k:"chunk:051xx:{ledgers,events} ×250", v:"frozen", dim:true },
+      { k:"chunk:051xx:txhash ×250", v:"frozen", dim:true },
+      { k:"index:00000005:00005100:00005349", v:"frozen" },
+    ],
+    files: [ // filesystem rows: p = path, kind = CSS class of the status dot, note = muted annotation
+      { p:"hot/00005350/", kind:"dir", note:"live hot DB — ledgers/txhash/events CFs" },
+      { p:"ledgers/00005/051xx.pack ×250", kind:"ok", note:"chunks 5100–5349" },
+      { p:"txhash/raw/00005/051xx.bin ×250", kind:"ok", note:"window 5's rebuild inputs" },
+      { p:"txhash/index/00000005/00005100-00005349.idx", kind:"ok", note:"the live index" },
+    ],
+    reads: [ // lines shown under "Where reads are served"
+      "tx in chunk 5350 → hot DB 5350's txhash CF",
+      "tx in chunks 5100–5349 → 00005100-00005349.idx",
+    ],
+  };
+  const STEPS = [ // ordered script; per entry: a = actor (label + CSS class), t = title in the step list, d = description (inserted as HTML), f = mutator applied to the running snapshot
+    { a:"ingestion", t:"Final batch commits",
+      d:"Seq 53,510,001 lands as one atomic synced WriteBatch across all CFs of hot/00005350 — a ledger is either fully in the hot DB or absent. The batch is the durability boundary; the loop keeps no progress variable at all.",
+      f:s => { s.files[0].note = "complete: all 10,000 ledgers durable"; mark(s, "f", 0); } },
+    { a:"ingestion", t:"hotDB.Close()",
+      d:"Chunk 5350's write handle is released before anything announces completion. Load-bearing: the next chunk's hot key is what makes 5350 visibly complete to the lifecycle's derivation, and no writer may hold the DB once a run can claim it — otherwise a run still in flight from the previous notification could rmdir a dir whose writer is live.",
+      f:s => { s.files[0].note = "complete — write handle closed (handed off)"; mark(s, "f", 0); } },
+    { a:"ingestion", t:"Open chunk 5351's hot DB",
+      d:"openHotDBForChunk: put hot:chunk:00005351 = \"transient\" → create the RocksDB dir → fsync dir + parent dirent → flip to \"ready\". The instant this key exists, chunk 5350 sits below the highest hot key: the partition has moved, and any lifecycle scan may now freeze and discard it.",
+      f:s => { s.meta.splice(1, 0, { k:"hot:chunk:00005351", v:"ready" }); mark(s, "m", 1);
+               s.files.splice(1, 0, { p:"hot/00005351/", kind:"dir", note:"new live chunk" }); mark(s, "f", 1); } },
+    { a:"ingestion", t:"notify(5350) — send on the channel",
+      d:"Non-blocking send of the completed chunk on a buffered channel. The lifecycle freezes up to the most recent chunk it receives; the chunk only says how far to go — what to build, discard, and prune is read from the catalog keys. A full channel is fatal: the lifecycle has fallen too far behind ingestion.",
+      f:s => {} },
+    { a:"lifecycle", t:"Run: derive + resolve",
+      d:"lifecycle drains the channel → chunk 5350 (last ledger 53,510,001). Floor stays at 5100 — pinned by earliest_ledger, so it doesn't slide this run. resolve diffs [5100, 5350] against the catalog → Plan{ ChunkBuild 5350 {ledgers, events, txhash}, IndexBuild w5 [5100, 5350] }. The plan is just a value — loggable, diffable, testable.",
+      f:s => {} },
+    { a:"lifecycle", t:"ChunkBuild 5350 — mark",
+      d:"One synced batch puts chunk:00005350:{ledgers, events, txhash} = \"freezing\" — before any I/O, so any file that appears next is already reachable from a key.",
+      f:s => { s.meta.push({ k:"chunk:00005350:ledgers", v:"freezing" }, { k:"chunk:00005350:events", v:"freezing" }, { k:"chunk:00005350:txhash", v:"freezing" });
+               mark(s, "m", s.meta.length-3); mark(s, "m", s.meta.length-2); mark(s, "m", s.meta.length-1); } },
+    { a:"lifecycle", t:"ChunkBuild 5350 — write + fsync",
+      d:"backfillSource picks the hot DB (\"ready\" and complete — preference 1; no refetch, and the same rule would serve a backfill caller). One streaming pass over the 10,000 LCMs writes 00005350.pack and the events segment, sorts ~3M txhash entries in memory, writes 00005350.bin; FsyncAll makes files + dirents durable.",
+      f:s => { s.files.push({ p:"ledgers/00005/00005350.pack", kind:"ok", note:"durable" },
+                            { p:"events/00005/00005350-events.pack", kind:"ok", note:"durable (+index, +hash)" },
+                            { p:"txhash/raw/00005/00005350.bin", kind:"ok", note:"sorted run, durable" });
+               mark(s, "f", s.files.length-3); mark(s, "f", s.files.length-2); mark(s, "f", s.files.length-1); } },
+    { a:"lifecycle", t:"ChunkBuild 5350 — flip to \"frozen\"",
+      d:"One batch flips all three keys. The hot DB stays — it is still the only queryable home for 5350's tx hashes (the .bin is never a serving tier; it is rebuild input).",
+      f:s => { for (let i = s.meta.length-3; i < s.meta.length; i++) { s.meta[i].v = "frozen"; mark(s, "m", i); } } },
+    { a:"lifecycle", t:"IndexBuild w5 — mark the new coverage",
+      d:"The build waited on 5350's done-channel (freeze-before-build is a plan dependency, not run choreography). Put index:00000005:00005100:00005350 = \"freezing\". The coverage is the whole identity — a retry would re-mark this same key and rewrite the same file.",
+      f:s => { s.meta.push({ k:"index:00000005:00005100:00005350", v:"freezing" }); mark(s, "m", s.meta.length-1); } },
+    { a:"lifecycle", t:"IndexBuild w5 — merge, write, fsync",
+      d:"K-way merge of 251 sorted .bin runs (chunks 5100–5350) through streamhash's SortedBuilder → 00005100-00005350.idx, created/truncated wholesale; fsync file + window dir. Readers still resolve the old frozen key — the file they hold is never a writer's target (a file is writable only under a key that has never been \"frozen\" in this run).",
+      f:s => { s.files.push({ p:"txhash/index/00000005/00005100-00005350.idx", kind:"ok", note:"written + fsynced, not yet committed" }); mark(s, "f", s.files.length-1); } },
+    { a:"lifecycle", t:"Commit batch — the atomic swap",
+      d:"One synced batch: {[5100,5350] → \"frozen\", [5100,5349] → \"pruning\"}. The window's frozen coverage changes hands atomically — never two frozen, never none. (A terminal build — hi = the window's last chunk — would demote every chunk:c:txhash in [lo, hi] in this same write; that batch is the entire finalization protocol.)",
+      f:s => { const ni = s.meta.findIndex(m=>m.k.endsWith("00005350")&&m.k.startsWith("index"));
+               const oi = s.meta.findIndex(m=>m.k.endsWith("00005349"));
+               s.meta[ni].v = "frozen"; s.meta[oi].v = "pruning"; mark(s,"m",ni); mark(s,"m",oi);
+               const fi = s.files.findIndex(f=>f.p.includes("00005100-00005350"));
+               s.files[fi].note = "the live index";
+               s.reads = ["tx in chunk 5350 → 00005100-00005350.idx (now ≤ hi)", "tx in chunk 5351 → hot DB 5351's txhash CF"]; s.readsChg = true; } },
+    { a:"lifecycle", t:"Eager sweep (buildThenSweep)",
+      d:"Unlink 00005100-00005349.idx → fsyncDir → delete its key. A reader still holding the old fd keeps reading it safely — POSIX unlink doesn't invalidate open handles — and picks up the new coverage on its next key resolution.",
+      f:s => { const oi = s.meta.findIndex(m=>m.k.endsWith("00005349")); s.meta[oi].gone = true;
+               const fi = s.files.findIndex(f=>f.p.includes("00005100-00005349")); s.files[fi].gone = true; } },
+    { a:"lifecycle", t:"Discard stage",
+      d:"Scan hot:chunk:* — chunk 5350 is complete, nothing pending, and the index covers it: cold artifacts fully serve it. hot:chunk:00005350 → \"transient\" → rmdir → delete key. Gap-free by ordering: coverage landed before discard, so a tx lookup in 5350 always had exactly one home.",
+      f:s => { s.meta[0].gone = true; s.files[0].gone = true; } },
+    { a:"lifecycle", t:"Prune stage — and settled",
+      d:"Nothing left: the eager sweep already ran, and the floor is pinned at 5100. Assertable postcondition: re-running resolve and both scans against this same snapshot yields nothing — the run finished everything its snapshot showed. The store is settled until the next chunk completes, ~14 hours away at mainnet rates.",
+      f:s => {} },
+  ];
+  function mark(s, kind, i) { (kind === "m" ? s.meta[i] : s.files[i]).chg = true; }
+  // build snapshots
+  const snaps = [structuredClone(BASE)];
+  let acc = structuredClone(BASE);
+  STEPS.forEach(st => {
+    acc = structuredClone(acc);
+    acc.meta.forEach(m => { delete m.chg; m.gone && (m.dead = true); delete m.gone; });
+    acc.meta = acc.meta.filter(m => !m.dead);
+    acc.files.forEach(f => { delete f.chg; f.gone && (f.dead = true); delete f.gone; });
+    acc.files = acc.files.filter(f => !f.dead);
+    delete acc.readsChg;
+    st.f(acc);
+    snaps.push(structuredClone(acc));
+  });
+  let cur = 0;
+  const list = $("#bd-steps"), panel = $("#bd-state");
+  function render() {
+    list.innerHTML = "";
+    STEPS.forEach((st, i) => {
+      const d = document.createElement("div");
+      d.className = "step" + (i+1 === cur ? " sel" : "") + (i+1 < cur ? " done" : "");
+      d.innerHTML = '<span class="n">'+(i+1)+'</span><span class="actor '+st.a+'">'+st.a+"</span><span>"+st.t+"</span>";
+      d.addEventListener("click", () => { cur = i+1; render(); });
+      list.appendChild(d);
+    });
+    const s = snaps[cur];
+    let h = "";
+    if (cur === 0) h += '<div class="step-desc">Initial state — mid-window settled, seconds before the boundary. Click a step or press Next.</div>';
+    else h += '<div class="step-desc">' + STEPS[cur-1].d + "</div>";
+    h += "<h5>Catalog</h5><div class='grp'>";
+    h += s.meta.map(m => '<div class="mrow'+(m.chg?" chg":"")+(m.gone?" gone":"")+(m.dim?'" style="opacity:.6':"")+'"><span class="k">'+m.k+'</span><span class="kv st-'+m.v+'">"'+m.v+'"</span></div>').join("");
+    h += "</div><h5>Filesystem</h5><div class='grp'>";
+    h += s.files.map(f => '<div class="frow'+(f.chg?" chg":"")+(f.gone?" gone":"")+'"><span class="fdot '+f.kind+'"></span><span>'+f.p+(f.note?' <span style="color:var(--muted)">· '+f.note+"</span>":"")+"</span></div>").join("");
+    h += "</div><h5>Where reads are served</h5><div class='grp'>";
+    h += s.reads.map(r => '<div class="frow'+(s.readsChg?" chg":"")+'"><span>'+esc(r)+"</span></div>").join("");
+    h += "</div>";
+    panel.innerHTML = h;
+    $("#bd-pos").textContent = cur === 0 ? "initial state" : "step " + cur + " of " + STEPS.length;
+    $("#bd-prev").disabled = cur === 0;
+    $("#bd-next").disabled = cur === STEPS.length;
+  }
+  $("#bd-next").addEventListener("click", () => { if (cur < STEPS.length) { cur++; render(); } });
+  $("#bd-prev").addEventListener("click", () => { if (cur > 0) { cur--; render(); } });
+  $("#bd-reset").addEventListener("click", () => { cur = 0; render(); });
+  render();
+}
+
+/* ---------------- resolver playground ---------------- */
+{
+  const SCEN = [ // one entry per scenario button: label, ctx (HTML), wins (per window: w, range, stored — may be null — and desired chunk intervals), plan (t = row class "cb" | "ib", x = text, s = optional source note), note (HTML)
+    { label:"Steady-state restart",
+      ctx:"Clean restart mid-window. Last committed ledger = mid-chunk 5351 (derived from the live hot DB); tip ≈ chunk 5351; floor = 5000. Backfill range: chunks [5000, 5350] — the partial resume chunk 5351 is excluded (a mid-chunk last committed ledger within one chunk of the tip: core replays its tail faster than a backfill refetch would gate serving, and the data is local by construction).",
+      wins:[ { w:5, range:[5000,5999], stored:[5000,5350], desired:[5000,5350] } ],
+      plan:[],
+      note:"Every steady-state restart lands in <b>desired ⊆ stored</b>, for every window — resolve emits nothing, backfill returns immediately, ingestion reopens chunk 5351's hot DB and resumes. Diffing the postcondition against the catalog is what keeps a restart from re-deriving every chunk's .bin only to delete it again at finalization." },
+    { label:"Boundary crash",
+      ctx:"Crash right after chunk 5350's last ledger committed, before the run froze it. hot:chunk:00005350 = \"ready\" with a complete DB; the chunk's artifact keys are absent; stored coverage is [5000,5349].",
+      wins:[ { w:5, range:[5000,5999], stored:[5000,5349], desired:[5000,5350] } ],
+      plan:[ { t:"cb", x:"ChunkBuild 5350 {ledgers, events, txhash}", s:"source: its complete hot DB (backfillSource pref. 1 — no refetch)" },
+             { t:"ib", x:"IndexBuild w5 [5000, 5350]" } ],
+      note:"Backfill performs exactly what the interrupted run would have — same resolver, same executor, same source rule. The hot DB itself stays until the first run's discard stage retires it, once coverage lands." },
+    { label:"Downtime (same window)",
+      ctx:"Daemon down while the network advanced: last committed ledger = end of chunk 5350; tip now mid-chunk 5353. Chunks 5351–5352 exist nowhere locally; 5353 is partial at the tip.",
+      wins:[ { w:5, range:[5000,5999], stored:[5000,5350], desired:[5000,5352] } ],
+      plan:[ { t:"cb", x:"ChunkBuild 5351 {ledgers, events, txhash}", s:"source: backfill backend (BSB)" },
+             { t:"cb", x:"ChunkBuild 5352 {ledgers, events, txhash}", s:"source: backfill backend (BSB)" },
+             { t:"ib", x:"IndexBuild w5 [5000, 5352]", s:"waits on 5351 + 5352's done-channels" } ],
+      note:"The resume chunk 5353 is left to ingestion (the range ends at the last complete chunk). The desired coverage's upper cap — min(window_last, range_end) — is what makes the trailing window need no special case." },
+    { label:"Downtime across a window boundary",
+      ctx:"Stopped with the tip in chunk 5998 (window 5 still current: stored coverage [5000,5998]); restarted with the tip in chunk 6002. The window 5 → 6 boundary passed during downtime.",
+      wins:[ { w:5, range:[5000,5999], stored:[5000,5998], desired:[5000,5999] },
+             { w:6, range:[6000,6999], stored:null, desired:[6000,6001] } ],
+      plan:[ { t:"cb", x:"ChunkBuild 5999 {ledgers, events, txhash}", s:"source: BSB" },
+             { t:"cb", x:"ChunkBuild 6000, 6001 {ledgers, events, txhash}", s:"source: BSB" },
+             { t:"ib", x:"IndexBuild w5 [5000, 5999] — TERMINAL", s:"its commit batch demotes all 1000 of window 5's .bin keys; the eager sweep deletes them" },
+             { t:"ib", x:"IndexBuild w6 [6000, 6001]" } ],
+      note:"The <b>stored_hi clause is load-bearing</b> here: window 5 was current at shutdown, so its frozen key has hi &lt; the window's last chunk. When downtime crosses the window boundary, the window becomes complete but still needs its tail chunks' .bin and a full build — and desired_hi &gt; stored_hi is what catches it." },
+    { label:"Floor rose",
+      ctx:"Retention slid forward: the floor is now chunk 5100. Window 5's stored coverage [5000,5349] still names chunks below the floor — pruning has already removed their files.",
+      wins:[ { w:5, range:[5000,5999], stored:[5000,5349], desired:[5100,5349] } ],
+      plan:[],
+      note:"Desired ⊆ stored → <b>nothing</b>. A risen floor is never a rebuild trigger: the stale lo is the reader retention contract's problem (reads below the floor return not-found before any file access), and the next boundary's rebuild uses lo = 5100 naturally — the current window's lo tracks the floor for free, every boundary." },
+    { label:"Retention widened",
+      ctx:"Operator raised retention_chunks: the floor drops 4500 → 4300. Window 4 finalized with lo = 4500 (the floor at build time). Chunks 4300–4499 were pruned long ago; chunks 4500–4999 still have .pack + events on disk, but their .bin keys were demoted and swept at finalization.",
+      wins:[ { w:4, range:[4000,4999], stored:[4500,4999], desired:[4300,4999] },
+             { w:5, range:[5000,5999], stored:[5000,5350], desired:[5000,5350] } ],
+      plan:[ { t:"cb", x:"ChunkBuild 4300–4499 {ledgers, events, txhash}", s:"source: BSB — everything below the old floor was pruned" },
+             { t:"cb", x:"ChunkBuild 4500–4999 {txhash only}", s:"source: local .pack (backfillSource pref. 2 — ledgers not requested ⇒ no download)" },
+             { t:"ib", x:"IndexBuild w4 [4300, 4999] — TERMINAL", s:"re-freezes at the new, wider coverage; the old [4500,4999] key is demoted by the commit batch" } ],
+      note:"This runs at the <b>next startup</b>, not in a run: at runtime the floor only rises, so a run's plan range stays within what's already on disk — extending the <em>bottom</em> of storage (a widened floor) is startup backfill's job. (Window 5 shows the uniform rule doing nothing: desired ⊆ stored.)" },
+  ];
+  const btns = $("#rs-buttons");
+  function seg(range, cov, cls, label) {
+    const span = range[1] - range[0] + 1;
+    const l = ((cov[0] - range[0]) / span * 100), wd = ((cov[1] - cov[0] + 1) / span * 100);
+    if (wd < 22) { // too narrow to hold its label — render the label beside the segment
+      const top = cls === "stored" ? "6px" : "27px";
+      const color = cls === "stored" ? "var(--frozen)" : "var(--freezing)";
+      return '<div class="rs-seg '+cls+'" style="left:'+l+'%;width:'+Math.max(wd,0.6)+'%"></div>'
+        + '<div style="position:absolute;top:'+top+';left:'+Math.min(l+wd+1.5,78)+'%;font-size:.66rem;font-family:var(--mono);color:'+color+';line-height:14px">'+label+"</div>";
+    }
+    return '<div class="rs-seg '+cls+'" style="left:'+l+'%;width:'+wd+'%">'+label+"</div>";
+  }
+  function render(sc) {
+    $("#rs-context").innerHTML = sc.ctx;
+    $("#rs-viz").innerHTML = sc.wins.map(w => {
+      const covered = w.stored && w.desired && w.stored[0] <= w.desired[0] && w.stored[1] >= w.desired[1];
+      return '<div class="rs-win"><div class="rs-winlabel">window '+p8(w.w)
+        + (covered ? ' <span class="badge final">desired ⊆ stored → skip</span>' : ' <span class="badge" style="color:var(--freezing);border-color:var(--freezing)">desired exceeds stored → work</span>')
+        + '</div><div class="rs-track">'
+        + (w.stored ? seg(w.range, w.stored, "stored", "stored ["+w.stored[0]+", "+w.stored[1]+"]") : '<div style="position:absolute;top:6px;left:8px;font-size:.68rem;color:var(--absent);font-family:var(--mono)">no frozen key</div>')
+        + (w.desired ? seg(w.range, w.desired, "desired", "desired ["+w.desired[0]+", "+w.desired[1]+"]") : "")
+        + '</div><div class="rs-cap"><span>chunk '+w.range[0]+"</span><span>chunk "+w.range[1]+"</span></div></div>";
+    }).join("");
+    $("#rs-plan").innerHTML = "<div style='color:var(--muted)'>resolve() emits:</div>" +
+      (sc.plan.length ? sc.plan.map(p => '<div class="'+p.t+'">'+(p.t==="cb"?"▸ ":"◆ ")+p.x+(p.s?' <span class="src">— '+p.s+"</span>":"")+"</div>").join("")
+        : '<div class="empty">Plan{} — nothing to do</div>');
+    $("#rs-note").innerHTML = sc.note;
+  }
+  SCEN.forEach((sc, i) => {
+    const b = document.createElement("button");
+    b.className = "btn" + (i===0?" on":""); b.textContent = sc.label;
+    b.addEventListener("click", () => { $$(".btn",btns).forEach(x=>x.classList.remove("on")); b.classList.add("on"); render(sc); });
+    btns.appendChild(b);
+  });
+  render(SCEN[0]);
+}
+
+/* ---------------- read-path explorer ---------------- */
+{
+  const SCEN = [ // one entry per scenario button: label, nodes (c = CSS class, ic = icon, t = text, s = optional small print; all inserted as HTML), note (HTML)
+    { label:"Found",
+      nodes:[
+        { c:"dim", ic:"·", t:"window 7 .idx: MPHF probe → fingerprint miss → skip", s:"non-containing window rejected with no fetch" },
+        { c:"dim", ic:"·", t:"window 6 .idx: fingerprint miss → skip", s:"" },
+        { c:"ok", ic:"✓", t:"window 5 .idx: fingerprint hit", s:"seq = MinLedger + payload (3 bytes)" },
+        { c:"ok", ic:"✓", t:"retention gate: seq ≥ floor → admitted", s:"" },
+        { c:"ok", ic:"✓", t:"fetch the LCM, verify the full 32-byte hash → confirms → return", s:"" },
+      ],
+      note:"A hash has no window hint, so the reader probes every in-retention window. The fingerprint (<code>fpWidth</code> bytes) rejects non-containing windows with no fetch; the containing window's fingerprint hit is confirmed by the full-hash verify. The tx is in at most one window, so at most one probe confirms." },
+    { label:"Fingerprint false positive",
+      nodes:[
+        { c:"dim", ic:"·", t:"window 7: fingerprint miss → skip", s:"" },
+        { c:"fail", ic:"✕", t:"window 6: fingerprint HIT (false positive) → fetch + verify → fails", s:"~256^(−fpWidth) chance; the fetched tx's full hash doesn't match → rejected, keep probing" },
+        { c:"ok", ic:"✓", t:"window 5: fingerprint hit → fetch + verify → confirms → return", s:"" },
+      ],
+      note:"The fingerprint is a screen, not a decision: a non-containing window matches it with probability <code>256^(−fpWidth)</code> and costs a wasted fetch the verify rejects. <code>fpWidth</code> is the knob that keeps spurious fetches ≪ 1; the verify is what guarantees only the true window is ever returned." },
+    { label:"Not found",
+      nodes:[
+        { c:"dim", ic:"·", t:"window 7: fingerprint miss", s:"" },
+        { c:"dim", ic:"·", t:"window 6: fingerprint miss", s:"" },
+        { c:"dim", ic:"·", t:"windows 5 … 0: fingerprint miss (every in-retention window)", s:"a not-found lookup can't stop early — it must rule out every window" },
+        { c:"fail", ic:"→", t:"no window confirms → not-found", s:"a non-existent or not-yet-ingested hash" },
+      ],
+      note:"Not-found is the cost ceiling: with no window hint and nothing to confirm, the reader must probe the full set of in-retention windows before answering. (Ordering and parallelism of those probes are the query-routing design's concern.)" },
+  ];
+  const btns = $("#rd-buttons"), chain = $("#rd-chain"), note = $("#rd-note");
+  function render(sc) {
+    chain.innerHTML = sc.nodes.map((n, i) =>
+      (i ? '<div class="lnk"></div>' : "") +
+      '<div class="nd '+n.c+'"><span class="ic">'+n.ic+"</span><span>"+n.t+(n.s?"<small>"+n.s+"</small>":"")+"</span></div>"
+    ).join("");
+    note.innerHTML = sc.note;
+  }
+  SCEN.forEach((sc, i) => {
+    const b = document.createElement("button");
+    b.className = "btn" + (i===0?" on":""); b.textContent = sc.label;
+    b.addEventListener("click", () => { $$(".btn",btns).forEach(x=>x.classList.remove("on")); b.classList.add("on"); render(sc); });
+    btns.appendChild(b);
+  });
+  render(SCEN[0]);
+}
+
+/* ---------------- executePlan dependency graph ---------------- */
+{
+  const WORKERS = 2, CHUNKS = [5346,5347,5348,5349,5350], FAILID = 5348;
+  const stepBtn = $("#ep-step"), autoBtn = $("#ep-auto"), resetBtn = $("#ep-reset"), failBox = $("#ep-fail");
+  const semHost = $("#ep-sem"), svg = $("#ep-svg"), note = $("#ep-note");
+  let tasks, cancelled, timer = null, t = 0;
+  const byId = id => tasks.find(x => x.id === id);
+  const idxTask = () => tasks.find(x => x.kind === "index");
+  const isTerminal = x => x.state === "done" || x.state === "failed" || x.state === "cancelled";
+  function build() {
+    cancelled = false; t = 0;
+    tasks = CHUNKS.map(id => ({ id, kind:"chunk", state:"queued", rem:0, cost:2, fail: failBox.checked && id === FAILID }));
+    tasks.push({ id:"w5", kind:"index", deps:CHUNKS.slice(), state:"queued", rem:0, cost:2 });
+  }
+  function step() {
+    if (tasks.every(isTerminal)) return;
+    t++;
+    for (const x of tasks) if (x.state === "running") {
+      x.rem--;
+      if (x.rem <= 0) { if (x.fail) { x.state = "failed"; cancelled = true; } else x.state = "done"; }
+    }
+    const idx = idxTask();
+    if (cancelled && idx.state === "queued") idx.state = "cancelled";
+    let running = tasks.filter(x => x.state === "running").length;
+    for (const x of tasks) {
+      if (running >= WORKERS) break;
+      if (x.state !== "queued") continue;
+      if (x.kind === "index" && !x.deps.every(d => byId(d).state === "done")) continue;
+      x.state = "running"; x.rem = x.cost; running++;
+    }
+    render();
+    if (tasks.every(isTerminal) && timer) stopAuto();
+  }
+  const FILL = { queued:"#1c2430", running:"rgba(63,185,143,.18)", done:"rgba(88,166,255,.18)", failed:"rgba(244,112,103,.18)", cancelled:"#161c26" };
+  const STROKE = { queued:"#2a3445", running:"#3fb98f", done:"#58a6ff", failed:"#f47067", cancelled:"#6e7787" };
+  function box(x, y, w, h, st, title, sub) {
+    const dash = st === "cancelled" ? ' stroke-dasharray="4 3"' : "";
+    return '<rect x="'+x+'" y="'+y+'" width="'+w+'" height="'+h+'" rx="9" fill="'+FILL[st]+'" stroke="'+STROKE[st]+'" stroke-width="1.6"'+dash+'></rect>'
+      + '<text x="'+(x+w/2)+'" y="'+(y+20)+'" text-anchor="middle" fill="#f0f4fa" font-size="13" font-weight="600">'+title+'</text>'
+      + '<text x="'+(x+w/2)+'" y="'+(y+37)+'" text-anchor="middle" fill="#8b95a7" font-size="10.5">'+sub+'</text>';
+  }
+  function render() {
+    const running = tasks.filter(x => x.state === "running");
+    let sh = '<span class="ep-sem-label">worker slots ('+WORKERS+'):</span>';
+    for (let i = 0; i < WORKERS; i++) {
+      const occ = running[i];
+      sh += '<span class="ep-slot'+(occ?" busy":"")+'">'+(occ ? (occ.kind === "index" ? "index w5" : "chunk "+occ.id) : "idle")+'</span>';
+    }
+    const queued = tasks.filter(x => x.state === "queued" && x.kind === "chunk").length;
+    sh += '<span class="ep-sem-q">'+queued+' chunk build'+(queued===1?"":"s")+' parked on the semaphore</span>';
+    semHost.innerHTML = sh;
+    const xs = [10,156,302,448,594], y0 = 18, bw = 120, bh = 46, ix = 310, iy = 150;
+    let g = "";
+    tasks.filter(x => x.kind === "chunk").forEach((c, i) => {
+      const sx = xs[i]+bw/2, sy = y0+bh, tx = ix+bw/2, ty = iy;
+      let col = "#2a3445", w = 1.4, dash = ' stroke-dasharray="5 4"';
+      if (c.state === "done") { col = "#3fb98f"; w = 2; dash = ""; }
+      else if (c.state === "failed") { col = "#f47067"; w = 2; dash = ' stroke-dasharray="2 3"'; }
+      g += '<path d="M'+sx+','+sy+' C'+sx+','+(sy+38)+' '+tx+','+(ty-38)+' '+tx+','+ty+'" fill="none" stroke="'+col+'" stroke-width="'+w+'"'+dash+'></path>';
+    });
+    tasks.filter(x => x.kind === "chunk").forEach((c, i) => {
+      const sub = c.state === "running" ? "building .bin" : c.state === "done" ? "done · closed" : c.state === "failed" ? "retries spent" : "queued";
+      g += box(xs[i], y0, bw, bh, c.state, "chunk "+c.id, sub);
+    });
+    const idx = idxTask();
+    const isub = idx.state === "queued" ? "waits on 5 channels" : idx.state === "running" ? "k-way merge" : idx.state === "done" ? "frozen" : idx.state === "cancelled" ? "bailed: <-gctx.Done()" : "";
+    g += box(ix, iy, bw, bh, idx.state, "IndexBuild w5", isub);
+    svg.innerHTML = g;
+    note.innerHTML = noteText();
+  }
+  function noteText() {
+    const idx = idxTask();
+    if (idx.state === "done") return "<b>Plan complete.</b> Every chunk build closed its done-channel, so the index build's wait fell through and it ran the k-way merge. Wall-clock was the slowest dependency chain, not the sum of the stages.";
+    const f = tasks.find(x => x.state === "failed");
+    if (idx.state === "cancelled" || f) {
+      return "<b>chunk "+(f ? f.id : FAILID)+" exhausted its retries.</b> It returns an error and <em>leaves <code>done["+(f ? f.id : FAILID)+"]</code> open</em>; the error cancels <code>gctx</code>. The index build, parked in its wait loop, unblocks through the <code>&lt;-gctx.Done()</code> case and bails. The daemon aborts; a restart re-resolves from durable keys and repeats no finished work.";
+    }
+    const running = tasks.filter(x => x.state === "running").length;
+    const dn = tasks.filter(x => x.kind === "chunk" && x.state === "done").length;
+    if (t === 0) return "Five chunk builds, one index build, 2 worker slots. Press <b>Step</b>. The index build can't start until all five <code>.bin</code> files are frozen — its done-channel wait is the only thing sequencing the two strata.";
+    return "<b>"+running+" of "+WORKERS+" slots busy"+(dn ? ", "+dn+"/5 chunk builds done" : "")+".</b> The index build stays parked: it proceeds only once every in-coverage chunk build has closed its done-channel. Extra chunk builds wait on the semaphore — thousands could be parked here for the price of a goroutine.";
+  }
+  function stopAuto(){ clearInterval(timer); timer = null; autoBtn.classList.remove("on"); autoBtn.textContent = "Auto-play"; }
+  stepBtn.addEventListener("click", step);
+  resetBtn.addEventListener("click", () => { if (timer) stopAuto(); build(); render(); });
+  autoBtn.addEventListener("click", () => { if (timer) stopAuto(); else { timer = setInterval(step, 800); autoBtn.classList.add("on"); autoBtn.textContent = "Pause"; } });
+  failBox.addEventListener("change", () => { if (timer) stopAuto(); build(); render(); });
+  build(); render();
+}
+
+/* ---------------- startup backfill loop ---------------- */
+{
+  const LPC = 10000;
+  const chunkID = seq => Math.floor((seq - 2) / LPC);
+  const chunkLast = c => (c + 1) * LPC + 1;
+  const lastCompleteChunkAt = led => Math.floor((led - 1) / LPC) - 1;
+  const SCEN = [
+    { label:"First-ever start",
+      ctx:"Empty disk, earliest_ledger = genesis (floor = chunk 0). Nothing committed yet (lastCommitted = earliest − 1 = 1). The network tip is mid-chunk 7.",
+      floor:0, frozen:-1, last0:1, tips:[70005, 70140] },
+    { label:"Restart, tip moves mid-pass",
+      ctx:"Restart with frozen history through chunk 5000 (lastCommitted = 50,010,001), floor = 4800. The tip is mid-chunk 5005 — and advances to mid-chunk 5009 while the first (large) pass runs.",
+      floor:4800, frozen:5000, last0:50010001, tips:[50053000, 50093000, 50094000] },
+    { label:"Clean restart mid-chunk",
+      ctx:"Clean restart, frozen through chunk 5350, lastCommitted mid-chunk 5351 (53,514,200) — read back from the live hot DB. The tip is 60 ledgers ahead, still inside chunk 5351. Floor = 5000.",
+      floor:5000, frozen:5350, last0:53514200, tips:[53514260] },
+  ];
+  function run(sc) {
+    let last = sc.last0, backfilled = -1, onDisk = sc.frozen, i = 0;
+    const passes = [];
+    while (i < 12) {
+      const tip = sc.tips[Math.min(i, sc.tips.length - 1)];
+      const anchor = Math.max(tip, last);
+      let rangeEnd = lastCompleteChunkAt(anchor);
+      const rangeStart = sc.floor;
+      const midChunk = last !== chunkLast(chunkID(last));
+      const nearTip = (tip - last) < LPC;
+      let trimmed = false;
+      if (nearTip && midChunk) { rangeEnd = chunkID(last) - 1; trimmed = true; }
+      if (rangeEnd < rangeStart || rangeEnd <= backfilled) {
+        passes.push({ brk:true, tip, anchor,
+          why: rangeEnd < rangeStart ? "rangeEnd &lt; rangeStart — the range is empty" : "rangeEnd ≤ backfilledThrough — the tip didn't advance past the last pass" });
+        break;
+      }
+      const emitLo = Math.max(rangeStart, onDisk + 1), emitHi = rangeEnd;
+      passes.push({ tip, anchor, rangeStart, rangeEnd, trimmed, emits: emitLo <= emitHi ? [emitLo, emitHi] : null });
+      last = Math.max(last, chunkLast(rangeEnd));
+      onDisk = Math.max(onDisk, rangeEnd);
+      backfilled = rangeEnd; i++;
+    }
+    return { passes, resumeLedger: last + 1, seed: lastCompleteChunkAt(last), resumeChunk: chunkID(last + 1) };
+  }
+  const host = $("#su-buttons"), ctxEl = $("#su-context"), passEl = $("#su-passes");
+  function render(sc) {
+    ctxEl.innerHTML = sc.ctx;
+    const r = run(sc);
+    let h = "", pn = 0;
+    r.passes.forEach(p => {
+      if (p.brk) {
+        h += '<div class="su-pass brk"><div class="su-h">Loop exits</div>'
+          + '<div class="su-row">tip = '+fmt(p.tip)+' · anchor = max(tip, lastCommitted) → last complete chunk '+lastCompleteChunkAt(p.anchor)+'</div>'
+          + '<div class="su-row break">'+p.why+'</div></div>';
+      } else {
+        pn++;
+        const trim = p.trimmed
+          ? '<div class="su-row trim">near tip + mid-chunk ⟹ rangeEnd trimmed to chunkID(lastCommitted) − 1 = <b>'+p.rangeEnd+'</b> — the partial resume chunk is left to ingestion</div>'
+          : '<div class="su-row dim">rangeEnd = lastCompleteChunkAt(anchor) = <b>'+p.rangeEnd+'</b> — the partial tip chunk '+(p.rangeEnd + 1)+' is excluded (left to ingestion)</div>';
+        h += '<div class="su-pass"><div class="su-h">Pass '+pn+'</div>'
+          + '<div class="su-row">tip = '+fmt(p.tip)+'  ·  anchor = max(tip, lastCommitted)</div>'
+          + trim
+          + '<div class="su-row">plan range = <b>[chunk '+p.rangeStart+', chunk '+p.rangeEnd+']</b></div>'
+          + '<div class="su-row">resolve emits: '+(p.emits ? 'ChunkBuild '+(p.emits[0] === p.emits[1] ? 'chunk '+p.emits[0] : 'chunks '+p.emits[0]+'–'+p.emits[1])+' + the overlapping window index rebuild(s)' : '<span class="ok">nothing — every chunk in range already frozen</span>')+'</div>'
+          + '</div>';
+      }
+    });
+    h += '<div class="su-pass serve"><div class="su-h">Serve + ingest</div>'
+      + '<div class="su-row">open chunk '+r.resumeChunk+'’s hot DB · seed the lifecycle with chunk '+r.seed+' · resume ingestion at ledger <b>'+fmt(r.resumeLedger)+'</b></div>'
+      + '<div class="su-row dim">reads are live immediately — a reader resolves only a "ready" hot DB or a "frozen" cold file, never the in-flight first run</div></div>';
+    passEl.innerHTML = h;
+  }
+  SCEN.forEach((sc, i) => {
+    const b = document.createElement("button");
+    b.className = "btn" + (i === 0 ? " on" : ""); b.textContent = sc.label;
+    b.addEventListener("click", () => { $$(".btn", host).forEach(x => x.classList.remove("on")); b.classList.add("on"); render(sc); });
+    host.appendChild(b);
+  });
+  render(SCEN[0]);
+}
+})();
+</script>
+</main>
+</div>
+</body>
+</html>
diff --git a/design-docs/full-history-streaming-workflow.md b/design-docs/full-history-streaming-workflow.md
new file mode 100644
index 000000000..0c1e45fc2
--- /dev/null
+++ b/design-docs/full-history-streaming-workflow.md
@@ -0,0 +1,998 @@
+# Streaming Workflow
+
+## Overview
+
+Full-history RPC runs as one daemon in one mode: it both backfills old history and follows the live network.
+
+It keeps two tiers of data. **Hot** data is the most recent ledgers near the network tip, written append-only into RocksDB. **Cold** data is older ledgers, held as immutable files on disk. On startup RPC backfills to the current tip, then ingests new ledgers continuously into the hot DB; when the hot DB fills, it writes the immutable cold files for that ledger range and discards the hot DB. This migration from hot to cold is called **freezing**.
+
+The daemon does three things:
+
+- **Backfills on startup.** Before it serves anything, it runs backfill as a subroutine to bring what's on disk in line with the current retention window. Every chunk inside that window that isn't already frozen is pulled from a configured `LedgerBackend` — by default BSB (the Buffered Storage Backend, which reads ledgers from an object store), or captive core or any other conformant backend if BSB isn't available. It skips the partial chunk still forming at the tip; hot-DB ingestion fills that one once it starts. This single mechanism covers a first-ever start, gaps left by downtime, and gaps opened by widening retention.
+- **Ingests** live ledgers from `CaptiveStellarCore` into one hot RocksDB per chunk — ledgers, transaction hashes, and events as column families, written in one atomic batch per ledger.
+- **Freezes** completed chunks to immutable files, **rebuilds** the current tx-hash index from its frozen inputs on every chunk boundary, and **prunes** superseded and past-retention artifacts. All run in a background lifecycle goroutine.
+
+---
+
+## Geometry
+
+The Stellar blockchain starts at ledger 2 (`GENESIS_LEDGER`). Two units organize all storage; everything in this doc is described in terms of them:
+
+- **Chunk** — a run of 10,000 ledgers (hardcoded); the atomic unit of ingestion, freezing, and crash recovery. A hot DB holds at most one chunk, and each cold file — ledgers, events, transactions — spans exactly one chunk.
+- **Window** — 1,000 chunks (10M ledgers); the unit of the rolling tx-hash index. The index is the one exception to the per-chunk rule: it maps transaction hashes to ledger sequences across a whole window.
+
+```
+chunkID(seq)         = floor((seq - 2) / 10_000)
+chunkFirstLedger(c) = c * 10_000 + 2
+chunkLastLedger(c)  = (c + 1) * 10_000 + 1
+indexID(c)          = c / 1000                           # takes a CHUNK id
+```
+
+Chunk ids are **signed**, because `chunkID` uses floor division. The only id below 0 is **chunk −1**, meaning "before the first chunk." It comes up in one place: the "nothing ingested yet" sentinel `earliest_ledger - 1`, which maps to chunk −1 (and `chunkLastLedger(-1) = 1` maps back). Chunk −1 only ever appears in startup arithmetic; every chunk id written to disk is `≥ 0`.
+
+All chunk and window ids use uniform `%08d` zero-padding. Example (window = 1,000 chunks):
+
+| Window | First ledger | Last ledger | Chunks |
+|---|---|---|---|
+| 0 | 2 | 10,000,001 | 0–999 |
+| 1 | 10,000,002 | 20,000,001 | 1000–1999 |
+| N | N×10M + 2 | (N+1)×10M + 1 | N×1000 – (N+1)×1000−1 |
+
+---
+
+## Configuration
+
+One TOML file (`--config`) configures the daemon.
+
+**[service]**
+
+| Key | Type | Default | Description |
+|---|---|---|---|
+| `default_data_dir` | string | **required** | Base directory for the catalog and default storage paths. |
+
+**[backfill]**
+
+| Key | Type | Default | Description |
+|---|---|---|---|
+| `workers` | int | `GOMAXPROCS` | Concurrent task slots for backfill. |
+| `max_retries` | int | `3` | Retries per backfill task before the daemon aborts. |
+
+**[backfill.bsb]** — Buffered Storage Backend (the default backfill `LedgerBackend`; required **unless** another conformant `LedgerBackend` is configured as the backfill source — `backendNetworkTip` and `processChunk`'s default `source` both go through whichever backend is configured)
+
+| Key | Type | Default | Description |
+|---|---|---|---|
+| `bucket_path` | string | **required** | Remote object store path for LedgerCloseMeta (no `gs://` prefix for GCS). |
+| `buffer_size` | int | `1000` | Prefetch buffer depth per connection. |
+| `num_workers` | int | `20` | Download workers per connection. |
+
+**[immutable_storage.*]** — one optional `path` per artifact tree (defaults under `{default_data_dir}`):
+
+| Section | Default path | Holds |
+|---|---|---|
+| `[immutable_storage.ledgers]` | `{default_data_dir}/ledgers` | `.pack` files |
+| `[immutable_storage.events]` | `{default_data_dir}/events` | events cold segments |
+| `[immutable_storage.txhash_raw]` | `{default_data_dir}/txhash/raw` | transient `.bin` files |
+| `[immutable_storage.txhash_index]` | `{default_data_dir}/txhash/index` | per-window `.idx` |
+
+**[catalog]** — optional `path` (default `{default_data_dir}/catalog/rocksdb`).
+
+**[logging]** — optional `level` (`debug`/`info`/`warn`/`error`, default `info`) and `format` (`text`/`json`, default `text`).
+
+**[streaming]**
+
+| Key | Type | Default | Description |
+|---|---|---|---|
+| `retention_chunks` | uint32 | `0` | Retention window in chunks. `0` = full history. |
+| `earliest_ledger` | uint32 \| `"genesis"` \| `"now"` | `"genesis"` | Earliest ledger this daemon will ever have data for — a fixed lower floor on history. Combined with `retention_chunks`, the effective floor is the higher of the two. Must be chunk-aligned; `"now"` resolves to the current network tip's chunk at first start. Resolved and stored on the first start (a reachable backend is required for `"now"` and numeric floors; see `validateConfig`), immutable thereafter. Setting it above genesis skips backfill of the history below it; with `earliest_ledger = "now"` there is nothing to backfill on the first start — useful when no fast backfill source is available and the daemon only follows the live network. |
+| `captive_core_config` | string | **required** | Path to CaptiveStellarCore config file. |
+
+**[streaming.hot_storage]**
+
+| Key | Type | Default | Description |
+|---|---|---|---|
+| `path` | string | `{default_data_dir}/hot` | Base path for hot RocksDB databases. |
+
+**CLI**
+
+| Flag | Type | Default | Description |
+|---|---|---|---|
+| `--config` | string | **required** | Path to TOML config file. |
+
+---
+
+## Data model
+
+The daemon's durable state lives in two places. The **catalog** — a small RocksDB — records what's on disk and the state each file is in, plus a few config values fixed on the first start. The **filesystem** holds the data itself: the immutable cold files, and one per-chunk hot RocksDB for data still being ingested.
+
+Throughout this section, `chunk` is a chunk id and `txhash_index` is a window id.
+
+### Filesystem artifacts
+
+The per-chunk artifacts are each written once at chunk freeze; the txhash index is rebuilt on each chunk boundary while its window is current and then finalized. All four are produced by [the primitives](#the-primitives):
+
+| Artifact | Granularity | Format | Produced by |
+|---|---|---|---|
+| Ledger pack file | per chunk | `.pack` | `processChunk` |
+| Events cold segment | per chunk | three files per chunk (format defined in the events doc) | `processChunk` |
+| Sorted txhash file | per chunk | `.bin` (sorted **streamhash** entries — the sorted on-disk tx-hash index format, specified in [the transactions design](./gettransaction-full-history-design.md) §6) | `processChunk` |
+| Streamhash txhash index | per index | one `.idx` file per **coverage** (the chunk range `[lo, hi]` an index spans), named `{lo:08d}-{hi:08d}.idx` inside the window's dir; at most one coverage frozen at any moment | `buildTxhashIndex` |
+
+The `.bin` files are transient — they are the input `buildTxhashIndex` merges, and the terminal build deletes them once its window is complete (or retention pruning removes them first, once their chunks drop below the floor). The pack files, events segments, and `.idx` files persist until retention pruning removes them. State for each lives in [Catalog keys](#catalog-keys); the write ordering is [One write protocol](#one-write-protocol).
+
+### Directory layout
+
+Chunk-level files group into buckets of 1,000 chunks (`bucket_id = chunk_id / 1000`, formatted `%05d`) — a filesystem concern only; bucket ids never appear in catalog keys. Directories are created on demand.
+
+```
+{default_data_dir}/
+├── catalog/rocksdb/                                  ← catalog (WAL always on)
+├── hot/{chunk:08d}/                               ← per-chunk hot RocksDB (transient)
+├── ledgers/{bucket:05d}/{chunk:08d}.pack
+├── events/{bucket:05d}/{chunk:08d}-events.pack    (+ -index.pack, -index.hash)
+└── txhash/
+    ├── raw/{bucket:05d}/{chunk:08d}.bin           ← transient until window finalization (or retention pruning)
+    └── index/{window:08d}/{lo:08d}-{hi:08d}.idx   ← one frozen file per window, coverage-named
+```
+
+### The chunk hot DB
+
+During ingestion the daemon maintains **one hot RocksDB per chunk** at `{hot_storage.path}/{chunk:08d}/`, holding everything for that chunk not yet materialized to cold artifacts. The data types are column families of the one instance:
+
+| Column family | Holds | Serves |
+|---|---|---|
+| `ledgers` | compressed LCMs (LedgerCloseMeta), keyed by seq | `getLedger` for the live chunk; the source `processChunk` reads at freeze |
+| `txhash` | tx hash → seq | `getTransaction` for the live chunk |
+| events CFs | live events (schema per the events doc) | `getEvents` for the live chunk |
+
+CFs share the instance's WAL, so each ledger commits as **one atomic WriteBatch across all CFs**. Per-CF options keep tuning independent (the events CFs carry their own settings). The DB is created when ingestion enters the chunk. It is discarded whole once every cold artifact derived from the chunk is durable **and** the rolling index covers the chunk. It keeps serving tx lookups across the brief freeze-to-coverage interval; freeze, rebuild, and discard all chain within one lifecycle run.
+
+### Catalog keys
+
+The catalog holds three groups of keys: artifact state keys (one per chunk artifact and one per index coverage), the hot DB state keys, and the config pin.
+
+**Artifact state keys**:
+
+| Key | Value | Meaning |
+|---|---|---|
+| `chunk:{chunk:08d}:ledgers` | `"freezing"` \| `"frozen"` \| `"pruning"` | Per-chunk pack file state. |
+| `chunk:{chunk:08d}:txhash` | `"freezing"` \| `"frozen"` \| `"pruning"` | Per-chunk `.bin` file state. Transient — removed at window finalization, or by retention pruning if its chunk ages out first. |
+| `chunk:{chunk:08d}:events` | `"freezing"` \| `"frozen"` \| `"pruning"` | Per-chunk events cold segment state. |
+| `index:{txhash_index:08d}:{lo:08d}:{hi:08d}` | `"freezing"` \| `"frozen"` \| `"pruning"` | One key per index **coverage**. The key *name* carries the coverage `[lo, hi]` and maps 1:1 to the file `{lo:08d}-{hi:08d}.idx`; the *value* is pure lifecycle state — the same three values as every other artifact key. At most one coverage per window is `"frozen"` at any moment, and a key with `hi` = its window's last chunk is **terminal** by definition (see [Index keys](#index-keys) below). |
+
+For the per-chunk keys, `"freezing"` means the immutable file is being written; `"frozen"` means it's fsynced and durable; `"pruning"` means the file is queued for removal; key absent means neither file nor in-progress write exists. Index keys use the **same three states with the same meanings** — a rebuild marks its coverage `"freezing"` before any I/O, and its commit batch flips it to `"frozen"` while demoting the superseded coverage to `"pruning"`. Every artifact key therefore obeys one set of crash rules: `"freezing"` = delete (or re-derive) the file, `"pruning"` = finish the delete, `"frozen"` = truth.
+
+**Hot DB state key**:
+
+| Key | Value | Tracks |
+|---|---|---|
+| `hot:chunk:{chunk:08d}` | `"transient"` \| `"ready"` | The chunk's hot DB. |
+
+`"ready"` means the RocksDB dir exists and is usable. `"transient"` brackets a directory operation in flight — creation or deletion; no code path ever needs to know which, since the recovery is the same either way (the open path wipes and recreates; the discard scan re-runs). A crash mid-operation is detectable from the key value alone. One key per chunk; the column families inside the DB carry no individual catalog state.
+
+**Config pin:**
+
+| Key | Value | Written when |
+|---|---|---|
+| `config:earliest_ledger` | `uint32` (decimal string, chunk-aligned) | On the first daemon start. Immutable thereafter — changing it currently requires wiping the data directory, until a `set-earliest-ledger` admin command exists (see [Configuration](#configuration); the floor machinery already converges for either direction). |
+
+**Resume point.** Recomputed at startup from the durable keys plus a read of the live hot DB (see [Startup](#startup)).
+
+### Index keys
+
+An index key `index:{txhash_index:08d}:{lo:08d}:{hi:08d}` names the chunk range `[lo, hi]` that its `.idx` covers, mapping 1:1 to the file `txhash/index/{txhash_index:08d}/{lo:08d}-{hi:08d}.idx`.
+
+`hi` grows as the window fills: at each chunk boundary the rebuild folds in the chunk that just froze, advancing `hi` by one. When `hi` reaches the window's last chunk, the window is **complete** and its index is **terminal** — final, never rebuilt again.
+
+`lo` is the higher of the window's first chunk and the retention floor, fixed when the index is built. So:
+
+- a window still being rebuilt each boundary has its `lo` recomputed every time, so it rises as the floor does, dropping chunks that have aged out of retention;
+- a terminal window's `.idx` keeps the `lo` it was built with; if the floor later climbs past that `lo`, the index still covers chunks that have dropped out of retention — but a read for any ledger below the floor returns not-found regardless of what the index says, so that stale coverage is never served.
+
+So `lo` equals the window's first chunk unless the start of the window has dropped below the floor.
+
+[The transactions design](./gettransaction-full-history-design.md) (§6.3) is canonical for coverage semantics, with a worked example.
+
+### One write protocol
+
+Every durable artifact — per-chunk files and index coverages alike — is written the same way, **mark-then-write**:
+
+1. put `"freezing"` *before* any I/O;
+2. write the file;
+3. fsync the file, its parent dirent, and — when the parent was just created — the grandparent dirent;
+4. flip the key to `"frozen"`.
+
+The key is always written before the file. So every file can be found from its key — cleanup walks keys, never directories — and a file left half-written by a crash carries a `"freezing"` key, which marks it for re-derivation or removal. Step 3 fsyncs the directory entries, not just the file, so the file's existence on disk survives a crash before its key flips to `"frozen"`.
+
+Deletion is the same protocol in reverse: demote the key to `"pruning"`, unlink the file, then delete the key, with an `fsyncDir` between the unlink and the key delete. So a key is gone only once its file is — **key absent ⟹ file gone**. Two functions do all file deletion: `sweepChunkArtifacts` for per-chunk artifacts and `sweepIndexKey` for index files.
+
+---
+
+## Backfill
+
+Backfill makes every artifact derived from a range of ledgers durable and servable. It has three parts, in the order below: a **resolver** (`resolve`) that diffs what's wanted against the catalog and returns a plan of the missing work; the **primitives** (`processChunk`, `buildTxhashIndex`) that produce each artifact; and an **executor** (`executePlan`) that runs the plan concurrently. The [Startup](#startup) backfill loop and the [Lifecycle](#lifecycle) run are its two callers.
+
+### Postcondition-driven planning
+
+Backfill works from a postcondition: *given a range, every artifact derived from every ledger in it must be durable and servable.* `resolve` reads the catalog and returns a `Plan` of only the missing work — per-chunk artifacts whose key isn't `"frozen"`, and window indexes whose frozen coverage doesn't yet span the range. It reads nothing but durable keys, so every run re-plans from what's on disk; a restart neither redoes finished work nor skips unfinished work. The plan is a flat list of chunk builds and index builds:
+
+```go
+// ChunkBuild is one unit of per-chunk work in a Plan.
+type ChunkBuild struct {
+	Chunk     ChunkID
+	Artifacts ArtifactSet // which kinds this chunk still needs — one processChunk pass produces all
+}
+
+// IndexBuild is one tx-hash index (re)build for a single window.
+type IndexBuild struct {
+	Window WindowID
+	Lo, Hi ChunkID // coverage to build; terminal iff Hi == windowLastChunk(Window)
+	// dependencies are derivable (the ChunkBuilds in [Lo, Hi]), so no input list
+}
+
+// Plan is the flat list of missing work: resolve returns it and executePlan runs it.
+type Plan struct {
+	ChunkBuilds []ChunkBuild
+	IndexBuilds []IndexBuild
+}
+
+// resolve diffs the wanted range [rangeStart, rangeEnd] against the catalog and
+// returns only the work that is still missing. An inverted range yields an
+// empty Plan.
+func resolve(cfg Config, rangeStart, rangeEnd ChunkID) Plan {
+	if rangeStart > rangeEnd {
+		return Plan{} // young network: no complete chunk yet
+	}
+	cat := cfg.Catalog
+	needs := make(map[ChunkID]ArtifactSet)
+
+	// want records kind as needed for chunk c unless its key is already frozen.
+	want := func(c ChunkID, kind Kind) {
+		if cat.State(c, kind) != Frozen {
+			needs[c] = needs[c].Add(kind)
+		}
+	}
+
+	// Ledgers and events are checked for every chunk in the range.
+	for c := rangeStart; c <= rangeEnd; c++ {
+		want(c, Ledgers)
+		want(c, Events)
+	}
+
+	// A window gets an IndexBuild when its frozen coverage does not span the
+	// part of the range inside it; only then are its .bin inputs checked.
+	var builds []IndexBuild
+	for _, w := range windowsOverlapping(rangeStart, rangeEnd) {
+		lo := max(windowFirstChunk(w), rangeStart)
+		hi := min(windowLastChunk(w), rangeEnd)
+		if frozenCoverage(cat, w).Covers(Range{Lo: lo, Hi: hi}) {
+			continue
+		}
+		for c := lo; c <= hi; c++ {
+			want(c, TxHashBin)
+		}
+		builds = append(builds, IndexBuild{Window: w, Lo: lo, Hi: hi})
+	}
+
+	return Plan{ChunkBuilds: chunkBuilds(needs), IndexBuilds: builds}
+}
+```
+
+### The primitives
+
+`processChunk` writes a chunk's requested artifacts through the [one write protocol](#one-write-protocol), reading ledgers from `backfillSource`. Its hot-DB branch is what lets the lifecycle freeze a just-closed chunk from its own hot DB, on the same path as a cold backfill.
+
+```go
+// processChunk writes the requested artifacts for one chunk using the
+// mark-then-write order: keys go to "freezing", the files are written and
+// fsynced, then the keys flip to "frozen". It returns an error only when no
+// ledger source can be obtained.
+// NOTE(review): errors from Commit/Add/Finish/FsyncAll and closing `source` are
+// not shown — presumably elided for brevity in this sketch; confirm.
+func processChunk(cfg Config, chunk ChunkID, artifacts ArtifactSet) error {
+	cat := cfg.Catalog
+	// Pick the ledger source first, so a failure here leaves no "freezing" keys behind.
+	source, err := backfillSource(cfg, chunk, artifacts)
+	if err != nil {
+		return err
+	}
+
+	batch := cat.NewBatch() // mark "freezing" before any I/O
+	for _, kind := range artifacts.Kinds() {
+		batch.Put(chunkKey(chunk, kind), "freezing")
+	}
+	batch.Commit()
+
+	// One pass over the chunk's ledgers feeds every requested artifact writer.
+	w := newArtifactWriters(chunk, artifacts)
+	for seq := chunkFirstLedger(chunk); seq <= chunkLastLedger(chunk); seq++ {
+		w.Add(source.GetLedger(seq))
+	}
+	w.Finish()
+	w.FsyncAll() // durable before the keys flip to "frozen"
+
+	// All requested kinds flip to "frozen" together in one batch.
+	batch = cat.NewBatch()
+	for _, kind := range artifacts.Kinds() {
+		batch.Put(chunkKey(chunk, kind), "frozen")
+	}
+	batch.Commit()
+	return nil
+}
+
+// backfillSource chooses where a chunk's ledgers are read from, trying sources
+// in a fixed order: the chunk's hot DB, then the local pack file, then the
+// backfill backend. The hot branch returns an error only when a "ready" hot DB
+// cannot be opened — its data is lost. A hot DB that opens but does not reach
+// the chunk's last ledger is merely stale: it is closed and the next source
+// re-derives the chunk.
+func backfillSource(cfg Config, chunk ChunkID, artifacts ArtifactSet) (LedgerSource, error) {
+	cat := cfg.Catalog
+
+	hotState, _ := cat.Get(hotChunkKey(chunk))
+	if hotState == "ready" {
+		db, err := openRocksDBReadOnly(hotChunkPath(chunk))
+		if err != nil {
+			return nil, fmt.Errorf("hot DB for chunk %d is ready but won't open: %w", chunk, err)
+		}
+		if complete := maxCommittedSeq(db) >= chunkLastLedger(chunk); complete {
+			return &HotLedgers{chunk: chunk, store: db}, nil
+		}
+		db.Close() // incomplete: stale leftover — close and fall through; the discard scan owns it
+	}
+
+	// The frozen pack is used only when the pack itself is not among the artifacts being built.
+	packUsable := cat.State(chunk, Ledgers) == Frozen && !artifacts.Has(Ledgers)
+	if packUsable {
+		return packReader(chunk), nil // re-derive locally
+	}
+
+	// Last resort: the backfill backend, the only source for a chunk with no
+	// local copy. If its tip lags below this chunk, wait for coverage.
+	waitForBackendCoverage(cfg, chunk) // bounded; fatal on timeout
+	return backfillBackend(cfg), nil   // BSB by default
+}
+```
+
+**`buildTxhashIndex(w, lo, hi, cat)`** rebuilds window `w`'s index to cover chunks `[lo, hi]` — `lo` the lowest in-floor chunk, `hi` the highest frozen chunk (the window's last once the window is complete). The lifecycle calls it on every chunk boundary while the window is current.
+
+```go
+// buildTxhashIndex builds window w's index file for coverage [lo, hi] from the
+// chunks' sorted .bin files and commits it. It is a no-op when the frozen
+// coverage already equals [lo, hi]. The commit is a single batch: the new key
+// becomes "frozen", the previous coverage (if any) is demoted to "pruning",
+// and — for a terminal build — every .bin key in [lo, hi] is demoted too.
+// NOTE(review): errors from Put/Finish/fsync/Commit are not shown — presumably
+// elided for brevity in this sketch; confirm.
+func buildTxhashIndex(w WindowID, lo, hi ChunkID, cat Catalog) error {
+	prev := frozenCoverage(cat, w)
+	if prev != nil && prev.Lo == lo && prev.Hi == hi {
+		return nil // already built (e.g. a buildThenSweep retry re-entering after the commit)
+	}
+
+	key := indexKey(w, lo, hi)
+	cat.Put(key, "freezing") // mark before any I/O
+
+	// Stream the merged entries straight into the index file for this coverage.
+	sb := streamhash.NewSortedBuilder(indexFilePath(key))
+	for entry := range kWayMerge(binFiles(lo, hi)) { // sorted .bin files → one stream
+		sb.Add(entry)
+	}
+	sb.Finish()
+	// File first, then its directory entry, before the key may flip to "frozen".
+	fsyncFile(indexFilePath(key))
+	fsyncDir(indexWindowDir(key)) // + grandparent on the window's first build
+
+	batch := cat.NewBatch() // one atomic synced write — the whole finalization
+	batch.Put(key, "frozen")
+	if prev != nil {
+		batch.Put(indexKey(w, prev.Lo, prev.Hi), "pruning") // demote predecessor
+	}
+	if hi == windowLastChunk(w) { // terminal: the merged .bin inputs are spent
+		for c := lo; c <= hi; c++ {
+			batch.Put(chunkKey(c, TxHashBin), "pruning")
+		}
+	}
+	batch.Commit()
+	return nil
+}
+```
+
+`kWayMerge` and `SortedBuilder` are streamhash internals, covered in [the transactions design](./gettransaction-full-history-design.md) (§6–§7).
+
+### Execution model
+
+`executePlan` runs a plan from either caller — startup backfill or the [lifecycle run](#lifecycle). Chunk builds run concurrently under one worker semaphore; each index build waits on the done-channels of the chunk builds inside its coverage, then runs.
+
+```go
+// executePlan runs every build in plan and returns the first error (nil when
+// all succeed). At most cfg.Workers builds run at once. An index build starts
+// only after each planned chunk build inside its coverage has closed its
+// done-channel; chunks with no planned build are not waited on.
+func executePlan(ctx context.Context, cfg Config, plan Plan) error {
+	slots := make(chan struct{}, cfg.Workers) // the only concurrency knob
+	done := make(map[ChunkID]chan struct{}, len(plan.ChunkBuilds))
+	for _, cb := range plan.ChunkBuilds {
+		done[cb.Chunk] = make(chan struct{})
+	}
+
+	g, gctx := errgroup.WithContext(ctx)
+
+	// acquire takes a worker slot, or gives up once gctx is cancelled, so builds
+	// parked on the semaphore exit promptly after a failure instead of each
+	// queueing for a slot first.
+	acquire := func() error {
+		select {
+		case slots <- struct{}{}:
+			return nil
+		case <-gctx.Done():
+			return gctx.Err()
+		}
+	}
+
+	for _, cb := range plan.ChunkBuilds {
+		g.Go(func() error {
+			if err := acquire(); err != nil {
+				return err // cancelled while parked; done[cb.Chunk] stays open
+			}
+			defer func() { <-slots }()
+			if err := withRetries(gctx, cfg.MaxRetries, func() error {
+				return processChunk(cfg, cb.Chunk, cb.Artifacts)
+			}); err != nil {
+				return err // leave done[cb.Chunk] open; the error cancels gctx, freeing waiters
+			}
+			close(done[cb.Chunk]) // success: dependents may now read this chunk's .bin
+			return nil
+		})
+	}
+	for _, b := range plan.IndexBuilds {
+		g.Go(func() error {
+			// No slot is held while waiting on dependencies.
+			for c := b.Lo; c <= b.Hi; c++ { // wait on the in-coverage chunk builds
+				if ch, ok := done[c]; ok {
+					select {
+					case <-ch: // this chunk's .bin is frozen
+					case <-gctx.Done(): // a build failed (or cancel) — bail
+						return gctx.Err()
+					}
+				}
+			}
+			if err := acquire(); err != nil {
+				return err
+			}
+			defer func() { <-slots }()
+			return withRetries(gctx, cfg.MaxRetries, func() error {
+				return buildThenSweep(cfg, b)
+			})
+		})
+	}
+	return g.Wait()
+}
+
+// buildThenSweep runs one IndexBuild and then immediately sweeps whatever is in
+// "pruning" state for that window — superseded index coverages and the
+// window's .bin inputs — so freed disk returns without waiting for a run.
+func buildThenSweep(cfg Config, b IndexBuild) error {
+	cat := cfg.Catalog
+	err := buildTxhashIndex(b.Window, b.Lo, b.Hi, cat)
+	if err != nil {
+		return err
+	}
+
+	// Superseded coverage(s) of this window.
+	for _, key := range indexKeys(cat, b.Window) {
+		if key.State != Pruning {
+			continue
+		}
+		sweepIndexKey(cat, key)
+	}
+
+	// Terminal build: the window's .bin inputs.
+	first, last := windowFirstChunk(b.Window), windowLastChunk(b.Window)
+	var demoted []ArtifactRef
+	for c := first; c <= last; c++ {
+		if cat.State(c, TxHashBin) != Pruning {
+			continue
+		}
+		demoted = append(demoted, ArtifactRef{Chunk: c, Kind: TxHashBin})
+	}
+	if len(demoted) == 0 {
+		return nil
+	}
+	sweepChunkArtifacts(cat, demoted)
+	return nil
+}
+```
+
+- **`cfg.Workers`** (default `GOMAXPROCS`) is the only resource knob: at most that many tasks run at once, drawn from all windows' eligible work. Goroutines are cheap structure — thousands may be parked on the semaphore or on done-channels.
+- Done-channels signal *success*: a chunk build closes its channel only once its `.bin` is frozen, so an index build proceeds only when every input it needs exists. A chunk build that exhausts its retries leaves its channel open and returns an error, which cancels `gctx`; any dependent waiting on it unblocks through the `<-gctx.Done()` case and bails. A task that exhausts its retries aborts the daemon ([error policy](#lifecycle)); restart re-resolves from durable keys and completed work never repeats.
+
+---
+
+## Daemon flow
+
+After startup, the daemon runs two goroutines. **Hot-DB ingestion** pulls new ledgers from captive core into the per-chunk hot DBs as the network closes them, and hands each completed chunk to the lifecycle. (This is the live-network loop — distinct from startup backfill, which reads *old* ledgers into cold files.) The **lifecycle** is a background goroutine responsible for everything else, and it does two kinds of work: **freezing** complete chunks from hot storage into immutable cold files (rolling the tx-hash index forward as it goes), and **cleanup** — discarding hot DBs the cold files now serve, and pruning artifacts that are superseded or have fallen past the retention floor. The sections below cover startup, then each goroutine in turn.
+
+### Startup
+
+Startup runs in two steps, both in `startStreaming` below:
+
+1. **Backfill** brings on-disk coverage in line with the retention window, up through the last *complete* chunk at the tip. The partial chunk still forming at the tip is left to hot-DB ingestion: its ledgers so far are already in the live hot DB (which serves them), and ingestion completes the chunk as new ledgers arrive. Backfill re-runs if the tip advances mid-pass, and when it returns, the whole in-retention history up to that point is on disk as frozen files — ready to serve.
+2. **Serve + ingest** opens the resume chunk's hot DB, starts captive core, serving, the lifecycle goroutine, and the hot-DB ingestion loop. The lifecycle is seeded with the last complete chunk so its first run fires at once; that run finishes any crash/downtime leftovers concurrently with serving. Reads never wait for it, because a reader only ever resolves a `"ready"` hot DB or a `"frozen"` cold file — never a transient key.
+
+Operational note — **peak disk after long downtime**: pruning runs only in the first run's prune stage, *after* backfill has materialized every newly-in-retention chunk, so a downtime approaching or exceeding the retention window transiently holds up to ~2× the retention footprint (the stale window plus its replacement). Size volumes accordingly, or prune stale ranges manually before restarting after very long downtime; a disk-full during backfill otherwise aborts before the relieving prune can run, on every retry.
+
+The retention floor and resume point are computed by:
+
+```go
+const (
+	GenesisLedger        = 2 // what earliest_ledger "genesis" resolves to; lower values are rejected
+	LedgersPerChunk      = 10_000 // ledgers in one chunk
+	ChunksPerTxhashIndex = 1_000 // window = 10M ledgers
+)
+
+// retentionFloorChunk returns the lowest chunk to keep: retentionChunks chunks
+// ending at lastChunk, clamped to earliest's chunk (the only bound when it is 0).
+func retentionFloorChunk(lastChunk ChunkID, retentionChunks uint32, earliest uint32) ChunkID {
+	earliestChunk := chunkID(earliest)
+	if retentionChunks == 0 {
+		return earliestChunk
+	}
+	return max(earliestChunk, lastChunk-ChunkID(retentionChunks)+1)
+}
+
+// lastCompleteChunkAt returns the highest chunk whose final ledger is <= ledger.
+func lastCompleteChunkAt(ledger uint32) int64 {
+	chunksEnded := (int64(ledger) - 1) / LedgersPerChunk
+	return chunksEnded - 1
+}
+
+// maxCommittedSeq (called below) returns the highest ledger committed to a hot
+// DB; for a freshly opened, empty chunk-C DB it returns chunkFirstLedger(C) - 1
+// (the watermark just below the chunk), so the boundary-crash derivation is exact.
+//
+// lastCommittedLedger reports the highest ledger held in durable storage: the
+// live hot DB's last ledger when the hot tier leads, otherwise the last ledger of
+// the highest frozen chunk, otherwise earliest-1. It never returns below earliest-1.
+func lastCommittedLedger(cat Catalog) uint32 {
+	lowest := cat.EarliestLedger() - 1
+	coldChunk := highestDurableChunk(cat)
+	hotChunk := highestReadyHotChunk(cat)
+
+	if hotChunk > coldChunk {
+		db := openReadOnly(hotChunk)
+		defer db.Close()
+		return max(lowest, maxCommittedSeq(db))
+	}
+	if coldChunk >= 0 {
+		return max(lowest, chunkLastLedger(coldChunk))
+	}
+	return lowest
+}
+
+func networkTip(cfg Config) (uint32, error) { // backend tip, fetched with backoff
+	fetch := func() (uint32, error) { return backendNetworkTip(cfg) }
+	tip, err := withBackoff(fetch)
+	switch {
+	case err != nil:
+		return 0, err
+	case tip < GenesisLedger: // a tip below genesis means the backend is not ready
+		return 0, fmt.Errorf("backend tip %d is below genesis — backend not ready", tip)
+	}
+	return tip, nil
+}
+```
+
+```go
+func startStreaming(ctx context.Context, cfg Config) error { // startup: backfill, then serve + ingest
+	cat := openCatalog(cfg)
+	cfg.Catalog = cat
+	validateConfig(cfg) // fatal on a bad config; pins earliest_ledger on the first start
+
+	earliest := cat.EarliestLedger()
+	lastCommitted := lastCommittedLedger(cat) // earliest-1 when nothing is stored yet
+
+	// Step 1: backfill from the floor up to the last complete chunk at the tip,
+	// leaving the partial tip chunk to ingestion. Re-pass while the tip moves.
+	backfilledThrough := int64(-1) // highest chunk a finished pass has covered; -1 = none yet
+	for {
+		tip, err := networkTip(cfg)
+		if err != nil {
+			if lastCommitted < earliest {
+				fatalf("network tip unavailable and no local history to serve: %v", err)
+			}
+			tip = lastCommitted // backend down, but local data exists: serve it
+		}
+		anchor := max(tip, lastCommitted) // the higher of the reported tip and what is already committed
+		rangeEnd := lastCompleteChunkAt(anchor)
+		rangeStart := retentionFloorChunk(rangeEnd, cfg.RetentionChunks, earliest)
+		midChunk := lastCommitted != chunkLastLedger(chunkID(lastCommitted)) // resume point sits inside a chunk
+		nearTip := int64(tip)-int64(lastCommitted) < LedgersPerChunk // tip is less than one chunk ahead
+		if nearTip && midChunk {
+			rangeEnd = chunkID(lastCommitted) - 1 // leave the partial resume chunk to ingestion
+		}
+		if rangeEnd < rangeStart || rangeEnd <= backfilledThrough { // empty range, or nothing new since the last pass
+			break
+		}
+		if err := executePlan(ctx, cfg, resolve(cfg, rangeStart, rangeEnd)); err != nil {
+			return err
+		}
+		lastCommitted = max(lastCommitted, chunkLastLedger(rangeEnd)) // never moves backwards
+		backfilledThrough = rangeEnd
+	}
+	resumeLedger := lastCommitted + 1
+
+	// Step 2: serve + ingest. Seed the lifecycle with the last complete chunk so
+	// its first run clears crash/downtime leftovers while serving is already live.
+	hotDB, err := openHotDBForChunk(cat, chunkID(resumeLedger))
+	if err != nil {
+		return err
+	}
+	core := startCaptiveCore(cfg, resumeLedger)
+	lifecycleCh := make(chan ChunkID, lifecycleQueueDepth) // buffered; ingestion aborts if it ever fills
+	lifecycleCh <- lastCompleteChunkAt(resumeLedger - 1) // seed the first run
+	go lifecycleLoop(ctx, cfg, lifecycleCh)
+	serveReads() // NOTE(review): assumed non-blocking, since ingestion starts only after it returns — confirm
+	return runIngestionLoop(ctx, cat, core, hotDB, lifecycleCh, resumeLedger) // returns only with an error
+}
+```
+
+`validateConfig` checks the config and, on the first start, resolves and pins `earliest_ledger`:
+
+```go
+func validateConfig(cfg Config) {
+	const pinKey = "config:earliest_ledger"
+	setting := cfg.EarliestLedger
+	symbolic := setting == "genesis" || setting == "now"
+
+	// Static checks, in the order their failures are reported.
+	if cfg.Workers < 1 {
+		fatalf("workers must be > 0 (got %d)", cfg.Workers)
+	}
+	if cfg.MaxRetries < 0 {
+		fatalf("max_retries must be >= 0 (got %d)", cfg.MaxRetries)
+	}
+	if !symbolic {
+		n, err := parseUint32(setting)
+		if err != nil || n < GenesisLedger || n != chunkFirstLedger(chunkID(n)) {
+			fatalf("earliest_ledger must be \"genesis\", \"now\", or a chunk-aligned "+
+				"ledger >= %d; got %q.", GenesisLedger, setting)
+		}
+	}
+
+	pinned, isPinned := cfg.Catalog.Get(pinKey)
+	if isPinned {
+		// Restart: only verify the configured floor against the pinned one; the
+		// catalog is not written. "now" always accepts whatever is pinned.
+		if setting == "now" {
+			return
+		}
+		expected := uint32(GenesisLedger)
+		if setting != "genesis" {
+			expected = atoi(setting)
+		}
+		if expected != atoi(pinned) {
+			fatalf("earliest_ledger changed: stored=%s, config=%s; wipe the data dir to change it.",
+				pinned, setting)
+		}
+		return
+	}
+
+	// First start: work out the floor and pin it. Both "now" and a numeric floor
+	// require a reachable backend — the former to resolve the tip, the latter so a
+	// floor past the tip is rejected before it is pinned for good.
+	var resolved uint32
+	switch setting {
+	case "genesis":
+		resolved = GenesisLedger
+	case "now":
+		tip, err := networkTip(cfg)
+		if err != nil {
+			fatalf("earliest_ledger=now needs a reachable backend: %v", err)
+		}
+		resolved = chunkFirstLedger(chunkID(tip))
+	default:
+		resolved = atoi(setting)
+		tip, err := networkTip(cfg)
+		if err != nil {
+			fatalf("a numeric earliest_ledger needs a reachable backend to validate against the tip: %v", err)
+		}
+		if resolved > tip {
+			fatalf("earliest_ledger (%d) is past the network tip (%d)", resolved, tip)
+		}
+	}
+	cfg.Catalog.Put(pinKey, itoa(resolved))
+}
+```
+
+### Hot DB helpers
+
+`openHotDBForChunk` opens a chunk's hot DB — the existing one, or a fresh one after a crash or on first use:
+
+```go
+func openHotDBForChunk(cat Catalog, chunk ChunkID) (*HotDB, error) { // reuse a "ready" hot DB, else create a fresh one
+	hotKey, path := hotChunkKey(chunk), hotChunkPath(chunk)
+	if state, _ := cat.Get(hotKey); state == "ready" { // only a "ready" key is reused as-is
+		db, err := openExistingRocksDB(path)
+		if err != nil {
+			return nil, fmt.Errorf("hot DB for chunk %d is ready but won't open: %w", chunk, err)
+		}
+		return db, nil
+	}
+	// Key is "transient" or absent: wipe any leftover dir, then create fresh.
+	deleteDirIfExists(path)
+	cat.Put(hotKey, "transient") // marked transient before the directory is created
+	db := createChunkHotDB(path)
+	fsyncDir(path) // durable before the key flips to "ready"
+	fsyncParentDir(path) // NOTE(review): presumably makes the new directory entry durable as well — confirm
+	cat.Put(hotKey, "ready")
+	return db, nil
+}
+```
+
+### Hot DB Ingestion
+
+```go
+func runIngestionLoop(ctx context.Context, cat Catalog, core LedgerBackend, hotDB *HotDB,
+	lifecycleCh chan<- ChunkID, resumeLedger uint32) error {
+
+	// A full lifecycleCh means freeze has fallen lifecycleQueueDepth boundaries
+	// behind ingestion — fail loud.
+	notify := func(complete ChunkID) {
+		select {
+		case lifecycleCh <- complete:
+		default: // the send would block: abort rather than stall ingestion
+			fatalf("lifecycle fell %d boundaries behind ingestion; investigate", lifecycleQueueDepth)
+		}
+	}
+
+	for seq := resumeLedger; ; seq++ { // no exit condition: the loop ends only by returning an error
+		lcm, err := core.GetLedger(ctx, seq) // blocks until ledger seq is available
+		if err != nil {
+			return err
+		}
+
+		// One atomic synced batch across all CFs, so a ledger is fully present or
+		// absent; it is the only per-ledger durability boundary.
+		batch := hotDB.NewBatch()
+		putLedger(batch, lcm)
+		putTxHashes(batch, lcm)
+		putEvents(batch, lcm)
+		batch.Commit( /*sync=*/ true)
+
+		if seq == chunkLastLedger(chunkID(seq)) { // seq was the final ledger of its chunk
+			// Close this chunk and open the next before notifying, so the lifecycle
+			// never races a live writer for the chunk it is about to freeze.
+			hotDB.Close()
+			if hotDB, err = openHotDBForChunk(cat, chunkID(seq)+1); err != nil {
+				return err
+			}
+			notify(chunkID(seq)) // hand the just-closed chunk id to the lifecycle
+		}
+	}
+}
+```
+
+A `GetLedger` failure returns from the loop and exits the process; the next startup resumes from where the last synced batch left off, since the batch is all-or-nothing. A clean shutdown cancels `ctx` and returns the same way; the daemon's top level tells it apart from a crash. The completed chunk id is all that ingestion sends the lifecycle — it says only *how far to go*; what to build, discard, and prune, the lifecycle reads from the catalog.
+
+### Lifecycle
+
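+The lifecycle is a background goroutine. Each notification — one per ingestion boundary, plus a startup seed — triggers one **run** (notifications that queue up while a run is in progress are coalesced: the next run takes only the most recent chunk id, which covers the earlier ones). A run does three stages in order: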
+
+1. **Plan-and-execute** — `resolve` + `executePlan` over `[floor, last complete chunk]`, the same machinery backfill uses. In steady state this freezes the just-closed chunk from its hot DB and folds it into the current window's index; rebuilding the whole window each boundary costs ≈1 minute against a boundary that arrives only every ~14 h at mainnet rates.
+2. **Discard** — retire hot DBs the cold artifacts now fully serve.
+3. **Prune** — sweep demoted and past-retention files.
+
+At runtime the floor only rises (retention config is fixed for the life of the process; widening applies at the next startup), so `[floor, last complete chunk]` always sits within existing storage — a run produces only the just-closed chunk and never reaches below. Extending the *bottom* of storage — a fresh start, or filling to a widened floor — is startup backfill's job.
+
+Everything the run does derives from the catalog plus the one chunk id ingestion hands it:
+
+```go
+// runLifecycle performs one lifecycle run for lastChunk: plan-and-execute over
+// [floor, lastChunk], then the discard scan, then the prune scan. A failed plan
+// aborts the daemon, since startup is the recovery path. A plan that fails
+// because ctx was cancelled is a shutdown rather than a failure: the run stops
+// quietly and the next startup re-derives whatever was left undone.
+func runLifecycle(ctx context.Context, cfg Config, lastChunk ChunkID) {
+	floor := retentionFloorChunk(lastChunk, cfg.RetentionChunks, cfg.Catalog.EarliestLedger())
+
+	if err := executePlan(ctx, cfg, resolve(cfg, floor, lastChunk)); err != nil {
+		if ctx.Err() != nil {
+			return // cancelled by shutdown; do not turn a clean exit into a fatal abort
+		}
+		fatalf("lifecycle run: %v", err) // abort; startup is the recovery path
+	}
+	for _, op := range eligibleDiscardOps(cfg, lastChunk, floor) {
+		op()
+	}
+	for _, op := range eligiblePruneOps(cfg, floor) {
+		op()
+	}
+}
+
+const lifecycleQueueDepth = 8 // lifecycleCh buffer size; far above the at-most-one a healthy daemon holds
+
+func lifecycleLoop(ctx context.Context, cfg Config, lifecycleCh <-chan ChunkID) {
+	for lastChunk := range lifecycleCh {
+	drain: // if several chunks queued, take the most recent — one run covers them
+		for {
+			select {
+			case lastChunk = <-lifecycleCh:
+			default:
+				break drain
+			}
+		}
+		runLifecycle(ctx, cfg, lastChunk)
+	}
+}
+```
+
+Between runs the goroutine is idle, and idle means **settled**: a re-scan would produce no ops and every storage invariant holds, so an [audit](#correctness) run at any such moment would pass. A failing op retries with backoff, then aborts the daemon — startup is the recovery path, the same policy as ingestion.
+
+The discard and prune stages are the two `eligible*` scans below. **Discard** retires a chunk's hot DB once its cold artifacts fully serve it (the window's index covers the chunk), or once it falls past retention. **Prune** is the system's only file-deleter: it sweeps transient index keys, the `.bin` inputs a terminal commit demoted, and everything below the retention floor, through `sweepIndexKey`/`sweepChunkArtifacts`. Each scan returns zero-arg ops the run calls in order.
+
+```go
+// eligibleDiscardOps scans the hot-chunk keys and returns one discard op per hot
+// DB that is either below the floor or fully served by cold artifacts.
+func eligibleDiscardOps(cfg Config, lastChunk, floor ChunkID) []func() {
+	cat := cfg.Catalog
+	// Each op receives its chunk id as a parameter, so it holds its own copy.
+	discardOp := func(chunk ChunkID) func() {
+		return func() { discardHotDBForChunk(cat, chunk) }
+	}
+	// Cold artifacts fully serve a chunk at or below lastChunk once nothing is
+	// pending for it and the window's index covers it.
+	servedCold := func(chunk ChunkID) bool {
+		return chunk <= lastChunk &&
+			pendingArtifacts(cfg, chunk).Empty() &&
+			indexCovers(cfg, chunk)
+	}
+
+	var ops []func()
+	for _, chunk := range hotChunkKeys(cat) {
+		if chunk < floor || servedCold(chunk) {
+			ops = append(ops, discardOp(chunk))
+		}
+	}
+	return ops
+}
+
+// pendingArtifacts returns the set of processChunk outputs that are not yet
+// Frozen for the chunk. The .bin counts as pending only while the window's index
+// does not cover the chunk (a finalized window has already demoted its key).
+func pendingArtifacts(cfg Config, chunk ChunkID) ArtifactSet {
+	isFrozen := func(kind Kind) bool {
+		return cfg.Catalog.State(chunk, kind) == Frozen
+	}
+
+	var missing ArtifactSet
+	for _, kind := range []Kind{Ledgers, Events} {
+		if isFrozen(kind) {
+			continue
+		}
+		missing = missing.Add(kind)
+	}
+	if !(isFrozen(TxHashBin) || indexCovers(cfg, chunk)) {
+		missing = missing.Add(TxHashBin)
+	}
+	return missing
+}
+
+// indexCovers reports whether the chunk lies inside the [Lo, Hi] coverage of its
+// window's frozen index; false when the window has no frozen coverage.
+func indexCovers(cfg Config, chunk ChunkID) bool {
+	coverage := frozenCoverage(cfg.Catalog, indexID(chunk))
+	if coverage == nil {
+		return false
+	}
+	return chunk >= coverage.Lo && chunk <= coverage.Hi
+}
+
+// eligiblePruneOps scans the index keys and the chunk artifact keys and returns
+// the sweep ops due: one per index key to sweep, followed by at most one batched
+// op covering every chunk artifact to sweep.
+func eligiblePruneOps(cfg Config, floor ChunkID) []func() {
+	cat := cfg.Catalog
+
+	// Highest window and chunk lying wholly below the floor; -1 when none does.
+	lastDeadWindow, lastDeadChunk := WindowID(-1), ChunkID(-1)
+	if floor > 0 {
+		lastDeadWindow, lastDeadChunk = indexID(floor)-1, floor-1
+	}
+
+	// Each op receives its key as a parameter, so it holds its own copy.
+	sweepIndexOp := func(key IndexKey) func() {
+		return func() { sweepIndexKey(cat, key) }
+	}
+	var ops []func()
+	for _, key := range indexKeys(cat) {
+		transient := key.State == Freezing || key.State == Pruning // transient debris
+		if transient || key.Window <= lastDeadWindow {             // or frozen, wholly below the floor
+			ops = append(ops, sweepIndexOp(key))
+		}
+	}
+
+	// A .bin is redundant once its window's frozen coverage reaches the window's
+	// last chunk, i.e. the window is finalized.
+	redundantBin := func(ref ArtifactRef) bool {
+		if ref.Kind != TxHashBin {
+			return false
+		}
+		window := indexID(ref.Chunk)
+		coverage := frozenCoverage(cat, window)
+		return coverage != nil && coverage.Hi == windowLastChunk(window)
+	}
+	var doomed []ArtifactRef
+	for _, ref := range chunkArtifactKeys(cat) {
+		pastRetention := ref.Chunk <= lastDeadChunk
+		if pastRetention || cat.State(ref.Chunk, ref.Kind) == Pruning || redundantBin(ref) {
+			doomed = append(doomed, ref)
+		}
+	}
+	if len(doomed) == 0 {
+		return ops
+	}
+	return append(ops, func() { sweepChunkArtifacts(cat, doomed) })
+}
+```
+
+The op bodies — one discard, two sweeps — are the daemon's entire directory- and file-deletion surface, apart from `openHotDBForChunk` wiping a leftover hot-DB directory before it creates a fresh one:
+
+```go
+func discardHotDBForChunk(cat Catalog, chunk ChunkID) { // remove a chunk's hot DB directory, then its catalog key
+	if !cat.Has(hotChunkKey(chunk)) { // no key: nothing to discard
+		return
+	}
+	cat.Put(hotChunkKey(chunk), "transient") // demote before the directory is deleted
+	deleteDirIfExists(hotChunkPath(chunk))
+	fsyncParentDir(hotChunkPath(chunk)) // NOTE(review): presumably makes the removal durable before the key goes — confirm
+	cat.Delete(hotChunkKey(chunk)) // the key is removed last
+}
+
+func sweepChunkArtifacts(cat Catalog, refs []ArtifactRef) { // delete the files behind refs, then their catalog keys
+	batch := cat.NewBatch() // demote before the unlink
+	for _, ref := range refs {
+		batch.Put(chunkKey(ref.Chunk, ref.Kind), "pruning")
+	}
+	batch.Commit() // every ref is marked "pruning" before any file is deleted
+
+	var paths []string
+	for _, ref := range refs {
+		deleteArtifactFiles(ref.Chunk, ref.Kind)
+		paths = append(paths, artifactPaths(ref.Chunk, ref.Kind)...) // remembered for the directory fsync below
+	}
+	fsyncParentDirs(paths) // unlinks durable before the keys go
+
+	batch = cat.NewBatch() // second batch: delete the keys themselves
+	for _, ref := range refs {
+		batch.Delete(chunkKey(ref.Chunk, ref.Kind))
+	}
+	batch.Commit()
+}
+
+func sweepIndexKey(cat Catalog, key IndexKey) { // delete one index key's file, then the key itself
+	cat.Put(key, "pruning") // demote before the unlink (synced → durable first)
+	deleteFileIfExists(indexFilePath(key))
+	fsyncDir(indexWindowDir(key)) // sync the window directory before the key is deleted
+	cat.Delete(key) // key outlives the unlink, so a crash re-runs the sweep
+	rmdirIfEmpty(indexWindowDir(key)) // last step: drop the window directory if nothing is left in it
+}
+```
+
+`discardHotDBForChunk` removes a hot DB directory under its `hot:chunk` key; the two `sweep*` functions are the entire file-deletion surface, one body per key family. The prune walk's two families are independent of each other and of discard — a chunk swept while its window's `.idx` still resolves to it could leave a `getTransaction` pointing at a deleted `.pack`, but a below-floor read is not-found regardless ([reader contract](#reader-contract)).
+
+### Concurrency model
+
+Two writer goroutines and read-only readers. The catalog partitions their domains at the **live chunk** — the highest chunk with a `hot:chunk` key:
+
+- **Ingestion** owns the live chunk: the sole writer of its hot DB, and the creator of each `hot:chunk` key (via `openHotDBForChunk` at the boundary).
+- **The lifecycle** owns everything below it: handed-off hot DBs (freeze + discard), all `chunk:*` and `index:*` keys, and the deletion side of `hot:chunk` keys.
+
+The two share no memory; their only link is the channel. The handoff is by write ordering — ingestion closes the chunk and opens the next (moving the partition) *before* sending it — so the lifecycle never freezes a chunk a writer still holds. Both write the catalog at the same time but never the same key (RocksDB handles concurrent writes safely). And because the chunk ids ingestion hands over only increase, a chunk completing while a lifecycle run is already in progress just bumps the starting point of the *next* run — it can't disturb the one underway. Readers hold their own read-only handles and resolve files through keys, so writer activity never races them.
+
+**Single-process enforcement.** All of the above assumes a *single* daemon owns the data; two daemons sharing it would corrupt it. The daemon enforces that at startup by taking a kernel file lock (`flock`) on a `LOCK` file in **each** of its roots — the catalog and every configured storage tree. A second daemon pointed at any of those paths can't acquire the lock and exits; the lock releases on any exit, including `kill -9`, so it never goes stale. It has to lock every root, not just the catalog, because the catalog and the storage trees are configured as independent paths — otherwise two daemons with different catalogs could still share a storage tree. The hot tree matters most: its `hot/{chunk}` DBs are the only copy of recently-ingested ledgers that aren't frozen yet.
+
+---
+
+## Reader contract
+
+A read resolves data through two rules, and the rest of the design relies on both:
+
+1. **Only `"ready"` and `"frozen"` are visible.** A read resolves a chunk only from a `"ready"` hot DB or a `"frozen"` cold file — never from a key in a transient state (`"freezing"`, `"pruning"`, `"transient"`). So a reader never sees a half-written file, crash debris, or an in-progress sweep; transient keys are invisible to it.
+2. **Below the floor is *not found*.** A read for any seq below the retention floor returns not-found, whether or not the file still exists on disk. This is what lets pruning delete a chunk the instant it passes retention: a stale `.idx` might resolve a tx-hash to a `.pack` that's been unlinked, but the below-floor read is not-found anyway.
+
+Together they make retention the single source of truth for "is this data available?": the freeze, sweep, and prune stages constantly create transient states and delete below-floor data, and these rules guarantee a read never *resolves* either. (Whether a read already in flight survives a concurrent unlink is a separate question — see below.)
+
+How a read is actually served — choosing the hot DB or the cold files for a given query, reading across the cold artifact types (`.pack` ledgers, events segments, `.idx` index), and staying correct when a sweep or prune unlinks a file while a read is mid-flight — is the **query-routing design's** concern, out of scope here and in the transactions design (§8).
+
+---
+
+## Correctness
+
+This section states what the streaming workflow guarantees, the assumptions it relies on, and the operator actions and crash timings the design covers.
+
+### Invariants
+
+Two terms recur below. The **retention window** runs from the retention floor up to the last committed ledger; the reader gate and the prune scan both use the floor (rounding it a little low is harmless). The floor is also the bottom of the production range for both backfill and the lifecycle run, and at runtime it only rises — so a run never reaches below what's already on disk. The daemon is **settled** when a run's plan is empty and its discard and prune scans produce no ops: the state between runs, where the invariants below are meant to hold.
+
+**INV-1 (read correctness).** Any data request whose ledger scope falls entirely within the retention window returns correct results: the content matches what a conformant LedgerBackend would produce, no partial state is visible, and no in-retention range is unreachable.
+
+There is one transient exception. When surgical recovery demotes hot data down to the live chunk (scenario 3), the last committed ledger rewinds and the floor — anchored on the last complete chunk — regresses with it. For the few minutes until re-ingestion advances it again, the bottom of the window includes a handful of chunks already pruned under the old floor. Reads there fail soft — not-found, never wrong data, since files are write-once and pruning only unlinks — and the gap closes as the floor climbs back.
+
+**INV-2 (single canonical state).** The catalog records exactly one home for each data range. What it guarantees:
+
+- **One frozen index per window, at all times** (settled or not). The commit batch promotes the new coverage and demotes the old one in a single write, so "the window's index" is always well-defined for readers — never two frozen keys, never none once the window has one.
+- **No transient artifact key survives a settled state.** Between runs, no `chunk:*` or `index:*` key is `"freezing"` or `"pruning"`. Each kind of transient has cleared: index transients by the run that observed them; per-chunk `"freezing"` keys by re-materialization (the plan stage rebuilds them, for chunks in `[floor, last complete chunk]`, from whatever source `backfillSource` picks); and `"pruning"` keys by the sweeps.
+- **No leftover hot DB for a fully-cold chunk** (when settled). No `hot:chunk:c` exists for a chunk `c` whose artifacts are all durable *and* whose window's index covers `c` — that chunk is served entirely from cold files, so its hot DB must be gone.
+- **No leftover `.bin` key in a finalized window** (when settled). No `chunk:c:txhash` exists for a chunk in a window whose frozen index is terminal: the terminal commit demotes the merged inputs `[lo, hi]` and the sweep removes them, chunks below the floor are cleared by retention pruning, and the prune scan's redundant-input branch catches any that a crashed widening re-froze.
+
+Two transient states are tolerated even at a settled moment:
+
+- **A hot DB's `"transient"` bracket** around an in-flight directory operation (the boundary's `openHotDBForChunk`, startup's resume-chunk open, a discard mid-op). A crash-left bracket is finished by the next `openHotDBForChunk` or discard scan.
+- **After a hot-data recovery, a partially-frozen chunk above the last committed ledger** may hold `"freezing"` keys while serving and settled. It sits above the last complete chunk — outside every plan range and the retention window, so no read can observe it — until re-ingestion replays it forward from the last frozen boundary and re-freezes it, minutes later.
+
+**INV-3 (disk matches catalog).** When settled, the files and hot-DB directories on disk are exactly the set the catalog names — no more, no less. Every key maps to one expected path, and because a key is written before its file (mark-before-write), even a partial file is reachable from its key. So the match holds whether a key is in a final state or in one of the transients INV-2 tolerates. No orphan files, no dangling keys, no duplicates: a file that no catalog key names is a real bug, not mid-run debris.
+
+**INV-4 (retention bound).** When settled, no file or catalog key maps to a ledger range strictly below the effective retention floor — with one exception: a frozen index key whose window straddles the floor keeps the `lo` it was built with, so its coverage `[lo, hi]` reaches below the floor. That below-floor portion is never served ([reader contract](#reader-contract) rule 2 returns not-found), and the key and its `.idx` are swept once the whole window falls below the floor.
+
+Each invariant has a distinct audit. INV-1 you check by issuing reads or by re-deriving artifacts and byte-comparing. INV-2 you check by walking catalog keys and cross-checking forbidden co-existence. INV-3 you check by walking the filesystem against the catalog. INV-4 you check by walking catalog keys against the floor. None of the invariants reference the phase scans that maintain them — so a bug in any scan shows up as a real invariant violation, not as something the buggy code silently considers acceptable. A settled state between runs makes these walks meaningful on a live daemon, so an `audit` admin command can implement them directly (with an optional deep mode that re-derives sampled artifacts via a conformant LedgerBackend and byte-compares, for INV-1).
+
+### Convergence
+
+**Startup converges from any on-disk state.** Whatever a partial-completion crash, an operator action, or surgical recovery leaves behind, startup drives the system to a settled state satisfying INV-1 ∧ INV-2 ∧ INV-3 ∧ INV-4. Startup here is the backfill pass followed by the first lifecycle run (fired by the startup seed), and it reaches a settled state within that first run — typically seconds after serving opens, bounded by the run's freeze, rebuild, and prune workload. From any state reachable *during* a run, the lifecycle run alone converges, within a bounded number of runs. And since a runtime op failure aborts the daemon, every state a run can leave behind is one startup is built to converge.
+
+The split matters because some repairs are inherently backfill's, not the run's: a per-chunk `"freezing"` key with no hot DB behind it (a crashed backfill write) is repaired by re-materialization, and a surgically removed range is re-derived from the LedgerBackend — no run phase produces data. The run's province is everything else: index transients, demotions, freezes from live hot DBs, prunes.
+
+Convergence rests on three properties shared by the resolver and the scans — eligibility is computed from durable catalog state alone; ops are idempotent; everything is re-derived on every notification — plus backfill's postcondition contract. Together, whatever a crash leaves half-done, the next run or the next startup finishes.
+
+### Substrate assumptions
+
+Properties we rely on the underlying storage to provide:
+
+- **Sync WAL.** All catalog puts and deletes that the invariants depend on use RocksDB's `WriteOptions.sync = true`, which fsyncs the WAL before the write returns. Multi-key commits — the index commit batch, the sweeps' key-delete batches — are single atomic synced WriteBatches: all-or-nothing across keys.
+- **Per-ledger durability.** The chunk hot DB's synced WriteBatch (atomic across all CFs) is the sole per-ledger durability boundary; the last committed ledger is derived from it. Per-artifact: the per-chunk file **and its directory entry** are fsynced before its key flips to `"frozen"`, and an index coverage's `.idx` (and its dir entry) is fsynced before the commit batch freezes its key.
+- **Deterministic, idempotent writes.** Re-applying any write produces byte-identical state. Backed by deterministic LCM bytes from any conformant LedgerBackend and a byte-identical streamhash index from byte-identical sorted inputs.
+- **Monotonic progress.** Within a process run, ingestion only moves forward: each synced batch extends the last, and the last-complete-chunk it hands the lifecycle climbs with it (strictly increasing chunk ids). Across a crash, the startup derivation equals exactly the durable state — the pre-crash value, or a hair above it (a batch that committed in the instant before the crash). It lands *below* the pre-crash value in only two cases: hot state was lost or demoted to `"transient"`, or recovery demoted a finished window's index for rebuild on a daemon interrupted during its first backfill (before any live ingestion). In that second case there are no hot DBs to anchor the last committed ledger, so it drops below that whole window until backfill rebuilds the index — re-deriving the untainted chunks from their on-disk `.pack`s and re-fetching only the tainted ones. Surgical recovery, in general, shrinks the derivation's inputs by demoting state.
+
+### Design invariants
+
+These are streaming-specific properties the implementation guarantees on top of the substrate, and that INV-1 through INV-4 depend on:
+
+- **Every key precedes its file.** The pre-write `"freezing"` mark and post-fsync `"frozen"` flip mean any file on disk — per-chunk artifact or index file, partial or complete — has its catalog key set. Every scan and sweep iterates keys, so every file is reachable that way; nothing ever lists a directory to find work.
+- **Index promotion is atomic and gap-free.** The commit batch freezes the new coverage and demotes its predecessor in one synced write, so the window's unique frozen key changes hands atomically — never two frozen keys, never none once the window has one. A reader following the frozen key always lands on a complete, fsynced index; a crash mid-build leaves the prior coverage frozen and the attempt as `"freezing"` debris that is either overwritten by the next build of that coverage or deleted unread by the sweeps.
+- **Key absent ⟹ file gone.** Every sweep's shared ordering (unlink → `fsyncDir` → atomic key delete) gives the exit-side counterpart.
+- **Hot DB keys bracket the directory.** The `hot:chunk:{chunk}` key is put (`"transient"`) before the directory is created, and deleted only after rmdir completes — with `"transient"` re-marked first.
+- **Tx hashes always have a queryable home.** The hot DB is discarded only after the durable `.idx` covers the chunk — hot CF, then `.idx`, with no gap. (The `.bin` is never a serving tier; it is rebuild input, demoted to `"pruning"` by the terminal commit batch — the same write that freezes the final `.idx` — or by retention pruning once its chunk falls past the floor, and deleted only by the sweep after that.)
+- **`"frozen"` ⟹ the file is durable and complete.** Flips to `"frozen"` happen only after fsync, and files are deleted only under non-frozen keys (sweeps demote first) — so frozen keys can be trusted blindly by readers and the resolver.
+- **`"pruning"` is committed.** Once a key is in `"pruning"` — demoted by a commit batch or by retention — the sweep runs to completion on subsequent scans. Backfill treats any non-`"frozen"` state as empty and overwrites cleanly if the range is re-ingested.
+
+### Scenario coverage
+
+INV-1 holds at every point the daemon is serving reads — transient states are never externally visible, because a read resolves only a `"ready"` hot DB or a `"frozen"` cold artifact — never a `"freezing"`/`"pruning"`/`"transient"` key, and the retention check masks everything else. INV-2, INV-3, and INV-4 hold at every settled state reached after the events below; startup's first settled state arrives when the first run completes, shortly after reads open.
+
+1. **Steady-state operation.** Hot DB ingestion advances the last committed ledger; the lifecycle goroutine freezes complete chunks within retention and prunes anything past the retention floor. All four invariants hold by induction over these steady-state steps (each ingested ledger, each lifecycle run).
+2. **Operator state changes — widening or shortening retention (`retention_chunks`).** Changing `retention_chunks` recomputes the retention floor, and the next startup converges to the new state. Backfill's per-window rule rebuilds any window whose desired coverage now exceeds what's stored, and the prune stage removes anything below a raised floor.
+
+   Widening takes effect on the *next startup*, not immediately: a running daemon holds the retention config it started with, so its floor never drops mid-run — the lower floor, and the backfill that fills down to it, apply only at the next startup. `earliest_ledger` is not a live change at all: it is pinned on the first start and immutable, so editing the config never moves the floor (the only way to change it is to wipe the data directory and start fresh).
+3. **Surgical recovery (tainted data).** The operator never touches the filesystem. Recovery is **one atomic catalog batch** that *demotes* the affected keys — it never removes them — split by tier. Tainted cold artifacts (`chunk:{c}:*` and every overlapping `index:*` key) go to `"freezing"`, the state that already means *this file is not to be trusted: re-derive or delete*. For the hot tier, demote **every `hot:chunk` at or above the lowest tainted chunk — the live chunk always included** — to `"transient"`, not just the directly-tainted ones (the reason is the third paragraph). `"transient"` makes a hot DB instantly ineligible as a source (`backfillSource` reads only `"ready"`) and invisible to the last-committed-ledger derivation (which counts only `"ready"` keys). The batch commits atomically or not at all, and re-running it is a no-op; the catalog's lock means it can only be written against a stopped daemon.
+
+   Everything then converges through machinery that already exists. Backfill re-derives the `"freezing"` cold artifacts from a conformant LedgerBackend — overwriting in place, the write protocol's ordinary re-materialization — and rebuilds each window's index. (If the backend tip lags below a re-derived chunk, `backfillSource` waits for coverage; see [the primitives](#the-primitives).) The `"transient"` hot DBs need no file surgery: `openHotDBForChunk` wipes and recreates one when re-ingestion re-opens that chunk, and the discard scan retires any sitting below the live chunk.
+
+   **Why every hot DB at or above the taint, not just the tainted one.** The hot tier is repaired only by re-ingestion, which replays **forward** from the last committed ledger — the highest `"ready"` hot chunk. To replay a tainted hot chunk, that watermark must first fall *below* it; and since the watermark is the maximum over all `"ready"` hot chunks, it falls below the taint only once every hot DB at or above the lowest tainted chunk is demoted. Demoting just the tainted chunk would leave a higher `"ready"` chunk — ultimately the live chunk — pinning the watermark above the taint, so re-ingestion would never reach it. Once they are all demoted, the watermark drops to the last frozen boundary below the taint, captive core re-ingests the tail forward, and the untainted hot chunks swept up in the demotion are re-derived byte-identically. Every recovery demotes; nothing is removed by hand — the daemon's own sweeps and `openHotDBForChunk` handle the dirs in their existing crash-safe order.
+4. **First deployment / downtime between restarts.** The last committed ledger derives to `max(frozen/hot maxima, earliest_ledger - 1)`, ensuring `resumeLedger ≥ earliest_ledger`. Backfill fills `[earliest_ledger, lastCompleteChunkAt(network_tip)]` if needed (a no-op for `earliest_ledger = "now"` first deployment).
+5. **LedgerBackend choice or mid-flight swap.** The LedgerBackend contract guarantees canonical LCM bytes for any range, so any conformant backend produces byte-identical artifacts. Different backends differ in performance, not behavior. An operator using BSB for backfill and CaptiveCore for hot DB ingestion, or swapping mid-deployment, satisfies all four invariants.
+6. **Crash at any point during any of the above.** Sync WAL plus per-ledger durability ordering mean the catalog on next start is internally coherent and the derived last committed ledger equals exactly what the last synced batch committed. Idempotency means re-running any half-finished op is safe. Convergence finishes whatever the crash interrupted.
+
+### What a bug looks like
+
+The invariants describe what storage should look like, not how the phase scans maintain it. So common bugs show up as concrete violations:
+
+- **A catalog key claims something the file doesn't actually deliver** — e.g., a per-chunk writer flips a key to `"frozen"` before fsync (leaving a partial file the catalog advertises as complete), or an index key freezes before its `.idx` is fully fsynced, or the key name's `{lo, hi}` doesn't match the file's actual coverage, or a frozen file is mutated post-freeze ⟹ reads through the catalog key see wrong or missing data. **INV-1** violated. Detectable by re-deriving an artifact via a conformant LedgerBackend and byte-comparing against the on-disk file.
+- **Pruning too aggressive** ⟹ a request whose ledger scope is in retention returns wrong or missing results. Issue a read to find it. **INV-1** violated.
+- **Two frozen index keys in one window** — a commit batch failed to demote the predecessor, or promotion and demotion landed as separate writes ⟹ readers have no well-defined index. Walk `index:*` keys, count `"frozen"` per window. **INV-2** violated.
+- **A `"freezing"` or `"pruning"` key within `[floor, last complete chunk]` survives while serving and settled** ⟹ its recovery mechanism was skipped — an index transient the sweeps should have deleted, a `"pruning"` demotion the sweeps should have finished, or a per-chunk `"freezing"` key that the freeze phase or startup backfill should have re-materialized. Walk keys for transient values when settled, excluding the one corner INV-2 tolerates — a `"freezing"` artifact key *above* the last complete chunk after a hot-data recovery with a lagging backend tip, which no source can yet repair. **INV-2** violated.
+- **Chunk scan misses an orphan** ⟹ a hot DB persists for a chunk that cold artifacts fully serve. Walk `hot:chunk:c` keys whose chunk has its artifacts durable and its window's index covering `c`. **INV-2** violated.
+- **Finalization demotions don't complete** ⟹ per-chunk frozen tx hash files outlive the index that consumed them. Walk `chunk:c:txhash` keys whose window's frozen key has `hi` = the window's last chunk. **INV-2** violated.
+- **A writer leaves a file on disk without its catalog key** (file fsynced before key was durable, or a sweep deleted the key before its unlink was durable) ⟹ orphan file — invisible to every key-driven scan. Walk the filesystem against the catalog. **INV-3** violated.
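+- **A catalog key persists without its file** (the file was removed but its key was never deleted — the sweep's unlink → `fsyncDir` → key-delete sequence was cut short and never resumed, or the file was removed outside it) ⟹ dangling key. Walk the catalog against the filesystem. **INV-3** violated.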
+- **Duplicate cold artifacts for the same logical data** (e.g., two events files for the same chunk, from a migration or buggy retry) ⟹ the catalog names one expected path; the extras are orphans. Walk the filesystem against catalog-specified paths. **INV-3** violated.
+- **Pruning fails past the floor** ⟹ files or keys remain for ranges below the retention floor. Walk catalog keys, compare ledger ranges to the floor. **INV-4** violated.
+
+A storage walk against the invariants is enough to find these without inspecting the phase implementations.
+
+---
+
+## Related documents
+
+- The transactions design ([gettransaction-full-history-design.md](./gettransaction-full-history-design.md)) — the tx-by-hash subsystem end to end: the hot `txhash` CF, the `.bin`/`.idx` formats, the rolling window index rebuild (including its streamhash merge internals and safety argument), the `getTransaction` read path, and the capacity numbers. Canonical for the streamhash `.bin`/`.idx` formats, the index merge internals, and the index-key coverage semantics this doc summarizes.
+- The events design ([getevents-full-history-design.md](./getevents-full-history-design.md), PR #635) — the cold-segment file formats and the hot events CF schema referenced by the data model.
+- The reader / query-routing design — how reads dispatch between hot DBs and frozen files for in-retention queries.
diff --git a/design-docs/gettransaction-full-history-design.md b/design-docs/gettransaction-full-history-design.md
new file mode 100644
index 000000000..cf07c0ec3
--- /dev/null
+++ b/design-docs/gettransaction-full-history-design.md
@@ -0,0 +1,253 @@
+# RPC getTransaction Full-History Design
+
+# Part 1: Problem and Scope
+
+## 1. Objective
+
+Serve `getTransaction(hash)` for any transaction whose ledger falls within the retention window (full history by default):
+
+- **Complete.** Every transaction in every in-retention ledger is resolvable by its hash, with no gaps — across crashes, restarts, and retention changes alike. The one exception is a hash-prefix collision so rare (~10⁻²⁰ for a dense window) that it counts as negligible, and even then it fails loudly rather than silently. §8.2 has it.
+- **Correct.** A lookup never returns the wrong transaction; a missing or out-of-retention one returns not-found.
+- **No in-memory index.** The map lives in on-disk `.idx` files, read through the page cache — not a RAM structure sized to the transaction count. The daemon's memory does not grow with the number of transactions in history.
+- **Cheap to maintain.** Ingestion adds negligible cost to the per-ledger write, and the cold index stays current with a rebuild that is small relative to how often it runs.
+
+Out of scope: how a reader chooses which tier and window to consult and stays correct while files are added and removed (the query-routing design), and the storage of the transaction bytes themselves (the ledger store).
+
+## 2. Lookup model
+
+`getTransaction` takes a 32-byte transaction hash and returns the transaction's envelope, result, and meta, plus its ledger and close time. The data flow:
+
+```
+hash ──► seq ──► LCM for seq ──► extract the tx ──► verify hash ──► respond
+      (this doc)  (ledger store)
+```
+
+Three properties of the transaction-hash key space shape the design:
+
+- **Point lookups only.** Every query is for one specific hash, never a range or prefix — exactly what a perfect hash is built for.
+- **Hashes are uniform and immutable.** A transaction hash is never updated, and corresponds to at most one applied transaction (the network's replay protection). The map is append-only: one batch of entries per ledger.
+- **The full transaction is always fetched anyway.** The response needs the envelope, result, and meta, so the read path always ends by fetching the transaction and checking its full 32-byte hash. That means the map needn't be exact — only *complete*, never missing a hash that is really there. False positives are harmless: a fingerprint screens most of them, and the final hash check catches the rest.
+
+---
+
+# Part 2: Architecture
+
+## 3. The two tiers
+
+Each in-retention transaction lives in exactly one place — one tier, one window, never copied. But a hash on its own doesn't say which place, so a lookup checks them all, and at most one answers (none, if the hash isn't stored). The two places a transaction can live:
+
+| Tier | Structure | Serves |
+|---|---|---|
+| **Hot** | `txhash` CF of the per-chunk hot RocksDB | the live chunk, plus any frozen chunk the window index doesn't cover yet |
+| **Cold** | one streamhash `.idx` per window, covering chunks `[lo, hi]` | every chunk in `[lo, hi]` (at/below the frozen `hi`, at/above the floor chunk `lo`) |
+
+```
+                     window w
+  chunks:   [lo ···························· hi] [hi+1 ···] [live]
+  served by: └──────── {lo}-{hi}.idx ─────────┘  hot DBs    hot DB
+                                                 (awaiting    (being
+                                                  coverage)   written)
+```
+
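+The two tiers hand off with no gap. A chunk's hot table is dropped only *after* the cold index covers that chunk. So a freshly frozen chunk keeps being answered from its hot table until the index can answer for it, and only then does the hot table go away. Every in-retention transaction is therefore findable at all times. During the brief overlap — after the index covers a chunk but before its hot table is dropped — the data exists in both tiers, but a lookup still resolves it in exactly one, because the hot probe set is only the chunks above `hi` (§8.1).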
+
+## 4. Geometry
+
+Two units organize the map. Every structure below is named by them:
+
+- **Chunk** — 10,000 ledgers (hardcoded). The unit of the hot DB and of the sorted runs.
+- **Window** — 1,000 chunks = 10,000,000 ledgers (hardcoded). The unit of the cold index.
+
+```
+chunkID(seq)        = (seq - 2) / 10_000
+chunkFirstLedger(c) = c * 10_000 + 2
+chunkLastLedger(c)  = (c + 1) * 10_000 + 1
+indexID(c)          = c / 1000                          # takes a CHUNK id
+chunksInIndex(w)    = [w*1000, (w+1)*1000 - 1]
+```
+
+Window 0 spans ledgers 2–10,000,001 (chunks 0–999), window N spans N×10M+2 – (N+1)×10M+1 (chunks N×1000 – (N+1)×1000−1). All ids zero-pad `%08d`.
+
+---
+
+# Part 3: Implementation Reference
+
+## 5. Hot tier
+
+### 5.1 Storage
+
+The hot tier is a plain key-value table, one per chunk, stored as a `txhash` column family in that chunk's RocksDB:
+
+- **Key**: the full 32-byte transaction hash.
+- **Value**: the 4-byte ledger sequence.
+
+Storing the full hash makes the hot tier **exact**: a lookup either finds the hash or it doesn't. There are no false positives to screen out and nothing to verify. The table is tuned for point lookups — bloom filters on, ordering off.
+
+### 5.2 Write path
+
+Writing is straightforward. As each ledger is ingested, one `(hash, seq)` entry is added for every transaction in it, in the same atomic write that stores the rest of the ledger. So a ledger's hashes are written all-or-nothing, together with the rest of the ledger.
+
+### 5.3 Lifetime
+
+A chunk's hot table lives from the moment the chunk starts ingesting until the cold index covers it. Coverage can lag the chunk's freeze by a while; until it lands, the chunk is simply answered from its hot table.
+
+## 6. Cold artifacts
+
+The cold tier has two kinds of file: a per-chunk sorted run (`.bin`) and the per-window index (`.idx`).
+
+### 6.1 The per-chunk sorted run: `.bin`
+
+The `.bin` lives at `txhash/raw/{bucket:05d}/{chunk:08d}.bin`, with catalog key `chunk:{chunk:08d}:txhash`. It is produced once, when the chunk is frozen: as the chunk's ledgers are read, each transaction's `(hash, seq)` is collected, and at the end they are **sorted in memory** (~3M entries ≈ 60 MB for a dense chunk — negligible) and written out.
+
+**Format** (the streamhash merge format):
+
+```
+uint64 LE        entry count
+entry × count    20 bytes each: [key: 16][seq: 4 LE]
+```
+
+- `key` is the **first 16 bytes of the transaction hash**. The index uses only these 16 bytes to place and find a transaction; what happens when two hashes share a 16-byte prefix is in §8.2.
+- Entries are sorted ascending by the **big-endian `uint64` prefix of `key`**.
+
+The `.bin` is a pre-sorted file, and a lookup never reads it directly. It is sorted because streamhash builds an index **much faster, and with much less memory, when its keys arrive already sorted** — its *sorted-builder mode*.
+
+A `.bin` is kept while it is still a rebuild input — every rebuild re-merges the `.bin` files for the chunks its window currently covers. Once the window is complete and its final index is built, the `.bin` files are no longer needed, and are deleted — or, if retention is narrower than a window so its chunks age out before the window completes, retention pruning deletes them first.
+
+### 6.2 The per-window index: `.idx`
+
+The `.idx` lives at `txhash/index/{window:08d}/{lo:08d}-{hi:08d}.idx`, tracked by the catalog key `index:{window:08d}:{lo:08d}:{hi:08d}`. There is one minimal-perfect-hash file per **coverage** — a coverage being the chunk range `[lo, hi]` the file actually hashes. Streamhash's `SortedBuilder` builds it from the k-way merge of `.bin[lo..hi]`. The index carries two per-entry fields:
+
+- **Payload (3 bytes): the answer the hash maps to — a ledger seq.** It is stored as an offset from the first ledger the index covers (`MinLedger = chunkFirstLedger(lo)` — the window's first ledger, unless the retention floor has raised `lo`) rather than as a full seq, to save bytes. A coverage never exceeds one window of 10,000,000 ledgers, so the largest possible offset (`10_000_000 - 1`) fits in a 24-bit field. Streamhash writes the payload width into the index file's header; `MinLedger`, which streamhash does not model itself, rides in the file's user-metadata slot. Both are read back at lookup time, so there is no separate sidecar file.
+- **Fingerprint (`fpWidth` bytes, default 1): a short per-entry tag to screen out wrong hashes** before the expensive fetch-and-verify. Because a lookup probes every in-retention window (§8.2), a wider fingerprint is a trade-off: each extra byte of width costs index size (+1 byte per transaction) but cuts the number of false-positive fetches across those windows. `fpWidth` is fixed per build.
+
+All-in, the index costs ≈4.2 bytes per transaction (MPHF structure + payload + fingerprint) — ≈12.5 GB for a dense full window, versus the ≈60 GB of `.bin` runs it consumes.
+
+### 6.3 Coverage and the live index
+
+An index file is named by its **coverage** — the chunk range `[lo, hi]` it hashes:
+
+- **`lo`** — the lowest chunk the index covers. It is the window's first chunk, unless the retention floor has cut into the window, in which case it rises to the first chunk still retained.
+- **`hi`** — the highest chunk the index covers. While the window is the current one (the network tip is in it), `hi` advances by one chunk on each rebuild. Once the window is complete, `hi` is its last chunk and the index is final.
+
+A window has exactly **one live index** at a time, and a lookup resolves "the window's index" to that one file. A rebuild builds a new index at a wider coverage and replaces the live one; the replacement is atomic, so a lookup always sees one complete index, never a half-built one. (How that swap stays atomic across a crash is the daemon's write protocol, in the streaming doc.)
+
+So the index hashes exactly the transactions in chunks `[lo, hi]`. Chunks below `lo` are out of scope — cut off by the floor. Chunks above `hi` aren't folded in yet, and are served from their hot tables until the next rebuild advances `hi`.
+
+**Example** (1,000 chunks per window): the tip is in chunk 5350, so window 5 (chunks 5000–5999) is the current window, and the floor is at chunk 5100. The live index covers chunks 5100–5349, in the file `txhash/index/00000005/00005100-00005349.idx`; chunk 5350 is still in its hot table, and chunks 5000–5099 are below the floor. At the next boundary the index is rebuilt to cover 5100–5350, and the old file is deleted.
+
+## 7. The rolling rebuild
+
+### 7.1 Rebuild cadence and cost
+
+The current window's index is **rebuilt from scratch on every chunk boundary**, to fold in the chunk that just froze; it grows until the window is complete. Only the current window is ever rebuilt — a finalized window's index never changes.
+
+This is affordable because the rebuild is cheap relative to its cadence: a full-window build takes ≈1 minute, against a boundary only every ~14 hours at mainnet rates (Part 4). Rebuilding the whole index each time keeps every `.idx` on disk a complete index for its coverage, with no half-updated state.
+
+### 7.2 The rebuild
+
+To rebuild window `w`'s index over coverage `[lo, hi]`:
+
+1. **Skip if already done.** If the live index already covers exactly `[lo, hi]`, there is nothing to do.
+2. **Merge.** Merge the sorted `.bin` files for chunks `[lo, hi]` into a new index file, with streamhash's sorted-builder. (Every chunk in `[lo, hi]` must have a `.bin`; a missing one fails the merge.)
+3. **Swap in.** Make the new file the window's live index, replacing the previous one.
+
+```go
+// rebuild window w's index over [lo, hi]
+sb := streamhash.NewSortedBuilder(newIndexFile, sortedBuilderOpts)
+for entry := range kWayMerge(binFiles(lo, hi)) { // sorted .bin files → one stream
+    sb.Add(entry)
+}
+sb.Finish()
+// then make newIndexFile the window's live index, replacing the old one
+```
+
+Because a rebuild writes a whole new file and only swaps it in at the end, the live index is never partially updated: a lookup sees either the old index or the new one, never something in between.
+
+### 7.3 Finalization
+
+When a window's last chunk is folded in, its index is final: it covers the whole window and is not rebuilt again — unless retention later widens to include older chunks, when it is rebuilt wider to cover them. The window's `.bin` files have done their job as rebuild inputs, and are deleted.
+
+### 7.4 Disk use during a rebuild
+
+A rebuild writes a whole new index file before the old one is removed, so a window directory briefly holds ~2× the index size (~25 GB at the end of a dense window). The window's `.bin` files are also all on disk together, since the rebuild merges them at once — about 60 GB for a dense window. Both are transient.
+
+The window-end rebuild writes ~12.5 GB in ~1 minute (~200 MB/s burst) — trivial on instance NVMe, but worth provisioning for on throughput-capped volumes like EBS gp3.
+
+## 8. Query path
+
+### 8.1 Routing
+
+A hash names no ledger, so the reader cannot know which home holds it in advance — it **probes them all**, and the hash resolves in at most one (none, if it isn't stored):
+
+| Tier | Probe set | How |
+|---|---|---|
+| cold — one `.idx` per window | **every in-retention window** | MPHF + fingerprint + verify (§8.2) |
+| hot — `txhash` CF per chunk | the chunks not yet covered by their window's live index (above its `hi`: live, or frozen awaiting coverage) | exact full-key get (§8.3) |
+
+The hot tier is a few chunks at most — one window's tail, normally just the live chunk — so the probe set is `≈ (in-retention windows) + (a handful of chunks)`. How the reader learns current coverage and stays consistent across rebuilds is the query-routing design's concern. This document requires only two things: that the two tiers together cover the whole retention window (the gap-free hot→cold handoff, §5.3), and that each transaction lives in exactly one of them. So **at most one probe confirms**: the verify runs on every fingerprint hit but succeeds for at most one.
+
+### 8.2 Cold lookup
+
+The cold tier **probes every in-retention window's `.idx`**. A hash gives no hint about which window it's in — to know the window you'd compute `chunkID(seq) / 1000`, and `seq` is the very thing the lookup is trying to find. So there is nothing to pre-select, and each window is probed in turn:
+
+```
+for each in-retention window (its live index → {lo}-{hi}.idx):
+  → MPHF probe on the hash's 16-byte prefix
+  → fingerprint check (fpWidth bytes)             — miss ⇒ skip this window
+  → on a fingerprint hit:
+       seq = MinLedger + payload (3 bytes)
+       retention gate: seq ≥ floor?               — else skip this window
+       fetch the LCM for seq, extract the tx
+       verify the full 32-byte hash               — confirms, or rejects a false positive
+respond on the confirmed hit; not-found if no window confirms
+```
+
+Because the hash belongs to at most one window, **at most one window confirms**; a not-found lookup — a non-existent or not-yet-ingested hash — confirms none and must rule out every in-retention window.
+
+The final verification is essential: a minimal perfect hash returns a slot for *any* input, including a hash it doesn't contain, so every hit must be confirmed. The fingerprint screens out most foreign hashes cheaply, and the fetch-and-verify rejects the rest.
+
+A **16-byte prefix collision between two distinct in-retention transactions** has two cases, and only one bounds completeness. The cold index keys on streamhash's 128-bit routing key (§6.1), so two hashes sharing their first 16 bytes are indistinguishable *to a single window's build*.
+
+*Different windows* — the more likely of the two, since a shared prefix is far more apt to straddle two of history's windows than to fall inside one. Each transaction keys into its own window's `.idx`, so neither build sees a duplicate and both resolve normally. The collision shows up only as a fingerprint false-positive when a lookup probes the *other* window. That window's MPHF maps the shared prefix to its own resident transaction, and the fingerprint (also derived from those 16 bytes) matches — but the fetch-and-verify rejects it, because the full 32-byte hashes differ. This is exactly the foreign-key path the verify already exists for: one wasted ledger fetch, no wrong answer and no false negative.
+
+*Same window* — the genuine residual. The two are a single key to that window's builder, so streamhash rejects the duplicate at build time (`ErrDuplicateKey`) and the build fails **loudly**: it never silently drops a transaction, and the verify ensures it never returns a wrong one. This is the only bound on completeness, and it is tiny — the birthday probability over a dense window's ~3×10⁹ keys against 2¹²⁸ is ~10⁻²⁰ per window, a cryptographic-scale risk accepted as negligible.
+
+**Probe ordering, parallelism, early-stop, and the resulting latency and I/O are the query-routing design's concern** (§8.1), out of scope here.
+
+### 8.3 Hot lookup
+
+Chunks above `hi` are probed in their hot DBs' `txhash` column family — an exact, full-key point get. A miss here is a real miss, with none of the cold tier's verification subtleties (the fetch-and-verify still runs, since the response needs the transaction anyway). In steady state this tier is just the live chunk, plus briefly the one chunk in the freeze-to-coverage gap. After catch-up or a crash it can be several chunks, shrinking as rebuilds advance `hi`.
+
+---
+
+# Part 4: Capacity & Performance
+
+## 9. Storage footprint
+
+Per dense chunk (~3M transactions) and dense window (1,000 chunks, ~3×10⁹ transactions):
+
+| Structure | Unit cost | Dense chunk | Dense window | Lifetime |
+|---|---|---|---|---|
+| hot `txhash` CF | 36 B/tx raw (32 key + 4 value), before RocksDB overhead | ~110 MB raw | — (per-chunk) | chunk ingestion → index coverage |
+| `.bin` sorted run | 20 B/tx exactly | ~60 MB | ~60 GB | chunk freeze → window finalization, or retention floor |
+| `.idx` | ≈4.2 B/tx (3-byte payload) | — (per-window) | ~12.5 GB | build → superseded next boundary, or retention |
+
+Transient peaks: ~2× the index size in the window dir during each rebuild (~25 GB at window end); the `.bin` files for the in-flight window total ~60 GB. Both are transient (§7.4). The steady-state durable cost of the cold tier is the `.idx` files alone: ≈4.2 bytes per transaction across all retained history.
+
+## 10. Performance
+
+- **Ingest, hot**: one `(hash, seq)` put per transaction, inside the ledger's existing write.
+- **Ingest, cold**: the in-memory sort of ~3M entries is negligible against the chunk's streaming pass; the `.bin` write is sequential.
+- **Rebuild**: a full dense window merges ~60 GB of sorted `.bin` files into a ~12.5 GB `.idx` in ≈1 minute (~200 MB/s write burst), measured in the `bench-fullhistory` harness. Mid-window rebuilds scale with `hi − lo`. Against a ~14-hour boundary cadence at mainnet rates, the rebuild is a ~0.1% duty cycle.
+- **Lookup, cold**: one MPHF probe per in-retention window — fingerprint screen, then fetch-and-verify on a hit. The hash is in at most one window, so at most one fetch confirms; fingerprint false positives (bounded by `fpWidth`, §6.2) are rejected by the full-hash verify. Probe ordering, parallelism, and the resulting latency/throughput are the query-routing design's concern (§8.1).
+- **Lookup, hot**: one RocksDB point get in a bloom-filtered CF, then the same ledger fetch.
+
+---
+
+## Related documents
+
+- [full-history-streaming-workflow.md](./full-history-streaming-workflow.md) — the daemon this subsystem lives in: geometry, the catalog and one write protocol, `processChunk`, the resolver and executor, the lifecycle run (freeze → rebuild → discard → prune), and the correctness invariants (INV-1 … INV-4) with their audits.
+- The reader / query-routing design — how readers obtain current coverage and dispatch between hot DBs and frozen files across transitions.
+- [getevents-full-history-design.md](./getevents-full-history-design.md) — the sibling subsystem (events), same hot/cold architecture over the same chunk geometry.
+- [packfile-library.md](./packfile-library.md) — the `.pack` format the read path's ledger fetch lands on.
+- `bench-fullhistory` — the measurement harness behind every figure in Part 4.
diff --git a/full-history/design-docs/03-backfill-workflow.md b/full-history/design-docs/03-backfill-workflow.md
deleted file mode 100644
index dbb7aa05f..000000000
--- a/full-history/design-docs/03-backfill-workflow.md
+++ /dev/null
@@ -1,698 +0,0 @@
-# Backfill Workflow
-
-## Overview
-
-Backfill populates the immutable stores for a configured ledger range `[start_ledger, end_ledger]`.
-
-**What it does:**
-- Ingests historical ledgers offline — no live queries served (only `getHealth` / `getStatus`). `getHealth` is the existing lightweight liveness check; `getStatus` is the new backfill-specific progress endpoint (see [getStatus API Response](#getstatus-api-response) below).
-- Writes directly to immutable file formats — no RocksDB active stores
-- Schedules work as a DAG of idempotent tasks, dispatched via a flat worker pool (default GOMAXPROCS slots)
-- Exits when done; on failure, re-run the same command — completed work is never repeated
-
-**What it produces:**
-
-| Query it enables | Immutable output | Scope |
-|-----------------|-----------------|-------|
-| `getLedger` | Ledger [pack file](https://github.com/stellar/stellar-rpc/pull/633) | Per chunk (10K ledgers) |
-| `getTransaction` | Txhash index files | Per txhash index (default 10M ledgers) |
-| `getEvents` | [Events cold segment](https://github.com/stellar/stellar-rpc/pull/635) | Per chunk |
-
----
-
-## Geometry
-
-The Stellar blockchain starts at ledger 2. Backfill organizes data using two concepts:
-
-- **Chunk** — 10_000 ledgers (hardcoded, not configurable)
-  - Atomic unit of ingestion and crash recovery
-  - Produces: one ledger `.pack` file, one raw txhash `.bin` file, one events cold segment (`events.pack`, `index.pack`, `index.hash`)
-  - `chunk_id = (ledger_seq - 2) / 10_000`
-- **Txhash Index** — `CHUNKS_PER_TXHASH_INDEX` chunks (default 1000 = 10M ledgers)
-  - One RecSplit index covers all transactions across `CHUNKS_PER_TXHASH_INDEX` chunks (default: 10M ledgers worth of transactions)
-  - Produces 16 CF (column family) `.idx` files per txhash index
-  - `index_id = chunk_id / CHUNKS_PER_TXHASH_INDEX`
-  - Configurable via TOML, but must not change across runs — once set, it is fixed
-
-### ID Formulas
-
-```
-chunk_id   = (ledger_seq - 2) / 10_000
-index_id   = chunk_id / CHUNKS_PER_TXHASH_INDEX
-```
-
-Example with `CHUNKS_PER_TXHASH_INDEX = 1000` (default):
-
-| Txhash Index ID | First Ledger | Last Ledger | Chunks |
-|-----------------|-------------|------------|--------|
-| 0 | 2 | 10_000_001 | 0–999 |
-| 1 | 10_000_002 | 20_000_001 | 1000–1999 |
-| 2 | 20_000_002 | 30_000_001 | 2000–2999 |
-| N | (N × 10M) + 2 | ((N+1) × 10M) + 1 | N×1000 – (N+1)×1000 - 1 |
-
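-Chunk and txhash index IDs use uniform `%08d` zero-padding (supports up to 99_999_999); the filesystem-only `bucket_id` is formatted as `%05d` (see Directory Structure).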
-
----
-
-## Configuration
-
-TOML file, passed via `stellar-rpc full-history-backfill --config path/to/config.toml`.
-
-- **TOML** defines data layout and storage paths — must be stable across runs
-- **CLI flags** define per-run parameters (range, workers, retries)
-
-### TOML Config
-
-**[SERVICE]**
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| `DEFAULT_DATA_DIR` | string | **required** | Base directory for meta store and default storage paths. |
-
-**[BACKFILL]**
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| `CHUNKS_PER_TXHASH_INDEX` | int | `1000` | Chunks per txhash index. Defines data layout — must be stable across runs. | 
-
-**[IMMUTABLE_STORAGE.LEDGERS]**
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| `PATH` | string | `{DEFAULT_DATA_DIR}/ledgers` | Base path for ledger pack files. |
-
-**[IMMUTABLE_STORAGE.EVENTS]**
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| `PATH` | string | `{DEFAULT_DATA_DIR}/events` | Base path for events cold segments. |
-
-**[IMMUTABLE_STORAGE.TXHASH_RAW]**
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| `PATH` | string | `{DEFAULT_DATA_DIR}/txhash/raw` | Base path for raw txhash `.bin` files (transient). |
-
-**[IMMUTABLE_STORAGE.TXHASH_INDEX]**
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| `PATH` | string | `{DEFAULT_DATA_DIR}/txhash/index` | Base path for RecSplit index files (permanent). |
-
-The `IMMUTABLE_STORAGE` prefix disambiguates from `ACTIVE_STORAGE` (RocksDB-backed mutable stores used by the streaming workflow).
-
-**[BACKFILL.BSB]** — BSB / Buffered Storage Backend (required)
-
-| Key | Type | Default | Description                                                                         |
-|-----|------|---------|-------------------------------------------------------------------------------------|
-| `BUCKET_PATH` | string | **required** | Remote object store path to fetch LedgerCloseMeta (without `gs://` prefix for GCS). |
-| `BUFFER_SIZE` | int | `1000` | Prefetch buffer depth per connection.                                               |
-| `NUM_WORKERS` | int | `20` | Download workers per connection.                                                    |
-
-**[LOGGING]**
-
-Both keys are optional. When a key is set both in TOML and on the CLI, the CLI flag wins — specifying both is not an error.
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| `LEVEL` | string | `"info"` | Minimum log severity. Accepted values: `debug` / `info` / `warn` / `error`. |
-| `FORMAT` | string | `"text"` | Log output format. Accepted values: `text` / `json`. |
-
-### CLI Flags
-
-| Flag | Type | Default | Description |
-|------|------|---------|-------------|
-| `--start-ledger` | uint32 | **required** | First ledger (inclusive). Must be ≥ 2. |
-| `--end-ledger` | uint32 | **required** | Last ledger (inclusive). Must be > `start_ledger`. |
-| `--workers` | int | `GOMAXPROCS` | Total concurrent DAG task slots. |
-| `--verify-recsplit` | bool | `true` | Run RecSplit verify phase after build. |
-| `--max-retries` | int | `3` | Max retries per task before marking it failed. |
-| `--log-level` | string | — | Overrides `[LOGGING].LEVEL` when set. |
-| `--log-format` | string | — | Overrides `[LOGGING].FORMAT` when set. |
-
-### Optional TOML Sections
-
-| Section | Key | Default | Description |
-|---------|-----|---------|-------------|
-| `[META_STORE]` | `PATH` | `{DEFAULT_DATA_DIR}/meta/rocksdb` | Meta store RocksDB directory |
-
-### Validation Rules
-
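-The first four items below are the only hard constraints; the remaining items clarify what is deliberately not enforced and how leftover gaps are handled: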
-
-- `start_ledger >= 2`
-- `end_ledger > start_ledger`
-- `[BACKFILL.BSB]` must be present
-- `CHUNKS_PER_TXHASH_INDEX` must not change after the first run — changing it invalidates existing txhash index boundaries
-- Backfill never prunes existing data — narrowing the range between runs is safe (completed work outside the new range is simply left untouched)
-- No txhash-index-alignment required — the operator can pass any arbitrary ledger range
-- If gaps remain after backfill, streaming mode validates completeness for all chunks and all txhash indexes at startup, reports any gaps to the operator, and aborts
-
-#### Chunk Boundary Expansion
-
-- System expands the requested range **outward** to the nearest chunk boundaries
-- Start expands DOWN to the first ledger of its chunk
-- End expands UP to the last ledger of its chunk
-- Never clamps inward — the effective range is always ≥ the requested range
-- Operator doesn't need to manually calculate chunk-aligned values
-
-```
-Operator requests:     --start-ledger 5_000_000  --end-ledger 56_337_842
-Chunk boundary expand: start=5_000_000 falls within chunk 499 (starts at 4_990_002)
-                       → expand start to 4_990_002
-                       end=56_337_842 falls within chunk 5633 (ends at 56_340_001)
-                       → expand end to 56_340_001
-Effective range:       ledgers 4_990_002–56_340_001 = 5_135 chunks
-```
-
-#### BSB Availability Validation
-
-After expansion, the system validates that the remote object store referenced by BSB contains all ledgers in the expanded range:
-
-- Expanded end exceeds BSB availability → error at startup (no silent truncation)
-- Operator must either reduce `--end-ledger` or wait for more ledgers to become available in BSB
-
-#### Partial Txhash Index Ranges
-
-If the expanded range does not complete a full txhash index:
-
-- Chunks are still backfilled and immediately serve `getLedger`/`getEvents` when the service is started in streaming mode
-- Txhash index creation only happens once **all** input chunks for the txhash index are ready
-- If txhash index creation does not happen in the current backfill run, the remaining chunks are completed either by a subsequent backfill run (should the operator run backfill again) or when streaming mode starts for the first time (see [Implications for Streaming Workflow](#implications-for-streaming-workflow) below)
-
-Ledger and events data are useful per-chunk and should not be blocked by txhash index alignment. Without relaxed validation:
-
-- A node at ledger 56_340_000 cannot backfill the latest ~6.3M ledgers because `50_000_002–56_340_001` doesn't align to a 10M txhash index boundary — the operator would have to wait until ledger 60_000_001
-- Incremental backfill (extending coverage from a completed txhash index to recent history) would be blocked unless the chain happens to sit on a txhash index boundary
-
-#### Implications for Streaming Workflow
-
-When backfill completes at a non-txhash-index-aligned boundary, a partially-filled txhash index remains. The streaming workflow completes the remaining chunks:
-
-- Streaming continues chunk ingestion from where backfill left off, writing the same per-chunk outputs (LFS, txhash, events) using the same flag-based idempotency
-- When streaming completes the last chunk needed for a pending txhash index, txhash index creation becomes eligible and runs
-- The meta store is the shared coordination point — streaming checks the same chunk flags as backfill, so there is no gap or overlap between backfill and streaming coverage
-
-See [PR #617 discussion](https://github.com/stellar/stellar-rpc/pull/617#discussion_r2969796337) for the original rationale.
-
-### Example: GCS Backfill Config
-
-```toml
-[SERVICE]
-DEFAULT_DATA_DIR = "/data/stellar-rpc"
-
-[BACKFILL]
-CHUNKS_PER_TXHASH_INDEX = 1000
-
-[IMMUTABLE_STORAGE.LEDGERS]
-PATH = "/mnt/nvme/ledgers"
-
-[IMMUTABLE_STORAGE.EVENTS]
-PATH = "/mnt/nvme/events"
-
-[IMMUTABLE_STORAGE.TXHASH_RAW]
-PATH = "/mnt/nvme/txhash/raw"
-
-[IMMUTABLE_STORAGE.TXHASH_INDEX]
-PATH = "/mnt/nvme/txhash/index"
-
-[BACKFILL.BSB]
-BUCKET_PATH = "sdf-ledger-close-meta/v1/ledgers/pubnet"
-
-[LOGGING]
-LEVEL = "info"
-FORMAT = "text"
-```
-
-```bash
-stellar-rpc full-history-backfill --config config.toml \
-  --start-ledger 2 \
-  --end-ledger 30_000_001 \
-  --workers 40
-```
-
----
-
-## Directory Structure
-
-With geometry (chunk, txhash index) and storage paths (`IMMUTABLE_STORAGE.*`) defined above, here is how they map to the filesystem.
-
-- Each data type has its own directory tree rooted at its `IMMUTABLE_STORAGE.*.PATH`
-- Chunk-level files (ledgers, events, raw txhash) are grouped into subdirectories (buckets) of 1_000 chunks:
-  - `bucket_id = chunk_id / 1000` (hardcoded, not configurable), formatted as `%05d`
-  - `bucket_id` is purely a filesystem concern — it does not appear in meta store keys, DAG dependencies, or config
-- Txhash index output is the only structure that uses `index_id` instead of `bucket_id`
-- Directories are created on-demand via `os.MkdirAll` (safe for concurrent writes)
-
-```
-{DEFAULT_DATA_DIR}/
-├── meta/
-│   └── rocksdb/                                  ← Meta store (WAL always enabled)
-│
-├── ledgers/                                      ← IMMUTABLE_STORAGE.LEDGERS.PATH
-│   ├── 00000/                                    ← chunks 0–999 (1_000 .pack files)
-│   │   ├── 00000000.pack                         ← ledger pack file (PR #633)
-│   │   ├── 00000001.pack
-│   │   └── ...
-│   ├── 00001/                                    ← chunks 1000–1999
-│   │   └── ...
-│   └── .../
-│
-├── events/                                       ← IMMUTABLE_STORAGE.EVENTS.PATH
-│   ├── 00000/                                    ← chunks 0–999 (3_000 files: 3 per chunk)
-│   │   ├── 00000000-events.pack                  ← compressed event blocks
-│   │   ├── 00000000-index.pack                   ← serialized roaring bitmaps
-│   │   ├── 00000000-index.hash                   ← MPHF for term → slot lookup
-│   │   └── ...
-│   └── .../
-│
-└── txhash/
-    ├── raw/                                      ← IMMUTABLE_STORAGE.TXHASH_RAW.PATH
-    │   ├── 00000/                                ← chunks 0–999 (1_000 .bin files)
-    │   │   ├── 00000000.bin                      ← TRANSIENT (deleted after RecSplit)
-    │   │   └── ...
-    │   └── .../
-    └── index/                                    ← IMMUTABLE_STORAGE.TXHASH_INDEX.PATH
-        ├── 00000000/                             ← txhash index 0 (16 RecSplit CF files)
-        │   └── cf-{0-f}.idx                      ← PERMANENT
-        └── .../
-```
-
-`CHUNKS_PER_TXHASH_INDEX` only affects `txhash/index/` — all other trees use the hardcoded 1_000-chunk `bucket_id` grouping regardless. 
-
-The directory tree above reflects the default `CHUNKS_PER_TXHASH_INDEX = 1000`. Using 20M ledgers (2_000 chunks) as an example:
-
-| `CHUNKS_PER_TXHASH_INDEX` | Txhash index dirs | Tradeoff |
-|---------------------------|-------------------|----------|
-| `1000` (default) | 2_000 / 1000 = 2 | Fewer dirs, larger indexes — longer build time per index, fewer files to search at query time |
-| `100` | 2_000 / 100 = 20 | More dirs, smaller indexes — faster build time per index, more files to search at query time |
-| `1` | 2_000 / 1 = 2_000 | One index per chunk — fastest build, most files to search |
-
-### Path Conventions
-
-| File Type | Pattern | Example |
-|-----------|---------|---------|
-| Ledger pack | `{IMMUTABLE_STORAGE.LEDGERS.PATH}/{bucketID:05d}/{chunkID:08d}.pack` | `ledgers/00000/00000042.pack` |
-| Raw txhash | `{IMMUTABLE_STORAGE.TXHASH_RAW.PATH}/{bucketID:05d}/{chunkID:08d}.bin` | `txhash/raw/00000/00000042.bin` |
-| RecSplit CF | `{IMMUTABLE_STORAGE.TXHASH_INDEX.PATH}/{indexID:08d}/cf-{nibble}.idx` | `txhash/index/00000000/cf-a.idx` |
-| Events data | `{IMMUTABLE_STORAGE.EVENTS.PATH}/{bucketID:05d}/{chunkID:08d}-events.pack` | `events/00000/00000042-events.pack` |
-| Events index | `{IMMUTABLE_STORAGE.EVENTS.PATH}/{bucketID:05d}/{chunkID:08d}-index.pack` | `events/00000/00000042-index.pack` |
-| Events hash | `{IMMUTABLE_STORAGE.EVENTS.PATH}/{bucketID:05d}/{chunkID:08d}-index.hash` | `events/00000/00000042-index.hash` |
-
-- **Nibble** = high 4 bits of `txhash[0]`, i.e., `txhash[0] >> 4`. Values `0`–`f`. Determines which of 16 CFs a txhash is routed to.
-- **Raw txhash format**: 36 bytes per entry, no header: `[txhash: 32 bytes][ledgerSeq: 4 bytes big-endian]`
-- **Events cold segment**: See [getEvents full-history design](https://github.com/stellar/stellar-rpc/pull/635) for the full format specification.
-
----
-
-## Meta Store Keys
-
-- Single RocksDB instance with WAL (Write-Ahead Log) always enabled
-- Authoritative source for crash recovery — all resume decisions derive from key presence in this store
-
-### Key Schema
-
-All IDs use uniform `%08d` zero-padding, matching the directory structure.
-
-| Key Pattern | Value | Written When |
-|-------------|-------|-------------|
-| `chunk:{C:08d}:lfs` | `"1"` | After ledger `.pack` file is fsynced |
-| `chunk:{C:08d}:txhash` | `"1"` | After raw txhash `.bin` file is fsynced |
-| `chunk:{C:08d}:events` | `"1"` | After events cold segment files (`events.pack`, `index.pack`, `index.hash`) are fsynced |
-| `index:{N:08d}:txhash` | `"1"` | After all 16 RecSplit CF `.idx` files are built and fsynced |
-
-- Values are `"1"` (retained for `ldb`/`sst_dump` readability); key presence is the signal
-- Key absence means not started or incomplete — treated identically on resume
-- Each chunk flag is written independently after its output's fsync — a crash may leave some flags set and others absent for the same chunk
-- On resume, each chunk's flags are checked independently — only missing outputs are produced
-- WAL is always enabled — disabling it would invalidate all crash recovery
-- `chunk:{C}:txhash` keys are deleted after the txhash index is built (the raw `.bin` files they reference are also deleted); all other flags are permanent
-
-**Examples:**
-```
-chunk:00000000:lfs     →  "1"     chunk 0 ledger pack done
-chunk:00000000:txhash  →  "1"     chunk 0 raw txhash done
-chunk:00000000:events  →  "1"     chunk 0 events cold segment done
-chunk:00000999:events  →  "1"     last chunk of txhash index 0
-index:00000000:txhash  →  "1"     txhash index 0 RecSplit complete
-index:00000001:txhash  →  absent  txhash index 1 not yet built
-```
-
-### Key Lifecycle
-
-```
-chunk ingestion        → sets chunk:{C}:lfs, chunk:{C}:txhash, chunk:{C}:events
-                         (each independently, after its output's fsync)
-txhash index build     → sets index:{N}:txhash
-txhash cleanup         → deletes chunk:{C}:txhash keys + raw .bin files
-```
-
-After a completed txhash index:
-- `chunk:{C}:lfs`, `chunk:{C}:events`, `index:{N}:txhash` — permanent
-- `chunk:{C}:txhash` keys + raw `.bin` files — deleted after txhash index is built
-
----
-
-## Tasks and Dependencies
-
-The backfill DAG has three task types:
-
-| Task | Cadence | Dependencies | Produces |
-|------|---------|-------------|----------|
-| `process_chunk(chunk_id)` | Per chunk (10K ledgers) | None | Ledger `.pack` + raw txhash `.bin` + events cold segment |
-| `build_txhash_index(index_id)` | Per txhash index | All `process_chunk` tasks for this txhash index | 16 RecSplit `.idx` files |
-| `cleanup_txhash(index_id)` | Per txhash index | `build_txhash_index` for this txhash index | Deletes raw `.bin` files + `chunk:{C}:txhash` meta keys |
-
-- Each task is a black box to the DAG scheduler — it calls `Execute()` and waits for return
-- What happens inside (goroutines, I/O, parallelism) is up to the task
-
-### Dependency Diagram
-
-For a single txhash index with N chunks:
-
-```
-process_chunk(chunk 0) ─┐
-process_chunk(chunk 1) ─┤
-process_chunk(chunk 2) ─┼──→ build_txhash_index(index_id) ──→ cleanup_txhash(index_id)
-...                     │
-process_chunk(chunk N) ─┘
-```
-
-- All `process_chunk` tasks for a txhash index must complete before `build_txhash_index` fires
-- `cleanup_txhash` runs after `build_txhash_index` succeeds
-- Cleanup deletes the raw `.bin` files and their `chunk:{C}:txhash` meta keys
-
-### Main Flow
-
-```python
-def run_backfill(config, flags):
-
-    # 1. Validate — abort before any work if config is incompatible with existing state
-    validate(config, flags)
-
-    # 2. Build DAG — register all tasks; each task's execute() handles its own no-op check
-    dag = build_dag(config, flags)
-
-    # 3. Execute — dispatch all tasks concurrently, bounded by worker count
-    dag.execute(max_workers=flags.workers)    # default GOMAXPROCS
-```
-
-### Validation
-
-Validation runs before DAG construction, not as a DAG task. If it were a DAG task, other tasks with no dependencies would start executing concurrently before validation completes — and if validation fails, in-flight work that should never have started would need to be cancelled. Running it first means a clean abort with no partial work.
-
-```python
-def validate(config, flags):
-    # See Validation Rules for the full list of checks.
-    assert flags.start_ledger >= 2
-    assert flags.end_ledger > flags.start_ledger
-    assert config.backfill.bsb is not None
-    assert CHUNKS_PER_TXHASH_INDEX unchanged from prior runs (if meta store is non-empty)
-```
-
-### DAG Setup
-
-```python
-def build_dag(config, flags):
-    # Wires up tasks and dependency edges — no completion checks or skip logic.
-    # Each task's execute() handles its own no-op check (early return if already complete).
-
-    dag = new DAG()
-
-    for index_id in configured_indexes(config, flags):
-        chunk_tasks = []
-        for chunk_id in chunks_for_index(index_id):
-            t = dag.add(ProcessChunkTask(chunk_id), deps=[])
-            chunk_tasks.append(t.id)
-        b = dag.add(BuildTxHashIndexTask(index_id),
-                     deps=chunk_tasks)
-        dag.add(CleanupTxHashTask(index_id), deps=[b.id])
-
-    return dag
-```
-
----
-
-## Task Details
-
-### process_chunk(chunk_id)
-
-- Processes a single 10K-ledger chunk end-to-end
-- Occupies one DAG worker slot
-- Only produces missing outputs — checks each flag independently
-- Internal concurrency is an implementation detail
-
-**Outputs** (all produced in a single task, only if missing):
-- Ledger pack file (`{chunkID:08d}.pack`) — compressed ledger data in [packfile format](https://github.com/stellar/stellar-rpc/pull/633)
-- Raw txhash flat file (`{chunkID:08d}.bin`) — 36-byte entries consumed by RecSplit builder
-- Events cold segment (`events.pack` + `index.pack` + `index.hash`) — per [getEvents design](https://github.com/stellar/stellar-rpc/pull/635)
-
-**Pseudocode:**
-
-```python
-process_chunk(chunk_id):
-    bucket_id    = chunk_id / 1000 # hardcoded subdirectory grouping (see Directory Structure)
-    first_ledger = chunk_first_ledger(chunk_id)
-    last_ledger  = chunk_last_ledger(chunk_id)
-
-    # 1. Check which outputs are missing
-    need_lfs    = not meta_store.has(f"chunk:{chunk_id:08d}:lfs")
-    need_txhash = not meta_store.has(f"chunk:{chunk_id:08d}:txhash")
-    need_events = not meta_store.has(f"chunk:{chunk_id:08d}:events")
-
-    if not (need_lfs or need_txhash or need_events):
-        return    # all outputs already present
-
-    # 2. Choose data source
-    if not need_lfs:
-        source = local_packfile(ledger_pack_path(bucket_id, chunk_id))   # NVMe, no BSB
-    else:
-        source = BSBFactory.create(first_ledger, last_ledger)            # BSB connection
-
-    # 3. Open writers only for missing outputs
-    ledger_writer = packfile.create(ledger_pack_path(bucket_id, chunk_id),
-                                    overwrite=True) if need_lfs else None
-    txhash_writer = open(raw_txhash_path(bucket_id, chunk_id),
-                         overwrite=True) if need_txhash else None
-    events_writer = events_segment.create(events_path(bucket_id, chunk_id),
-                                          overwrite=True) if need_events else None
-
-    # 4. Process each ledger
-    for seq in range(first_ledger, last_ledger + 1):
-        lcm = source.get_ledger(seq)
-
-        if need_lfs:    ledger_writer.append(compress(lcm))
-        if need_txhash: txhash_writer.append(extract_txhashes(lcm))   # 36 bytes per tx
-        if need_events: events_writer.append(extract_events(lcm))
-
-    # 5. Fsync + flag each output independently
-    if need_lfs:
-        ledger_writer.fsync_and_close()
-        meta_store.put(f"chunk:{chunk_id:08d}:lfs", "1")
-
-    if need_txhash:
-        txhash_writer.fsync_and_close()
-        meta_store.put(f"chunk:{chunk_id:08d}:txhash", "1")
-
-    if need_events:
-        events_writer.finalize()          # flush, build MPHF + bitmap index, fsync
-        meta_store.put(f"chunk:{chunk_id:08d}:events", "1")
-
-    source.close()
-```
-
-Key properties:
-- Only missing outputs are produced — a partially-completed chunk resumes from where it left off
-- If LFS is already present, reads from local NVMe instead of BSB (avoids redundant download)
-- Each flag is written independently after its output's fsync — no atomic WriteBatch needed
-- All three writers are opened with `overwrite=True` (e.g. `packfile.Create()`), which truncates partial files left by prior crashes — no explicit `delete_if_exists` check needed
-- Naturally extends to new data types (add a fourth flag)
-
-**BSB** (BufferedStorageBackend):
-- Ledger source backed by a remote object store 
-- Each `process_chunk` task creates its own BSB connection
-- Internal prefetch workers: `BUFFER_SIZE` ledgers ahead, `NUM_WORKERS` download goroutines
-
-### build_txhash_index(index_id)
-
-- Builds the RecSplit txhash index for one completed txhash index
-- Occupies one DAG worker slot, but spawns several goroutines internally
-- The DAG guarantees all chunk `.bin` files exist before this runs
-
-**Pseudocode:**
-
-```python
-build_txhash_index(index_id):
-    if meta_store.has(f"index:{index_id:08d}:txhash"):
-        return                                            # already built — no-op
-
-    bin_files = list_bin_files(index_id)      # all .bin files for chunks in this txhash index
-
-    # Phase 1: COUNT — scan all .bin files, count entries per CF
-    cf_counts = parallel_count(bin_files, workers=100)
-    # cf_counts[nibble] = number of (txhash, ledgerSeq) entries routed to that CF
-
-    # Phase 2: ADD — re-read .bin files, route entries to CF builders
-    cf_builders = [RecSplitBuilder(cf_counts[n]) for n in range(16)]
-    parallel_add(bin_files, cf_builders, workers=100)
-    # each entry routed to cf_builders[txhash[0] >> 4] (mutex per CF)
-
-    # Phase 3: BUILD — build MPH index per CF, one .idx file each
-    parallel_build(cf_builders, workers=16)
-    # each CF produces one .idx file; all fsynced
-
-    # Phase 4: VERIFY (optional) — look up every key in the built indexes
-    if verify_recsplit:
-        parallel_verify(bin_files, cf_builders, workers=100)
-
-    # Mark index complete
-    meta_store.put(f"index:{index_id:08d}:txhash", "1")
-```
-
-Key properties:
-- COUNT and ADD each read all `.bin` files (two full passes over the data)
-- BUILD runs 16 goroutines in parallel (one per CF) — each CF is independent
-- VERIFY is skippable via the `--verify-recsplit=false` CLI flag
-- All-or-nothing recovery: if `index:{N}:txhash` is absent on restart → delete partial `.idx` files → rerun entire build
-
-### cleanup_txhash(index_id)
-
-- Runs after `build_txhash_index` completes successfully
-
-**Pseudocode:**
-
-```python
-cleanup_txhash(index_id):
-    for chunk_id in chunks_for_index(index_id):
-        if not meta_store.has(f"chunk:{chunk_id:08d}:txhash"):
-            continue                                      # already cleaned up — skip
-        delete(raw_txhash_path(bucket_id, chunk_id))      # remove .bin file
-        meta_store.delete(f"chunk:{chunk_id:08d}:txhash")  # remove meta key
-```
-
-Key properties:
-- Modeled as a separate DAG task (not inline in `build_txhash_index`) so crash recovery works naturally
-- Per-chunk idempotency: each chunk checks its own `chunk:{C}:txhash` key before deleting — a crash mid-cleanup resumes from where cleanup left off
-- On restart: `build_txhash_index` finds the txhash index key present (build complete) and returns immediately; `cleanup_txhash` then runs as a normal task and removes whichever `chunk:{C}:txhash` keys and raw `.bin` files still exist
-
----
-
-## Execution Model
-
-### DAG Scheduler
-
-- Pipeline builds a single DAG at startup, executes it with bounded concurrency
-- The DAG is the only scheduling mechanism — no per-txhash-index coordinators, no secondary worker pools
-- Each task's `Execute()` is wrapped with a retry loop bounded by `--max-retries` (default 3). Any transient failure (BSB errors, temporary I/O issues) triggers a retry at the task level.
-
-```python
-run_dag(dag, max_workers):
-    worker_slots   = Semaphore(max_workers)
-    runnable_tasks = ThreadSafeQueue(dag.tasks_with_no_pending_dependencies())
-
-    def execute_task(task):
-        """Runs in a background thread — one per dispatched task."""
-        for attempt in range(1, max_retries + 1):
-            error = task.execute()
-            if error is None:
-                break
-            if attempt == max_retries:
-                mark_failed(task, error)              # halt all dependents
-                break
-            log.warn("retry", task, attempt, error)
-
-        worker_slots.release()                        # free worker slot
-
-        # Check if completing this task unblocks any downstream tasks
-        for downstream in dag.dependents_of(task):
-            downstream.mark_dependency_done(task)
-            if downstream.all_dependencies_done():
-                runnable_tasks.push(downstream)       # now eligible to run
-
-    # Main loop — dispatches tasks as they become runnable
-    while runnable_tasks:
-        current_task = runnable_tasks.pop()
-        worker_slots.acquire()                        # block until a worker slot is free
-        run_in_background(execute_task, current_task) # launch — returns immediately
-```
-
-### Worker Pool
-
-- Single flat pool of `workers` slots (default `GOMAXPROCS`)
-- Any mix of task types can occupy slots simultaneously
-- `process_chunk`: 1 slot per task
-- `build_txhash_index`: 1 slot per task (uses many goroutines internally)
-- `cleanup_txhash`: 1 slot per task
-
-### How Work Flows Through the Pipeline
-
-- All `process_chunk` tasks have no dependencies → DAG dispatches up to `workers` slots immediately at startup
-- Chunks from different txhash indexes run side by side — the scheduler does not process txhash indexes sequentially
-- When the last chunk of a txhash index completes → `build_txhash_index` becomes eligible, claims a slot
-- After build completes → `cleanup_txhash` becomes eligible
-- Remaining slots continue processing chunks for other txhash indexes throughout — no special coordination needed
-
----
-
-## Crash Recovery
-
-There is no separate crash recovery, reconciliation, or startup triage phase. Recovery happens organically because every task's `execute()` checks its own completion state:
-
-- On every startup, `build_dag()` registers ALL tasks for the configured range — no meta store scanning in DAG setup
-- `process_chunk` checks each output flag independently — missing outputs are produced, existing outputs are skipped
-- `build_txhash_index` checks `index:{N}:txhash` — if present, returns immediately; if absent, deletes partial `.idx` files and reruns the full build
-- `cleanup_txhash` checks `chunk:{C}:txhash` per-chunk — already-cleaned chunks are skipped, remaining chunks are cleaned up
-
-This works because of three invariants:
-
-1. **Key implies durable file** — a meta store flag is set only after fsync
-2. **Tasks are idempotent** — each checks its own outputs and skips or overwrites what exists
-3. **DAG registers all tasks on every startup** — completed tasks return immediately from `execute()`
-
-### Concurrent Access Prevention
-
-- Meta store RocksDB uses kernel-level `flock()` on a `LOCK` file
-- A second process attempting to open the same meta store fails immediately
-- Released automatically on process exit (including `kill -9`)
-
-
----
-
-## getStatus API Response
-
-During backfill, `getStatus` returns progress as task-type summaries:
-- No per-txhash-index breakdown — just completed/pending/in_progress counts per task type
-
-```json
-{
-  "mode": "BACKFILL",
-  "tasks": {
-    "process_chunk":        {"completed": 288, "pending": 5712, "in_progress": 40},
-    "build_txhash_index":   {"completed": 0, "pending": 6, "in_progress": 0},
-    "cleanup_txhash":       {"completed": 0, "pending": 6, "in_progress": 0}
-  },
-  "eta_seconds": 1820
-}
-```
-
----
-
-## Error Handling
-
-Two layers of retry:
-
-- **BSB retries** — BSB handles transient errors internally (connection resets, throttling, etc). These retries happen within a single task execution and are not visible to the DAG scheduler.
-- **Task-level retries** — the DAG scheduler wraps each task's `execute()` with a retry loop bounded by `--max-retries` (default 3). If a task returns an error after BSB has exhausted its own retries, the scheduler retries the entire task. Once the `--max-retries` attempts are exhausted, the task is marked failed, the DAG halts all dependent tasks, and the process exits non-zero.
-
-Operator re-runs the same command; completed work is never repeated.
-
-| Error | Handled by | Action |
-|-------|-----------|--------|
-| BSB transient error (throttle, connection reset) | BSB internal retry | Retried within the task; transparent to DAG |
-| BSB persistent error (BSB retries exhausted) | Task-level retry | `--max-retries` attempts; then ABORT |
-| Ledger pack write / fsync failure | Task-level retry | `--max-retries` attempts; then ABORT; flag not set |
-| TxHash write / fsync failure | Task-level retry | `--max-retries` attempts; then ABORT; flag not set |
-| Events write / fsync failure | Task-level retry | `--max-retries` attempts; then ABORT; flag not set |
-| RecSplit build failure | Task-level retry | `--max-retries` attempts; then ABORT; txhash index key absent |
-| Verify phase mismatch | None | ABORT immediately — data corruption, operator investigates |
-| Meta store write failure | None | ABORT immediately — treat as crash, operator re-runs |
diff --git a/full-history/design-docs/README.md b/full-history/design-docs/README.md
deleted file mode 100644
index e682068d6..000000000
--- a/full-history/design-docs/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# Stellar Full History RPC Service — Design Docs
-
-> **Scope**: Backfill pipeline only. Streaming pipeline design is covered separately.
-
-## Documents
-
-| Document | Description |
-|----------|-------------|
-| [03-backfill-workflow.md](./03-backfill-workflow.md) | Complete backfill design — geometry, meta store keys, directory layout, configuration, DAG task graph, execution model, crash recovery, getStatus API |
-
-The backfill doc is self-contained. Read it top-to-bottom for the full picture.
-
-## Quick Context
-
-The Stellar Full History RPC Service ingests the complete blockchain history. Primary use cases:
-
-- Retrieve any ledger from history
-- Retrieve any transaction from history
-- Retrieve any events with filter matching from history
-
-It has two modes:
-
-- **Backfill** — offline bulk import. Writes directly to immutable files (LFS chunks + RecSplit indexes). No RocksDB, no queries during ingestion. DAG-scheduled with a flat worker pool.
-- **Streaming** — real-time ingestion via CaptiveStellarCore. Writes to RocksDB active stores, serves queries, transitions to immutable storage at index boundaries. Covered in a separate design doc.
-
-These modes are fully independent — separate code, separate crash recovery, separate transition workflows.

Column family	Holds	Serves
`ledgers`	compressed LCMs, keyed by seq	`getLedger` for the live chunk; the source `processChunk` reads at freeze
`txhash`	tx hash → seq	`getTransaction` for the live chunk
events CFs	live events (schema per the events doc)	`getEvents` for the live chunk
Key	Meaning
`chunk:{c}:ledgers`	Per-chunk `.pack` file state.
`chunk:{c}:txhash`	Per-chunk `.bin` file state. Transient — removed at window finalization, or by retention pruning if its chunk ages out first.
`chunk:{c}:events`	Per-chunk events cold segment state.
`index:{w}:{lo}:{hi}`	One key per index coverage. The key name carries the coverage and maps 1:1 to the file `{lo}-{hi}.idx`; the value is pure lifecycle state. At most one coverage per window is "frozen" at any moment.
`hot:chunk:{c}`	"ready" = dir exists and is usable; "transient" = a directory operation (create or delete) is in flight — the recovery is the same either way, which is why one value suffices.
`config:earliest_ledger`	Written on first start, immutable thereafter (startup aborts on mismatch).
Symptom	Violates	Detected by
A key flips "frozen" before fsync; key's `{lo,hi}` doesn't match the file; a frozen file mutated post-freeze	INV-1	re-derive via a conformant backend, byte-compare
Pruning too aggressive — an in-retention read returns wrong/missing results	INV-1	issue reads
Two frozen index keys in one window (promotion and demotion landed as separate writes)	INV-2	walk `index:*`, count "frozen" per window
A "freezing"/"pruning" key survives at settled	INV-2	walk keys for transient values at settled
A hot DB persists for a chunk that cold artifacts fully serve	INV-2	walk `hot:chunk:*` against coverage
Finalization demotions don't complete — `.bin` keys outlive their terminal index	INV-2	walk `chunk:{c}:txhash` in finalized windows
A file on disk without its key (orphan — invisible to every key-driven scan)	INV-3	walk filesystem against catalog
A key without its file (dangling)	INV-3	walk catalog against filesystem
Duplicate cold artifacts for the same logical data	INV-3	walk filesystem against key-specified paths
Files or keys remain below the retention floor	INV-4	walk keys against the floor