From 17ce12d7f9480175fc7853fea047f1f007783497 Mon Sep 17 00:00:00 2001 From: Simon Chow Date: Tue, 23 Jun 2026 10:13:39 -0400 Subject: [PATCH 1/2] =?UTF-8?q?feat(fullhistory):=20streaming=20daemon=20?= =?UTF-8?q?=E2=80=94=20Slice=203=20(tx-hash)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stacked on slice 2 (ledgers + events); this commit's diff is the tx-hash subsystem on top. Completes the daemon — with slices 1-3 applied the tree is byte-identical to the full 3-type implementation. Adds the TX-HASH data type and its per-window rolling-index subsystem: - the txhash column family in the per-chunk hot DB (one atomic synced WriteBatch per ledger now carries ledgers + events + tx-hashes); - the per-chunk sorted .bin run + the per-window streamhash .idx, with the rolling rebuild on each chunk boundary, coverage [lo,hi], and the atomic promote/demote commit batch; - the resolver's per-window IndexBuild + the executor's index-build stratum (chunk->index done-channel dependency); index-aware discard (a hot DB lives until its window index covers the chunk), prune's redundant-.bin branch, surgical recovery of index keys, and the audit INV-2 (single frozen coverage / no leftover .bin) + INV-3/INV-4 index walks; - the chunks_per_txhash_index config pin, rebuild observability, and the multi-window tx-hash lookup E2E (cross-window false-positive rejection). Composes the txhash store + single-index build (#728/#729) and the read path (#794), plus the streamhash dependency, already on feature/full-history. Built against RocksDB 10.9.1 (grocksdb 1.10.7); fullhistory tree green on the non-short suite incl. the multi-window lookup E2E. 
--- .../internal/fullhistory/ingest/driver.go | 65 +- .../fullhistory/ingest/ingest_test.go | 856 +++++++++++++++--- .../internal/fullhistory/ingest/service.go | 15 +- .../pkg/stores/hotchunk/hotchunk.go | 131 ++- .../pkg/stores/hotchunk/hotchunk_test.go | 340 ++++--- .../pkg/stores/txhash/hot_store.go | 55 +- .../internal/fullhistory/streaming/PERF.md | 65 ++ .../fullhistory/streaming/artifacts.go | 7 +- .../internal/fullhistory/streaming/audit.go | 30 +- .../fullhistory/streaming/audit_invariants.go | 237 ++++- .../fullhistory/streaming/audit_test.go | 247 ++++- .../fullhistory/streaming/backfill_test.go | 16 +- .../internal/fullhistory/streaming/catalog.go | 111 ++- .../fullhistory/streaming/catalog_protocol.go | 119 ++- .../fullhistory/streaming/catalog_sweep.go | 41 +- .../internal/fullhistory/streaming/config.go | 59 +- .../fullhistory/streaming/config_test.go | 30 +- .../fullhistory/streaming/config_validate.go | 38 +- .../streaming/config_validate_test.go | 141 +-- .../fullhistory/streaming/convergence_test.go | 348 +++++-- .../internal/fullhistory/streaming/daemon.go | 6 +- .../fullhistory/streaming/daemon_test.go | 50 +- .../internal/fullhistory/streaming/doc.go | 51 +- .../fullhistory/streaming/e2e_test.go | 360 ++++++-- .../fullhistory/streaming/eligibility.go | 103 ++- .../internal/fullhistory/streaming/execute.go | 137 ++- .../fullhistory/streaming/execute_test.go | 197 +++- .../internal/fullhistory/streaming/hooks.go | 40 +- .../internal/fullhistory/streaming/ingest.go | 25 +- .../fullhistory/streaming/ingest_test.go | 25 +- .../internal/fullhistory/streaming/keys.go | 91 +- .../fullhistory/streaming/lifecycle.go | 25 +- .../fullhistory/streaming/lifecycle_test.go | 195 +++- .../fullhistory/streaming/observability.go | 77 +- .../streaming/observability_test.go | 94 +- .../internal/fullhistory/streaming/paths.go | 89 +- .../fullhistory/streaming/perf_test.go | 251 +++++ .../internal/fullhistory/streaming/process.go | 1 + 
.../fullhistory/streaming/process_test.go | 76 +- .../fullhistory/streaming/progress.go | 56 +- .../fullhistory/streaming/progress_test.go | 34 +- .../fullhistory/streaming/recovery.go | 71 +- .../fullhistory/streaming/recovery_test.go | 121 ++- .../internal/fullhistory/streaming/resolve.go | 118 ++- .../fullhistory/streaming/resolve_test.go | 184 +++- .../fullhistory/streaming/retention.go | 28 +- .../fullhistory/streaming/retention_test.go | 305 ++++++- .../internal/fullhistory/streaming/startup.go | 2 +- .../fullhistory/streaming/startup_test.go | 20 +- .../fullhistory/streaming/streaming_test.go | 580 +++++++++++- .../internal/fullhistory/streaming/txindex.go | 267 ++++++ .../fullhistory/streaming/txindex_test.go | 515 +++++++++++ .../internal/fullhistory/streaming/window.go | 69 ++ 53 files changed, 6003 insertions(+), 1211 deletions(-) create mode 100644 cmd/stellar-rpc/internal/fullhistory/streaming/PERF.md create mode 100644 cmd/stellar-rpc/internal/fullhistory/streaming/perf_test.go create mode 100644 cmd/stellar-rpc/internal/fullhistory/streaming/txindex.go create mode 100644 cmd/stellar-rpc/internal/fullhistory/streaming/txindex_test.go create mode 100644 cmd/stellar-rpc/internal/fullhistory/streaming/window.go diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go b/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go index 0cf3e9bcf..7e0fa7490 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go @@ -17,15 +17,18 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" ) -// HotStores holds the long-lived, caller-owned per-chunk hot DB injected into -// RunHot. The caller (the daemon) opens and closes it; RunHot only borrows it -// to drive the per-ledger atomic ingest. 
The DB is chunk-bound (it accumulates -// exactly one chunk before being frozen into cold artifacts), so the injected -// DB must already be bound to the chunk being ingested — RunHot rejects a -// mismatch up front. A nil DB with any data type enabled in cfg is a -// configuration error caught by RunHot. +// HotStores holds the long-lived, caller-owned shared per-chunk hot DB injected +// into RunHot. The caller (the daemon) opens and closes it; RunHot only borrows +// it to drive the per-ledger atomic ingest. Under decision (a) this is ONE +// multi-CF RocksDB instance (ledgers + events CFs + txhash CFs), not three +// independent stores. The DB is chunk-bound (it accumulates exactly one chunk +// before being frozen into cold artifacts), so the injected DB must already be +// bound to the chunk being ingested — RunHot rejects a mismatch up front. A nil +// DB with any data type enabled in cfg is a configuration error caught by +// RunHot. type HotStores struct { - // HotDB is the per-chunk hot DB. Required when any hot data type is enabled. + // HotDB is the shared per-chunk multi-CF hot DB. Required when any hot data + // type is enabled. HotDB *hotchunk.DB } @@ -33,14 +36,15 @@ type HotStores struct { // hotchunk.Ingest toggles that select which CFs the single per-ledger batch // writes. func ingestContributions(cfg Config) hotchunk.Ingest { - return hotchunk.Ingest{Ledgers: cfg.Ledgers, Events: cfg.Events} + return hotchunk.Ingest{Ledgers: cfg.Ledgers, Txhash: cfg.Txhash, Events: cfg.Events} } // buildColdIngesters opens one ColdIngester per data type enabled in cfg, // each opening its own per-chunk writer under coldDir/<dataType> (constructors // create their own directories and freely overwrite any prior attempt's -// files — see the package doc's artifact model). On any constructor error it -// closes the ingesters built so far and returns. +// files — see the package doc's artifact model).
The constructor table below +// is the single definition site of the canonical ledgers→txhash→events order. +// On any constructor error it closes the ingesters built so far and returns. func buildColdIngesters(coldDir string, chunkID chunk.ID, sink MetricSink, cfg Config) ([]ColdIngester, error) { ctors := []struct { enabled bool @@ -48,6 +52,7 @@ func buildColdIngesters(coldDir string, chunkID chunk.ID, sink MetricSink, cfg C open func(string, chunk.ID, MetricSink) (ColdIngester, error) }{ {cfg.Ledgers, dataTypeLedgers, NewLedgerColdIngester}, + {cfg.Txhash, dataTypeTxhash, NewTxhashColdIngester}, {cfg.Events, dataTypeEvents, NewEventsColdIngester}, } var ings []ColdIngester @@ -115,15 +120,16 @@ func RunHot( if verr := cfg.validate(); verr != nil { return verr } - anyEnabled := cfg.Ledgers || cfg.Events + anyEnabled := cfg.Ledgers || cfg.Txhash || cfg.Events if anyEnabled && hotStores.HotDB == nil { return errors.New("ingest: a hot data type is enabled but HotStores.HotDB is nil") } - // The hot DB is chunk-bound — it accumulates exactly one chunk's data - // before being frozen into the chunk's cold artifacts — and records its - // chunk at open time. An injected DB bound to a different chunk than we're - // ingesting would silently interleave two chunks' data, so catch the - // mismatch up front with a clear message. + // The shared hot DB is chunk-bound — it accumulates exactly one chunk's + // data before being frozen into the chunk's cold artifacts — and records + // its chunk at open time. An injected DB bound to a different chunk than + // we're ingesting would silently interleave two chunks' data or fail every + // per-ledger events write with an out-of-range offset (LedgerOffsets are + // chunk-relative), so catch the mismatch up front with a clear message. 
if hotStores.HotDB != nil && hotStores.HotDB.ChunkID() != chunkID { return fmt.Errorf("ingest: RunHot chunk %d but injected hot DB is bound to chunk %d", uint32(chunkID), uint32(hotStores.HotDB.ChunkID())) @@ -191,24 +197,28 @@ func drain(ctx context.Context, stream ledgerbackend.LedgerStream, chunkID chunk // ColdDirs names the per-data-type output root for one chunk's cold artifacts. // Each field is the directory UNDER WHICH the matching cold ingester composes // its {bucketID:05d}/ subdirectory — i.e. the same `coldDir` the per-type -// constructor (NewLedgerColdIngester) takes. A field left "" for a data type -// enabled in cfg is a configuration error caught by RunColdChunk. +// constructor (NewLedgerColdIngester / NewTxhashColdIngester / +// NewEventsColdIngester) takes. A field left "" for a data type enabled in cfg +// is a configuration error caught by RunColdChunk. // -// RunCold derives this root from a single coldDir by appending the fixed -// dataType subdirectory (coldDir/ledgers). ColdDirs exists so a caller with a -// DIFFERENT on-disk layout can place each artifact at its own canonical path -// while reusing the very same cold ingesters, ColdService, and drain loop. +// RunCold derives these three roots from a single coldDir by appending the +// fixed dataType subdirectory (coldDir/ledgers, coldDir/txhash, coldDir/events). +// ColdDirs exists so a caller with a DIFFERENT on-disk layout (e.g. the +// streaming daemon, whose raw txhash runs live under txhash/raw, not txhash) +// can place each artifact at its own canonical path while reusing the very same +// cold ingesters, ColdService, and drain loop. type ColdDirs struct { Ledgers string + Txhash string Events string } // buildColdIngestersIn opens one ColdIngester per data type enabled in cfg, // each under its OWN root from dirs (rather than coldDir/<dataType>).
It is the -// ColdDirs counterpart of buildColdIngesters: same constructors, same -// rollback-on-constructor-error semantics; it differs only in resolving each -// type's root from an explicit field instead of a fixed subdirectory of one -// coldDir. +// ColdDirs counterpart of buildColdIngesters: same constructors, same canonical +// ledgers→txhash→events order, same rollback-on-constructor-error semantics; it +// differs only in resolving each type's root from an explicit field instead of +// a fixed subdirectory of one coldDir. func buildColdIngestersIn(dirs ColdDirs, chunkID chunk.ID, sink MetricSink, cfg Config) ([]ColdIngester, error) { ctors := []struct { enabled bool @@ -217,6 +227,7 @@ func buildColdIngestersIn(dirs ColdDirs, chunkID chunk.ID, sink MetricSink, cfg open func(string, chunk.ID, MetricSink) (ColdIngester, error) }{ {cfg.Ledgers, dataTypeLedgers, dirs.Ledgers, NewLedgerColdIngester}, + {cfg.Txhash, dataTypeTxhash, dirs.Txhash, NewTxhashColdIngester}, {cfg.Events, dataTypeEvents, dirs.Events, NewEventsColdIngester}, } var ings []ColdIngester diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go b/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go index c4ce5dbb9..4d7614b5b 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go @@ -1,6 +1,7 @@ package ingest import ( + "bytes" "context" "errors" "iter" @@ -26,6 +27,7 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" ) // testPassphrase is a network passphrase literal used only by the test fixtures @@ -198,6 +200,53 @@ func marshalLCM(t *testing.T, seq uint32) []byte { 
return raw } +// eventTopic is the symbol topic embedded by event-bearing fixtures; the same +// term key derives from it, so tests can look the event up in the index. +const eventTopic = "ingest_test" + +// marshalLCMWithEvent builds a V2 LCM carrying one transaction with one +// operation-level contract event (topic=eventTopic). It returns the wire bytes, +// the transaction hash (for txhash lookups), and the event's term key (for event +// index lookups). +func marshalLCMWithEvent(t *testing.T, seq uint32) ([]byte, [32]byte, events.TermKey) { + t.Helper() + ev := buildContractEvent(eventTopic) + meta := xdr.TransactionMeta{ + V: 4, + V4: &xdr.TransactionMetaV4{Operations: []xdr.OperationMetaV2{{Events: []xdr.ContractEvent{ev}}}}, + } + lcm, hash := buildLCMWithTx(t, seq, meta) + rawBytes, err := lcm.MarshalBinary() + require.NoError(t, err) + + evBytes, err := ev.MarshalBinary() + require.NoError(t, err) + keys, err := events.TermsForBytes(evBytes) + require.NoError(t, err) + require.NotEmpty(t, keys) + return rawBytes, hash, keys[0] +} + +// buildContractEvent returns a contract ContractEvent with a single symbol +// topic, mirroring the events-package test fixture. +func buildContractEvent(topic string) xdr.ContractEvent { + var contractID xdr.ContractId + contractID[0] = 0xab + contractID[1] = 0xcd + sym := xdr.ScSymbol(topic) + return xdr.ContractEvent{ + ContractId: &contractID, + Type: xdr.ContractEventTypeContract, + Body: xdr.ContractEventBody{ + V: 0, + V0: &xdr.ContractEventV0{ + Topics: []xdr.ScVal{{Type: xdr.ScValTypeScvSymbol, Sym: &sym}}, + Data: xdr.ScVal{Type: xdr.ScValTypeScvSymbol, Sym: &sym}, + }, + }, + } +} + func successResult() xdr.TransactionResult { opResults := []xdr.OperationResult{} return xdr.TransactionResult{ @@ -217,6 +266,14 @@ func buildLCM(t *testing.T, seq uint32, txMetas []xdr.TransactionMeta) xdr.Ledge return lcm } +// buildLCMWithTx builds a single-transaction V2 LCM and returns the tx hash. 
+func buildLCMWithTx(t *testing.T, seq uint32, meta xdr.TransactionMeta) (xdr.LedgerCloseMeta, [32]byte) { + t.Helper() + lcm, hashes := buildLCMReturningHashes(t, seq, []xdr.TransactionMeta{meta}) + require.Len(t, hashes, 1) + return lcm, hashes[0] +} + // buildLCMReturningHashes assembles a V2 LedgerCloseMeta with one envelope per tx // meta and returns the per-tx transaction hashes in order. func buildLCMReturningHashes( @@ -279,50 +336,6 @@ func buildLCMReturningHashes( return lcm, hashes } -// eventTopic is the contract-event topic the events fixtures share, so two -// fixtures' events resolve to the same term key. -const eventTopic = "ingest_test" - -// eventLCM builds a V2 LCM at seq carrying one transaction that emits a single -// contract event. Returns the wire bytes and the event's term key. -func eventLCM(t *testing.T, seq uint32) ([]byte, events.TermKey) { - t.Helper() - ev := buildContractEvent(eventTopic) - meta := xdr.TransactionMeta{ - V: 4, - V4: &xdr.TransactionMetaV4{Operations: []xdr.OperationMetaV2{{Events: []xdr.ContractEvent{ev}}}}, - } - raw, err := buildLCM(t, seq, []xdr.TransactionMeta{meta}).MarshalBinary() - require.NoError(t, err) - - evBytes, err := ev.MarshalBinary() - require.NoError(t, err) - keys, err := events.TermsForBytes(evBytes) - require.NoError(t, err) - require.NotEmpty(t, keys) - return raw, keys[0] -} - -// buildContractEvent returns a contract ContractEvent with a single symbol -// topic, the minimal shape the events extractor indexes. 
-func buildContractEvent(topic string) xdr.ContractEvent { - var contractID xdr.ContractId - contractID[0] = 0xab - contractID[1] = 0xcd - sym := xdr.ScSymbol(topic) - return xdr.ContractEvent{ - ContractId: &contractID, - Type: xdr.ContractEventTypeContract, - Body: xdr.ContractEventBody{ - V: 0, - V0: &xdr.ContractEventV0{ - Topics: []xdr.ScVal{{Type: xdr.ScValTypeScvSymbol, Sym: &sym}}, - Data: xdr.ScVal{Type: xdr.ScValTypeScvSymbol, Sym: &sym}, - }, - }, - } -} - func testLogger() *supportlog.Entry { l := supportlog.New() l.SetLevel(logrus.ErrorLevel) @@ -335,6 +348,21 @@ func viewOf(t *testing.T, seq uint32) xdr.LedgerCloseMetaView { return xdr.LedgerCloseMetaView(marshalLCM(t, seq)) } +// marshalV0LCM builds a minimal V0 (pre-Soroban) LedgerCloseMeta with no +// transactions and returns its wire bytes. V0 LCMs carry no contract events, +// so the events ingesters record them as a zero-payload ledger. +func marshalV0LCM(t *testing.T, seq uint32) []byte { + t.Helper() + lcm := xdr.LedgerCloseMeta{V: 0, V0: &xdr.LedgerCloseMetaV0{ + LedgerHeader: xdr.LedgerHeaderHistoryEntry{ + Header: xdr.LedgerHeader{LedgerSeq: xdr.Uint32(seq)}, + }, + }} + raw, err := lcm.MarshalBinary() + require.NoError(t, err) + return raw +} + // seqStream is a ledgerbackend.LedgerStream that yields LCMs for an explicit // list of ledger sequences (in order), regardless of the requested range. It // models a backend that hands back a duplicate / out-of-order / wrong-but- @@ -407,6 +435,48 @@ func TestLedgerHotIngester_Readback(t *testing.T) { require.Equal(t, raw, got) } +// TestTxhashHotIngester_Lookup ingests an event/tx-bearing ledger via the hot +// txhash ingester and looks the hash up. 
+func TestTxhashHotIngester_Lookup(t *testing.T) { + seq := chunk.ID(0).FirstLedger() + raw, hash, _ := marshalLCMWithEvent(t, seq) + dir := t.TempDir() + logger := testLogger() + + store, err := txhash.NewHotStore(dir, chunk.ID(0), logger) + require.NoError(t, err) + defer func() { require.NoError(t, store.Close()) }() + + ing := NewTxhashHotIngester(store, nil) + require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw))) + + got, err := store.Get(hash) + require.NoError(t, err) + require.Equal(t, seq, got) +} + +// TestEventsHotIngester_Query ingests an event-bearing ledger via the hot events +// ingester and resolves the term. +func TestEventsHotIngester_Query(t *testing.T) { + chunkID := chunk.ID(0) + seq := chunkID.FirstLedger() + raw, _, term := marshalLCMWithEvent(t, seq) + dir := t.TempDir() + logger := testLogger() + + store, err := eventstore.OpenHotStore(dir, chunkID, logger) + require.NoError(t, err) + defer func() { require.NoError(t, store.Close()) }() + + ing := NewEventsHotIngester(store, nil) + require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw))) + + bm, err := store.Lookup(context.Background(), term) + require.NoError(t, err) + require.NotNil(t, bm) + require.Equal(t, uint64(1), bm.GetCardinality()) +} + // TestLedgerColdIngester_Readback ingests one ledger via the cold ledger // ingester, finalizes, and reads back through the cold reader. func TestLedgerColdIngester_Readback(t *testing.T) { @@ -430,13 +500,199 @@ func TestLedgerColdIngester_Readback(t *testing.T) { require.Equal(t, raw, got) } +// txhashBinPath composes the documented raw-txhash chunk path under root for +// the tests' fixed chunk 0: {root}/{bucketID:05d}/{chunkID:08d}.bin. 
+func txhashBinPath(root string) string { + c := chunk.ID(0) + return filepath.Join(root, c.BucketID(), txhash.ColdBinName(c)) +} + +// TestTxhashColdIngester_Bin ingests two tx-bearing ledgers via the cold txhash +// ingester, finalizes, and reads the .bin back through the store codec. +func TestTxhashColdIngester_Bin(t *testing.T) { + chunkID := chunk.ID(0) + first := chunkID.FirstLedger() + coldDir := t.TempDir() + + ing, err := NewTxhashColdIngester(coldDir, chunkID, nil) + require.NoError(t, err) + defer func() { require.NoError(t, ing.Close()) }() + + for _, seq := range []uint32{first, first + 1} { + raw, _, _ := marshalLCMWithEvent(t, seq) + require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw))) + } + require.NoError(t, ing.Finalize(context.Background())) + + entries, err := txhash.ReadColdBin(txhashBinPath(coldDir)) + require.NoError(t, err) + require.Len(t, entries, 2) +} + +// TestEventsColdIngester_Readback ingests two event-bearing ledgers via the cold +// events ingester, finalizes, and resolves the term through the cold reader. 
+func TestEventsColdIngester_Readback(t *testing.T) { + chunkID := chunk.ID(0) + first := chunkID.FirstLedger() + coldDir := t.TempDir() + + ing, err := NewEventsColdIngester(coldDir, chunkID, nil) + require.NoError(t, err) + defer func() { require.NoError(t, ing.Close()) }() + + var term events.TermKey + for _, seq := range []uint32{first, first + 1} { + raw, _, tk := marshalLCMWithEvent(t, seq) + term = tk + require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw))) + } + require.NoError(t, ing.Finalize(context.Background())) + + bucketDir := filepath.Join(coldDir, chunkID.BucketID()) + cr, err := eventstore.OpenColdReader(chunkID, bucketDir, eventstore.ColdReaderOptions{}) + require.NoError(t, err) + defer func() { require.NoError(t, cr.Close()) }() + cnt, err := cr.EventCount() + require.NoError(t, err) + require.Equal(t, uint32(2), cnt) + bm, err := cr.Lookup(context.Background(), term) + require.NoError(t, err) + require.NotNil(t, bm) + require.Equal(t, uint64(2), bm.GetCardinality()) +} + +// ───────────────────────── V0 (pre-Soroban) events handling ───────────────────────── + +// TestEventsHotIngester_V0AsEmpty asserts the hot events ingester treats a V0 +// LCM as a zero-event ledger (no error) rather than failing the range, and that +// the store records the empty ledger (its event count is unchanged). 
+func TestEventsHotIngester_V0AsEmpty(t *testing.T) { + chunkID := chunk.ID(0) + seq := chunkID.FirstLedger() + dir := t.TempDir() + logger := testLogger() + + store, err := eventstore.OpenHotStore(dir, chunkID, logger) + require.NoError(t, err) + defer func() { require.NoError(t, store.Close()) }() + + ing := NewEventsHotIngester(store, nil) + require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(marshalV0LCM(t, seq))), + "V0 ledger must ingest as zero events, not error") + + cnt, err := store.EventCount() + require.NoError(t, err) + require.Equal(t, uint32(0), cnt, "V0 ledger contributes no events") +} + +// TestEventsColdIngester_V0KeepsOffsetsContiguous ingests a V0 ledger followed by +// an event-bearing V2 ledger and asserts: the V0 ledger does not error, and the +// LedgerOffsets stay contiguous (both ledgers present, the event-bearing one's +// single event ID immediately follows the empty V0 ledger). +func TestEventsColdIngester_V0KeepsOffsetsContiguous(t *testing.T) { + chunkID := chunk.ID(0) + first := chunkID.FirstLedger() + coldDir := t.TempDir() + + ing, err := NewEventsColdIngester(coldDir, chunkID, nil) + require.NoError(t, err) + defer func() { require.NoError(t, ing.Close()) }() + + // Ledger `first`: V0 → zero events, no error. + require.NoError(t, ing.Ingest(context.Background(), first, xdr.LedgerCloseMetaView(marshalV0LCM(t, first)))) + // Ledger `first+1`: one contract event. + rawEv, _, term := marshalLCMWithEvent(t, first+1) + require.NoError(t, ing.Ingest(context.Background(), first+1, xdr.LedgerCloseMetaView(rawEv))) + require.NoError(t, ing.Finalize(context.Background())) + + bucketDir := filepath.Join(coldDir, chunkID.BucketID()) + cr, err := eventstore.OpenColdReader(chunkID, bucketDir, eventstore.ColdReaderOptions{}) + require.NoError(t, err) + defer func() { require.NoError(t, cr.Close()) }() + + // One event total, from the V2 ledger. 
+ cnt, err := cr.EventCount() + require.NoError(t, err) + require.Equal(t, uint32(1), cnt) + + // Offsets are contiguous: both ledgers recorded, V0 contributes [0,0), the + // event-bearing ledger contributes exactly event ID 0. + offsets, err := cr.Offsets() + require.NoError(t, err) + require.Equal(t, 2, offsets.LedgerCount(), "both V0 and V2 ledgers recorded") + require.Equal(t, first, offsets.StartLedger()) + v0Start, v0End, err := offsets.EventIDs(first) + require.NoError(t, err) + require.Equal(t, uint32(0), v0Start) + require.Equal(t, uint32(0), v0End, "V0 ledger has an empty event range") + evStart, evEnd, err := offsets.EventIDs(first + 1) + require.NoError(t, err) + require.Equal(t, uint32(0), evStart, "event ID follows the empty V0 ledger contiguously") + require.Equal(t, uint32(1), evEnd) + + // And the event is queryable by its term. + bm, err := cr.Lookup(context.Background(), term) + require.NoError(t, err) + require.NotNil(t, bm) + require.Equal(t, uint64(1), bm.GetCardinality()) +} + +// TestRunCold_EventlessChunk_FullyReadable drives a full cold chunk of V0 +// (pre-Soroban, eventless) ledgers with Events enabled — the common backfill +// case for early history. The whole chunk has zero contract events; +// eventstore.WriteColdIndex publishes a valid EMPTY index for it, so all +// three cold artifacts exist and the chunk is fully readable: a term-filtered +// Lookup resolves to "no matches" through the ordinary path instead of a +// missing-file error. +func TestRunCold_EventlessChunk_FullyReadable(t *testing.T) { + chunkID := chunk.ID(0) + coldDir := t.TempDir() + logger := testLogger() + sink := &testSink{} + + // Every ledger in the chunk is a V0 (pre-Soroban) ledger → zero events. 
+ require.NoError(t, RunCold( + context.Background(), logger, sourceOf(fullStream(t, chunkID, marshalV0LCM)), + coldDir, chunkID, 1, 1, sink, Config{Events: true}, + )) + + bucketDir := filepath.Join(coldDir, dataTypeEvents, chunkID.BucketID()) + + // All three cold artifacts exist (events.pack + the empty index pair). + for _, name := range []string{ + eventstore.EventsPackName(chunkID), + eventstore.IndexPackName(chunkID), + eventstore.IndexHashName(chunkID), + } { + _, statErr := os.Stat(filepath.Join(bucketDir, name)) + require.NoError(t, statErr, "eventless chunk must publish %s", name) + } + + // The chunk is readable end to end: zero events, and a filtered lookup + // misses cleanly rather than erroring on a missing index. + cr, err := eventstore.OpenColdReader(chunkID, bucketDir, eventstore.ColdReaderOptions{}) + require.NoError(t, err) + defer func() { require.NoError(t, cr.Close()) }() + cnt, err := cr.EventCount() + require.NoError(t, err) + require.Zero(t, cnt) + _, lerr := cr.Lookup(context.Background(), events.ComputeTermKey([]byte("any"), events.FieldContractID)) + require.ErrorIs(t, lerr, eventstore.ErrTermNotFound) + + // Metrics still fired: one aggregate per-chunk, one (clean) per-ingester. + require.Equal(t, 1, sink.coldChunkTotals, "ColdChunkTotal must fire for an eventless chunk") + require.Equal(t, 1, sink.coldDataTypes()[dataTypeEvents], "one ColdIngest for events") + require.Zero(t, sink.coldErrorTypes()[dataTypeEvents], "eventless chunk is not an error") +} + // ───────────────────────── HotService tests ───────────────────────── -// TestHotService_Ledgers_OneAtomicBatch runs HotService over the SHARED multi-CF -// hot DB (decision (a)) and reads the ledger CF back through the DB's facade, -// asserting the aggregate HotLedgerTotal and the per-type HotIngest signals -// fired. Each ledger committed as ONE atomic synced WriteBatch. 
-func TestHotService_Ledgers_OneAtomicBatch(t *testing.T) { +// TestHotService_AllTypes_OneAtomicBatch runs HotService over the SHARED +// multi-CF hot DB (decision (a)) for event/tx-bearing ledgers and reads each CF +// back through the DB's facades, asserting the aggregate HotLedgerTotal and the +// per-type HotIngest signals fired. Each ledger committed as ONE atomic synced +// WriteBatch across all CFs. +func TestHotService_AllTypes_OneAtomicBatch(t *testing.T) { chunkID := chunk.ID(0) first := chunkID.FirstLedger() logger := testLogger() @@ -446,22 +702,29 @@ func TestHotService_Ledgers_OneAtomicBatch(t *testing.T) { defer func() { require.NoError(t, db.Close()) }() sink := &testSink{} - service := NewHotService(db, hotchunk.Ingest{Ledgers: true}, sink) + service := NewHotService(db, hotchunk.Ingest{Ledgers: true, Txhash: true, Events: true}, sink) - rawA := marshalLCM(t, first) - rawB := marshalLCM(t, first+1) + rawA, hashA, termA := marshalLCMWithEvent(t, first) + rawB, hashB, _ := marshalLCMWithEvent(t, first+1) require.NoError(t, service.Ingest(context.Background(), first, xdr.LedgerCloseMetaView(rawA))) require.NoError(t, service.Ingest(context.Background(), first+1, xdr.LedgerCloseMetaView(rawB))) - // The ledger CF retained the data (read through the shared DB's facade). + // Every CF retained the data (read through the shared DB's facades). gotRawA, err := db.Ledgers().GetLedgerRaw(first) require.NoError(t, err) require.Equal(t, rawA, gotRawA) - gotRawB, err := db.Ledgers().GetLedgerRaw(first + 1) + gotA, err := db.Txhash().Get(hashA) require.NoError(t, err) - require.Equal(t, rawB, gotRawB) + require.Equal(t, first, gotA) + gotB, err := db.Txhash().Get(hashB) + require.NoError(t, err) + require.Equal(t, first+1, gotB) + bm, err := db.Events().Lookup(context.Background(), termA) + require.NoError(t, err) + require.Equal(t, uint64(2), bm.GetCardinality()) - // The single watermark advanced to the last committed ledger (decision (a)). 
+ // The single watermark advanced to the last committed ledger (every CF in + // lockstep, decision (a)). maxSeq, ok, err := db.MaxCommittedSeq() require.NoError(t, err) require.True(t, ok) @@ -471,10 +734,12 @@ func TestHotService_Ledgers_OneAtomicBatch(t *testing.T) { require.Equal(t, 2, sink.hotLedgerTotals, "one HotLedgerTotal per ledger") dt := sink.hotDataTypes() require.Equal(t, 2, dt[dataTypeLedgers]) + require.Equal(t, 2, dt[dataTypeTxhash]) + require.Equal(t, 2, dt[dataTypeEvents]) } -// TestHotService_EnabledSubset runs HotService with ledgers enabled and asserts -// the ledger signal fires for each ingested ledger. +// TestHotService_EnabledSubset runs HotService with only ledgers enabled and +// asserts only that type's signal fires (txhash/events CFs untouched). func TestHotService_EnabledSubset(t *testing.T) { seq := chunk.ID(0).FirstLedger() logger := testLogger() @@ -490,107 +755,88 @@ func TestHotService_EnabledSubset(t *testing.T) { require.Equal(t, 1, sink.hotLedgerTotals) dt := sink.hotDataTypes() require.Equal(t, 1, dt[dataTypeLedgers]) + require.Zero(t, dt[dataTypeTxhash]) + require.Zero(t, dt[dataTypeEvents]) } // ───────────────────────── ColdService tests ───────────────────────── -// TestColdService_Success drives the ledger cold ingester through a ColdService -// and asserts readback plus the metrics signals. +// TestColdService_Success drives ledger+txhash+events cold ingesters through a +// ColdService and asserts readback plus the metrics signals. 
func TestColdService_Success(t *testing.T) { chunkID := chunk.ID(0) first := chunkID.FirstLedger() coldDir := t.TempDir() sink := &testSink{} - ings, err := buildColdIngesters(coldDir, chunkID, sink, Config{Ledgers: true}) + ings, err := buildColdIngesters(coldDir, chunkID, sink, Config{Ledgers: true, Txhash: true, Events: true}) require.NoError(t, err) service := NewColdService(ings, sink) defer func() { require.NoError(t, service.Close()) }() + var term events.TermKey for _, seq := range []uint32{first, first + 1} { - require.NoError(t, service.Ingest(context.Background(), seq, viewOf(t, seq))) + raw, _, tk := marshalLCMWithEvent(t, seq) + term = tk + require.NoError(t, service.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw))) } require.NoError(t, service.Finalize(context.Background())) - // Ledger cold readback: the boundary ledger reads back and decodes to the - // right sequence. + // Ledger cold readback: tx hashes use random keypairs, so bytes can't be + // regenerated for comparison — assert the boundary ledger reads back and + // decodes to the right sequence. lcr, err := ledger.OpenColdReader(packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID)) require.NoError(t, err) defer func() { require.NoError(t, lcr.Close()) }() gotFirst, err := lcr.GetLedgerRaw(first) require.NoError(t, err) - require.Equal(t, marshalLCM(t, first), gotFirst) var decoded xdr.LedgerCloseMeta require.NoError(t, decoded.UnmarshalBinary(gotFirst)) require.Equal(t, first, decoded.LedgerSequence()) - // Metrics: one ColdChunkTotal, one ColdIngest for ledgers, no errors. + // Events cold readback. + ecr, err := eventstore.OpenColdReader( + chunkID, filepath.Join(coldDir, dataTypeEvents, chunkID.BucketID()), eventstore.ColdReaderOptions{}) + require.NoError(t, err) + defer func() { require.NoError(t, ecr.Close()) }() + bm, err := ecr.Lookup(context.Background(), term) + require.NoError(t, err) + require.Equal(t, uint64(2), bm.GetCardinality()) + + // Txhash .bin count. 
+ binEntries, err := txhash.ReadColdBin(txhashBinPath(filepath.Join(coldDir, dataTypeTxhash))) + require.NoError(t, err) + require.Len(t, binEntries, 2) + + // Metrics: one ColdChunkTotal, one ColdIngest per data type, no errors. require.Equal(t, 1, sink.coldChunkTotals) cdt := sink.coldDataTypes() require.Equal(t, 1, cdt[dataTypeLedgers]) + require.Equal(t, 1, cdt[dataTypeTxhash]) + require.Equal(t, 1, cdt[dataTypeEvents]) require.Empty(t, sink.coldErrorTypes(), "success path records no ingester errors") - // Per-stage signals: per-ledger cold write fired once per ledger, the - // per-chunk finalize stage once. The exact map is asserted so an unexpected - // stage emission (or a missing one) also fails. + // Per-stage signals: per-ledger cold stages fired once per (non-empty) + // ledger, the per-chunk finalize stage once per ingester. The exact map is + // asserted so an unexpected stage emission (or a missing one) also fails — + // events now emits term_index/write for every ledger, and txhash's extract + // spans its whole per-ledger Ingest. require.Equal(t, map[string]int{ dataTypeLedgers + "/" + tierCold + "/" + stageWrite: 2, dataTypeLedgers + "/" + tierCold + "/" + stageFinalize: 1, + dataTypeTxhash + "/" + tierCold + "/" + stageExtract: 2, + dataTypeTxhash + "/" + tierCold + "/" + stageFinalize: 1, + dataTypeEvents + "/" + tierCold + "/" + stageExtract: 2, + dataTypeEvents + "/" + tierCold + "/" + stageTermIndex: 2, + dataTypeEvents + "/" + tierCold + "/" + stageWrite: 2, + dataTypeEvents + "/" + tierCold + "/" + stageFinalize: 1, }, sink.stageCounts()) // No double-emit: the deferred Close (after this body) must not add a second // ColdIngest or ColdChunkTotal, since Finalize already emitted. 
require.NoError(t, service.Close()) require.Equal(t, 1, sink.coldChunkTotals, "Close after Finalize must not re-emit the aggregate") - require.Len(t, sink.coldIngests, 1, "Close after Finalize must not re-emit per-ingester signals") -} - -// TestColdService_LedgersAndEvents drives BOTH the ledger and events cold -// ingesters through one ColdService over event-bearing ledgers (via the -// explicit-dirs builder processChunk uses), then reads back the ledger pack -// AND the events cold segment, proving the events kind lands across CFs on the -// cold path. -func TestColdService_LedgersAndEvents(t *testing.T) { - chunkID := chunk.ID(0) - first := chunkID.FirstLedger() - dirs := ColdDirs{Ledgers: t.TempDir(), Events: t.TempDir()} - sink := &testSink{} - - ings, err := buildColdIngestersIn(dirs, chunkID, sink, Config{Ledgers: true, Events: true}) - require.NoError(t, err) - service := NewColdService(ings, sink) - defer func() { require.NoError(t, service.Close()) }() - - raw0, term0 := eventLCM(t, first) - raw1, _ := eventLCM(t, first+1) - require.NoError(t, service.Ingest(context.Background(), first, xdr.LedgerCloseMetaView(raw0))) - require.NoError(t, service.Ingest(context.Background(), first+1, xdr.LedgerCloseMetaView(raw1))) - require.NoError(t, service.Finalize(context.Background())) - - // Ledger cold readback: the boundary ledger reads back to the right bytes. - lcr, err := ledger.OpenColdReader(packPath(dirs.Ledgers, chunkID)) - require.NoError(t, err) - defer func() { require.NoError(t, lcr.Close()) }() - gotFirst, err := lcr.GetLedgerRaw(first) - require.NoError(t, err) - require.Equal(t, raw0, gotFirst) - - // Events cold readback: the shared event term resolves to both ledgers' - // events in the frozen cold segment. 
- ecr, err := eventstore.OpenColdReader( - chunkID, filepath.Join(dirs.Events, chunkID.BucketID()), eventstore.ColdReaderOptions{}) - require.NoError(t, err) - defer func() { require.NoError(t, ecr.Close()) }() - bm, err := ecr.Lookup(context.Background(), term0) - require.NoError(t, err) - require.NotNil(t, bm) - require.Equal(t, uint64(2), bm.GetCardinality(), "both ledgers share the event term") - - // Metrics: one ColdIngest per data type, no errors. - cdt := sink.coldDataTypes() - require.Equal(t, 1, cdt[dataTypeLedgers]) - require.Equal(t, 1, cdt[dataTypeEvents]) - require.Empty(t, sink.coldErrorTypes(), "success path records no ingester errors") + require.Len(t, sink.coldIngests, 3, "Close after Finalize must not re-emit per-ingester signals") } // failingCold is a ColdIngester whose Ingest always fails, modeling a mid-chunk @@ -612,21 +858,23 @@ func (f *failingCold) Close() error { f.closed = true; return // failing sibling: ColdService.Ingest returns the sibling's error, Finalize is // not called, the deferred Close drops the partial ledger pack, and no finalized // artifact remains. It also asserts the cold metrics still fire on this failure -// path: the real ingester emits exactly one ColdIngest and the service emits one +// path: each real ingester emits exactly one ColdIngest and the service emits one // aggregate ColdChunkTotal — driven from Close, since Finalize never ran. func TestColdService_FailurePath_NoArtifact(t *testing.T) { chunkID := chunk.ID(0) coldDir := t.TempDir() sink := &testSink{} - // A real cold ledger ingester plus a failing sibling, so we can assert the - // real ingester emits its per-chunk ColdIngest from Close. + // Two real cold ingesters (ledger + events) plus a failing sibling, so we can + // assert each real ingester emits its per-chunk ColdIngest from Close. 
realLedger, err := NewLedgerColdIngester(filepath.Join(coldDir, dataTypeLedgers), chunkID, sink) require.NoError(t, err) + realEvents, err := NewEventsColdIngester(filepath.Join(coldDir, dataTypeEvents), chunkID, sink) + require.NoError(t, err) failing := &failingCold{} - service := NewColdService([]ColdIngester{realLedger, failing}, sink) + service := NewColdService([]ColdIngester{realLedger, realEvents, failing}, sink) - // First ledger: the real ingester succeeds, failing returns an error → the + // First ledger: the real ingesters succeed, failing returns an error → the // sequential Ingest aborts the ledger with the sibling's error. err = service.Ingest(context.Background(), chunkID.FirstLedger(), viewOf(t, chunkID.FirstLedger())) require.ErrorIs(t, err, errFailingCold) @@ -641,9 +889,10 @@ func TestColdService_FailurePath_NoArtifact(t *testing.T) { require.NoError(t, service.Close()) require.True(t, failing.closed) - // The real ingester emitted exactly one ColdIngest; the aggregate fired once. + // Each real ingester emitted exactly one ColdIngest; the aggregate fired once. cdt := sink.coldDataTypes() require.Equal(t, 1, cdt[dataTypeLedgers], "ledger cold ingester emits once on failure path") + require.Equal(t, 1, cdt[dataTypeEvents], "events cold ingester emits once on failure path") require.Equal(t, 1, sink.coldChunkTotals, "exactly one aggregate ColdChunkTotal") // No finalized ledger pack must exist. 
@@ -691,12 +940,12 @@ func TestPrometheusSink_Smoke(t *testing.T) { require.NotPanics(t, func() { sink := NewPrometheusSink(reg, "test") sink.HotIngest(dataTypeLedgers, time.Millisecond, 1, nil) - sink.HotIngest(dataTypeLedgers, time.Millisecond, 3, errFailingCold) - sink.ColdIngest(dataTypeLedgers, time.Second, 100, nil) + sink.HotIngest(dataTypeEvents, time.Millisecond, 3, errFailingCold) + sink.ColdIngest(dataTypeTxhash, time.Second, 100, nil) sink.HotLedgerTotal(time.Millisecond) sink.ColdChunkTotal(time.Second) - sink.IngestStage(dataTypeLedgers, tierHot, stageWrite, time.Millisecond, 1) - sink.IngestStage(dataTypeLedgers, tierCold, stageFinalize, time.Second, 0) + sink.IngestStage(dataTypeEvents, tierHot, stageExtract, time.Millisecond, 3) + sink.IngestStage(dataTypeEvents, tierCold, stageFinalize, time.Second, 0) }) mfs, err := reg.Gather() @@ -706,11 +955,11 @@ func TestPrometheusSink_Smoke(t *testing.T) { // ───────────────────────── hot driver tests ───────────────────────── -// TestRunHot_Ledgers_Readback runs the RunHot driver with the injected SHARED -// hot DB (decision (a)) and asserts the ledger CF reads back. The short stream -// ends early so RunHot returns the completeness error after both ledgers are -// fully ingested. -func TestRunHot_Ledgers_Readback(t *testing.T) { +// TestRunHot_AllTypes_Readback runs the RunHot driver with the injected SHARED +// hot DB (decision (a)) over event/tx-bearing ledgers and asserts every CF +// reads back. The short stream ends early so RunHot returns the completeness +// error after both ledgers are fully ingested. 
+func TestRunHot_AllTypes_Readback(t *testing.T) { chunkID := chunk.ID(0) first := chunkID.FirstLedger() logger := testLogger() @@ -719,14 +968,14 @@ func TestRunHot_Ledgers_Readback(t *testing.T) { require.NoError(t, err) defer func() { require.NoError(t, db.Close()) }() - seqA, seqB := first, first+1 - rawA := marshalLCM(t, seqA) - rawB := marshalLCM(t, seqB) + evSeqA, evSeqB := first, first+1 + rawA, hashA, termA := marshalLCMWithEvent(t, evSeqA) + rawB, hashB, _ := marshalLCMWithEvent(t, evSeqB) gen := func(tt *testing.T, seq uint32) []byte { switch seq { - case seqA: + case evSeqA: return rawA - case seqB: + case evSeqB: return rawB default: return marshalLCM(tt, seq) @@ -735,18 +984,27 @@ func TestRunHot_Ledgers_Readback(t *testing.T) { stream := &fakeStream{t: t, count: 2, gen: gen} stores := HotStores{HotDB: db} - cfg := Config{Ledgers: true} + cfg := Config{Ledgers: true, Txhash: true, Events: true} err = RunHot(context.Background(), logger, sourceOf(stream), chunkID, stores, nil, cfg) require.Error(t, err) require.Contains(t, err.Error(), "ended at") - gotRawA, err := db.Ledgers().GetLedgerRaw(seqA) + gotRawA, err := db.Ledgers().GetLedgerRaw(evSeqA) require.NoError(t, err) require.Equal(t, rawA, gotRawA) - gotRawB, err := db.Ledgers().GetLedgerRaw(seqB) + + gotA, err := db.Txhash().Get(hashA) require.NoError(t, err) - require.Equal(t, rawB, gotRawB) + require.Equal(t, evSeqA, gotA) + gotB, err := db.Txhash().Get(hashB) + require.NoError(t, err) + require.Equal(t, evSeqB, gotB) + + bm, err := db.Events().Lookup(context.Background(), termA) + require.NoError(t, err) + require.NotNil(t, bm) + require.Equal(t, uint64(2), bm.GetCardinality(), "both sentinel events share the term") } // TestRunHot_MissingStore asserts RunHot rejects an enabled type with a nil @@ -921,6 +1179,69 @@ func TestRunCold_CustomSource_Extensibility(t *testing.T) { require.Equal(t, marshalLCM(t, first), raw) } +// TestRunCold_TxhashCold_Bin runs the cold txhash driver over a chunk 
whose +// sentinel ledgers carry one tx each and asserts the .bin entry count. +func TestRunCold_TxhashCold_Bin(t *testing.T) { + chunkID := chunk.ID(0) + first := chunkID.FirstLedger() + coldDir := t.TempDir() + logger := testLogger() + + txSeqs := map[uint32]bool{first: true, first + 1: true} + gen := func(tt *testing.T, seq uint32) []byte { + if txSeqs[seq] { + raw, _, _ := marshalLCMWithEvent(tt, seq) + return raw + } + return marshalLCM(tt, seq) + } + + require.NoError(t, RunCold( + context.Background(), logger, customSource{t: t, gen: gen}, coldDir, chunkID, 1, 1, nil, Config{Txhash: true}, + )) + + entries, err := txhash.ReadColdBin(txhashBinPath(filepath.Join(coldDir, dataTypeTxhash))) + require.NoError(t, err) + require.Len(t, entries, len(txSeqs)) +} + +// TestRunCold_EventsCold_Readback runs the cold events driver over a chunk whose +// sentinel ledgers carry one event each and resolves the term post-Finalize. +func TestRunCold_EventsCold_Readback(t *testing.T) { + chunkID := chunk.ID(0) + first := chunkID.FirstLedger() + coldDir := t.TempDir() + logger := testLogger() + + evSeqs := map[uint32]bool{first: true, first + 1: true} + var term events.TermKey + gen := func(tt *testing.T, seq uint32) []byte { + if evSeqs[seq] { + raw, _, tk := marshalLCMWithEvent(tt, seq) + term = tk + return raw + } + return marshalLCM(tt, seq) + } + + require.NoError(t, RunCold( + context.Background(), logger, customSource{t: t, gen: gen}, coldDir, chunkID, 1, 1, nil, Config{Events: true}, + )) + + bucketDir := filepath.Join(coldDir, "events", chunkID.BucketID()) + cr, err := eventstore.OpenColdReader(chunkID, bucketDir, eventstore.ColdReaderOptions{}) + require.NoError(t, err) + defer func() { require.NoError(t, cr.Close()) }() + + cnt, err := cr.EventCount() + require.NoError(t, err) + require.Equal(t, uint32(len(evSeqs)), cnt) + bm, err := cr.Lookup(context.Background(), term) + require.NoError(t, err) + require.NotNil(t, bm) + require.Equal(t, uint64(len(evSeqs)), 
bm.GetCardinality()) +} + // ───────────────────────── drain seq guard (P0-1) ───────────────────────── // TestRunCold_OutOfOrderSeq_NoArtifact feeds a stream that yields a ledger out @@ -957,6 +1278,37 @@ func TestRunCold_OutOfOrderSeq_NoArtifact(t *testing.T) { require.True(t, os.IsNotExist(statErr), "expected no cold artifact at %s, stat err: %v", path, statErr) } +// TestDrain_TxhashSeqGuard asserts the guard also fires on the txhash path, +// where a wrong-but-right-count sequence would otherwise be silently absorbed +// (each ledger keys on its own LCM seq). +func TestDrain_TxhashSeqGuard(t *testing.T) { + chunkID := chunk.ID(0) + first := chunkID.FirstLedger() + last := chunkID.LastLedger() + coldDir := t.TempDir() + logger := testLogger() + + seqs := make([]uint32, 0, last-first+1) + for s := first; s <= last; s++ { + seqs = append(seqs, s) + } + require.GreaterOrEqual(t, len(seqs), 2) + // Corrupt the SECOND ledger so at least one valid ledger is ingested + // before the guard fires. + seqs[1] += 100 + + err := RunCold( + context.Background(), logger, sourceOf(&seqStream{t: t, seqs: seqs}), coldDir, chunkID, 1, 1, nil, + Config{Txhash: true}, + ) + require.Error(t, err) + require.Contains(t, err.Error(), "yielded ledger") + + binPath := txhashBinPath(filepath.Join(coldDir, dataTypeTxhash)) + _, statErr := os.Stat(binPath) + require.True(t, os.IsNotExist(statErr), "expected no .bin at %s, stat err: %v", binPath, statErr) +} + // TestRunCold_DrainStreamError_NoArtifact exercises the drain mid-stream error // path: the backend yields valid ledgers, then hands back (nil, err) at a seq in // the middle of the chunk. 
drain must wrap the error with RawLedgers + the seq, @@ -986,6 +1338,11 @@ func TestRunCold_DrainStreamError_NoArtifact(t *testing.T) { require.True(t, os.IsNotExist(statErr), "expected no cold artifact at %s, stat err: %v", path, statErr) } +// The txhash .bin codec itself — atomic publish, create/rename failure +// cleanup, layout, and the reader round-trip — is owned and tested by +// pkg/stores/txhash (cold_bin_test.go); these tests only cover the +// ingester-level behavior on top of it. + // ───────────────────────── HotService failure path (P1-c) ───────────────────────── // TestHotService_IngestFailureStillEmitsTotal asserts a failed shared-DB ingest @@ -1000,13 +1357,89 @@ func TestHotService_IngestFailureStillEmitsTotal(t *testing.T) { require.NoError(t, db.Close()) // closed DB makes IngestLedger fail sink := &testSink{} - service := NewHotService(db, hotchunk.Ingest{Ledgers: true}, sink) + service := NewHotService(db, hotchunk.Ingest{Ledgers: true, Txhash: true, Events: true}, sink) err = service.Ingest(context.Background(), chunk.ID(0).FirstLedger(), viewOf(t, chunk.ID(0).FirstLedger())) require.Error(t, err) require.Equal(t, 1, sink.hotLedgerTotals, "HotLedgerTotal fires exactly once even on failure") } +// TestHotIngester_Failure_RecordsErrorMetric drives a REAL hot ingester +// (eventsHot, built via NewEventsHotIngester) with a malformed view so its own +// Ingest fails through the production hotMetrics emit path — unlike the +// failingHot/blockingHot stubs, which bypass hotMetrics entirely. Per #765 a +// failed hot Ingest must record exactly one HotIngest carrying a non-nil error +// for that data type. Mirrors the cold-side TestColdIngester_Failure_RecordsErrorMetric. 
+func TestHotIngester_Failure_RecordsErrorMetric(t *testing.T) { + chunkID := chunk.ID(0) + logger := testLogger() + dir := t.TempDir() + sink := &testSink{} + + store, err := eventstore.OpenHotStore(dir, chunkID, logger) + require.NoError(t, err) + defer func() { require.NoError(t, store.Close()) }() + + ing := NewEventsHotIngester(store, sink) + + // A truncated/garbage view makes the event extraction fail inside the real + // Ingest, so the deferred hotMetrics.emit reports the wrapped error. + bad := xdr.LedgerCloseMetaView([]byte{0x00, 0x01, 0x02}) + require.Error(t, ing.Ingest(context.Background(), chunkID.FirstLedger(), bad)) + + sink.mu.Lock() + defer sink.mu.Unlock() + require.Len(t, sink.hotIngests, 1, "exactly one HotIngest recorded") + require.Equal(t, dataTypeEvents, sink.hotIngests[0].dataType) + require.Error(t, sink.hotIngests[0].err, "the recorded HotIngest carries the ingest error") +} + +// ───────────────────────── cold txhash .bin content (P1-d) ───────────────────────── + +// TestTxhashColdIngester_BinContent ingests two tx-bearing ledgers, finalizes, +// then reads the .bin back through the store codec and asserts the contract +// the deferred streamhash builder relies on: each key == the fixture tx hash +// truncated to txhash.ColdKeySize (pinned to streamhash.MinKeySize by the +// codec), each seq == the ledger it was ingested in, and entries are in +// non-decreasing key order. +func TestTxhashColdIngester_BinContent(t *testing.T) { + chunkID := chunk.ID(0) + first := chunkID.FirstLedger() + coldDir := t.TempDir() + + ing, err := NewTxhashColdIngester(coldDir, chunkID, nil) + require.NoError(t, err) + defer func() { require.NoError(t, ing.Close()) }() + + // Capture each fixture hash + the seq it was ingested in. 
+ wantSeqByKey := map[[txhash.ColdKeySize]byte]uint32{} + for _, seq := range []uint32{first, first + 1} { + raw, hash, _ := marshalLCMWithEvent(t, seq) + var key [txhash.ColdKeySize]byte + copy(key[:], hash[:txhash.ColdKeySize]) + wantSeqByKey[key] = seq + require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw))) + } + require.NoError(t, ing.Finalize(context.Background())) + + entries, err := txhash.ReadColdBin(txhashBinPath(coldDir)) + require.NoError(t, err) + require.Len(t, entries, 2) + + var prevKey [txhash.ColdKeySize]byte + for i, e := range entries { + wantSeq, known := wantSeqByKey[e.Key] + require.True(t, known, "entry %d key %x is not one of the ingested fixture hashes", i, e.Key) + require.Equal(t, wantSeq, e.Seq, "entry %d seq must equal the ledger it was ingested in", i) + + if i > 0 { + require.LessOrEqual(t, bytes.Compare(prevKey[:], e.Key[:]), 0, + "entries must be in non-decreasing key order") + } + prevKey = e.Key + } +} + // ───────────────────────── OpenStream failure through the driver (P1-e) ───────────────────────── var errOpenStream = errors.New("induced OpenStream failure") @@ -1080,8 +1513,9 @@ func TestRunHot_OpenStreamError(t *testing.T) { // TestRunHot_ChunkIDMismatch asserts RunHot rejects an injected shared hot DB // bound to a different chunk than the one being ingested, with a clear up-front -// error (rather than silently interleaving two chunks' data into one DB). The -// shared DB is chunk-bound (decision (a)). +// error (rather than silently interleaving two chunks' data into one DB, or a +// later per-ledger out-of-range on the events CF). The shared DB is chunk-bound +// (decision (a)). 
func TestRunHot_ChunkIDMismatch(t *testing.T) { ingestChunk := chunk.ID(1) storeChunk := chunk.ID(0) @@ -1092,7 +1526,7 @@ func TestRunHot_ChunkIDMismatch(t *testing.T) { defer func() { require.NoError(t, db.Close()) }() err = RunHot(context.Background(), logger, sourceOf(&fakeStream{t: t, count: 1}), ingestChunk, - HotStores{HotDB: db}, nil, Config{Ledgers: true}) + HotStores{HotDB: db}, nil, Config{Ledgers: true, Txhash: true, Events: true}) require.Error(t, err) require.Contains(t, err.Error(), "bound to chunk 0") require.Contains(t, err.Error(), "RunHot chunk 1") @@ -1219,19 +1653,79 @@ func countCleanColdIngests(s *testSink) int { return n } +// TestBuildColdIngesters_RollbackNoPhantomMetric makes a LATER constructor +// (txhash) fail by planting a regular file at the txhash per-type directory, +// so the constructor's own MkdirAll fails. The earlier-built ledger ingester +// is rolled back via closeColdAll, which must NOT emit a phantom success +// ColdIngest — the recorded ledger metric (if any) must carry the abort +// error, never a clean (nil-err, 0-items) success. +func TestBuildColdIngesters_RollbackNoPhantomMetric(t *testing.T) { + chunkID := chunk.ID(0) + coldDir := t.TempDir() + sink := &testSink{} + + // Plant a regular FILE where the txhash per-type directory must be + // created: the ledger ingester builds first, then NewTxhashColdIngester + // fails its bucket-dir MkdirAll. + require.NoError(t, os.WriteFile(filepath.Join(coldDir, dataTypeTxhash), []byte("not a dir"), 0o644)) + + _, err := buildColdIngesters(coldDir, chunkID, sink, Config{Ledgers: true, Txhash: true}) + require.Error(t, err, "txhash constructor must fail on the planted file") + + // The ledger ingester was built then rolled back. No phantom SUCCESS metric: + // any recorded ledger ColdIngest must carry an error. 
+ cdt := sink.coldDataTypes() + if cdt[dataTypeLedgers] > 0 { + require.Equal(t, cdt[dataTypeLedgers], sink.coldErrorTypes()[dataTypeLedgers], + "rolled-back ledger ingester must not emit a phantom success ColdIngest") + } + // And the success-only assertion: there must be zero clean (nil-err) cold + // ingest signals recorded. + require.Zero(t, countCleanColdIngests(sink), "no clean ColdIngest on the rollback path") +} + +// TestBuildColdIngesters_RollbackLaterFailure_TxhashAborts makes the LAST +// constructor (events) fail AFTER both the ledger AND txhash ingesters were +// already built, so closeColdAll rolls back two ingesters. It asserts the txhash +// ingester (which DOES implement abortMetric) emits an error-carrying — not a +// clean-success — ColdIngest, complementing the ledger-only abort coverage above. +func TestBuildColdIngesters_RollbackLaterFailure_TxhashAborts(t *testing.T) { + chunkID := chunk.ID(0) + coldDir := t.TempDir() + sink := &testSink{} + + // Plant a directory at the events.pack path: the ledger and txhash + // ingesters build first, then NewEventsColdIngester fails opening the + // pack over the directory. + packPath := filepath.Join(coldDir, dataTypeEvents, chunkID.BucketID(), eventstore.EventsPackName(chunkID)) + require.NoError(t, os.MkdirAll(packPath, 0o755)) + + _, err := buildColdIngesters(coldDir, chunkID, sink, + Config{Ledgers: true, Txhash: true, Events: true}) + require.Error(t, err, "events constructor must fail on the planted directory") + + // The txhash ingester was built then rolled back: its recorded ColdIngest must + // carry the abort error, never a clean success. + cdt := sink.coldDataTypes() + require.Equal(t, 1, cdt[dataTypeTxhash], "rolled-back txhash ingester emits one ColdIngest") + require.Equal(t, 1, sink.coldErrorTypes()[dataTypeTxhash], + "the rolled-back txhash ColdIngest must carry the abort error") + + // No phantom clean success on the rollback path for any ingester. 
+ require.Zero(t, countCleanColdIngests(sink), "no clean ColdIngest on the rollback path") +} + // TestRunCold_ConstructorFailure_EmitsAggregate drives a constructor failure // through RunCold (not buildColdIngesters directly) and asserts the chunk // attempt still produces its single aggregate ColdChunkTotal — the invariant -// is one aggregate per chunk attempt, including pre-service failures. The -// rolled-back ledger ingester must not emit a phantom clean-success ColdIngest. +// is one aggregate per chunk attempt, including pre-service failures. func TestRunCold_ConstructorFailure_EmitsAggregate(t *testing.T) { chunkID := chunk.ID(0) coldDir := t.TempDir() logger := testLogger() sink := &testSink{} - // Plant a regular file where the ledgers per-type subdir must be created so - // the ledger cold constructor's MkdirAll fails. + // Plant a regular file where the ledgers per-type subdir must be created. require.NoError(t, os.WriteFile(filepath.Join(coldDir, dataTypeLedgers), []byte("not a dir"), 0o644)) err := RunCold( @@ -1244,6 +1738,68 @@ func TestRunCold_ConstructorFailure_EmitsAggregate(t *testing.T) { require.Zero(t, countCleanColdIngests(sink), "no clean ColdIngest on the rollback path") } +// ───────────────────────── events Finish-then-WriteColdIndex failure ───────────────────────── + +// TestEventsCold_FinishThenIndexFails_LeavesInertPack forces WriteColdIndex to +// fail AFTER writer.Finish has committed events.pack, by planting a directory +// where the index.hash file must be written (buildMPHF then hits EISDIR). +// Finalize must surface the error; the index-less events.pack stays on disk — +// without the orchestrator's completion record it is inert scratch (see the +// package doc's artifact model), and a retry's overwrite is the cleanup. 
+func TestEventsCold_FinishThenIndexFails_LeavesInertPack(t *testing.T) { + chunkID := chunk.ID(0) + first := chunkID.FirstLedger() + coldDir := t.TempDir() + + ing, err := NewEventsColdIngester(coldDir, chunkID, nil) + require.NoError(t, err) + + // Ingest one event-bearing ledger so the mirror is non-empty (an empty + // build set would take the valid empty-index path instead of buildMPHF). + rawEv, _, _ := marshalLCMWithEvent(t, first) + require.NoError(t, ing.Ingest(context.Background(), first, xdr.LedgerCloseMetaView(rawEv))) + + // Plant a DIRECTORY where index.hash must be written → buildMPHF fails. + bucketDir := filepath.Join(coldDir, chunkID.BucketID()) + indexHashPath := filepath.Join(bucketDir, eventstore.IndexHashName(chunkID)) + require.NoError(t, os.Mkdir(indexHashPath, 0o755)) + + ferr := ing.Finalize(context.Background()) + require.Error(t, ferr, "Finalize must fail when WriteColdIndex fails") + require.Contains(t, ferr.Error(), "WriteColdIndex") + + // The committed events.pack stays in place as inert scratch (Finish ran, + // so the later Close does not drop it either). + packPath := filepath.Join(bucketDir, eventstore.EventsPackName(chunkID)) + _, statErr := os.Stat(packPath) + require.NoError(t, statErr, "the index-less events.pack stays on disk after WriteColdIndex failure") + + // Close is still safe/idempotent afterwards and does not remove the pack. + require.NoError(t, ing.Close()) + _, statErr = os.Stat(packPath) + require.NoError(t, statErr, "Close after a committed Finish must not drop the pack") +} + +// TestEventsCold_FinalizeAfterFailedIngest_Refuses asserts the failed-Ingest +// latch: once an Ingest errors (here via a malformed view), Finalize must +// refuse rather than commit a pack+index whose mirror may be ahead of the +// offsets commit point. 
+func TestEventsCold_FinalizeAfterFailedIngest_Refuses(t *testing.T) { + chunkID := chunk.ID(0) + coldDir := t.TempDir() + + ing, err := NewEventsColdIngester(coldDir, chunkID, nil) + require.NoError(t, err) + defer func() { require.NoError(t, ing.Close()) }() + + bad := xdr.LedgerCloseMetaView([]byte{0x00, 0x01, 0x02}) + require.Error(t, ing.Ingest(context.Background(), chunkID.FirstLedger(), bad)) + + ferr := ing.Finalize(context.Background()) + require.Error(t, ferr) + require.Contains(t, ferr.Error(), "Finalize after failed Ingest") +} + // ───────────────────────── ColdService.Finalize first-error ───────────────────────── // finalizeErrCold is a ColdIngester whose Finalize errors; it records whether diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/service.go b/cmd/stellar-rpc/internal/fullhistory/ingest/service.go index 56cb19946..c5447f75a 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/service.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/service.go @@ -21,12 +21,14 @@ func errOrFirst(prev, cur error) error { return cur } -// HotService commits one ledger to the per-chunk hot DB as ONE atomic, synced -// WriteBatch (decision (a)) and emits the per-ledger wall-clock plus per-type -// volume signals via the sink. +// HotService commits one ledger to the shared per-chunk hot DB as ONE atomic, +// synced WriteBatch across all enabled CFs (decision (a)) and emits the +// per-ledger wall-clock plus per-type volume signals via the sink. // -// A ledger is fully present or fully absent because it commits in a single -// WriteBatch (hotchunk.DB.IngestLedger). +// There is no fan-out: the three data types are column families of ONE RocksDB +// instance, and a ledger is fully present or fully absent because every CF +// commits in the same WriteBatch (hotchunk.DB.IngestLedger). This replaces the +// old errgroup that committed three independent per-store writes concurrently. 
type HotService struct { db *hotchunk.DB cfg hotchunk.Ingest @@ -64,6 +66,9 @@ func (s *HotService) emit(counts hotchunk.LedgerCounts, d time.Duration, err err if s.cfg.Ledgers { s.sink.HotIngest(dataTypeLedgers, d, itemsOnSuccess(counts.Ledgers, err), err) } + if s.cfg.Txhash { + s.sink.HotIngest(dataTypeTxhash, d, itemsOnSuccess(counts.Txhash, err), err) + } if s.cfg.Events { s.sink.HotIngest(dataTypeEvents, d, itemsOnSuccess(counts.Events, err), err) } diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk.go index 5b4949a40..695135b9e 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk.go @@ -1,18 +1,24 @@ // Package hotchunk implements decision (a): the per-chunk hot tier is -// ONE RocksDB instance holding the ledger column family, and each ledger -// commits as ONE atomic, synced WriteBatch. There is a SINGLE per-chunk -// watermark (the max committed ledger seq, authoritative from the -// ledgers CF's last key), with no per-store frontier markers. +// ONE RocksDB instance holding the union of every hot data type's +// column families — the ledger CF, the three events CFs, and the 16 +// nibble-routed txhash CFs — and each ledger commits as ONE atomic, +// synced WriteBatch across ALL of those CFs. A ledger is therefore +// fully present or fully absent; there is a SINGLE per-chunk watermark +// (the max committed ledger seq, authoritative from the ledgers CF's +// last key), with no per-store frontier markers and no min-of-three. // -// The typed ledger facade (ledger.HotStore) is composed over the shared -// store via its NewWithStore constructor and keeps its existing read API -// for downstream (#770). Its write path is expressed as Puts queued into -// the shared batch, which commits once. 
+// The three typed facades (ledger.HotStore, txhash.HotStore, +// eventstore.HotStore) are composed over the one shared store via their +// NewWithStore constructors and keep their existing read APIs for +// downstream (#770). Their write paths are expressed as Puts queued +// into the shared batch, which is the whole point: it lets one batch +// span all CFs and commit once. package hotchunk import ( "fmt" + sdkingest "github.com/stellar/go-stellar-sdk/ingest" supportlog "github.com/stellar/go-stellar-sdk/support/log" "github.com/stellar/go-stellar-sdk/xdr" @@ -22,49 +28,61 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" ) -// DB is one chunk's hot tier: a single rocksdb.Store plus the typed -// ledger facade composed over it. It owns the store's lifecycle (Close -// closes it exactly once); the facade wraps it without owning it. +// DB is one chunk's hot tier: a single multi-CF rocksdb.Store plus the +// three typed facades composed over it. It owns the store's lifecycle +// (Close closes it exactly once); the facades wrap it without owning it. // // Concurrency: ingestion is single-writer (the daemon's per-chunk // ingestion loop). IngestLedger is not safe to call concurrently with -// itself. Reads via the facade follow its own concurrency contract and -// are safe alongside the single writer. +// itself. Reads via the facades follow each facade's own concurrency +// contract and are safe alongside the single writer. 
type DB struct { store *rocksdb.Store chunkID chunk.ID ledger *ledger.HotStore + txhash *txhash.HotStore events *eventstore.HotStore } -// columnFamilies returns the CF list for the per-chunk DB: the ledger CF -// plus the three events CFs. Names are non-colliding across the facades -// ("ledgers"; "events_data"/"events_index"/"events_offsets"). +// columnFamilies returns the full CF list for the shared per-chunk DB: +// the ledger CF, the three events CFs, and the 16 txhash CFs. Names are +// already non-colliding across the three facades ("ledgers"; +// "events_data"/"events_index"/"events_offsets"; "cf-0".."cf-f"). func columnFamilies() []string { cfs := make([]string, 0, 1+len(eventstore.CFNames())) cfs = append(cfs, ledger.LedgersCF) cfs = append(cfs, eventstore.CFNames()...) + cfs = append(cfs, txhash.CFNames()...) return cfs } -// config builds the per-chunk store's rocksdb.Config. It rides on -// RocksDB's defaults (zero Tuning) — the same choice ledger.OpenHotStore -// makes for the standalone ledger store: no explicit block cache, bloom -// filter, or WAL cap. Re-tune only with a workload measurement. +// config builds the shared store's rocksdb.Config. Per-CF options come +// from the events facade (ZSTD on DataCF, tuned block sizes); the +// DB-wide + per-CF tuning the txhash workload calibrated (block cache, +// background jobs, WAL cap, bloom, write-buffer sizing) is applied via +// Tuning. The global Tuning's per-CF fields (write buffer, bloom) apply +// to every CF; this is a deliberate, benign over-application — the +// ledger and events CFs simply gain a bloom filter and larger write +// buffer. Per-CF compression/block-size overrides keep events' tuning +// distinct. 
func config(path string, logger *supportlog.Entry) rocksdb.Config { return rocksdb.Config{ Path: path, ColumnFamilies: columnFamilies(), Logger: logger, + Tuning: txhash.Tuning(), PerCFOptions: eventstore.CFOptions(), } } -// Open opens (or creates) the chunk's hot DB at path and composes the -// ledger facade over it. path and logger are required. +// Open opens (or creates) the chunk's single shared multi-CF hot DB at +// path and composes the three facades over it. path and logger are +// required. On any facade-construction failure (only events' warmup can +// fail) the shared store is closed before returning. func Open(path string, chunkID chunk.ID, logger *supportlog.Entry) (*DB, error) { if path == "" { return nil, stores.ErrInvalidConfig @@ -86,6 +104,7 @@ func Open(path string, chunkID chunk.ID, logger *supportlog.Entry) (*DB, error) store: store, chunkID: chunkID, ledger: ledger.NewWithStore(store, chunkID), + txhash: txhash.NewWithStore(store, chunkID), events: es, }, nil } @@ -96,6 +115,9 @@ func (d *DB) ChunkID() chunk.ID { return d.chunkID } // Ledgers returns the ledger read/write facade over the shared store. func (d *DB) Ledgers() *ledger.HotStore { return d.ledger } +// Txhash returns the txhash read/write facade over the shared store. +func (d *DB) Txhash() *txhash.HotStore { return d.txhash } + // Events returns the events read/write facade over the shared store. func (d *DB) Events() *eventstore.HotStore { return d.events } @@ -106,7 +128,10 @@ func (d *DB) Close() error { return d.store.Close() } // MaxCommittedSeq returns the single authoritative per-chunk watermark: // the highest ledger seq durably committed, read from the ledgers CF's -// last key. ok=false on an empty DB (no ledger committed yet). +// last key. Because every ledger commits as ONE atomic synced batch +// across all CFs (decision (a)), this one value pins the frontier of +// EVERY CF — events and txhash never trail or lead the ledgers CF. 
+// ok=false on an empty DB (no ledger committed yet). func (d *DB) MaxCommittedSeq() (uint32, bool, error) { return d.ledger.LastSeq() } @@ -116,33 +141,62 @@ func (d *DB) MaxCommittedSeq() (uint32, bool, error) { // dependency on the ingest package (which depends on the stores). type Ingest struct { Ledgers bool + Txhash bool Events bool } // LedgerCounts reports how many items each data type contributed to one -// IngestLedger call: 1 ledger (when Ledgers enabled). Lets the caller -// (HotService) emit per-type volume metrics without re-deriving them. +// IngestLedger call: 1 ledger (when Ledgers enabled), the tx-hash count, +// and the event-payload count. Lets the caller (HotService) emit +// per-type volume metrics without re-deriving them. type LedgerCounts struct { Ledgers int + Txhash int Events int } -// IngestLedger commits ONE ledger to the hot DB as a SINGLE atomic, -// synced WriteBatch (decision (a)). It queues the ledger row into one -// rocksdb.BatchWriter and commits once (sync=true via the store's pinned -// WriteOptions). The single watermark advances atomically. +// IngestLedger commits ONE ledger to the shared hot DB as a SINGLE +// atomic, synced WriteBatch across all enabled CFs (decision (a)). It +// extracts each enabled type's rows from lcm, queues them all into one +// rocksdb.BatchWriter, commits once (sync=true via the store's pinned +// WriteOptions), and only then applies the events facade's in-memory +// mirror/offsets update. A ledger is therefore fully present across +// every CF or fully absent — there is no partial, no per-store +// ordering, and the single watermark advances atomically. // // seq is the driver-validated sequence of lcm. lcm is a borrowed, -// zero-copy view: the ledger bytes are copied into the batch -// synchronously, so the view need not outlive this call. 
+// zero-copy view: every extractor below copies what it retains (the +// ledger bytes and tx hashes are copied into the batch synchronously; +// the events payloads' bytes are marshaled into fresh buffers in the +// prepare step), so the view need not outlive this call. +// +// If the events ledger is an idempotent duplicate (already committed), +// its prepare step contributes nothing and the apply hook is nil; the +// other CFs still write their (upsert-keyed) rows, matching the merged +// per-store idempotent-retry semantics. func (d *DB) IngestLedger(seq uint32, lcm xdr.LedgerCloseMetaView, cfg Ingest) (LedgerCounts, error) { var counts LedgerCounts if d.store.IsClosed() { return counts, stores.ErrStoreClosed } - // Pre-extract the events payloads BEFORE opening the batch, so a decode - // error rejects the ledger without a half-built batch. + // Pre-extract everything that can fail BEFORE opening the batch, so a + // decode error rejects the ledger without a half-built batch. + var txEntries []txhash.Entry + if cfg.Txhash { + hashes, err := sdkingest.ExtractTxHashes(lcm) + if err != nil { + return counts, fmt.Errorf("hotchunk: extract tx hashes seq %d: %w", seq, err) + } + if len(hashes) > 0 { + txEntries = make([]txhash.Entry, len(hashes)) + for i, h := range hashes { + txEntries[i] = txhash.Entry{Hash: [32]byte(h), LedgerSeq: seq} + } + } + counts.Txhash = len(hashes) + } + var payloads []events.Payload if cfg.Events { p, err := eventPayloads(seq, lcm) @@ -156,9 +210,9 @@ func (d *DB) IngestLedger(seq uint32, lcm xdr.LedgerCloseMetaView, cfg Ingest) ( counts.Ledgers = 1 } - // The events facade validates sequence/order and marshals up front so a - // rejected events ledger never touches the shared batch; it returns the - // post-commit apply hook (nil for an idempotent duplicate). 
+ // The events facade validates sequence/order and marshals up front so + // a rejected events ledger never touches the shared batch; it returns + // the post-commit apply hook (nil for an idempotent duplicate). var applyEvents func() cerr := d.store.Batch(func(b *rocksdb.BatchWriter) error { if cfg.Ledgers { @@ -166,6 +220,11 @@ func (d *DB) IngestLedger(seq uint32, lcm xdr.LedgerCloseMetaView, cfg Ingest) ( return fmt.Errorf("hotchunk: queue ledger seq %d: %w", seq, err) } } + if cfg.Txhash && len(txEntries) > 0 { + if err := d.txhash.AddEntriesToBatch(b, txEntries); err != nil { + return fmt.Errorf("hotchunk: queue tx hashes seq %d: %w", seq, err) + } + } if cfg.Events { apply, err := d.events.IngestLedgerToBatch(b, seq, payloads) if err != nil { diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk_test.go index ca296a00b..71ea3452b 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk_test.go @@ -19,6 +19,7 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" ) const testPassphrase = "Public Global Stellar Network ; September 2015" @@ -37,7 +38,7 @@ func openTestDB(t *testing.T, chunkID chunk.ID) *DB { return db } -func allTypes() Ingest { return Ingest{Ledgers: true, Events: true} } +func allTypes() Ingest { return Ingest{Ledgers: true, Txhash: true, Events: true} } func TestOpen_ValidatesInputs(t *testing.T) { _, err := Open("", chunk.ID(0), silentLogger()) @@ -47,20 +48,29 @@ func TestOpen_ValidatesInputs(t *testing.T) { require.ErrorIs(t, err, 
stores.ErrInvalidConfig) } -func TestColumnFamilies_IsLedgerAndEventsCFs(t *testing.T) { +func TestColumnFamilies_UnionIsNonColliding(t *testing.T) { cfs := columnFamilies() - // 1 ledger CF + 3 events CFs. - require.Len(t, cfs, 1+len(eventstore.CFNames())) - require.Equal(t, ledger.LedgersCF, cfs[0]) + // 1 ledger CF + 3 events CFs + 16 txhash CFs = 20. + require.Len(t, cfs, 1+len(eventstore.CFNames())+len(txhash.CFNames())) + seen := map[string]bool{} + for _, cf := range cfs { + require.False(t, seen[cf], "CF name %q collides across facades", cf) + seen[cf] = true + } + require.Contains(t, seen, ledger.LedgersCF) for _, cf := range eventstore.CFNames() { - require.Contains(t, cfs, cf) + require.Contains(t, seen, cf) + } + for _, cf := range txhash.CFNames() { + require.Contains(t, seen, cf) } } -// TestIngestLedger_LedgerCommittedAndWatermarkAdvances is the core decision-(a) -// happy path: one IngestLedger call writes the ledger into the hot DB, and the -// single watermark reaches exactly the committed seq. -func TestIngestLedger_LedgerCommittedAndWatermarkAdvances(t *testing.T) { +// TestIngestLedger_AllCFsAdvanceTogether is the core decision-(a) happy path: +// one IngestLedger call writes the ledger, its tx hash, and its event into the +// ONE shared DB, and the single watermark reaches exactly the committed seq — +// every CF readable, every CF in lockstep. 
+func TestIngestLedger_AllCFsAdvanceTogether(t *testing.T) { chunkID := chunk.ID(0) first := chunkID.FirstLedger() db := openTestDB(t, chunkID) @@ -70,21 +80,34 @@ func TestIngestLedger_LedgerCommittedAndWatermarkAdvances(t *testing.T) { require.NoError(t, err) require.False(t, ok) - rawA := zeroTxLCM(t, first) - rawB := zeroTxLCM(t, first+1) + rawA, hashA, termA := lcmWithEvent(t, first) + rawB, hashB, _ := lcmWithEvent(t, first+1) counts, err := db.IngestLedger(first, xdr.LedgerCloseMetaView(rawA), allTypes()) require.NoError(t, err) - assert.Equal(t, LedgerCounts{Ledgers: 1}, counts) + assert.Equal(t, LedgerCounts{Ledgers: 1, Txhash: 1, Events: 1}, counts) counts, err = db.IngestLedger(first+1, xdr.LedgerCloseMetaView(rawB), allTypes()) require.NoError(t, err) - assert.Equal(t, LedgerCounts{Ledgers: 1}, counts) + assert.Equal(t, LedgerCounts{Ledgers: 1, Txhash: 1, Events: 1}, counts) // ledgers CF. gotA, err := db.Ledgers().GetLedgerRaw(first) require.NoError(t, err) assert.Equal(t, rawA, gotA) + // txhash CFs. + seqA, err := db.Txhash().Get(hashA) + require.NoError(t, err) + assert.Equal(t, first, seqA) + seqB, err := db.Txhash().Get(hashB) + require.NoError(t, err) + assert.Equal(t, first+1, seqB) + // events CFs. + bm, err := db.Events().Lookup(context.Background(), termA) + require.NoError(t, err) + require.NotNil(t, bm) + assert.Equal(t, uint64(2), bm.GetCardinality(), "both ledgers share the event term") + assert.Equal(t, uint32(2), db.Events().NextEventID()) // The single authoritative watermark equals the last committed seq. maxSeq, ok, err := db.MaxCommittedSeq() @@ -93,11 +116,48 @@ func TestIngestLedger_LedgerCommittedAndWatermarkAdvances(t *testing.T) { assert.Equal(t, first+1, maxSeq) } -// TestIngestLedger_DurableAcrossReopen confirms a committed ledger survives a -// close/reopen (sync=true durability), and that a commit into a CLOSED store -// fails and leaves nothing behind — the single synced WriteBatch is -// all-or-nothing. 
-func TestIngestLedger_DurableAcrossReopen(t *testing.T) { +// TestIngestLedger_RejectedLedgerPersistsNothingAcrossAnyCF is the atomicity +// guarantee for decision (a): a ledger the events facade rejects (here an +// out-of-range seq) must leave EVERY CF untouched — the ledgers and txhash CFs +// included — because the whole ledger is one batch and the events facade's +// validation aborts that batch before commit. The single watermark must not +// advance. +func TestIngestLedger_RejectedLedgerPersistsNothingAcrossAnyCF(t *testing.T) { + chunkID := chunk.ID(0) + db := openTestDB(t, chunkID) + + // A ledger seq ABOVE the chunk's range: the events facade rejects it + // (ErrLedgerOutOfRange) from inside the batch callback, aborting the write. + badSeq := chunkID.LastLedger() + 1 + raw, hash, term := lcmWithEvent(t, badSeq) + + _, err := db.IngestLedger(badSeq, xdr.LedgerCloseMetaView(raw), allTypes()) + require.Error(t, err) + require.ErrorIs(t, err, eventstore.ErrLedgerOutOfRange) + + // NOTHING persisted, across every CF: + // ledgers CF — no row at badSeq. + _, gerr := db.Ledgers().GetLedgerRaw(badSeq) + require.ErrorIs(t, gerr, stores.ErrNotFound) + // txhash CFs — the hash is absent. + _, gerr = db.Txhash().Get(hash) + require.ErrorIs(t, gerr, stores.ErrNotFound) + // events CFs — no term indexed, no event committed. + _, lerr := db.Events().Lookup(context.Background(), term) + require.ErrorIs(t, lerr, eventstore.ErrTermNotFound) + assert.Equal(t, uint32(0), db.Events().NextEventID()) + + // The single watermark is still empty — nothing committed. + _, ok, err := db.MaxCommittedSeq() + require.NoError(t, err) + require.False(t, ok, "a rejected ledger must not advance the watermark") +} + +// TestIngestLedger_MidBatchCommitFailurePersistsNothing ingests into a CLOSED +// store (IngestLedger's IsClosed guard rejects it before any batch is built) +// and asserts the rejected ledger persisted nothing across any CF after +// reopen; it does not exercise a failure partway through a commit.
+func TestIngestLedger_MidBatchCommitFailurePersistsNothing(t *testing.T) { chunkID := chunk.ID(0) first := chunkID.FirstLedger() dir := t.TempDir() @@ -106,7 +166,7 @@ func TestIngestLedger_DurableAcrossReopen(t *testing.T) { require.NoError(t, err) // Commit one good ledger so there is a known watermark, then close the DB. - rawGood := zeroTxLCM(t, first) + rawGood, hashGood, _ := lcmWithEvent(t, first) _, err = db.IngestLedger(first, xdr.LedgerCloseMetaView(rawGood), allTypes()) require.NoError(t, err) require.NoError(t, db.Close()) @@ -114,6 +174,7 @@ func TestIngestLedger_DurableAcrossReopen(t *testing.T) { // Reopen and confirm the watermark survived (sync=true durability). db2, err := Open(dir, chunkID, silentLogger()) require.NoError(t, err) + t.Cleanup(func() { _ = db2.Close() }) maxSeq, ok, err := db2.MaxCommittedSeq() require.NoError(t, err) @@ -123,12 +184,12 @@ func TestIngestLedger_DurableAcrossReopen(t *testing.T) { // Now close the DB and attempt to ingest the NEXT ledger into the closed // store: the commit fails, and nothing for that ledger persists anywhere. require.NoError(t, db2.Close()) - rawNext := zeroTxLCM(t, first+1) + rawNext, hashNext, _ := lcmWithEvent(t, first+1) _, err = db2.IngestLedger(first+1, xdr.LedgerCloseMetaView(rawNext), allTypes()) require.Error(t, err) - // Reopen a third time: the failed ledger left NO trace, and the watermark is - // still the last good seq. + // Reopen a third time: the failed ledger left NO trace in any CF, and the + // watermark is still the last good seq. db3, err := Open(dir, chunkID, silentLogger()) require.NoError(t, err) t.Cleanup(func() { _ = db3.Close() }) @@ -138,143 +199,128 @@ func TestIngestLedger_DurableAcrossReopen(t *testing.T) { require.True(t, ok) assert.Equal(t, first, maxSeq, "the failed ledger did not advance the watermark") - // The good ledger's data is intact; the failed ledger's is wholly absent. 
+ // The events CF advanced for exactly the one good ledger — the failed + // ledger's event was not committed (warmup reconstructed the offsets from + // disk, which hold only the good ledger). + assert.Equal(t, uint32(1), db3.Events().NextEventID(), + "the failed ledger's event must not be committed to the events CFs") + + // The good ledger's data is intact; the failed ledger's is wholly absent + // across the ledgers and txhash CFs. _, gerr := db3.Ledgers().GetLedgerRaw(first + 1) require.ErrorIs(t, gerr, stores.ErrNotFound) + _, gerr = db3.Txhash().Get(hashNext) + require.ErrorIs(t, gerr, stores.ErrNotFound) gotGood, err := db3.Ledgers().GetLedgerRaw(first) require.NoError(t, err) assert.Equal(t, rawGood, gotGood) + _, err = db3.Txhash().Get(hashGood) + require.NoError(t, err) } -// TestSharedBatch_DirectRocksAbort is the lower-level atomicity proof: queue a -// Put into the ledger CF of the store, then return an error from the batch -// callback — RocksDB applies NONE of it. Pins the property the IngestLedger -// path relies on (atomicity of one WriteBatch). -func TestSharedBatch_DirectRocksAbort(t *testing.T) { +// TestSharedBatch_DirectRocksAbortAcrossCFs is the lower-level atomicity proof: +// queue Puts into DIFFERENT CFs of the shared store, then return an error from +// the batch callback — RocksDB applies NONE of them. Pins the property the +// IngestLedger path relies on (intra-store cross-CF atomicity of one +// WriteBatch). 
+func TestSharedBatch_DirectRocksAbortAcrossCFs(t *testing.T) { db := openTestDB(t, chunk.ID(0)) + var hash [32]byte + hash[0] = 0xa0 sentinelErr := assert.AnError err := storeOf(db).Batch(func(b *rocksdb.BatchWriter) error { b.Put(ledger.LedgersCF, rocksdb.EncodeUint32(2), []byte("ledger-row")) + b.Put(txhash.CFNames()[0xa], hash[:], rocksdb.EncodeUint32(2)) + b.Put(eventstore.DataCF, []byte{0, 0, 0, 0}, []byte("event-row")) return sentinelErr // abort: nothing should commit }) require.ErrorIs(t, err, sentinelErr) - // The CF did not receive the aborted write. + // None of the three CFs received the aborted writes. _, gerr := db.Ledgers().GetLedgerRaw(2) require.ErrorIs(t, gerr, stores.ErrNotFound) + _, gerr = db.Txhash().Get(hash) + require.ErrorIs(t, gerr, stores.ErrNotFound) _, ok, derr := db.MaxCommittedSeq() require.NoError(t, derr) require.False(t, ok) } -// storeOf exposes the store for the direct-batch atomicity test (same package, -// so no production accessor is needed). +// storeOf exposes the shared store for the direct-batch atomicity test (same +// package, so no production accessor is needed). func storeOf(db *DB) *rocksdb.Store { return db.store } -// TestIngestLedger_ClosedDBFails confirms a closed DB rejects ingest. -func TestIngestLedger_ClosedDBFails(t *testing.T) { +// TestIngestLedger_DisabledTypesUntouched confirms the Ingest toggles select +// which CFs the single batch writes: ledgers-only leaves txhash/events empty. 
+func TestIngestLedger_DisabledTypesUntouched(t *testing.T) { chunkID := chunk.ID(0) - db, err := Open(t.TempDir(), chunkID, silentLogger()) + first := chunkID.FirstLedger() + db := openTestDB(t, chunkID) + + raw, hash, term := lcmWithEvent(t, first) + counts, err := db.IngestLedger(first, xdr.LedgerCloseMetaView(raw), Ingest{Ledgers: true}) require.NoError(t, err) - require.NoError(t, db.Close()) + assert.Equal(t, LedgerCounts{Ledgers: 1}, counts) - raw := zeroTxLCM(t, chunkID.FirstLedger()) - _, err = db.IngestLedger(chunkID.FirstLedger(), xdr.LedgerCloseMetaView(raw), allTypes()) - require.ErrorIs(t, err, stores.ErrStoreClosed) + got, err := db.Ledgers().GetLedgerRaw(first) + require.NoError(t, err) + assert.Equal(t, raw, got) + + _, gerr := db.Txhash().Get(hash) + require.ErrorIs(t, gerr, stores.ErrNotFound) + _, lerr := db.Events().Lookup(context.Background(), term) + require.ErrorIs(t, lerr, eventstore.ErrTermNotFound) } -// TestIngestLedger_EventsCommittedAcrossEventsCFs is the events decision-(a) -// proof: a ledger carrying one contract event commits the ledger AND the -// event in the SAME batch, so the events facade indexes the event's term and -// the event-id watermark advances alongside the ledger watermark. -func TestIngestLedger_EventsCommittedAcrossEventsCFs(t *testing.T) { +// TestReopen_RecoversEventsMirror confirms the events facade's warmup runs over +// the shared store on reopen (the mirror/offsets are reconstructed from the +// events CFs), so a reopened DB assigns event IDs continuing from disk. 
+func TestReopen_RecoversEventsMirror(t *testing.T) { chunkID := chunk.ID(0) first := chunkID.FirstLedger() - db := openTestDB(t, chunkID) - - rawA, termA := lcmWithEvent(t, first) - rawB, _ := lcmWithEvent(t, first+1) - - counts, err := db.IngestLedger(first, xdr.LedgerCloseMetaView(rawA), allTypes()) - require.NoError(t, err) - assert.Equal(t, LedgerCounts{Ledgers: 1, Events: 1}, counts) + dir := t.TempDir() - counts, err = db.IngestLedger(first+1, xdr.LedgerCloseMetaView(rawB), allTypes()) + db, err := Open(dir, chunkID, silentLogger()) require.NoError(t, err) - assert.Equal(t, LedgerCounts{Ledgers: 1, Events: 1}, counts) - - // events CFs: the shared term resolves to both ledgers' events, and the - // event-id watermark advanced to 2. - bm, err := db.Events().Lookup(context.Background(), termA) + raw, _, _ := lcmWithEvent(t, first) + _, err = db.IngestLedger(first, xdr.LedgerCloseMetaView(raw), allTypes()) require.NoError(t, err) - require.NotNil(t, bm) - assert.Equal(t, uint64(2), bm.GetCardinality(), "both ledgers share the event term") - assert.Equal(t, uint32(2), db.Events().NextEventID()) + require.NoError(t, db.Close()) - // The single watermark equals the last committed ledger seq. - maxSeq, ok, err := db.MaxCommittedSeq() + db2, err := Open(dir, chunkID, silentLogger()) require.NoError(t, err) - require.True(t, ok) - assert.Equal(t, first+1, maxSeq) + t.Cleanup(func() { _ = db2.Close() }) + assert.Equal(t, uint32(1), db2.Events().NextEventID(), "warmup recovered the events offsets") } -// TestIngestLedger_DisabledEventsUntouched confirms an Ingest selection without -// Events leaves the events CFs empty even when the ledger carries an event. -func TestIngestLedger_DisabledEventsUntouched(t *testing.T) { +// TestIngestLedger_ClosedDBFails confirms a closed shared DB rejects ingest. 
+func TestIngestLedger_ClosedDBFails(t *testing.T) { chunkID := chunk.ID(0) - first := chunkID.FirstLedger() - db := openTestDB(t, chunkID) - - raw, term := lcmWithEvent(t, first) - counts, err := db.IngestLedger(first, xdr.LedgerCloseMetaView(raw), Ingest{Ledgers: true}) + db, err := Open(t.TempDir(), chunkID, silentLogger()) require.NoError(t, err) - assert.Equal(t, LedgerCounts{Ledgers: 1}, counts) + require.NoError(t, db.Close()) - _, lerr := db.Events().Lookup(context.Background(), term) - require.ErrorIs(t, lerr, eventstore.ErrTermNotFound) - assert.Equal(t, uint32(0), db.Events().NextEventID()) + raw := zeroTxLCM(t, chunkID.FirstLedger()) + _, err = db.IngestLedger(chunkID.FirstLedger(), xdr.LedgerCloseMetaView(raw), allTypes()) + require.ErrorIs(t, err, stores.ErrStoreClosed) } // ──────────────────────────── LCM fixtures ──────────────────────────── -// zeroTxLCM builds a minimal V2 LCM with no transactions at the given sequence. -func zeroTxLCM(t *testing.T, seq uint32) []byte { - t.Helper() - lcm := xdr.LedgerCloseMeta{ - V: 2, - V2: &xdr.LedgerCloseMetaV2{ - LedgerHeader: xdr.LedgerHeaderHistoryEntry{ - Header: xdr.LedgerHeader{ - ScpValue: xdr.StellarValue{CloseTime: xdr.TimePoint(0)}, - LedgerSeq: xdr.Uint32(seq), - }, - }, - TxSet: xdr.GeneralizedTransactionSet{ - V: 1, - V1TxSet: &xdr.TransactionSetV1{Phases: []xdr.TransactionPhase{}}, - }, - TxProcessing: []xdr.TransactionResultMetaV1{}, - }, - } - raw, err := lcm.MarshalBinary() - require.NoError(t, err) - return raw -} - -// lcmWithEvent builds a V2 LCM at seq carrying one transaction that emits a -// single contract event (topic="hotchunk_test"). Returns the wire bytes and -// the event's term key. -func lcmWithEvent(t *testing.T, seq uint32) ([]byte, events.TermKey) { +// lcmWithEvent builds a V2 LCM with one transaction carrying one contract event +// (topic="hotchunk_test"). Returns the wire bytes, the tx hash, and the event's +// term key. 
+func lcmWithEvent(t *testing.T, seq uint32) ([]byte, [32]byte, events.TermKey) { t.Helper() ev := buildContractEvent("hotchunk_test") meta := xdr.TransactionMeta{ V: 4, V4: &xdr.TransactionMetaV4{Operations: []xdr.OperationMetaV2{{Events: []xdr.ContractEvent{ev}}}}, } - lcm := buildLCMWithTx(t, seq, meta) + lcm, hash := buildLCMWithTx(t, seq, meta) raw, err := lcm.MarshalBinary() require.NoError(t, err) @@ -283,7 +329,15 @@ func lcmWithEvent(t *testing.T, seq uint32) ([]byte, events.TermKey) { keys, err := events.TermsForBytes(evBytes) require.NoError(t, err) require.NotEmpty(t, keys) - return raw, keys[0] + return raw, hash, keys[0] +} + +func zeroTxLCM(t *testing.T, seq uint32) []byte { + t.Helper() + lcm, _ := buildLCM(t, seq, nil) + raw, err := lcm.MarshalBinary() + require.NoError(t, err) + return raw } func buildContractEvent(topic string) xdr.ContractEvent { @@ -315,30 +369,53 @@ func successResult() xdr.TransactionResult { } } -func buildLCMWithTx(t *testing.T, seq uint32, meta xdr.TransactionMeta) xdr.LedgerCloseMeta { +func buildLCMWithTx(t *testing.T, seq uint32, meta xdr.TransactionMeta) (xdr.LedgerCloseMeta, [32]byte) { + t.Helper() + lcm, hashes := buildLCM(t, seq, []xdr.TransactionMeta{meta}) + require.Len(t, hashes, 1) + return lcm, hashes[0] +} + +func buildLCM(t *testing.T, seq uint32, txMetas []xdr.TransactionMeta) (xdr.LedgerCloseMeta, [][32]byte) { t.Helper() - envelope := xdr.TransactionEnvelope{ - Type: xdr.EnvelopeTypeEnvelopeTypeTx, - V1: &xdr.TransactionV1Envelope{ - Tx: xdr.Transaction{ - SourceAccount: xdr.MustMuxedAddress(keypair.MustRandom().Address()), - Ext: xdr.TransactionExt{ - V: 1, - SorobanData: &xdr.SorobanTransactionData{}, + phases := make([]xdr.TransactionPhase, 0, len(txMetas)) + txProcessing := make([]xdr.TransactionResultMetaV1, 0, len(txMetas)) + hashes := make([][32]byte, 0, len(txMetas)) + + for _, meta := range txMetas { + envelope := xdr.TransactionEnvelope{ + Type: xdr.EnvelopeTypeEnvelopeTypeTx, + V1: 
&xdr.TransactionV1Envelope{ + Tx: xdr.Transaction{ + SourceAccount: xdr.MustMuxedAddress(keypair.MustRandom().Address()), + Ext: xdr.TransactionExt{ + V: 1, + SorobanData: &xdr.SorobanTransactionData{}, + }, }, }, - }, + } + hash, err := network.HashTransactionInEnvelope(envelope, testPassphrase) + require.NoError(t, err) + hashes = append(hashes, hash) + + txProcessing = append(txProcessing, xdr.TransactionResultMetaV1{ + TxApplyProcessing: meta, + Result: xdr.TransactionResultPair{ + TransactionHash: hash, + Result: successResult(), + }, + }) + comp := []xdr.TxSetComponent{{ + Type: xdr.TxSetComponentTypeTxsetCompTxsMaybeDiscountedFee, + TxsMaybeDiscountedFee: &xdr.TxSetComponentTxsMaybeDiscountedFee{ + Txs: []xdr.TransactionEnvelope{envelope}, + }, + }} + phases = append(phases, xdr.TransactionPhase{V: 0, V0Components: &comp}) } - hash, err := network.HashTransactionInEnvelope(envelope, testPassphrase) - require.NoError(t, err) - comp := []xdr.TxSetComponent{{ - Type: xdr.TxSetComponentTypeTxsetCompTxsMaybeDiscountedFee, - TxsMaybeDiscountedFee: &xdr.TxSetComponentTxsMaybeDiscountedFee{ - Txs: []xdr.TransactionEnvelope{envelope}, - }, - }} - return xdr.LedgerCloseMeta{ + lcm := xdr.LedgerCloseMeta{ V: 2, V2: &xdr.LedgerCloseMetaV2{ LedgerHeader: xdr.LedgerHeaderHistoryEntry{ @@ -349,15 +426,10 @@ func buildLCMWithTx(t *testing.T, seq uint32, meta xdr.TransactionMeta) xdr.Ledg }, TxSet: xdr.GeneralizedTransactionSet{ V: 1, - V1TxSet: &xdr.TransactionSetV1{Phases: []xdr.TransactionPhase{{V: 0, V0Components: &comp}}}, + V1TxSet: &xdr.TransactionSetV1{Phases: phases}, }, - TxProcessing: []xdr.TransactionResultMetaV1{{ - TxApplyProcessing: meta, - Result: xdr.TransactionResultPair{ - TransactionHash: hash, - Result: successResult(), - }, - }}, + TxProcessing: txProcessing, }, } + return lcm, hashes } diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go index 
18bfa4420..973103086 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go @@ -45,6 +45,11 @@ type Entry struct { type HotStore struct { store *rocksdb.Store chunkID chunk.ID + // ownsStore is true when this HotStore opened its own dedicated DB + // (standalone NewHotStore); false when wrapping the SHARED per-chunk + // multi-CF DB injected via NewWithStore (decision (a)), which the + // hotchunk.DB owns and closes once. + ownsStore bool } // NewHotStore validates inputs and returns an open HotStore bound to @@ -65,9 +70,30 @@ func NewHotStore(path string, chunkID chunk.ID, logger *supportlog.Entry) (*HotS if err != nil { return nil, err } - return &HotStore{store: store, chunkID: chunkID}, nil + return &HotStore{store: store, chunkID: chunkID, ownsStore: true}, nil } +// NewWithStore wraps an ALREADY-OPEN rocksdb.Store as a txhash HotStore +// operating on the 16 nibble-routed CFs (CFNames()). The store is NOT +// owned by the returned HotStore (Close is a no-op) — this is the +// constructor the hotchunk package uses to compose the txhash facade +// over the shared per-chunk multi-CF DB. The store must have been +// opened with CFNames() registered. +func NewWithStore(store *rocksdb.Store, chunkID chunk.ID) *HotStore { + return &HotStore{store: store, chunkID: chunkID} +} + +// CFNames returns the 16 nibble-routed column-family names this facade +// owns (cf-0..cf-f). Exported so the hotchunk shared-DB opener can +// register them alongside the ledger and events CFs. +func CFNames() []string { return cfNames() } + +// Tuning returns this facade's RocksDB tuning. The DB-wide knobs +// (block cache, background jobs, WAL cap) and the per-CF knobs the +// txhash workload calibrated are applied to the shared per-chunk DB by +// the hotchunk opener (which merges this with the union CF list). 
+func Tuning() rocksdb.Tuning { return tuning() } + func cfNames() []string { out := make([]string, numCFs) copy(out, cfNameByNibble[:]) @@ -139,7 +165,16 @@ func tuning() rocksdb.Tuning { } } -func (h *HotStore) Close() error { return h.store.Close() } +// Close releases the underlying RocksDB store IF this HotStore owns it +// (standalone NewHotStore). When wrapping the shared per-chunk DB +// (NewWithStore), Close is a no-op — hotchunk.DB owns and closes the +// shared store exactly once. Idempotent. +func (h *HotStore) Close() error { + if !h.ownsStore { + return nil + } + return h.store.Close() +} // ChunkID returns the chunk this store is bound to (constructor-supplied; // never reads the store). @@ -168,6 +203,22 @@ func (h *HotStore) AddEntries(entries []Entry) error { } } +// AddEntriesToBatch queues each (txhash → ledgerSeq) Put into b on its +// nibble-routed CF — the building block the hotchunk package uses to +// fold the ledger's tx-hash writes into the one atomic per-ledger +// WriteBatch shared across all CFs (decision (a)). It does not commit: +// the caller owns the batch and its single synced Write. A closed +// store returns ErrStoreClosed before touching the batch. +func (h *HotStore) AddEntriesToBatch(b *rocksdb.BatchWriter, entries []Entry) error { + if h.store.IsClosed() { + return rocksdb.ErrStoreClosed + } + for _, e := range entries { + b.Put(cfNameForTxHash(e.Hash), e.Hash[:], rocksdb.EncodeUint32(e.LedgerSeq)) + } + return nil +} + // Get returns the ledger sequence the hash was committed in, or // (0, stores.ErrNotFound) on miss. Only the routed CF is queried. 
func (h *HotStore) Get(hash [32]byte) (uint32, error) { diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/PERF.md b/cmd/stellar-rpc/internal/fullhistory/streaming/PERF.md new file mode 100644 index 000000000..2ff72d33f --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/PERF.md @@ -0,0 +1,65 @@ +# Full-history streaming: tx-hash cold-index performance expectations + +These are the design's **measured** figures for the tx-hash cold tier, taken +from the `bench-fullhistory` harness (on the `rpc-hack` branch: +`cmd/stellar-rpc/scripts/bench-fullhistory`, the `cold-ingest --types=txhash` +and `build-txhash-index` commands). They are recorded here, not re-measured in +this package, because the streaming rebuild produces **byte-format-identical** +artifacts to the merged cold path the harness measures — see +`perf_test.go::TestStreamingRebuild_ByteIdenticalToColdPath`, which proves the +streaming `buildTxhashIndex` and a direct `txhash.BuildColdIndex` over the same +`.bin` inputs write the same bytes. Adopting the formats unchanged is what lets +the harness's figures transfer (gettransaction-full-history-design.md §6.2, +Part 4). + +Geometry assumed below: the default window of `DefaultChunksPerIndex = 1000` +chunks, a dense chunk of ~3M transactions, so a dense full window is +~3×10⁹ transactions. 
+ +## On-disk format (the basis for the transfer) + +| artifact | format | width | +| --- | --- | --- | +| `.bin` per-chunk sorted run (§6.1) | `uint64` LE count header, then `[key:16][seq:4 LE]` entries, sorted by big-endian `uint64` of the key | **20 B/entry exactly** | +| `.idx` per-window MPHF (§6.2) | streamhash MPHF; 16-byte routing key; **3-byte** payload (`seq − MinLedger`); **1-byte** fingerprint; `[MinLedger, MaxLedger]` in user metadata | **≈4.2 B/tx** | + +The `.bin` key is the first 16 bytes of the tx hash (`streamhash.MinKeySize`); +the `.idx` payload is a 3-byte offset from the window's `MinLedger` +(`lo.FirstLedger()`), spanning up to 16.77M ledgers — a wider window +(>16.77M ledgers, ≥1678 chunks) needs a 4-byte payload, which adds 1 B/tx. + +## Expected figures (from the bench harness) + +- **Index size: ≈4.2 B/tx** at the default 3-byte payload (MPHF structure + + 3-byte payload + 1-byte fingerprint) — **≈12.5 GB** for a dense full window. + (`perf_test.go::TestColdIndexSizing_ConsistentWithPart4` checks a small-N + sanity band around this and pins the inviolable 4 B/tx payload+fingerprint + floor; the asymptote itself is the harness's measurement.) + +- **`.bin` floor: ≈20 B/tx, ≈60 GB** for a dense full window — the runs the + index consumes. Transient `.bin` disk is bounded by the eager sweep at one + dense in-flight window's worth (≈60 GB), irreducible because a window's build + merges all of its runs at once. + +- **Rebuild: ≈1 minute** for a full dense window — merging the ≈60 GB of + sorted `.bin` runs into the ≈12.5 GB `.idx` at a ~200 MB/s write burst. + Mid-window rebuilds scale with `hi − lo`. Against a ~14-hour chunk-boundary + cadence at mainnet rates this is ~0.1% duty cycle. + +- **Transient peak: ~2× the index size** in the window dir during each + rebuild (~25 GB at window end) — old and new coverage files coexist from the + start of the write until the eager sweep's unlink.
+ +- **Hot `txhash` CF: 36 B/tx raw** (32-byte key + 4-byte value, before RocksDB + overhead), ~110 MB raw per dense chunk — the serving tier for chunks above + the index's `hi` until the next rebuild folds them in. + +## Honesty note + +The streaming package does **not** re-measure these numbers — measuring a dense +full window needs the multi-TB corpus the `bench-fullhistory` harness drives on +`rpc-hack`. What this package proves instead is the precondition that makes the +transfer valid: format identity (byte-for-byte) between the streaming rebuild +and the merged cold path, plus the on-disk format pins (`perf_test.go`). If a +width or MPHF parameter ever changes, those tests fail and these figures must be +re-derived from the harness. diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/artifacts.go b/cmd/stellar-rpc/internal/fullhistory/streaming/artifacts.go index a3253f5b5..2e498bc46 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/artifacts.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/artifacts.go @@ -7,8 +7,8 @@ import ( ) // ArtifactSet is the subset of per-chunk artifact Kinds a processChunk pass must -// produce (design-docs rule 2). It is a small immutable set over the per-chunk -// kinds (currently just ledgers); the resolver builds it from the catalog +// produce (design-docs rule 2). It is a small immutable set over the three +// per-chunk kinds (ledgers, events, txhash); the resolver builds it from the catalog // difference and processChunk narrows it further by dropping already-frozen // kinds (rule 1's per-kind idempotency). // @@ -42,7 +42,7 @@ func NewArtifactSet(kinds ...Kind) ArtifactSet { return s } -// AllArtifacts is the full set (currently just ledgers) — what a from-scratch +// AllArtifacts is the full set (ledgers, events, txhash) — what a from-scratch // chunk freeze requests before per-kind idempotency narrows it. func AllArtifacts() ArtifactSet { return NewArtifactSet(allKinds...) 
} @@ -98,6 +98,7 @@ func (s ArtifactSet) String() string { func (s ArtifactSet) ingestConfig() ingest.Config { //nolint:unused // called from processChunk in a later layer return ingest.Config{ Ledgers: s.Has(KindLedgers), + Txhash: s.Has(KindTxHash), Events: s.Has(KindEvents), } } diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/audit.go b/cmd/stellar-rpc/internal/fullhistory/streaming/audit.go index c27b2cd31..98eb5bcf3 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/audit.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/audit.go @@ -1,6 +1,7 @@ package streaming import ( + "errors" "fmt" "strings" @@ -29,16 +30,17 @@ import ( // // Each invariant maps to one check, exactly as the design prescribes: // -// - INV-2 (single canonical state): walk meta-store keys, cross-check the -// FORBIDDEN co-existences — a "freezing"/"pruning" artifact key surviving -// quiescence; a hot key for a chunk cold artifacts fully serve. The two -// transients the design explicitly TOLERATES are excluded: a hot key reading -// "transient" (an in-flight directory op bracket), and a "freezing" artifact -// key for a chunk strictly ABOVE completeThrough (the hot-volume-loss tail no -// source can yet repair). +// - INV-2 (single canonical state): walk meta-store keys, cross-check the four +// FORBIDDEN co-existences — two frozen index keys in one window; a +// "freezing"/"pruning" artifact key surviving quiescence; a hot key for a +// chunk cold artifacts fully serve; a per-chunk txhash key in a finalized +// window. The two transients the design explicitly TOLERATES are excluded: +// a hot key reading "transient" (an in-flight directory op bracket), and a +// "freezing" artifact key for a chunk strictly ABOVE completeThrough (the +// hot-volume-loss tail no source can yet repair). 
// - INV-3 (disk matches meta-store): walk the filesystem against the meta store -// in BOTH directions — every artifact/hot path on disk must trace back to a -// key (no orphan files, no duplicate artifacts), and every key naming an +// in BOTH directions — every artifact/index/hot path on disk must trace back +// to a key (no orphan files, no duplicate artifacts), and every key naming an // expected path that is in a final/tolerated state must have its file (no // dangling keys). // - INV-4 (retention bound): walk meta-store keys, compare each key's ledger @@ -190,6 +192,14 @@ func RunAudit(cfg Config, opts AuditOptions, logger *supportlog.Entry) (AuditRep cfg = cfg.WithDefaults() paths := cfg.ResolvePaths() + if cfg.Backfill.ChunksPerTxhashIndex == nil { + return AuditReport{}, errors.New( + "streaming: audit: chunks_per_txhash_index unresolved (WithDefaults not applied)") + } + windows, err := NewWindows(*cfg.Backfill.ChunksPerTxhashIndex) + if err != nil { + return AuditReport{}, fmt.Errorf("streaming: audit window config: %w", err) + } if cfg.Streaming.RetentionChunks != nil && opts.RetentionChunks == 0 { opts.RetentionChunks = *cfg.Streaming.RetentionChunks } @@ -206,7 +216,7 @@ func RunAudit(cfg Config, opts AuditOptions, logger *supportlog.Entry) (AuditRep } defer func() { _ = store.Close() }() - cat := NewCatalog(store, NewLayoutFromPaths(paths)) + cat := NewCatalog(store, NewLayoutFromPaths(paths), windows) logger.WithField("retention_chunks", opts.RetentionChunks). WithField("deep", opts.Deep != nil). 
diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/audit_invariants.go b/cmd/stellar-rpc/internal/fullhistory/streaming/audit_invariants.go index ea8491f03..84780e0e9 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/audit_invariants.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/audit_invariants.go @@ -7,6 +7,10 @@ import ( "io/fs" "os" "path/filepath" + "slices" + "strings" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) // INV-2 — single canonical state. Walk meta-store keys, cross-check forbidden @@ -14,6 +18,10 @@ import ( // --------------------------------------------------------------------------- func (c *Catalog) auditSingleCanonicalState(through uint32, report *AuditReport) error { + covs, err := c.AllIndexKeys() + if err != nil { + return fmt.Errorf("streaming: audit INV-2 scan index keys: %w", err) + } refs, err := c.ChunkArtifactKeys() if err != nil { return fmt.Errorf("streaming: audit INV-2 scan chunk keys: %w", err) @@ -23,7 +31,41 @@ func (c *Catalog) auditSingleCanonicalState(through uint32, report *AuditReport) return fmt.Errorf("streaming: audit INV-2 scan hot keys: %w", err) } - // Clause 1: at quiescence no artifact key is "freezing" or "pruning", with the + // Clause 1: at most one "frozen" index key per window — at ALL times, not + // just quiescence (the commit batch promotes+demotes atomically). + // + // frozenPerWindow is also the DUPLICATE-TOLERANT frozen-coverage view that + // Clauses 3 and 4 read below. They MUST NOT route through + // Catalog.FrozenCoverage, which errors when a window has two frozen keys + // (catalog.go: "uniqueness invariant violated"): that would abort the whole + // audit with an I/O-shaped error and discard this very report — contradicting + // both Audit's "error only for I/O" contract and "report every breach". 
The + // two-frozen-keys case is recorded here as an INV-2 violation; the rest of the + // walk then proceeds against this map, tolerating the duplicate exactly as + // frozenCoverageContains and lastCommittedLedger do. + frozenPerWindow := map[WindowID][]IndexCoverage{} + for _, cov := range covs { + if cov.State == StateFrozen { + frozenPerWindow[cov.Window] = append(frozenPerWindow[cov.Window], cov) + } + } + for _, w := range sortedWindowIDs(frozenPerWindow) { + group := frozenPerWindow[w] + if len(group) > 1 { + keys := make([]string, len(group)) + for i, cov := range group { + keys[i] = cov.Key + } + report.Violations = append(report.Violations, Violation{ + Invariant: InvSingleCanonicalState, + Detail: fmt.Sprintf( + "window %s has %d frozen index coverages (must be at most 1): %s", + w, len(group), strings.Join(keys, ", ")), + }) + } + } + + // Clause 2: at quiescence no artifact key is "freezing" or "pruning", with the // ONE tolerated exception — a "freezing" per-chunk key strictly ABOVE // completeThrough (the hot-volume-loss tail, outside every plan range and the // retention window, that no source can yet repair). A "pruning" key is never @@ -58,10 +100,29 @@ func (c *Catalog) auditSingleCanonicalState(through uint32, report *AuditReport) } } - // Clause 2: no hot key for a chunk whose cold artifacts fully serve it (all - // artifacts durable). A "transient" hot key is the tolerated in-flight - // bracket — skip it. The orphan-hot check applies to "ready" keys (and any - // non-transient value). + // Index transients ("freezing"/"pruning") are NEVER tolerated at quiescence — + // the tick that observes them sweeps them, with no above-completeThrough + // carve-out (that carve-out is per-chunk only). 
+ for _, cov := range covs { + if cov.State == StateFreezing || cov.State == StatePruning { + report.Violations = append(report.Violations, Violation{ + Invariant: InvSingleCanonicalState, + Key: cov.Key, + Detail: fmt.Sprintf( + "index coverage key is %q at quiescence: the sweep should have removed this transient", + cov.State), + }) + } + } + + // Clause 3: no hot key for a chunk whose cold artifacts fully serve it (all + // artifacts durable AND the window's frozen index covers it). A "transient" + // hot key is the tolerated in-flight bracket — skip it. The orphan-hot check + // applies to "ready" keys (and any non-transient value). + covered, err := frozenCoverageContains(c) + if err != nil { + return fmt.Errorf("streaming: audit INV-2 frozen coverage: %w", err) + } for _, hc := range hot { hs, herr := c.HotState(hc) if herr != nil { @@ -71,19 +132,48 @@ func (c *Catalog) auditSingleCanonicalState(through uint32, report *AuditReport) // Tolerated in-flight directory-op bracket — not an orphan. continue } - pending, perr := pendingArtifacts(hc, c) + // Duplicate-tolerant equivalent of pendingArtifacts(hc): ledgers and events + // must be frozen, and txhash is exempt when the window's index covers the + // chunk. We resolve that coverage via the `covered` predicate + // (frozenCoverageContains, which keeps every frozen key) rather than + // pendingArtifacts -> indexCovers -> Catalog.FrozenCoverage, so a window + // with two frozen keys does not abort the audit. 
+ pending, perr := auditPendingArtifacts(c, hc, covered) if perr != nil { return fmt.Errorf("streaming: audit INV-2 pending artifacts %s: %w", hc, perr) } - if pending.Empty() { + if pending.Empty() && covered(hc) { report.Violations = append(report.Violations, Violation{ Invariant: InvSingleCanonicalState, Key: hotChunkKey(hc), Detail: fmt.Sprintf( "hot DB key persists for chunk %s whose cold artifacts fully serve it "+ - "(all artifacts frozen): the discard scan missed it", - hc, - ), + "(all artifacts frozen and its window's index covers it): the discard scan missed it", + hc), + }) + } + } + + // Clause 4: no per-chunk txhash key in a FINALIZED window (frozen index whose + // hi == the window's last chunk; its .bin inputs were demoted in the same + // terminal commit). Any state of the txhash key is a leftover here. + for _, ref := range refs { + if ref.Kind != KindTxHash { + continue + } + // Duplicate-tolerant equivalent of txhashRedundantInFinalizedWindow: the + // window is finalized when SOME frozen coverage of it is terminal. We read + // frozenPerWindow (built above, keeps every frozen key) instead of + // Catalog.FrozenCoverage, so a window with two frozen keys is recorded as a + // clause-1 INV-2 violation and still walked here. 
+ if c.auditTerminalCoverage(frozenPerWindow, ref.Chunk) { + report.Violations = append(report.Violations, Violation{ + Invariant: InvSingleCanonicalState, + Key: ref.Key(), + Detail: fmt.Sprintf( + "per-chunk txhash key %q persists for chunk %s in a finalized window "+ + "(its terminal index covers it): finalization demotion did not complete", + ref.State, ref.Chunk), }) } } @@ -91,6 +181,51 @@ func (c *Catalog) auditSingleCanonicalState(through uint32, report *AuditReport) return nil } +// auditPendingArtifacts is the audit's DUPLICATE-TOLERANT counterpart of +// pendingArtifacts (eligibility.go): it lists which processChunk outputs c still +// needs — ledgers and events must be frozen; txhash is exempt when a frozen index +// covers the chunk. It differs ONLY in how it resolves that coverage: it takes +// the `covered` predicate (frozenCoverageContains, which keeps EVERY frozen key) +// instead of routing through Catalog.FrozenCoverage, so a window holding two +// frozen keys is reported as a clause-1 INV-2 violation rather than aborting the +// audit with a uniqueness error that would discard the whole report. +func auditPendingArtifacts(cat *Catalog, c chunk.ID, covered func(chunk.ID) bool) (ArtifactSet, error) { + var need ArtifactSet + for _, kind := range []Kind{KindLedgers, KindEvents} { + state, err := cat.State(c, kind) + if err != nil { + return need, err + } + if state != StateFrozen { + need = need.Add(kind) + } + } + txState, err := cat.State(c, KindTxHash) + if err != nil { + return need, err + } + if txState != StateFrozen && !covered(c) { + need = need.Add(KindTxHash) + } + return need, nil +} + +// auditTerminalCoverage is the audit's DUPLICATE-TOLERANT counterpart of +// txhashRedundantInFinalizedWindow (eligibility.go): it reports whether ch's +// window is finalized — i.e. SOME frozen coverage of that window is terminal +// (Hi == the window's last chunk).
It reads the per-window frozen-coverage map +// (which keeps every frozen key) instead of Catalog.FrozenCoverage, so a window +// with two frozen keys does not abort the audit; the duplicate is already +// recorded as a clause-1 INV-2 violation. +func (c *Catalog) auditTerminalCoverage(frozenPerWindow map[WindowID][]IndexCoverage, ch chunk.ID) bool { + for _, cov := range frozenPerWindow[c.windows.WindowID(ch)] { + if c.windows.IsTerminalCoverage(cov) { + return true + } + } + return false +} + // --------------------------------------------------------------------------- // INV-3 — disk matches meta-store, BOTH directions. Walk the filesystem against // meta (orphan files, duplicate artifacts) and meta against the filesystem @@ -103,6 +238,10 @@ func (c *Catalog) auditDiskMatchesMeta(through uint32, report *AuditReport) erro if err != nil { return fmt.Errorf("streaming: audit INV-3 scan chunk keys: %w", err) } + covs, err := c.AllIndexKeys() + if err != nil { + return fmt.Errorf("streaming: audit INV-3 scan index keys: %w", err) + } hot, err := c.HotChunkKeys() if err != nil { return fmt.Errorf("streaming: audit INV-3 scan hot keys: %w", err) @@ -150,6 +289,27 @@ func (c *Catalog) auditDiskMatchesMeta(through uint32, report *AuditReport) erro } } } + for _, cov := range covs { + p := c.layout.IndexFilePath(cov) + addExpected(p) + if cov.State == StatePruning { + continue + } + ok, ferr := fileExists(p) + if ferr != nil { + return fmt.Errorf("streaming: audit INV-3 stat %s: %w", p, ferr) + } + if !ok { + report.Violations = append(report.Violations, Violation{ + Invariant: InvDiskMatchesMeta, + Key: cov.Key, + Path: p, + Detail: fmt.Sprintf( + "index coverage key is %q but its .idx file is missing: dangling key", cov.State), + }) + } + } + // Hot DB dirs: a "ready" (or any non-transient) hot key mandates its dir; a // "transient" key is the tolerated in-flight bracket where the dir may be // absent. Register every hot dir as expected either way. 
@@ -182,9 +342,10 @@ func (c *Catalog) auditDiskMatchesMeta(through uint32, report *AuditReport) erro // disk -> meta (orphan files, duplicate artifacts): walk every artifact tree // and flag any regular file whose path is not in the expected set. A - // duplicate artifact (a stray .pack) is just a path the meta store does not - // name, so it is caught by the same membership test — the design's "the - // meta-store names one expected path; the extras are orphans". + // duplicate artifact (a second events file for a chunk, a stray .idx) is just + // a path the meta store does not name, so it is caught by the same membership + // test — the design's "the meta-store names one expected path; the extras are + // orphans". for _, root := range c.artifactFileRoots() { if err := walkRegularFiles(root, func(path string) { if _, ok := expected[path]; ok { @@ -236,10 +397,11 @@ func (c *Catalog) auditDiskMatchesMeta(through uint32, report *AuditReport) erro func (c *Catalog) auditRetentionBound(floor uint32, report *AuditReport) error { // A chunk is below the floor when its LAST ledger is below the floor (the same - // ChunkBelowFloor predicate the prune/discard scans use). We do not flag a - // chunk merely straddling the floor: the reader retention contract masks the - // below-floor tail of a straddling chunk's window, and the prune scan only - // sweeps keys WHOLLY below the floor. + // ChunkBelowFloor predicate the prune/discard scans use). A window is below + // the floor when its last chunk is below it. We do not flag a chunk/window + // merely straddling the floor: the reader retention contract masks the + // below-floor tail of a straddling window, and the prune scan only sweeps + // keys WHOLLY below the floor. 
refs, err := c.ChunkArtifactKeys() if err != nil { return fmt.Errorf("streaming: audit INV-4 scan chunk keys: %w", err) @@ -257,6 +419,24 @@ func (c *Catalog) auditRetentionBound(floor uint32, report *AuditReport) error { } } + covs, err := c.AllIndexKeys() + if err != nil { + return fmt.Errorf("streaming: audit INV-4 scan index keys: %w", err) + } + for _, cov := range covs { + // A coverage is wholly below the floor when its highest chunk's last + // ledger is below the floor. + if cov.Hi.LastLedger() < floor { + report.Violations = append(report.Violations, Violation{ + Invariant: InvRetentionBound, + Key: cov.Key, + Detail: fmt.Sprintf( + "index coverage [%s,%s] (last ledger %d) is wholly below the retention floor %d", + cov.Lo, cov.Hi, cov.Hi.LastLedger(), floor), + }) + } + } + hot, err := c.HotChunkKeys() if err != nil { return fmt.Errorf("streaming: audit INV-4 scan hot keys: %w", err) @@ -353,15 +533,17 @@ func (c *Catalog) auditReadCorrectness(opts AuditOptions, report *AuditReport) e // how paths.go owns the durability primitives. // --------------------------------------------------------------------------- -// artifactFileRoots returns the per-chunk cold trees — the dirs that hold -// key-named files. The hot tree is walked separately (by directory, not file). -// These come straight off the bound Layout's per-tree roots, so they honor any -// [immutable_storage.*] path override exactly as the data path and the flock -// (Paths.LockRoots) do. +// artifactFileRoots returns the three per-chunk cold trees plus the index tree — +// the dirs that hold key-named files. The hot tree is walked separately (by +// directory, not file). These come straight off the bound Layout's per-tree +// roots, so they honor any [immutable_storage.*] path override exactly as the +// data path and the flock (Paths.LockRoots) do. 
func (c *Catalog) artifactFileRoots() []string { return []string{ c.layout.LedgersRoot(), c.layout.EventsRoot(), + c.layout.TxHashRawRoot(), + c.layout.TxHashIndexRoot(), } } @@ -440,3 +622,14 @@ func dirExists(path string) (bool, error) { } return info.IsDir(), nil } + +// sortedWindowIDs returns the map's keys in ascending order for deterministic +// violation reporting. +func sortedWindowIDs(m map[WindowID][]IndexCoverage) []WindowID { + out := make([]WindowID, 0, len(m)) + for w := range m { + out = append(out, w) + } + slices.Sort(out) + return out +} diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/audit_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/audit_test.go index edeb8da9c..b1269c42d 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/audit_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/audit_test.go @@ -9,10 +9,28 @@ import ( "github.com/stretchr/testify/require" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" ) +// testCatalogCPI is testCatalog with a caller-chosen chunks_per_txhash_index, so +// a test can build a SMALL window (e.g. cpi=2: window 0 = chunks {0,1}) and reach +// the "terminal/finalized window" branch without materializing 1000 chunks. 
+func testCatalogCPI(t *testing.T, cpi uint32) (*Catalog, string) { + t.Helper() + metaDir := t.TempDir() + artifactRoot := t.TempDir() + + store, err := metastore.New(filepath.Join(metaDir, "rocksdb"), silentLogger()) + require.NoError(t, err) + t.Cleanup(func() { _ = store.Close() }) + + windows, err := NewWindows(cpi) + require.NoError(t, err) + return NewCatalog(store, NewLayout(artifactRoot), windows), artifactRoot +} + // freezeChunkArtifacts marks+writes+freezes every per-chunk artifact kind for a -// chunk (currently ledgers) and writes the real files, so the audit's INV-3 +// chunk (ledgers, events, txhash) and writes the real files, so the audit's INV-3 // disk<->meta walk sees a fully materialized chunk. func freezeChunkArtifacts(t *testing.T, cat *Catalog, c chunk.ID, kinds ...Kind) { t.Helper() @@ -28,6 +46,17 @@ func freezeChunkArtifacts(t *testing.T, cat *Catalog, c chunk.ID, kinds ...Kind) require.NoError(t, cat.FlipChunkFrozen(c, kinds...)) } +// freezeIndex marks+writes+commits a frozen index coverage and writes its .idx. +func freezeIndex(t *testing.T, cat *Catalog, w WindowID, lo, hi chunk.ID) IndexCoverage { + t.Helper() + cov, err := cat.MarkIndexFreezing(w, lo, hi) + require.NoError(t, err) + writeArtifact(t, cat.layout.IndexFilePath(cov)) + require.NoError(t, cat.CommitIndex(cov)) + cov.State = StateFrozen + return cov +} + // hasViolation reports whether the report contains a violation for inv whose key // matches wantKey (empty wantKey matches any). func hasViolation(r AuditReport, inv Invariant, wantKey string) bool { @@ -53,51 +82,107 @@ func countInvariant(r AuditReport, inv Invariant) int { } // --------------------------------------------------------------------------- -// Clean store — a fully materialized, in-retention chunk set yields zero -// violations across every invariant. +// Clean store — a fully materialized, finalized, in-retention chunk set yields +// zero violations across every invariant. 
// --------------------------------------------------------------------------- func TestAudit_CleanStoreNoViolations(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 2) // window 0 = {0,1}, window 1 = {2,3} require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) + // Window 0 finalized: chunks 0,1 frozen (ledgers+events), terminal index covers + // {0,1}, so the .bin keys are demoted/swept (we never create them, matching a + // finalized window). Use ledgers+events only — txhash is gone post-finalization. freezeChunkArtifacts(t, cat, 0, KindLedgers, KindEvents) freezeChunkArtifacts(t, cat, 1, KindLedgers, KindEvents) + freezeIndex(t, cat, 0, 0, 1) // terminal: hi==1==LastChunk(window 0) report, err := cat.Audit(AuditOptions{}) require.NoError(t, err) require.True(t, report.Clean(), "expected clean audit, got: %v", report.Violations) } -// TestAudit_INV3_OrphanEventsFileNoKey confirms the INV-3 disk->meta walk now -// covers the events tree: a stray events cold-segment file with no meta key is -// flagged as an orphan. -func TestAudit_INV3_OrphanEventsFileNoKey(t *testing.T) { - cat, _ := testCatalog(t) +// --------------------------------------------------------------------------- +// INV-2 — single canonical state. +// --------------------------------------------------------------------------- + +func TestAudit_INV2_TwoFrozenIndexKeysInOneWindow(t *testing.T) { + cat, _ := testCatalogCPI(t, 4) // window 0 = {0,1,2,3} require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) - // An events file on disk at chunk 9's events path with NO meta key — orphan. - orphan := cat.layout.EventsPaths(9)[0] - writeArtifact(t, orphan) + // Two NON-terminal frozen coverages in window 0. CommitIndex demotes a + // predecessor, so to force the forbidden co-existence we write the second + // frozen key directly (simulating a commit batch that failed to demote). 
+ cov1 := freezeIndex(t, cat, 0, 0, 1) + cov2, err := cat.MarkIndexFreezing(0, 0, 2) + require.NoError(t, err) + writeArtifact(t, cat.layout.IndexFilePath(cov2)) + require.NoError(t, cat.store.Put(cov2.Key, string(StateFrozen))) // bug: predecessor not demoted report, err := cat.Audit(AuditOptions{}) require.NoError(t, err) - require.True(t, hasViolation(report, InvDiskMatchesMeta, ""), - "a keyless events file must be flagged as an orphan: %v", report.Violations) + require.True(t, hasViolation(report, InvSingleCanonicalState, ""), + "expected INV-2 two-frozen violation; cov1=%s cov2=%s", cov1.Key, cov2.Key) } -// --------------------------------------------------------------------------- -// INV-2 — single canonical state. -// --------------------------------------------------------------------------- +// TestAudit_INV2_TwoFrozenKeysPlusHotPlusTxhashStillCompletes is the regression +// for the abort-on-duplicate bug: a window with TWO frozen index keys whose +// other clause-3 (orphan hot) and clause-4 (leftover txhash) inputs ALSO route +// through frozen-coverage resolution. Before the fix, clause 3 (pendingArtifacts +// -> indexCovers) and clause 4 (txhashRedundantInFinalizedWindow) called +// Catalog.FrozenCoverage, which ERRORS on two frozen keys; Audit returned a +// zero-value report (Clean()==true) plus an error, discarding the clause-1 +// violation. After the fix the audit completes (err==nil) and records all three +// INV-2 breaches against the duplicate-tolerant frozen-coverage view. +func TestAudit_INV2_TwoFrozenKeysPlusHotPlusTxhashStillCompletes(t *testing.T) { + cat, _ := testCatalogCPI(t, 2) // window 0 = {0,1} + require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) + + // Window 0 finalized: chunks 0,1 frozen (ledgers+events) and a TERMINAL frozen + // coverage [0,1] (hi==1==LastChunk(window 0)). 
+ freezeChunkArtifacts(t, cat, 0, KindLedgers, KindEvents) + freezeChunkArtifacts(t, cat, 1, KindLedgers, KindEvents) + freezeIndex(t, cat, 0, 0, 1) + + // Bug 1: a SECOND frozen coverage [0,0] in the same window (a commit batch that + // failed to demote its predecessor) — clause-1 two-frozen violation. + cov2, err := cat.MarkIndexFreezing(0, 0, 0) + require.NoError(t, err) + writeArtifact(t, cat.layout.IndexFilePath(cov2)) + require.NoError(t, cat.store.Put(cov2.Key, string(StateFrozen))) + + // Bug 2: a "ready" hot DB for the fully-served chunk 0 — clause-3 orphan-hot. + readyHot(t, cat, 0) + + // Bug 3: a leftover per-chunk txhash key for chunk 0 in the finalized window — + // clause-4 leftover-txhash. + require.NoError(t, cat.MarkChunkFreezing(0, KindTxHash)) + writeArtifact(t, cat.layout.TxHashBinPath(0)) + require.NoError(t, cat.FlipChunkFrozen(0, KindTxHash)) + + report, err := cat.Audit(AuditOptions{}) + require.NoError(t, err, "audit must complete (err only for I/O), not abort on the uniqueness breach") + require.False(t, report.Clean(), "a multiply-corrupted store must not report Clean") + + // All three INV-2 breaches must be present — clause 1 (two frozen), clause 3 + // (orphan hot), clause 4 (leftover txhash) — proving the full walk finished. 
+ require.True(t, hasViolation(report, InvSingleCanonicalState, hotChunkKey(0)), + "expected clause-3 orphan-hot INV-2 violation: %v", report.Violations) + require.True(t, hasViolation(report, InvSingleCanonicalState, chunkKey(0, KindTxHash)), + "expected clause-4 leftover-txhash INV-2 violation: %v", report.Violations) + require.GreaterOrEqual(t, countInvariant(report, InvSingleCanonicalState), 3, + "expected at least 3 INV-2 violations (two-frozen + orphan-hot + leftover-txhash): %v", + report.Violations) +} func TestAudit_INV2_FreezingArtifactWithinRetentionIsViolation(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) // A "freezing" ledgers key for chunk 0, and a fully-frozen chunk 5 so // completeThrough advances ABOVE chunk 0 (chunk 0 is within // [floor, completeThrough]). Re-materialization was skipped -> INV-2. - freezeChunkArtifacts(t, cat, 5, KindLedgers) + freezeChunkArtifacts(t, cat, 5, KindLedgers, KindEvents, KindTxHash) require.NoError(t, cat.MarkChunkFreezing(0, KindLedgers)) writeArtifact(t, cat.layout.LedgerPackPath(0)) @@ -108,7 +193,7 @@ func TestAudit_INV2_FreezingArtifactWithinRetentionIsViolation(t *testing.T) { } func TestAudit_INV2_FreezingArtifactAboveCompleteThroughIsTolerated(t *testing.T) { - cat, root := testCatalog(t) + cat, root := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) // No frozen chunks at all => completeThrough is pre-genesis. A "freezing" key @@ -124,28 +209,29 @@ func TestAudit_INV2_FreezingArtifactAboveCompleteThroughIsTolerated(t *testing.T } func TestAudit_INV2_PruningArtifactIsAlwaysViolation(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) // A "pruning" key surviving quiescence — the sweep should have finished it. // No completeThrough carve-out applies to "pruning" (only "freezing"). 
- require.NoError(t, cat.MarkChunkFreezing(7, KindLedgers)) - require.NoError(t, cat.store.Put(chunkKey(7, KindLedgers), string(StatePruning))) + require.NoError(t, cat.MarkChunkFreezing(7, KindEvents)) + require.NoError(t, cat.store.Put(chunkKey(7, KindEvents), string(StatePruning))) report, err := cat.Audit(AuditOptions{}) require.NoError(t, err) - require.True(t, hasViolation(report, InvSingleCanonicalState, chunkKey(7, KindLedgers)), + require.True(t, hasViolation(report, InvSingleCanonicalState, chunkKey(7, KindEvents)), "expected INV-2 pruning violation: %v", report.Violations) } func TestAudit_INV2_OrphanHotForFullyServedChunk(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 2) // window 0 = {0,1} require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) - // Chunk 0 fully served by cold artifacts (ledgers + events frozen) yet a - // "ready" hot DB persists — the discard scan missed it. + // Chunk 0 fully served by cold artifacts (ledgers+events frozen, terminal index + // covers it) yet a "ready" hot DB persists — the discard scan missed it. freezeChunkArtifacts(t, cat, 0, KindLedgers, KindEvents) freezeChunkArtifacts(t, cat, 1, KindLedgers, KindEvents) + freezeIndex(t, cat, 0, 0, 1) readyHot(t, cat, 0) report, err := cat.Audit(AuditOptions{}) @@ -155,11 +241,12 @@ func TestAudit_INV2_OrphanHotForFullyServedChunk(t *testing.T) { } func TestAudit_INV2_TransientHotIsTolerated(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 2) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) - freezeChunkArtifacts(t, cat, 0, KindLedgers) - freezeChunkArtifacts(t, cat, 1, KindLedgers) + freezeChunkArtifacts(t, cat, 0, KindLedgers, KindEvents) + freezeChunkArtifacts(t, cat, 1, KindLedgers, KindEvents) + freezeIndex(t, cat, 0, 0, 1) // A "transient" hot key for the same fully-served chunk is the tolerated // in-flight bracket — NOT an orphan, and its missing dir is NOT a dangling key. 
require.NoError(t, cat.PutHotTransient(0)) @@ -172,12 +259,31 @@ func TestAudit_INV2_TransientHotIsTolerated(t *testing.T) { "transient hot key with no dir must be tolerated by INV-3: %v", report.Violations) } +func TestAudit_INV2_TxhashKeyInFinalizedWindow(t *testing.T) { + cat, _ := testCatalogCPI(t, 2) // window 0 = {0,1} + require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) + + freezeChunkArtifacts(t, cat, 0, KindLedgers, KindEvents) + freezeChunkArtifacts(t, cat, 1, KindLedgers, KindEvents) + freezeIndex(t, cat, 0, 0, 1) // terminal -> window finalized + // A per-chunk txhash key left behind in the finalized window (finalization + // demotion did not complete). + require.NoError(t, cat.MarkChunkFreezing(0, KindTxHash)) + writeArtifact(t, cat.layout.TxHashBinPath(0)) + require.NoError(t, cat.FlipChunkFrozen(0, KindTxHash)) + + report, err := cat.Audit(AuditOptions{}) + require.NoError(t, err) + require.True(t, hasViolation(report, InvSingleCanonicalState, chunkKey(0, KindTxHash)), + "expected INV-2 leftover-txhash violation: %v", report.Violations) +} + // --------------------------------------------------------------------------- // INV-3 — disk matches meta-store, both directions. // --------------------------------------------------------------------------- func TestAudit_INV3_OrphanFileNoKey(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) // A file on disk at chunk 9's ledgers path with NO meta key — orphan. @@ -196,13 +302,13 @@ func TestAudit_INV3_OrphanFileNoKey(t *testing.T) { } func TestAudit_INV3_DuplicateArtifactIsOrphan(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) - // Chunk 0 ledgers frozen (one legit .pack). A stray SECOND file the meta store - // does not name (in the same bucket dir) is a duplicate -> orphan. 
- freezeChunkArtifacts(t, cat, 0, KindLedgers) - dupe := filepath.Join(filepath.Dir(cat.layout.LedgerPackPath(0)), "00000000.dupe") + // Chunk 0 events frozen (three legit files). A stray FOURTH events file the + // meta store does not name is a duplicate -> orphan. + freezeChunkArtifacts(t, cat, 0, KindEvents) + dupe := filepath.Join(filepath.Dir(cat.layout.EventsPaths(0)[0]), "00000000-events.dupe") writeArtifact(t, dupe) report, err := cat.Audit(AuditOptions{}) @@ -217,7 +323,7 @@ func TestAudit_INV3_DuplicateArtifactIsOrphan(t *testing.T) { } func TestAudit_INV3_DanglingKeyNoFile(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) // A "frozen" ledgers key for chunk 2 but no file on disk — dangling key. @@ -231,7 +337,7 @@ func TestAudit_INV3_DanglingKeyNoFile(t *testing.T) { } func TestAudit_INV3_PruningKeyNoFileIsTolerated(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) // A "pruning" key whose file the sweep already unlinked (before deleting the @@ -246,7 +352,7 @@ func TestAudit_INV3_PruningKeyNoFileIsTolerated(t *testing.T) { } func TestAudit_INV3_OrphanHotDir(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) // A hot DB directory on disk for chunk 4 with no hot:chunk key — orphan tier. @@ -268,14 +374,14 @@ func TestAudit_INV3_OrphanHotDir(t *testing.T) { // --------------------------------------------------------------------------- func TestAudit_INV4_ChunkBelowFloor(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) // Pin earliest_ledger to chunk 5's first ledger -> floor is chunk 5's first // ledger, so chunk 0..4 are wholly below the floor. 
require.NoError(t, cat.PutEarliestLedger(chunk.ID(5).FirstLedger())) // A frozen chunk 1 below the floor (its files exist so INV-3 is clean) — but // it's below floor, so INV-4 fires. - freezeChunkArtifacts(t, cat, 1, KindLedgers) + freezeChunkArtifacts(t, cat, 1, KindLedgers, KindEvents, KindTxHash) report, err := cat.Audit(AuditOptions{}) require.NoError(t, err) @@ -284,12 +390,12 @@ func TestAudit_INV4_ChunkBelowFloor(t *testing.T) { } func TestAudit_INV4_StraddlingFloorNotFlagged(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) // earliest at chunk 0 first ledger + 1 (mid chunk 0). floor = // effectiveRetentionFloor with earliest just above genesis; chunk 0's last // ledger is ABOVE that, so chunk 0 straddles and must NOT be flagged. require.NoError(t, cat.PutEarliestLedger(chunk.ID(0).FirstLedger()+1)) - freezeChunkArtifacts(t, cat, 0, KindLedgers) + freezeChunkArtifacts(t, cat, 0, KindLedgers, KindEvents, KindTxHash) report, err := cat.Audit(AuditOptions{}) require.NoError(t, err) @@ -297,6 +403,51 @@ func TestAudit_INV4_StraddlingFloorNotFlagged(t *testing.T) { "a chunk straddling the floor must not be an INV-4 violation: %v", report.Violations) } +// TestAudit_INV4_StraddlingIndexCoverageNotFlagged is the index-key carve-out +// (item R2-7): a frozen index coverage [lo, hi] whose WINDOW straddles the floor +// keeps the stale lo it was built with — so its coverage reaches BELOW the floor. +// That below-floor portion is never served (reader contract rule 2), and the +// key/file are swept only once the WHOLE window falls below the floor. So a +// straddling .idx (hi at/above the floor) must NOT be an INV-4 violation, while a +// genuinely-below-floor index key (hi wholly below) still IS. +func TestAudit_INV4_StraddlingIndexCoverageNotFlagged(t *testing.T) { + cat, _ := testCatalogCPI(t, 4) // window 0 = chunks [0,1,2,3] + // Floor at chunk 2's first ledger: chunks 0..1 are below it, chunks 2..3 at/above. 
+ require.NoError(t, cat.PutEarliestLedger(chunk.ID(2).FirstLedger())) + + // The window's single frozen coverage was built with a STALE lo that reaches + // below the floor: [1,3] straddles (lo=1 below the floor; hi=3 above). The + // window straddles the floor, so this legitimate stale-lo .idx must NOT be + // flagged — its below-floor tail is masked by the reader retention contract, + // and the key/file are swept only once the whole window falls below the floor. + freezeCoverage(t, cat, 0, 1, 3) + + report, err := cat.Audit(AuditOptions{}) + require.NoError(t, err) + require.Equal(t, 0, countInvariant(report, InvRetentionBound), + "a straddling index coverage (hi above the floor) must not be an INV-4 violation: %v", report.Violations) +} + +// TestAudit_INV4_IndexCoverageWhollyBelowFloorFlagged is the other half of the +// carve-out: an index coverage whose HIGHEST chunk is wholly below the floor +// (the whole window has aged out) is a genuine stray key — pruning failed past +// the floor — and MUST be flagged. +func TestAudit_INV4_IndexCoverageWhollyBelowFloorFlagged(t *testing.T) { + cat, _ := testCatalogCPI(t, 2) // window 0 = chunks [0,1] + // Floor at chunk 4's first ledger: window 0 (chunks [0,1]) is wholly below it. + require.NoError(t, cat.PutEarliestLedger(chunk.ID(4).FirstLedger())) + + // A frozen window-0 coverage [0,1] whose hi=1 is wholly below the floor. + cov, err := cat.MarkIndexFreezing(0, 0, 1) + require.NoError(t, err) + require.NoError(t, cat.store.Put(cov.Key, string(StateFrozen))) + + report, err := cat.Audit(AuditOptions{}) + require.NoError(t, err) + require.True(t, hasViolation(report, InvRetentionBound, cov.Key), + "an index coverage wholly below the floor must be an INV-4 violation: %v", report.Violations) +} + // --------------------------------------------------------------------------- // INV-1 — deep mode. 
// --------------------------------------------------------------------------- @@ -320,7 +471,7 @@ func (f *fakeDeriver) DeriveArtifact(c chunk.ID, kind Kind) ([]byte, bool, error } func TestAudit_INV1_DeepByteMatchClean(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) freezeChunkArtifacts(t, cat, 0, KindLedgers) // writeArtifact writes "artifact"; deriver returns the same bytes -> match. @@ -333,7 +484,7 @@ func TestAudit_INV1_DeepByteMatchClean(t *testing.T) { } func TestAudit_INV1_DeepByteMismatch(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) freezeChunkArtifacts(t, cat, 0, KindLedgers) dv := &fakeDeriver{bytesFor: map[string][]byte{chunkKey(0, KindLedgers): []byte("DIFFERENT")}} @@ -345,7 +496,7 @@ func TestAudit_INV1_DeepByteMismatch(t *testing.T) { } func TestAudit_INV1_DeclinedSampleNotChecked(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) freezeChunkArtifacts(t, cat, 0, KindLedgers) dv := &fakeDeriver{declined: map[string]bool{chunkKey(0, KindLedgers): true}} @@ -357,7 +508,7 @@ func TestAudit_INV1_DeclinedSampleNotChecked(t *testing.T) { } func TestAudit_INV1_DeriverErrorSurfaces(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) freezeChunkArtifacts(t, cat, 0, KindLedgers) dv := &fakeDeriver{err: errors.New("backend down")} @@ -368,7 +519,7 @@ func TestAudit_INV1_DeriverErrorSurfaces(t *testing.T) { } func TestAudit_INV1_NoDeriverSkipsDeep(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := testCatalogCPI(t, 1000) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) freezeChunkArtifacts(t, cat, 0, KindLedgers) diff --git 
a/cmd/stellar-rpc/internal/fullhistory/streaming/backfill_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/backfill_test.go index 65d3e2155..6266f94bb 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/backfill_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/backfill_test.go @@ -16,9 +16,9 @@ import ( // --------------------------------------------------------------------------- func TestRunBackfill_ResolvesThenExecutes(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 4) - var chunksRun atomic.Int32 + var chunksRun, indexRun atomic.Int32 cfg := ExecConfig{ Catalog: cat, Logger: silentLogger(), Workers: 2, Process: ProcessConfig{Backend: zeroTxBackend(t)}, @@ -26,11 +26,17 @@ func TestRunBackfill_ResolvesThenExecutes(t *testing.T) { chunksRun.Add(1) return nil }, + runIndex: func(context.Context, IndexBuild, ExecConfig) error { + indexRun.Add(1) + return nil + }, } - // Fresh catalog, range [0,3]: resolve schedules 4 chunk builds. + // Fresh catalog, range [0,3] (window 0): resolve schedules 4 chunk builds + + // 1 terminal index build. require.NoError(t, runBackfill(context.Background(), cfg, 0, 3)) require.Equal(t, int32(4), chunksRun.Load()) + require.Equal(t, int32(1), indexRun.Load()) } // No backend AND a genuine fall-through chunk (nothing local): the daemon still @@ -39,7 +45,7 @@ func TestRunBackfill_ResolvesThenExecutes(t *testing.T) { // REAL processChunk path runs (no runChunk seam), so backfillSource picks the // (3) bulk-backend branch, finds no backend, and aborts the plan. 
func TestRunBackfill_NoBackendNoLocalCopyFatals(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 4) cfg := ExecConfig{ Catalog: cat, Logger: silentLogger(), Workers: 1, Process: ProcessConfig{HotProbe: &fakeHotProbe{}}, // not "ready", no backend @@ -51,7 +57,7 @@ func TestRunBackfill_NoBackendNoLocalCopyFatals(t *testing.T) { // An inverted range (younger-than-one-chunk network) backfills nothing. func TestRunBackfill_InvertedRangeIsNoop(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 4) var ran int cfg := ExecConfig{ Catalog: cat, Logger: silentLogger(), Workers: 1, diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/catalog.go b/cmd/stellar-rpc/internal/fullhistory/streaming/catalog.go index 7d58903ba..023e18303 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/catalog.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/catalog.go @@ -15,32 +15,37 @@ import ( // metastore.Store — the merged RocksDB KV store with sync Put/Delete, atomic // Batch, and PrefixScan — and never reaches around it to RocksDB directly. The // catalog adds: the key schema and its bijection to disk paths (keys.go, -// paths.go), the one-write protocol (protocol.go), and the key-driven sweeps -// (sweep.go). +// paths.go), window arithmetic (window.go), the one-write protocol +// (protocol.go), and the key-driven sweeps (sweep.go). // // Every method here is a pure function of meta-store keys plus the on-disk // layout. The catalog stays a *pure* catalog — every key names a file/dir // state or a config pin; progress is derived, never stored (see the data // model's "Progress is derived, never stored"). type Catalog struct { - store *metastore.Store - layout Layout + store *metastore.Store + layout Layout + windows Windows // hooks are test-only fault-injection points (see hooks.go); every field // is nil in production, making each call site a no-op nil-check. 
hooks crashHooks } -// NewCatalog binds a catalog to an open metastore.Store and the on-disk layout. -// The store is owned by the caller (the catalog does not close it) so a single -// Store can back both the catalog and any other consumer in the process. -func NewCatalog(store *metastore.Store, layout Layout) *Catalog { - return &Catalog{store: store, layout: layout} +// NewCatalog binds a catalog to an open metastore.Store, the on-disk layout, +// and the window arithmetic. The store is owned by the caller (the catalog +// does not close it) so a single Store can back both the catalog and any other +// consumer in the process. +func NewCatalog(store *metastore.Store, layout Layout, windows Windows) *Catalog { + return &Catalog{store: store, layout: layout, windows: windows} } // Layout returns the path layout bound to this catalog. func (c *Catalog) Layout() Layout { return c.layout } +// Windows returns the window arithmetic bound to this catalog. +func (c *Catalog) Windows() Windows { return c.windows } + // --------------------------------------------------------------------------- // Raw key access. Get/Has are the value-blind primitives the rest build on. // --------------------------------------------------------------------------- @@ -129,6 +134,50 @@ func (c *Catalog) ReadyHotChunkKeys() ([]chunk.ID, error) { return c.hotChunkKeysWith(func(s HotState) bool { return s == HotReady }) } +// IndexKeys returns every coverage key under window w with its State, sorted by +// key. Used to enumerate a window's coverages (the frozen one plus transient +// debris). +func (c *Catalog) IndexKeys(w WindowID) ([]IndexCoverage, error) { + return c.indexKeysPrefix(indexWindowPrefix(w)) +} + +// AllIndexKeys returns every coverage key across all windows with its State, +// sorted by key. 
+func (c *Catalog) AllIndexKeys() ([]IndexCoverage, error) { + return c.indexKeysPrefix(indexPrefix) +} + +// FrozenCoverage returns the window's UNIQUE "frozen" coverage, or ok=false if +// the window has none yet. It asserts the uniqueness invariant — at most one +// coverage per window is "frozen" at any moment (INV-2) — by erroring if it +// observes two. More than one frozen key in a window is a detectable bug, not +// a tie-break to resolve: readers resolve "the window's index" as exactly this +// key. +func (c *Catalog) FrozenCoverage(w WindowID) (IndexCoverage, bool, error) { + covs, err := c.IndexKeys(w) + if err != nil { + return IndexCoverage{}, false, err + } + var ( + frozen IndexCoverage + found bool + ) + for _, candidate := range covs { + if candidate.State != StateFrozen { + continue + } + if found { + return IndexCoverage{}, false, fmt.Errorf( + "streaming: window %s has two frozen coverages (%s and %s) — "+ + "uniqueness invariant violated", + w, frozen.Key, candidate.Key, + ) + } + frozen, found = candidate, true + } + return frozen, found, nil +} + // --------------------------------------------------------------------------- // Config pins. Written once on first start, immutable thereafter. // --------------------------------------------------------------------------- @@ -139,6 +188,12 @@ func (c *Catalog) EarliestLedger() (uint32, bool, error) { return c.uint32Pin(configEarliestLedger) } +// ChunksPerTxhashIndex returns the pinned config:chunks_per_txhash_index. ok +// is false if the pin has not been written yet. +func (c *Catalog) ChunksPerTxhashIndex() (uint32, bool, error) { + return c.uint32Pin(configChunksPerTxhashIdx) +} + // PutEarliestLedger writes the config:earliest_ledger pin (decimal string). // The immutability check (abort if a later value differs) is the caller's // validateConfig responsibility, not the catalog's. 
@@ -146,13 +201,21 @@ func (c *Catalog) PutEarliestLedger(ledger uint32) error { return c.store.Put(configEarliestLedger, strconv.FormatUint(uint64(ledger), 10)) } -// PinLayout commits the layout pin (config:earliest_ledger) in ONE atomic -// synced batch — the first-start commit the design's validateConfig mandates. -// Its presence ⟹ a prior first start completed and the layout is immutable; -// otherwise startup never got past config validation and re-validating + -// re-pinning is safe. -func (c *Catalog) PinLayout(earliestLedger uint32) error { +// PutChunksPerTxhashIndex writes the config:chunks_per_txhash_index pin. +func (c *Catalog) PutChunksPerTxhashIndex(n uint32) error { + return c.store.Put(configChunksPerTxhashIdx, strconv.FormatUint(uint64(n), 10)) +} + +// PinLayout commits BOTH layout pins (config:chunks_per_txhash_index and +// config:earliest_ledger) in ONE atomic synced batch — the first-start commit +// the design's validateConfig mandates. Committing them together is what makes +// the all-or-nothing invariant hold: BOTH present ⟹ a prior first start +// completed and the layout is immutable; otherwise startup never got past +// config validation and re-validating + re-pinning is safe. A torn write that +// pinned only one would break that invariant, so the two MUST share a batch. +func (c *Catalog) PinLayout(chunksPerTxhashIndex, earliestLedger uint32) error { return c.store.Batch(func(w *metastore.BatchWriter) error { + w.Put(configChunksPerTxhashIdx, strconv.FormatUint(uint64(chunksPerTxhashIndex), 10)) w.Put(configEarliestLedger, strconv.FormatUint(uint64(earliestLedger), 10)) return nil }) @@ -200,6 +263,24 @@ func (c *Catalog) hotChunkKeysWith(keep func(HotState) bool) ([]chunk.ID, error) return ids, nil } +// indexKeysPrefix scans coverage keys under prefix, parsing each name and +// attaching its scanned lifecycle value as State. 
+func (c *Catalog) indexKeysPrefix(prefix string) ([]IndexCoverage, error) { + var covs []IndexCoverage + for e, err := range c.store.PrefixScan(prefix) { + if err != nil { + return nil, err + } + cov, ok := parseIndexKey(e.Key) + if !ok { + return nil, fmt.Errorf("streaming: malformed index key %q", e.Key) + } + cov.State = State(e.Value) + covs = append(covs, cov) + } + return covs, nil +} + // uint32Pin reads a config pin as a uint32 decimal string. func (c *Catalog) uint32Pin(key string) (uint32, bool, error) { v, ok, err := c.Get(key) diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/catalog_protocol.go b/cmd/stellar-rpc/internal/fullhistory/streaming/catalog_protocol.go index 6a8051735..ad207b6c0 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/catalog_protocol.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/catalog_protocol.go @@ -7,20 +7,28 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" ) -// The one write protocol — mark-then-write. Every durable per-chunk artifact -// flows through here: +// errCommitBatchFaultInjected is returned only by the test-only +// failCommitBatch hook (hooks.go) to force CommitIndex's batch to be dropped. +// It never surfaces in production, where the hook is nil. +var errCommitBatchFaultInjected = errors.New("streaming: commit batch fault-injected (test only)") + +// The one write protocol — mark-then-write. Every durable artifact (per-chunk +// file or index coverage) flows through here: // // 1. Put the key "freezing" via metastore BEFORE any I/O. // 2. The caller writes the file. // 3. The caller fsyncs the FILE + its PARENT dirent (+ the GRANDPARENT dirent // when the parent dir was just created) — barrierNewFile in paths.go. -// 4. Flip to "frozen": a single Put for per-chunk artifacts. +// 4. 
Flip to "frozen": a single Put for per-chunk artifacts, or one atomic +// Batch for the index (promote new coverage + demote predecessor + on a +// terminal build demote every in-window chunk:{c}:txhash key). // // The pre-mark gives "every file on disk has its meta key"; the dirent // barriers guarantee the key never outlives the file's creation; the frozen // flip is the only transition readers trust. The catalog owns steps 1 and 4 // (meta-store writes); the caller owns steps 2 and 3 (I/O), calling -// MarkChunkFreezing before and FlipChunkFrozen after. +// MarkChunkFreezing/MarkIndexFreezing before and FlipChunkFrozen/CommitIndex +// after. // MarkChunkFreezing puts every requested kind's key to "freezing" in one // atomic synced batch, BEFORE any file I/O. Re-marking a "freezing"/"pruning"/ @@ -53,6 +61,109 @@ func (c *Catalog) FlipChunkFrozen(chunkID chunk.ID, kinds ...Kind) error { }) } +// MarkIndexFreezing puts the coverage's key to "freezing" before any index +// I/O. It returns the IndexCoverage (with State set) the caller threads into +// CommitIndex. lo > hi panics (indexKey enforces it). +func (c *Catalog) MarkIndexFreezing(w WindowID, lo, hi chunk.ID) (IndexCoverage, error) { + cov := IndexCoverage{ + Window: w, + Lo: lo, + Hi: hi, + Key: indexKey(w, lo, hi), + State: StateFreezing, + } + if err := c.store.Put(cov.Key, string(StateFreezing)); err != nil { + return IndexCoverage{}, err + } + return cov, nil +} + +// CommitIndex is the index's frozen flip — the batch extension of the one +// write protocol and the ENTIRE finalization protocol. In one atomic synced +// batch it: +// +// - promotes cov ("freezing" -> "frozen"); +// - demotes the window's predecessor frozen coverage (if any) to "pruning"; +// - iff this build is terminal (cov.Hi == window's last chunk), demotes +// every chunk:{c}:txhash key in the window to "pruning". +// +// The batch only ever DEMOTES keys and unlinks nothing — file deletion is +// exclusively the sweeps' job. 
A crash before this lands leaves the +// predecessor frozen and cov as "freezing" debris; a crash after leaves cov +// frozen and the demoted keys as "pruning" sweep work. There is no instant +// with two frozen coverages, no live index unreachable, and no "frozen" +// chunk:c:txhash whose .bin was deleted. +// +// The caller MUST have fsynced the .idx file and its dir first. CommitIndex +// re-reads the predecessor inside the batch-composition phase from durable +// state, so it is safe to call after a crash without external bookkeeping. +func (c *Catalog) CommitIndex(cov IndexCoverage) error { + // Compose the demotions against durable state BEFORE opening the batch, so + // the batch body is a pure sequence of puts (the scans below read the same + // store the batch will write, but only keys this batch does not also + // write — the predecessor differs from cov, and the txhash keys are a + // different family). + prev, hasPrev, err := c.FrozenCoverage(cov.Window) + if err != nil { + return err + } + if hasPrev && prev.Key == cov.Key { + // The predecessor IS this coverage already frozen — a re-commit of an + // already-landed batch. Nothing to demote against itself; the promote + // below is an idempotent overwrite. + hasPrev = false + } + + terminal := c.windows.IsTerminalCoverage(cov) + var txhashKeys []string + if terminal { + txhashKeys, err = c.windowTxhashKeysPresent(cov.Window) + if err != nil { + return err + } + } + + return c.store.Batch(func(bw *metastore.BatchWriter) error { + bw.Put(cov.Key, string(StateFrozen)) + if hasPrev { + bw.Put(prev.Key, string(StatePruning)) + } + for _, k := range txhashKeys { + bw.Put(k, string(StatePruning)) + } + // Fault injection: returning an error here makes metastore drop the + // whole batch, so a test can assert none of the puts above became + // observable — the all-or-nothing property the protocol depends on. 
+ if c.hooks.commitBatchShouldFail() { + return errCommitBatchFaultInjected + } + return nil + }) +} + +// windowTxhashKeysPresent returns the chunk:{c}:txhash keys that EXIST in the +// window [firstChunk, lastChunk], so the terminal commit demotes only present +// keys (matching the spec's cat.Has guard) rather than conjuring keys for +// chunks whose .bin was never produced. +func (c *Catalog) windowTxhashKeysPresent(w WindowID) ([]string, error) { + first := c.windows.FirstChunk(w) + last := c.windows.LastChunk(w) + var keys []string + for cid := first; cid <= last; cid++ { + ok, err := c.Has(chunkKey(cid, KindTxHash)) + if err != nil { + return nil, err + } + if ok { + keys = append(keys, chunkKey(cid, KindTxHash)) + } + if cid == last { // guard against chunk.ID wraparound at the top + break + } + } + return keys, nil +} + // --------------------------------------------------------------------------- // Hot-DB key bracket. The directory operation's two ends: PutHotTransient // before the dir is created (or before a discard rmdirs it), FlipHotReady diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/catalog_sweep.go b/cmd/stellar-rpc/internal/fullhistory/streaming/catalog_sweep.go index 75040cf6b..5bafa4e20 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/catalog_sweep.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/catalog_sweep.go @@ -4,13 +4,13 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" ) -// Key-driven sweep — the ONLY deletion body in the system. Its ordering is -// load-bearing: +// Key-driven sweeps — the ONLY two deletion bodies in the system, one per key +// family. 
Both share the same ordering, which is load-bearing: // // demote-if-still-"frozen" (never unlink under a frozen key) // -> unlink file(s) // -> fsyncDir(parent) (the unlink becomes durable BEFORE the key goes) -// -> delete key (batched) +// -> delete key (batched per family) // // This gives the exit-side invariant "key absent => file gone": because the // key outlives the durable unlink, a crash anywhere leaves the key in place @@ -77,3 +77,38 @@ func (c *Catalog) SweepChunkArtifacts(refs []ArtifactRef) error { return nil }) } + +// SweepIndexKey deletes one index coverage's file and removes its key. A +// "frozen" coverage is demoted to "pruning" first (a crash mid-sweep must not +// leave a frozen key fileless); "freezing" debris (a crashed attempt — never +// salvaged) and "pruning" coverages (superseded or retention-demoted) take the +// same path from here. The key outlives the durable unlink, so a crash anywhere +// re-runs the sweep. +func (c *Catalog) SweepIndexKey(cov IndexCoverage) error { + if cov.State == StateFrozen { + // Never unlink under a "frozen" key. + if err := c.store.Put(cov.Key, string(StatePruning)); err != nil { + return err + } + } + // Between the demote and the unlink: the key must read "pruning", never + // "frozen". Dropping the demote above would leave it "frozen" here. + c.hooks.fireBeforeUnlink() + path := c.layout.IndexFilePath(cov) + if err := deleteFileIfExists(path); err != nil { + return err + } + dir := c.layout.IndexWindowDir(cov.Window) + if err := fsyncDir(dir); err != nil { // unlink durable BEFORE key delete + return err + } + // Between the durable unlink and the key delete: the file is gone but the + // key still exists. Reordering the delete ahead of the unlink would leave + // the coverage's file present here under no key (an orphaned file).
+ c.hooks.fireBeforeKeyDelete() + if err := c.store.Delete(cov.Key); err != nil { + return err + } + rmdirIfEmpty(dir) // best-effort tidiness; an empty dir is not an artifact + return nil +} diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/config.go b/cmd/stellar-rpc/internal/fullhistory/streaming/config.go index 6bb2c28c4..16fed8ddc 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/config.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/config.go @@ -38,6 +38,10 @@ type ServiceConfig struct { // BackfillConfig is [backfill] plus the nested [backfill.bsb]. type BackfillConfig struct { + // ChunksPerTxhashIndex is chunks per tx-hash window — it defines the index + // layout and is immutable once stored. Default DefaultChunksPerTxhashIndex. + ChunksPerTxhashIndex *uint32 `toml:"chunks_per_txhash_index"` + // Workers is the concurrent task-slot count for bulk catch-up. Default // GOMAXPROCS. Must be >= 1. Workers *int `toml:"workers"` @@ -66,16 +70,21 @@ type BSBConfig struct { NumWorkers *int `toml:"num_workers"` } -// ImmutableStorageConfig is [immutable_storage.*] — one optional path per -// artifact tree. An empty path means "default under default_data_dir". +// ImmutableStorageConfig is [immutable_storage]: the single cold-tier root, plus +// an optional override for just the tx-hash index (the one read-hot artifact). type ImmutableStorageConfig struct { // Path is the single cold-tier root: every immutable artifact tree (ledger // .pack, events segments, tx-hash .bin/.idx) lives as a fixed subdirectory // beneath it. Empty ⇒ default_data_dir. One knob relocates the whole cold // tier to a separate (cheap/large/durable) volume; the per-data-type subdirs - // are not independently configurable. (Slice 3 adds an optional txhash-index - // override for the one artifact with a distinct read profile.) + // are not independently configurable. 
Path string `toml:"path"` + + // TxhashIndexPath optionally relocates JUST the per-window tx-hash .idx off + // the cold tier — the one immutable artifact with a read-hot profile + // (getTransaction lookups), so it can sit on faster storage. Empty ⇒ + // {cold}/txhash/index. + TxhashIndexPath string `toml:"txhash_index_path"` } // StoragePathConfig is one [immutable_storage.*] / [catalog] / [hot_storage] @@ -117,11 +126,13 @@ type LoggingConfig struct { Format string `toml:"format"` } -// Documented defaults (design "Configuration"). +// Documented defaults (design "Configuration"). DefaultChunksPerTxhashIndex +// matches the design's 1000 (= 10M ledgers per window). const ( - DefaultMaxRetries int = 3 - DefaultBSBBufferSize int = 1000 - DefaultBSBNumWorkers int = 20 + DefaultChunksPerTxhashIndex uint32 = 1000 + DefaultMaxRetries int = 3 + DefaultBSBBufferSize int = 1000 + DefaultBSBNumWorkers int = 20 DefaultEarliestLedger = "genesis" DefaultLogLevel = "info" @@ -151,9 +162,9 @@ func LoadConfig(path string) (Config, error) { // Decoding is STRICT (Decoder.Strict(true)): any key in the document with no // corresponding struct field is an error rather than silently ignored. This is // what backs the LoadConfig docstring's "unknown keys are rejected" promise — a -// typo in an immutable, layout-defining key (earliest_ledger) must fail loudly, -// not silently fall back to a default and pin the wrong value on first start. -// go-toml v1's plain Unmarshal ignores +// typo in an immutable, layout-defining key (chunks_per_txhash_index, +// earliest_ledger) must fail loudly, not silently fall back to a default and +// pin the wrong value on first start. go-toml v1's plain Unmarshal ignores // unknown keys (it mirrors the encoding/json decoder), so strict decoding is // required here. 
func ParseConfig(data []byte) (Config, error) { @@ -167,8 +178,12 @@ func ParseConfig(data []byte) (Config, error) { // WithDefaults returns a copy of cfg with every documented default filled for // an unset (nil pointer / empty string) field. Numeric pointers left nil are // resolved to their defaults; explicit zeros are preserved (and later rejected -// by validateConfig where a zero is illegal, e.g. workers). +// by validateConfig where a zero is illegal, e.g. chunks_per_txhash_index). func (cfg Config) WithDefaults() Config { + if cfg.Backfill.ChunksPerTxhashIndex == nil { + v := DefaultChunksPerTxhashIndex + cfg.Backfill.ChunksPerTxhashIndex = &v + } if cfg.Backfill.Workers == nil { v := runtime.GOMAXPROCS(0) cfg.Backfill.Workers = &v @@ -206,10 +221,11 @@ func (cfg Config) WithDefaults() Config { // place the {default_data_dir}/... layout lives, so locking and store-opening // agree on every root. type Paths struct { - DataDir string // default_data_dir (the data root) - Catalog string // catalog RocksDB dir - Cold string // immutable cold-tier root (ledgers/events/txhash subdirs) - HotStorage string // per-chunk hot RocksDB root + DataDir string // default_data_dir (the data root) + Catalog string // catalog RocksDB dir + Cold string // immutable cold-tier root (ledgers/events/txhash subdirs) + TxhashIndex string // frozen txhash .idx root (defaults under Cold; separately overridable) + HotStorage string // per-chunk hot RocksDB root } // ResolvePaths fills every storage path, defaulting under default_data_dir per @@ -224,11 +240,13 @@ func (cfg Config) ResolvePaths() Paths { } return def } + cold := pick(cfg.ImmutableStorage.Path, dataDir) return Paths{ - DataDir: dataDir, - Catalog: pick(cfg.Catalog.Path, filepath.Join(dataDir, "catalog", "rocksdb")), - Cold: pick(cfg.ImmutableStorage.Path, dataDir), - HotStorage: pick(cfg.Streaming.HotStorage.Path, filepath.Join(dataDir, "hot")), + DataDir: dataDir, + Catalog: pick(cfg.Catalog.Path, filepath.Join(dataDir, 
"catalog", "rocksdb")), + Cold: cold, + TxhashIndex: pick(cfg.ImmutableStorage.TxhashIndexPath, filepath.Join(cold, "txhash", "index")), + HotStorage: pick(cfg.Streaming.HotStorage.Path, filepath.Join(dataDir, "hot")), } } @@ -243,6 +261,7 @@ func (p Paths) LockRoots() []string { return []string{ p.Catalog, p.Cold, + p.TxhashIndex, p.HotStorage, } } diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/config_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/config_test.go index 3a8ddd12f..756f62e54 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/config_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/config_test.go @@ -17,6 +17,7 @@ const fullValidConfig = ` default_data_dir = "/var/lib/fullhistory" [backfill] +chunks_per_txhash_index = 500 workers = 8 max_retries = 5 @@ -27,6 +28,7 @@ num_workers = 40 [immutable_storage] path = "/mnt/cold" +txhash_index_path = "/mnt/txidx" [catalog] path = "/mnt/catalog" @@ -61,12 +63,14 @@ func TestParseConfig_FullDocument(t *testing.T) { require.NoError(t, err) assert.Equal(t, "/var/lib/fullhistory", cfg.Service.DefaultDataDir) + assert.Equal(t, uint32(500), *cfg.Backfill.ChunksPerTxhashIndex) assert.Equal(t, 8, *cfg.Backfill.Workers) assert.Equal(t, 5, *cfg.Backfill.MaxRetries) assert.Equal(t, "my-bucket/ledgers", cfg.Backfill.BSB.BucketPath) assert.Equal(t, 2000, *cfg.Backfill.BSB.BufferSize) assert.Equal(t, 40, *cfg.Backfill.BSB.NumWorkers) assert.Equal(t, "/mnt/cold", cfg.ImmutableStorage.Path) + assert.Equal(t, "/mnt/txidx", cfg.ImmutableStorage.TxhashIndexPath) assert.Equal(t, "/mnt/catalog", cfg.Catalog.Path) assert.Equal(t, uint32(100), *cfg.Streaming.RetentionChunks) assert.Equal(t, "now", cfg.Streaming.EarliestLedger) @@ -86,6 +90,7 @@ func TestParseConfig_MinimalAppliesDefaults(t *testing.T) { assert.Equal(t, "/etc/cc.toml", cfg.Streaming.CaptiveCoreConfig) // Documented defaults filled. 
+ assert.Equal(t, DefaultChunksPerTxhashIndex, *cfg.Backfill.ChunksPerTxhashIndex) assert.Equal(t, runtime.GOMAXPROCS(0), *cfg.Backfill.Workers) assert.Equal(t, DefaultMaxRetries, *cfg.Backfill.MaxRetries) assert.Equal(t, DefaultBSBBufferSize, *cfg.Backfill.BSB.BufferSize) @@ -98,12 +103,13 @@ func TestParseConfig_MinimalAppliesDefaults(t *testing.T) { func TestParseConfig_ExplicitZeroPreserved(t *testing.T) { // An explicit zero must NOT be overwritten by the default — validateConfig - // is what rejects an illegal zero (e.g. workers), so the defaulting layer - // must preserve it for that rejection to fire. + // is what rejects an illegal zero (e.g. chunks_per_txhash_index), so the + // defaulting layer must preserve it for that rejection to fire. const cfgText = ` [service] default_data_dir = "/d" [backfill] +chunks_per_txhash_index = 0 workers = 0 max_retries = 0 [streaming] @@ -111,6 +117,7 @@ captive_core_config = "/cc" ` cfg, err := ParseConfig([]byte(cfgText)) require.NoError(t, err) + assert.Equal(t, uint32(0), *cfg.Backfill.ChunksPerTxhashIndex) assert.Equal(t, 0, *cfg.Backfill.Workers) assert.Equal(t, 0, *cfg.Backfill.MaxRetries) } @@ -120,22 +127,22 @@ func TestParseConfig_Malformed(t *testing.T) { require.Error(t, err) } -// A typo'd key must be REJECTED, not silently dropped to a default. The -// layout-defining key (earliest_ledger) is pinned immutably on first start, so -// a silent fallback would permanently pin the wrong value. Strict decoding -// catches the typo before any pin is written. +// A typo'd key must be REJECTED, not silently dropped to a default. The two +// layout-defining keys (chunks_per_txhash_index, earliest_ledger) are pinned +// immutably on first start, so a silent fallback would permanently pin the +// wrong value. Strict decoding catches the typo before any pin is written. 
func TestParseConfig_RejectsUnknownKeys(t *testing.T) { tests := []struct { name string text string }{ { - name: "typo'd workers", + name: "typo'd chunks_per_txhash_index", text: ` [service] default_data_dir = "/d" [backfill] -workrs = 7 +chunks_per_txhash_indx = 7 [streaming] captive_core_config = "/cc" `, @@ -201,6 +208,7 @@ func TestResolvePaths_DefaultsUnderDataDir(t *testing.T) { assert.Equal(t, "/data", p.DataDir) assert.Equal(t, filepath.Join("/data", "catalog", "rocksdb"), p.Catalog) assert.Equal(t, "/data", p.Cold, "the cold root defaults to the data dir") + assert.Equal(t, filepath.Join("/data", "txhash", "index"), p.TxhashIndex, "the index defaults under the cold tier") assert.Equal(t, filepath.Join("/data", "hot"), p.HotStorage) } @@ -211,6 +219,7 @@ func TestResolvePaths_OverridesWin(t *testing.T) { assert.Equal(t, "/mnt/catalog", p.Catalog) assert.Equal(t, "/mnt/cold", p.Cold) + assert.Equal(t, "/mnt/txidx", p.TxhashIndex) assert.Equal(t, "/mnt/hot", p.HotStorage) } @@ -218,9 +227,10 @@ func TestLockRoots_AllDistinctRoots(t *testing.T) { cfg, err := ParseConfig([]byte(minimalValidConfig)) require.NoError(t, err) roots := cfg.ResolvePaths().LockRoots() - // Meta store + cold-tier root + hot storage = three roots. - require.Len(t, roots, 3) + // Meta store + cold-tier root + tx-hash index + hot storage = four roots. 
+ require.Len(t, roots, 4) assert.Contains(t, roots, filepath.Join("/data", "catalog", "rocksdb")) assert.Contains(t, roots, "/data", "the cold-tier root (defaulting to the data dir) is locked") + assert.Contains(t, roots, filepath.Join("/data", "txhash", "index"), "the tx-hash index root is locked") assert.Contains(t, roots, filepath.Join("/data", "hot")) } diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/config_validate.go b/cmd/stellar-rpc/internal/fullhistory/streaming/config_validate.go index 3f7dd9ae8..eac3d806d 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/config_validate.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/config_validate.go @@ -14,19 +14,22 @@ import ( // validateConfig pseudocode), run BEFORE startStreaming. It does three things, // in order: // -// 1. Stateless form validation — workers >= 1, max_retries >= 0, and +// 1. Stateless form validation — chunks_per_txhash_index in +// [1, MaxChunksPerTxhashIndex], workers >= 1, max_retries >= 0, and // earliest_ledger a well-formed "genesis" | "now" | chunk-aligned numeric. // Validating the full static form here keeps every later parse well-formed. // -// 2. Restart vs first start — the layout pin (config:earliest_ledger) is -// committed on first start. Present ⟹ a prior first start completed and the -// layout is immutable: confirm earliest_ledger is unchanged — with the +// 2. Restart vs first start — the two layout pins +// (config:chunks_per_txhash_index, config:earliest_ledger) are committed +// ATOMICALLY on first start, so they exist all-or-nothing. BOTH present ⟹ a +// prior first start completed and the layout is immutable: confirm cpi is +// unchanged (abort on mismatch) and earliest_ledger is unchanged — with the // "now"-on-restart no-op rule (a frontfill deployment keeps "now" in its // config across restarts and must not abort). // // 3. 
First start — resolve earliest_ledger (genesis needs no tip; "now" and a // numeric floor each require a reachable, ready backend through the SAME -// injected NetworkTipBackend startStreaming uses), then commit the pin in +// injected NetworkTipBackend startStreaming uses), then commit BOTH pins in // one atomic synced batch via the Catalog. // // It returns the RESOLVED earliest ledger (chunk-aligned, >= genesis) the caller @@ -45,10 +48,16 @@ func validateConfig( return 0, errors.New("streaming: validateConfig requires a non-nil Catalog") } + cpi := derefU32(cfg.Backfill.ChunksPerTxhashIndex) workers := derefInt(cfg.Backfill.Workers) maxRetries := derefInt(cfg.Backfill.MaxRetries) // --- 1. Stateless form validation. --- + if cpi == 0 || cpi > MaxChunksPerTxhashIndex { + return 0, fmt.Errorf("streaming: chunks_per_txhash_index must be in [1, %d] "+ + "(it defines the index layout, immutable once stored); got %d", + MaxChunksPerTxhashIndex, cpi) + } if workers < 1 { return 0, fmt.Errorf("streaming: workers must be >= 1 (got %d) — a zero pool deadlocks executePlan", workers) } @@ -62,14 +71,23 @@ func validateConfig( return 0, err } - // --- 2/3. Pin inspection. --- + // --- 2/3. Pin inspection. The two pins are written together (PinLayout's + // atomic batch), so they are present all-or-nothing. --- + cpiStored, cpiPinned, err := cat.ChunksPerTxhashIndex() + if err != nil { + return 0, fmt.Errorf("streaming: read chunks_per_txhash_index pin: %w", err) + } earliestStored, earliestPinned, err := cat.EarliestLedger() if err != nil { return 0, fmt.Errorf("streaming: read earliest_ledger pin: %w", err) } - if earliestPinned { //nolint:nestif // first-start vs restart immutability branch + if cpiPinned && earliestPinned { //nolint:nestif // first-start vs restart immutability branch // --- 2. Restart: the layout is committed — confirm nothing changed. 
--- + if cpiStored != cpi { + return 0, fmt.Errorf("streaming: chunks_per_txhash_index changed: stored=%d, config=%d "+ + "(the index layout is immutable once stored)", cpiStored, cpi) + } // earliest_ledger immutability. The backend tip is NOT re-sampled — it // may lag below the pinned floor and the catch-up loop's // max(tip, lastCommitted) handles that. A genesis/numeric value must @@ -92,13 +110,13 @@ func validateConfig( } // --- 3. First start (or an incomplete prior start — no artifacts yet). --- - // Resolve earliest_ledger, then commit the layout pin in one atomic batch. + // Resolve earliest_ledger, then commit BOTH layout pins in one atomic batch. earliest, err := resolveEarliestFirstStart(ctx, cfg.Streaming.EarliestLedger, tip, tipBackoff, tipMaxAttempts) if err != nil { return 0, err } - if err := cat.PinLayout(earliest); err != nil { - return 0, fmt.Errorf("streaming: pin layout (earliest=%d): %w", earliest, err) + if err := cat.PinLayout(cpi, earliest); err != nil { + return 0, fmt.Errorf("streaming: pin layout (cpi=%d, earliest=%d): %w", cpi, earliest, err) } return earliest, nil } diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/config_validate_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/config_validate_test.go index e99092f25..a62d23bcb 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/config_validate_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/config_validate_test.go @@ -13,12 +13,12 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) -// validCfg builds a documented-valid Config with the validateConfig-relevant -// knobs set; callers mutate one field to drive a rejection case. -func validCfg(workers, maxRetries int, earliest string) Config { +// validCfg builds a documented-valid Config with the four validateConfig- +// relevant knobs set; callers mutate one field to drive a rejection case. 
+func validCfg(cpi uint32, workers, maxRetries int, earliest string) Config { return Config{ Service: ServiceConfig{DefaultDataDir: "/data"}, - Backfill: BackfillConfig{Workers: &workers, MaxRetries: &maxRetries}, + Backfill: BackfillConfig{ChunksPerTxhashIndex: &cpi, Workers: &workers, MaxRetries: &maxRetries}, Streaming: StreamingConfig{EarliestLedger: earliest, CaptiveCoreConfig: "/cc"}, } } @@ -38,14 +38,20 @@ func callValidate(t *testing.T, cfg Config, cat *Catalog, tip NetworkTipBackend) return validateConfig(context.Background(), cfg, cat, tip, time.Millisecond, 3) } -// requireEarliestPin reads the layout pin straight back from the live metastore -// and asserts it equals the expected value. Used right after a first-start or a -// restart call so a metastore read-visibility anomaly surfaces LOUDLY here as a -// direct "pin readback missed" failure. Also the anchor for the -// restart-mutates-nothing assertions: a successful restart must leave the pin -// byte-identical. -func requireEarliestPin(t *testing.T, cat *Catalog, wantEarliest uint32) { +// requirePins reads both layout pins straight back from the live metastore and +// asserts they equal the expected values. Used right after a first-start or a +// restart call so a metastore read-visibility anomaly (the suspected source of +// the intermittent restart-immutability flake) surfaces LOUDLY here as a direct +// "pin readback missed" failure, rather than downstream as a confusing nil +// error from a later validateConfig. Also the anchor for the restart-mutates- +// nothing assertions: a successful restart must leave both pins byte-identical. 
+func requirePins(t *testing.T, cat *Catalog, wantCPI, wantEarliest uint32) { t.Helper() + cpi, ok, err := cat.ChunksPerTxhashIndex() + require.NoError(t, err, "readback of chunks_per_txhash_index pin") + require.True(t, ok, "chunks_per_txhash_index pin must be present after validateConfig") + require.Equal(t, wantCPI, cpi, "chunks_per_txhash_index pin readback") + el, ok, err := cat.EarliestLedger() require.NoError(t, err, "readback of earliest_ledger pin") require.True(t, ok, "earliest_ledger pin must be present after validateConfig") @@ -59,11 +65,15 @@ func requireEarliestPin(t *testing.T, cat *Catalog, wantEarliest uint32) { func TestValidateConfig_AcceptsGenesisFirstStart(t *testing.T) { cat, _ := testCatalog(t) // Genesis needs no tip: a down backend is fine. - earliest, err := callValidate(t, validCfg(4, 3, "genesis"), cat, downTip()) + earliest, err := callValidate(t, validCfg(testCPI, 4, 3, "genesis"), cat, downTip()) require.NoError(t, err) assert.Equal(t, uint32(chunk.FirstLedgerSeq), earliest) - // The pin is committed. + // Both pins committed. + cpi, ok, err := cat.ChunksPerTxhashIndex() + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, uint32(testCPI), cpi) el, ok, err := cat.EarliestLedger() require.NoError(t, err) require.True(t, ok) @@ -74,7 +84,7 @@ func TestValidateConfig_AcceptsNowFirstStart(t *testing.T) { cat, _ := testCatalog(t) // chunk 5 first ledger is 50002; a tip mid-chunk-5 resolves "now" to 50002. 
tipLedger := chunk.ID(5).FirstLedger() + 1234 - earliest, err := callValidate(t, validCfg(4, 3, "now"), cat, readyTip(tipLedger)) + earliest, err := callValidate(t, validCfg(testCPI, 4, 3, "now"), cat, readyTip(tipLedger)) require.NoError(t, err) assert.Equal(t, chunk.ID(5).FirstLedger(), earliest) @@ -86,14 +96,14 @@ func TestValidateConfig_AcceptsNumericFirstStart(t *testing.T) { cat, _ := testCatalog(t) floor := chunk.ID(3).FirstLedger() // 30002, chunk-aligned tipLedger := chunk.ID(10).FirstLedger() - earliest, err := callValidate(t, validCfg(4, 3, itoa(floor)), cat, readyTip(tipLedger)) + earliest, err := callValidate(t, validCfg(testCPI, 4, 3, itoa(floor)), cat, readyTip(tipLedger)) require.NoError(t, err) assert.Equal(t, floor, earliest) } -func TestValidateConfig_AcceptsZeroRetries(t *testing.T) { +func TestValidateConfig_AcceptsMaxCPIAndZeroRetries(t *testing.T) { cat, _ := testCatalog(t) - _, err := callValidate(t, validCfg(1, 0, "genesis"), cat, downTip()) + _, err := callValidate(t, validCfg(MaxChunksPerTxhashIndex, 1, 0, "genesis"), cat, downTip()) require.NoError(t, err) } @@ -107,12 +117,14 @@ func TestValidateConfig_RejectsMalformed(t *testing.T) { cfg Config want string }{ - {"zero workers", validCfg(0, 3, "genesis"), "workers"}, - {"negative workers", validCfg(-1, 3, "genesis"), "workers"}, - {"negative max_retries", validCfg(4, -1, "genesis"), "max_retries"}, - {"bogus earliest string", validCfg(4, 3, "yesterday"), "earliest_ledger"}, - {"sub-genesis numeric floor", validCfg(4, 3, "1"), "earliest_ledger"}, - {"misaligned numeric floor", validCfg(4, 3, "12345"), "earliest_ledger"}, + {"zero cpi", validCfg(0, 4, 3, "genesis"), "chunks_per_txhash_index"}, + {"over-max cpi", validCfg(MaxChunksPerTxhashIndex+1, 4, 3, "genesis"), "chunks_per_txhash_index"}, + {"zero workers", validCfg(testCPI, 0, 3, "genesis"), "workers"}, + {"negative workers", validCfg(testCPI, -1, 3, "genesis"), "workers"}, + {"negative max_retries", validCfg(testCPI, 4, -1, 
"genesis"), "max_retries"}, + {"bogus earliest string", validCfg(testCPI, 4, 3, "yesterday"), "earliest_ledger"}, + {"sub-genesis numeric floor", validCfg(testCPI, 4, 3, "1"), "earliest_ledger"}, + {"misaligned numeric floor", validCfg(testCPI, 4, 3, "12345"), "earliest_ledger"}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { @@ -122,26 +134,33 @@ func TestValidateConfig_RejectsMalformed(t *testing.T) { assert.Contains(t, err.Error(), tc.want) // A rejected config pins nothing. - _, ok, _ := cat.EarliestLedger() + _, ok, _ := cat.ChunksPerTxhashIndex() + assert.False(t, ok, "no cpi pin on a rejected config") + _, ok, _ = cat.EarliestLedger() assert.False(t, ok, "no earliest pin on a rejected config") }) } } // --------------------------------------------------------------------------- -// First start pins the earliest_ledger key. +// First start pins BOTH keys atomically. // --------------------------------------------------------------------------- -func TestValidateConfig_FirstStartPinsEarliest(t *testing.T) { +func TestValidateConfig_FirstStartPinsBothAtomically(t *testing.T) { cat, _ := testCatalog(t) - // Before: not pinned. - _, ok, _ := cat.EarliestLedger() + // Before: neither pinned. + _, ok, _ := cat.ChunksPerTxhashIndex() + require.False(t, ok) + _, ok, _ = cat.EarliestLedger() require.False(t, ok) - _, err := callValidate(t, validCfg(4, 3, "genesis"), cat, downTip()) + _, err := callValidate(t, validCfg(777, 4, 3, "genesis"), cat, downTip()) require.NoError(t, err) - // After: present. + // After: BOTH present. 
+ cpi, ok, _ := cat.ChunksPerTxhashIndex() + require.True(t, ok) + assert.Equal(t, uint32(777), cpi) el, ok, _ := cat.EarliestLedger() require.True(t, ok) assert.Equal(t, uint32(chunk.FirstLedgerSeq), el) @@ -153,7 +172,7 @@ func TestValidateConfig_FirstStartPinsEarliest(t *testing.T) { func TestValidateConfig_NowFirstStartNeedsTip(t *testing.T) { cat, _ := testCatalog(t) - _, err := callValidate(t, validCfg(4, 3, "now"), cat, downTip()) + _, err := callValidate(t, validCfg(testCPI, 4, 3, "now"), cat, downTip()) require.Error(t, err) assert.Contains(t, err.Error(), "now") _, ok, _ := cat.EarliestLedger() @@ -163,7 +182,7 @@ func TestValidateConfig_NowFirstStartNeedsTip(t *testing.T) { func TestValidateConfig_NumericFirstStartNeedsTip(t *testing.T) { cat, _ := testCatalog(t) floor := chunk.ID(3).FirstLedger() - _, err := callValidate(t, validCfg(4, 3, itoa(floor)), cat, downTip()) + _, err := callValidate(t, validCfg(testCPI, 4, 3, itoa(floor)), cat, downTip()) require.Error(t, err) assert.Contains(t, err.Error(), "network tip") } @@ -172,7 +191,7 @@ func TestValidateConfig_NumericFloorPastTipRejected(t *testing.T) { cat, _ := testCatalog(t) floor := chunk.ID(100).FirstLedger() // way ahead tipLedger := chunk.ID(5).FirstLedger() + 1 // tip far below the floor - _, err := callValidate(t, validCfg(4, 3, itoa(floor)), cat, readyTip(tipLedger)) + _, err := callValidate(t, validCfg(testCPI, 4, 3, itoa(floor)), cat, readyTip(tipLedger)) require.Error(t, err) assert.Contains(t, err.Error(), "past the current network tip") _, ok, _ := cat.EarliestLedger() @@ -181,7 +200,7 @@ func TestValidateConfig_NumericFloorPastTipRejected(t *testing.T) { func TestValidateConfig_SubGenesisTipRejectedAsNotReady(t *testing.T) { cat, _ := testCatalog(t) - _, err := callValidate(t, validCfg(4, 3, "now"), cat, readyTip(chunk.FirstLedgerSeq-1)) + _, err := callValidate(t, validCfg(testCPI, 4, 3, "now"), cat, readyTip(chunk.FirstLedgerSeq-1)) require.Error(t, err) assert.Contains(t, 
err.Error(), "now") } @@ -192,20 +211,32 @@ func TestValidateConfig_SubGenesisTipRejectedAsNotReady(t *testing.T) { func TestValidateConfig_RestartAcceptsUnchanged(t *testing.T) { cat, _ := testCatalog(t) - // First start pins earliest=genesis. Read the pin straight back so a metastore - // visibility anomaly fails here, not as a downstream nil error. - _, err := callValidate(t, validCfg(4, 3, "genesis"), cat, downTip()) + // First start pins cpi=500, earliest=genesis. Read the pins straight back so + // a metastore visibility anomaly fails here, not as a downstream nil error. + _, err := callValidate(t, validCfg(500, 4, 3, "genesis"), cat, downTip()) require.NoError(t, err) - requireEarliestPin(t, cat, uint32(chunk.FirstLedgerSeq)) + requirePins(t, cat, 500, uint32(chunk.FirstLedgerSeq)) - // Restart with the identical earliest: no error, no re-sample needed. - earliest, err := callValidate(t, validCfg(8, 1, "genesis"), cat, downTip()) + // Restart with the identical config: no error, no re-sample needed. + earliest, err := callValidate(t, validCfg(500, 8, 1, "genesis"), cat, downTip()) require.NoError(t, err) assert.Equal(t, uint32(chunk.FirstLedgerSeq), earliest) - // A successful restart MUTATES NOTHING: the pin is byte-identical to the - // first-start value. - requireEarliestPin(t, cat, uint32(chunk.FirstLedgerSeq)) + // A successful restart MUTATES NOTHING: both pins are byte-identical to the + // first-start values. This kills the corrupt-re-pin mutation (a restart that + // returns the right value but rewrites a wrong pin would be invisible until + // the next restart). 
+ requirePins(t, cat, 500, uint32(chunk.FirstLedgerSeq)) +} + +func TestValidateConfig_RestartAbortsOnChangedCPI(t *testing.T) { + cat, _ := testCatalog(t) + _, err := callValidate(t, validCfg(500, 4, 3, "genesis"), cat, downTip()) + require.NoError(t, err) + + _, err = callValidate(t, validCfg(600, 4, 3, "genesis"), cat, downTip()) + require.Error(t, err) + assert.Contains(t, err.Error(), "chunks_per_txhash_index changed") } func TestValidateConfig_RestartAbortsOnChangedEarliest(t *testing.T) { @@ -214,35 +245,35 @@ func TestValidateConfig_RestartAbortsOnChangedEarliest(t *testing.T) { // visibility anomaly surfaces here as a missed pin, not downstream as the // restart branch spuriously returning nil. floor := chunk.ID(3).FirstLedger() - _, err := callValidate(t, validCfg(4, 3, itoa(floor)), cat, readyTip(chunk.ID(50).FirstLedger())) + _, err := callValidate(t, validCfg(testCPI, 4, 3, itoa(floor)), cat, readyTip(chunk.ID(50).FirstLedger())) require.NoError(t, err) - requireEarliestPin(t, cat, floor) + requirePins(t, cat, testCPI, floor) // Restart with a different numeric floor aborts. other := chunk.ID(7).FirstLedger() - _, err = callValidate(t, validCfg(4, 3, itoa(other)), cat, readyTip(chunk.ID(50).FirstLedger())) + _, err = callValidate(t, validCfg(testCPI, 4, 3, itoa(other)), cat, readyTip(chunk.ID(50).FirstLedger())) require.Error(t, err) assert.Contains(t, err.Error(), "earliest_ledger changed") // The aborted restart left the original pin untouched. - requireEarliestPin(t, cat, floor) + requirePins(t, cat, testCPI, floor) } func TestValidateConfig_RestartGenesisVsNumericAborts(t *testing.T) { cat, _ := testCatalog(t) // First start: genesis (earliest pinned = 2). 
- _, err := callValidate(t, validCfg(4, 3, "genesis"), cat, downTip()) + _, err := callValidate(t, validCfg(testCPI, 4, 3, "genesis"), cat, downTip()) require.NoError(t, err) - requireEarliestPin(t, cat, uint32(chunk.FirstLedgerSeq)) + requirePins(t, cat, testCPI, uint32(chunk.FirstLedgerSeq)) // Restart edited to a numeric floor != genesis: abort. - _, err = callValidate(t, validCfg(4, 3, itoa(chunk.ID(3).FirstLedger())), cat, + _, err = callValidate(t, validCfg(testCPI, 4, 3, itoa(chunk.ID(3).FirstLedger())), cat, readyTip(chunk.ID(50).FirstLedger())) require.Error(t, err) assert.Contains(t, err.Error(), "earliest_ledger changed") // The aborted restart left the genesis pin untouched. - requireEarliestPin(t, cat, uint32(chunk.FirstLedgerSeq)) + requirePins(t, cat, testCPI, uint32(chunk.FirstLedgerSeq)) } // "now" on restart is a deliberate no-op — it keeps the pinned floor and never @@ -251,20 +282,20 @@ func TestValidateConfig_RestartGenesisVsNumericAborts(t *testing.T) { func TestValidateConfig_RestartNowIsNoOp(t *testing.T) { cat, _ := testCatalog(t) // First start: "now" resolves against a tip in chunk 5 -> pin 50002. - _, err := callValidate(t, validCfg(4, 3, "now"), cat, readyTip(chunk.ID(5).FirstLedger()+10)) + _, err := callValidate(t, validCfg(testCPI, 4, 3, "now"), cat, readyTip(chunk.ID(5).FirstLedger()+10)) require.NoError(t, err) - requireEarliestPin(t, cat, chunk.ID(5).FirstLedger()) + requirePins(t, cat, testCPI, chunk.ID(5).FirstLedger()) // Restart with "now" and a tip that now sits in a DIFFERENT chunk: no // abort, no re-resolve — the original pin is kept, and a down backend is // even tolerated (no tip sample at all). 
- earliest, err := callValidate(t, validCfg(4, 3, "now"), cat, downTip()) + earliest, err := callValidate(t, validCfg(testCPI, 4, 3, "now"), cat, downTip()) require.NoError(t, err) assert.Equal(t, chunk.ID(5).FirstLedger(), earliest, "restart with now keeps the original pin") // A "now" restart MUTATES NOTHING: the original pin is byte-identical, even // though a live backend would have resolved "now" to a different chunk. - requireEarliestPin(t, cat, chunk.ID(5).FirstLedger()) + requirePins(t, cat, testCPI, chunk.ID(5).FirstLedger()) } // itoa is the test-local uint32 -> decimal-string helper for building numeric diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/convergence_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/convergence_test.go index c0658901a..d4a8c7866 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/convergence_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/convergence_test.go @@ -15,30 +15,42 @@ import ( // ============================================================================= // Crash-injection + convergence suite — the design's strongest validation // (design-docs/full-history-streaming-workflow.md "Convergence", "Scenario -// coverage", "What a bug looks like"). +// coverage", "What a bug looks like"; gettransaction-full-history-design.md +// §7.6 crash matrix). // // Each case (1) CONSTRUCTS a durable crash / partial-completion state on a real // Catalog + real hotchunk DB + temp artifact dirs — by driving the REAL protocol -// ops (MarkChunkFreezing, SurgicalRecovery, the hot-tier open/ingest) to a chunk -// boundary and then STOPPING before the next op runs, and/or by directly -// planting the durable keys+files a crash at that instant would leave. (2) runs -// the REAL convergence path — a lifecycle tick (runLifecycleTick) and/or a -// re-derivation (deriveCompleteThrough / deriveWatermark). 
(3) ASSERTS the -// system converges to quiescence satisfying INV-2..4 by calling the REAL -// Catalog.Audit and requiring report.Clean(), PLUS idempotency (re-running the -// convergence op changes nothing) and that the derived watermark equals the -// durable state. +// ops (MarkChunkFreezing, MarkIndexFreezing, buildTxhashIndex, SurgicalRecovery, +// the hot-tier open/ingest) to a chunk boundary and then STOPPING before the next +// op runs, and/or by directly planting the durable keys+files a crash at that +// instant would leave. (The crashHooks in hooks.go — fired from INSIDE build.go — +// drive the finer-grained §7.6 instants; those rows live in build_test.go. This +// file reproduces the SAME durable states at op granularity, which is sufficient +// because the only convergence step here is the next tick / derivation, not a +// resumed mid-op.) (2) runs the REAL convergence path — a lifecycle tick +// (runLifecycleTick) and/or a re-derivation (deriveCompleteThrough / +// deriveWatermark). (3) ASSERTS the system converges to quiescence satisfying +// INV-1..4 by calling the REAL Catalog.Audit and requiring report.Clean(), PLUS +// idempotency (re-running the convergence op changes nothing) and that the +// derived watermark equals the durable state. // // The point of using the real ops + real audit (rather than hand-rolled // assertions) is the design's "None of the invariants reference the phase -// scans": a bug in freeze / discard / prune / sweep surfaces here as a genuine -// Audit violation, not something the same code that produced it judges +// scans": a bug in freeze / discard / prune / commit / sweep surfaces here as a +// genuine Audit violation, not something the same code that produced it judges // acceptable. 
// -// CAVEAT — INV-1's deep byte-compare (audit_test.go's DeepDeriver) is NOT wired -// here — this suite asserts INV-1 only structurally (no orphan/dangling/ -// duplicate, single canonical state); content re-derivation is audit_test.go's -// job. +// CAVEAT — which cases genuinely exercise convergence. With the deliberate +// exception of HotVolumeLossCase4 (whose convergence value is the +// ErrHotVolumeLost fatal + watermark healing, the tick being a verified no-op +// because the cold history survived intact — see that test), every case here +// reaches the tick from a state the audit reports DIRTY, and the tick changes +// durable keys: the construct is a real crash residue, not a happy path dressed +// as one. PerChunkPruningInputSwept makes that explicit with a pre-tick +// require.False(pre.Clean()). INV-1's deep byte-compare (audit_test.go's +// DeepDeriver) is NOT wired here — this suite asserts INV-1 only structurally +// (no orphan/dangling/duplicate, single canonical state); content re-derivation +// is audit_test.go's job. // ============================================================================= // convergenceHarness bundles the catalog, its lifecycle config (real production @@ -51,13 +63,13 @@ type convergenceHarness struct { probe HotProbe } -// newConvergenceHarness builds a harness over a catalog with the genesis -// earliest_ledger pin and the given retention width. -// -//nolint:unparam // retentionChunks varies across slices' convergence tests -func newConvergenceHarness(t *testing.T, retentionChunks uint32) *convergenceHarness { +// newConvergenceHarness builds a harness over a cpi-wide-window catalog with the +// genesis earliest_ledger pin and the given retention width. cpi=1 makes every +// one-chunk window finalize immediately (the common boundary-convergence shape); +// larger cpi exercises multi-chunk windows. 
+func newConvergenceHarness(t *testing.T, cpi, retentionChunks uint32) *convergenceHarness { t.Helper() - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, cpi) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) cfg, rec := lifecycleTestConfig(t, cat, retentionChunks) return &convergenceHarness{ @@ -85,7 +97,7 @@ func (h *convergenceHarness) auditClean(t *testing.T) AuditReport { report, err := h.cat.Audit(AuditOptions{RetentionChunks: h.cfg.RetentionChunks}) require.NoError(t, err, "audit must complete (error only for I/O)") require.True(t, report.Clean(), - "after convergence the store must satisfy INV-2..4; violations:\n%s", violationsString(report)) + "after convergence the store must satisfy INV-1..4; violations:\n%s", violationsString(report)) return report } @@ -100,7 +112,8 @@ func (h *convergenceHarness) requireQuiescent(t *testing.T) { // requireWatermarkMatchesDurable asserts the derived watermark equals the // expected durable frontier — the design's "the startup derivation equals -// exactly the durable state". +// exactly the durable state". A nil-keyed live DB is not opened here; callers +// that have a live hot DB pass its committed seq. func (h *convergenceHarness) requireWatermarkMatchesDurable(t *testing.T, want uint32) { t.Helper() got, err := deriveWatermark(h.cat, h.probe) @@ -121,25 +134,155 @@ func violationsString(r AuditReport) string { return s } +// ============================================================================= +// §7.6 index crash matrix — reproduced at op granularity: each row is planted +// with the REAL protocol ops (MarkIndexFreezing, buildTxhashIndex) stopped at an op +// boundary, or as the exact durable key/file state a crash at that instant leaves. 
+// ============================================================================= + +// The three §7.6 rows are constructed as: +// - after-mark / mid step 3: plant the "freezing" coverage key via the real +// MarkIndexFreezing (step 2) plus a partial .idx file — exactly what +// buildTxhashIndex leaves after step 2, before its commit (step 4). +// - after-commit-before-sweep: run the real terminal commit (buildTxhashIndex, +// which IS step 4) to land the frozen coverage + demoted "pruning" inputs, +// then STOP before the eager sweep (we do not call buildThenSweep's sweep). +// - mid-sweep: leave a "pruning" coverage key whose file is already unlinked +// (the instant after the durable unlink, before the key-delete). +// +// Each is then converged by a real lifecycle tick (the prune scan is the §7.6 +// backstop, plus the freeze stage rebuilds a desired-but-missing coverage) and +// audited clean. + +// seedFrozenInputsForWindow makes chunks [lo,hi] fully frozen — ledgers + events +// (real placeholder files) and a real non-empty sorted txhash .bin (frozen) — +// so buildTxhashIndex's blindly-trusted "frozen .bin" precondition holds and a +// terminal index over the window is buildable. It does NOT build the index; the +// caller drives that. cpi must equal hi+1 for the window to be terminal at hi. +func seedFrozenInputsForWindow(t *testing.T, cat *Catalog, lo, hi chunk.ID) { + t.Helper() + for c := lo; c <= hi; c++ { + // ledgers + events: real files + frozen keys. + freezeChunkArtifacts(t, cat, c, KindLedgers, KindEvents) + // txhash .bin: a real non-empty sorted bin + frozen key (buildTxhashIndex's + // blindly-trusted precondition input). + freezeChunkBin(t, cat, c, []txEntry{{hash: hashAt(uint64(c) + 1), seq: seqIn(c, 0)}}) + } +} + +func TestConvergence_IndexCrashMatrix(t *testing.T) { + tests := []struct { + name string + cpi uint32 + // construct plants the durable state a crash at this §7.6 row leaves. 
The + // chunk(s) below a live chunk are kept complete so completeThrough advances. + construct func(t *testing.T, h *convergenceHarness) + }{ + { + // Row 1: "after step 2, or mid step 3" — predecessor (none here) still + // frozen; the new coverage key is "freezing" with its .idx absent/partial. + // Planted via the REAL MarkIndexFreezing (step 2) + a partial file. + name: "after-mark/mid-step-3 freezing-coverage-debris", + cpi: 1, + construct: func(t *testing.T, h *convergenceHarness) { + seedFrozenInputsForWindow(t, h.cat, 0, 0) + // Step 2 of the real protocol: mark "freezing". Then write a PARTIAL + // .idx (a crash mid step 3 leaves the file present-but-untrusted). + cov, err := h.cat.MarkIndexFreezing(0, 0, 0) + require.NoError(t, err) + writeArtifact(t, h.cat.layout.IndexFilePath(cov)) // partial file under the freezing key + // The window has NO frozen coverage yet, so the chunk's hot DB (if any) + // must persist; we leave none. completeThrough comes from the durable + // ledgers/events/txhash chunk being below a live chunk 1. + require.NoError(t, h.cat.PutHotTransient(1)) // live chunk above the partition + }, + }, + { + // Row 2: "after step 4, before the eager sweep" — new coverage frozen and + // live; predecessor "pruning"; terminal: the window's .bin keys "pruning". + // Planted via the REAL terminal commit (buildTxhashIndex); the sweep never runs. + name: "after-commit-before-sweep demoted-keys-unswept", + cpi: 1, + construct: func(t *testing.T, h *convergenceHarness) { + seedFrozenInputsForWindow(t, h.cat, 0, 0) + require.NoError(t, h.cat.PutHotTransient(1)) // live chunk above the partition + + // Run the REAL terminal commit (buildTxhashIndex IS §7.6 step 4: it + // promotes coverage [0,0] to "frozen" and, because the build is + // terminal, demotes the window's chunk:0:txhash .bin key to "pruning" + // in the SAME atomic batch), then STOP before the eager sweep — exactly + // the "after step 4, before the eager sweep" row. 
buildThenSweep's eager + // sweep (and its afterCommitBeforeSweep hook) is intentionally NOT run, + // so the demoted .bin key/file is the unswept leftover the row describes. + require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 0, h.cfg.buildConfig())) + require.Equal(t, StatePruning, mustState(t, h.cat, 0, KindTxHash), + "terminal commit demoted the .bin input") + require.FileExists(t, h.cat.layout.TxHashBinPath(0), + "the demoted .bin file is unswept (the crash row's leftover)") + }, + }, + { + // Row 3: "mid-sweep" — a "pruning" key outlives the durable unlink (the + // file is already gone, the key-delete has not yet run). Planted as the + // exact durable bytes that instant leaves: a "pruning" index coverage key + // with NO file on disk. The prune scan re-runs the sweep (SweepIndexKey on + // a "pruning" key: unlink-already-gone is a no-op, then the key delete), + // restoring "key absent => file gone". No frozen chunks => the freeze + // stage's range is empty, isolating the sweep as the sole convergence step. + name: "mid-sweep pruning-key-outlives-unlink", + cpi: 1, + construct: func(t *testing.T, h *convergenceHarness) { + cov, err := h.cat.MarkIndexFreezing(0, 0, 0) + require.NoError(t, err) + // Demote to "pruning" and DO NOT write its file — the mid-sweep instant + // after the durable unlink. + require.NoError(t, h.cat.store.Put(cov.Key, string(StatePruning))) + require.NoFileExists(t, h.cat.layout.IndexFilePath(cov)) + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + h := newConvergenceHarness(t, tc.cpi, 0) + tc.construct(t, h) + + // Converge: one real lifecycle tick (its prune scan is the §7.6 backstop; + // for the after-commit row it also re-builds/finishes via the freeze + // stage and prune stage). Then assert INV-1..4 clean and quiescent. + h.tick(t) + h.auditClean(t) + h.requireQuiescent(t) + + // Idempotency: a second tick changes nothing and still audits clean. 
+ before := snapshotAllKeys(t, h.cat) + h.tick(t) + after := snapshotAllKeys(t, h.cat) + require.Equal(t, before, after, "re-running the convergence tick must be a no-op") + h.auditClean(t) + }) + } +} + // ============================================================================= // Per-chunk artifact crash states (freezing / pruning) — the "freezing" tail // is re-materialized by the freeze stage from its still-present hot DB // (processChunk's hot branch, the design's "freeze from a live hot DB"); the -// "pruning" demoted artifact is swept by the prune scan. +// "pruning" demoted input is swept by the prune scan. // ============================================================================= // TestConvergence_PerChunkFreezingReMaterializesFromHotDB constructs the // per-chunk "freezing" crash state WITHIN retention (a crashed freeze that -// marked the key but did not finish): chunk 0's ledgers are "freezing" with a -// complete hot DB still behind the chunk. The freeze stage re-derives the cold -// artifact FROM that hot DB (backfillSource's hot branch), then discards the -// now-redundant hot DB — converging to a clean, quiescent store satisfying -// INV-2..4. +// marked the key but did not finish): chunk 0's ledgers/events/txhash are "freezing" +// with a complete hot DB still behind the chunk. The freeze stage re-derives the +// cold artifacts FROM that hot DB (backfillSource's hot branch) and folds the +// window's index, then discards the now-redundant hot DB — converging to a clean, +// quiescent store satisfying INV-1..4. func TestConvergence_PerChunkFreezingReMaterializesFromHotDB(t *testing.T) { // full-chunk ingest; isolated TempDir/catalog — overlaps the other heavy // tests to fit the gate's go-test timeout. 
t.Parallel() - h := newConvergenceHarness(t, 0) // a chunk finalizes at chunk 0 + h := newConvergenceHarness(t, 1, 0) // cpi=1: a one-chunk window finalizes at chunk 0 // Chunk 0: a COMPLETE hot DB on disk (every ledger ingested, write handle // closed — the just-closed-chunk shape). This is the source the freeze stage @@ -149,23 +292,23 @@ func TestConvergence_PerChunkFreezingReMaterializesFromHotDB(t *testing.T) { live := openLiveHotDB(t, h.cat, 1) t.Cleanup(func() { _ = live.Close() }) - // Now plant the crash: chunk 0's cold artifact marked "freezing" (a crashed + // Now plant the crash: chunk 0's cold artifacts marked "freezing" (a crashed // freeze that pre-marked but did not fsync+flip). Mark via the REAL protocol. - require.NoError(t, h.cat.MarkChunkFreezing(0, KindLedgers)) + require.NoError(t, h.cat.MarkChunkFreezing(0, KindLedgers, KindEvents, KindTxHash)) require.Equal(t, StateFreezing, mustState(t, h.cat, 0, KindLedgers)) // Converge: one real tick. The freeze stage's resolver sees the non-frozen - // key, re-materializes chunk 0 from its hot DB, and the discard stage retires - // the hot DB. + // keys, re-materializes chunk 0 from its hot DB, folds the index, and the + // discard stage retires the hot DB. h.tick(t) h.auditClean(t) h.requireQuiescent(t) // The chunk is now frozen and its hot DB discarded. require.Equal(t, StateFrozen, mustState(t, h.cat, 0, KindLedgers)) - has, err := h.cat.Has(hotChunkKey(0)) + covered, err := indexCovers(0, h.cat) require.NoError(t, err) - require.False(t, has, "chunk 0's hot DB was discarded after the freeze") + require.True(t, covered, "the window index folded chunk 0 in") // Idempotency. 
before := snapshotAllKeys(t, h.cat) @@ -174,34 +317,40 @@ func TestConvergence_PerChunkFreezingReMaterializesFromHotDB(t *testing.T) { h.auditClean(t) } -// TestConvergence_PerChunkPruningArtifactSwept constructs the per-chunk -// "pruning" crash state: a recovery-demoted ledger artifact whose sweep did not -// run, sitting in-retention. The prune scan sweeps it (file + key), converging -// to INV-2..4 clean. -func TestConvergence_PerChunkPruningArtifactSwept(t *testing.T) { - h := newConvergenceHarness(t, 0) +// TestConvergence_PerChunkPruningInputSwept constructs the per-chunk "pruning" +// crash state: a demoted .bin input (its terminal commit demoted it) whose eager +// sweep did not run, sitting in-retention. The prune scan sweeps it (file + key), +// converging to INV-1..4 clean. +func TestConvergence_PerChunkPruningInputSwept(t *testing.T) { + h := newConvergenceHarness(t, 1, 0) - // A live chunk 1 above the partition so chunk 0 is below it and complete. - require.NoError(t, h.cat.PutHotTransient(1)) - - // The crash leftover: a chunk:0:ledgers key demoted to "pruning" with its pack - // file still on disk (a demotion whose sweep did not unlink). - writeArtifact(t, h.cat.layout.LedgerPackPath(0)) - require.NoError(t, h.cat.store.Put(chunkKey(0, KindLedgers), string(StatePruning))) - - // Before convergence the audit FAILS (a "pruning" key surviving quiescence is - // an INV-2 violation) — proving the suite catches the bug class. + // A finalized window: chunk 0 ledgers+events frozen, a terminal frozen coverage + // [0,0] covering it (so the window is finalized and the .bin is redundant). + freezeChunkArtifacts(t, h.cat, 0, KindLedgers, KindEvents) + freezeIndex(t, h.cat, 0, 0, 0) + require.NoError(t, h.cat.PutHotTransient(1)) // live chunk above the partition + + // The crash leftover: a chunk:0:txhash key demoted to "pruning" with its .bin + // file still on disk (the terminal commit demoted the key; the eager sweep did + // not unlink). 
This is exactly the "after step 4, before the eager sweep" .bin + // residue, persisted across the boundary. + require.NoError(t, h.cat.MarkChunkFreezing(0, KindTxHash)) + writeArtifact(t, h.cat.layout.TxHashBinPath(0)) + require.NoError(t, h.cat.store.Put(chunkKey(0, KindTxHash), string(StatePruning))) + + // Before convergence the audit FAILS (a leftover txhash key in a finalized + // window is an INV-2 violation) — proving the suite catches the bug class. pre, err := h.cat.Audit(AuditOptions{RetentionChunks: h.cfg.RetentionChunks}) require.NoError(t, err) - require.False(t, pre.Clean(), "the unswept pruning artifact must be a detectable violation pre-convergence") + require.False(t, pre.Clean(), "the unswept pruning .bin must be a detectable violation pre-convergence") // Converge: the prune scan sweeps the "pruning" ref. h.tick(t) h.auditClean(t) h.requireQuiescent(t) - require.Equal(t, State(""), mustState(t, h.cat, 0, KindLedgers), "the pruning key is swept") - require.NoFileExists(t, h.cat.layout.LedgerPackPath(0), "the pruning file is unlinked") + require.Equal(t, State(""), mustState(t, h.cat, 0, KindTxHash), "the pruning .bin key is swept") + require.NoFileExists(t, h.cat.layout.TxHashBinPath(0), "the pruning .bin file is unlinked") before := snapshotAllKeys(t, h.cat) h.tick(t) @@ -211,9 +360,12 @@ func TestConvergence_PerChunkPruningArtifactSwept(t *testing.T) { // ============================================================================= // Boundary crash — recovered by the watermark refinement. A crash at a chunk -// boundary can leave the just-completed chunk's hot key "ready" and C+1's hot -// key "transient". deriveWatermark's ONE read of the highest *ready* chunk -// recovers the chunk-level frontier the "transient" key no longer advertises. +// boundary can leave the just-completed chunk's hot key "ready" (every ledger +// committed, the chunk not yet frozen) and C+1's hot key "transient" (the next +// bracket opened its key but crashed before "ready"). 
deriveWatermark's ONE read of the highest *ready* +// chunk recovers the chunk-level frontier the "transient" key no longer +// advertises (progress.go's "recovering the chunk-level frontier when the +// positional term under-counts"). // ============================================================================= // TestConvergence_BoundaryCrashWatermarkRefinement plants the boundary-crash @@ -229,7 +381,7 @@ func TestConvergence_BoundaryCrashWatermarkRefinement(t *testing.T) { // full-chunk ingest; isolated TempDir/catalog — overlaps the other heavy // tests to fit the gate's go-test timeout. t.Parallel() - h := newConvergenceHarness(t, 0) + h := newConvergenceHarness(t, 1, 0) // Chunk 0: a complete, "ready" hot DB (every ledger committed). Chunk 1: // "transient" only (the next bracket opened its key but crashed before "ready"). @@ -249,13 +401,13 @@ func TestConvergence_BoundaryCrashWatermarkRefinement(t *testing.T) { // last committed seq — the design's boundary-crash recovery. h.requireWatermarkMatchesDurable(t, chunk.ID(0).LastLedger()) - // Pre-resume the store is already INV-2..4 clean (chunk 0's hot DB is the live + // Pre-resume the store is already INV-1..4 clean (chunk 0's hot DB is the live // tier from the lifecycle's view; nothing is orphaned or dangling). h.auditClean(t) // Ingestion resumes: chunk 1's bracket completes ("ready"), moving the partition - // above chunk 0. Now a tick freezes chunk 0 from its ready hot DB and discards - // the hot DB — converging to INV-2..4 clean and quiescent. + // above chunk 0. Now a tick freezes chunk 0 from its ready hot DB, folds the + // index, and discards the hot DB — converging to INV-1..4 clean and quiescent. 
live := openLiveHotDB(t, h.cat, 1) t.Cleanup(func() { _ = live.Close() }) h.tick(t) @@ -273,27 +425,32 @@ func TestConvergence_BoundaryCrashWatermarkRefinement(t *testing.T) { // ============================================================================= // TestConvergence_SurgicalRecoveryCase3ReDerives ties case 3 end to end on real -// state: a fully-converged chunk 0 (frozen cold) is tainted by a cold+hot -// surgical recovery (cold -> "freezing"); the next tick re-derives the cold -// artifact from a re-ingested hot DB, returning to INV-2..4 clean. +// state: a fully-converged chunk 0 (frozen cold + terminal index, hot DB already +// discarded) is tainted by a cold+hot surgical recovery (cold -> "freezing"; no +// hot key is left to demote); the next tick re-derives the cold artifacts from a +// re-ingested hot DB and re-folds the index, returning to INV-1..4 clean. func TestConvergence_SurgicalRecoveryCase3ReDerives(t *testing.T) { // full-chunk ingest; isolated TempDir/catalog — overlaps the other heavy // tests to fit the gate's go-test timeout. t.Parallel() - h := newConvergenceHarness(t, 0) + h := newConvergenceHarness(t, 1, 0) - // Converged steady state for chunk 0: frozen cold artifact, served PURELY by - // cold (no hot DB — the hot tier was already discarded in steady state). A live - // chunk 1 sits above the partition. + // Converged steady state for chunk 0: frozen cold artifacts + a real terminal + // index, served PURELY by cold (no hot DB — the hot tier was already discarded + // in steady state). A live chunk 1 sits above the partition. 
live := openLiveHotDB(t, h.cat, 1) t.Cleanup(func() { _ = live.Close() }) - freezeChunkArtifacts(t, h.cat, 0, KindLedgers) + freezeChunkArtifacts(t, h.cat, 0, KindLedgers, KindEvents) + freezeChunkBin(t, h.cat, 0, []txEntry{{hash: hashAt(1), seq: seqIn(0, 0)}}) + // Build the terminal index for chunk 0 through the real op so the .idx is real; + // it demotes+sweeps chunk:0:txhash, leaving chunk 0 served by ledgers/events + .idx. + require.NoError(t, buildThenSweep(context.Background(), IndexBuild{Window: 0, Lo: 0, Hi: 0}, h.cfg.buildConfig())) h.auditClean(t) // sanity: the pre-recovery state is already clean and quiescent // Operator runs the case-3 recovery over chunk 0 (cold + hot). The present cold - // key (ledgers) drops to "freezing" — one atomic batch. There is no hot key for - // chunk 0 to demote (it was discarded in steady state), so the recovery's hot - // tier is a no-op for this chunk; the cold demotion is what regresses it. + // keys (ledgers, events) drop to "freezing" — one atomic batch. There is no hot key + // for chunk 0 to demote (it was discarded in steady state), so the recovery's + // hot tier is a no-op for this chunk; the cold demotion is what regresses it. plan, err := h.cat.SurgicalRecovery(RecoveryRequest{Lo: 0, Hi: 0, Tier: RecoverColdAndHot}) require.NoError(t, err) require.False(t, plan.Empty()) @@ -302,18 +459,16 @@ func TestConvergence_SurgicalRecoveryCase3ReDerives(t *testing.T) { // Re-ingestion refills the chunk's hot tail (the design's "captive core // re-ingests the un-frozen tail forward" / "openHotDB wipes and recreates one // when re-ingestion re-opens that chunk") — the local source the freeze stage - // re-derives the cold artifact from (production uses the bulk backend). + // re-derives the cold artifacts from (production uses the bulk backend). 
ingestFullHotChunk(t, h.cat, 0) require.Equal(t, HotReady, mustHotState(t, h.cat, 0)) - // Converge: the tick re-materializes chunk 0's cold artifact, then discards the - // hot DB. Back to INV-2..4 clean and quiescent. + // Converge: the tick re-materializes chunk 0's cold artifacts and re-folds the + // index, then discards the hot DB. Back to INV-1..4 clean and quiescent. h.tick(t) h.auditClean(t) h.requireQuiescent(t) require.Equal(t, StateFrozen, mustState(t, h.cat, 0, KindLedgers)) - require.Equal(t, StateFrozen, mustState(t, h.cat, 0, KindEvents), - "the re-ingested hot DB re-derives the events cold segment too") before := snapshotAllKeys(t, h.cat) h.tick(t) @@ -333,12 +488,13 @@ func TestConvergence_SurgicalRecoveryCase3ReDerives(t *testing.T) { // asserts the watermark heals to the last frozen boundary, a re-ingested hot DB // converges, and the audit is clean. func TestConvergence_HotVolumeLossCase4(t *testing.T) { - h := newConvergenceHarness(t, 0) + h := newConvergenceHarness(t, 1, 0) // Durable cold history through chunk 0 (survives on durable storage): frozen - // ledgers + events. Chunk 0's last ledger is the last frozen boundary the - // watermark must heal to. + // ledgers+events + a terminal index. Chunk 0's last ledger is the last frozen + // boundary the watermark must heal to. freezeChunkArtifacts(t, h.cat, 0, KindLedgers, KindEvents) + freezeIndex(t, h.cat, 0, 0, 0) // The lost live chunk 1: "ready" with its hot dir GONE (the ephemeral volume // died while the meta store survived). @@ -373,7 +529,10 @@ func TestConvergence_HotVolumeLossCase4(t *testing.T) { // this case lives in the two halves above — the ErrHotVolumeLost fatal and the // watermark healing to the last frozen boundary — NOT in the tick: the cold // history survived intact and the re-ingested chunk is the new live tier, so - // nothing is dirty for the tick to repair. + // nothing is dirty for the tick to repair. 
We assert that explicitly — the + // post-recovery store is ALREADY INV-1..4 clean, and the tick is a verified + // no-op (the design's "the dirs are already gone, so recovery is pure key + // demotion": there is no tainted frozen artifact to re-materialize). h.requireWatermarkMatchesDurable(t, committed) h.auditClean(t) // already clean BEFORE the tick — the recovery left nothing dirty before := snapshotAllKeys(t, h.cat) @@ -387,21 +546,27 @@ func TestConvergence_HotVolumeLossCase4(t *testing.T) { // ============================================================================= // Retention widen / shorten — the floor recomputes; convergence prunes below a // raised floor (shorten) and the next tick is a no-op once below-floor data is -// gone. +// gone. (Widening's re-materialization is exclusively backfill's job — the +// tick's production range never starts below existing storage, and producibility +// is enforced lazily per chunk during the build, not by a pre-flight gate — so +// the tick-side convergence we assert for widening is that +// it does NOT spuriously prune or fail; the actual bottom-extension is backfill.) // ============================================================================= // TestConvergence_RetentionShortenPrunesBelowRaisedFloor seeds several finalized // chunks, then SHORTENS retention so a higher floor leaves the lowest chunks // wholly below it. One tick prunes them (keys + files + hot DBs) and the store -// converges to INV-2..4 clean against the NEW (shorter) retention. +// converges to INV-1..4 clean against the NEW (shorter) retention. func TestConvergence_RetentionShortenPrunesBelowRaisedFloor(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) - // Six finalized chunks (0..5) with real files, plus a live chunk 6. + // Six finalized one-chunk windows (0..5) with real files + terminal indexes, + // plus a live chunk 6. 
for c := chunk.ID(0); c <= 5; c++ { freezeChunkArtifacts(t, cat, c, KindLedgers, KindEvents) writeArtifact(t, cat.layout.LedgerPackPath(c)) + freezeIndex(t, cat, cat.windows.WindowID(c), c, c) } makeReadyHotDirNoData(t, cat, 1) // a below-floor hot DB too live := openLiveHotDB(t, cat, 6) @@ -438,16 +603,17 @@ func TestConvergence_RetentionShortenPrunesBelowRaisedFloor(t *testing.T) { // claim from the tick's perspective: a lowered floor does NOT make the tick // prune (it never does) NOR materialize new bottom storage (that is backfill's // job). The tick over already-converged storage with a wider retention window is -// a clean no-op, and the store stays INV-2..4 clean — the bottom-extension is +// a clean no-op, and the store stays INV-1..4 clean — the bottom-extension is // deferred to the next backfill, not the tick. func TestConvergence_RetentionWidenIsTickNoOpAuditClean(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) require.NoError(t, cat.PutEarliestLedger(chunk.FirstLedgerSeq)) // Chunks 3..5 finalized (the existing bottom of storage is chunk 3), live 6. for c := chunk.ID(3); c <= 5; c++ { freezeChunkArtifacts(t, cat, c, KindLedgers, KindEvents) writeArtifact(t, cat.layout.LedgerPackPath(c)) + freezeIndex(t, cat, cat.windows.WindowID(c), c, c) } live := openLiveHotDB(t, cat, 6) t.Cleanup(func() { _ = live.Close() }) @@ -470,7 +636,7 @@ func TestConvergence_RetentionWidenIsTickNoOpAuditClean(t *testing.T) { // ============================================================================= // Young network — no complete chunk exists yet. The tick produces nothing (the // freeze stage's range is empty), and the empty store trivially satisfies -// INV-2..4. The convergence here is "no spurious work, no fatal". +// INV-1..4. The convergence here is "no spurious work, no fatal". 
// ============================================================================= // TestConvergence_YoungNetworkNoOp seeds a network younger than one complete @@ -478,7 +644,7 @@ func TestConvergence_RetentionWidenIsTickNoOpAuditClean(t *testing.T) { // complete chunk below the live one. A tick must do nothing and the audit must // be clean. func TestConvergence_YoungNetworkNoOp(t *testing.T) { - h := newConvergenceHarness(t, 0) + h := newConvergenceHarness(t, 1, 0) // A live chunk 0's hot DB, mid-ingest (a few ledgers, not the whole chunk), so // nothing below it is complete and no chunk has frozen. diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/daemon.go b/cmd/stellar-rpc/internal/fullhistory/streaming/daemon.go index 7df864f6d..07ba3e8b2 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/daemon.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/daemon.go @@ -166,7 +166,11 @@ func RunDaemonWith(ctx context.Context, configPath string, opts DaemonOptions) e } defer func() { _ = store.Close() }() - cat := NewCatalog(store, NewLayoutFromPaths(paths)) + windows, err := NewWindows(derefU32(cfg.Backfill.ChunksPerTxhashIndex)) + if err != nil { + return err + } + cat := NewCatalog(store, NewLayoutFromPaths(paths), windows) // --- 5a. Build the external boundaries (validateConfig needs NetworkTip). 
--- build := opts.BuildBoundaries diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/daemon_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/daemon_test.go index ff384d5a6..ff4862295 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/daemon_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/daemon_test.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "sync/atomic" "testing" "time" @@ -122,15 +123,21 @@ func TestRunDaemon_LoadValidateWireStartCleanShutdown(t *testing.T) { assert.Equal(t, filepath.Join(dataDir, "hot"), capture.gotPaths.HotStorage) assert.Equal(t, filepath.Join(dataDir, "catalog", "rocksdb"), capture.gotPaths.Catalog) - // validateConfig pinned the immutable layout (earliest) before start. + // validateConfig pinned the immutable layout (cpi + earliest) before start. store, err := openMetaAt(t, capture.gotPaths.Catalog) require.NoError(t, err) defer func() { _ = store.Close() }() - cat := NewCatalog(store, NewLayout(dataDir)) + windows, err := NewWindows(testCPI) + require.NoError(t, err) + cat := NewCatalog(store, NewLayout(dataDir), windows) earliest, pinned, err := cat.EarliestLedger() require.NoError(t, err) require.True(t, pinned, "validateConfig must pin earliest_ledger before startStreaming") assert.Equal(t, uint32(chunk.FirstLedgerSeq), earliest) + cpi, cpiPinned, err := cat.ChunksPerTxhashIndex() + require.NoError(t, err) + require.True(t, cpiPinned) + assert.Equal(t, uint32(DefaultChunksPerTxhashIndex), cpi) } // Storage-path overrides must be HONORED by the data path, not just locked. The @@ -148,13 +155,17 @@ func TestRunDaemon_StoragePathOverridesHonored(t *testing.T) { overrideRoot := t.TempDir() // a distinct mount, e.g. 
/mnt/nvme hotOverride := filepath.Join(overrideRoot, "hot") coldOverride := filepath.Join(overrideRoot, "cold") + txhashIndexOverride := filepath.Join(overrideRoot, "txidx") // the one cold artifact with its own override catalogOverride := filepath.Join(overrideRoot, "meta") cfg := Config{ - Service: ServiceConfig{DefaultDataDir: dataDir}, - Catalog: CatalogConfig{Path: catalogOverride}, - ImmutableStorage: ImmutableStorageConfig{Path: coldOverride}, - Streaming: StreamingConfig{HotStorage: StoragePathConfig{Path: hotOverride}}, + Service: ServiceConfig{DefaultDataDir: dataDir}, + Catalog: CatalogConfig{Path: catalogOverride}, + ImmutableStorage: ImmutableStorageConfig{ + Path: coldOverride, + TxhashIndexPath: txhashIndexOverride, + }, + Streaming: StreamingConfig{HotStorage: StoragePathConfig{Path: hotOverride}}, }.WithDefaults() paths := cfg.ResolvePaths() @@ -169,6 +180,18 @@ func TestRunDaemon_StoragePathOverridesHonored(t *testing.T) { assert.Equal(t, filepath.Join(ledgersRoot, cid.BucketID(), cid.String()+".pack"), layout.LedgerPackPath(cid)) assert.Equal(t, ledgersRoot, layout.LedgersRoot()) + // events and txhash-raw are fixed subdirs of the cold root; only the + // txhash index honors its own override. + eventsRoot := filepath.Join(coldOverride, "events") + txhashRawRoot := filepath.Join(coldOverride, "txhash", "raw") + assert.Equal(t, eventsRoot, layout.EventsRoot()) + assert.Equal(t, txhashRawRoot, layout.TxHashRawRoot()) + assert.Equal(t, filepath.Join(txhashRawRoot, cid.BucketID(), cid.String()+".bin"), + layout.TxHashBinPath(cid)) + assert.Equal(t, txhashIndexOverride, layout.TxHashIndexRoot()) + for _, p := range layout.EventsPaths(cid) { + assert.True(t, filepathHasPrefix(p, eventsRoot), "events path %q under cold override", p) + } // Nothing resolves under {DataDir}/hot or {DataDir}/ledgers. 
assert.NotEqual(t, filepath.Join(dataDir, "hot", cid.String()), layout.HotChunkPath(cid)) @@ -178,7 +201,9 @@ func TestRunDaemon_StoragePathOverridesHonored(t *testing.T) { store, err := metastore.New(paths.Catalog, silentLogger()) require.NoError(t, err) defer func() { _ = store.Close() }() - cat := NewCatalog(store, layout) + windows, err := NewWindows(testCPI) + require.NoError(t, err) + cat := NewCatalog(store, layout, windows) db, err := openHotTierForChunk(cat, cid, silentLogger()) require.NoError(t, err) @@ -194,6 +219,17 @@ func TestRunDaemon_StoragePathOverridesHonored(t *testing.T) { assert.True(t, os.IsNotExist(err), "no hot data may land under DataDir when an override is set") } +// filepathHasPrefix reports whether path lives under prefix (prefix is an +// ancestor dir of path). It compares cleaned components, not raw string +// prefixes, so /a/bc is not treated as under /a/b. +func filepathHasPrefix(path, prefix string) bool { + rel, err := filepath.Rel(prefix, path) + if err != nil { + return false + } + return rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator)) +} + // A second daemon on the same data dir fails fast on the storage-root flock — the // single-process invariant the entrypoint must enforce before opening any store. func TestRunDaemon_LockContentionFailsFast(t *testing.T) { diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/doc.go b/cmd/stellar-rpc/internal/fullhistory/streaming/doc.go index 30bca677e..5f8a5efb5 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/doc.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/doc.go @@ -1,23 +1,24 @@ // Package streaming holds the orchestration spine for the full-history // streaming daemon: catch-up on startup, live ingestion from captive core, and -// the freeze → discard → prune lifecycle over the merged storage layer -// (fullhistory/pkg/...). 
It is built ON that layer — the catalog WRAPS +// the freeze → rebuild → discard → prune lifecycle over the merged storage +// layer (fullhistory/pkg/...). It is built ON that layer — the catalog WRAPS // metastore.Store rather than reinventing a RocksDB wrapper. // -// This file map covers Slice 1 (the daemon skeleton) plus Slice 2 (events). -// Events is a second per-chunk artifact woven into the existing seams — it adds -// no new files here, only events column families, a processChunk segment -// writer, and the matching resolver/audit kind-loops. Slice 3 then adds the -// tx-hash data type (see "Later slices" below). +// This file map covers the complete daemon — Slice 1 (the skeleton) + Slice 2 +// (events) + Slice 3 (tx-hash). Tx-hash is the per-window rolling-index +// subsystem: it threads a window dimension through the catalog and adds the +// cold .bin/.idx artifacts and their rebuild. // // # Data model (keys-first) // -// Every durable artifact (a per-chunk file: ledger or events) and every per-chunk hot DB is named -// by exactly one catalog key, and the path on disk is a fixed bijection of that -// key. Nothing ever lists a directory to find work; every scan and sweep -// iterates keys. The authoritative spec is +// Every durable artifact (a per-chunk file — ledger, events, or tx-hash .bin — +// or a per-window index .idx) and every per-chunk hot DB is named by exactly +// one catalog key, and the path on disk is a fixed bijection of that key. +// Nothing ever lists a directory to find work; every scan and sweep iterates +// keys. The authoritative spec is // design-docs/full-history-streaming-workflow.md (Data model, One write -// protocol). +// protocol) and gettransaction-full-history-design.md §6.3 (index keys, +// coverage, and the uniqueness invariant). // // # File map // @@ -28,27 +29,34 @@ // package-private and the invariant tests meaningful. 
The files group by // concern: // -// Foundation keys.go, paths.go -// the catalog key schema, the key↔path bijection, and chunk -// geometry. +// Foundation keys.go, paths.go, window.go +// the catalog key schema, the key↔path bijection, and the +// chunk + window geometry. // Catalog catalog.go, catalog_protocol.go, catalog_sweep.go // the catalog (a metastore.Store wrapper), the one-write // protocol (mark "freezing" → fsync file+dirent → flip -// "frozen"), and the key-driven sweep (the only deletion body). +// "frozen"), and the key-driven sweeps (the only deletion +// bodies — per-chunk and per-window index). // Config config.go, config_lock.go, config_validate.go // the TOML schema/loader/defaults, the single-process flock, // and validateConfig (the network-dependent earliest-ledger // resolution + the two-pin first-start commit). // Cross-cutting artifacts.go -// the ArtifactSet/Kind abstraction the later layers subset. +// the ArtifactSet/Kind abstraction the data-type slices subset. // Storage process.go, hotsource.go // processChunk + backfillSource materialize a chunk's cold // artifacts from the cheapest source (ready hot DB → frozen // local .pack → bulk backend); hotsource exposes the hot tier // as a freeze source. +// Index txindex.go +// the per-window rolling cold tx-hash index: buildTxhashIndex +// (k-way merge of the chunk .bin runs → a coverage-named .idx), +// the atomic promote/demote commit batch, and buildThenSweep. // Planner resolve.go, execute.go, eligibility.go -// the postcondition resolver (catalog diff → Plan), the -// bounded-worker executor, and discard/prune eligibility. +// the postcondition resolver (catalog diff → Plan, incl. the +// per-window index rule), the bounded-worker executor (the +// chunk→index done-channel stratum), and discard/prune +// eligibility (incl. the index-aware discard gate). 
// Ingestion ingest.go // the live hot-DB ingestion loop (indexed GetLedger, one synced // WriteBatch per ledger) and the chunk-boundary handoff. @@ -67,9 +75,4 @@ // Test seam hooks.go // test-only crash-injection points fired from inside the real // protocol/sweep methods (every field nil in production). -// -// # Later slices -// -// Slice 3 adds the tx-hash data type with its per-window rolling index — -// additive on this ledgers+events skeleton. package streaming diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/e2e_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/e2e_test.go index fd177b258..6bceacb7c 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/e2e_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/e2e_test.go @@ -1,8 +1,7 @@ package streaming // ============================================================================= -// Issue 19 — in-process end-to-end integration of the streaming daemon -// (ledgers-only slice). +// Issue 19 — in-process end-to-end integration of the streaming daemon. // // WHAT IS REAL HERE // Everything inside the process is the real production code path: @@ -11,24 +10,40 @@ package streaming // validateConfig gate (pins the immutable layout + resolves the floor), // and the supervised startStreaming loop. // - startStreaming → catchUp → openHotTierForChunk → runIngestionLoop (the -// real atomic per-ledger WriteBatch over the real per-chunk hotchunk -// RocksDB), the real boundary handoff, the real doorbell. +// real atomic per-ledger WriteBatch across all CFs of the real per-chunk +// hotchunk RocksDB), the real boundary handoff, the real doorbell. // - lifecycleLoop / runLifecycleTick: the real resolve + executePlan freeze -// (the ledger cold artifact derived FROM the live hot DB via processChunk's -// hot branch), the real discard + prune scans. -// - Catalog.Audit (INV-2..4) over the real durable keys + files. 
+// (cold artifacts derived FROM the live hot DB via processChunk's hot +// branch), the real txhash index fold (a real streamhash .idx on disk), +// the real discard + prune scans. +// - The real txhash stores on both sides of a getTransaction-style hash→seq +// lookup: the cold ColdReader over the frozen .idx and the live HotStore +// CF. +// - Catalog.Audit (INV-1..4) over the real durable keys + files. // // WHAT IS FAKED (and why that is the right boundary) // Only the two EXTERNAL boundaries the daemon injects on purpose: -// - The ledger SOURCE (CoreStreamOpener / NetworkTipBackend), fed -// SYNTHETIC-BUT-WELL-FORMED zero-tx LedgerCloseMeta. No captive core, no -// object store, no network. -// - ServeReads is a no-op recorder (#772). +// - The ledger SOURCE. Production drives ingestion from captive +// stellar-core (a child process) and backfill from a bulk object-store +// backend. Here both cross their injected interfaces (CoreStreamOpener / +// NetworkTipBackend) and are fed SYNTHETIC-BUT-WELL-FORMED LedgerCloseMeta +// built by the same fixtures the merged store tests use (zero-tx LCM for +// bulk, plus a one-tx LCM where a real, network-hashed transaction hash is +// needed so the txhash index has a real key to resolve). No captive core, +// no docker-stellar-core, no object store, no network. +// - ServeReads is a no-op recorder (the SQLite→full-history read cutover is +// #772; see daemon.go). The read PATH we actually exercise is the txhash +// index lookup the getTransaction handler will sit on top of. // -// This in-process test is a LIFECYCLE + STORAGE-STATE test: it drives the whole -// freeze→discard→restart-resume→prune sequence and audits the result. It does -// not exercise a read PATH (the tx-hash lookups were removed with the tx-hash -// subsystem in this slice). 
+// FOLLOW-UP (out of scope here; requires infra not available in this sandbox) +// A full captive-core + docker-stellar-core E2E belongs in the existing +// integrationtest harness (cmd/stellar-rpc/internal/integrationtest): it +// stands up a real core + a real history archive and ingests real network +// ledgers. That validates the ledger SOURCE adapters (captiveCoreOpener, +// backendTip/DataStoreSource) this test fakes, and is gated on the #772 read +// cutover for an end-user getTransaction round-trip over RPC. This in-process +// test deliberately stops at the daemon's injected boundaries so it runs with +// no external services. // ============================================================================= import ( @@ -43,19 +58,91 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/stellar/go-stellar-sdk/keypair" + "github.com/stellar/go-stellar-sdk/network" supportlog "github.com/stellar/go-stellar-sdk/support/log" "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" ) +// e2ePassphrase is the network passphrase the synthetic tx hashes are computed +// against. Any stable value works; the index only needs deterministic hashes +// the test can then look up. +const e2ePassphrase = network.PublicNetworkPassphrase + +// oneTxLCMReturningHash builds a well-formed V2 LedgerCloseMeta carrying exactly +// ONE transaction for seq and returns BOTH the wire bytes and the real, +// network-hashed transaction hash. 
A non-zero-tx ledger is required somewhere in +// a chunk so its txhash .bin is non-empty (streamhash refuses a zero-key cold +// index, txhash.ErrEmptyBuildSet); returning the hash lets the E2E assert the +// getTransaction-style hash→seq lookup against a hash the daemon really +// committed. It mirrors lifecycle_test's oneTxLCMBytes, exposing the hash. +func oneTxLCMReturningHash(t *testing.T, seq uint32) ([]byte, [32]byte) { + t.Helper() + envelope := xdr.TransactionEnvelope{ + Type: xdr.EnvelopeTypeEnvelopeTypeTx, + V1: &xdr.TransactionV1Envelope{ + Tx: xdr.Transaction{ + SourceAccount: xdr.MustMuxedAddress(keypair.MustRandom().Address()), + Ext: xdr.TransactionExt{V: 1, SorobanData: &xdr.SorobanTransactionData{}}, + }, + }, + } + hash, err := network.HashTransactionInEnvelope(envelope, e2ePassphrase) + require.NoError(t, err) + + comp := []xdr.TxSetComponent{{ + Type: xdr.TxSetComponentTypeTxsetCompTxsMaybeDiscountedFee, + TxsMaybeDiscountedFee: &xdr.TxSetComponentTxsMaybeDiscountedFee{ + Txs: []xdr.TransactionEnvelope{envelope}, + }, + }} + opResults := []xdr.OperationResult{} + lcm := xdr.LedgerCloseMeta{ + V: 2, + V2: &xdr.LedgerCloseMetaV2{ + LedgerHeader: xdr.LedgerHeaderHistoryEntry{ + Header: xdr.LedgerHeader{ + ScpValue: xdr.StellarValue{CloseTime: xdr.TimePoint(0)}, + LedgerSeq: xdr.Uint32(seq), + }, + }, + TxSet: xdr.GeneralizedTransactionSet{ + V: 1, + V1TxSet: &xdr.TransactionSetV1{Phases: []xdr.TransactionPhase{{V: 0, V0Components: &comp}}}, + }, + TxProcessing: []xdr.TransactionResultMetaV1{{ + TxApplyProcessing: xdr.TransactionMeta{ + V: 4, + V4: &xdr.TransactionMetaV4{Operations: []xdr.OperationMetaV2{}}, + }, + Result: xdr.TransactionResultPair{ + TransactionHash: hash, + Result: xdr.TransactionResult{ + FeeCharged: 100, + Result: xdr.TransactionResultResult{Code: xdr.TransactionResultCodeTxSuccess, Results: &opResults}, + }, + }, + }}, + }, + } + raw, err := lcm.MarshalBinary() + require.NoError(t, err) + return raw, hash +} + // e2eGetter is 
the FAKE captive-core ledger getter: a resumable LedgerGetter the // ingestion loop polls by sequence (the design's core.GetLedger(ctx, seq)). It // returns the frame for the requested seq when it has one, and once the poll // runs past the synthetic backlog it blocks until ctx is canceled (a live tip // stream ends only on shutdown). It records the FIRST seq it was asked for so // the restart step can assert the daemon re-derived the watermark and resumed -// with no gap. +// with no gap. The ctx-cancelled GetLedger return is the clean-shutdown path the +// daemon top level classifies as clean. type e2eGetter struct { frames map[uint32][]byte maxSeq uint32 @@ -115,9 +202,11 @@ func (c *e2eCore) OpenCore(_ context.Context, resume uint32) (LedgerGetter, func } // e2eConfigPath writes a daemon TOML for an in-process E2E: genesis floor (no -// tip needed to validate/start) and the given retention width. -// captive_core_config is a stub path the test's BuildBoundaries replaces with a -// fake stream, never opening a real core. +// tip needed to validate/start), a one-chunk index window (chunks_per_txhash_- +// index = 1, so every window is terminal the instant its chunk freezes — the +// freeze→fold→discard sequence completes on the boundary tick), and the given +// retention width. captive_core_config is a stub path the test's BuildBoundaries +// replaces with a fake stream, never opening a real core. func e2eConfigPath(t *testing.T, dataDir string, retentionChunks uint32) string { t.Helper() cfgPath := filepath.Join(t.TempDir(), "daemon.toml") @@ -130,6 +219,9 @@ earliest_ledger = "genesis" captive_core_config = "/dev/null" retention_chunks = %d +[backfill] +chunks_per_txhash_index = 1 + [logging] level = "error" format = "text" @@ -144,7 +236,9 @@ format = "text" // callback). 
The metastore is opened RocksDB-primary (exclusive LOCK), so a test // CANNOT open a second handle on the same path while the daemon runs — instead // it reads durable state through the daemon's own catalog, which is safe for -// concurrent reads. +// concurrent reads. ServeReads records the serve count; a young-network tip +// (inside chunk 0) means backfill is a no-op and first-start ingests directly +// from genesis via the fake core. // //nolint:nonamedreturns // named outputs label the (cancel, done, catalog) handles func runDaemonInBackground( @@ -196,8 +290,8 @@ func waitClean(t *testing.T, cancel context.CancelFunc, done <-chan error) { require.NoError(t, err, "ctx cancel is a clean daemon shutdown") case <-time.After(60 * time.Second): // Post-cancel shutdown joins one in-flight lifecycle unit; a mid-flight - // freeze's Finalize fsync is unpreemptible and slow under -race + - // contention — the same reason the boundary-cross budget is 600s. + // freeze's Finalize fsync + index build is unpreemptible and slow under + // -race + contention — the same reason the boundary-cross budget is 600s. t.Fatal("daemon did not shut down cleanly after ctx cancel") } } @@ -206,22 +300,26 @@ func waitClean(t *testing.T, cancel context.CancelFunc, done <-chan error) { // The end-to-end walk. 
// ============================================================================ -// TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune drives the whole -// daemon lifecycle in one process against the real stores and the fake ledger -// source: +// TestE2E_DaemonLifecycle_FirstStartIngestFreezeLookupRestartPrune drives the +// whole daemon lifecycle in one process against the real stores and the fake +// ledger source: // // first start (genesis, young-network tip ⇒ direct ingest) → // ingest a FULL chunk + cross into the next (real boundary handoff) → -// lifecycle tick freezes chunk 0's ledger artifact + discards its hot tier → +// lifecycle tick freezes chunk 0 + folds its terminal txhash index + discards +// its hot tier → +// getTransaction-style hash→seq lookup resolves from the cold .idx (chunk 0) +// AND from the live hot CF (chunk 1) → // clean shutdown → // RESTART: re-derive the watermark, resume at exactly watermark+1 (no gap) → -// drive retention far enough to prune chunk 0, and confirm its keys/files go → +// drive retention far enough to prune chunk 0, and confirm a pruned read is +// not-found → // finish with Catalog.Audit → Clean. // // Correctness is asserted at every step. // //nolint:funlen // full lifecycle E2E with assertions at every step -func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { +func TestE2E_DaemonLifecycle_FirstStartIngestFreezeLookupRestartPrune(t *testing.T) { if testing.Short() { t.Skip("e2e ingests a full 10k-ledger chunk; skipped in -short") } @@ -235,14 +333,38 @@ func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { // --- Synthetic ledgers. We cross TWO chunk boundaries so chunks 0 AND 1 both // freeze (completeThrough reaches chunk 1's last ledger), leaving chunk 2 as // the live (un-frozen) chunk. That layout lets a later retention_chunks=1 run - // prune chunk 0 (wholly below the floor) while chunk 1 survives. Every ledger - // is zero-tx for speed. 
+ // prune chunk 0 (wholly below the floor) while chunk 1 survives. + // + // Each chunk is ingested in FULL and contiguously from its first ledger (the + // events CF's strict-contiguity precondition), so the freeze derives every + // cold artifact. One real, network-hashed tx is planted where a resolvable + // hash is needed — chunk 0's first ledger (→ frozen cold .idx) and chunk 2's + // first ledger (→ the live hot CF). Every other ledger is zero-tx for speed. c0First := c0.FirstLedger() + c1First := c1.FirstLedger() c2First := c2.FirstLedger() + coldRaw, coldHash := oneTxLCMReturningHash(t, c0First) // → frozen cold .idx (chunk 0) + hotRaw, hotHash := oneTxLCMReturningHash(t, c2First) // → live hot CF (chunk 2) + // Chunk 1's first ledger also carries a tx so its txhash .bin is non-empty — + // streamhash refuses to build a cold index over zero keys (ErrEmptyBuildSet), + // which would otherwise abort the lifecycle tick when chunk 1 freezes. + c1Raw, _ := oneTxLCMReturningHash(t, c1First) + frames := make([]e2eFrame, 0, 2*int(chunk.LedgersPerChunk)+2) appendLedger := func(seq uint32) { - frames = append(frames, e2eFrame{seq: seq, raw: zeroTxLCMBytes(t, seq)}) + var raw []byte + switch seq { + case c0First: + raw = coldRaw + case c1First: + raw = c1Raw + case c2First: + raw = hotRaw + default: + raw = zeroTxLCMBytes(t, seq) + } + frames = append(frames, e2eFrame{seq: seq, raw: raw}) } // Chunks 0 and 1 in full (both freeze), then chunk 2's first two ledgers (the // live chunk; boundary 1→2 fired, chunk 2 opened, its first ledger committed). @@ -259,28 +381,49 @@ func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { // ===================================================================== // STEP 1 — first start: config → lock → validate (pin genesis) → start → // direct ingest across the chunk-0 AND chunk-1 boundaries, with the lifecycle - // freezing and discarding each just-closed chunk off the doorbell. 
+ // freezing, folding, and discarding each just-closed chunk off the doorbell. // ===================================================================== cfgPath := e2eConfigPath(t, dataDir, 0) // retention 0 (full history) for now cancel, done, catCh := runDaemonInBackground(t, cfgPath, core, &served, metrics) + // Inspect durable state through the daemon's OWN bound catalog (the metastore + // is opened RocksDB-primary, so a second handle would fail the LOCK). The + // catalog is safe for concurrent reads alongside the daemon's writes. cat := awaitCatalog(t, catCh) // First wait until ingestion crosses BOTH boundaries and commits into chunk 2 // (the new live chunk). Delivering c2First proves both boundary handoffs fired - // (chunks 0 and 1 closed, chunk 2 opened). + // (chunks 0 and 1 closed, chunk 2 opened) and seeds the live hot-CF lookup. + // (NOTE: we must NOT gate on "chunk 0's hot key absent" first — the daemon + // hands the test its catalog from BuildBoundaries, BEFORE startStreaming opens + // the resume chunk's hot DB, so that key is transiently absent at start.) + // Budget note: crossing both boundaries is ~20k per-ledger SYNCED WriteBatches + // (the design's one-atomic-synced-batch-per-ledger durability boundary) racing + // the lifecycle freezes that re-read 10k ledgers each. fsync throughput is + // highly variable under contention: in isolation this reaches chunk 2 in ~110s + // (no -race) but ~175s under -race, and the CI gate runs the whole tree under + // `-race` (so this E2E is NOT -short-skipped there) alongside this package's + // six t.Parallel() full-chunk ticks, all competing for the same disk. 180s was + // too tight (flaky timeouts at 161/167s/killed). 600s absorbs the worst-case + // contended -race path while staying far under the 25m package envelope. 
require.Eventually(t, func() bool { return core.delivered.Load() >= c2First }, 600*time.Second, 200*time.Millisecond, "ingestion must cross both boundaries into chunk 2") // The boundary doorbells have rung. A lifecycle tick freezes each just-closed - // chunk's cold ledger artifact (from its closed hot DB), then discards its hot - // tier. The durable completion signal per chunk: the ledgers key is FROZEN AND - // the chunk's hot key is gone (discarded). + // chunk's cold artifacts (from its closed hot DB), folds its terminal (cpi=1) + // txhash index, then discards its hot tier. The durable completion signal per + // chunk: the window has a FROZEN txhash coverage (the .idx) AND the chunk's hot + // key is gone (discarded). (NOTE: the per-chunk chunk:{c}:txhash key is the + // .bin input the one-write index fold CONSUMES — after the fold it is + // demoted+swept, reading "" not "frozen"; the durable txhash artifact is the + // window's frozen coverage, not the per-chunk key.) + w0 := cat.windows.WindowID(c0) + w1 := cat.windows.WindowID(c1) require.Eventually(t, func() bool { - for _, c := range []chunk.ID{c0, c1} { - st, err := cat.State(c, KindLedgers) - if err != nil || st != StateFrozen { + for w, c := range map[WindowID]chunk.ID{w0: c0, w1: c1} { + _, hasCov, err := cat.FrozenCoverage(w) + if err != nil || !hasCov { return false } has, err := cat.Has(hotChunkKey(c)) @@ -289,27 +432,50 @@ func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { } } return true - }, 60*time.Second, 50*time.Millisecond, "the boundary ticks must freeze+discard chunks 0 and 1") + }, 60*time.Second, 50*time.Millisecond, "the boundary ticks must freeze+fold+discard chunks 0 and 1") require.GreaterOrEqual(t, served.Load(), int32(1), "reads were served") require.Equal(t, c0First, core.resumeSeen.Load(), "first start resumes captive core at genesis (watermark+1)") - // --- Correctness: chunks 0 and 1 ledger + events cold artifacts froze and - // exist on disk. 
--- + // --- Correctness: chunks 0 and 1 per-chunk cold artifacts (ledgers + events) froze. --- for _, c := range []chunk.ID{c0, c1} { - st, err := cat.State(c, KindLedgers) - require.NoError(t, err) - assert.Equal(t, StateFrozen, st, "chunk %s ledgers is frozen", c) - require.FileExists(t, cat.layout.LedgerPackPath(c), "chunk %s pack exists on disk", c) - - est, err := cat.State(c, KindEvents) - require.NoError(t, err) - assert.Equal(t, StateFrozen, est, "chunk %s events is frozen", c) - for _, p := range cat.layout.EventsPaths(c) { - require.FileExists(t, p, "chunk %s events segment file %s exists on disk", c, p) + for _, kind := range []Kind{KindLedgers, KindEvents} { + st, err := cat.State(c, kind) + require.NoError(t, err) + assert.Equal(t, StateFrozen, st, "chunk %s %s is frozen", c, kind) } } + // The window's txhash index is a frozen, terminal coverage (the .idx the cold + // getTransaction read resolves against). + frozenCov, ok, err := cat.FrozenCoverage(w0) + require.NoError(t, err) + require.True(t, ok, "chunk 0's window has a frozen txhash coverage") + require.True(t, cat.windows.IsTerminalCoverage(frozenCov), "a one-chunk (cpi=1) window is terminal") + + // ===================================================================== + // STEP 2 — getTransaction-style hash→seq lookup, both tiers. + // (a) cold: resolve chunk 0's tx via the frozen .idx on disk. + // (b) hot: resolve chunk 2's tx via the live hot DB's txhash CF. + // ===================================================================== + + // (a) Cold .idx — the exact reader getTransaction will sit on for frozen + // history. It resolves the committed hash to its real ledger seq. 
+ coldReader, err := txhash.OpenColdReader(cat.layout.IndexFilePath(frozenCov)) + require.NoError(t, err) + gotSeq, err := coldReader.Get(coldHash) + require.NoError(t, err, "the chunk-0 tx hash must resolve from the frozen cold index") + assert.Equal(t, c0First, gotSeq, "cold lookup returns the ledger the tx was committed in") + // A hash that was never committed misses (not-found, not a wrong answer). + _, missErr := coldReader.Get(hashAt(0xE2EDEADBEEF)) + require.ErrorIs(t, missErr, stores.ErrNotFound, "an uncommitted hash misses the cold index") + require.NoError(t, coldReader.Close()) + + // (b) is performed AFTER the clean shutdown below — opening chunk 2's hot DB + // read-only would conflict with the live ingestion writer's exclusive RocksDB + // LOCK while the daemon runs; once the daemon stops cleanly the live chunk's + // hot DB is on disk and reopenable. The hot tier is the UN-frozen live chunk's + // sole copy, so this still exercises the hot read path. // Observability: the daemon emitted the boundary + freeze phase signals (the // control-plane health gauges). @@ -317,8 +483,11 @@ func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { assert.GreaterOrEqual(t, metrics.snapshotFreezeCount(), 1, "at least one freeze stage ran") // ===================================================================== - // STEP 2 — clean shutdown. The supervised loop returns nil on ctx cancel. + // STEP 3 — clean shutdown. The supervised loop returns nil on ctx cancel. // ===================================================================== + // (Watermark derivation opens the live hot DB read-only, so it MUST run after + // the daemon — the live writer — releases the exclusive RocksDB LOCK; do it + // after waitClean below.) 
waitClean(t, cancel, done) // The daemon's catalog rode its now-closed metastore handle; bind a fresh @@ -331,7 +500,10 @@ func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { wmBeforeRestart := mustDeriveWatermark(t, postCat) require.GreaterOrEqual(t, wmBeforeRestart, c2First, "watermark advanced into chunk 2") - // Chunk 2 is the un-frozen live chunk: its hot key is "ready", no cold artifacts. + // (b) Live hot CF — now the daemon has stopped, chunk 2 (still the un-frozen + // live chunk: its hot key is "ready", no cold artifacts) is reopenable. Open + // its real hot DB and resolve the chunk-2 tx hash through the txhash CF — the + // read path getTransaction uses for live history before a chunk freezes. hotState, err := postCat.HotState(c2) require.NoError(t, err) require.Equal(t, HotReady, hotState, "chunk 2 is the un-frozen live chunk") @@ -339,8 +511,24 @@ func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { require.NoError(t, err) require.Equal(t, State(""), c2lfs, "the live chunk has no cold artifacts yet") + // Retry the open: RocksDB's process-level LOCK can linger momentarily after the + // writer closed (the same transient a production reader retries through). + var liveDB *hotchunk.DB + require.Eventually(t, func() bool { + db, oerr := hotchunk.Open(cat.layout.HotChunkPath(c2), c2, silentLogger()) + if oerr != nil { + return false + } + liveDB = db + return true + }, 10*time.Second, 50*time.Millisecond, "chunk 2's hot DB must be reopenable after shutdown") + hotSeq, err := liveDB.Txhash().Get(hotHash) + require.NoError(t, err, "the chunk-2 tx hash must resolve from the live hot CF") + assert.Equal(t, c2First, hotSeq, "hot lookup returns the live tx's ledger") + require.NoError(t, liveDB.Close()) // release before the restart reopens it as the live writer + // ===================================================================== - // STEP 3 — RESTART. 
A fresh RunDaemonWith re-opens everything, re-derives the + // STEP 4 — RESTART. A fresh RunDaemonWith re-opens everything, re-derives the // watermark from durable state, and resumes captive core at watermark+1 with // no gap. (The shared e2eCore records the new resume + the stream's From.) // ===================================================================== @@ -364,27 +552,35 @@ func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { waitClean(t, cancel2, done2) // ===================================================================== - // STEP 4 — retention prune. Re-run the daemon with retention_chunks = 1: the - // effective floor anchors at chunk 1, so chunk 0 (frozen) falls WHOLLY below - // the floor and the prune scan sweeps its files + keys, while chunk 1 (the - // floor chunk) survives. + // STEP 5 — retention prune. Re-run the daemon with retention_chunks = 1: the + // effective floor anchors at chunk 1 (lastCompleteChunkAt(through=chunk 1) - + // 1 + 1), so chunk 0 (frozen + folded) falls WHOLLY below the floor and the + // prune scan sweeps its files + keys, while chunk 1 (the floor chunk) survives. + // A read of a pruned chunk-0 hash is then not-found (no coverage to resolve it). // ===================================================================== prunedCfg := e2eConfigPath(t, dataDir, 1) // retain ~1 chunk - // Capture chunk 0's frozen pack path BEFORE the prune so we can confirm the + // Capture chunk 0's frozen .idx path BEFORE the prune so we can confirm the // file itself is gone afterward. (cat's layout is path-only and stays valid - // even though its metastore handle closed at the Step-2 shutdown.) - prunedPackPath := cat.layout.LedgerPackPath(c0) - require.FileExists(t, prunedPackPath, "chunk 0's cold pack exists before the prune") + // even though its metastore handle closed at the Step-3 shutdown.) 
+ prunedIdxPath := cat.layout.IndexFilePath(frozenCov) + require.FileExists(t, prunedIdxPath, "chunk 0's cold index exists before the prune") cancel3, done3, catCh3 := runDaemonInBackground(t, prunedCfg, core, &served, newRecordingMetrics()) pruneCat := awaitCatalog(t, catCh3) // the pruning daemon's own catalog // The prune scan runs on the first lifecycle tick (the at-start doorbell ring, - // which is startup convergence). Poll for chunk 0's per-chunk artifact key - // (the frozen cold ledger) to vanish. + // which is startup convergence). Poll for chunk 0's per-chunk artifact keys + // (ledgers + events — the frozen cold artifacts) to vanish. require.Eventually(t, func() bool { ledgers, err := pruneCat.State(c0, KindLedgers) - return err == nil && ledgers == State("") + if err != nil { + return false + } + ev, err := pruneCat.State(c0, KindEvents) + if err != nil { + return false + } + return ledgers == State("") && ev == State("") }, 60*time.Second, 50*time.Millisecond, "retention must prune chunk 0's artifact keys") // Chunk 1 (the floor chunk) is WITHIN retention and survives the prune. @@ -392,17 +588,26 @@ func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { require.NoError(t, err) assert.Equal(t, StateFrozen, c1lfs, "chunk 1 is at the retention floor and survives") - // The on-disk cold pack file is gone too (prune unlinks the files, not just - // the keys). + // The on-disk cold index file is gone too (prune unlinks the files, not just + // the keys) — a pruned read therefore cannot even open the reader. 
require.Eventually(t, func() bool { - _, statErr := os.Stat(prunedPackPath) + _, statErr := os.Stat(prunedIdxPath) return os.IsNotExist(statErr) - }, 10*time.Second, 50*time.Millisecond, "the pruned cold pack file is unlinked") + }, 10*time.Second, 50*time.Millisecond, "the pruned cold index file is unlinked") + + // getTransaction-style "pruned read is not-found": the frozen coverage key is + // gone, so the read path has no index to resolve the (formerly resolvable) + // chunk-0 hash against — the production reader returns not-found. After prune + // the window has no frozen coverage (ok=false): the read layer's "no coverage + // ⇒ not-found" gate. + _, covOK, err := pruneCat.FrozenCoverage(w0) + require.NoError(t, err) + assert.False(t, covOK, "chunk 0's window coverage is pruned ⇒ a chunk-0 hash read is not-found") waitClean(t, cancel3, done3) // ===================================================================== - // STEP 5 — Catalog.Audit (INV-2..4) → Clean. The store must be at a single + // STEP 6 — Catalog.Audit (INV-1..4) → Clean. The store must be at a single // canonical state with no orphans/dangling/duplicates and nothing below the // retention floor. RetentionChunks matches the daemon's last config so INV-4 // checks against the EXACT floor it enforced. 
@@ -412,7 +617,7 @@ func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { report, err := auditCat.Audit(AuditOptions{RetentionChunks: 1}) require.NoError(t, err, "audit completes (error only for I/O)") require.True(t, report.Clean(), - "after the full lifecycle the store satisfies INV-2..4; violations:\n%s", violationsString(report)) + "after the full lifecycle the store satisfies INV-1..4; violations:\n%s", violationsString(report)) } // ============================================================================ @@ -420,15 +625,18 @@ func TestE2E_DaemonLifecycle_FirstStartIngestFreezeRestartPrune(t *testing.T) { // ============================================================================ // e2eReadCatalog binds a Catalog over a SEPARATE metastore handle on the -// daemon's data dir for read-only inspection BETWEEN daemon runs (the metastore -// is RocksDB-primary / exclusive-LOCK, so this MUST be closed via the returned -// close func before the next daemon run reopens it). +// daemon's data dir, with the same one-chunk window the daemon config pins, for +// read-only inspection BETWEEN daemon runs (the metastore is RocksDB-primary / +// exclusive-LOCK, so this MUST be closed via the returned close func before the +// next daemon run reopens it). func e2eReadCatalog(t *testing.T, dataDir string) (*Catalog, func()) { t.Helper() paths := Config{Service: ServiceConfig{DefaultDataDir: dataDir}}.WithDefaults().ResolvePaths() store, err := openMetaAt(t, paths.Catalog) require.NoError(t, err) - return NewCatalog(store, NewLayoutFromPaths(paths)), func() { _ = store.Close() } + windows, err := NewWindows(1) // matches chunks_per_txhash_index = 1 + require.NoError(t, err) + return NewCatalog(store, NewLayoutFromPaths(paths), windows), func() { _ = store.Close() } } // mustDeriveWatermark derives the durable watermark through the production probe. 
diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/eligibility.go b/cmd/stellar-rpc/internal/fullhistory/streaming/eligibility.go index 626b42511..2312ce1df 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/eligibility.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/eligibility.go @@ -17,9 +17,9 @@ import ( // - chunkLastLedger < floor (past retention OR below earliest_ledger): discard. // Its artifact files, if any, carry their own keys and are picked up by the // prune stage on the same tick. -// - complete (last ledger <= through) and nothing pending (cold artifacts fully -// serve it): discard. -// - otherwise (live, or still producing): leave alone. +// - complete (last ledger <= through), nothing pending, and the window's index +// covers it (cold artifacts fully serve it): discard. +// - otherwise (live, or frozen and awaiting coverage): leave alone. // // discardHotTierForChunk is idempotent and re-derives from durable keys, so a // crash between freeze and discard self-heals on the next tick. @@ -47,14 +47,18 @@ func eligibleDiscardOps(cfg LifecycleConfig, cat *Catalog, through uint32) ([]fu case gate.ChunkBelowFloor(c): ops = append(ops, func() error { return discardHotTierForChunk(cat, c) }) case last <= through: - pending, perr := pendingArtifacts(c, cat) + pending, perr := pendingArtifacts(c, cfg, cat) if perr != nil { return nil, perr } - if pending.Empty() { + covers, cerr := indexCovers(c, cat) + if cerr != nil { + return nil, cerr + } + if pending.Empty() && covers { ops = append(ops, func() error { return discardHotTierForChunk(cat, c) }) } - // else: still producing — leave alone. + // else: frozen but awaiting coverage, or still producing — leave alone. } // default (last > through): the live chunk or above — ingestion's, never // the lifecycle's to touch. 
@@ -62,9 +66,12 @@ func eligibleDiscardOps(cfg LifecycleConfig, cat *Catalog, through uint32) ([]fu return ops, nil } -// pendingArtifacts lists which processChunk outputs chunk still needs: the -// per-chunk kinds (ledgers, events) that are not yet frozen. -func pendingArtifacts(c chunk.ID, cat *Catalog) (ArtifactSet, error) { +// pendingArtifacts lists which processChunk outputs chunk still needs. It is the +// per-chunk counterpart of backfill's per-window rule: ledgers and events must be +// frozen; txhash/.bin is exempt when the window's index already covers the +// chunk — after finalization the chunk:c:txhash key is legitimately demoted or +// swept, and regenerating the .bin would orphan it. +func pendingArtifacts(c chunk.ID, cfg LifecycleConfig, cat *Catalog) (ArtifactSet, error) { var need ArtifactSet for _, kind := range []Kind{KindLedgers, KindEvents} { state, err := cat.State(c, kind) @@ -75,11 +82,36 @@ func pendingArtifacts(c chunk.ID, cat *Catalog) (ArtifactSet, error) { need = need.Add(kind) } } + txState, err := cat.State(c, KindTxHash) + if err != nil { + return need, err + } + if txState != StateFrozen { + covers, cerr := indexCovers(c, cat) + if cerr != nil { + return need, cerr + } + if !covers { + need = need.Add(KindTxHash) + } + } return need, nil } -// eligiblePruneOps is the system's only file-deleter, driven entirely by keys. -// It returns one batched SweepChunkArtifacts closure for the chunk family. +// indexCovers reports whether the durable .idx for chunk's window already +// hashes that chunk — the unique "frozen" coverage's [Lo, Hi] contains it. +func indexCovers(c chunk.ID, cat *Catalog) (bool, error) { + fk, ok, err := cat.FrozenCoverage(cat.windows.WindowID(c)) + if err != nil { + return false, err + } + return ok && fk.Lo <= c && c <= fk.Hi, nil +} + +// eligiblePruneOps is the system's only file-deleter, driven entirely by keys — +// one stage, both key families. 
It returns closures wrapping the two sweep +// bodies (SweepIndexKey per index key, one batched SweepChunkArtifacts for the +// chunk family). // // "Wholly below the floor" is the RetentionGate's predicate — the same one the // discard scan and the read path use, so prune deletes exactly what the reader @@ -94,6 +126,26 @@ func eligiblePruneOps(cfg LifecycleConfig, cat *Catalog, through uint32) ([]func var ops []func() error + // Index family: transient debris from any window, plus frozen keys wholly + // below the floor. + idxKeys, err := cat.AllIndexKeys() + if err != nil { + return nil, err + } + for _, cov := range idxKeys { + switch { + case cov.State == StateFreezing || cov.State == StatePruning: + // Transient debris: a crashed build attempt ("freezing": delete, never + // salvage) or an unfinished demotion ("pruning"). Safe only because no + // build is in flight when this scan runs (it follows executePlan's + // return within the tick, and backfill finishes before the loop starts). + ops = append(ops, func() error { return cat.SweepIndexKey(cov) }) + case gate.WindowBelowFloor(cov.Window, cat.windows): + // A frozen index key wholly below the floor; the sweep demotes it first. + ops = append(ops, func() error { return cat.SweepIndexKey(cov) }) + } + } + // Chunk family: swept in one batch. refs, err := cat.ChunkArtifactKeys() if err != nil { @@ -106,8 +158,21 @@ func eligiblePruneOps(cfg LifecycleConfig, cat *Catalog, through uint32) ([]func // Wholly past retention: any state goes. sweep = append(sweep, ref) case ref.State == StatePruning: - // In-retention artifact demoted by a recovery. + // In-retention .bin demoted by its window's terminal commit batch. sweep = append(sweep, ref) + case ref.Kind == KindTxHash: + // "frozen" OR "freezing" chunk:c:txhash inside a FINALIZED window — + // re-derived (or left mid-write) by a widening backfill that crashed + // before its terminal rebuild, then abandoned when retention narrowed + // back. 
The terminal .idx provably covers the chunk and the resolver + // never re-materializes a covered window, so it is redundant. + redundant, rerr := txhashRedundantInFinalizedWindow(cat, ref.Chunk) + if rerr != nil { + return nil, rerr + } + if redundant { + sweep = append(sweep, ref) + } } } if len(sweep) > 0 { @@ -115,3 +180,17 @@ func eligiblePruneOps(cfg LifecycleConfig, cat *Catalog, through uint32) ([]func } return ops, nil } + +// txhashRedundantInFinalizedWindow reports whether c's window has a TERMINAL +// frozen index coverage (Hi == the window's last chunk). A frozen-or-freezing +// chunk:c:txhash key in such a window is a redundant input the prune scan sweeps +// — this is the branch that makes INV-2's no-leftover-txhash-keys clause self- +// healing rather than merely auditable. +func txhashRedundantInFinalizedWindow(cat *Catalog, c chunk.ID) (bool, error) { + w := cat.windows.WindowID(c) + fk, ok, err := cat.FrozenCoverage(w) + if err != nil { + return false, err + } + return ok && cat.windows.IsTerminalCoverage(fk), nil +} diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/execute.go b/cmd/stellar-rpc/internal/fullhistory/streaming/execute.go index 965ea0d47..d8cc28413 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/execute.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/execute.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "runtime" + "time" "golang.org/x/sync/errgroup" @@ -14,11 +15,12 @@ import ( ) // ExecConfig is the scheduler's dependency bundle — everything resolve, -// executePlan, and runBackfill read. It COMPOSES process.go's ProcessConfig -// (which drives processChunk + backfillSource) rather than redeclaring its +// executePlan, and runBackfill read. It COMPOSES the two existing primitive +// configs (process.go's ProcessConfig drives processChunk + backfillSource; +// build.go's BuildConfig drives buildThenSweep) rather than redeclaring their // fields, and adds the two scheduler knobs. 
The Catalog and Logger are shared, -// so they live here and are projected down to the primitive; the rest of the -// primitive config (HotProbe, Backend, …) is carried verbatim. +// so they live here and are projected down to the primitives; the rest of each +// primitive config (HotProbe, Backend, BuildOpts, …) is carried verbatim. // // This is the "one Config" the design's resolve/executePlan/runBackfill // pseudocode reads `cfg.Catalog`, `cfg.Workers`, and `cfg.MaxRetries` from; the @@ -34,26 +36,28 @@ type ExecConfig struct { // phase sink, distinct from Process.Sink (the per-data-type ingest sink). Metrics Metrics - // Process carries the primitive-specific dependencies. Its Catalog and - // Logger fields are filled from the shared ones above by the projection - // accessor, so a caller need not duplicate them. + // Process and Build carry the primitive-specific dependencies. Their Catalog + // and Logger fields are filled from the shared ones above by the projection + // accessors, so a caller need not duplicate them. Process ProcessConfig + Build BuildConfig // Workers is the ONLY concurrency knob: the size of the single bounded pool - // every chunk build draws from. Must be > 0 — a zero pool deadlocks - // executePlan (every task blocks acquiring a slot that never frees). - // Defaults to GOMAXPROCS via WithDefaults. + // every task (chunk build or index build) draws from. Must be > 0 — a zero + // pool deadlocks executePlan (every task blocks acquiring a slot that never + // frees). Defaults to GOMAXPROCS via WithDefaults. Workers int // MaxRetries bounds per-task retries before a task aborts the whole plan // (and, in production, the daemon). 0 means "try once, no retry". MaxRetries int - // runChunk is a test-only seam: when nil (production) the executor runs the - // real processChunk. Tests override it to drive the failure paths - // deterministically without standing up the full ingestion pipeline. It - // never appears in production wiring. 
+ // runChunk / runIndex are test-only seams: when nil (production) the executor + // runs the real processChunk / buildThenSweep. Tests override them to drive + // the wait-ordering and failure paths deterministically without standing up + // the full ingestion pipeline. They never appear in production wiring. runChunk func(ctx context.Context, cb ChunkBuild, cfg ExecConfig) error + runIndex func(ctx context.Context, b IndexBuild, cfg ExecConfig) error } // WithDefaults returns a copy of cfg with Workers defaulted to GOMAXPROCS when @@ -99,31 +103,61 @@ func (cfg ExecConfig) processConfig() ProcessConfig { return p } +// buildConfig projects the ExecConfig down to the BuildConfig buildThenSweep +// reads, filling the shared Catalog/Logger. +func (cfg ExecConfig) buildConfig() BuildConfig { + b := cfg.Build + b.Catalog = cfg.Catalog + b.Logger = cfg.Logger + return b +} + // executePlan runs a Plan on one bounded worker pool (cfg.Workers — the only // resource knob). It is the SAME executor both callers use: runBackfill (catch- -// up) and the lifecycle tick. The structure is map without a job tracker — -// chunk builds are the maps — and there is deliberately no task engine and no -// persisted task state: resolve re-plans from durable keys on every run, so -// there is nothing to resume. +// up) and the lifecycle tick. The structure is map/reduce without a job +// tracker — chunk builds are the maps, index builds are the per-group reduces — +// and there is deliberately no task engine and no persisted task state: +// resolve re-plans from durable keys on every run, so there is nothing to +// resume. // -// Each ChunkBuild acquires a worker slot, runs (with retries), and on SUCCESS -// closes its done-channel AFTER its artifacts are durable (done-channels signal -// SUCCESS, not mere completion). A build that exhausts its retries LEAVES the -// channel open and RETURNS the error, which cancels gctx. 
+// The dependency graph is two strata with one edge type — an IndexBuild waits +// on the ChunkBuilds inside its coverage — expressed directly in the runtime: // -// At most Workers chunk builds execute at any instant. A task exhausting its +// - Each ChunkBuild closes its done-channel only on SUCCESS, AFTER its +// artifacts are durable (item R2-2): done-channels signal SUCCESS, not mere +// completion. A build that exhausts its retries LEAVES the channel open and +// RETURNS the error, which cancels gctx. +// - Each IndexBuild FIRST waits on the done-channels of the in-coverage +// chunks that have a ChunkBuild in this plan (already-frozen inputs have no +// channel and need no wait), THEN acquires a worker slot. Waiting before +// acquiring is what avoids deadlock: a parked-on-its-dependency index build +// holds no slot, so chunk builds always have slots to make progress. (The +// reverse order — acquire then wait — could fill every slot with index +// builds blocked on chunk builds that can never get a slot.) +// - A failed chunk build never closes its channel, so a dependent index build +// never proceeds on a missing input: it unblocks through the <-gctx.Done() +// case (the failure cancelled gctx) and bails with gctx.Err(). +// buildTxhashIndex also keeps a loud .bin precondition as a cheap defensive +// backstop (kept — see buildTxhashIndex), but the success-semantics close is +// the primary guard now. +// +// The "ready set" a DAG scheduler would maintain is simply the goroutines +// parked on the one semaphore; thousands of goroutines may exist (a few KB +// each), but at most Workers execute at any instant. A task exhausting its // retries returns an error, which errgroup propagates: gctx is canceled, every -// other task's slot-acquire/processChunk observes it, and g.Wait returns the -// first error — the daemon aborts and a restart re-resolves from durable keys.
+// other task's wait/slot-acquire/processChunk observes it, and g.Wait returns +// the first error — the daemon aborts and a restart re-resolves from durable +// keys. func executePlan(ctx context.Context, plan Plan, cfg ExecConfig) error { if err := cfg.validate(); err != nil { return err } - // One slot per worker — the single pool all chunk builds share. + // One slot per worker — the single pool all task kinds share. slots := make(chan struct{}, cfg.Workers) - // One done-channel per planned chunk build, created up front. + // One done-channel per planned chunk build, created up front so an index + // build can look up its in-coverage dependencies before any goroutine runs. done := make(map[chunk.ID]chan struct{}, len(plan.ChunkBuilds)) for _, cb := range plan.ChunkBuilds { done[cb.Chunk] = make(chan struct{}) @@ -136,6 +170,13 @@ func executePlan(ctx context.Context, plan Plan, cfg ExecConfig) error { return processChunk(gctx, cb.Chunk, cb.Artifacts, procCfg) } } + runIndex := cfg.runIndex + if runIndex == nil { + buildCfg := cfg.buildConfig() + runIndex = func(gctx context.Context, b IndexBuild, _ ExecConfig) error { + return buildThenSweep(gctx, b, buildCfg) + } + } g, gctx := errgroup.WithContext(ctx) @@ -149,15 +190,53 @@ func executePlan(ctx context.Context, plan Plan, cfg ExecConfig) error { return runChunk(gctx, cb, cfg) }); err != nil { // SUCCESS semantics: leave done[cb.Chunk] OPEN and return the error. - // errgroup cancels gctx and g.Wait returns the first error. + // errgroup cancels gctx; a dependent index build waiting on this + // chunk unblocks through its <-gctx.Done() case and bails. return err } - // Success: artifacts are durable. + // Success: artifacts are durable. Closing now unblocks dependents that + // may safely read this chunk's frozen .bin. close(done[cb.Chunk]) return nil }) } + for _, b := range plan.IndexBuilds { + g.Go(func() error { + // Step 1 — wait on the in-coverage chunk builds FIRST, holding no slot. 
+ // Dependencies are DERIVED from the plan (every in-[Lo,Hi] chunk that + // has a ChunkBuild), never carried on the IndexBuild, so they cannot + // drift from what was actually scheduled. + for c := b.Lo; ; c++ { + if ch, ok := done[c]; ok { + select { + case <-ch: + case <-gctx.Done(): + return gctx.Err() + } + } + if c == b.Hi { + break + } + } + // Step 2 — only now acquire a slot (index builds draw from the same + // pool) and run the build + eager sweep. + if err := acquireSlot(gctx, slots); err != nil { + return err + } + defer releaseSlot(slots) + // Time the build and report its burst throughput — chunks folded into + // one .idx over the wall-clock. Reported on completion (success OR + // exhausted retries); a failed rebuild's duration is signal too. + start := time.Now() + err := withRetries(gctx, cfg.MaxRetries, func() error { + return runIndex(gctx, b, cfg) + }) + cfg.metrics().Rebuild(int(b.Hi-b.Lo)+1, time.Since(start)) + return err + }) + } + return g.Wait() } diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/execute_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/execute_test.go index 4d4738dbc..9308de6c5 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/execute_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/execute_test.go @@ -14,22 +14,27 @@ import ( ) // --------------------------------------------------------------------------- -// Executor test harness. The runChunk seam lets a test drive the chunk-build -// pool deterministically: a fake chunk build records its order and optionally -// blocks on a release signal. +// Executor test harness. The runChunk/runIndex seams let a test drive the +// dependency graph deterministically: a fake chunk build records its order and +// optionally blocks on a release signal; a fake index build records the chunk +// states it observed at the instant it ran. 
// --------------------------------------------------------------------------- -// execRecorder captures chunk task execution so a test can assert completion. -// All access is mutex-guarded — the executor runs tasks on many goroutines. +// execRecorder captures the interleaving of chunk and index task execution so a +// test can assert wait ordering. All access is mutex-guarded — the executor +// runs tasks on many goroutines. type execRecorder struct { mu sync.Mutex // chunkDone[c] is true once the chunk build for c has returned. chunkDone map[chunk.ID]bool - order []string + // indexSawAllDeps[w] records, for each index build's window, whether every + // in-coverage chunk build had already completed when the index build began. + indexSawAllDeps map[WindowID]bool + order []string } func newExecRecorder() *execRecorder { - return &execRecorder{chunkDone: map[chunk.ID]bool{}} + return &execRecorder{chunkDone: map[chunk.ID]bool{}, indexSawAllDeps: map[WindowID]bool{}} } func (r *execRecorder) markChunkDone(c chunk.ID) { @@ -39,25 +44,48 @@ func (r *execRecorder) markChunkDone(c chunk.ID) { r.order = append(r.order, "chunk:"+c.String()) } -// execTestCfg builds an ExecConfig with the chunk-build seam installed. workers -// sets the pool size. -func execTestCfg(cat *Catalog, workers int, runChunk func(context.Context, ChunkBuild, ExecConfig) error) ExecConfig { +// indexBegan records, for window w covering [lo,hi], whether all in-coverage +// chunks were already done — the invariant the wait ordering must guarantee. +func (r *execRecorder) indexBegan(w WindowID, lo, hi chunk.ID) { + r.mu.Lock() + defer r.mu.Unlock() + all := true + for c := lo; c <= hi; c++ { + if !r.chunkDone[c] { + all = false + break + } + if c == hi { + break + } + } + r.indexSawAllDeps[w] = all + r.order = append(r.order, "index:"+w.String()) +} + +// execTestCfg builds an ExecConfig with the task seams installed. workers sets +// the pool size. 
+func execTestCfg(cat *Catalog, workers int, runChunk func(context.Context, ChunkBuild, ExecConfig) error, + runIndex func(context.Context, IndexBuild, ExecConfig) error, +) ExecConfig { return ExecConfig{ Catalog: cat, Logger: silentLogger(), Workers: workers, runChunk: runChunk, + runIndex: runIndex, } } // --------------------------------------------------------------------------- -// No deadlock at Workers=1; every planned chunk build runs. +// Wait ordering + no deadlock at Workers=1. // --------------------------------------------------------------------------- -func TestExecutePlan_RunsEveryChunkBuild_Workers1(t *testing.T) { - cat, _ := testCatalog(t) +func TestExecutePlan_IndexWaitsOnInCoverageChunks_Workers1(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) rec := newExecRecorder() + // Two windows, each with two chunk builds and one index build covering them. plan := Plan{ ChunkBuilds: []ChunkBuild{ {Chunk: 0, Artifacts: AllArtifacts()}, @@ -65,21 +93,37 @@ func TestExecutePlan_RunsEveryChunkBuild_Workers1(t *testing.T) { {Chunk: 4, Artifacts: AllArtifacts()}, {Chunk: 5, Artifacts: AllArtifacts()}, }, + IndexBuilds: []IndexBuild{ + {Window: 0, Lo: 0, Hi: 1}, + {Window: 1, Lo: 4, Hi: 5}, + }, } - cfg := execTestCfg(cat, 1, func(_ context.Context, cb ChunkBuild, _ ExecConfig) error { - rec.markChunkDone(cb.Chunk) - return nil - }) + cfg := execTestCfg(cat, 1, + func(_ context.Context, cb ChunkBuild, _ ExecConfig) error { + rec.markChunkDone(cb.Chunk) + return nil + }, + func(_ context.Context, b IndexBuild, _ ExecConfig) error { + rec.indexBegan(b.Window, b.Lo, b.Hi) + return nil + }, + ) require.NoError(t, executePlan(context.Background(), plan, cfg), - "Workers=1 must not deadlock") + "Workers=1 must not deadlock — index builds wait on done-channels BEFORE acquiring the single slot") + + // Every index build observed all of its in-coverage chunk builds as already + // complete — the freeze-before-build dependency held. 
+ require.True(t, rec.indexSawAllDeps[0], "window 0 index must run after chunks 0,1") + require.True(t, rec.indexSawAllDeps[1], "window 1 index must run after chunks 4,5") require.Len(t, rec.chunkDone, 4) } -// A high worker count runs every chunk build concurrently without losing any. -func TestExecutePlan_RunsEveryChunkBuildUnderConcurrency(t *testing.T) { - cat, _ := testCatalog(t) +// A high worker count must also honor the per-window dependency (no index build +// jumps ahead of its own chunks) while running independent windows concurrently. +func TestExecutePlan_DependencyHoldsUnderConcurrency(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) rec := newExecRecorder() plan := Plan{ @@ -89,37 +133,116 @@ func TestExecutePlan_RunsEveryChunkBuildUnderConcurrency(t *testing.T) { {Chunk: 2, Artifacts: AllArtifacts()}, {Chunk: 3, Artifacts: AllArtifacts()}, }, + IndexBuilds: []IndexBuild{{Window: 0, Lo: 0, Hi: 3}}, } - cfg := execTestCfg(cat, 8, func(_ context.Context, cb ChunkBuild, _ ExecConfig) error { - time.Sleep(time.Duration(uint32(cb.Chunk)+1) * 5 * time.Millisecond) - rec.markChunkDone(cb.Chunk) - return nil - }) + cfg := execTestCfg(cat, 8, + func(_ context.Context, cb ChunkBuild, _ ExecConfig) error { + // Stagger completion so an unsynchronized index build would likely + // observe a not-yet-done chunk if the wait were broken. + time.Sleep(time.Duration(uint32(cb.Chunk)+1) * 5 * time.Millisecond) + rec.markChunkDone(cb.Chunk) + return nil + }, + func(_ context.Context, b IndexBuild, _ ExecConfig) error { + rec.indexBegan(b.Window, b.Lo, b.Hi) + return nil + }, + ) require.NoError(t, executePlan(context.Background(), plan, cfg)) - require.Len(t, rec.chunkDone, 4) + require.True(t, rec.indexSawAllDeps[0], + "the index build must wait on ALL four in-coverage chunk builds") +} + +// An index build whose coverage chunks are ALREADY frozen (no ChunkBuild in the +// plan) must run immediately — there is no channel to wait on. 
Models the +// risen-floor / re-derive case where some inputs self-skipped. +func TestExecutePlan_IndexWithNoInPlanDepsRunsImmediately(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) + var ran atomic.Bool + + plan := Plan{ + // No chunk builds — every input already frozen. + IndexBuilds: []IndexBuild{{Window: 0, Lo: 0, Hi: 3}}, + } + cfg := execTestCfg(cat, 2, + func(context.Context, ChunkBuild, ExecConfig) error { return nil }, + func(context.Context, IndexBuild, ExecConfig) error { ran.Store(true); return nil }, + ) + require.NoError(t, executePlan(context.Background(), plan, cfg)) + require.True(t, ran.Load(), "an index build with no in-plan deps runs without waiting") } // --------------------------------------------------------------------------- -// SUCCESS semantics (item R2-2): a failed chunk build returns the error, which -// cancels gctx; the plan ALWAYS aborts with the first error. +// SUCCESS semantics (item R2-2): a failed chunk build LEAVES its done-channel +// OPEN and returns the error, which cancels gctx. The dependent index build is +// therefore never wedged forever waiting on a failed input: it unblocks through +// the <-gctx.Done() case in its wait loop and bails with gctx.Err() — it never +// proceeds on a missing input. The plan ALWAYS aborts, and the index build never +// hangs (g.Wait returning is itself the proof). 
// --------------------------------------------------------------------------- -func TestExecutePlan_FailedChunkAbortsPlan(t *testing.T) { - cat, _ := testCatalog(t) +func TestExecutePlan_FailedChunkAbortsPlanAndIndexNeverHangs(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) chunkErr := errors.New("chunk build boom") plan := Plan{ ChunkBuilds: []ChunkBuild{{Chunk: 0, Artifacts: AllArtifacts()}}, + IndexBuilds: []IndexBuild{{Window: 0, Lo: 0, Hi: 0}}, } - cfg := execTestCfg(cat, 1, func(context.Context, ChunkBuild, ExecConfig) error { return chunkErr }) + cfg := execTestCfg(cat, 1, + func(context.Context, ChunkBuild, ExecConfig) error { return chunkErr }, + func(_ context.Context, _ IndexBuild, _ ExecConfig) error { + // Under SUCCESS semantics the failed chunk never closes its channel, so + // this index build should bail through <-gctx.Done() and NEVER reach + // here. (Left as a guard: if it ever did run, the plan still aborts.) + return errors.New("index build must bail via gctx, never run on a failed input") + }, + ) + // The plan aborts regardless of which branch the index build took. err := executePlan(context.Background(), plan, cfg) require.Error(t, err, "a task exhausting retries aborts the plan") - require.ErrorIs(t, err, chunkErr, "the chunk failure propagates") + require.ErrorIs(t, err, chunkErr, "the first error (the chunk failure) propagates") +} + +// The production-path version: a REAL buildThenSweep. Under SUCCESS semantics +// (item R2-2) the failed chunk build leaves its done-channel open, so the index +// build normally bails via <-gctx.Done() before it ever runs. buildTxhashIndex's +// loud .bin precondition is KEPT as a cheap defensive backstop for the case the +// index build wins the race and starts anyway. Either way the invariant holds: +// NO coverage key is written when an input chunk's .bin is not frozen. 
+func TestExecutePlan_FailedChunkHitsLoudPrecondition(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) + + plan := Plan{ + ChunkBuilds: []ChunkBuild{{Chunk: 0, Artifacts: NewArtifactSet(KindTxHash)}}, + IndexBuilds: []IndexBuild{{Window: 0, Lo: 0, Hi: 0}}, + } + + // runChunk fails (never freezes chunk:0:txhash); runIndex is the REAL + // buildThenSweep via the production path (cfg.runIndex left nil). + cfg := ExecConfig{ + Catalog: cat, + Logger: silentLogger(), + Workers: 1, + runChunk: func(context.Context, ChunkBuild, ExecConfig) error { + return errors.New("simulated chunk build failure: .bin never frozen") + }, + // runIndex nil ⇒ executePlan uses the real buildThenSweep. + } + + err := executePlan(context.Background(), plan, cfg) + require.Error(t, err) + + // The real precondition fired: chunk 0's txhash is not "frozen", so + // buildTxhashIndex refused before touching any key — no coverage was created. + covs, qerr := cat.IndexKeys(0) + require.NoError(t, qerr) + require.Empty(t, covs, "no index coverage key may be written when the .bin precondition fails") } // --------------------------------------------------------------------------- @@ -127,7 +250,7 @@ func TestExecutePlan_FailedChunkAbortsPlan(t *testing.T) { // --------------------------------------------------------------------------- func TestExecutePlan_RetriesThenSucceeds(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 4) var attempts atomic.Int32 plan := Plan{ChunkBuilds: []ChunkBuild{{Chunk: 0, Artifacts: AllArtifacts()}}} @@ -145,7 +268,7 @@ func TestExecutePlan_RetriesThenSucceeds(t *testing.T) { } func TestExecutePlan_ExhaustsRetriesAndAborts(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 4) var attempts atomic.Int32 plan := Plan{ChunkBuilds: []ChunkBuild{{Chunk: 0, Artifacts: AllArtifacts()}}} @@ -161,7 +284,7 @@ func TestExecutePlan_ExhaustsRetriesAndAborts(t *testing.T) { } func TestExecutePlan_ZeroWorkersIsLoudNotADeadlock(t 
*testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 4) cfg := ExecConfig{Catalog: cat, Logger: silentLogger(), Workers: 0} err := executePlan(context.Background(), Plan{ChunkBuilds: []ChunkBuild{{Chunk: 0}}}, cfg) require.ErrorContains(t, err, "Workers must be > 0", @@ -171,7 +294,7 @@ func TestExecutePlan_ZeroWorkersIsLoudNotADeadlock(t *testing.T) { // Context cancellation propagates: a long-running chunk build observing a // canceled context returns promptly and the whole plan aborts. func TestExecutePlan_ContextCancelAborts(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 4) ctx, cancel := context.WithCancel(context.Background()) plan := Plan{ChunkBuilds: []ChunkBuild{ diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/hooks.go b/cmd/stellar-rpc/internal/fullhistory/streaming/hooks.go index 57903f1c0..798ab5817 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/hooks.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/hooks.go @@ -21,9 +21,9 @@ import "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/ // - beforeUnlink fires AFTER the frozen->pruning demote and BEFORE the // unlink. Asserts never-unlink-under-a-frozen-key: the value must already // be "pruning"; if the demote were dropped, it would still be "frozen". -// - failCommitBatch, when it returns true, forces a recovery batch callback to -// return an error so the batch is dropped wholesale. Asserts all-or-nothing: -// nothing the batch would have written may be observable. +// - failCommitBatch, when it returns true, forces CommitIndex's batch +// callback to return an error so the batch is dropped wholesale. Asserts +// all-or-nothing: nothing the batch would have written may be observable. // - afterMarkFreezing fires INSIDE processChunk, AFTER MarkChunkFreezing has // put every requested kind's key to "freezing" and BEFORE any file I/O. 
// Asserts mark-then-write: at this instant every requested kind reads @@ -31,6 +31,16 @@ import "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/ // reordering the write ahead of it) would leave the keys absent (or a file // on disk) here — defeating "every file on disk is reachable from a key" // and crash detectability. +// - afterIndexMark fires INSIDE buildTxhashIndex, AFTER the coverage key is +// put "freezing" and BEFORE the .idx is written. Asserts the §7.6 "after +// step 2, mid step 3" row: the new coverage reads "freezing", the +// predecessor is still the unique "frozen" coverage, and no reader can +// resolve the in-flight name. +// - afterCommitBeforeSweep fires INSIDE buildThenSweep, AFTER buildTxhashIndex's +// commit batch landed and BEFORE the eager sweeps run. Asserts the §7.6 +// "after step 4, before the eager sweep" row: the new coverage is frozen +// and live, the predecessor and (terminal) .bin inputs are "pruning" sweep +// work that has not yet run. A crash here re-runs the sweeps on restart. // - beforeHotTransient fires INSIDE PutHotTransient, BEFORE the hot:chunk key // is written "transient", carrying the chunk whose key is about to appear. // At a boundary handoff this is the exact instant the next chunk's key is @@ -40,11 +50,13 @@ import "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/ // partition moves. Dropping the close-before-open order would leave the // predecessor's DB open under a live writer here. 
type crashHooks struct { - beforeKeyDelete func() - beforeUnlink func() - failCommitBatch func() bool //nolint:unused // fired from a later layer (recovery/CommitIndex) - afterMarkFreezing func() //nolint:unused // fired from a later layer (processChunk) - beforeHotTransient func(chunkID chunk.ID) + beforeKeyDelete func() + beforeUnlink func() + failCommitBatch func() bool + afterMarkFreezing func() + afterIndexMark func() + afterCommitBeforeSweep func() + beforeHotTransient func(chunkID chunk.ID) } func (h crashHooks) fireBeforeKeyDelete() { @@ -71,6 +83,18 @@ func (h crashHooks) fireAfterMarkFreezing() { } } +func (h crashHooks) fireAfterIndexMark() { + if h.afterIndexMark != nil { + h.afterIndexMark() + } +} + +func (h crashHooks) fireAfterCommitBeforeSweep() { + if h.afterCommitBeforeSweep != nil { + h.afterCommitBeforeSweep() + } +} + func (h crashHooks) fireBeforeHotTransient(chunkID chunk.ID) { if h.beforeHotTransient != nil { h.beforeHotTransient(chunkID) diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/ingest.go b/cmd/stellar-rpc/internal/fullhistory/streaming/ingest.go index e9302b78e..b6cd1642e 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/ingest.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/ingest.go @@ -15,9 +15,10 @@ import ( // The hot-DB ingestion loop (DECISION (a)). One goroutine polls one ledger // source by sequence (the design's indexed core.GetLedger(ctx, seq)) into the -// per-chunk hot DB, committing each ledger as one atomic synced WriteBatch over -// the ledger CF. A ledger is therefore fully present or fully absent, and the -// per-chunk frontier is a SINGLE authoritative value — the DB's +// SINGLE per-chunk shared multi-CF hot DB, committing each ledger as one atomic +// synced WriteBatch across all CFs (ledgers + the three events CFs + the 16 +// txhash CFs). 
A ledger is therefore fully present across every CF or fully +// absent, and the per-chunk frontier is a SINGLE authoritative value — the DB's // MaxCommittedSeq. The loop keeps NO progress variable: the last synced batch IS // the watermark, re-derived from durable catalog state at the next startup (see // lastCommittedLedger). @@ -41,13 +42,13 @@ type LedgerGetter interface { GetLedger(ctx context.Context, seq uint32) (xdr.LedgerCloseMetaView, error) } -// allHotTypes is the hot tier's ingest selection: every data type the per-chunk -// DB holds. The hot DB is the sole copy of a chunk's recently ingested ledgers -// until the cold artifacts are frozen, so it ingests them in the one atomic -// batch. +// allHotTypes is the hot tier's ingest selection: every data type the shared +// per-chunk DB holds. The hot DB is the sole copy of a chunk's recently +// ingested ledgers until the cold artifacts are frozen, so it always ingests +// all three types in the one atomic batch. // //nolint:gochecknoglobals // immutable selection, the production ingest config -var allHotTypes = hotchunk.Ingest{Ledgers: true, Events: true} +var allHotTypes = hotchunk.Ingest{Ledgers: true, Txhash: true, Events: true} // openHotTierForChunk opens (or recovers, or creates) the ONE shared hot DB for // chunkID under the Phase A catalog hot:chunk bracket, returning an open handle @@ -169,7 +170,7 @@ func discardHotTierForChunk(cat *Catalog, chunkID chunk.ID) error { } // runIngestionLoop polls core for LCMs by sequence into hotDB, committing each -// ledger as one atomic synced WriteBatch over the ledger CF, and at each chunk +// ledger as one atomic synced WriteBatch across all CFs, and at each chunk // boundary hands the live-chunk frontier forward by closing the just-filled DB // and opening the next chunk's. 
It returns the error GetLedger or a boundary // step produced (nil never, since the poll is unbounded) — the daemon top level @@ -245,9 +246,9 @@ func runIngestionLoop( return fmt.Errorf("streaming: get ledger %d: %w", seq, gerr) } - // One atomic, synced WriteBatch — a ledger is either fully in the hot DB - // or absent. The batch IS the durability boundary; no progress variable - // is kept. + // One atomic, synced WriteBatch across all enabled CFs — a ledger is + // either fully in the hot DB or absent. The batch IS the durability + // boundary; no progress variable is kept. if _, ierr := hotDB.IngestLedger(seq, lcm, ingestTypes); ierr != nil { return fmt.Errorf("streaming: ingest ledger %d: %w", seq, ierr) } diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/ingest_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/ingest_test.go index d91baa1bf..cfb77282d 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/ingest_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/ingest_test.go @@ -98,7 +98,9 @@ func openLiveHotDB(t *testing.T, cat *Catalog, c chunk.ID) *hotchunk.DB { // seedWatermark writes a single ledgers-CF entry at seq into the chunk's hot DB // so the indexed poll resumes at seq+1 — letting a boundary test drive the loop // over only the last ledger or two of a chunk instead of all 10,000. The -// returned DB is the (re-opened, ready) live handle the loop then owns. +// returned DB is the (re-opened, ready) live handle the loop then owns. Used by +// the boundary tests, whose ingestTypes are Ledgers+Txhash (no events +// contiguity requirement, so a sparse ledgers-CF watermark is valid). func seedWatermark(t *testing.T, cat *Catalog, c chunk.ID, seq uint32) *hotchunk.DB { t.Helper() db := openLiveHotDB(t, cat, c) @@ -201,18 +203,18 @@ func TestDiscardHotTier_RemovesDirAndKey(t *testing.T) { // runIngestionLoop — atomic landing. 
// --------------------------------------------------------------------------- -// TestRunIngestionLoop_LedgerLandsInLedgerCF: polling a short contiguous prefix -// lands each ledger atomically in the ledger CF — the single watermark advances -// to the last committed seq, and the CF is readable. The getter then errs -// (backend crash), which the loop returns. -func TestRunIngestionLoop_LedgerLandsInLedgerCF(t *testing.T) { +// TestRunIngestionLoop_LedgerLandsAcrossAllCFs: polling a short contiguous +// prefix lands each ledger atomically across the ledgers, txhash, and events +// CFs — the single watermark advances to the last committed seq, and every CF +// is readable. The getter then errs (backend crash), which the loop returns. +func TestRunIngestionLoop_LedgerLandsAcrossAllCFs(t *testing.T) { cat, _ := testCatalog(t) c := chunk.ID(0) first := c.FirstLedger() db := openLiveHotDB(t, cat, c) - // A short contiguous prefix from the chunk's first ledger, then the poll runs - // dry and errs. + // A short contiguous prefix from the chunk's first ledger (events require + // strict contiguity from FirstLedger), then the poll runs dry and errs. getter := getterForSeqs(t, first, first+2) getter.endErr = errors.New("backend crashed") ch := make(chan chunk.ID, lifecycleQueueDepth) @@ -221,7 +223,7 @@ func TestRunIngestionLoop_LedgerLandsInLedgerCF(t *testing.T) { require.Error(t, err, "poll ran past the prefix and the getter errored") require.NotErrorIs(t, err, ErrHotVolumeLost) - // Reopen the (loop-closed) DB and assert the ledger CF advanced. + // Reopen the (loop-closed) DB and assert every CF advanced together. 
reopened, err := hotchunk.Open(cat.layout.HotChunkPath(c), c, silentLogger()) require.NoError(t, err) t.Cleanup(func() { _ = reopened.Close() }) @@ -234,6 +236,7 @@ func TestRunIngestionLoop_LedgerLandsInLedgerCF(t *testing.T) { raw, err := reopened.Ledgers().GetLedgerRaw(first + 2) require.NoError(t, err) assert.NotEmpty(t, raw) + assert.Equal(t, uint32(0), reopened.Events().NextEventID(), "zero-tx ledgers carry no events") } // --------------------------------------------------------------------------- @@ -274,7 +277,7 @@ func TestRunIngestionLoop_BoundaryClosesBeforeNextKey(t *testing.T) { // ledgers+txhash only — fast, and the boundary detection is seq-based. Poll // the chunk's true last ledger (boundary 0->1), then the first ledger of the // next chunk, then the getter errs. - ingestTypes := hotchunk.Ingest{Ledgers: true} + ingestTypes := hotchunk.Ingest{Ledgers: true, Txhash: true} getter := &fakeLedgerGetter{frames: map[uint32][]byte{ last: zeroTxLCMBytes(t, last), next.FirstLedger(): zeroTxLCMBytes(t, next.FirstLedger()), @@ -312,7 +315,7 @@ func TestRunIngestionLoop_BoundaryNotifiesCompletedChunk(t *testing.T) { c1 := c + 1 db := seedWatermark(t, cat, c, c.LastLedger()-1) - ingestTypes := hotchunk.Ingest{Ledgers: true} + ingestTypes := hotchunk.Ingest{Ledgers: true, Txhash: true} getter := &fakeLedgerGetter{frames: map[uint32][]byte{ c.LastLedger(): zeroTxLCMBytes(t, c.LastLedger()), // boundary 0->1 c1.FirstLedger(): zeroTxLCMBytes(t, c1.FirstLedger()), // a ledger in chunk 1 diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/keys.go b/cmd/stellar-rpc/internal/fullhistory/streaming/keys.go index eecdb2401..7ffeec049 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/keys.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/keys.go @@ -9,8 +9,9 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) -// State is an artifact key's lifecycle value. 
The empty State (key absent) -// means "neither file nor in-progress write exists". +// State is an artifact key's lifecycle value. Per-chunk artifacts and index +// coverages share the same three states with the same meanings; the empty +// State (key absent) means "neither file nor in-progress write exists". type State string const ( @@ -19,7 +20,8 @@ const ( // key alone and every file on disk is reachable from a key. StateFreezing State = "freezing" // StateFrozen — the file and its dirent are fsynced and durable. Truth: - // readers and the resolver trust it blindly. + // readers, the resolver, and buildTxhashIndex's precondition trust it + // blindly. StateFrozen State = "frozen" // StatePruning — the file is queued for removal; it may or may not still be // on disk. A sweep finishes the unlink and then deletes the key. @@ -48,16 +50,28 @@ const ( KindLedgers Kind = "ledgers" // KindEvents is the events cold segment (three files per chunk). KindEvents Kind = "events" + // KindTxHash is the per-chunk sorted txhash run (.bin). Transient — + // removed at window finalization. + KindTxHash Kind = "txhash" ) // allKinds is the canonical iteration order for per-chunk artifact kinds. // //nolint:gochecknoglobals // immutable kind registry, single source of truth -var allKinds = []Kind{KindLedgers, KindEvents} +var allKinds = []Kind{KindLedgers, KindEvents, KindTxHash} // AllKinds returns the per-chunk artifact kinds in canonical order. func AllKinds() []Kind { return append([]Kind(nil), allKinds...) } +// WindowID identifies a txhash index window: a contiguous run of +// chunks_per_txhash_index chunks. Distinct type from chunk.ID so window ids +// and chunk ids never silently interchange — both are uint32. +type WindowID uint32 + +// String formats a window id as zero-padded 8-digit decimal — the same width +// chunk ids use, matching the {window:08d} segment in keys and paths. 
+func (w WindowID) String() string { return fmt.Sprintf("%08d", uint32(w)) } + // --------------------------------------------------------------------------- // Key prefixes and constructors. Every key is built here so the key<->path // bijection has exactly one source of truth (see paths.go for the inverse). @@ -66,9 +80,11 @@ func AllKinds() []Kind { return append([]Kind(nil), allKinds...) } const ( chunkPrefix = "chunk:" hotPrefix = "hot:chunk:" + indexPrefix = "index:" // Config pins. - configEarliestLedger = "config:earliest_ledger" + configEarliestLedger = "config:earliest_ledger" + configChunksPerTxhashIdx = "config:chunks_per_txhash_index" ) // chunkKey returns the per-chunk artifact key chunk:{chunk:08d}:{kind}. @@ -81,11 +97,36 @@ func hotChunkKey(c chunk.ID) string { return hotPrefix + c.String() } +// indexKey returns the index coverage key index:{window:08d}:{lo:08d}:{hi:08d}. +// The COVERAGE [lo, hi] lives in the key NAME; the value is pure lifecycle +// state. lo > hi is a programmer error worth surfacing loudly. +func indexKey(w WindowID, lo, hi chunk.ID) string { + if lo > hi { + panic(fmt.Sprintf("streaming: indexKey lo %s > hi %s", lo, hi)) + } + return indexPrefix + w.String() + ":" + lo.String() + ":" + hi.String() +} + +// indexWindowPrefix returns the scan prefix for all coverage keys of one +// window: index:{window:08d}:. Used to enumerate a window's coverages. +func indexWindowPrefix(w WindowID) string { + return indexPrefix + w.String() + ":" +} + // --------------------------------------------------------------------------- // Key parsing. The inverse of the constructors above; every parser is the // reverse bijection of exactly one constructor. // --------------------------------------------------------------------------- +// IndexCoverage is one parsed index coverage key: the window, the covered +// chunk range [Lo, Hi], the full key string, and its lifecycle State. 
+type IndexCoverage struct { + Window WindowID + Lo, Hi chunk.ID + Key string + State State +} + // parseChunkKey decodes chunk:{chunk:08d}:{kind}. ok is false for any key that // is not a well-formed per-chunk artifact key. func parseChunkKey(key string) (chunk.ID, Kind, bool) { @@ -121,10 +162,44 @@ func parseHotChunkKey(key string) (chunk.ID, bool) { return chunk.ID(n), true } +// parseIndexKey decodes index:{window:08d}:{lo:08d}:{hi:08d}. The value is not +// part of the key; callers fill IndexCoverage.State from the scanned value. +func parseIndexKey(key string) (IndexCoverage, bool) { + rest, found := strings.CutPrefix(key, indexPrefix) + if !found { + return IndexCoverage{}, false + } + parts := strings.Split(rest, ":") + if len(parts) != 3 { + return IndexCoverage{}, false + } + w, err := parsePadded(parts[0]) + if err != nil { + return IndexCoverage{}, false + } + lo, err := parsePadded(parts[1]) + if err != nil { + return IndexCoverage{}, false + } + hi, err := parsePadded(parts[2]) + if err != nil { + return IndexCoverage{}, false + } + if lo > hi { + return IndexCoverage{}, false + } + return IndexCoverage{ + Window: WindowID(w), + Lo: chunk.ID(lo), + Hi: chunk.ID(hi), + Key: key, + }, true +} + // parsePadded parses an 8-digit zero-padded decimal segment as produced by -// chunk.ID.String(). It enforces the fixed 8-char width so the bijection is -// exact — a non-padded or wrong-width segment is rejected, not silently -// accepted. +// chunk.ID.String()/WindowID.String(). It enforces the fixed 8-char width so +// the bijection is exact — a non-padded or wrong-width segment is rejected, +// not silently accepted. 
func parsePadded(s string) (uint32, error) { if len(s) != 8 { return 0, fmt.Errorf("streaming: %q is not an 8-digit padded id", s) diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/lifecycle.go b/cmd/stellar-rpc/internal/fullhistory/streaming/lifecycle.go index 32fbc3184..0ec3dd2bf 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/lifecycle.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/lifecycle.go @@ -14,9 +14,9 @@ import ( // // 1. plan-and-execute — the SAME resolve + executePlan catch-up uses, over // [floor, lastChunk]. This is where a just-closed chunk freezes (from its hot -// DB via backfillSource's hot branch). lastChunk is the id ingestion handed -// over — "how far to go"; what to build, discard, and prune is read from the -// catalog. +// DB via backfillSource's hot branch) and the current window's index folds it +// in. lastChunk is the id ingestion handed over — "how far to go"; what to +// build, discard, and prune is read from the catalog. // 2. discard scan — retire hot DBs the cold artifacts now fully serve (or that // fell past retention). // 3. prune scan — sweep demoted and past-retention files, both key families. @@ -32,8 +32,8 @@ import ( // produce. So the tick's plan range never starts below existing storage: // start is RAISED to lowestMaterializedChunk when the floor sits lower. // Extending the bottom of storage (retention widening) is exclusively catch- -// up's job; producibility is enforced lazily there, per chunk, by the cold -// ingest during the build (no pre-flight gate). +// up's job; producibility is enforced lazily there, per chunk, by the +// buildTxhashIndex .bin precondition during the build (no pre-flight gate). // // The two goroutines (ingestion, lifecycle) share NO state: the tick is a pure // function of the catalog, deriving everything from durable keys on every run. 
@@ -235,9 +235,9 @@ func runLifecycleTick(ctx context.Context, cfg LifecycleConfig, cat *Catalog, la start = int64(low) } - // Stage 1 — plan-and-execute (the freeze). Timed and counted as one phase; - // the plan's size is the chunk build count (0 when there is no producible - // range, still reported so the empty-tick rate is visible). + // Stage 1 — plan-and-execute (the freeze + index fold). Timed and counted as + // one phase; the plan's sizes are the chunk/index build counts (0/0 when there + // is no producible range, still reported so the empty-tick rate is visible). // // rangeEnd is the just-completed chunk ingestion handed over (lastChunk), but // CLAMPED to the highest chunk that is actually complete in durable storage: @@ -249,7 +249,7 @@ func runLifecycleTick(ctx context.Context, cfg LifecycleConfig, cat *Catalog, la // result (no complete chunk) makes the range empty — production is skipped, // while the discard and prune scans below still run. freezeStart := time.Now() - var chunkBuilds int + var chunkBuilds, indexBuilds int durableThrough, derr := lastCommittedLedger(cat, nil) // chunk-granularity, no hot DB read if derr != nil { if ctx.Err() != nil { @@ -272,7 +272,7 @@ func runLifecycleTick(ctx context.Context, cfg LifecycleConfig, cat *Catalog, la cfg.Fatalf("streaming: lifecycle tick: resolve [%d,%s]: %v", start, rangeEnd, perr) return } - chunkBuilds = len(plan.ChunkBuilds) + chunkBuilds, indexBuilds = len(plan.ChunkBuilds), len(plan.IndexBuilds) if eerr := executePlan(ctx, plan, cfg.ExecConfig); eerr != nil { // CLEAN-SHUTDOWN FIX: a canceled ctx makes executePlan return ctx.Err() // (every task's slot-acquire/wait observes the errgroup cancel). That is @@ -287,9 +287,10 @@ func runLifecycleTick(ctx context.Context, cfg LifecycleConfig, cat *Catalog, la // else: no complete chunk in range (young network / empty store) — skip // production. 
The discard and prune scans still run: a past-retention hot DB // or stale key can exist with no producible range. - metrics.Freeze(chunkBuilds, time.Since(freezeStart)) - if logger != nil && chunkBuilds > 0 { + metrics.Freeze(chunkBuilds, indexBuilds, time.Since(freezeStart)) + if logger != nil && (chunkBuilds > 0 || indexBuilds > 0) { logger.WithField("chunk_builds", chunkBuilds). + WithField("index_builds", indexBuilds). Info("streaming: lifecycle freeze stage complete") } diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/lifecycle_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/lifecycle_test.go index 6a8cb4790..55d8b46da 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/lifecycle_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/lifecycle_test.go @@ -10,11 +10,77 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/stellar/go-stellar-sdk/keypair" + "github.com/stellar/go-stellar-sdk/network" "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) +// lifecyclePassphrase is the network passphrase the one-tx fixture hashes +// against (any stable value works; the index only needs deterministic hashes). +const lifecyclePassphrase = network.PublicNetworkPassphrase + +// oneTxLCMBytes builds the wire bytes of a V2 LedgerCloseMeta carrying ONE +// transaction for seq, so a chunk ingested with at least one such ledger yields +// a NON-empty txhash .bin — streamhash refuses to build a cold index over zero +// keys (txhash.ErrEmptyBuildSet), so a fully zero-tx chunk cannot exercise the +// real index fold. Mirrors ingest_test's buildLCMReturningHashes, trimmed to one +// tx. 
+func oneTxLCMBytes(t *testing.T, seq uint32) []byte { + t.Helper() + envelope := xdr.TransactionEnvelope{ + Type: xdr.EnvelopeTypeEnvelopeTypeTx, + V1: &xdr.TransactionV1Envelope{ + Tx: xdr.Transaction{ + SourceAccount: xdr.MustMuxedAddress(keypair.MustRandom().Address()), + Ext: xdr.TransactionExt{V: 1, SorobanData: &xdr.SorobanTransactionData{}}, + }, + }, + } + hash, err := network.HashTransactionInEnvelope(envelope, lifecyclePassphrase) + require.NoError(t, err) + + comp := []xdr.TxSetComponent{{ + Type: xdr.TxSetComponentTypeTxsetCompTxsMaybeDiscountedFee, + TxsMaybeDiscountedFee: &xdr.TxSetComponentTxsMaybeDiscountedFee{ + Txs: []xdr.TransactionEnvelope{envelope}, + }, + }} + opResults := []xdr.OperationResult{} + lcm := xdr.LedgerCloseMeta{ + V: 2, + V2: &xdr.LedgerCloseMetaV2{ + LedgerHeader: xdr.LedgerHeaderHistoryEntry{ + Header: xdr.LedgerHeader{ + ScpValue: xdr.StellarValue{CloseTime: xdr.TimePoint(0)}, + LedgerSeq: xdr.Uint32(seq), + }, + }, + TxSet: xdr.GeneralizedTransactionSet{ + V: 1, + V1TxSet: &xdr.TransactionSetV1{Phases: []xdr.TransactionPhase{{V: 0, V0Components: &comp}}}, + }, + TxProcessing: []xdr.TransactionResultMetaV1{{ + TxApplyProcessing: xdr.TransactionMeta{ + V: 4, + V4: &xdr.TransactionMetaV4{Operations: []xdr.OperationMetaV2{}}, + }, + Result: xdr.TransactionResultPair{ + TransactionHash: hash, + Result: xdr.TransactionResult{ + FeeCharged: 100, + Result: xdr.TransactionResultResult{Code: xdr.TransactionResultCodeTxSuccess, Results: &opResults}, + }, + }, + }}, + }, + } + raw, err := lcm.MarshalBinary() + require.NoError(t, err) + return raw +} + // --------------------------------------------------------------------------- // Arithmetic: lastCompleteChunkAt, effectiveRetentionFloor. 
// --------------------------------------------------------------------------- @@ -115,7 +181,7 @@ func TestLowestMaterializedChunk(t *testing.T) { cat, _ := testCatalog(t) freezeKinds(t, cat, 7, KindLedgers) // chunk artifact key at 7 require.NoError(t, cat.PutHotTransient(4)) // hot key at 4 (lower) - freezeKinds(t, cat, 9, KindLedgers) + freezeKinds(t, cat, 9, KindEvents) low, ok, err := lowestMaterializedChunk(cat) require.NoError(t, err) require.True(t, ok) @@ -128,14 +194,22 @@ func TestLowestMaterializedChunk(t *testing.T) { // --------------------------------------------------------------------------- // ingestFullHotChunk creates a "ready" hot DB for chunk c and ingests every -// ledger in the chunk (contiguous from FirstLedger), then closes the write -// handle — the post-boundary state the lifecycle freezes from. The hot key is -// left "ready" and the dir is on disk, as the boundary handoff leaves it. +// ledger in the chunk (all CFs, contiguous from FirstLedger), then closes the +// write handle — the post-boundary state the lifecycle freezes from. The hot +// key is left "ready" and the dir is on disk, as the boundary handoff leaves it. func ingestFullHotChunk(t *testing.T, cat *Catalog, c chunk.ID) { t.Helper() db := openLiveHotDB(t, cat, c) for seq := c.FirstLedger(); seq <= c.LastLedger(); seq++ { - _, err := db.IngestLedger(seq, xdr.LedgerCloseMetaView(zeroTxLCMBytes(t, seq)), allHotTypes) + // The first ledger carries one tx so the chunk's txhash .bin is non-empty + // (streamhash refuses a zero-key index); the rest stay zero-tx for speed. 
+ var raw []byte + if seq == c.FirstLedger() { + raw = oneTxLCMBytes(t, seq) + } else { + raw = zeroTxLCMBytes(t, seq) + } + _, err := db.IngestLedger(seq, xdr.LedgerCloseMetaView(raw), allHotTypes) require.NoError(t, err) } require.NoError(t, db.Close()) // release the write handle (boundary handoff) @@ -176,20 +250,21 @@ func (r *fatalRecorder) fatalf(format string, args ...any) { func (r *fatalRecorder) fired() bool { return r.count.Load() > 0 } -// TestRunLifecycleTick_BoundaryFreezesDiscards is the "one boundary, end to +// TestRunLifecycleTick_BoundaryFreezesFoldsDiscards is the "one boundary, end to // end" walk: chunk 0 just closed (its full hot DB is on disk, ready), chunk 1 is // the new live chunk. One tick must: -// - freeze chunk 0's cold ledger artifact FROM its hot DB (via processChunk's -// hot branch), +// - freeze chunk 0's cold artifacts FROM its hot DB (via processChunk's hot +// branch), +// - fold chunk 0 into its window's index (terminal coverage, cpi=1), // - discard chunk 0's hot DB (cold artifacts now fully serve it), // - leave the live chunk 1 untouched. // // Then re-running the tick is a no-op (quiescence). -func TestRunLifecycleTick_BoundaryFreezesDiscards(t *testing.T) { +func TestRunLifecycleTick_BoundaryFreezesFoldsDiscards(t *testing.T) { // full-chunk ingest; isolated TempDir/catalog — overlaps the other heavy // tests to fit the gate's go-test timeout. t.Parallel() - cat, _ := testCatalog(t) // a chunk finalizes immediately + cat, _ := smallWindowCatalog(t, 1) // window w == chunk w; a one-chunk window finalizes immediately cfg, rec := lifecycleTestConfig(t, cat, 0) // Chunk 0: just-closed, full hot DB on disk. Chunk 1: the new live chunk. @@ -200,10 +275,20 @@ func TestRunLifecycleTick_BoundaryFreezesDiscards(t *testing.T) { runTickForCatalog(context.Background(), t, cfg, cat) require.False(t, rec.fired(), "a healthy tick never aborts: %v", rec.last.Load()) - // Chunk 0's cold ledger artifact is frozen. 
- state, err := cat.State(0, KindLedgers) + // Chunk 0's cold artifacts are all frozen. + for _, kind := range []Kind{KindLedgers, KindEvents} { + state, err := cat.State(0, kind) + require.NoError(t, err) + assert.Equal(t, StateFrozen, state, "chunk 0 %s frozen", kind) + } + // The window's index is terminal and covers chunk 0. + covered, err := indexCovers(0, cat) + require.NoError(t, err) + assert.True(t, covered, "the window index folded chunk 0 in") + fk, ok, err := cat.FrozenCoverage(cat.windows.WindowID(0)) require.NoError(t, err) - assert.Equal(t, StateFrozen, state, "chunk 0 ledgers frozen") + require.True(t, ok) + assert.True(t, cat.windows.IsTerminalCoverage(fk), "a one-chunk window is terminal") // Chunk 0's hot DB is discarded (cold artifacts fully serve it). has, err := cat.Has(hotChunkKey(0)) @@ -224,48 +309,60 @@ func TestRunLifecycleTick_BoundaryFreezesDiscards(t *testing.T) { assertQuiescent(t, cfg, cat, through) } -// TestRunLifecycleTick_DiscardWhenComplete: a complete chunk whose cold ledger -// artifact is frozen (nothing pending) has its hot DB discarded; an incomplete -// chunk (ledgers not yet frozen) keeps its hot DB. -func TestRunLifecycleTick_DiscardWhenComplete(t *testing.T) { - cat, _ := testCatalog(t) +// TestRunLifecycleTick_DiscardGatedOnIndexCoverage: a complete chunk whose cold +// ledgers+events are frozen but whose window index does NOT yet cover it keeps its +// hot DB (it still serves tx lookups). Only once a terminal coverage exists does +// the discard fire. cpi=2 so a single chunk does NOT finalize the window. +func TestRunLifecycleTick_DiscardGatedOnIndexCoverage(t *testing.T) { + cat, _ := smallWindowCatalog(t, 2) // window 0 = chunks [0,1] cfg, _ := lifecycleTestConfig(t, cat, 0) - // Chunk 0 with a "ready" hot DB on disk but NOT yet frozen: still pending. + // Pre-freeze chunk 0's ledgers+events+txhash directly (no hot dependence), and + // leave it with a "ready" hot DB on disk. 
The window is NOT finalized (cpi=2, + // only chunk 0 present), so no terminal coverage exists. + freezeKinds(t, cat, 0, KindLedgers, KindEvents, KindTxHash) makeReadyHotDirNoData(t, cat, 0) // A live chunk 1 above it so chunk 0 is below the partition boundary. require.NoError(t, cat.PutHotTransient(1)) - through := chunk.ID(0).LastLedger() // chunk 0 complete via positional/cold + through := chunk.ID(0).LastLedger() // chunk 0 complete via cold + // txhash is frozen, ledgers/events frozen, but the window has no FROZEN coverage + // yet => indexCovers(0) is false => NOT discarded (still needed for lookups via + // its .bin/hot DB until the index folds it in). ops, err := eligibleDiscardOps(cfg, cat, through) require.NoError(t, err) - require.Empty(t, ops, "ledgers not frozen yet: the hot DB stays") + require.Empty(t, ops, "no index coverage yet: the hot DB stays") + + // Now finalize the window's index so it covers chunk 0 (terminal needs chunk + // 1's .bin too; build a non-terminal-but-covering frozen coverage [0,0]). + freezeCoverage(t, cat, 0, 0, 0) + covered, err := indexCovers(0, cat) + require.NoError(t, err) + require.True(t, covered) - // Now freeze chunk 0's ledgers + events artifacts: nothing pending => discard - // eligible. - freezeKinds(t, cat, 0, KindLedgers, KindEvents) ops, err = eligibleDiscardOps(cfg, cat, through) require.NoError(t, err) - require.Len(t, ops, 1, "frozen + nothing pending => discard eligible") + require.Len(t, ops, 1, "covered + nothing pending => discard eligible") require.NoError(t, ops[0]()) has, err := cat.Has(hotChunkKey(0)) require.NoError(t, err) - assert.False(t, has, "the now-complete chunk's hot DB is discarded") + assert.False(t, has, "the now-covered chunk's hot DB is discarded") } // TestRunLifecycleTick_PastFloorPrune: a chunk wholly below the effective // retention floor has its artifact files and hot DB swept, regardless of state. 
func TestRunLifecycleTick_PastFloorPrune(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) cfg, rec := lifecycleTestConfig(t, cat, 2) // retain ~2 chunks // completeThrough will be chunk 5's last ledger (positional: live chunk 6). // floor = lastCompleteChunkAt(through)-retention+1 = 5-2+1 = chunk 4's first // ledger. So chunks 0..3 are wholly past the floor and must be swept. for c := chunk.ID(0); c <= 5; c++ { - freezeKinds(t, cat, c, KindLedgers, KindEvents) + freezeKinds(t, cat, c, KindLedgers, KindEvents, KindTxHash) writeArtifact(t, cat.layout.LedgerPackPath(c)) + freezeCoverage(t, cat, cat.windows.WindowID(c), c, c) // each one-chunk window terminal } // A past-floor hot DB too (chunk 1). makeReadyHotDirNoData(t, cat, 1) @@ -301,27 +398,27 @@ func TestRunLifecycleTick_PastFloorPrune(t *testing.T) { assertQuiescent(t, cfg, cat, through) } -// TestRunLifecycleTick_PrunesTransientChunkDebris: a "pruning" chunk artifact -// key (a recovery-demoted leftover) is swept by the prune scan. -func TestRunLifecycleTick_PrunesTransientChunkDebris(t *testing.T) { - cat, _ := testCatalog(t) +// TestRunLifecycleTick_PrunesTransientIndexDebris: a "freezing" index key (a +// crashed build attempt) is swept regardless of window, even within retention. +func TestRunLifecycleTick_PrunesTransientIndexDebris(t *testing.T) { + cat, _ := smallWindowCatalog(t, 2) cfg, rec := lifecycleTestConfig(t, cat, 0) - // A "pruning" chunk artifact key (in-retention demotion) with a real file. - writeArtifact(t, cat.layout.LedgerPackPath(0)) - require.NoError(t, cat.store.Put(chunkKey(0, KindLedgers), string(StatePruning))) + // A crashed build left a "freezing" coverage key (no commit). 
+ _, err := cat.MarkIndexFreezing(0, 0, 0) + require.NoError(t, err) through, err := deriveCompleteThrough(cat) require.NoError(t, err) ops, err := eligiblePruneOps(cfg, cat, through) require.NoError(t, err) - require.Len(t, ops, 1, "the pruning debris is swept") + require.Len(t, ops, 1, "the freezing debris is swept") require.NoError(t, ops[0]()) require.False(t, rec.fired()) - s, err := cat.State(0, KindLedgers) + covs, err := cat.AllIndexKeys() require.NoError(t, err) - require.Equal(t, State(""), s, "the pruning chunk key is gone") + require.Empty(t, covs, "the freezing index key is gone") } // --------------------------------------------------------------------------- @@ -333,7 +430,7 @@ func TestRunLifecycleTick_PrunesTransientChunkDebris(t *testing.T) { // never an op failure. The plan stage's work is real (a backend-only chunk that // the canceled ctx aborts), so executePlan genuinely returns an error here. func TestRunLifecycleTick_CleanShutdownNoFatal(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) rec := &fatalRecorder{} // A READY live chunk 1 so chunk 0 sits BELOW the partition and counts as @@ -385,7 +482,7 @@ func TestRunLifecycleTick_CleanShutdownNoFatal(t *testing.T) { // TestRunLifecycleTick_GenuineFailureAborts: when a plan op fails for a real // reason (NOT ctx cancellation), the tick aborts via Fatalf per the error policy. func TestRunLifecycleTick_GenuineFailureAborts(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) rec := &fatalRecorder{} readyHot(t, cat, 1) // ready live chunk => through = chunk 0 last ledger @@ -415,14 +512,15 @@ func TestRunLifecycleTick_GenuineFailureAborts(t *testing.T) { // chunk id) runs a tick; a ctx cancellation returns the loop. The loop never // blocks forever and never fatals on shutdown. 
func TestLifecycleLoop_RunsTickPerNotifyThenStopsOnCtx(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) cfg, rec := lifecycleTestConfig(t, cat, 0) // Make the tick observable WITHOUT a slow full ingest: chunk 0 is already - // fully frozen (ledgers + events), with a leftover "ready" hot DB on disk. The - // plan stage is a no-op; the discard scan retires chunk 0's hot DB. A live - // chunk 1 keeps chunk 0 below the partition. - freezeKinds(t, cat, 0, KindLedgers, KindEvents) + // fully frozen and folded into its (terminal, cpi=1) window, with a leftover + // "ready" hot DB on disk. The plan stage is a no-op; the discard scan retires + // chunk 0's hot DB. A live chunk 1 keeps chunk 0 below the partition. + freezeKinds(t, cat, 0, KindLedgers, KindEvents, KindTxHash) + freezeCoverage(t, cat, cat.windows.WindowID(0), 0, 0) // terminal coverage of chunk 0 makeReadyHotDirNoData(t, cat, 0) live := openLiveHotDB(t, cat, 1) t.Cleanup(func() { _ = live.Close() }) @@ -455,11 +553,12 @@ func TestLifecycleLoop_RunsTickPerNotifyThenStopsOnCtx(t *testing.T) { // and 1 both frozen+covered and a live chunk 2, sending 0 then 1 runs a single // tick up to chunk 1 that discards both. func TestLifecycleLoop_DrainsToMostRecent(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) cfg, rec := lifecycleTestConfig(t, cat, 0) for c := chunk.ID(0); c <= 1; c++ { - freezeKinds(t, cat, c, KindLedgers, KindEvents) + freezeKinds(t, cat, c, KindLedgers, KindEvents, KindTxHash) + freezeCoverage(t, cat, cat.windows.WindowID(c), c, c) makeReadyHotDirNoData(t, cat, c) } live := openLiveHotDB(t, cat, 2) @@ -495,7 +594,7 @@ func TestLifecycleLoop_DrainsToMostRecent(t *testing.T) { // ctx makes the loop return without running any tick (never blocks on the // channel forever). 
func TestLifecycleLoop_ReturnsImmediatelyOnAlreadyCancelledCtx(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) cfg, _ := lifecycleTestConfig(t, cat, 0) ctx, cancel := context.WithCancel(context.Background()) diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/observability.go b/cmd/stellar-rpc/internal/fullhistory/streaming/observability.go index 22f8550c3..9a1b547a2 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/observability.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/observability.go @@ -66,8 +66,8 @@ type Metrics interface { //nolint:interfacebloat // one cohesive control-plane s LiveHotChunks(count int) // ColdTierBytes sets the cold-tier on-disk footprint in bytes (the summed size - // of the ledgers tree). Reported by every lifecycle tick after the prune - // stage. + // of the ledgers/events/txhash trees). Reported by every lifecycle tick after + // the prune stage. ColdTierBytes(bytes int64) // --- counters + durations (one call per completed phase action) --- @@ -82,11 +82,16 @@ type Metrics interface { //nolint:interfacebloat // one cohesive control-plane s // reported — only passes that ran runBackfill. CatchupPass(lo, hi uint32, d time.Duration) - // Freeze counts one lifecycle-tick plan-and-execute stage (the freeze) and - // records its wall-clock. chunkBuilds is the plan's size — 0 when the tick had - // no producible range (the stage still reports, with a zero count, so the rate - // of empty ticks is observable). - Freeze(chunkBuilds int, d time.Duration) + // Freeze counts one lifecycle-tick plan-and-execute stage (the freeze + index + // fold) and records its wall-clock. chunkBuilds / indexBuilds are the plan's + // sizes — 0/0 when the tick had no producible range (the stage still reports, + // with a zero count, so the rate of empty ticks is observable). 
+ Freeze(chunkBuilds, indexBuilds int, d time.Duration) + + // Rebuild records the burst throughput of an index rebuild: chunks folded into + // one .idx over a wall-clock. It is the per-IndexBuild signal the Freeze + // aggregate cannot decompose; emitted once per index build executePlan ran. + Rebuild(chunks int, d time.Duration) // Discard counts the hot DBs a tick retired and records the stage wall-clock. Discard(count int, d time.Duration) @@ -96,8 +101,8 @@ type Metrics interface { //nolint:interfacebloat // one cohesive control-plane s Prune(count int, d time.Duration) // Recovery counts one surgical-recovery apply and records how many keys it - // demoted across the cold/hot tiers. - Recovery(coldKeys, hotKeys int, d time.Duration) + // demoted across the cold/index/hot tiers. + Recovery(coldKeys, indexKeys, hotKeys int, d time.Duration) } // nopMetrics discards every signal. It is the default when a config carries no @@ -112,10 +117,11 @@ func (nopMetrics) LiveHotChunks(int) {} func (nopMetrics) ColdTierBytes(int64) {} func (nopMetrics) ChunkBoundary(uint32) {} func (nopMetrics) CatchupPass(uint32, uint32, time.Duration) {} -func (nopMetrics) Freeze(int, time.Duration) {} +func (nopMetrics) Freeze(int, int, time.Duration) {} +func (nopMetrics) Rebuild(int, time.Duration) {} func (nopMetrics) Discard(int, time.Duration) {} func (nopMetrics) Prune(int, time.Duration) {} -func (nopMetrics) Recovery(int, int, time.Duration) {} +func (nopMetrics) Recovery(int, int, int, time.Duration) {} // metricsOrNop returns m, or nopMetrics{} when m is nil, so call sites never // nil-check before reporting a phase signal. 
@@ -161,6 +167,8 @@ type PrometheusMetrics struct { chunkBoundaries prometheus.Counter catchupPasses prometheus.Counter freezeChunks prometheus.Counter + freezeIndexes prometheus.Counter + rebuiltChunks prometheus.Counter discarded prometheus.Counter pruned prometheus.Counter recoveries prometheus.Counter @@ -168,12 +176,15 @@ type PrometheusMetrics struct { // Durations — per-phase wall-clock histograms, keyed by phase label. phaseDuration *prometheus.HistogramVec + // Rebuild burst throughput (chunks folded per .idx) as its own histogram. + rebuildChunksPerIdx prometheus.Histogram } // Phase labels for the per-phase duration histogram. const ( phaseCatchupPass = "catchup_pass" phaseFreeze = "freeze" + phaseRebuild = "rebuild" phaseDiscard = "discard" phasePrune = "prune" phaseRecovery = "recovery" @@ -207,6 +218,8 @@ func NewPrometheusMetrics(registry *prometheus.Registry, namespace string) *Prom chunkBoundaries: counter("chunk_boundaries_total", "ingestion chunk-boundary handoffs"), catchupPasses: counter("catchup_passes_total", "completed catch-up backfill passes"), freezeChunks: counter("freeze_chunks_total", "chunks frozen by the lifecycle freeze stage"), + freezeIndexes: counter("freeze_indexes_total", "indexes built by the lifecycle freeze stage"), + rebuiltChunks: counter("rebuilt_chunks_total", "chunks folded into rebuilt indexes"), discarded: counter("discarded_hot_chunks_total", "hot DBs retired by the discard stage"), pruned: counter("pruned_ops_total", "prune-stage sweep ops"), recoveries: counter("recoveries_total", "surgical-recovery applies"), @@ -220,14 +233,20 @@ func NewPrometheusMetrics(registry *prometheus.Registry, namespace string) *Prom Name: "phase_duration_seconds", Help: "wall-clock of a daemon phase action", Buckets: phaseBuckets, }, []string{"phase"}), + rebuildChunksPerIdx: prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, Subsystem: streamingSubsystem, + Name: "rebuild_chunks_per_index", Help: "chunks folded 
into one index rebuild (burst throughput)", + // 1 … ~4096 chunks, doubling. + Buckets: prometheus.ExponentialBuckets(1, 2, 13), + }), } registry.MustRegister( m.ingestionLag, m.lastCommitted, m.watermark, m.retentionFloor, m.catchupBackfilled, m.catchupTarget, m.liveHotChunks, m.coldTierBytes, - m.chunkBoundaries, m.catchupPasses, m.freezeChunks, + m.chunkBoundaries, m.catchupPasses, m.freezeChunks, m.freezeIndexes, m.rebuiltChunks, m.discarded, m.pruned, m.recoveries, m.recoveredKeys, - m.phaseDuration, + m.phaseDuration, m.rebuildChunksPerIdx, ) return m } @@ -261,13 +280,24 @@ func (m *PrometheusMetrics) CatchupPass(_, _ uint32, d time.Duration) { m.phaseDuration.WithLabelValues(phaseCatchupPass).Observe(d.Seconds()) } -func (m *PrometheusMetrics) Freeze(chunkBuilds int, d time.Duration) { +func (m *PrometheusMetrics) Freeze(chunkBuilds, indexBuilds int, d time.Duration) { if chunkBuilds > 0 { m.freezeChunks.Add(float64(chunkBuilds)) } + if indexBuilds > 0 { + m.freezeIndexes.Add(float64(indexBuilds)) + } m.phaseDuration.WithLabelValues(phaseFreeze).Observe(d.Seconds()) } +func (m *PrometheusMetrics) Rebuild(chunks int, d time.Duration) { + if chunks > 0 { + m.rebuiltChunks.Add(float64(chunks)) + } + m.rebuildChunksPerIdx.Observe(float64(chunks)) + m.phaseDuration.WithLabelValues(phaseRebuild).Observe(d.Seconds()) +} + func (m *PrometheusMetrics) Discard(count int, d time.Duration) { if count > 0 { m.discarded.Add(float64(count)) @@ -282,11 +312,14 @@ func (m *PrometheusMetrics) Prune(count int, d time.Duration) { m.phaseDuration.WithLabelValues(phasePrune).Observe(d.Seconds()) } -func (m *PrometheusMetrics) Recovery(coldKeys, hotKeys int, d time.Duration) { +func (m *PrometheusMetrics) Recovery(coldKeys, indexKeys, hotKeys int, d time.Duration) { m.recoveries.Inc() if coldKeys > 0 { m.recoveredKeys.WithLabelValues("cold").Add(float64(coldKeys)) } + if indexKeys > 0 { + m.recoveredKeys.WithLabelValues("index").Add(float64(indexKeys)) + } if hotKeys > 0 { 
m.recoveredKeys.WithLabelValues("hot").Add(float64(hotKeys)) } @@ -296,17 +329,21 @@ func (m *PrometheusMetrics) Recovery(coldKeys, hotKeys int, d time.Duration) { // compile-time assertion: the production sink satisfies the interface. var _ Metrics = (*PrometheusMetrics)(nil) -// coldTierBytes sums the on-disk footprint of the cold tier — the ledgers tree -// (the hot tier and the meta store are excluded: the hot tier is transient, the -// meta store tiny). It walks the tree's root once, ignoring a missing tree (a -// frontfill deployment may not have materialized it). A walk error is non-fatal -// — the lifecycle caller treats a returned error as "skip the gauge this tick" +// coldTierBytes sums the on-disk footprint of the cold tier — the +// ledgers/events/txhash-raw/txhash-index trees (the hot tier and the meta store +// are excluded: the hot tier is transient, the meta store tiny). It walks each +// tree's roots once, ignoring missing trees (a frontfill deployment may not have +// materialized any). A walk error on a single tree is non-fatal to the others — +// the lifecycle caller treats a returned error as "skip the gauge this tick" // rather than failing the tick, so a transient FS hiccup never aborts the daemon. 
func coldTierBytes(layout Layout) (int64, error) { var total int64 var firstErr error for _, root := range []string{ layout.LedgersRoot(), + layout.EventsRoot(), + layout.TxHashRawRoot(), + layout.TxHashIndexRoot(), } { err := filepath.WalkDir(root, func(_ string, d fs.DirEntry, err error) error { if err != nil { diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/observability_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/observability_test.go index fe9aaac26..54f043f79 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/observability_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/observability_test.go @@ -52,6 +52,7 @@ type recordingMetrics struct { boundaries []uint32 catchupPass []passRec freeze []freezeRec + rebuild []rebuildRec discard []countDur prune []countDur recovery []recoveryRec @@ -62,16 +63,20 @@ type passRec struct { d time.Duration } type freezeRec struct { - chunkBuilds int - d time.Duration + chunkBuilds, indexBuilds int + d time.Duration +} +type rebuildRec struct { + chunks int + d time.Duration } type countDur struct { count int d time.Duration } type recoveryRec struct { - cold, hot int - d time.Duration + cold, index, hot int + d time.Duration } func newRecordingMetrics() *recordingMetrics { @@ -132,10 +137,16 @@ func (r *recordingMetrics) CatchupPass(lo, hi uint32, d time.Duration) { r.catchupPass = append(r.catchupPass, passRec{lo, hi, d}) } -func (r *recordingMetrics) Freeze(chunkBuilds int, d time.Duration) { +func (r *recordingMetrics) Freeze(chunkBuilds, indexBuilds int, d time.Duration) { + r.mu.Lock() + defer r.mu.Unlock() + r.freeze = append(r.freeze, freezeRec{chunkBuilds, indexBuilds, d}) +} + +func (r *recordingMetrics) Rebuild(chunks int, d time.Duration) { r.mu.Lock() defer r.mu.Unlock() - r.freeze = append(r.freeze, freezeRec{chunkBuilds, d}) + r.rebuild = append(r.rebuild, rebuildRec{chunks, d}) } func (r *recordingMetrics) Discard(count int, d time.Duration) { @@ -150,10 +161,10 @@ 
func (r *recordingMetrics) Prune(count int, d time.Duration) { r.prune = append(r.prune, countDur{count, d}) } -func (r *recordingMetrics) Recovery(cold, hot int, d time.Duration) { +func (r *recordingMetrics) Recovery(cold, index, hot int, d time.Duration) { r.mu.Lock() defer r.mu.Unlock() - r.recovery = append(r.recovery, recoveryRec{cold, hot, d}) + r.recovery = append(r.recovery, recoveryRec{cold, index, hot, d}) } func (r *recordingMetrics) snapshotBoundaries() []uint32 { @@ -205,10 +216,11 @@ func TestMetricsOrNop_NilNeverPanics(t *testing.T) { m.ColdTierBytes(1024) m.ChunkBoundary(0) m.CatchupPass(0, 4, time.Second) - m.Freeze(2, time.Second) + m.Freeze(2, 1, time.Second) + m.Rebuild(4, time.Second) m.Discard(1, time.Second) m.Prune(2, time.Second) - m.Recovery(1, 1, time.Second) + m.Recovery(1, 1, 1, time.Second) } // --------------------------------------------------------------------------- @@ -246,7 +258,7 @@ func TestRunIngestionLoop_ReportsChunkBoundaries(t *testing.T) { c.LastLedger(): zeroTxLCMBytes(t, c.LastLedger()), // boundary 0->1 lastSeq: zeroTxLCMBytes(t, lastSeq), // no boundary }, endErr: errors.New("end")} - ingestTypes := hotchunk.Ingest{Ledgers: true} + ingestTypes := hotchunk.Ingest{Ledgers: true, Txhash: true} ch := make(chan chunk.ID, lifecycleQueueDepth) rec := newRecordingMetrics() @@ -308,7 +320,7 @@ func TestRunIngestionLoop_BoundaryLogFields(t *testing.T) { done := make(chan error, 1) go func() { done <- runIngestionLoop(context.Background(), getter, db, cat, ch, - hotchunk.Ingest{Ledgers: true}, logger, newRecordingMetrics()) + hotchunk.Ingest{Ledgers: true, Txhash: true}, logger, newRecordingMetrics()) }() select { case <-done: @@ -332,7 +344,7 @@ func TestRunLifecycleTick_LogFields(t *testing.T) { // full-chunk ingest; isolated TempDir/catalog + per-instance logger — // overlaps to fit the gate's go-test timeout. 
t.Parallel() - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) cfg, _ := lifecycleTestConfig(t, cat, 0) cfg.Metrics = newRecordingMetrics() @@ -355,6 +367,7 @@ func TestRunLifecycleTick_LogFields(t *testing.T) { freeze := findLog(t, entries, "streaming: lifecycle freeze stage complete") assert.Equal(t, logrus.InfoLevel, freeze.Level, "a non-empty freeze is Info") + assert.Equal(t, 1, freeze.Data["index_builds"], "the one-chunk window built one index") assert.Positive(t, freeze.Data["chunk_builds"], "chunk 0 was built") } @@ -369,7 +382,7 @@ func TestRunLifecycleTick_ReportsPhaseSignals(t *testing.T) { // full-chunk ingest; isolated TempDir/catalog — overlaps the other heavy // tests to fit the gate's go-test timeout. t.Parallel() - cat, _ := testCatalog(t) // one-chunk window finalizes immediately + cat, _ := smallWindowCatalog(t, 1) // one-chunk window finalizes immediately cfg, rec := lifecycleTestConfig(t, cat, 0) metrics := newRecordingMetrics() cfg.Metrics = metrics @@ -382,9 +395,15 @@ func TestRunLifecycleTick_ReportsPhaseSignals(t *testing.T) { runTickForCatalog(context.Background(), t, cfg, cat) require.False(t, rec.fired(), "a healthy tick never aborts: %v", rec.last.Load()) - // Freeze stage reported once, with a non-trivial plan (chunk 0's build). + // Freeze stage reported once, with a non-trivial plan (chunk 0's builds + the + // terminal index build). require.Len(t, metrics.freeze, 1, "freeze stage reported once") assert.Positive(t, metrics.freeze[0].chunkBuilds, "chunk 0 was built") + assert.Positive(t, metrics.freeze[0].indexBuilds, "the window index was built") + + // The index build (a rebuild) reported its burst throughput: 1 chunk folded. + require.NotEmpty(t, metrics.rebuild, "the index build reported a rebuild") + assert.Equal(t, 1, metrics.rebuild[0].chunks, "a one-chunk window folds one chunk") // Discard stage retired chunk 0's hot DB (cold artifacts now serve it). 
require.Len(t, metrics.discard, 1, "discard stage reported once") @@ -406,19 +425,21 @@ func TestRunLifecycleTick_ReportsPhaseSignals(t *testing.T) { // observable. Chunk 0 is already fully frozen and covered (no hot key), so the // plan over [0,0] resolves to nothing and the discard/prune scans find nothing. func TestRunLifecycleTick_EmptyTickStillReportsStages(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) cfg, _ := lifecycleTestConfig(t, cat, 0) metrics := newRecordingMetrics() cfg.Metrics = metrics - freezeKinds(t, cat, 0, KindLedgers, KindEvents) + freezeKinds(t, cat, 0, KindLedgers, KindEvents, KindTxHash) + freezeCoverage(t, cat, cat.windows.WindowID(0), 0, 0) // terminal coverage; no hot key // Drive the tick with chunk 0 (the just-completed chunk): the range [0,0] is - // already fully materialized, so no build, no discard, no prune. + // already fully materialized and covered, so no build, no discard, no prune. runLifecycleTick(context.Background(), cfg, cat, 0) require.Len(t, metrics.freeze, 1) assert.Equal(t, 0, metrics.freeze[0].chunkBuilds, "no producible range — all frozen") + assert.Equal(t, 0, metrics.freeze[0].indexBuilds, "the window is already covered") require.Len(t, metrics.discard, 1) assert.Equal(t, 0, metrics.discard[0].count) require.Len(t, metrics.prune, 1) @@ -467,12 +488,14 @@ func TestBackfill_ReportsPassAndProgress(t *testing.T) { func TestRunSurgicalRecovery_ReportsRecoveryMetric(t *testing.T) { cfg := recoveryConfig(t) paths := cfg.WithDefaults().ResolvePaths() + windows, err := NewWindows(DefaultChunksPerTxhashIndex) + require.NoError(t, err) // Seed durable state, then close (RocksDB single-writer; the entrypoint reopens). 
seedStore, err := openMetaAt(t, paths.Catalog) require.NoError(t, err) - seedCat := NewCatalog(seedStore, NewLayout(paths.DataDir)) - for _, kind := range []Kind{KindLedgers} { + seedCat := NewCatalog(seedStore, NewLayout(paths.DataDir), windows) + for _, kind := range []Kind{KindLedgers, KindEvents, KindTxHash} { require.NoError(t, seedCat.MarkChunkFreezing(5, kind)) require.NoError(t, seedCat.FlipChunkFrozen(5, kind)) } @@ -490,15 +513,15 @@ func TestRunSurgicalRecovery_ReportsRecoveryMetric(t *testing.T) { assert.Equal(t, len(plan.ColdKeys), got.cold, "cold key count matches the plan") assert.Equal(t, len(plan.HotKeys), got.hot, "hot key count matches the plan") assert.Equal(t, 1, got.hot, "chunk 5's hot key demoted") - assert.Equal(t, 1, got.cold, "chunk 5's ledger cold key demoted") + assert.Equal(t, 3, got.cold, "chunk 5's three cold keys demoted") } // --------------------------------------------------------------------------- // coldTierBytes — the disk-footprint helper. // --------------------------------------------------------------------------- -// A missing tree contributes zero; populated files are summed across the cold -// tree (ledgers); the hot tree and meta store are excluded. +// A missing tree contributes zero; populated files are summed across all four +// cold trees; the hot tree and meta store are excluded. func TestColdTierBytes(t *testing.T) { root := t.TempDir() layout := NewLayout(root) @@ -508,19 +531,19 @@ func TestColdTierBytes(t *testing.T) { require.NoError(t, err) assert.Zero(t, total, "an un-materialized cold tier is zero bytes") - // Write two files in the ledgers tree. + // Write a file in the ledgers tree and one in the events tree. 
write := func(dir, name string, n int) { require.NoError(t, os.MkdirAll(dir, 0o755)) require.NoError(t, os.WriteFile(filepath.Join(dir, name), make([]byte, n), 0o644)) } write(filepath.Join(layout.LedgersRoot(), "00000"), "x.pack", 100) - write(filepath.Join(layout.LedgersRoot(), "00000"), "y.pack", 50) + write(filepath.Join(layout.EventsRoot(), "00000"), "y-events.pack", 50) // A file under the HOT tree must NOT be counted. write(layout.HotRoot(), "ignored.sst", 9999) total, err = coldTierBytes(layout) require.NoError(t, err) - assert.Equal(t, int64(150), total, "only the cold tree is summed; the hot tree is excluded") + assert.Equal(t, int64(150), total, "only the cold trees are summed; the hot tree is excluded") } // --------------------------------------------------------------------------- @@ -541,10 +564,11 @@ func TestPrometheusMetrics_RegistersAndRecords(t *testing.T) { m.ColdTierBytes(2048) m.ChunkBoundary(3) m.CatchupPass(0, 3, 250*time.Millisecond) - m.Freeze(2, 100*time.Millisecond) + m.Freeze(2, 1, 100*time.Millisecond) + m.Rebuild(4, 50*time.Millisecond) m.Discard(1, 10*time.Millisecond) m.Prune(2, 5*time.Millisecond) - m.Recovery(3, 1, time.Millisecond) + m.Recovery(3, 1, 1, time.Millisecond) families, err := reg.Gather() require.NoError(t, err) @@ -578,12 +602,14 @@ func TestPrometheusMetrics_RegistersAndRecords(t *testing.T) { assert.InDelta(t, float64(1), values["test_ns_fullhistory_streaming_discarded_hot_chunks_total"], 0) assert.InDelta(t, float64(2), values["test_ns_fullhistory_streaming_pruned_ops_total"], 0) assert.InDelta(t, float64(1), values["test_ns_fullhistory_streaming_recoveries_total"], 0) - // recovered_keys_total aggregates 3+1 = 4 across the tier label. - assert.InDelta(t, float64(4), values["test_ns_fullhistory_streaming_recovered_keys_total"], 0) - - // Phase-duration histogram saw catchup_pass + freeze + discard + prune + - // recovery = 5 observations. 
- assert.Equal(t, uint64(5), counts["test_ns_fullhistory_streaming_phase_duration_seconds"]) + assert.InDelta(t, float64(4), values["test_ns_fullhistory_streaming_rebuilt_chunks_total"], 0) + // recovered_keys_total aggregates 3+1+1 = 5 across the tier label. + assert.InDelta(t, float64(5), values["test_ns_fullhistory_streaming_recovered_keys_total"], 0) + + // Phase-duration histogram saw catchup_pass + freeze + rebuild + discard + + // prune + recovery = 6 observations; the rebuild-chunks histogram saw 1. + assert.Equal(t, uint64(6), counts["test_ns_fullhistory_streaming_phase_duration_seconds"]) + assert.Equal(t, uint64(1), counts["test_ns_fullhistory_streaming_rebuild_chunks_per_index"]) } // Double-registration on the same registry panics inside MustRegister — the diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/paths.go b/cmd/stellar-rpc/internal/fullhistory/streaming/paths.go index 56ce0def0..745ca961e 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/paths.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/paths.go @@ -12,26 +12,31 @@ import ( // (design-docs/full-history-streaming-workflow.md "Directory layout"), so a // Layout plus a key is enough to find any file without listing a directory. 
// -// In the default deployment all roots sit under one data dir (NewLayout): +// In the default deployment all six roots sit under one data dir (NewLayout): // // {root}/ // ├── catalog/rocksdb/ // ├── hot/{chunk:08d}/ // ├── ledgers/{bucket:05d}/{chunk:08d}.pack -// └── events/{bucket:05d}/{chunk:08d}-events.pack (+ -index.pack, -index.hash) +// ├── events/{bucket:05d}/{chunk:08d}-events.pack (+ -index.pack, -index.hash) +// └── txhash/ +// ├── raw/{bucket:05d}/{chunk:08d}.bin +// └── index/{window:08d}/{lo:08d}-{hi:08d}.idx // // But each tree's root is independently settable (NewLayoutFromPaths) so an // operator's [catalog]/[immutable_storage.*]/[streaming.hot_storage] path // overrides are honored — Layout is the SINGLE source of truth for storage // paths, and the same roots that get flocked (Paths.LockRoots) are the ones the -// data path reads/writes. Below each per-tree root the bucket structure is -// fixed (a bucket is a filesystem concern only; bucket ids never appear in +// data path reads/writes. Below each per-tree root the bucket/window structure +// is fixed (a bucket is a filesystem concern only; bucket ids never appear in // meta-store keys). 
type Layout struct { - catalogRoot string // meta-store RocksDB dir (a leaf, not a tree root) - hotRoot string // per-chunk hot RocksDB dirs live directly under here - ledgersRoot string // {ledgersRoot}/{bucket}/{chunk}.pack - eventsRoot string // {eventsRoot}/{bucket}/{chunk}-*.{pack,hash} + catalogRoot string // meta-store RocksDB dir (a leaf, not a tree root) + hotRoot string // per-chunk hot RocksDB dirs live directly under here + ledgersRoot string // {ledgersRoot}/{bucket}/{chunk}.pack + eventsRoot string // {eventsRoot}/{bucket}/{chunk}-*.{pack,hash} + txhashRawRoot string // {txhashRawRoot}/{bucket}/{chunk}.bin + txhashIndexRoot string // {txhashIndexRoot}/{window}/{lo}-{hi}.idx } // NewLayout returns a Layout with every tree defaulting under a single data @@ -40,10 +45,12 @@ type Layout struct { // override is set. Tests and the default production layout use this. func NewLayout(root string) Layout { return Layout{ - catalogRoot: filepath.Join(root, "catalog", "rocksdb"), - hotRoot: filepath.Join(root, "hot"), - ledgersRoot: filepath.Join(root, "ledgers"), - eventsRoot: filepath.Join(root, "events"), + catalogRoot: filepath.Join(root, "catalog", "rocksdb"), + hotRoot: filepath.Join(root, "hot"), + ledgersRoot: filepath.Join(root, "ledgers"), + eventsRoot: filepath.Join(root, "events"), + txhashRawRoot: filepath.Join(root, "txhash", "raw"), + txhashIndexRoot: filepath.Join(root, "txhash", "index"), } } @@ -55,10 +62,12 @@ func NewLayout(root string) Layout { // flock was taken on. 
func NewLayoutFromPaths(p Paths) Layout { return Layout{ - catalogRoot: p.Catalog, - hotRoot: p.HotStorage, - ledgersRoot: filepath.Join(p.Cold, "ledgers"), - eventsRoot: filepath.Join(p.Cold, "events"), + catalogRoot: p.Catalog, + hotRoot: p.HotStorage, + ledgersRoot: filepath.Join(p.Cold, "ledgers"), + eventsRoot: filepath.Join(p.Cold, "events"), + txhashRawRoot: filepath.Join(p.Cold, "txhash", "raw"), + txhashIndexRoot: p.TxhashIndex, } } @@ -78,11 +87,6 @@ func (l Layout) LedgerPackPath(c chunk.ID) string { return filepath.Join(l.ledgersRoot, c.BucketID(), c.String()+".pack") } -// LedgersRoot is the directory under which per-chunk ledger packs are bucketed. -// A cold ledger ingester rooted here composes the {bucket:05d}/{chunk:08d}.pack -// path matching LedgerPackPath. -func (l Layout) LedgersRoot() string { return l.ledgersRoot } - // EventsPaths are the three events cold-segment files for a chunk: // {chunk}-events.pack, {chunk}-index.pack, {chunk}-index.hash. func (l Layout) EventsPaths(c chunk.ID) []string { @@ -95,12 +99,44 @@ func (l Layout) EventsPaths(c chunk.ID) []string { } } +// TxHashBinPath is {txhashRawRoot}/{bucket:05d}/{chunk:08d}.bin. +func (l Layout) TxHashBinPath(c chunk.ID) string { + return filepath.Join(l.txhashRawRoot, c.BucketID(), c.String()+".bin") +} + +// LedgersRoot is the directory under which per-chunk ledger packs are bucketed. +// A cold ledger ingester rooted here composes the {bucket:05d}/{chunk:08d}.pack +// path matching LedgerPackPath. +func (l Layout) LedgersRoot() string { return l.ledgersRoot } + // EventsRoot is the directory under which per-chunk events segments are // bucketed. Matches the dir EventsPaths composes. func (l Layout) EventsRoot() string { return l.eventsRoot } +// TxHashRawRoot is the directory under which per-chunk raw txhash runs are +// bucketed. 
Matches the dir TxHashBinPath composes — the cold pipeline takes an +// explicit per-kind root (ingest.ColdDirs) rather than the single +// coldDir/ layout RunCold derives, which is why this is its own root. +func (l Layout) TxHashRawRoot() string { return l.txhashRawRoot } + +// TxHashIndexRoot is the directory under which per-window index files live: +// {txhashIndexRoot}/{window:08d}/. Matches the dir IndexWindowDir composes. +func (l Layout) TxHashIndexRoot() string { return l.txhashIndexRoot } + +// IndexWindowDir is {txhashIndexRoot}/{window:08d}/. +func (l Layout) IndexWindowDir(w WindowID) string { + return filepath.Join(l.txhashIndexRoot, w.String()) +} + +// IndexFilePath is {txhashIndexRoot}/{window:08d}/{lo:08d}-{hi:08d}.idx — the file +// name derived from a coverage by the fixed bijection. +func (l Layout) IndexFilePath(cov IndexCoverage) string { + name := cov.Lo.String() + "-" + cov.Hi.String() + ".idx" + return filepath.Join(l.IndexWindowDir(cov.Window), name) +} + // ArtifactPaths returns every file a per-chunk artifact kind owns on disk. -// One path for ledgers; three for events. The single place that maps a +// One path for ledgers and txhash; three for events. The single place that maps a // (chunk, kind) to its files, so the sweep and the freeze writer agree. func (l Layout) ArtifactPaths(c chunk.ID, kind Kind) []string { switch kind { @@ -108,6 +144,8 @@ func (l Layout) ArtifactPaths(c chunk.ID, kind Kind) []string { return []string{l.LedgerPackPath(c)} case KindEvents: return l.EventsPaths(c) + case KindTxHash: + return []string{l.TxHashBinPath(c)} default: return nil } @@ -215,3 +253,10 @@ func deleteFileIfExists(path string) error { } return nil } + +// rmdirIfEmpty removes dir only if it is empty. Best-effort tidiness — an +// empty window dir is not an artifact — so a non-empty dir (still holding +// other coverages) or a missing dir is not an error.
+func rmdirIfEmpty(dir string) { + _ = os.Remove(dir) // os.Remove on a non-empty dir fails harmlessly +} diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/perf_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/perf_test.go new file mode 100644 index 000000000..dae1d2623 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/perf_test.go @@ -0,0 +1,251 @@ +package streaming + +// perf_test.go pins the tx-hash cold-index format the streaming rebuild +// produces to the merged #728/#780 cold path, and records the design's +// Part-4 sizing expectation (see PERF.md). It is the load-bearing assertion +// behind PERF.md's "the formats are identical, so the bench figures transfer" +// claim: the perf numbers are honest only if the bytes the streaming rebuild +// writes are the same bytes the bench harness measured. +// +// Two independent assertions: +// +// - Format identity. buildTxhashIndex (the streaming rebuild) and a direct +// txhash.BuildColdIndex over the SAME .bin inputs produce a byte-identical +// .idx — same MPHF structure, same 3-byte payload, same 1-byte fingerprint, +// same [MinLedger, MaxLedger] metadata. The streaming path adds catalog +// bookkeeping around the build; it must not perturb the artifact. +// +// - On-disk format pins. The .bin inputs match gettransaction §6.1 +// (uint64-LE count header, 20-byte [16-key|4-seq-LE] entries) and the .idx +// matches §6.2 (16-byte routing key, 3-byte payload offset from MinLedger, +// 1-byte fingerprint), read back through the real reader. 
+ +import ( + "context" + "encoding/binary" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stellar/streamhash" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" +) + +// --------------------------------------------------------------------------- +// Format identity: the streaming rebuild writes the same bytes as the merged +// cold path. +// --------------------------------------------------------------------------- + +// TestStreamingRebuild_ByteIdenticalToColdPath is the heart of Issue 20. It +// freezes a set of per-chunk .bin runs through the one-write protocol (the real +// txhash.WriteColdBin codec), then builds the SAME coverage two ways: +// +// 1. the streaming rebuild — buildTxhashIndex, which the daemon's executor +// drives on every boundary (build.go); and +// 2. a direct txhash.BuildColdIndex over the identical inputs — the merged +// cold path the bench harness on rpc-hack measures. +// +// The two .idx files must be byte-for-byte identical. That is what licenses +// PERF.md to transfer the bench harness's measured ≈4.2 B/tx and ≈1-min +// figures to the streaming daemon: the streaming rebuild is not a re-derivation +// of the format, it is the same txhash.BuildColdIndex call wrapped in catalog +// bookkeeping, and the bookkeeping does not touch the artifact. +func TestStreamingRebuild_ByteIdenticalToColdPath(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] + cfg := testBuildConfig(cat) + + // Spread entries across several chunks so the build genuinely k-way merges + // the runs (not a single trivial input). 
+ entriesByChunk := map[chunk.ID][]txEntry{ + 0: {{hashAt(1), seqIn(0, 5)}, {hashAt(2), seqIn(0, 9000)}}, + 1: {{hashAt(3), seqIn(1, 1)}, {hashAt(4), seqIn(1, 4321)}}, + 2: {{hashAt(5), seqIn(2, 77)}}, + } + var inputs []string + for c := chunk.ID(0); c <= 2; c++ { + freezeChunkBin(t, cat, c, entriesByChunk[c]) + inputs = append(inputs, cat.layout.TxHashBinPath(c)) + } + + // (1) The streaming rebuild. Non-terminal coverage [0,2] (hi 2 < window-last + // 3) so it keeps its inputs frozen — we reuse them for path (2). + require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 2, cfg)) + frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + streamingIdx := cat.layout.IndexFilePath(frozen) + + // (2) The merged cold path, over the SAME .bin inputs, with the SAME + // MinLedger/MaxLedger anchor the streaming path derives (lo.FirstLedger, + // hi.LastLedger — build.go step 3). + minLedger := chunk.ID(0).FirstLedger() + maxLedger := chunk.ID(2).LastLedger() + directIdx := filepath.Join(t.TempDir(), "direct.idx") + require.NoError(t, txhash.BuildColdIndex(context.Background(), inputs, directIdx, minLedger, maxLedger)) + + streamingBytes, err := os.ReadFile(streamingIdx) + require.NoError(t, err) + directBytes, err := os.ReadFile(directIdx) + require.NoError(t, err) + + require.Equal(t, directBytes, streamingBytes, + "the streaming rebuild must write a byte-identical .idx to the merged cold path "+ + "(this is what lets PERF.md transfer the bench harness's measured figures)") +} + +// --------------------------------------------------------------------------- +// On-disk format pins: §6.1 (.bin) and §6.2 (.idx). +// --------------------------------------------------------------------------- + +// TestStreamingBin_MatchesSpecFormat asserts the .bin a frozen chunk leaves on +// disk matches gettransaction §6.1: a uint64-LE entry-count header followed by +// 20-byte [16-byte key | 4-byte LE seq] entries. 
freezeChunkBin uses the real +// txhash.WriteColdBin, so this is the producer's actual on-disk contract. +func TestStreamingBin_MatchesSpecFormat(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) + + e0 := txEntry{hashAt(11), seqIn(0, 5)} + e1 := txEntry{hashAt(12), seqIn(0, 9999)} + freezeChunkBin(t, cat, 0, []txEntry{e0, e1}) + + raw, err := os.ReadFile(cat.layout.TxHashBinPath(0)) + require.NoError(t, err) + + // §6.1: 8-byte header + N * 20-byte entries. + const ( + hdrSize = 8 + keyW = 16 // streamhash.MinKeySize + seqW = 4 + entryW = keyW + seqW // 20 bytes exactly + wantCount = 2 + ) + require.Equal(t, txhash.ColdKeySize, keyW, "spec pins the .bin key to 16 bytes") + require.Equal(t, streamhash.MinKeySize, keyW, "16-byte key == streamhash routing-key width") + require.Len(t, raw, hdrSize+wantCount*entryW, "header + 20-byte entries") + + count := binary.LittleEndian.Uint64(raw[:hdrSize]) + require.Equal(t, uint64(wantCount), count, "uint64-LE entry-count header") + + // Each entry: 16-byte truncated key, then a uint32-LE absolute seq. Entries + // are written sorted lex by key, so locate each by its known key prefix. + wantSeqByKey := map[[keyW]byte]uint32{} + for _, e := range []txEntry{e0, e1} { + var k [keyW]byte + copy(k[:], e.hash[:keyW]) + wantSeqByKey[k] = e.seq + } + for i := 0; i < wantCount; i++ { + off := hdrSize + i*entryW + var k [keyW]byte + copy(k[:], raw[off:off+keyW]) + gotSeq := binary.LittleEndian.Uint32(raw[off+keyW : off+entryW]) + require.Equal(t, wantSeqByKey[k], gotSeq, "entry %d: 16-byte key then uint32-LE seq", i) + } +} + +// TestStreamingIdx_MatchesSpecFormat asserts the .idx the streaming rebuild +// writes matches gettransaction §6.2 — the merged #728/#780 cold-index format — +// read back through the real streamhash reader and the cold metadata codec: +// 16-byte routing key, 3-byte payload (ledgerSeq - MinLedger), 1-byte +// fingerprint, [MinLedger, MaxLedger] in the user-metadata slot. 
+func TestStreamingIdx_MatchesSpecFormat(t *testing.T) { + // Pin the spec constants themselves (a config change that moved a width + // would break the bench-transferred figures, so fail here too). + require.Equal(t, 3, txhash.ColdPayloadSize, "§6.2: 3-byte payload at the default window") + require.Equal(t, 1, txhash.ColdFingerprintSize, "§6.2: 1-byte fingerprint default") + require.Equal(t, 16, txhash.ColdKeySize, "§6.1/§6.2: 16-byte routing key") + + cat, _ := smallWindowCatalog(t, 4) + cfg := testBuildConfig(cat) + + e0 := txEntry{hashAt(21), seqIn(0, 5)} + e1 := txEntry{hashAt(22), seqIn(1, 4242)} + freezeChunkBin(t, cat, 0, []txEntry{e0}) + freezeChunkBin(t, cat, 1, []txEntry{e1}) + + require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 1, cfg)) + frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + + idx, err := streamhash.OpenPayload(cat.layout.IndexFilePath(frozen)) + require.NoError(t, err) + t.Cleanup(func() { _ = idx.Close() }) + + // Payload, fingerprint, metadata as written by the build. + require.Equal(t, txhash.ColdPayloadSize, idx.PayloadSize(), "3-byte payload on disk") + require.Equal(t, txhash.ColdFingerprintSize, idx.Stats().FingerprintSize, "1-byte fingerprint on disk") + require.Equal(t, uint64(2), idx.NumKeys(), "one key per indexed transaction") + + gotMin, gotMax, err := txhash.ParseLedgerRange(idx.UserMetadata()) + require.NoError(t, err) + require.Equal(t, chunk.ID(0).FirstLedger(), gotMin, "MinLedger anchor = lo.FirstLedger") + require.Equal(t, chunk.ID(1).LastLedger(), gotMax, "MaxLedger = hi.LastLedger") + + // The 3-byte payload is the seq's offset from MinLedger, recovered as the + // absolute seq by the reader. 
+ reader, err := txhash.OpenColdReader(cat.layout.IndexFilePath(frozen)) + require.NoError(t, err) + t.Cleanup(func() { _ = reader.Close() }) + for _, e := range []txEntry{e0, e1} { + got, gerr := reader.Get(e.hash) + require.NoError(t, gerr) + require.Equal(t, e.seq, got, "payload decodes to absolute seq (offset + MinLedger)") + } +} + +// --------------------------------------------------------------------------- +// Sizing: bytes-per-tx consistent with the design's Part-4 number. +// --------------------------------------------------------------------------- + +// TestColdIndexSizing_ConsistentWithPart4 asserts the .idx the streaming +// rebuild writes lands near the design's Part-4 ≈4.2 B/tx figure (PERF.md). The +// MPHF's per-key overhead has a fixed component that dominates at small key +// counts, so this is a small-N sanity band, not the asymptotic figure — at the +// dense full window (~3e9 keys) the bench harness measures ≈4.2 B/tx, and the +// width pins above guarantee the per-key payload+fingerprint contribution (4 B) +// is identical here. The band exists to catch a gross regression (e.g. a +// payload or fingerprint width change, or an MPHF parameter blow-up), not to +// re-measure the asymptote. +func TestColdIndexSizing_ConsistentWithPart4(t *testing.T) { + const nKeys = 20_000 + + cat, _ := smallWindowCatalog(t, 4) + cfg := testBuildConfig(cat) + + // Spread nKeys across chunks 0..2, each seq inside its chunk's range. 
+ perChunk := nKeys / 3 + var n uint64 + for c := chunk.ID(0); c <= 2; c++ { + entries := make([]txEntry, 0, perChunk) + for i := 0; i < perChunk; i++ { + //nolint:gosec // small test offsets, well within the chunk + entries = append(entries, txEntry{hashAt(uint64(c)<<40 | uint64(i)), seqIn(c, uint32(i)+1)}) + } + freezeChunkBin(t, cat, c, entries) + n += uint64(len(entries)) + } + + require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 2, cfg)) + frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + + info, err := os.Stat(cat.layout.IndexFilePath(frozen)) + require.NoError(t, err) + bytesPerTx := float64(info.Size()) / float64(n) + t.Logf("cold .idx: %d bytes over %d keys = %.3f B/tx (design Part-4 asymptote ≈4.2 B/tx at the dense window)", info.Size(), n, bytesPerTx) + + // The per-key contribution is 4 B (3-byte payload + 1-byte fingerprint) plus + // the MPHF structure; at small N the fixed header + block overhead inflates + // B/tx, so allow a generous upper band and a hard floor (payload+fingerprint + // alone is 4 B, so anything <4 means a width regressed away). + require.GreaterOrEqual(t, bytesPerTx, 4.0, + "payload (3B) + fingerprint (1B) is an inviolable 4 B/tx floor") + require.LessOrEqual(t, bytesPerTx, 8.0, + "small-N .idx should stay within a small multiple of the ≈4.2 B/tx asymptote") +} diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/process.go b/cmd/stellar-rpc/internal/fullhistory/streaming/process.go index 59cbfcb08..29424cacf 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/process.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/process.go @@ -177,6 +177,7 @@ func processChunk(ctx context.Context, chunkID chunk.ID, artifacts ArtifactSet, // any partial from a crashed "freezing" attempt. 
dirs := ingest.ColdDirs{ Ledgers: cat.layout.LedgersRoot(), + Txhash: cat.layout.TxHashRawRoot(), Events: cat.layout.EventsRoot(), } rerr := ingest.RunColdChunk(ctx, cfg.Logger, source, dirs, chunkID, cfg.Sink, artifacts.ingestConfig()) diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/process_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/process_test.go index 005be936d..1bb7d3837 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/process_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/process_test.go @@ -16,9 +16,9 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/ingest" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" ) // --------------------------------------------------------------------------- @@ -167,7 +167,7 @@ func testProcessConfig(t *testing.T, cat *Catalog) ProcessConfig { } // --------------------------------------------------------------------------- -// processChunk — produces the ledger artifact and flips the key to frozen. +// processChunk — produces the three artifacts and flips the keys to frozen. // --------------------------------------------------------------------------- func TestProcessChunk_ProducesAllArtifactsAndFreezes(t *testing.T) { @@ -180,21 +180,26 @@ func TestProcessChunk_ProducesAllArtifactsAndFreezes(t *testing.T) { chunkID := chunk.ID(0) require.NoError(t, processChunk(context.Background(), chunkID, AllArtifacts(), cfg)) - // The ledgers catalog key flipped to frozen (verified via Phase A Catalog). 
+ // All three catalog keys flipped to frozen (verified via Phase A Catalog). for _, kind := range AllKinds() { state, err := cat.State(chunkID, kind) require.NoError(t, err) require.Equal(t, StateFrozen, state, "kind %s should be frozen", kind) } - // The ledger artifact exists on disk at its canonical Layout path. + // All three artifacts exist on disk at their canonical Layout paths. require.FileExists(t, cat.layout.LedgerPackPath(chunkID)) - - // The events cold segment (all three files) exists at its canonical paths. + require.FileExists(t, cat.layout.TxHashBinPath(chunkID)) for _, p := range cat.layout.EventsPaths(chunkID) { - require.FileExists(t, p, "events cold-segment file %s should exist", p) + require.FileExists(t, p) } + // The .bin is readable as a sorted run (rule 5) — exercises the merged + // txhash cold writer's output via its reader. + entries, err := txhash.ReadColdBin(cat.layout.TxHashBinPath(chunkID)) + require.NoError(t, err) + require.Empty(t, entries, "zero-tx chunk yields an empty sorted .bin") + // The pack is a valid cold ledger pack covering the whole chunk. cr, err := ledger.OpenColdReader(cat.layout.LedgerPackPath(chunkID)) require.NoError(t, err) @@ -202,15 +207,31 @@ func TestProcessChunk_ProducesAllArtifactsAndFreezes(t *testing.T) { last, err := cr.LastSeq() require.NoError(t, err) require.Equal(t, chunkID.LastLedger(), last) - - // The events cold segment opens as a valid (eventless, since zero-tx) reader. - ecr, err := eventstore.OpenColdReader( - chunkID, filepath.Join(cat.layout.EventsRoot(), chunkID.BucketID()), eventstore.ColdReaderOptions{}) - require.NoError(t, err) - require.NoError(t, ecr.Close()) _ = root } +func TestProcessChunk_SubsetOfKinds(t *testing.T) { + cat, _ := testCatalog(t) + cfg := testProcessConfig(t, cat) + cfg.Backend = zeroTxBackend(t) + cfg.BackendWaiter = &fakeWaiter{} + + chunkID := chunk.ID(3) + // Request only events + txhash; ledgers stays absent. 
+ set := NewArtifactSet(KindEvents, KindTxHash) + require.NoError(t, processChunk(context.Background(), chunkID, set, cfg)) + + eState, _ := cat.State(chunkID, KindEvents) + tState, _ := cat.State(chunkID, KindTxHash) + lState, _ := cat.State(chunkID, KindLedgers) + require.Equal(t, StateFrozen, eState) + require.Equal(t, StateFrozen, tState) + require.Equal(t, State(""), lState, "ledgers was not requested — key stays absent") + + require.NoFileExists(t, cat.layout.LedgerPackPath(chunkID)) + require.FileExists(t, cat.layout.TxHashBinPath(chunkID)) +} + // --------------------------------------------------------------------------- // Idempotency: a frozen kind self-skips. // --------------------------------------------------------------------------- @@ -285,6 +306,7 @@ func TestProcessChunk_MarksFreezingBeforeWrite(t *testing.T) { artifacts ArtifactSet }{ {"all kinds", AllArtifacts()}, + {"events+txhash subset", NewArtifactSet(KindEvents, KindTxHash)}, {"ledgers only", NewArtifactSet(KindLedgers)}, } { t.Run(tc.name, func(t *testing.T) { @@ -427,6 +449,32 @@ func TestBackfillSource_LossOnOpenError(t *testing.T) { require.ErrorIs(t, err, ErrHotVolumeLost) } +func TestBackfillSource_PrefersFrozenPackWhenLFSNotRequested(t *testing.T) { + cat, _ := testCatalog(t) + cfg := testProcessConfig(t, cat) + + chunkID := chunk.ID(0) + // Frozen ledgers with a real pack on disk; ledgers is NOT requested. + require.NoError(t, cat.MarkChunkFreezing(chunkID, KindLedgers)) + require.NoError(t, os.MkdirAll(filepath.Dir(cat.layout.LedgerPackPath(chunkID)), 0o755)) + writeRealPack(t, cat, chunkID) + require.NoError(t, cat.FlipChunkFrozen(chunkID, KindLedgers)) + + // hot not ready; bulk configured but should not be used. 
+ bulk := zeroTxBackend(t) + cfg.Backend = bulk + cfg.BackendWaiter = &fakeWaiter{} + + set := NewArtifactSet(KindEvents, KindTxHash) // ledgers NOT requested + src, closeSrc, err := backfillSource(context.Background(), chunkID, set, cfg) + require.NoError(t, err) + require.NoError(t, closeSrc()) + // It is a pack source (re-derivation without download); the bulk backend was + // not consulted. + require.IsType(t, ingest.NewPackSource(""), src) + require.Equal(t, int32(0), bulk.opens.Load()) +} + func TestBackfillSource_DoesNotUsePackWhenLFSRequested(t *testing.T) { cat, _ := testCatalog(t) cfg := testProcessConfig(t, cat) @@ -540,7 +588,7 @@ func ingestHotPrefix(t *testing.T, chunkDir string, chunkID chunk.ID, throughSeq db, err := hotchunk.Open(chunkDir, chunkID, silentLogger()) require.NoError(t, err) - cfg := hotchunk.Ingest{Ledgers: true} + cfg := hotchunk.Ingest{Ledgers: true, Txhash: true, Events: true} for seq := chunkID.FirstLedger(); seq <= throughSeq; seq++ { lcm := xdr.LedgerCloseMetaView(zeroTxLCMBytes(t, seq)) _, err := db.IngestLedger(seq, lcm, cfg) diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/progress.go b/cmd/stellar-rpc/internal/fullhistory/streaming/progress.go index 8f21f4677..53fda1535 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/progress.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/progress.go @@ -153,9 +153,12 @@ func refineWithHotDB(_ *Catalog, probe HotProbe, live int64) (uint32, error) { // highestDurableChunk returns the highest chunk id whose artifacts are ALL // durable, or -1 when no chunk is fully durable (a fresh start). "All durable" -// is the pendingArtifacts-empty test: every per-chunk kind (currently just -// ledgers) frozen. A chunk whose only kind is not yet frozen DEGRADES the bound -// and backfill repairs it. +// is the pendingArtifacts-empty test: ledgers frozen AND events frozen AND (txhash +// frozen OR the chunk is covered by a frozen index coverage). 
It is NOT merely +// "ledgers frozen": a crash mid-freeze can leave ledgers frozen while events is still +// "freezing", and counting that chunk would let reads open over a partial +// artifact — so an incompletely frozen tip chunk DEGRADES the bound and backfill +// repairs it. // // Returns int64 so the -1 sentinel is representable; lastCommittedLedger feeds // it through completeThrough. @@ -166,7 +169,7 @@ func highestDurableChunk(cat *Catalog) (int64, error) { } // Collect frozen per-kind state per chunk. - type kinds struct{ ledgers bool } + type kinds struct{ ledgers, events, txhash bool } frozen := map[chunk.ID]*kinds{} for _, ref := range refs { if ref.State != StateFrozen { @@ -177,14 +180,29 @@ func highestDurableChunk(cat *Catalog) (int64, error) { k = &kinds{} frozen[ref.Chunk] = k } - if ref.Kind == KindLedgers { + switch ref.Kind { + case KindLedgers: k.ledgers = true + case KindEvents: + k.events = true + case KindTxHash: + k.txhash = true } } + // Frozen index coverages let a chunk's txhash requirement be satisfied even + // after the per-chunk .bin was demoted at window finalization. + covered, err := frozenCoverageContains(cat) + if err != nil { + return 0, err + } + highest := int64(-1) for c, k := range frozen { - if !k.ledgers { + if !k.ledgers || !k.events { + continue + } + if !k.txhash && !covered(c) { continue } if id := int64(c); id > highest { @@ -194,6 +212,32 @@ func highestDurableChunk(cat *Catalog) (int64, error) { return highest, nil } +// frozenCoverageContains returns a predicate reporting whether a chunk falls +// inside SOME frozen index coverage [Lo, Hi]. It reads every window's coverages +// once (AllIndexKeys) and keeps only the frozen ones; the per-chunk artifact +// scan then asks "is this chunk's txhash satisfied by a covering index" without +// re-scanning. 
+func frozenCoverageContains(cat *Catalog) (func(chunk.ID) bool, error) { + covs, err := cat.AllIndexKeys() + if err != nil { + return nil, err + } + var frozen []IndexCoverage + for _, cov := range covs { + if cov.State == StateFrozen { + frozen = append(frozen, cov) + } + } + return func(c chunk.ID) bool { + for _, cov := range frozen { + if cov.Lo <= c && c <= cov.Hi { + return true + } + } + return false + }, nil +} + // highestReadyChunkSigned returns the highest "ready" hot chunk id as int64, or // -1 when there is no ready hot key. The signed return lets completeThrough // compute the positional term (max ready - 1) without a uint32 underflow when the diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/progress_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/progress_test.go index 7f540a790..ff90c298b 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/progress_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/progress_test.go @@ -14,11 +14,11 @@ import ( // progress derivation test helpers. // --------------------------------------------------------------------------- -// makeChunkDurable flips ledgers to frozen for a chunk — the +// makeChunkDurable flips ledgers + events + txhash to frozen for a chunk — the // pendingArtifacts-empty state highestDurableChunk counts. 
func makeChunkDurable(t *testing.T, cat *Catalog, c chunk.ID) { t.Helper() - freezeKinds(t, cat, c, KindLedgers) + freezeKinds(t, cat, c, KindLedgers, KindEvents, KindTxHash) } // makeHotDir creates the on-disk hot dir for a chunk so deriveWatermark's @@ -110,18 +110,40 @@ func TestDeriveCompleteThrough(t *testing.T) { require.Equal(t, chunk.ID(2).LastLedger(), got) }) - t.Run("incompletely-frozen tip degrades the bound (ledgers freezing, not frozen)", func(t *testing.T) { + t.Run("incompletely-frozen tip degrades the bound (ledgers frozen, events freezing)", func(t *testing.T) { cat, _ := testCatalog(t) makeChunkDurable(t, cat, 0) makeChunkDurable(t, cat, 1) - // Chunk 2: ledgers only "freezing" — a mid-freeze crash. It must NOT - // count: bound stays at chunk 1. - require.NoError(t, cat.MarkChunkFreezing(2, KindLedgers)) + // Chunk 2: ledgers frozen but events only "freezing" — a mid-freeze crash. + // It must NOT count: bound stays at chunk 1. + freezeKinds(t, cat, 2, KindLedgers, KindTxHash) + require.NoError(t, cat.MarkChunkFreezing(2, KindEvents)) got, err := deriveCompleteThrough(cat) require.NoError(t, err) require.Equal(t, chunk.ID(1).LastLedger(), got) }) + t.Run("txhash satisfied by a frozen index coverage (post-finalization demote)", func(t *testing.T) { + cat, _ := testCatalog(t) + // Chunk 7: ledgers+events frozen, but txhash NOT frozen (demoted) — instead a + // frozen index coverage spans it. It must still count as durable. + freezeKinds(t, cat, 7, KindLedgers, KindEvents) + freezeCoverage(t, cat, cat.windows.WindowID(7), 0, 999) // window 0 covers chunk 7 + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, chunk.ID(7).LastLedger(), got) + }) + + t.Run("chunk NOT covered by any frozen index and no frozen txhash does not count", func(t *testing.T) { + cat, _ := testCatalog(t) + makeChunkDurable(t, cat, 0) + // Chunk 1: ledgers+events frozen, no txhash, no covering frozen index. 
+ freezeKinds(t, cat, 1, KindLedgers, KindEvents) + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, chunk.ID(0).LastLedger(), got, "chunk 1 not durable; bound stays at chunk 0") + }) + t.Run("positional term leads in steady state: everything below the live chunk", func(t *testing.T) { cat, _ := testCatalog(t) // No cold artifacts yet (steady state: chunks complete before cold exists). diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/recovery.go b/cmd/stellar-rpc/internal/fullhistory/streaming/recovery.go index e491ac388..a76528cd7 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/recovery.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/recovery.go @@ -11,20 +11,16 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" ) -// errCommitBatchFaultInjected is returned only by the test-only -// failCommitBatch hook (hooks.go) to force a recovery batch to be dropped. It -// never surfaces in production, where the hook is nil. -var errCommitBatchFaultInjected = errors.New("streaming: commit batch fault-injected (test only)") - // Surgical recovery — design "Scenario coverage" cases 3 (tainted data) and 4 // (hot-volume loss). The operator NEVER touches the filesystem. Recovery is ONE // atomic meta-store batch that DEMOTES the affected keys — never removes them — // split by tier: // -// - Tainted COLD artifacts (chunk:{c}:* keys) -> "freezing", the state that -// already means "this file is not to be trusted: re-derive or delete". -// Catch-up's per-chunk re-materialization (rule 1) overwrites the .pack in -// place. +// - Tainted COLD artifacts (chunk:{c}:* and every overlapping index:* key) -> +// "freezing", the state that already means "this file is not to be trusted: +// re-derive or delete". 
Catch-up's per-chunk re-materialization (rule 1) +// overwrites the .pack/.events/.bin in place; the per-window resolver +// rebuilds any overlapped index coverage from the re-derived inputs. // - Tainted or LOST HOT DBs (hot:chunk, the live chunk's included) -> // "transient", instantly ineligible as a source (backfillSource reads only // "ready") and ignored by the watermark (deriveWatermark counts only @@ -160,6 +156,8 @@ type RecoveryPlan struct { // ColdKeys are the chunk:{c}:* keys to demote to "freezing", in key order. ColdKeys []ArtifactRef + // IndexKeys are the overlapping index coverages to demote to "freezing". + IndexKeys []IndexCoverage // HotKeys are the hot:chunk:{c} chunk ids to demote to "transient", // ascending. HotKeys []chunk.ID @@ -168,7 +166,7 @@ type RecoveryPlan struct { // Empty reports whether the plan would demote nothing — a recovery over a range // with no matching keys (e.g. a range entirely below the floor, already pruned). func (p RecoveryPlan) Empty() bool { - return len(p.ColdKeys) == 0 && len(p.HotKeys) == 0 + return len(p.ColdKeys) == 0 && len(p.IndexKeys) == 0 && len(p.HotKeys) == 0 } // PlanSurgicalRecovery computes — but does not apply — the demotion plan for req @@ -184,8 +182,8 @@ func PlanSurgicalRecovery(cat *Catalog, req RecoveryRequest) (RecoveryPlan, erro } plan := RecoveryPlan{Request: req} - // Cold tier: chunk:{c}:* artifact keys in [Lo, Hi]. Skipped entirely for the - // hot-only (case-4) recovery. + // Cold tier: chunk:{c}:* artifact keys in [Lo, Hi], and every index coverage + // overlapping [Lo, Hi]. Skipped entirely for the hot-only (case-4) recovery. 
if req.Tier == RecoverColdAndHot { coldRefs, err := cat.ChunkArtifactKeys() if err != nil { @@ -196,6 +194,17 @@ func PlanSurgicalRecovery(cat *Catalog, req RecoveryRequest) (RecoveryPlan, erro plan.ColdKeys = append(plan.ColdKeys, ref) } } + + covs, err := cat.AllIndexKeys() + if err != nil { + return RecoveryPlan{}, err + } + for _, cov := range covs { + // Overlap: the coverage [Lo, Hi] and the requested [Lo, Hi] intersect. + if cov.Lo <= req.Hi && req.Lo <= cov.Hi { + plan.IndexKeys = append(plan.IndexKeys, cov) + } + } } // Hot tier: every hot:chunk:{c} key (any value) in [Lo, Hi]. Demoting the @@ -216,11 +225,11 @@ func PlanSurgicalRecovery(cat *Catalog, req RecoveryRequest) (RecoveryPlan, erro } // ApplySurgicalRecovery commits the plan's demotions in ONE atomic synced -// meta-store batch: every cold artifact key -> "freezing", every hot key -> -// "transient". The batch only ever demotes existing keys and unlinks nothing — -// file/dir surgery is left to the daemon's sweeps and openHotTierForChunk on -// the next start. Re-applying an already-committed plan re-writes the same -// values (a no-op in effect). +// meta-store batch: every cold artifact key -> "freezing", every overlapping +// index coverage -> "freezing", every hot key -> "transient". The batch only +// ever demotes existing keys and unlinks nothing — file/dir surgery is left to +// the daemon's sweeps and openHotTierForChunk on the next start. Re-applying an +// already-committed plan re-writes the same values (a no-op in effect). // // An empty plan commits an empty batch (harmless) rather than erroring, so a // recovery over an already-repaired or fully-pruned range is a clean no-op. 
@@ -229,13 +238,17 @@ func (c *Catalog) ApplySurgicalRecovery(plan RecoveryPlan) error { for _, ref := range plan.ColdKeys { w.Put(ref.Key(), string(StateFreezing)) } + for _, cov := range plan.IndexKeys { + w.Put(cov.Key, string(StateFreezing)) + } for _, id := range plan.HotKeys { w.Put(hotChunkKey(id), string(HotTransient)) } // Fault injection: returning an error here makes metastore drop the - // whole batch, so a test can assert NONE of the cold/hot demotions above - // became observable — the all-or-nothing property the runbook's "no - // interruption analysis" claim depends on. nil in production. + // whole batch, so a test can assert NONE of the cold/index/hot demotions + // above became observable — the all-or-nothing property the runbook's + // "no interruption analysis" claim depends on. Mirrors CommitIndex + // (protocol.go) exactly; nil in production. if c.hooks.commitBatchShouldFail() { return errCommitBatchFaultInjected } @@ -295,6 +308,19 @@ func RunSurgicalRecovery( cfg = cfg.WithDefaults() paths := cfg.ResolvePaths() + // Pin the window arithmetic the same way the daemon does. cpi is immutable + // per deployment and validated here so a malformed config cannot mis-map the + // overlapping-index scan. WithDefaults has filled the pointer; a nil here + // would be a programmer error. + if cfg.Backfill.ChunksPerTxhashIndex == nil { + return RecoveryPlan{}, errors.New( + "streaming: surgical recovery: chunks_per_txhash_index unresolved (WithDefaults not applied)") + } + windows, err := NewWindows(*cfg.Backfill.ChunksPerTxhashIndex) + if err != nil { + return RecoveryPlan{}, fmt.Errorf("streaming: surgical recovery window config: %w", err) + } + // Take EVERY storage root's flock — the exact set the daemon is meant to hold // for its whole life once the daemon-side LockRoots wiring lands. 
If another // process holds one (a second recovery, or a daemon that DOES wire the flock), @@ -314,7 +340,7 @@ func RunSurgicalRecovery( } defer func() { _ = store.Close() }() - cat := NewCatalog(store, NewLayoutFromPaths(paths)) + cat := NewCatalog(store, NewLayoutFromPaths(paths), windows) logger.WithField("range_lo", req.Lo.String()). WithField("range_hi", req.Hi.String()). @@ -326,9 +352,10 @@ func RunSurgicalRecovery( if err != nil { return RecoveryPlan{}, err } - metrics.Recovery(len(plan.ColdKeys), len(plan.HotKeys), time.Since(applyStart)) + metrics.Recovery(len(plan.ColdKeys), len(plan.IndexKeys), len(plan.HotKeys), time.Since(applyStart)) logger.WithField("cold_keys", len(plan.ColdKeys)). + WithField("index_keys", len(plan.IndexKeys)). WithField("hot_keys", len(plan.HotKeys)). WithField("duration", time.Since(applyStart).String()). Info("surgical recovery: demotion batch committed") diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/recovery_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/recovery_test.go index 6c0ef2ba9..fa8cc350c 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/recovery_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/recovery_test.go @@ -34,23 +34,34 @@ func mustHotState(t *testing.T, cat *Catalog, c chunk.ID) HotState { return s } +// mustIndexState reads one coverage key's State with a direct point lookup of index key (w, lo, hi); it fails the test if that key is absent. +func mustIndexState(t *testing.T, cat *Catalog, w WindowID, lo, hi chunk.ID) State { + t.Helper() + v, ok, err := cat.Get(indexKey(w, lo, hi)) + require.NoError(t, err) + require.True(t, ok, "coverage key index:%s:%s:%s must exist", w, lo, hi) + return State(v) +} + // --------------------------------------------------------------------------- // The demotion batch: atomic, idempotent, scoped to the range, never creating // absent keys.
// --------------------------------------------------------------------------- -func TestSurgicalRecovery_DemotesColdAndHot(t *testing.T) { +func TestSurgicalRecovery_DemotesColdIndexAndHot(t *testing.T) { cat, _ := testCatalog(t) - // In-range frozen cold artifacts on chunks 5 and 6. - freezeKinds(t, cat, 5, KindLedgers) - freezeKinds(t, cat, 6, KindLedgers) + // In-range frozen cold artifacts (all three kinds) on chunks 5 and 6. + freezeKinds(t, cat, 5, KindLedgers, KindEvents, KindTxHash) + freezeKinds(t, cat, 6, KindLedgers, KindEvents) + // A frozen index coverage [0, 7] in window 0 that OVERLAPS the range. + freezeCoverage(t, cat, 0, 0, 7) // In-range ready hot DBs on chunks 5 and 6 (the live chunk 6 included). readyHot(t, cat, 5) readyHot(t, cat, 6) // Out-of-range keys that MUST stay untouched. - freezeKinds(t, cat, 9, KindLedgers) + freezeKinds(t, cat, 9, KindLedgers, KindEvents, KindTxHash) readyHot(t, cat, 9) plan, err := cat.SurgicalRecovery(RecoveryRequest{Lo: 5, Hi: 6, Tier: RecoverColdAndHot}) @@ -59,7 +70,13 @@ func TestSurgicalRecovery_DemotesColdAndHot(t *testing.T) { // Cold artifacts in range -> "freezing". require.Equal(t, StateFreezing, mustState(t, cat, 5, KindLedgers)) + require.Equal(t, StateFreezing, mustState(t, cat, 5, KindEvents)) + require.Equal(t, StateFreezing, mustState(t, cat, 5, KindTxHash)) require.Equal(t, StateFreezing, mustState(t, cat, 6, KindLedgers)) + require.Equal(t, StateFreezing, mustState(t, cat, 6, KindEvents)) + + // Overlapping index coverage -> "freezing". + require.Equal(t, StateFreezing, mustIndexState(t, cat, 0, 0, 7)) // Hot DBs in range -> "transient" (the live chunk's included). 
require.Equal(t, HotTransient, mustHotState(t, cat, 5)) @@ -73,7 +90,8 @@ func TestSurgicalRecovery_DemotesColdAndHot(t *testing.T) { func TestSurgicalRecovery_Idempotent_ReRunIsNoOp(t *testing.T) { cat, _ := testCatalog(t) - freezeKinds(t, cat, 2, KindLedgers) + freezeKinds(t, cat, 2, KindLedgers, KindEvents, KindTxHash) + freezeCoverage(t, cat, 0, 0, 4) readyHot(t, cat, 2) readyHot(t, cat, 3) @@ -93,11 +111,12 @@ func TestSurgicalRecovery_Idempotent_ReRunIsNoOp(t *testing.T) { require.Equal(t, before, after, "re-running surgical recovery must be a no-op") require.Len(t, second.ColdKeys, len(first.ColdKeys)) + require.Len(t, second.IndexKeys, len(first.IndexKeys)) require.Len(t, second.HotKeys, len(first.HotKeys)) } // TestSurgicalRecovery_BatchIsAtomic proves ApplySurgicalRecovery commits its -// cold/hot demotions in ONE all-or-nothing batch — the core property the +// cold/index/hot demotions in ONE all-or-nothing batch — the core property the // design's "commits atomically or not at all" / "no interruption analysis" // claim rests on. We fault-inject a failure INSIDE the batch callback (which // makes metastore drop the whole batch) and assert the FULL key snapshot is @@ -107,11 +126,12 @@ func TestSurgicalRecovery_Idempotent_ReRunIsNoOp(t *testing.T) { func TestSurgicalRecovery_BatchIsAtomic(t *testing.T) { cat, _ := testCatalog(t) - // A fixture spanning both demotion families: frozen cold artifacts and ready - // hot DBs (the live chunk's included) — so a partial-commit impl would leak at - // least one of them. - freezeKinds(t, cat, 5, KindLedgers) - freezeKinds(t, cat, 6, KindLedgers) + // A fixture spanning all three demotion families: frozen cold artifacts, an + // overlapping frozen index coverage, and ready hot DBs (the live chunk's + // included) — so a partial-commit impl would leak at least one of them. 
+ freezeKinds(t, cat, 5, KindLedgers, KindEvents, KindTxHash) + freezeKinds(t, cat, 6, KindLedgers, KindEvents) + freezeCoverage(t, cat, 0, 0, 7) readyHot(t, cat, 5) readyHot(t, cat, 6) @@ -122,6 +142,7 @@ func TestSurgicalRecovery_BatchIsAtomic(t *testing.T) { require.NoError(t, err) require.False(t, plan.Empty()) require.NotEmpty(t, plan.ColdKeys) + require.NotEmpty(t, plan.IndexKeys) require.NotEmpty(t, plan.HotKeys) before := snapshotAllKeys(t, cat) @@ -132,8 +153,8 @@ func TestSurgicalRecovery_BatchIsAtomic(t *testing.T) { require.Error(t, err, "ApplySurgicalRecovery must surface the injected batch failure") cat.hooks.failCommitBatch = nil - // All-or-nothing: the failed batch wrote NOTHING — every cold/hot key is - // still exactly as seeded. + // All-or-nothing: the failed batch wrote NOTHING — every cold/index/hot key + // is still exactly as seeded. after := snapshotAllKeys(t, cat) require.Equal(t, before, after, "a dropped recovery batch must leave every demotion key unchanged (atomicity)") @@ -141,13 +162,14 @@ func TestSurgicalRecovery_BatchIsAtomic(t *testing.T) { // And a clean re-apply (no fault) lands the whole batch. require.NoError(t, cat.ApplySurgicalRecovery(plan)) require.Equal(t, StateFreezing, mustState(t, cat, 5, KindLedgers)) - require.Equal(t, StateFreezing, mustState(t, cat, 6, KindLedgers)) + require.Equal(t, StateFreezing, mustState(t, cat, 6, KindEvents)) + require.Equal(t, StateFreezing, mustIndexState(t, cat, 0, 0, 7)) require.Equal(t, HotTransient, mustHotState(t, cat, 5)) require.Equal(t, HotTransient, mustHotState(t, cat, 6)) } // snapshotAllKeys returns a map of every meta-store key to its value, for -// no-op / atomicity assertions. It walks the chunk + hot key families. +// no-op / atomicity assertions. It walks the three key families plus the pins. 
func snapshotAllKeys(t *testing.T, cat *Catalog) map[string]string { t.Helper() m := map[string]string{} @@ -156,6 +178,11 @@ func snapshotAllKeys(t *testing.T, cat *Catalog) map[string]string { for _, r := range refs { m[r.Key()] = string(r.State) } + covs, err := cat.AllIndexKeys() + require.NoError(t, err) + for _, c := range covs { + m[c.Key] = string(c.State) + } hots, err := cat.HotChunkKeys() require.NoError(t, err) for _, id := range hots { @@ -168,8 +195,9 @@ func TestSurgicalRecovery_HotOnly_LeavesColdUntouched(t *testing.T) { cat, _ := testCatalog(t) // The case-4 fixture: cold artifacts survive on durable storage; only the - // hot DBs are lost. A hot-only recovery must NOT touch any cold key. - freezeKinds(t, cat, 5, KindLedgers) + // hot DBs are lost. A hot-only recovery must NOT touch any cold/index key. + freezeKinds(t, cat, 5, KindLedgers, KindEvents, KindTxHash) + freezeCoverage(t, cat, 0, 0, 9) readyHot(t, cat, 5) readyHot(t, cat, 6) @@ -177,10 +205,13 @@ func TestSurgicalRecovery_HotOnly_LeavesColdUntouched(t *testing.T) { require.NoError(t, err) require.Empty(t, plan.ColdKeys, "hot-only recovery must not list cold keys") + require.Empty(t, plan.IndexKeys, "hot-only recovery must not list index keys") require.Len(t, plan.HotKeys, 2) - // Cold keys are exactly as seeded. + // Cold + index keys are exactly as seeded. require.Equal(t, StateFrozen, mustState(t, cat, 5, KindLedgers)) + require.Equal(t, StateFrozen, mustState(t, cat, 5, KindTxHash)) + require.Equal(t, StateFrozen, mustIndexState(t, cat, 0, 0, 9)) // Only the hot keys were demoted. require.Equal(t, HotTransient, mustHotState(t, cat, 5)) @@ -191,7 +222,7 @@ func TestSurgicalRecovery_NeverCreatesAbsentKeys(t *testing.T) { cat, _ := testCatalog(t) // Seed only chunk 5; recover a DISJOINT range [20, 25] that matches nothing. 
- freezeKinds(t, cat, 5, KindLedgers) + freezeKinds(t, cat, 5, KindLedgers, KindEvents, KindTxHash) readyHot(t, cat, 5) plan, err := cat.SurgicalRecovery(RecoveryRequest{Lo: 20, Hi: 25, Tier: RecoverColdAndHot}) @@ -215,27 +246,35 @@ func TestSurgicalRecovery_RangeValidation(t *testing.T) { require.Contains(t, err.Error(), "lo") } -// TestSurgicalRecovery_ColdBoundary proves the cold-key range predicate is -// inclusive at both endpoints and excludes strictly-out-of-range chunks. -func TestSurgicalRecovery_ColdBoundary(t *testing.T) { +// TestSurgicalRecovery_IndexOverlapBoundary proves the index-overlap predicate +// is inclusive at both endpoints and excludes strictly-disjoint coverages. +func TestSurgicalRecovery_IndexOverlapBoundary(t *testing.T) { cat, _ := testCatalog(t) - // Frozen cold artifacts at the range edges and just outside [10, 20]. - for _, c := range []chunk.ID{9, 10, 20, 21} { - freezeKinds(t, cat, c, KindLedgers) - } + // Four coverages in window 0 around the recovery range [10, 20]. The overlap + // predicate is state-blind, so seed them all as raw "freezing" marks (only one + // frozen coverage per window is allowed; we assert which keys the plan selects, + // not their lifecycle state). 
+ _, err := cat.MarkIndexFreezing(0, 0, 9) // [0,9] — disjoint (hi < lo) + require.NoError(t, err) + _, err = cat.MarkIndexFreezing(0, 9, 10) // [9,10] — overlaps at the low edge + require.NoError(t, err) + _, err = cat.MarkIndexFreezing(0, 21, 30) // [21,30] — disjoint (lo > hi) + require.NoError(t, err) + _, err = cat.MarkIndexFreezing(0, 20, 25) // [20,25] — overlaps at the high edge + require.NoError(t, err) plan, err := PlanSurgicalRecovery(cat, RecoveryRequest{Lo: 10, Hi: 20, Tier: RecoverColdAndHot}) require.NoError(t, err) selected := map[string]bool{} - for _, ref := range plan.ColdKeys { - selected[ref.Key()] = true + for _, cov := range plan.IndexKeys { + selected[cov.Key] = true } - require.True(t, selected[chunkKey(10, KindLedgers)], "chunk 10 is the low edge (inclusive)") - require.True(t, selected[chunkKey(20, KindLedgers)], "chunk 20 is the high edge (inclusive)") - require.False(t, selected[chunkKey(9, KindLedgers)], "chunk 9 is below the range") - require.False(t, selected[chunkKey(21, KindLedgers)], "chunk 21 is above the range") + require.True(t, selected[indexKey(0, 9, 10)], "[9,10] overlaps at the low edge") + require.True(t, selected[indexKey(0, 20, 25)], "[20,25] overlaps at the high edge") + require.False(t, selected[indexKey(0, 0, 9)], "[0,9] is strictly below the range") + require.False(t, selected[indexKey(0, 21, 30)], "[21,30] is strictly above the range") } // --------------------------------------------------------------------------- @@ -341,7 +380,7 @@ func TestSurgicalRecovery_CatchupReDerivesFreezingColdArtifacts(t *testing.T) { _, err = cat.SurgicalRecovery(RecoveryRequest{Lo: 2, Hi: 3, Tier: RecoverColdAndHot}) require.NoError(t, err) require.Equal(t, StateFreezing, mustState(t, cat, 2, KindLedgers)) - require.Equal(t, StateFreezing, mustState(t, cat, 3, KindLedgers)) + require.Equal(t, StateFreezing, mustState(t, cat, 3, KindEvents)) // The durable frontier regresses to chunk 1 — chunks 2 and 3 are now // re-derivable "freezing" 
debris, not durable truth. Catch-up's resolver will @@ -475,13 +514,17 @@ func TestRunSurgicalRecovery_HappyPath_OpensDemotesCloses(t *testing.T) { cfg := recoveryConfig(t) paths := cfg.WithDefaults().ResolvePaths() + windows, err := NewWindows(DefaultChunksPerTxhashIndex) + require.NoError(t, err) + // Seed durable state through a catalog on the SAME meta path the entrypoint // will reopen, then CLOSE it (RocksDB is single-writer; the entrypoint takes // the lock + reopens). seedStore, err := metastore.New(paths.Catalog, silentLogger()) require.NoError(t, err) - seedCat := NewCatalog(seedStore, NewLayout(paths.DataDir)) - freezeKinds(t, seedCat, 5, KindLedgers) + seedCat := NewCatalog(seedStore, NewLayout(paths.DataDir), windows) + freezeKinds(t, seedCat, 5, KindLedgers, KindEvents, KindTxHash) + freezeCoverage(t, seedCat, 0, 0, 9) require.NoError(t, seedCat.PutHotTransient(5)) require.NoError(t, seedCat.FlipHotReady(5)) require.NoError(t, seedStore.Close()) @@ -492,16 +535,18 @@ func TestRunSurgicalRecovery_HappyPath_OpensDemotesCloses(t *testing.T) { RecoveryRequest{Lo: 5, Hi: 5, Tier: RecoverColdAndHot}, silentLogger(), nil) require.NoError(t, err) require.False(t, plan.Empty()) - require.Len(t, plan.ColdKeys, 1) + require.Len(t, plan.ColdKeys, 3) + require.Len(t, plan.IndexKeys, 1) require.Len(t, plan.HotKeys, 1) // The entrypoint released its locks, so a fresh reopen sees the demotions. 
verifyStore, err := metastore.New(paths.Catalog, silentLogger()) require.NoError(t, err) defer func() { _ = verifyStore.Close() }() - verifyCat := NewCatalog(verifyStore, NewLayout(paths.DataDir)) + verifyCat := NewCatalog(verifyStore, NewLayout(paths.DataDir), windows) require.Equal(t, StateFreezing, mustState(t, verifyCat, 5, KindLedgers)) + require.Equal(t, StateFreezing, mustIndexState(t, verifyCat, 0, 0, 9)) require.Equal(t, HotTransient, mustHotState(t, verifyCat, 5)) } diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/resolve.go b/cmd/stellar-rpc/internal/fullhistory/streaming/resolve.go index c6676a96a..8cdd02cf2 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/resolve.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/resolve.go @@ -15,16 +15,34 @@ type ChunkBuild struct { Artifacts ArtifactSet } -// Plan is the resolver's output: the per-chunk freeze work. It carries no -// behavior — it can be logged, diffed, and tested without running it, which is -// what makes "the plan is just a value" literally true. +// Plan is the resolver's output: the two strata of work (chunk freezes and +// index rebuilds). It carries no behavior — it can be logged, diffed, and +// tested without running it, which is what makes "the plan is just a value" +// literally true. IndexBuild itself is defined in build.go (the executor runs +// it via buildThenSweep). type Plan struct { ChunkBuilds []ChunkBuild + IndexBuilds []IndexBuild } // Empty reports whether the plan schedules no work — the steady-state / // quiescent case. -func (p Plan) Empty() bool { return len(p.ChunkBuilds) == 0 } +func (p Plan) Empty() bool { return len(p.ChunkBuilds) == 0 && len(p.IndexBuilds) == 0 } + +// coverageRange is a [Lo, Hi] chunk range, inclusive on both ends. It is the +// resolver's local arithmetic type for the per-window txhash rule's "desired" +// coverage; the stored coverage comes from a parsed IndexCoverage key. 
+type coverageRange struct { + Lo, Hi chunk.ID +} + +// covers reports whether this range fully contains other ("other ⊆ this"): its +// Lo is at or below other's Lo and its Hi is at or above other's Hi. The +// resolver schedules nothing for a window when the stored frozen coverage +// covers the desired range. +func (r coverageRange) covers(other coverageRange) bool { + return r.Lo <= other.Lo && r.Hi >= other.Hi +} // resolve computes the diff between the desired state — every artifact derived // from every ledger in [rangeStart, rangeEnd] is durable and servable — and the @@ -33,11 +51,29 @@ func (p Plan) Empty() bool { return len(p.ChunkBuilds) == 0 } // on every run, so a restart re-plans from what is actually on disk with // nothing to reconcile (design-docs "Postcondition-driven scheduling"). // -// The kind rule: +// The kind rules: // // - ledgers / events (per-chunk): chunk c is needed iff chunk:{c}:{kind} is not // "frozen". A "freezing"/"pruning"/absent key re-materializes (idempotent // inside processChunk); a "frozen" key self-skips here. +// - txhash (per-window): for EACH window overlapping the range, compare the +// stored coverage (the window's unique "frozen" index key, via the Phase A +// Catalog.FrozenCoverage) with the desired coverage +// [max(windowFirstChunk, rangeStart), min(windowLastChunk, rangeEnd)]. +// Desired ⊆ stored → schedule nothing (steady-state restart, a risen floor, +// or a finalized window the range ends in). Otherwise request a .bin for +// every desired chunk not already frozen (already-frozen .bins self-skip) +// and emit one IndexBuild for [desired.Lo, desired.Hi]; the build is +// terminal — derived later via Windows.IsTerminalCoverage — iff desired.Hi +// is the window's last chunk. 
+// +// The stored_hi clause is load-bearing: a window that was CURRENT at shutdown +// carries a frozen key with hi < windowLastChunk, and when downtime crosses the +// window boundary it becomes a complete window still needing its tail chunks' +// .bin and a full rebuild — classifying by lo alone would strand chunks +// (stored_hi, windowLastChunk] permanently. The desired.Hi upper cap +// (min(windowLastChunk, rangeEnd)) makes the rule uniform: no special trailing- +// window case exists. // // Inverted range (rangeEnd < rangeStart, a network younger than one complete // chunk) returns the empty Plan. @@ -46,6 +82,7 @@ func resolve(cfg ExecConfig, rangeStart, rangeEnd chunk.ID) (Plan, error) { return Plan{}, nil // no complete chunk exists yet } cat := cfg.Catalog + wins := cat.Windows() // Per-chunk work, unioned across kinds; one ChunkBuild per chunk regardless // of how many kinds it needs (one processChunk pass produces all). @@ -67,7 +104,43 @@ func resolve(cfg ExecConfig, rangeStart, rangeEnd chunk.ID) (Plan, error) { } } - return Plan{ChunkBuilds: chunkBuildsFrom(needs)}, nil + // The txhash kind: one rule per overlapping window. + var builds []IndexBuild + for _, w := range windowsOverlapping(wins, rangeStart, rangeEnd) { + desired := coverageRange{ + Lo: maxChunk(wins.FirstChunk(w), rangeStart), + Hi: minChunk(wins.LastChunk(w), rangeEnd), // capped by range end ⇒ uniform trailing window + } + + frozen, hasFrozen, err := cat.FrozenCoverage(w) + if err != nil { + return Plan{}, err + } + if hasFrozen { + stored := coverageRange{Lo: frozen.Lo, Hi: frozen.Hi} + if stored.covers(desired) { + continue // steady-state restart, risen floor, or finalized window + } + } + + // Desired exceeds stored (or no frozen key): request a .bin for every + // desired chunk not already frozen, and emit one IndexBuild. 
+ for c := desired.Lo; ; c++ { + state, err := cat.State(c, KindTxHash) + if err != nil { + return Plan{}, err + } + if state != StateFrozen { + needs[c] = needs[c].Add(KindTxHash) + } + if c == desired.Hi { + break + } + } + builds = append(builds, IndexBuild{Window: w, Lo: desired.Lo, Hi: desired.Hi}) + } + + return Plan{ChunkBuilds: chunkBuildsFrom(needs), IndexBuilds: builds}, nil } // chunkBuildsFrom flattens the per-chunk needs map into a ChunkBuild slice, @@ -94,3 +167,36 @@ func chunkBuildsFrom(needs map[chunk.ID]ArtifactSet) []ChunkBuild { } return builds } + +// windowsOverlapping returns the window ids overlapping [rangeStart, rangeEnd] +// inclusive, ascending. The endpoints' windows bracket the run; the range is +// contiguous so every window between them overlaps. +func windowsOverlapping(wins Windows, rangeStart, rangeEnd chunk.ID) []WindowID { + if rangeEnd < rangeStart { + return nil + } + first := wins.WindowID(rangeStart) + last := wins.WindowID(rangeEnd) + out := make([]WindowID, 0, uint32(last)-uint32(first)+1) + for w := first; ; w++ { + out = append(out, w) + if w == last { + break + } + } + return out +} + +func maxChunk(a, b chunk.ID) chunk.ID { + if a > b { + return a + } + return b +} + +func minChunk(a, b chunk.ID) chunk.ID { + if a < b { + return a + } + return b +} diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/resolve_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/resolve_test.go index 9f459242d..c1551626e 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/resolve_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/resolve_test.go @@ -21,8 +21,19 @@ func freezeKinds(t *testing.T, cat *Catalog, chunkID chunk.ID, kinds ...Kind) { require.NoError(t, cat.FlipChunkFrozen(chunkID, kinds...)) } -// resolveCfg wires a minimal ExecConfig over a catalog for resolve tests -// (resolve never runs a task, so the primitive deps stay nil). 
+// freezeCoverage marks and commits a frozen index coverage [lo, hi] for window +// w. With no present chunk:{c}:txhash keys in the window, a terminal commit +// demotes nothing, so this leaves exactly one "frozen" coverage — the stored +// state resolve's per-window rule compares against. +func freezeCoverage(t *testing.T, cat *Catalog, w WindowID, lo, hi chunk.ID) { + t.Helper() + cov, err := cat.MarkIndexFreezing(w, lo, hi) + require.NoError(t, err) + require.NoError(t, cat.CommitIndex(cov)) +} + +// resolveCfg wires a minimal ExecConfig over a small-window catalog for resolve +// tests (resolve never runs a task, so the primitive deps stay nil). func resolveCfg(cat *Catalog) ExecConfig { return ExecConfig{Catalog: cat, Logger: silentLogger(), Workers: 1} } @@ -51,70 +62,179 @@ func findChunkBuild(p Plan, c chunk.ID) (ChunkBuild, bool) { // --------------------------------------------------------------------------- func TestResolve_InvertedRangeIsEmpty(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 4) plan, err := resolve(resolveCfg(cat), 5, 4) require.NoError(t, err) require.True(t, plan.Empty(), "rangeEnd < rangeStart must yield an empty plan") } // --------------------------------------------------------------------------- -// Steady-state restart: a fully-frozen range resolves to nothing. +// Steady-state restart: a fully-frozen, finalized window resolves to nothing. // --------------------------------------------------------------------------- func TestResolve_SteadyStateRestartIsEmpty(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] - // Every chunk in [0,3] has its ledgers + events frozen — the post-freeze - // steady state. + // Every chunk has ledgers + events frozen; the window's terminal coverage [0,3] + // is frozen (the .bins were demoted+swept at finalization, so no txhash keys + // remain). This is exactly the post-finalization steady state. 
for c := chunk.ID(0); c <= 3; c++ { freezeKinds(t, cat, c, KindLedgers, KindEvents) } + freezeCoverage(t, cat, 0, 0, 3) plan, err := resolve(resolveCfg(cat), 0, 3) require.NoError(t, err) require.True(t, plan.Empty(), - "steady-state restart of fully-frozen chunks must schedule nothing, got %+v", plan) + "steady-state restart of a finalized window must schedule nothing, got %+v", plan) } // --------------------------------------------------------------------------- -// A range with a partly-frozen middle: only the un-frozen chunks are scheduled, -// and each scheduled chunk requests the ledgers artifact. +// A risen floor: stored coverage starts BELOW the desired lo. desired ⊆ stored +// (stored is wider), so nothing is scheduled — the stale stored lo is the +// reader retention contract's problem, not a rebuild trigger. // --------------------------------------------------------------------------- -func TestResolve_SchedulesOnlyUnfrozenChunks(t *testing.T) { - cat, _ := testCatalog(t) +func TestResolve_RisenFloorSchedulesNothing(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] - // Chunks 0,1,5 frozen (ledgers + events); 2,3,4 absent. - for _, c := range []chunk.ID{0, 1, 5} { + for c := chunk.ID(0); c <= 3; c++ { freezeKinds(t, cat, c, KindLedgers, KindEvents) } + // Stored terminal coverage spans the whole window [0,3]. + freezeCoverage(t, cat, 0, 0, 3) - plan, err := resolve(resolveCfg(cat), 0, 5) + // The floor rose to chunk 2: desired = [2,3] ⊆ stored [0,3]. + plan, err := resolve(resolveCfg(cat), 2, 3) + require.NoError(t, err) + require.Empty(t, plan.IndexBuilds, "a risen floor must not trigger a rebuild") + require.Empty(t, plan.ChunkBuilds, "ledgers/events frozen for the in-range chunks") +} + +// --------------------------------------------------------------------------- +// A window mid-roll at shutdown: the stored frozen coverage has hi < the +// window's last chunk. 
When downtime crosses the window boundary the window +// becomes complete and the tail chunks (stored_hi, lastChunk] must be scheduled +// — classifying by lo alone would strand them. This is the stored_hi clause. +// --------------------------------------------------------------------------- + +func TestResolve_WindowMidRollAtShutdownSchedulesTail(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] + + // At shutdown the window was current with coverage [0,1]; chunks 0,1 have + // their .bin + ledgers/events frozen, chunks 2,3 are not yet produced. + for c := chunk.ID(0); c <= 1; c++ { + freezeKinds(t, cat, c, KindLedgers, KindEvents, KindTxHash) + } + freezeCoverage(t, cat, 0, 0, 1) // stored_hi = 1 < lastChunk(0) = 3 + + // Restart catches up the now-complete window [0,3]. + plan, err := resolve(resolveCfg(cat), 0, 3) + require.NoError(t, err) + + // Exactly one index build, covering the whole (now complete) window. + require.Len(t, plan.IndexBuilds, 1) + require.Equal(t, IndexBuild{Window: 0, Lo: 0, Hi: 3}, plan.IndexBuilds[0]) + + // Tail chunks 2 and 3 must be scheduled for ALL kinds (nothing frozen); + // chunks 0 and 1 (ledgers/events/txhash already frozen) self-skip entirely. + require.Equal(t, []chunk.ID{2, 3}, chunkSet(plan), + "only the tail chunks (stored_hi, lastChunk] need work — lo-only classification would strand them") + + cb2, ok := findChunkBuild(plan, 2) + require.True(t, ok) + require.True(t, cb2.Artifacts.Has(KindLedgers)) + require.True(t, cb2.Artifacts.Has(KindEvents)) + require.True(t, cb2.Artifacts.Has(KindTxHash)) +} + +// A subtler mid-roll: the head chunks already have ledgers/events frozen but NOT +// their .bin (a crash after the cold pass but the txhash key was demoted/swept +// is impossible mid-roll, but an in-progress window can legitimately have a +// head chunk needing only its .bin re-derived). resolve must request txhash for +// every desired chunk whose .bin is not frozen, head chunks included. 
+func TestResolve_MidRollReDerivesMissingBins(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) + + // ledgers+events frozen for all four chunks; .bin frozen only for 0,1. + for c := chunk.ID(0); c <= 3; c++ { + freezeKinds(t, cat, c, KindLedgers, KindEvents) + } + freezeKinds(t, cat, 0, KindTxHash) + freezeKinds(t, cat, 1, KindTxHash) + freezeCoverage(t, cat, 0, 0, 1) // current window, hi=1 + + plan, err := resolve(resolveCfg(cat), 0, 3) require.NoError(t, err) - require.Equal(t, []chunk.ID{2, 3, 4}, chunkSet(plan), - "only the un-frozen chunks need work; frozen chunks self-skip") - for _, c := range []chunk.ID{2, 3, 4} { + require.Equal(t, []IndexBuild{{Window: 0, Lo: 0, Hi: 3}}, plan.IndexBuilds) + // Only chunks 2,3 need a .bin (and only the .bin — ledgers/events are frozen). + require.Equal(t, []chunk.ID{2, 3}, chunkSet(plan)) + for _, c := range []chunk.ID{2, 3} { cb, ok := findChunkBuild(plan, c) require.True(t, ok) - require.True(t, cb.Artifacts.Has(KindLedgers), "an un-frozen chunk requests ledgers") - require.Equal(t, AllArtifacts(), cb.Artifacts) + require.Equal(t, NewArtifactSet(KindTxHash), cb.Artifacts, + "head chunks' ledgers/events frozen ⇒ only txhash requested") } } -// A "freezing" (not "frozen") key re-materializes: a partial/crashed freeze -// attempt is re-scheduled, never trusted. -func TestResolve_FreezingKeyReMaterializes(t *testing.T) { - cat, _ := testCatalog(t) +// --------------------------------------------------------------------------- +// A finalized window the range ENDS in: desired hi = rangeEnd < lastChunk, and +// the stored terminal coverage already covers it. Nothing scheduled — a crash +// right after a terminal commit resumes here and the terminal coverage covers +// any desired sub-range. 
+// --------------------------------------------------------------------------- + +func TestResolve_FinalizedWindowRangeEndsIn(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // windows: 0=[0,3], 1=[4,7] - // Chunk 1 is mid-freeze ("freezing", not flipped to "frozen"). - require.NoError(t, cat.MarkChunkFreezing(1, KindLedgers)) + // Window 0 finalized: ledgers/events frozen, terminal coverage [0,3] frozen. + for c := chunk.ID(0); c <= 3; c++ { + freezeKinds(t, cat, c, KindLedgers, KindEvents) + } + freezeCoverage(t, cat, 0, 0, 3) - plan, err := resolve(resolveCfg(cat), 1, 1) + // Range ends inside window 0 (at chunk 2): desired for window 0 = [0,2] ⊆ + // stored [0,3]. No tail of window 1 is in range. + plan, err := resolve(resolveCfg(cat), 0, 2) require.NoError(t, err) - require.Equal(t, []chunk.ID{1}, chunkSet(plan), - "a freezing (not frozen) key must be re-scheduled") - cb, ok := findChunkBuild(plan, 1) - require.True(t, ok) - require.True(t, cb.Artifacts.Has(KindLedgers)) + require.True(t, plan.Empty(), + "a finalized window the range ends in needs no rebuild, got %+v", plan) +} + +// --------------------------------------------------------------------------- +// A range spanning a finalized window and a fresh trailing window: the +// finalized window contributes nothing, the trailing (never-built) window +// contributes one non-terminal index build plus its chunks. +// --------------------------------------------------------------------------- + +func TestResolve_SpanFinalizedPlusFreshTrailing(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // windows: 0=[0,3], 1=[4,7] + + // Window 0 fully finalized. + for c := chunk.ID(0); c <= 3; c++ { + freezeKinds(t, cat, c, KindLedgers, KindEvents) + } + freezeCoverage(t, cat, 0, 0, 3) + + // Window 1 untouched; range ends mid-window-1 at chunk 5. 
+ plan, err := resolve(resolveCfg(cat), 0, 5) + require.NoError(t, err) + + // Only window 1's partial coverage [4,5] is built (NON-terminal: hi=5 < + // lastChunk(1)=7). + require.Len(t, plan.IndexBuilds, 1) + require.Equal(t, IndexBuild{Window: 1, Lo: 4, Hi: 5}, plan.IndexBuilds[0]) + + wins := cat.Windows() + require.False(t, wins.IsTerminalCoverage(IndexCoverage{Window: 1, Lo: 4, Hi: 5}), + "a trailing partial window is non-terminal") + + // Chunks 4 and 5 need every kind (all absent); window-0 chunks self-skip. + require.Equal(t, []chunk.ID{4, 5}, chunkSet(plan)) + for _, c := range []chunk.ID{4, 5} { + cb, ok := findChunkBuild(plan, c) + require.True(t, ok) + require.Equal(t, AllArtifacts(), cb.Artifacts) + } } diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/retention.go b/cmd/stellar-rpc/internal/fullhistory/streaming/retention.go index 8d9b07af0..f74959677 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/retention.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/retention.go @@ -12,12 +12,12 @@ import ( // A read for any seq below the effective retention floor is not-found, // regardless of whether the underlying file still exists on disk. // -// A read may land on a .pack that pruning has since deleted, or on one that -// pruning is about to delete; a below-floor read is not-found either way. From -// the storage layer's perspective, retention — not the set of files on disk — -// is the source of truth for "is this data available?", and that is the entire -// property prune/sweep rely on to unlink unilaterally (sweep.go, -// eligibility.go). +// A stale .idx may still resolve a tx-hash to a .pack that pruning has since +// deleted, or to one that pruning is about to delete; a below-floor read is +// not-found either way. 
From the storage layer's perspective, retention — not +// the set of files on disk — is the source of truth for "is this data +// available?", and that is the entire property prune/sweep rely on to unlink +// unilaterally (sweep.go, eligibility.go). // // The floor plays two roles with OPPOSITE safe directions, and the system // keeps them strictly separate (design "Lifecycle"): @@ -33,9 +33,9 @@ import ( // can produce. Production therefore never consults the floor below existing // storage; extending the bottom of storage (retention widening) is // exclusively catch-up's job, where producibility is enforced lazily per -// chunk by the cold ingest (no pre-flight gate). This gate is a retention -// consumer by construction (a read is harmless to reject), so it uses the -// floor directly. +// chunk by the buildTxhashIndex .bin precondition (no pre-flight gate). This +// gate is a retention consumer by construction (a read is harmless to +// reject), so it uses the floor directly. // // retentionFloorFor is the gate's floor: effectiveRetentionFloor evaluated at // the SAME (completeThrough, RetentionChunks, earliest_ledger) the prune and @@ -94,6 +94,16 @@ func (g RetentionGate) Floor() uint32 { return g.floor } // is not-found regardless of on-disk state — the contract pruning relies on. func (g RetentionGate) Admits(seq uint32) bool { return seq >= g.floor } +// WindowBelowFloor reports whether an entire window sits below the floor — its +// last chunk's last ledger is below the floor. Such a window's .idx need not be +// probed at all (every seq it could resolve is not-found), and the prune scan +// is free to sweep it. A window straddling the floor is NOT below it: it still +// holds in-retention seqs, so the reader probes it and lets Admits mask the +// below-floor tail. windows maps a window id to its chunk span. 
+func (g RetentionGate) WindowBelowFloor(w WindowID, windows Windows) bool { + return windows.LastChunk(w).LastLedger() < g.floor +} + // ChunkBelowFloor reports whether an entire chunk sits below the floor — its // last ledger is below the floor. This is the same predicate the discard and // prune scans use (eligibility.go: last < floor), surfaced on the gate so the diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/retention_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/retention_test.go index edda4f131..5a10874b9 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/retention_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/retention_test.go @@ -60,55 +60,76 @@ func TestRetentionGate_ShorteningRaisesFloorImmediately(t *testing.T) { assert.False(t, narrow.Admits(seq), "shortening retention makes it not-found at once") } -// ChunkBelowFloor: a chunk wholly below the floor is past retention; one -// straddling it is not. -func TestRetentionGate_ChunkBelowFloor(t *testing.T) { +// WindowBelowFloor / ChunkBelowFloor: a window or chunk wholly below the floor +// is past retention; one straddling it is not. +func TestRetentionGate_WindowAndChunkBelowFloor(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // windows: 0=[0,3], 1=[4,7], 2=[8,11] + wins := cat.Windows() + // through = chunk 11's last ledger, retain 4 chunks ⇒ floor = chunk 8's first - // ledger (11-4+1 = 8). + // ledger (11-4+1 = 8). Window 2 starts at the floor. through := chunk.ID(11).LastLedger() gate := NewRetentionGate(through, 4, 0) require.Equal(t, chunk.ID(8).FirstLedger(), gate.Floor()) + // Window 0 ([0,3]) and window 1 ([4,7]) are wholly below the floor (chunk 8); + // window 2 ([8,11]) is the floor window — at it, not below. + assert.True(t, gate.WindowBelowFloor(0, wins)) + assert.True(t, gate.WindowBelowFloor(1, wins)) + assert.False(t, gate.WindowBelowFloor(2, wins)) + // Chunk 7 is below the floor; chunk 8 is the floor chunk. 
assert.True(t, gate.ChunkBelowFloor(7)) assert.False(t, gate.ChunkBelowFloor(8)) } // --------------------------------------------------------------------------- -// Scenario: a chunk STRADDLING the floor serves in-range seqs and not-found -// below. The reader gate makes below-floor reads not-found regardless of what -// is on disk, while the in-range tail still serves. Only chunks WHOLLY below the -// floor are swept by the prune scan; a straddling chunk's frozen ledger artifact -// survives. +// Scenario: a window STRADDLING the floor serves in-range seqs and not-found +// below. A finalized window's frozen .idx covers [lo, hi] including chunks the +// floor has since risen past; the gate masks those below-floor chunks. This is +// the stale-.idx case gettransaction §8.5 tolerates because the reader gate +// makes below-floor reads not-found regardless of what the .idx resolves. // --------------------------------------------------------------------------- -func TestReaderRetention_StraddlingFloorServesInRangeNotBelow(t *testing.T) { - cat, _ := testCatalog(t) +func TestReaderRetention_WindowStraddlingFloorServesInRangeNotBelow(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] + wins := cat.Windows() - // Chunks 0..3 have their ledger + events artifacts frozen, written when the - // floor sat at genesis. + // Window 0 was finalized at terminal coverage [0,3] when the floor sat at + // genesis. Its frozen .idx hashes chunks 0..3 — a static, stale-lo artifact. for c := chunk.ID(0); c <= 3; c++ { freezeKinds(t, cat, c, KindLedgers, KindEvents) - writeArtifact(t, cat.layout.LedgerPackPath(c)) } + freezeCoverage(t, cat, 0, 0, 3) + fk, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.True(t, wins.IsTerminalCoverage(fk), "window 0 is finalized") - // The floor later rose to chunk 2 (its first ledger): chunks 0,1 below it, - // chunks 2,3 in range. 
+ // The floor later rose to chunk 2 (its first ledger). Window 0 now STRADDLES + // the floor: chunks 0,1 below it, chunks 2,3 in range. The .idx still claims + // lo=0, but the reader gate is the source of truth. through := chunk.ID(3).LastLedger() // Pick retentionChunks so the sliding floor lands on chunk 2: // lastCompleteChunkAt(through)=3, floor chunk = 3-retention+1 = 2 ⇒ retention=2. gate := NewRetentionGate(through, 2, 0) - require.Equal(t, chunk.ID(2).FirstLedger(), gate.Floor(), "the floor lands at chunk 2") + require.Equal(t, chunk.ID(2).FirstLedger(), gate.Floor(), + "the floor straddles window 0 at chunk 2") - // A seq in chunk 2 or 3 (in range) is admitted; a seq in chunk 0 or 1 is - // not-found regardless of the file still being on disk. + // A seq in chunk 2 or 3 (in range) is admitted even though the .idx's lo is a + // now-pruned chunk 0; a seq in chunk 0 or 1 is not-found regardless of the + // .idx still hashing it. assert.True(t, gate.Admits(chunk.ID(2).FirstLedger()), "floor chunk: in range") assert.True(t, gate.Admits(chunk.ID(3).LastLedger()), "above the floor: in range") assert.False(t, gate.Admits(chunk.ID(1).LastLedger()), "below the floor: not-found") assert.False(t, gate.Admits(chunk.ID(0).FirstLedger()), "below the floor: not-found") - // The prune scan sweeps only the WHOLLY-below-floor chunks 0,1; chunks 2,3 - // survive — exactly the data the gate admits. + // The straddling window's frozen .idx is NOT swept (the window is not wholly + // below the floor) — only its below-floor chunk artifacts (chunks 0,1) are + // pruned. The .idx therefore keeps serving the in-range tail (chunks 2,3), + // with the gate masking the now-pruned chunks 0,1 it still hashes. 
+ assert.False(t, gate.WindowBelowFloor(0, wins), + "a straddling window is not wholly below the floor — its .idx is kept") cfg, _ := lifecycleTestConfig(t, cat, 2) pops, err := eligiblePruneOps(cfg, cat, through) require.NoError(t, err) @@ -116,6 +137,14 @@ func TestReaderRetention_StraddlingFloorServesInRangeNotBelow(t *testing.T) { require.NoError(t, op()) } + // The window's frozen .idx coverage survives the prune (index family). + survives, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok, "the straddling window keeps its frozen coverage") + require.Equal(t, fk.Key, survives.Key) + + // The below-floor chunks 0,1 ARE pruned (chunk family); the in-range chunks + // 2,3 survive — exactly the data the gate admits. for c := chunk.ID(0); c <= 1; c++ { ledgers, serr := cat.State(c, KindLedgers) require.NoError(t, serr) @@ -129,6 +158,150 @@ func TestReaderRetention_StraddlingFloorServesInRangeNotBelow(t *testing.T) { assertQuiescent(t, cfg, cat, through) } +// --------------------------------------------------------------------------- +// Scenario: retention WIDENING at the next startup. A window finalized at a +// NARROW coverage [lo, last] (a higher old floor) is re-derived by backfill at +// the new wider coverage [lo', last]: the resolver emits the wider IndexBuild +// plus .bin re-materialization for the newly-in-range chunks, and the terminal +// CommitIndex demotes the old coverage and promotes the wider one as the unique +// frozen. Extending the bottom of storage is backfill's job (runBackfill), never +// a tick's. +// --------------------------------------------------------------------------- + +func TestReaderRetention_WideningReDerivesAndDemotesOldCoverage(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] + wins := cat.Windows() + + // Prior run, narrow retention: the floor sat at chunk 2, so window 0 was + // finalized at the narrow TERMINAL coverage [2,3] (lo raised to the floor + // chunk). 
Chunks 2,3 have ledgers/events frozen; chunks 0,1 were pruned (no keys). + for c := chunk.ID(2); c <= 3; c++ { + freezeKinds(t, cat, c, KindLedgers, KindEvents) + } + freezeCoverage(t, cat, 0, 2, 3) // narrow terminal coverage + narrow, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.True(t, wins.IsTerminalCoverage(narrow), "narrow coverage [2,3] is terminal") + require.Equal(t, chunk.ID(2), narrow.Lo) + + // Retention widened: the new floor is genesis (chunk 0), so the desired + // coverage for window 0 is the wider [0,3]. resolve at the wider range + // re-derives. Chunks 0,1 are fully pruned ⇒ every kind requested (bulk + // refetch); chunks 2,3 keep their frozen ledgers/events but need their .bin. + plan, err := resolve(resolveCfg(cat), 0, 3) + require.NoError(t, err) + + // One terminal index build at the WIDER coverage [0,3]. + require.Equal(t, []IndexBuild{{Window: 0, Lo: 0, Hi: 3}}, plan.IndexBuilds, + "widening re-derives the window at its new wider terminal coverage") + require.True(t, wins.IsTerminalCoverage(IndexCoverage{Window: 0, Lo: 0, Hi: 3})) + + // The newly-in-range chunks 0,1 need all kinds (fully pruned ⇒ bulk refetch); + // chunks 2,3 need only their .bin (ledgers/events still frozen from local .pack). + require.Equal(t, []chunk.ID{0, 1, 2, 3}, chunkSet(plan)) + for _, c := range []chunk.ID{0, 1} { + cb, found := findChunkBuild(plan, c) + require.True(t, found) + assert.Equal(t, AllArtifacts(), cb.Artifacts, + "fully-pruned chunk %s refetches every kind from the bulk source", c) + } + for _, c := range []chunk.ID{2, 3} { + cb, found := findChunkBuild(plan, c) + require.True(t, found) + assert.Equal(t, NewArtifactSet(KindTxHash), cb.Artifacts, + "covered chunk %s rebuilds only its .bin from the local .pack", c) + } + + // Now drive the terminal CommitIndex for the wider coverage (what the + // executor's IndexBuild does once the .bins are present). 
It must demote the + // old narrow coverage and promote the wider one as the window's UNIQUE frozen. + for c := chunk.ID(0); c <= 1; c++ { + freezeKinds(t, cat, c, KindLedgers, KindEvents) // the refetch landed + } + wider, err := cat.MarkIndexFreezing(0, 0, 3) + require.NoError(t, err) + require.NoError(t, cat.CommitIndex(wider)) + + // The window's unique frozen coverage is now the wider [0,3]; the old [2,3] + // was demoted to "pruning". + got, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, chunk.ID(0), got.Lo, "the wider coverage is now the frozen one") + assert.Equal(t, chunk.ID(3), got.Hi) + assert.True(t, wins.IsTerminalCoverage(got)) + + covs, err := cat.AllIndexKeys() + require.NoError(t, err) + var oldState, newState State + for _, c := range covs { + switch c.Key { + case narrow.Key: + oldState = c.State + case wider.Key: + newState = c.State + } + } + assert.Equal(t, StatePruning, oldState, "the old narrow coverage was demoted") + assert.Equal(t, StateFrozen, newState, "the wider coverage is frozen") +} + +// The widening flows through backfill's runBackfill (resolve + executePlan), +// not a tick: a seamed runIndex performs the real terminal CommitIndex so the +// demote/promote happens on the production path. This is the "at the next +// startup" half of the contract. +func TestReaderRetention_WideningRunsThroughBackfill(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] + + // Prior narrow finalization at [2,3]. 
+ for c := chunk.ID(2); c <= 3; c++ { + freezeKinds(t, cat, c, KindLedgers, KindEvents) + } + freezeCoverage(t, cat, 0, 2, 3) + narrow, _, err := cat.FrozenCoverage(0) + require.NoError(t, err) + + cfg := ExecConfig{ + Catalog: cat, Logger: silentLogger(), Workers: 2, + Process: ProcessConfig{Backend: zeroTxBackend(t)}, // bulk source for the refetch + runChunk: func(_ context.Context, cb ChunkBuild, _ ExecConfig) error { + // Simulate the freeze: flip every requested kind frozen (and demote + // nothing — the index build owns that). + kinds := []Kind{} + for _, k := range []Kind{KindLedgers, KindEvents, KindTxHash} { + if cb.Artifacts.Has(k) { + kinds = append(kinds, k) + } + } + if err := cat.MarkChunkFreezing(cb.Chunk, kinds...); err != nil { + return err + } + return cat.FlipChunkFrozen(cb.Chunk, kinds...) + }, + runIndex: func(_ context.Context, ib IndexBuild, _ ExecConfig) error { + // The real terminal commit: mark-then-commit, which demotes the old + // coverage and any in-window chunk:txhash keys. + cov, merr := cat.MarkIndexFreezing(ib.Window, ib.Lo, ib.Hi) + if merr != nil { + return merr + } + return cat.CommitIndex(cov) + }, + } + + // backfill widens the bottom of storage to chunk 0 by backfilling [0,3]. + require.NoError(t, runBackfill(context.Background(), cfg, 0, 3)) + + // The window finalized at the wider [0,3]; the old [2,3] is demoted/swept-bound. + got, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, chunk.ID(0), got.Lo) + assert.Equal(t, chunk.ID(3), got.Hi) + require.NotEqual(t, narrow.Key, got.Key, "the frozen coverage is the wider one, not the old narrow one") +} + // --------------------------------------------------------------------------- // Scenario: retention SHORTENING prunes the newly-out-of-range chunks // immediately. 
The prune scan reads the floor live from (through, @@ -137,13 +310,15 @@ func TestReaderRetention_StraddlingFloorServesInRangeNotBelow(t *testing.T) { // --------------------------------------------------------------------------- func TestReaderRetention_ShorteningPrunesNewlyOutOfRangeChunks(t *testing.T) { - cat, _ := testCatalog(t) + cat, _ := smallWindowCatalog(t, 1) // one-chunk windows: window c == chunk c + wins := cat.Windows() - // Chunks 0..5 fully frozen (ledgers + events), with a real .pack on disk. Live - // chunk 6 (positional ⇒ through = chunk 5's last). + // Chunks 0..5 fully frozen, each its own terminal one-chunk window, with a + // real .pack on disk. Live chunk 6 (positional ⇒ through = chunk 5's last). for c := chunk.ID(0); c <= 5; c++ { - freezeKinds(t, cat, c, KindLedgers, KindEvents) + freezeKinds(t, cat, c, KindLedgers, KindEvents, KindTxHash) writeArtifact(t, cat.layout.LedgerPackPath(c)) + freezeCoverage(t, cat, wins.WindowID(c), c, c) } live := openLiveHotDB(t, cat, 6) t.Cleanup(func() { _ = live.Close() }) @@ -172,6 +347,9 @@ func TestReaderRetention_ShorteningPrunesNewlyOutOfRangeChunks(t *testing.T) { require.NoError(t, serr) assert.Equal(t, State(""), ledgers, "chunk %s key swept by the shortened floor", c) assert.NoFileExists(t, cat.layout.LedgerPackPath(c), "chunk %s pack swept", c) + _, hasFrozen, ferr := cat.FrozenCoverage(wins.WindowID(c)) + require.NoError(t, ferr) + assert.False(t, hasFrozen, "chunk %s window's index swept (wholly past the floor)", c) } // Chunks 4,5 (the new retention window) survive. for c := chunk.ID(4); c <= 5; c++ { @@ -183,3 +361,80 @@ func TestReaderRetention_ShorteningPrunesNewlyOutOfRangeChunks(t *testing.T) { assertQuiescent(t, cfg, cat, through) } + +// --------------------------------------------------------------------------- +// Scenario: the prune scan's redundant-input branch cleans a WIDENED-then- +// NARROWED window. 
A widening backfill re-froze (or left mid-write) a finalized +// window's chunk:c:txhash .bin keys, then retention narrowed back before the +// rebuild. The resolver schedules nothing (desired ⊆ stored), so re- +// materialization will never repair those keys; the prune scan's redundant- +// input branch demotes and sweeps them — "frozen" and "freezing" alike — because +// the window's terminal .idx provably covers their chunks. +// --------------------------------------------------------------------------- + +func TestReaderRetention_RedundantInputCleanupOfWidenedThenNarrowedWindow(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] + wins := cat.Windows() + + // Window 0 is finalized at terminal coverage [0,3] (the post-widening final + // .idx). ledgers/events frozen for all four chunks; a real .pack each. + for c := chunk.ID(0); c <= 3; c++ { + freezeKinds(t, cat, c, KindLedgers, KindEvents) + writeArtifact(t, cat.layout.LedgerPackPath(c)) + } + freezeCoverage(t, cat, 0, 0, 3) + fk, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.True(t, wins.IsTerminalCoverage(fk), "window 0 is finalized at [0,3]") + + // The abandoned widening left behind chunk:c:txhash .bin keys inside this + // finalized window: chunk 1's is "frozen" (re-froze fully), chunk 2's is + // "freezing" (crashed mid-write). Both are provably redundant — the terminal + // .idx already covers chunks 1 and 2 — and the resolver never re-materializes + // a covered window. + freezeKinds(t, cat, 1, KindTxHash) // chunk:1:txhash = "frozen" + writeArtifact(t, cat.layout.TxHashBinPath(1)) + require.NoError(t, cat.MarkChunkFreezing(2, KindTxHash)) // chunk:2:txhash = "freezing" + writeArtifact(t, cat.layout.TxHashBinPath(2)) + + // The resolver schedules NOTHING for this window (desired [0,3] ⊆ stored + // [0,3]) — so these keys would never be repaired by re-materialization. 
+ plan, err := resolve(resolveCfg(cat), 0, 3) + require.NoError(t, err) + require.True(t, plan.Empty(), "a covered finalized window schedules no work, got %+v", plan) + + // The prune scan's redundant-input branch sweeps both, frozen and freezing + // alike. A live chunk 4 keeps the window below the partition (not required for + // the prune scan, but matches steady state). + cfg, rec := lifecycleTestConfig(t, cat, 0) // full history; nothing past the floor + through := chunk.ID(3).LastLedger() + pops, err := eligiblePruneOps(cfg, cat, through) + require.NoError(t, err) + require.NotEmpty(t, pops, "the redundant chunk:txhash keys are scheduled for sweep") + for _, op := range pops { + require.NoError(t, op()) + } + require.False(t, rec.fired()) + + // Both redundant chunk:txhash keys (and their .bin files) are gone. + for _, c := range []chunk.ID{1, 2} { + st, serr := cat.State(c, KindTxHash) + require.NoError(t, serr) + assert.Equal(t, State(""), st, "chunk %s redundant txhash key swept", c) + assert.NoFileExists(t, cat.layout.TxHashBinPath(c), "chunk %s .bin swept", c) + } + // The window's terminal .idx coverage and the chunks' ledgers/events survive — the + // .idx is what serves these chunks now. + survives, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, fk.Key, survives.Key, "the terminal .idx coverage is untouched") + for c := chunk.ID(0); c <= 3; c++ { + ledgers, serr := cat.State(c, KindLedgers) + require.NoError(t, serr) + assert.Equal(t, StateFrozen, ledgers, "chunk %s ledgers survives", c) + } + + assertQuiescent(t, cfg, cat, through) +} diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/startup.go b/cmd/stellar-rpc/internal/fullhistory/streaming/startup.go index d6456f14d..8f3451eb1 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/startup.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/startup.go @@ -22,7 +22,7 @@ import ( // hot DB, so the data is local by construction). 
runBackfill is the SAME // resolve + executePlan the lifecycle tick uses (Phase B); there is no // upfront producibility gate — each chunk's producibility is enforced -// lazily during its build by the cold ingest. +// lazily during its build by the buildTxhashIndex .bin precondition. // // 2. SERVE + INGEST. Open the resume chunk's hot DB (Issue 10), start captive // core (injected), launch the lifecycle goroutine (Issue 11) on a doorbell, diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/startup_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/startup_test.go index e936f63be..0517769c6 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/startup_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/startup_test.go @@ -80,18 +80,20 @@ func (c *fakeCore) OpenCore(_ context.Context, resumeLedger uint32) (LedgerGette } // recordingPlan captures the (rangeStart, rangeEnd) every backfill pass asked -// for, via the ExecConfig runChunk test seam — so a backfill test asserts the -// loop's range arithmetic without real cold I/O. Because resolve emits per-chunk -// builds, the lowest/highest chunk a pass touched bracket the requested range. +// for, via the ExecConfig runChunk/runIndex test seams — so a backfill test +// asserts the loop's range arithmetic without real cold I/O. Because resolve +// emits per-chunk builds, the lowest/highest chunk a pass touched bracket the +// requested range. type recordingPlan struct { mu sync.Mutex passes [][2]chunk.ID // {minChunk, maxChunk} per pass cur *[2]chunk.ID } -// note records a ChunkBuild's chunk into the current pass. runBackfill calls -// resolve then executePlan; we observe each ChunkBuild via the runChunk seam. A -// new pass is opened lazily on the first chunk after the previous pass closed. +// note records a ChunkBuild's chunk into the current pass, called from the +// runChunk seam. runBackfill calls resolve then executePlan; we observe each +// ChunkBuild.
A new pass is opened lazily on the first chunk after the previous +// pass closed. func (r *recordingPlan) note(c chunk.ID) { r.mu.Lock() defer r.mu.Unlock() @@ -126,8 +128,8 @@ func (r *recordingPlan) snapshot() [][2]chunk.ID { // startTestConfig builds a StartConfig over a real catalog (genesis floor pinned // to GenesisLedger by default) with all external boundaries faked. recordPlan, -// when non-nil, wires the runChunk seam so backfill passes are recorded without -// cold I/O. +// when non-nil, wires the runChunk/runIndex seams so backfill passes are +// recorded without cold I/O. func startTestConfig( t *testing.T, cat *Catalog, tip *fakeTipBackend, core *fakeCore, recordPlan *recordingPlan, ) StartConfig { @@ -146,6 +148,7 @@ func startTestConfig( recordPlan.note(cb.Chunk) return nil } + exec.runIndex = func(_ context.Context, _ IndexBuild, _ ExecConfig) error { return nil } } life := LifecycleConfig{ExecConfig: exec, RetentionChunks: 0, Fatalf: (&fatalRecorder{}).fatalf} return StartConfig{ @@ -364,6 +367,7 @@ func TestBackfill_LongDowntimeRePass(t *testing.T) { mu.Unlock() return nil }, + runIndex: func(context.Context, IndexBuild, ExecConfig) error { return nil }, } cfg := StartConfig{ Exec: exec, diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/streaming_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/streaming_test.go index c59a1d318..f011953a3 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/streaming_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/streaming_test.go @@ -17,6 +17,8 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" ) +const testCPI = 1000 // chunks_per_txhash_index for tests (the default) + func silentLogger() *supportlog.Entry { var buf bytes.Buffer log := supportlog.New() @@ -37,7 +39,10 @@ func testCatalog(t *testing.T) (*Catalog, string) { require.NoError(t, err) t.Cleanup(func() { _ = store.Close() }) - return NewCatalog(store, 
NewLayout(artifactRoot)), artifactRoot + windows, err := NewWindows(testCPI) + require.NoError(t, err) + + return NewCatalog(store, NewLayout(artifactRoot), windows), artifactRoot } // writeArtifact materializes a placeholder file at path (creating parents) so a @@ -48,18 +53,73 @@ func writeArtifact(t *testing.T, path string) { require.NoError(t, os.WriteFile(path, []byte("artifact"), 0o644)) } +// --------------------------------------------------------------------------- +// Window arithmetic. +// --------------------------------------------------------------------------- + +func TestNewWindows_Validation(t *testing.T) { + _, err := NewWindows(0) + require.Error(t, err) + + _, err = NewWindows(MaxChunksPerTxhashIndex + 1) + require.Error(t, err) + + w, err := NewWindows(MaxChunksPerTxhashIndex) + require.NoError(t, err) + require.Equal(t, MaxChunksPerTxhashIndex, w.ChunksPerIndex()) +} + +func TestWindowArithmetic(t *testing.T) { + w, err := NewWindows(1000) + require.NoError(t, err) + + tests := []struct { + name string + chunkID chunk.ID + wantWindow WindowID + wantFirst, wantHi chunk.ID + }{ + {"first chunk of window 0", 0, 0, 0, 999}, + {"mid window 0", 500, 0, 0, 999}, + {"last chunk of window 0", 999, 0, 0, 999}, + {"first chunk of window 1", 1000, 1, 1000, 1999}, + {"the doc's example chunk 5350", 5350, 5, 5000, 5999}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.wantWindow, w.WindowID(tc.chunkID)) + require.Equal(t, tc.wantFirst, w.FirstChunk(tc.wantWindow)) + require.Equal(t, tc.wantHi, w.LastChunk(tc.wantWindow)) + require.Equal(t, uint32(1000), w.ChunksIn()) + }) + } +} + +func TestIsTerminalCoverage(t *testing.T) { + w, err := NewWindows(1000) + require.NoError(t, err) + + // hi == window's last chunk => terminal. + require.True(t, w.IsTerminalCoverage(IndexCoverage{Window: 5, Lo: 5100, Hi: 5999})) + // hi below the last chunk => not terminal (still filling). 
+ require.False(t, w.IsTerminalCoverage(IndexCoverage{Window: 5, Lo: 5100, Hi: 5349})) +} + // --------------------------------------------------------------------------- // Key <-> path bijection, both directions. // --------------------------------------------------------------------------- func TestKeyConstructorsMatchSpec(t *testing.T) { require.Equal(t, "chunk:00005350:ledgers", chunkKey(5350, KindLedgers)) + require.Equal(t, "chunk:00005350:events", chunkKey(5350, KindEvents)) + require.Equal(t, "chunk:00005350:txhash", chunkKey(5350, KindTxHash)) require.Equal(t, "hot:chunk:00005350", hotChunkKey(5350)) + require.Equal(t, "index:00000005:00005100:00005349", indexKey(5, 5100, 5349)) } func TestChunkKeyBijection(t *testing.T) { for _, kind := range AllKinds() { - for _, id := range []chunk.ID{0, 1, 999, 1000, 5350} { + for _, id := range []chunk.ID{0, 1, 999, 1000, 5350, chunk.ID(MaxChunksPerTxhashIndex)} { key := chunkKey(id, kind) gotID, gotKind, ok := parseChunkKey(key) require.True(t, ok, "parse %q", key) @@ -78,30 +138,60 @@ func TestHotKeyBijection(t *testing.T) { } } +func TestIndexKeyBijection(t *testing.T) { + cov := IndexCoverage{Window: 5, Lo: 5100, Hi: 5349} + key := indexKey(cov.Window, cov.Lo, cov.Hi) + got, ok := parseIndexKey(key) + require.True(t, ok) + require.Equal(t, cov.Window, got.Window) + require.Equal(t, cov.Lo, got.Lo) + require.Equal(t, cov.Hi, got.Hi) + require.Equal(t, key, got.Key) +} + func TestKeyToPathBijection(t *testing.T) { l := NewLayout("/data") // The doc's directory-layout examples. 
require.Equal(t, "/data/ledgers/00005/00005350.pack", l.LedgerPackPath(5350)) + require.Equal(t, "/data/txhash/raw/00005/00005350.bin", l.TxHashBinPath(5350)) + require.Equal(t, []string{ + "/data/events/00005/00005350-events.pack", + "/data/events/00005/00005350-index.pack", + "/data/events/00005/00005350-index.hash", + }, l.EventsPaths(5350)) require.Equal(t, "/data/hot/00005350", l.HotChunkPath(5350)) + + cov := IndexCoverage{Window: 5, Lo: 5100, Hi: 5349} + require.Equal(t, "/data/txhash/index/00000005", l.IndexWindowDir(cov.Window)) + require.Equal(t, "/data/txhash/index/00000005/00005100-00005349.idx", l.IndexFilePath(cov)) } func TestParseRejectsMalformed(t *testing.T) { bad := []string{ - "chunk:5350:ledgers", // not 8-digit padded - "chunk:00005350:bogus", // unknown kind - "chunk:00005350", // missing kind - "hot:chunk:5350", // not padded - "unrelated:key", // wrong family + "chunk:5350:ledgers", // not 8-digit padded + "chunk:00005350:bogus", // unknown kind + "chunk:00005350", // missing kind + "hot:chunk:5350", // not padded + "index:00000005:00005100", // too few segments + "index:5:5100:5349", // not padded + "unrelated:key", // wrong family } for _, key := range bad { _, _, okChunk := parseChunkKey(key) _, okHot := parseHotChunkKey(key) - require.False(t, okChunk && okHot, "expected %q to be rejected by all parsers", key) + _, okIdx := parseIndexKey(key) + require.False(t, okChunk && okHot && okIdx, "expected %q to be rejected by all parsers", key) } - // Specific rejection. + // Specific rejections. 
_, _, ok := parseChunkKey("chunk:00005350:bogus") require.False(t, ok) + _, ok2 := parseIndexKey("index:00000005:00005349:00005100") // lo > hi + require.False(t, ok2) +} + +func TestIndexKeyPanicsOnLoGreaterThanHi(t *testing.T) { + require.Panics(t, func() { indexKey(5, 5349, 5100) }) } // --------------------------------------------------------------------------- @@ -157,6 +247,21 @@ func TestRoundTripHotKeys(t *testing.T) { require.NoError(t, cat.DeleteHotKey(7)) } +func TestRoundTripIndexKey(t *testing.T) { + cat, _ := testCatalog(t) + + cov, err := cat.MarkIndexFreezing(5, 5100, 5349) + require.NoError(t, err) + require.Equal(t, StateFreezing, cov.State) + + keys, err := cat.IndexKeys(5) + require.NoError(t, err) + require.Len(t, keys, 1) + require.Equal(t, StateFreezing, keys[0].State) + require.Equal(t, chunk.ID(5100), keys[0].Lo) + require.Equal(t, chunk.ID(5349), keys[0].Hi) +} + func TestConfigPins(t *testing.T) { cat, _ := testCatalog(t) @@ -169,6 +274,16 @@ func TestConfigPins(t *testing.T) { require.NoError(t, err) require.True(t, ok) require.Equal(t, uint32(2), el) + + _, ok, err = cat.ChunksPerTxhashIndex() + require.NoError(t, err) + require.False(t, ok) + + require.NoError(t, cat.PutChunksPerTxhashIndex(testCPI)) + cpi, ok, err := cat.ChunksPerTxhashIndex() + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, uint32(testCPI), cpi) } // --------------------------------------------------------------------------- @@ -196,40 +311,280 @@ func TestChunkArtifactKeys(t *testing.T) { cat, _ := testCatalog(t) require.NoError(t, cat.MarkChunkFreezing(1, KindLedgers)) - require.NoError(t, cat.FlipChunkFrozen(2, KindLedgers)) + require.NoError(t, cat.FlipChunkFrozen(2, KindEvents)) refs, err := cat.ChunkArtifactKeys() require.NoError(t, err) require.Len(t, refs, 2) - // Sorted by key: chunk:00000001 before chunk:00000002. + // Sorted by key: chunk:00000001:ledgers before chunk:00000002:events. 
require.Equal(t, ArtifactRef{Chunk: 1, Kind: KindLedgers, State: StateFreezing}, refs[0]) - require.Equal(t, ArtifactRef{Chunk: 2, Kind: KindLedgers, State: StateFrozen}, refs[1]) + require.Equal(t, ArtifactRef{Chunk: 2, Kind: KindEvents, State: StateFrozen}, refs[1]) } // --------------------------------------------------------------------------- -// Sweep: the deletion body. +// frozenCoverage: uniqueness + none-case. // --------------------------------------------------------------------------- -func TestSweepChunkArtifacts(t *testing.T) { +func TestFrozenCoverageNone(t *testing.T) { + cat, _ := testCatalog(t) + + _, ok, err := cat.FrozenCoverage(5) + require.NoError(t, err) + require.False(t, ok, "no coverage at all") + + // A "freezing" coverage is not frozen. + _, err = cat.MarkIndexFreezing(5, 5100, 5349) + require.NoError(t, err) + _, ok, err = cat.FrozenCoverage(5) + require.NoError(t, err) + require.False(t, ok, "freezing is not frozen") +} + +func TestFrozenCoverageUnique(t *testing.T) { + cat, _ := testCatalog(t) + + cov, err := cat.MarkIndexFreezing(5, 5100, 5349) + require.NoError(t, err) + require.NoError(t, cat.CommitIndex(cov)) + + got, ok, err := cat.FrozenCoverage(5) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, chunk.ID(5100), got.Lo) + require.Equal(t, chunk.ID(5349), got.Hi) +} + +func TestFrozenCoverageDetectsTwoFrozen(t *testing.T) { + cat, _ := testCatalog(t) + + // Force the invariant-violating state directly through the store: two + // frozen coverages in one window. FrozenCoverage must detect it, not pick + // one. 
+ require.NoError(t, cat.store.Put(indexKey(5, 5100, 5349), string(StateFrozen))) + require.NoError(t, cat.store.Put(indexKey(5, 5100, 5350), string(StateFrozen))) + + _, _, err := cat.FrozenCoverage(5) + require.Error(t, err) + require.Contains(t, err.Error(), "uniqueness invariant violated") +} + +// --------------------------------------------------------------------------- +// Index commit batch atomicity: promote + demote + terminal land together. +// --------------------------------------------------------------------------- + +func TestCommitIndexPromoteAndDemote(t *testing.T) { + cat, _ := testCatalog(t) + + // First coverage [5100,5349] becomes frozen. + cov1, err := cat.MarkIndexFreezing(5, 5100, 5349) + require.NoError(t, err) + require.NoError(t, cat.CommitIndex(cov1)) + + // Next boundary: [5100,5350]. Commit promotes it and demotes [5100,5349]. + cov2, err := cat.MarkIndexFreezing(5, 5100, 5350) + require.NoError(t, err) + require.NoError(t, cat.CommitIndex(cov2)) + + // Exactly one frozen coverage — the new one. + frozen, ok, err := cat.FrozenCoverage(5) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, chunk.ID(5350), frozen.Hi) + + // The predecessor is now "pruning". + keys, err := cat.IndexKeys(5) + require.NoError(t, err) + states := map[string]State{} + for _, k := range keys { + states[k.Key] = k.State + } + require.Equal(t, StatePruning, states[indexKey(5, 5100, 5349)]) + require.Equal(t, StateFrozen, states[indexKey(5, 5100, 5350)]) +} + +func TestCommitIndexTerminalDemotesTxhashKeys(t *testing.T) { + cat, _ := testCatalog(t) + + // Window 0 (chunks 0..999). Mark a few chunks' .bin frozen. + for _, c := range []chunk.ID{0, 1, 500, 999} { + require.NoError(t, cat.MarkChunkFreezing(c, KindTxHash)) + require.NoError(t, cat.FlipChunkFrozen(c, KindTxHash)) + } + // A non-txhash key in the window must NOT be demoted. 
+ require.NoError(t, cat.FlipChunkFrozen(500, KindLedgers)) + + // Terminal build covers the whole window [0,999] => hi == last chunk. + cov, err := cat.MarkIndexFreezing(0, 0, 999) + require.NoError(t, err) + require.True(t, cat.windows.IsTerminalCoverage(cov)) + require.NoError(t, cat.CommitIndex(cov)) + + // Every present txhash key in the window demoted to "pruning". + for _, c := range []chunk.ID{0, 1, 500, 999} { + s, err := cat.State(c, KindTxHash) + require.NoError(t, err) + require.Equal(t, StatePruning, s, "chunk %d txhash", c) + } + // The ledgers key is untouched. + ledgers, err := cat.State(500, KindLedgers) + require.NoError(t, err) + require.Equal(t, StateFrozen, ledgers) + + // And the index coverage is frozen. + frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, chunk.ID(999), frozen.Hi) +} + +func TestCommitIndexNonTerminalLeavesTxhashKeys(t *testing.T) { + cat, _ := testCatalog(t) + + require.NoError(t, cat.MarkChunkFreezing(0, KindTxHash)) + require.NoError(t, cat.FlipChunkFrozen(0, KindTxHash)) + + // Non-terminal: hi (5) < window's last chunk (999). + cov, err := cat.MarkIndexFreezing(0, 0, 5) + require.NoError(t, err) + require.False(t, cat.windows.IsTerminalCoverage(cov)) + require.NoError(t, cat.CommitIndex(cov)) + + // txhash key NOT demoted — the window is still filling. + s, err := cat.State(0, KindTxHash) + require.NoError(t, err) + require.Equal(t, StateFrozen, s) +} + +// CommitIndex's finalization is one atomic batch: promote-new + demote-prev (+ +// demote terminal txhash keys) land together or not at all. We prove it by +// fault-injecting a failure INSIDE the batch callback (which makes metastore +// drop the whole batch) and then asserting NOTHING the batch would have written +// is observable: the predecessor is still the unique frozen coverage, the new +// coverage is still "freezing", and the in-window txhash keys are still frozen. 
+// Rewriting CommitIndex as separate non-atomic Puts would leave some of those +// writes durable here and fail this test. +func TestCommitIndexBatchIsAtomic(t *testing.T) { + cat, _ := testCatalog(t) + + // Predecessor [0,499] frozen. + prev, err := cat.MarkIndexFreezing(0, 0, 499) + require.NoError(t, err) + require.NoError(t, cat.CommitIndex(prev)) + + // A terminal txhash input that a successful terminal commit would demote. + require.NoError(t, cat.MarkChunkFreezing(0, KindTxHash)) + require.NoError(t, cat.FlipChunkFrozen(0, KindTxHash)) + + // The new TERMINAL coverage [0,999] — exercises all three batch puts at once. + cov, err := cat.MarkIndexFreezing(0, 0, 999) + require.NoError(t, err) + require.True(t, cat.windows.IsTerminalCoverage(cov)) + + // Fail the batch from inside its callback: metastore drops the whole batch. + cat.hooks.failCommitBatch = func() bool { return true } + err = cat.CommitIndex(cov) + require.Error(t, err, "CommitIndex must surface the injected batch failure") + cat.hooks.failCommitBatch = nil + + // All-or-nothing: the failed batch wrote NOTHING. + // (1) The predecessor is still the window's unique frozen coverage. + frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err, "must not observe two frozen coverages") + require.True(t, ok) + require.Equal(t, chunk.ID(499), frozen.Hi, "predecessor still the unique frozen coverage") + // (2) The new coverage is still merely "freezing" (its promote did not land). + v, ok, err := cat.Get(cov.Key) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, string(StateFreezing), v) + // (3) The terminal txhash input was not demoted. + s, err := cat.State(0, KindTxHash) + require.NoError(t, err) + require.Equal(t, StateFrozen, s) + + // And a clean re-commit (no fault) lands the whole batch. 
+ require.NoError(t, cat.CommitIndex(cov)) + frozen, ok, err = cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, chunk.ID(999), frozen.Hi) + prevState, ok, err := cat.Get(prev.Key) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, string(StatePruning), prevState) + s, err = cat.State(0, KindTxHash) + require.NoError(t, err) + require.Equal(t, StatePruning, s) +} + +// CommitIndex is documented crash-safe to re-run on the same coverage (the +// hasPrev && prev.Key == cov.Key branch in protocol.go): a re-commit of an +// already-landed batch must be a no-op overwrite, leaving exactly one frozen +// coverage and nothing demoted against itself. This exercises that branch, +// which no other test touched. +func TestCommitIndexReCommitIsIdempotent(t *testing.T) { cat, _ := testCatalog(t) - // Set up a frozen ledgers for chunk 3, with a real file. + cov, err := cat.MarkIndexFreezing(5, 5100, 5349) + require.NoError(t, err) + require.NoError(t, cat.CommitIndex(cov)) + + // Second commit on the SAME coverage: the predecessor IS cov, so the demote + // branch is skipped and the promote is an idempotent overwrite. + require.NoError(t, cat.CommitIndex(cov)) + + // Exactly one frozen coverage remains, and it is cov — not demoted against + // itself, no debris. + keys, err := cat.IndexKeys(5) + require.NoError(t, err) + require.Len(t, keys, 1, "exactly one coverage key in the window") + require.Equal(t, cov.Key, keys[0].Key) + require.Equal(t, StateFrozen, keys[0].State, "re-commit must leave it frozen, not pruning") + + frozen, ok, err := cat.FrozenCoverage(5) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, chunk.ID(5349), frozen.Hi) +} + +// --------------------------------------------------------------------------- +// Sweeps: the two deletion bodies. 
+// --------------------------------------------------------------------------- + +func TestSweepChunkArtifacts(t *testing.T) { + cat, root := testCatalog(t) + _ = root + + // Set up a frozen ledgers + frozen events for chunk 3, with real files. lfsPath := cat.layout.LedgerPackPath(3) writeArtifact(t, lfsPath) require.NoError(t, cat.MarkChunkFreezing(3, KindLedgers)) require.NoError(t, cat.FlipChunkFrozen(3, KindLedgers)) + eventsPaths := cat.layout.EventsPaths(3) + for _, p := range eventsPaths { + writeArtifact(t, p) + } + require.NoError(t, cat.MarkChunkFreezing(3, KindEvents)) + require.NoError(t, cat.FlipChunkFrozen(3, KindEvents)) + refs := []ArtifactRef{ {Chunk: 3, Kind: KindLedgers, State: StateFrozen}, + {Chunk: 3, Kind: KindEvents, State: StateFrozen}, } require.NoError(t, cat.SweepChunkArtifacts(refs)) - // File gone. + // Files gone. require.NoFileExists(t, lfsPath) - // Key gone (key absent => file gone). - s, err := cat.State(3, KindLedgers) - require.NoError(t, err) - require.Equal(t, State(""), s) + for _, p := range eventsPaths { + require.NoFileExists(t, p) + } + // Keys gone (key absent => file gone). + for _, kind := range []Kind{KindLedgers, KindEvents} { + s, err := cat.State(3, kind) + require.NoError(t, err) + require.Equal(t, State(""), s) + } } func TestSweepChunkArtifactsIdempotentOnMissingFiles(t *testing.T) { @@ -246,6 +601,44 @@ func TestSweepChunkArtifactsIdempotentOnMissingFiles(t *testing.T) { require.Equal(t, State(""), s) } +func TestSweepIndexKey(t *testing.T) { + cat, _ := testCatalog(t) + + cov, err := cat.MarkIndexFreezing(5, 5100, 5349) + require.NoError(t, err) + idxPath := cat.layout.IndexFilePath(cov) + writeArtifact(t, idxPath) + require.NoError(t, cat.CommitIndex(cov)) + + // Re-read as frozen for the sweep. 
+ frozen, ok, err := cat.FrozenCoverage(5) + require.NoError(t, err) + require.True(t, ok) + + require.NoError(t, cat.SweepIndexKey(frozen)) + + require.NoFileExists(t, idxPath) + keys, err := cat.IndexKeys(5) + require.NoError(t, err) + require.Empty(t, keys, "key absent => file gone") +} + +func TestSweepIndexKeyFreezingDebris(t *testing.T) { + cat, _ := testCatalog(t) + + // A crashed attempt: "freezing" key with a partial file. + cov, err := cat.MarkIndexFreezing(5, 5100, 5349) + require.NoError(t, err) + idxPath := cat.layout.IndexFilePath(cov) + writeArtifact(t, idxPath) + + require.NoError(t, cat.SweepIndexKey(cov)) + require.NoFileExists(t, idxPath) + keys, err := cat.IndexKeys(5) + require.NoError(t, err) + require.Empty(t, keys) +} + // --------------------------------------------------------------------------- // CRASH-SAFETY tests — interpose at the two dangerous instants and assert both // invariants: (A) every file on disk has its meta key; (B) key absent => file @@ -277,11 +670,27 @@ func assertEveryFileHasKey(t *testing.T, cat *Catalog, root string) { func keyForArtifactFile(t *testing.T, cat *Catalog, path string) (string, bool) { t.Helper() + // Index file: txhash/index/{w}/{lo}-{hi}.idx + dir := filepath.Dir(path) + base := filepath.Base(path) + if filepath.Ext(base) == ".idx" { + w, errW := parsePadded(filepath.Base(dir)) + require.NoError(t, errW) + name := strings.TrimSuffix(base, ".idx") + loStr, hiStr, found := strings.Cut(name, "-") + require.True(t, found, "bad idx name %q", base) + lo, errLo := parsePadded(loStr) + require.NoError(t, errLo) + hi, errHi := parsePadded(hiStr) + require.NoError(t, errHi) + return indexKey(WindowID(w), chunk.ID(lo), chunk.ID(hi)), true + } + // Per-chunk files: identify by reconstructing each kind's path for the // chunk id embedded in the filename (the leading 8-digit stem, before any - // ".pack" suffix). - base := filepath.Base(path) + // "-events"/".pack"/".bin" suffix). 
stem, _, _ := strings.Cut(base, ".") + stem, _, _ = strings.Cut(stem, "-") cid, errC := parsePadded(stem) require.NoError(t, errC) c := chunk.ID(cid) @@ -296,8 +705,8 @@ func keyForArtifactFile(t *testing.T, cat *Catalog, path string) (string, bool) // Crash instant (i): file written but key not yet flipped to "frozen". // // Reproduces the mark-then-write protocol stopped after barrierNewFile but -// before FlipChunkFrozen. The key is "freezing", the file is on disk. INV-3 -// disk->meta must still hold: the file is reachable from its key. +// before FlipChunkFrozen / CommitIndex. The key is "freezing", the file is on +// disk. INV-3 disk->meta must still hold: the file is reachable from its key. func TestCrashSafety_FileWrittenKeyNotFlipped(t *testing.T) { cat, root := testCatalog(t) @@ -309,36 +718,67 @@ func TestCrashSafety_FileWrittenKeyNotFlipped(t *testing.T) { require.NoError(t, barrierNewFile(lfsPath, true)) // <-- crash here: no FlipChunkFrozen. + // Index: mark freezing, write+barrier the file, "crash" before CommitIndex. + cov, err := cat.MarkIndexFreezing(5, 5100, 5349) + require.NoError(t, err) + idxPath := cat.layout.IndexFilePath(cov) + writeArtifact(t, idxPath) + require.NoError(t, barrierNewFile(idxPath, true)) + // <-- crash here: no CommitIndex. + // INV-3 (disk -> meta): every file on disk has its key. assertEveryFileHasKey(t, cat, root) - // The key is observable as "freezing" — the recovery signal. + // The keys are observable as "freezing" — the recovery signal. s, err := cat.State(4, KindLedgers) require.NoError(t, err) require.Equal(t, StateFreezing, s) + + keys, err := cat.IndexKeys(5) + require.NoError(t, err) + require.Len(t, keys, 1) + require.Equal(t, StateFreezing, keys[0].State) + + // Recovery for the index "freezing" debris is the sweep: delete file + key. + require.NoError(t, cat.SweepIndexKey(keys[0])) + require.NoFileExists(t, idxPath) + // And after the sweep, INV-3 still holds for what remains. 
+ assertEveryFileHasKey(t, cat, root) } // Crash instant (ii): inside the REAL sweep, between the durable unlink and the // key delete. // -// We fire a hook from INSIDE SweepChunkArtifacts at the exact instant after -// unlink+fsync and before the key-delete batch, and assert the EXIT-side -// invariant there: file gone => key still present. If the key delete were -// reordered ahead of the unlink, the file would still be on disk when the hook -// fires and the in-hook assertion fails. +// Earlier this test hand-replayed the sweep steps and stopped before the final +// delete — which stays green no matter how SweepChunkArtifacts orders its own +// steps, because the test never runs that code. We now fire a hook from INSIDE +// SweepChunkArtifacts at the exact instant after unlink+fsync and before the +// key-delete batch, and assert the EXIT-side invariant there: file gone => +// key still present. If the key delete were reordered ahead of the unlink, the +// file would still be on disk when the hook fires and the in-hook assertion +// fails. (Verified by experiment: moving the delete batch above the unlink loop +// turns this test red.) func TestCrashSafety_SweepUnlinkDurableKeyNotDeleted(t *testing.T) { cat, root := testCatalog(t) - // A frozen ledgers (one file) for chunk 6. + // A frozen ledgers (one file) + frozen events (three files) for chunk 6. lfsPath := cat.layout.LedgerPackPath(6) writeArtifact(t, lfsPath) require.NoError(t, cat.MarkChunkFreezing(6, KindLedgers)) require.NoError(t, cat.FlipChunkFrozen(6, KindLedgers)) + eventsPaths := cat.layout.EventsPaths(6) + for _, p := range eventsPaths { + writeArtifact(t, p) + } + require.NoError(t, cat.MarkChunkFreezing(6, KindEvents)) + require.NoError(t, cat.FlipChunkFrozen(6, KindEvents)) + refs := []ArtifactRef{ {Chunk: 6, Kind: KindLedgers, State: StateFrozen}, + {Chunk: 6, Kind: KindEvents, State: StateFrozen}, } - allPaths := []string{lfsPath} + allPaths := append([]string{lfsPath}, eventsPaths...) 
// The hook fires once, between the durable unlink and the key delete. fired := false @@ -370,7 +810,81 @@ func TestCrashSafety_SweepUnlinkDurableKeyNotDeleted(t *testing.T) { } } -// Per-chunk never-unlink-under-frozen-key assertion: fire INSIDE +// Index-side twin of the EXIT-invariant test: fire INSIDE SweepIndexKey, between +// the durable unlink and the key delete, and assert file-gone => key-present. +func TestCrashSafety_SweepIndexUnlinkDurableKeyNotDeleted(t *testing.T) { + cat, root := testCatalog(t) + + cov, err := cat.MarkIndexFreezing(5, 5100, 5349) + require.NoError(t, err) + idxPath := cat.layout.IndexFilePath(cov) + writeArtifact(t, idxPath) + require.NoError(t, cat.CommitIndex(cov)) + + frozen, ok, err := cat.FrozenCoverage(5) + require.NoError(t, err) + require.True(t, ok) + + fired := false + cat.hooks.beforeKeyDelete = func() { + fired = true + require.NoFileExists(t, idxPath, "EXIT invariant: idx file must be unlinked before its key is deleted") + ok, err := cat.Has(frozen.Key) + require.NoError(t, err) + require.True(t, ok, "coverage key must still exist at the pre-delete instant") + } + + require.NoError(t, cat.SweepIndexKey(frozen)) + require.True(t, fired, "beforeKeyDelete hook must have fired inside SweepIndexKey") + + require.NoFileExists(t, idxPath) + keys, err := cat.IndexKeys(5) + require.NoError(t, err) + require.Empty(t, keys) + assertEveryFileHasKey(t, cat, root) +} + +// Never-unlink-under-a-frozen-key, asserted at the instant it matters: fire +// INSIDE SweepIndexKey between the frozen->pruning demote and the unlink, and +// require the durable value to be "pruning" — never "frozen". If the demote +// were dropped (or moved after the unlink), the value here would still be +// "frozen" and this fails. The same hook also confirms the file is still on +// disk at this instant (the demote precedes any unlink). 
+func TestSweepIndex_NeverUnlinksUnderFrozenKey(t *testing.T) { + cat, _ := testCatalog(t) + + cov, err := cat.MarkIndexFreezing(5, 5100, 5349) + require.NoError(t, err) + idxPath := cat.layout.IndexFilePath(cov) + writeArtifact(t, idxPath) + require.NoError(t, cat.CommitIndex(cov)) + + frozen, ok, err := cat.FrozenCoverage(5) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, StateFrozen, frozen.State) + + fired := false + cat.hooks.beforeUnlink = func() { + fired = true + v, ok, err := cat.Get(frozen.Key) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, string(StatePruning), v, + "value at the pre-unlink instant must be pruning, never frozen") + require.FileExists(t, idxPath, "file must still be on disk before the unlink") + } + + require.NoError(t, cat.SweepIndexKey(frozen)) + require.True(t, fired, "beforeUnlink hook must have fired inside SweepIndexKey") + + require.NoFileExists(t, idxPath) + keys, err := cat.IndexKeys(5) + require.NoError(t, err) + require.Empty(t, keys) +} + +// Per-chunk twin of the never-unlink-under-frozen-key assertion: fire INSIDE // SweepChunkArtifacts between the demote batch and the unlinks; every "frozen" // ref must read "pruning" by then. Dropping the demote batch leaves them // "frozen" here and this fails. 
diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/txindex.go b/cmd/stellar-rpc/internal/fullhistory/streaming/txindex.go new file mode 100644 index 000000000..5c72602ab --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/txindex.go @@ -0,0 +1,267 @@ +package streaming + +import ( + "context" + "errors" + "fmt" + "os" + + "github.com/stellar/streamhash" + + supportlog "github.com/stellar/go-stellar-sdk/support/log" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" +) + +// IndexBuild names one tx-hash index rebuild: the window and the coverage +// [Lo, Hi] to materialize. Terminal-ness (Hi == window's last chunk) is +// DERIVED at build time (Windows.IsTerminalCoverage), never carried as a field +// — the spec's "marked nowhere". It mirrors the resolver's plan value +// (design-docs/full-history-streaming-workflow.md "Postcondition-driven +// scheduling"). +type IndexBuild struct { + Window WindowID + Lo, Hi chunk.ID +} + +// BuildConfig is the dependency bundle buildTxhashIndex/buildThenSweep read: the +// catalog (key state, path layout, window arithmetic, the one-write protocol's +// CommitIndex + the sweeps) and a logger. BuildOpts are optional streamhash +// build options threaded into the merged txhash.BuildColdIndex — the cold +// payload/fingerprint/metadata options are pinned by BuildColdIndex itself and +// cannot be overridden here (see cold_index.go's "format options go last"). +type BuildConfig struct { + Catalog *Catalog + Logger *supportlog.Entry + + // BuildOpts are extra streamhash.BuildOptions (e.g. WithWorkers) passed + // through to BuildColdIndex. Optional; the cold format options always win. 
+ BuildOpts []streamhash.BuildOption +} + +func (cfg BuildConfig) validate() error { + if cfg.Catalog == nil { + return errors.New("streaming: BuildConfig.Catalog is nil") + } + if cfg.Logger == nil { + return errors.New("streaming: BuildConfig.Logger is nil") + } + return nil +} + +// buildTxhashIndex is the tx-hash rolling rebuild (design-docs rule 3 / +// gettransaction-full-history-design.md §7.2). It rebuilds window w's index at +// coverage [lo, hi] from scratch, running the one-write protocol with +// CommitIndex's batch-commit extension. The four steps map exactly onto the +// spec: +// +// 1. Skip check — if w's unique "frozen" coverage already equals [lo, hi], +// return. This also short-circuits re-scheduled builds of finalized windows +// (a full-window frozen coverage is terminal by definition), which must NOT +// demand .bin inputs the terminal commit's sweep has since deleted. The skip +// precedes the precondition for exactly that reason. +// 2. Precondition + mark — every chunk in [lo, hi] must have its +// chunk:{c}:txhash key "frozen" (its .bin exists); fail loudly BEFORE any +// key is touched (the executor's done-channels broadcast completion, not +// success — this is the backstop). Then MarkIndexFreezing puts the coverage +// key "freezing" (an idempotent overwrite of a crashed attempt's debris). +// 3. Write — k-way merge the .bin files for [lo, hi] into the .idx via the +// merged txhash.BuildColdIndex (create-or-truncate at the coverage's +// canonical path; minLedger anchored at lo.FirstLedger()), then fsync the +// file + its dir (+ the grandparent dirent when this build created the +// window dir). +// 4. Commit — Catalog.CommitIndex: one atomic synced batch promoting this +// coverage to "frozen", demoting the predecessor to "pruning", and — iff +// terminal — demoting every chunk:{c}:txhash key in the window to "pruning". 
+// +// buildTxhashIndex never deletes a file: file removal is exclusively the sweeps' +// job (buildThenSweep / the tick's prune scan). The crash matrix (§7.6) is +// covered by the four-step ordering: a crash before step 4 leaves the +// predecessor frozen and the new coverage as "freezing" debris; a crash after +// leaves the new coverage frozen and the demoted keys as "pruning" sweep work. +func buildTxhashIndex(ctx context.Context, w WindowID, lo, hi chunk.ID, cfg BuildConfig) error { + if err := cfg.validate(); err != nil { + return err + } + if lo > hi { + return fmt.Errorf("streaming: buildTxhashIndex window %s lo %s > hi %s", w, lo, hi) + } + cat := cfg.Catalog + + // Step 1 — skip check. If the window's unique frozen coverage already covers + // exactly [lo, hi], there is nothing to write; leftover transient keys are + // the sweeps' job, not the builder's. Checked FIRST so a re-scheduled build + // of a finalized window (whose .bin inputs the terminal sweep deleted) never + // reaches the precondition below. + frozen, hasFrozen, err := cat.FrozenCoverage(w) + if err != nil { + return fmt.Errorf("streaming: buildTxhashIndex read frozen coverage window %s: %w", w, err) + } + if hasFrozen && frozen.Lo == lo && frozen.Hi == hi { + cfg.Logger.Debugf("buildTxhashIndex: window %s coverage [%s,%s] already frozen; skipping", w, lo, hi) + return nil + } + + // Step 2a — loud precondition, checked BEFORE any key is touched. Every chunk + // in [lo, hi] must have its .bin frozen. + inputs, err := cat.txhashBinInputs(w, lo, hi) + if err != nil { + return err + } + + // Step 2b — mark the coverage "freezing" (idempotent overwrite of any crashed + // attempt's debris at this name). 
+ cov, err := cat.MarkIndexFreezing(w, lo, hi) + if err != nil { + return fmt.Errorf("streaming: buildTxhashIndex mark freezing %s: %w", indexKey(w, lo, hi), err) + } + + // Test-only observation point at the post-mark / pre-write instant (§7.6 + // "after step 2, mid step 3"): new coverage "freezing", predecessor still the + // unique frozen coverage, no resolvable in-flight name. No-op in production. + cat.hooks.fireAfterIndexMark() + + // Step 3 — write the coverage's .idx from scratch. txhash.BuildColdIndex + // create-or-truncates outputPath (streamhash's SortedBuilder), so a crashed + // attempt's partial is overwritten wholesale, never appended. The window dir + // is created on demand; detect whether THIS build created it so barrierNewFile + // can fsync the grandparent dirent (txhash/index/) on a window's first build. + idxPath := cat.layout.IndexFilePath(cov) + windowDir := cat.layout.IndexWindowDir(w) + _, statErr := os.Stat(windowDir) + newWindowDir := errors.Is(statErr, os.ErrNotExist) + if statErr != nil && !newWindowDir { + return fmt.Errorf("streaming: buildTxhashIndex stat window dir %s: %w", windowDir, statErr) + } + if newWindowDir { + if mkErr := os.MkdirAll(windowDir, 0o755); mkErr != nil { + return fmt.Errorf("streaming: buildTxhashIndex mkdir %s: %w", windowDir, mkErr) + } + } + + minLedger := lo.FirstLedger() + maxLedger := hi.LastLedger() + if berr := txhash.BuildColdIndex(ctx, inputs, idxPath, minLedger, maxLedger, cfg.BuildOpts...); berr != nil { + return fmt.Errorf("streaming: buildTxhashIndex build window %s coverage [%s,%s]: %w", w, lo, hi, berr) + } + + // Durability barrier: fsync the .idx + its dir (+ the grandparent on a new + // window dir) BEFORE the coverage flips to "frozen" in CommitIndex. 
+ if barErr := barrierNewFile(idxPath, newWindowDir); barErr != nil { + return fmt.Errorf("streaming: buildTxhashIndex fsync barrier %s: %w", idxPath, barErr) + } + + // Step 4 — commit: one atomic synced batch (promote new -> "frozen", demote + // predecessor -> "pruning", and iff terminal demote every in-window + // chunk:{c}:txhash -> "pruning"). CommitIndex re-derives the predecessor and + // terminal-ness from durable state, so it is safe to re-run after a crash. + if cerr := cat.CommitIndex(cov); cerr != nil { + return fmt.Errorf("streaming: buildTxhashIndex commit window %s coverage [%s,%s]: %w", w, lo, hi, cerr) + } + return nil +} + +// buildThenSweep is how the executor runs an IndexBuild (design-docs rule 4's +// eager call site / §7.4): buildTxhashIndex, then the standard sweeps for THIS +// window's "pruning" coverages and (terminal) demoted .bin inputs. The commit +// batch only demotes keys; this removes the demoted files without waiting +// for a lifecycle tick. +// +// The sweep is WINDOW-LOCAL — it walks only b.Window's index keys and only the +// chunk:{c}:txhash keys in b.Window — so concurrent windows' sweeps touch +// disjoint keys and files (the executor holds at most one IndexBuild per +// window). As a bonus it finishes any "pruning" leftovers a previous crashed +// pass left in the same window. A crash anywhere mid-sweep leaves "pruning" +// keys the next build (or the tick's prune scan) re-runs — the same convergence +// story regardless of caller. +func buildThenSweep(ctx context.Context, b IndexBuild, cfg BuildConfig) error { + if err := cfg.validate(); err != nil { + return err + } + cat := cfg.Catalog + + if err := buildTxhashIndex(ctx, b.Window, b.Lo, b.Hi, cfg); err != nil { + return err + } + + // Test-only observation point at the post-commit / pre-sweep instant (§7.6 + // "after step 4, before the eager sweep"). No-op in production. 
+ cat.hooks.fireAfterCommitBeforeSweep() + + // Sweep this window's superseded coverages ("pruning" index keys). The + // just-frozen coverage is "frozen" and skipped; a predecessor demoted by the + // commit (or by a previous crashed pass) is "pruning" and removed. + covs, err := cat.IndexKeys(b.Window) + if err != nil { + return fmt.Errorf("streaming: buildThenSweep read index keys window %s: %w", b.Window, err) + } + for _, cov := range covs { + if cov.State != StatePruning { + continue + } + if serr := cat.SweepIndexKey(cov); serr != nil { + return fmt.Errorf("streaming: buildThenSweep sweep coverage %s: %w", cov.Key, serr) + } + } + + // Sweep this window's demoted .bin inputs (terminal build) in one batched + // pass. Non-terminal builds demote no inputs, so demoted is empty and + // SweepChunkArtifacts is a no-op. + demoted, err := cat.windowDemotedTxhashRefs(b.Window) + if err != nil { + return err + } + if serr := cat.SweepChunkArtifacts(demoted); serr != nil { + return fmt.Errorf("streaming: buildThenSweep sweep demoted inputs window %s: %w", b.Window, serr) + } + return nil +} + +// txhashBinInputs returns the .bin paths for chunks [lo, hi], enforcing rule +// 3's loud precondition: every chunk in the range MUST have its chunk:{c}:txhash +// key "frozen" (its .bin exists and is durable, trusted blindly). It returns an +// error naming the first offending chunk and produces NO partial inputs on +// failure — the precondition is checked before any write in buildTxhashIndex. 
+func (c *Catalog) txhashBinInputs(w WindowID, lo, hi chunk.ID) ([]string, error) { + inputs := make([]string, 0, uint32(hi)-uint32(lo)+1) + for cid := lo; ; cid++ { + state, err := c.State(cid, KindTxHash) + if err != nil { + return nil, fmt.Errorf("streaming: buildTxhashIndex read txhash state chunk %s: %w", cid, err) + } + if state != StateFrozen { + return nil, fmt.Errorf( + "streaming: buildTxhashIndex precondition violated: window %s chunk %s txhash is %q, want %q", + w, cid, state, StateFrozen) + } + inputs = append(inputs, c.layout.TxHashBinPath(cid)) + if cid == hi { // guard against chunk.ID wraparound at the top of the range + break + } + } + return inputs, nil +} + +// windowDemotedTxhashRefs returns the chunk:{c}:txhash refs in window w whose +// key is "pruning" — the terminal commit's demoted .bin inputs (and any a +// previous crashed pass left). The window-local scan walks [firstChunk, +// lastChunk]; a non-terminal build leaves none. +func (c *Catalog) windowDemotedTxhashRefs(w WindowID) ([]ArtifactRef, error) { + first := c.windows.FirstChunk(w) + last := c.windows.LastChunk(w) + var refs []ArtifactRef + for cid := first; ; cid++ { + state, err := c.State(cid, KindTxHash) + if err != nil { + return nil, fmt.Errorf("streaming: read txhash state chunk %s: %w", cid, err) + } + if state == StatePruning { + refs = append(refs, ArtifactRef{Chunk: cid, Kind: KindTxHash, State: StatePruning}) + } + if cid == last { // guard against chunk.ID wraparound at the top + break + } + } + return refs, nil +} diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/txindex_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/txindex_test.go new file mode 100644 index 000000000..ca971d413 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/txindex_test.go @@ -0,0 +1,515 @@ +package streaming + +import ( + "context" + "crypto/sha256" + "encoding/binary" + "os" + "path/filepath" + "sort" + "testing" + + "github.com/stretchr/testify/require" + + 
"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" +) + +// testBuildConfig wires a BuildConfig over the test catalog with a silent +// logger. Small windows let tests cover whole windows with a handful of chunks. +func testBuildConfig(cat *Catalog) BuildConfig { + return BuildConfig{Catalog: cat, Logger: silentLogger()} +} + +// smallWindowCatalog builds a test catalog whose windows are cpi chunks wide, so +// a "terminal" (full-window) build needs only a few chunks. Returns the catalog +// and the artifact root. +func smallWindowCatalog(t *testing.T, cpi uint32) (*Catalog, string) { + t.Helper() + cat, root := testCatalog(t) + w, err := NewWindows(cpi) + require.NoError(t, err) + cat.windows = w + return cat, root +} + +// txEntry is a (full 32-byte tx hash, ledger seq) pair a test wants resolvable +// through the cold index. +type txEntry struct { + hash [32]byte + seq uint32 +} + +// hashAt returns a deterministic 32-byte tx hash for a test tag. +func hashAt(tag uint64) [32]byte { + var seed [8]byte + binary.BigEndian.PutUint64(seed[:], tag) + return sha256.Sum256(seed[:]) +} + +// freezeChunkBin writes a real sorted .bin for chunkID holding entries, fsyncs +// it, and flips chunk:{c}:txhash to "frozen" through the one-write protocol — +// the exact state buildTxhashIndex's precondition demands. Each entry's seq must +// fall in the chunk's ledger range (asserted); the caller chooses the seqs. +// Returns nothing: callers keep their own entries to assert each resolves. 
+func freezeChunkBin(t *testing.T, cat *Catalog, chunkID chunk.ID, entries []txEntry) { + t.Helper() + + cold := make([]txhash.ColdEntry, len(entries)) + for i, e := range entries { + require.GreaterOrEqual(t, e.seq, chunkID.FirstLedger(), "seq in chunk range") + require.LessOrEqual(t, e.seq, chunkID.LastLedger(), "seq in chunk range") + var key [txhash.ColdKeySize]byte + copy(key[:], e.hash[:txhash.ColdKeySize]) + cold[i] = txhash.ColdEntry{Key: key, Seq: e.seq} + } + // WriteColdBin writes entries verbatim; they must be sorted lex by key. + sort.Slice(cold, func(i, j int) bool { + return string(cold[i].Key[:]) < string(cold[j].Key[:]) + }) + + path := cat.layout.TxHashBinPath(chunkID) + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, cat.MarkChunkFreezing(chunkID, KindTxHash)) + require.NoError(t, txhash.WriteColdBin(path, cold)) + require.NoError(t, barrierNewFile(path, true)) + require.NoError(t, cat.FlipChunkFrozen(chunkID, KindTxHash)) +} + +// seqIn returns a ledger seq inside chunkID's range, offset within the chunk. +func seqIn(chunkID chunk.ID, offset uint32) uint32 { + return chunkID.FirstLedger() + offset +} + +// assertCoverageQueryable opens the window's unique frozen coverage's .idx and +// asserts every (hash, seq) resolves and an unseen hash misses. 
+func assertCoverageQueryable(t *testing.T, cat *Catalog, w WindowID, want []txEntry) { + t.Helper() + frozen, ok, err := cat.FrozenCoverage(w) + require.NoError(t, err) + require.True(t, ok, "window %s must have a frozen coverage", w) + + reader, err := txhash.OpenColdReader(cat.layout.IndexFilePath(frozen)) + require.NoError(t, err) + defer func() { _ = reader.Close() }() + + for _, e := range want { + got, gerr := reader.Get(e.hash) + require.NoError(t, gerr, "hash %x must resolve", e.hash[:4]) + require.Equal(t, e.seq, got, "hash %x resolves to its seq", e.hash[:4]) + } + + // An unseen hash misses (the fingerprint rejects ~255/256; this one is well + // outside the build set). + _, miss := reader.Get(hashAt(0xDEADBEEF)) + require.ErrorIs(t, miss, stores.ErrNotFound) +} + +// --------------------------------------------------------------------------- +// Happy path: build a coverage from synthetic .bin runs; assert the .idx is +// queryable and the catalog coverage is unique + frozen. +// --------------------------------------------------------------------------- + +func TestBuildTxhashIndex_BuildsQueryableCoverage(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] + cfg := testBuildConfig(cat) + + // Two chunks, each with a couple of entries. + e0a := txEntry{hashAt(1), seqIn(0, 5)} + e0b := txEntry{hashAt(2), seqIn(0, 9000)} + e1a := txEntry{hashAt(3), seqIn(1, 1)} + freezeChunkBin(t, cat, 0, []txEntry{e0a, e0b}) + freezeChunkBin(t, cat, 1, []txEntry{e1a}) + + // Non-terminal build [0,1] (hi 1 < window-last 3). + require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 1, cfg)) + + // Exactly one frozen coverage, covering [0,1]. + frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, chunk.ID(0), frozen.Lo) + require.Equal(t, chunk.ID(1), frozen.Hi) + require.Equal(t, StateFrozen, frozen.State) + + // Only one coverage key in the window (no debris). 
+ keys, err := cat.IndexKeys(0) + require.NoError(t, err) + require.Len(t, keys, 1) + + // Non-terminal: .bin inputs stay frozen (window still filling). + for _, c := range []chunk.ID{0, 1} { + s, serr := cat.State(c, KindTxHash) + require.NoError(t, serr) + require.Equal(t, StateFrozen, s) + } + + // The .idx resolves every entry. + require.FileExists(t, cat.layout.IndexFilePath(frozen)) + assertCoverageQueryable(t, cat, 0, []txEntry{e0a, e0b, e1a}) +} + +// --------------------------------------------------------------------------- +// Rolling case: hi advances by one each boundary; the predecessor is demoted +// AND swept; exactly one frozen coverage exists at every instant. +// --------------------------------------------------------------------------- + +func TestBuildThenSweep_RollingPredecessorDemotedAndSwept(t *testing.T) { + cat, _ := smallWindowCatalog(t, 10) // window 0 = chunks [0,9] + cfg := testBuildConfig(cat) + + var all []txEntry + for c := chunk.ID(0); c <= 4; c++ { + e := txEntry{hashAt(uint64(100 + c)), seqIn(c, 7)} + freezeChunkBin(t, cat, c, []txEntry{e}) + all = append(all, e) + } + + var prevPath string + for hi := chunk.ID(0); hi <= 4; hi++ { + require.NoError(t, buildThenSweep(context.Background(), IndexBuild{Window: 0, Lo: 0, Hi: hi}, cfg)) + + // Exactly one frozen coverage at this instant, covering [0,hi]. + frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, chunk.ID(0), frozen.Lo) + require.Equal(t, hi, frozen.Hi) + + // Exactly ONE coverage key remains — the predecessor was demoted and the + // eager sweep removed it (key + file). + keys, err := cat.IndexKeys(0) + require.NoError(t, err) + require.Len(t, keys, 1, "exactly one coverage key after the eager sweep") + require.Equal(t, frozen.Key, keys[0].Key) + require.Equal(t, StateFrozen, keys[0].State) + + // The predecessor file is gone. 
+ if prevPath != "" { + require.NoFileExists(t, prevPath) + } + prevPath = cat.layout.IndexFilePath(frozen) + require.FileExists(t, prevPath) + + // Non-terminal (hi < 9): inputs stay frozen. + for c := chunk.ID(0); c <= hi; c++ { + s, serr := cat.State(c, KindTxHash) + require.NoError(t, serr) + require.Equal(t, StateFrozen, s) + } + } + + // The final coverage resolves every entry rolled in. + assertCoverageQueryable(t, cat, 0, all) +} + +// --------------------------------------------------------------------------- +// Terminal case: a full-window build demotes AND sweeps every in-window txhash +// key (the .bin inputs), and leaves exactly one frozen full-window coverage. +// --------------------------------------------------------------------------- + +func TestBuildThenSweep_TerminalDemotesAndSweepsAllInputs(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] + cfg := testBuildConfig(cat) + + var all []txEntry + for c := chunk.ID(0); c <= 3; c++ { + e := txEntry{hashAt(uint64(200 + c)), seqIn(c, 11)} + freezeChunkBin(t, cat, c, []txEntry{e}) + all = append(all, e) + } + // A non-txhash key in the window must survive the terminal sweep. + require.NoError(t, cat.MarkChunkFreezing(2, KindLedgers)) + require.NoError(t, cat.FlipChunkFrozen(2, KindLedgers)) + + // Terminal build [0,3]: hi == window-last 3. + require.NoError(t, buildThenSweep(context.Background(), IndexBuild{Window: 0, Lo: 0, Hi: 3}, cfg)) + + // Frozen full-window coverage. + frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.True(t, cat.windows.IsTerminalCoverage(frozen)) + require.Equal(t, chunk.ID(3), frozen.Hi) + + // Every in-window txhash key was demoted AND swept: key absent => .bin gone. 
+ for c := chunk.ID(0); c <= 3; c++ { + s, serr := cat.State(c, KindTxHash) + require.NoError(t, serr) + require.Equal(t, State(""), s, "chunk %s txhash key swept", c) + require.NoFileExists(t, cat.layout.TxHashBinPath(c)) + } + // The ledgers key (and file would be) untouched. + ledgers, err := cat.State(2, KindLedgers) + require.NoError(t, err) + require.Equal(t, StateFrozen, ledgers) + + // The terminal .idx still resolves every entry after the input sweep. + assertCoverageQueryable(t, cat, 0, all) +} + +// --------------------------------------------------------------------------- +// Skip case: if the window's unique frozen coverage already equals [lo,hi], the +// build returns early — no precondition demand on .bin inputs (load-bearing for +// re-scheduled finalized windows whose inputs the sweep deleted). +// --------------------------------------------------------------------------- + +func TestBuildTxhashIndex_SkipsWhenCoverageAlreadyFrozen(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) + cfg := testBuildConfig(cat) + + e := txEntry{hashAt(300), seqIn(0, 3)} + freezeChunkBin(t, cat, 0, []txEntry{e}) + freezeChunkBin(t, cat, 1, []txEntry{{hashAt(301), seqIn(1, 4)}}) + + // First build [0,1]. + require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 1, cfg)) + frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + idxPath := cat.layout.IndexFilePath(frozen) + before, err := os.Stat(idxPath) + require.NoError(t, err) + + // Now demote the .bin inputs to "pruning" — simulating a finalized window + // whose inputs the sweep is about to remove. A second build of the SAME + // coverage must SKIP (never demand the now-non-frozen inputs). 
+ require.NoError(t, cat.store.Put(chunkKey(0, KindTxHash), string(StatePruning))) + require.NoError(t, cat.store.Put(chunkKey(1, KindTxHash), string(StatePruning))) + + require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 1, cfg), + "skip check must precede the precondition") + + // The .idx was not rewritten (same file, untouched). + after, err := os.Stat(idxPath) + require.NoError(t, err) + require.Equal(t, before.ModTime(), after.ModTime(), "skipped build must not rewrite the .idx") + + // Still exactly one frozen coverage. + keys, err := cat.IndexKeys(0) + require.NoError(t, err) + require.Len(t, keys, 1) + require.Equal(t, StateFrozen, keys[0].State) +} + +// --------------------------------------------------------------------------- +// Loud precondition: a chunk in [lo,hi] whose .bin is not frozen aborts the +// build BEFORE any key is touched — no coverage key is left behind. +// --------------------------------------------------------------------------- + +func TestBuildTxhashIndex_PreconditionFailsLoudly(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) + cfg := testBuildConfig(cat) + + // Chunk 0 frozen, chunk 1 absent (never produced). + freezeChunkBin(t, cat, 0, []txEntry{{hashAt(400), seqIn(0, 1)}}) + + err := buildTxhashIndex(context.Background(), 0, 0, 1, cfg) + require.Error(t, err) + require.Contains(t, err.Error(), "precondition violated") + require.Contains(t, err.Error(), "chunk 00000001") + + // No coverage key was written (the precondition precedes the mark). + keys, err := cat.IndexKeys(0) + require.NoError(t, err) + require.Empty(t, keys, "a precondition failure must not leave a coverage key") + require.NoFileExists(t, cat.layout.IndexFilePath(IndexCoverage{Window: 0, Lo: 0, Hi: 1})) + + // A "freezing" (in-progress) input is also not "frozen" => still aborts. 
+ require.NoError(t, cat.MarkChunkFreezing(1, KindTxHash)) + err = buildTxhashIndex(context.Background(), 0, 0, 1, cfg) + require.Error(t, err) + require.Contains(t, err.Error(), "precondition violated") +} + +// --------------------------------------------------------------------------- +// §7.6 crash matrix — three rows, each converging on a re-run. +// --------------------------------------------------------------------------- + +// Row "after step 2, mid step 3": coverage key "freezing", file partial/complete, +// predecessor still the unique frozen coverage. A re-run of the same coverage +// re-marks and rewrites wholesale, converging on a single frozen coverage. +func TestBuildCrashMatrix_AfterMarkBeforeCommit(t *testing.T) { + cat, _ := smallWindowCatalog(t, 10) + cfg := testBuildConfig(cat) + + for c := chunk.ID(0); c <= 2; c++ { + freezeChunkBin(t, cat, c, []txEntry{{hashAt(uint64(500 + c)), seqIn(c, 2)}}) + } + + // Land a predecessor coverage [0,1] first. + require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 1, cfg)) + predFrozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, chunk.ID(1), predFrozen.Hi) + + // "Crash" the next build [0,2] right after the mark (before the commit) by + // observing state in the afterIndexMark hook, then aborting via a panic the + // test recovers — simulating process death between step 2 and step 4. + cat.hooks.afterIndexMark = func() { + // At this instant: new key "freezing", predecessor still the unique frozen + // coverage (no two-frozen window). 
+ frozen, fok, ferr := cat.FrozenCoverage(0) + require.NoError(t, ferr) + require.True(t, fok) + require.Equal(t, predFrozen.Key, frozen.Key, "predecessor still the unique frozen coverage") + v, vok, verr := cat.Get(indexKey(0, 0, 2)) + require.NoError(t, verr) + require.True(t, vok) + require.Equal(t, string(StateFreezing), v, "new coverage marked freezing") + panic("crash after mark") + } + require.PanicsWithValue(t, "crash after mark", func() { + _ = buildTxhashIndex(context.Background(), 0, 0, 2, cfg) + }) + cat.hooks.afterIndexMark = nil + + // Durable state after the "crash": predecessor [0,1] frozen, [0,2] "freezing" + // debris. + keys, err := cat.IndexKeys(0) + require.NoError(t, err) + states := map[string]State{} + for _, k := range keys { + states[k.Key] = k.State + } + require.Equal(t, StateFrozen, states[indexKey(0, 0, 1)]) + require.Equal(t, StateFreezing, states[indexKey(0, 0, 2)]) + + // Recovery: re-run the build of [0,2]. It re-marks (idempotent overwrite), + // rewrites the .idx, and commits — converging on a single frozen coverage. + require.NoError(t, buildThenSweep(context.Background(), IndexBuild{Window: 0, Lo: 0, Hi: 2}, cfg)) + frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, chunk.ID(2), frozen.Hi) + // The predecessor [0,1] was demoted by the commit and swept eagerly. + keys, err = cat.IndexKeys(0) + require.NoError(t, err) + require.Len(t, keys, 1, "exactly one coverage after recovery") + require.Equal(t, indexKey(0, 0, 2), keys[0].Key) + assertCoverageQueryable(t, cat, 0, []txEntry{{hashAt(500), seqIn(0, 2)}, {hashAt(501), seqIn(1, 2)}, {hashAt(502), seqIn(2, 2)}}) +} + +// Row "after step 4, before the eager sweep": the commit batch landed (new +// coverage frozen + live, predecessor "pruning", terminal inputs "pruning") but +// the sweeps did not run. Re-running buildThenSweep finishes the sweeps. 
+func TestBuildCrashMatrix_AfterCommitBeforeSweep(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] + cfg := testBuildConfig(cat) + + for c := chunk.ID(0); c <= 3; c++ { + freezeChunkBin(t, cat, c, []txEntry{{hashAt(uint64(600 + c)), seqIn(c, 3)}}) + } + // A predecessor [0,2] so the commit has a coverage to demote too. + require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 2, cfg)) + predPath := cat.layout.IndexFilePath(IndexCoverage{Window: 0, Lo: 0, Hi: 2}) + + // "Crash" the terminal build [0,3] right after the commit, before the sweeps. + cat.hooks.afterCommitBeforeSweep = func() { + // New coverage frozen + live; predecessor and inputs "pruning" sweep work. + frozen, fok, ferr := cat.FrozenCoverage(0) + require.NoError(t, ferr) + require.True(t, fok) + require.Equal(t, chunk.ID(3), frozen.Hi) + v, _, _ := cat.Get(indexKey(0, 0, 2)) + require.Equal(t, string(StatePruning), v, "predecessor demoted, not yet swept") + for c := chunk.ID(0); c <= 3; c++ { + s, _ := cat.State(c, KindTxHash) + require.Equal(t, StatePruning, s, "input demoted, not yet swept") + } + panic("crash after commit") + } + require.PanicsWithValue(t, "crash after commit", func() { + _ = buildThenSweep(context.Background(), IndexBuild{Window: 0, Lo: 0, Hi: 3}, cfg) + }) + cat.hooks.afterCommitBeforeSweep = nil + + // The predecessor file and the .bin inputs are still on disk (sweeps didn't + // run), but their keys are "pruning". + require.FileExists(t, predPath) + for c := chunk.ID(0); c <= 3; c++ { + require.FileExists(t, cat.layout.TxHashBinPath(c)) + } + + // Recovery: re-run buildThenSweep for [0,3]. buildTxhashIndex SKIPS (already + // frozen) and the eager sweeps finish the demoted predecessor + inputs. 
+ require.NoError(t, buildThenSweep(context.Background(), IndexBuild{Window: 0, Lo: 0, Hi: 3}, cfg)) + require.NoFileExists(t, predPath) + for c := chunk.ID(0); c <= 3; c++ { + require.NoFileExists(t, cat.layout.TxHashBinPath(c)) + s, serr := cat.State(c, KindTxHash) + require.NoError(t, serr) + require.Equal(t, State(""), s) + } + keys, err := cat.IndexKeys(0) + require.NoError(t, err) + require.Len(t, keys, 1) + require.Equal(t, StateFrozen, keys[0].State) +} + +// Row "mid-sweep": a "pruning" key whose durable unlink completed but whose key +// delete didn't. The sweep re-runs; key absent => file gone. Driven through the +// real SweepChunkArtifacts via buildThenSweep's beforeKeyDelete hook. +func TestBuildCrashMatrix_MidSweepReRuns(t *testing.T) { + cat, _ := smallWindowCatalog(t, 4) + cfg := testBuildConfig(cat) + + for c := chunk.ID(0); c <= 3; c++ { + freezeChunkBin(t, cat, c, []txEntry{{hashAt(uint64(700 + c)), seqIn(c, 4)}}) + } + + // "Crash" mid-sweep: inside SweepChunkArtifacts, after the durable unlink and + // before the key-delete batch. The files are already gone here; the keys are + // not. Panic to simulate process death at that exact instant. + cat.hooks.beforeKeyDelete = func() { + for c := chunk.ID(0); c <= 3; c++ { + require.NoFileExists(t, cat.layout.TxHashBinPath(c), "unlink durable before key delete") + } + panic("crash mid-sweep") + } + require.PanicsWithValue(t, "crash mid-sweep", func() { + _ = buildThenSweep(context.Background(), IndexBuild{Window: 0, Lo: 0, Hi: 3}, cfg) + }) + cat.hooks.beforeKeyDelete = nil + + // The terminal commit landed (coverage frozen), the input .bin files are gone, + // but their keys survive as "pruning" — the mid-sweep leftover the next run + // finishes. 
+ frozen, ok, err := cat.FrozenCoverage(0) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, chunk.ID(3), frozen.Hi) + pruningLeft := 0 + for c := chunk.ID(0); c <= 3; c++ { + require.NoFileExists(t, cat.layout.TxHashBinPath(c)) + s, serr := cat.State(c, KindTxHash) + require.NoError(t, serr) + require.Equal(t, StatePruning, s, "key outlives the durable unlink") + pruningLeft++ + } + require.Equal(t, 4, pruningLeft) + + // Recovery: re-run buildThenSweep. The build skips (frozen) and the sweep + // re-runs over the surviving "pruning" keys, converging on key absent. + require.NoError(t, buildThenSweep(context.Background(), IndexBuild{Window: 0, Lo: 0, Hi: 3}, cfg)) + for c := chunk.ID(0); c <= 3; c++ { + s, serr := cat.State(c, KindTxHash) + require.NoError(t, serr) + require.Equal(t, State(""), s, "mid-sweep leftover finished on re-run") + } + assertCoverageQueryable(t, cat, 0, []txEntry{{hashAt(700), seqIn(0, 4)}}) +} + +// --------------------------------------------------------------------------- +// Config validation + lo>hi guard. +// --------------------------------------------------------------------------- + +func TestBuildConfigValidation(t *testing.T) { + cat, _ := testCatalog(t) + require.Error(t, buildTxhashIndex(context.Background(), 0, 0, 0, BuildConfig{Logger: silentLogger()})) + require.Error(t, buildTxhashIndex(context.Background(), 0, 0, 0, BuildConfig{Catalog: cat})) + // lo > hi is a programmer error surfaced loudly. 
+ require.Error(t, buildTxhashIndex(context.Background(), 0, 5, 1, testBuildConfig(cat))) +} diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/window.go b/cmd/stellar-rpc/internal/fullhistory/streaming/window.go new file mode 100644 index 000000000..26e7359ea --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/window.go @@ -0,0 +1,69 @@ +package streaming + +import ( + "errors" + "fmt" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// Window arithmetic lives here, not in pkg/chunk: pkg/chunk deliberately has no +// window/index concept (it is pure chunk geometry), so the chunk<->window +// mapping is parameterized by chunks_per_txhash_index (cpi). A window is a +// contiguous run of cpi chunks: window w owns chunks [w*cpi, w*cpi + cpi - 1]. + +// MaxChunksPerTxhashIndex bounds cpi so a window's ledger span always fits a +// uint32 seq: floor(2^32 / LedgersPerChunk). See gettransaction-full-history- +// design.md §6.2. +const MaxChunksPerTxhashIndex uint32 = ^uint32(0) / chunk.LedgersPerChunk + +// Windows is window arithmetic bound to one chunks_per_txhash_index value. The +// value is immutable for a deployment (pinned in config:chunks_per_txhash_index +// on first start), so a Windows is constructed once and shared. +type Windows struct { + cpi uint32 // chunks_per_txhash_index; > 0, <= MaxChunksPerTxhashIndex +} + +// NewWindows validates cpi and returns the window arithmetic for it. +func NewWindows(chunksPerIndex uint32) (Windows, error) { + if chunksPerIndex == 0 { + return Windows{}, errors.New("streaming: chunks_per_txhash_index must be > 0") + } + if chunksPerIndex > MaxChunksPerTxhashIndex { + return Windows{}, fmt.Errorf( + "streaming: chunks_per_txhash_index %d exceeds max %d", + chunksPerIndex, MaxChunksPerTxhashIndex, + ) + } + return Windows{cpi: chunksPerIndex}, nil +} + +// ChunksPerIndex returns the configured cpi. 
+func (w Windows) ChunksPerIndex() uint32 { return w.cpi } + +// WindowID returns the window containing chunk c: c / cpi. +func (w Windows) WindowID(c chunk.ID) WindowID { + return WindowID(uint32(c) / w.cpi) +} + +// FirstChunk returns the lowest chunk in window id: id * cpi. +func (w Windows) FirstChunk(id WindowID) chunk.ID { + return chunk.ID(uint32(id) * w.cpi) +} + +// LastChunk returns the highest chunk in window id: (id+1)*cpi - 1. +func (w Windows) LastChunk(id WindowID) chunk.ID { + return chunk.ID((uint32(id)+1)*w.cpi - 1) +} + +// ChunksIn returns the number of chunks in any window (always cpi). Present so +// callers don't reach for the raw field. +func (w Windows) ChunksIn() uint32 { return w.cpi } + +// IsTerminalCoverage reports whether a coverage's hi equals its window's last +// chunk — the derived "terminal"/finalized property (marked nowhere). A frozen +// terminal coverage means its window is finalized: its .bin inputs were +// demoted in the same commit, and it is never rebuilt again. +func (w Windows) IsTerminalCoverage(cov IndexCoverage) bool { + return cov.Hi == w.LastChunk(cov.Window) +} From 9c7b4204647946719269d86e724c93c9c4fe2981 Mon Sep 17 00:00:00 2001 From: Simon Chow Date: Tue, 23 Jun 2026 18:28:21 -0400 Subject: [PATCH 2/2] docs+style(streaming): slice-3 doc.go, drop PERF, collision test, lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - doc.go (resolved in the rebase): scope the file map to the complete daemon, add window.go (geometry) + an Index group (txindex.go), drop the forbidden design-docs/full-history-implementation-status.md reference, prefer 'catalog' over 'meta-store'. - Remove PERF.md + perf_test.go: the bench-format-alignment material belongs with the bench harness, not the daemon PR. 
- Add TestBuildTxhashIndex_SameWindowKeyCollisionFailsLoud: a same-window 16-byte-prefix collision must fail loudly with streamhash.ErrDuplicateKey (issue #814 acceptance), never silently drop — previously uncovered. - golangci-lint (this slice's own new findings): gci/misspell/modernize/ unconvert via --fix; //nolint:cyclop on buildTxhashIndex; revive unused pendingArtifacts cfg -> _; lll wrap; //nolint:unparam on the general test helpers; //nolint:funlen,cyclop,maintidx on the lookup E2E. --- .../internal/fullhistory/streaming/PERF.md | 65 ----- .../internal/fullhistory/streaming/audit.go | 3 +- .../fullhistory/streaming/audit_invariants.go | 25 +- .../fullhistory/streaming/audit_test.go | 2 + .../fullhistory/streaming/convergence_test.go | 2 + .../fullhistory/streaming/daemon_test.go | 2 +- .../fullhistory/streaming/e2e_test.go | 4 +- .../fullhistory/streaming/eligibility.go | 2 +- .../internal/fullhistory/streaming/execute.go | 2 +- .../fullhistory/streaming/execute_test.go | 12 +- .../fullhistory/streaming/perf_test.go | 251 ------------------ .../fullhistory/streaming/recovery.go | 3 +- .../fullhistory/streaming/recovery_test.go | 2 + .../internal/fullhistory/streaming/txindex.go | 8 +- .../fullhistory/streaming/txindex_test.go | 43 ++- 15 files changed, 83 insertions(+), 343 deletions(-) delete mode 100644 cmd/stellar-rpc/internal/fullhistory/streaming/PERF.md delete mode 100644 cmd/stellar-rpc/internal/fullhistory/streaming/perf_test.go diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/PERF.md b/cmd/stellar-rpc/internal/fullhistory/streaming/PERF.md deleted file mode 100644 index 2ff72d33f..000000000 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/PERF.md +++ /dev/null @@ -1,65 +0,0 @@ -# Full-history streaming: tx-hash cold-index performance expectations - -These are the design's **measured** figures for the tx-hash cold tier, taken -from the `bench-fullhistory` harness (on the `rpc-hack` branch: -`cmd/stellar-rpc/scripts/bench-fullhistory`, 
the `cold-ingest --types=txhash` -and `build-txhash-index` commands). They are recorded here, not re-measured in -this package, because the streaming rebuild produces **byte-format-identical** -artifacts to the merged cold path the harness measures — see -`perf_test.go::TestStreamingRebuild_ByteIdenticalToColdPath`, which proves the -streaming `buildTxhashIndex` and a direct `txhash.BuildColdIndex` over the same -`.bin` inputs write the same bytes. Adopting the formats unchanged is what lets -the harness's figures transfer (gettransaction-full-history-design.md §6.2, -Part 4). - -Geometry assumed below: the default window of `DefaultChunksPerIndex = 1000` -chunks, a dense chunk of ~3M transactions, so a dense full window is -~3×10⁹ transactions. - -## On-disk format (the basis for the transfer) - -| artifact | format | width | -| --- | --- | --- | -| `.bin` per-chunk sorted run (§6.1) | `uint64` LE count header, then `[key:16][seq:4 LE]` entries, sorted by big-endian `uint64` of the key | **20 B/entry exactly** | -| `.idx` per-window MPHF (§6.2) | streamhash MPHF; 16-byte routing key; **3-byte** payload (`seq − MinLedger`); **1-byte** fingerprint; `[MinLedger, MaxLedger]` in user metadata | **≈4.2 B/tx** | - -The `.bin` key is the first 16 bytes of the tx hash (`streamhash.MinKeySize`); -the `.idx` payload is a 3-byte offset from the window's `MinLedger` -(`lo.FirstLedger()`), spanning up to 16.77M ledgers — a window past the 4-byte -payload threshold (>16.77M ledgers, ≥1678 chunks) adds 1 B/tx. - -## Expected figures (from the bench harness) - -- **Index size: ≈4.2 B/tx** at the default 3-byte payload (MPHF structure + - 3-byte payload + 1-byte fingerprint) — **≈12.5 GB** for a dense full window. - (`perf_test.go::TestColdIndexSizing_ConsistentWithPart4` checks a small-N - sanity band around this and pins the inviolable 4 B/tx payload+fingerprint - floor; the asymptote itself is the harness's measurement.) 
- -- **`.bin` floor: ≈20 B/tx, ≈60 GB** for a dense full window — the runs the - index consumes. Transient `.bin` disk is bounded by the eager sweep at one - dense in-flight window's worth (≈60 GB), irreducible because a window's build - merges all of its runs at once. - -- **Rebuild: ≈1 minute** for a full dense window — merging the ≈60 GB of - sorted `.bin` runs into the ≈12.5 GB `.idx` at a ~200 MB/s write burst. - Mid-window rebuilds scale with `hi − lo`. Against a ~14-hour chunk-boundary - cadence at mainnet rates this is ~0.1% duty cycle. - -- **Transient peak: ~2× the index size** in the window dir during each - rebuild (~25 GB at window end) — old and new coverage files coexist from the - start of the write until the eager sweep's unlink. - -- **Hot `txhash` CF: 36 B/tx raw** (32-byte key + 4-byte value, before RocksDB - overhead), ~110 MB raw per dense chunk — the serving tier for chunks above - the index's `hi` until the next rebuild folds them in. - -## Honesty note - -The streaming package does **not** re-measure these numbers — measuring a dense -full window needs the multi-TB corpus the `bench-fullhistory` harness drives on -`rpc-hack`. What this package proves instead is the precondition that makes the -transfer valid: format identity (byte-for-byte) between the streaming rebuild -and the merged cold path, plus the on-disk format pins (`perf_test.go`). If a -width or MPHF parameter ever changes, those tests fail and these figures must be -re-derived from the harness. 
diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/audit.go b/cmd/stellar-rpc/internal/fullhistory/streaming/audit.go index 98eb5bcf3..69b40b681 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/audit.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/audit.go @@ -194,7 +194,8 @@ func RunAudit(cfg Config, opts AuditOptions, logger *supportlog.Entry) (AuditRep if cfg.Backfill.ChunksPerTxhashIndex == nil { return AuditReport{}, errors.New( - "streaming: audit: chunks_per_txhash_index unresolved (WithDefaults not applied)") + "streaming: audit: chunks_per_txhash_index unresolved (WithDefaults not applied)", + ) } windows, err := NewWindows(*cfg.Backfill.ChunksPerTxhashIndex) if err != nil { diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/audit_invariants.go b/cmd/stellar-rpc/internal/fullhistory/streaming/audit_invariants.go index 84780e0e9..d52897ddf 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/audit_invariants.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/audit_invariants.go @@ -60,7 +60,8 @@ func (c *Catalog) auditSingleCanonicalState(through uint32, report *AuditReport) Invariant: InvSingleCanonicalState, Detail: fmt.Sprintf( "window %s has %d frozen index coverages (must be at most 1): %s", - w, len(group), strings.Join(keys, ", ")), + w, len(group), strings.Join(keys, ", "), + ), }) } } @@ -110,7 +111,8 @@ func (c *Catalog) auditSingleCanonicalState(through uint32, report *AuditReport) Key: cov.Key, Detail: fmt.Sprintf( "index coverage key is %q at quiescence: the sweep should have removed this transient", - cov.State), + cov.State, + ), }) } } @@ -149,7 +151,8 @@ func (c *Catalog) auditSingleCanonicalState(through uint32, report *AuditReport) Detail: fmt.Sprintf( "hot DB key persists for chunk %s whose cold artifacts fully serve it "+ "(all artifacts frozen and its window's index covers it): the discard scan missed it", - hc), + hc, + ), }) } } @@ -173,7 +176,8 @@ func (c *Catalog) 
auditSingleCanonicalState(through uint32, report *AuditReport) Detail: fmt.Sprintf( "per-chunk txhash key %q persists for chunk %s in a finalized window "+ "(its terminal index covers it): finalization demotion did not complete", - ref.State, ref.Chunk), + ref.State, ref.Chunk, + ), }) } } @@ -218,12 +222,7 @@ func auditPendingArtifacts(cat *Catalog, c chunk.ID, covered func(chunk.ID) bool // with two frozen keys does not abort the audit; the duplicate is already // recorded as a clause-1 INV-2 violation. func (c *Catalog) auditTerminalCoverage(frozenPerWindow map[WindowID][]IndexCoverage, ch chunk.ID) bool { - for _, cov := range frozenPerWindow[c.windows.WindowID(ch)] { - if c.windows.IsTerminalCoverage(cov) { - return true - } - } - return false + return slices.ContainsFunc(frozenPerWindow[c.windows.WindowID(ch)], c.windows.IsTerminalCoverage) } // --------------------------------------------------------------------------- @@ -305,7 +304,8 @@ func (c *Catalog) auditDiskMatchesMeta(through uint32, report *AuditReport) erro Key: cov.Key, Path: p, Detail: fmt.Sprintf( - "index coverage key is %q but its .idx file is missing: dangling key", cov.State), + "index coverage key is %q but its .idx file is missing: dangling key", cov.State, + ), }) } } @@ -432,7 +432,8 @@ func (c *Catalog) auditRetentionBound(floor uint32, report *AuditReport) error { Key: cov.Key, Detail: fmt.Sprintf( "index coverage [%s,%s] (last ledger %d) is wholly below the retention floor %d", - cov.Lo, cov.Hi, cov.Hi.LastLedger(), floor), + cov.Lo, cov.Hi, cov.Hi.LastLedger(), floor, + ), }) } } diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/audit_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/audit_test.go index b1269c42d..717d6b15f 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/audit_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/audit_test.go @@ -15,6 +15,8 @@ import ( // testCatalogCPI is testCatalog with a caller-chosen chunks_per_txhash_index, 
so // a test can build a SMALL window (e.g. cpi=2: window 0 = chunks {0,1}) and reach // the "terminal/finalized window" branch without materializing 1000 chunks. +// +//nolint:unparam // the artifact root is returned for symmetry with testCatalog func testCatalogCPI(t *testing.T, cpi uint32) (*Catalog, string) { t.Helper() metaDir := t.TempDir() diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/convergence_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/convergence_test.go index d4a8c7866..072f733c5 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/convergence_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/convergence_test.go @@ -67,6 +67,8 @@ type convergenceHarness struct { // genesis earliest_ledger pin and the given retention width. cpi=1 makes every // one-chunk window finalize immediately (the common boundary-convergence shape); // larger cpi exercises multi-chunk windows. +// +//nolint:unparam // retention width varies across convergence scenarios func newConvergenceHarness(t *testing.T, cpi, retentionChunks uint32) *convergenceHarness { t.Helper() cat, _ := smallWindowCatalog(t, cpi) diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/daemon_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/daemon_test.go index ff4862295..cc747b927 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/daemon_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/daemon_test.go @@ -137,7 +137,7 @@ func TestRunDaemon_LoadValidateWireStartCleanShutdown(t *testing.T) { cpi, cpiPinned, err := cat.ChunksPerTxhashIndex() require.NoError(t, err) require.True(t, cpiPinned) - assert.Equal(t, uint32(DefaultChunksPerTxhashIndex), cpi) + assert.Equal(t, DefaultChunksPerTxhashIndex, cpi) } // Storage-path overrides must be HONORED by the data path, not just locked. 
The diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/e2e_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/e2e_test.go index 6bceacb7c..fe5363a87 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/e2e_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/e2e_test.go @@ -141,7 +141,7 @@ func oneTxLCMReturningHash(t *testing.T, seq uint32) ([]byte, [32]byte) { // runs past the synthetic backlog it blocks until ctx is canceled (a live tip // stream ends only on shutdown). It records the FIRST seq it was asked for so // the restart step can assert the daemon re-derived the watermark and resumed -// with no gap. The ctx-cancelled GetLedger return is the clean-shutdown path the +// with no gap. The ctx-canceled GetLedger return is the clean-shutdown path the // daemon top level classifies as clean. type e2eGetter struct { frames map[uint32][]byte @@ -318,7 +318,7 @@ func waitClean(t *testing.T, cancel context.CancelFunc, done <-chan error) { // // Correctness is asserted at every step. // -//nolint:funlen // full lifecycle E2E with assertions at every step +//nolint:funlen,cyclop,maintidx // full lifecycle E2E with assertions at every step func TestE2E_DaemonLifecycle_FirstStartIngestFreezeLookupRestartPrune(t *testing.T) { if testing.Short() { t.Skip("e2e ingests a full 10k-ledger chunk; skipped in -short") diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/eligibility.go b/cmd/stellar-rpc/internal/fullhistory/streaming/eligibility.go index 2312ce1df..cbb0d66a5 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/eligibility.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/eligibility.go @@ -71,7 +71,7 @@ func eligibleDiscardOps(cfg LifecycleConfig, cat *Catalog, through uint32) ([]fu // frozen; txhash/.bin is exempt when the window's index already covers the // chunk — after finalization the chunk:c:txhash key is legitimately demoted or // swept, and regenerating the .bin would orphan it. 
-func pendingArtifacts(c chunk.ID, cfg LifecycleConfig, cat *Catalog) (ArtifactSet, error) { +func pendingArtifacts(c chunk.ID, _ LifecycleConfig, cat *Catalog) (ArtifactSet, error) { var need ArtifactSet for _, kind := range []Kind{KindLedgers, KindEvents} { state, err := cat.State(c, kind) diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/execute.go b/cmd/stellar-rpc/internal/fullhistory/streaming/execute.go index d8cc28413..fccc6a96d 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/execute.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/execute.go @@ -136,7 +136,7 @@ func (cfg ExecConfig) buildConfig() BuildConfig { // builds blocked on chunk builds that can never get a slot.) // - A failed chunk build never closes its channel, so a dependent index build // never proceeds on a missing input: it unblocks through the <-gctx.Done() -// case (the failure cancelled gctx) and bails with gctx.Err(). buildTxhash +// case (the failure canceled gctx) and bails with gctx.Err(). buildTxhash // Index also keeps a loud .bin precondition as a cheap defensive backstop // (kept — see buildTxhashIndex), but the success-semantics close is the // primary guard now. 
diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/execute_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/execute_test.go index 9308de6c5..a0ec4b84e 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/execute_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/execute_test.go @@ -99,7 +99,8 @@ func TestExecutePlan_IndexWaitsOnInCoverageChunks_Workers1(t *testing.T) { }, } - cfg := execTestCfg(cat, 1, + cfg := execTestCfg( + cat, 1, func(_ context.Context, cb ChunkBuild, _ ExecConfig) error { rec.markChunkDone(cb.Chunk) return nil @@ -136,7 +137,8 @@ func TestExecutePlan_DependencyHoldsUnderConcurrency(t *testing.T) { IndexBuilds: []IndexBuild{{Window: 0, Lo: 0, Hi: 3}}, } - cfg := execTestCfg(cat, 8, + cfg := execTestCfg( + cat, 8, func(_ context.Context, cb ChunkBuild, _ ExecConfig) error { // Stagger completion so an unsynchronized index build would likely // observe a not-yet-done chunk if the wait were broken. @@ -166,7 +168,8 @@ func TestExecutePlan_IndexWithNoInPlanDepsRunsImmediately(t *testing.T) { // No chunk builds — every input already frozen. 
IndexBuilds: []IndexBuild{{Window: 0, Lo: 0, Hi: 3}}, } - cfg := execTestCfg(cat, 2, + cfg := execTestCfg( + cat, 2, func(context.Context, ChunkBuild, ExecConfig) error { return nil }, func(context.Context, IndexBuild, ExecConfig) error { ran.Store(true); return nil }, ) @@ -193,7 +196,8 @@ func TestExecutePlan_FailedChunkAbortsPlanAndIndexNeverHangs(t *testing.T) { IndexBuilds: []IndexBuild{{Window: 0, Lo: 0, Hi: 0}}, } - cfg := execTestCfg(cat, 1, + cfg := execTestCfg( + cat, 1, func(context.Context, ChunkBuild, ExecConfig) error { return chunkErr }, func(_ context.Context, _ IndexBuild, _ ExecConfig) error { // Under SUCCESS semantics the failed chunk never closes its channel, so diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/perf_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/perf_test.go deleted file mode 100644 index dae1d2623..000000000 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/perf_test.go +++ /dev/null @@ -1,251 +0,0 @@ -package streaming - -// perf_test.go pins the tx-hash cold-index format the streaming rebuild -// produces to the merged #728/#780 cold path, and records the design's -// Part-4 sizing expectation (see PERF.md). It is the load-bearing assertion -// behind PERF.md's "the formats are identical, so the bench figures transfer" -// claim: the perf numbers are honest only if the bytes the streaming rebuild -// writes are the same bytes the bench harness measured. -// -// Two independent assertions: -// -// - Format identity. buildTxhashIndex (the streaming rebuild) and a direct -// txhash.BuildColdIndex over the SAME .bin inputs produce a byte-identical -// .idx — same MPHF structure, same 3-byte payload, same 1-byte fingerprint, -// same [MinLedger, MaxLedger] metadata. The streaming path adds catalog -// bookkeeping around the build; it must not perturb the artifact. -// -// - On-disk format pins. 
The .bin inputs match gettransaction §6.1 -// (uint64-LE count header, 20-byte [16-key|4-seq-LE] entries) and the .idx -// matches §6.2 (16-byte routing key, 3-byte payload offset from MinLedger, -// 1-byte fingerprint), read back through the real reader. - -import ( - "context" - "encoding/binary" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/require" - - "github.com/stellar/streamhash" - - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" -) - -// --------------------------------------------------------------------------- -// Format identity: the streaming rebuild writes the same bytes as the merged -// cold path. -// --------------------------------------------------------------------------- - -// TestStreamingRebuild_ByteIdenticalToColdPath is the heart of Issue 20. It -// freezes a set of per-chunk .bin runs through the one-write protocol (the real -// txhash.WriteColdBin codec), then builds the SAME coverage two ways: -// -// 1. the streaming rebuild — buildTxhashIndex, which the daemon's executor -// drives on every boundary (build.go); and -// 2. a direct txhash.BuildColdIndex over the identical inputs — the merged -// cold path the bench harness on rpc-hack measures. -// -// The two .idx files must be byte-for-byte identical. That is what licenses -// PERF.md to transfer the bench harness's measured ≈4.2 B/tx and ≈1-min -// figures to the streaming daemon: the streaming rebuild is not a re-derivation -// of the format, it is the same txhash.BuildColdIndex call wrapped in catalog -// bookkeeping, and the bookkeeping does not touch the artifact. 
-func TestStreamingRebuild_ByteIdenticalToColdPath(t *testing.T) { - cat, _ := smallWindowCatalog(t, 4) // window 0 = chunks [0,3] - cfg := testBuildConfig(cat) - - // Spread entries across several chunks so the build genuinely k-way merges - // the runs (not a single trivial input). - entriesByChunk := map[chunk.ID][]txEntry{ - 0: {{hashAt(1), seqIn(0, 5)}, {hashAt(2), seqIn(0, 9000)}}, - 1: {{hashAt(3), seqIn(1, 1)}, {hashAt(4), seqIn(1, 4321)}}, - 2: {{hashAt(5), seqIn(2, 77)}}, - } - var inputs []string - for c := chunk.ID(0); c <= 2; c++ { - freezeChunkBin(t, cat, c, entriesByChunk[c]) - inputs = append(inputs, cat.layout.TxHashBinPath(c)) - } - - // (1) The streaming rebuild. Non-terminal coverage [0,2] (hi 2 < window-last - // 3) so it keeps its inputs frozen — we reuse them for path (2). - require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 2, cfg)) - frozen, ok, err := cat.FrozenCoverage(0) - require.NoError(t, err) - require.True(t, ok) - streamingIdx := cat.layout.IndexFilePath(frozen) - - // (2) The merged cold path, over the SAME .bin inputs, with the SAME - // MinLedger/MaxLedger anchor the streaming path derives (lo.FirstLedger, - // hi.LastLedger — build.go step 3). - minLedger := chunk.ID(0).FirstLedger() - maxLedger := chunk.ID(2).LastLedger() - directIdx := filepath.Join(t.TempDir(), "direct.idx") - require.NoError(t, txhash.BuildColdIndex(context.Background(), inputs, directIdx, minLedger, maxLedger)) - - streamingBytes, err := os.ReadFile(streamingIdx) - require.NoError(t, err) - directBytes, err := os.ReadFile(directIdx) - require.NoError(t, err) - - require.Equal(t, directBytes, streamingBytes, - "the streaming rebuild must write a byte-identical .idx to the merged cold path "+ - "(this is what lets PERF.md transfer the bench harness's measured figures)") -} - -// --------------------------------------------------------------------------- -// On-disk format pins: §6.1 (.bin) and §6.2 (.idx). 
-// --------------------------------------------------------------------------- - -// TestStreamingBin_MatchesSpecFormat asserts the .bin a frozen chunk leaves on -// disk matches gettransaction §6.1: a uint64-LE entry-count header followed by -// 20-byte [16-byte key | 4-byte LE seq] entries. freezeChunkBin uses the real -// txhash.WriteColdBin, so this is the producer's actual on-disk contract. -func TestStreamingBin_MatchesSpecFormat(t *testing.T) { - cat, _ := smallWindowCatalog(t, 4) - - e0 := txEntry{hashAt(11), seqIn(0, 5)} - e1 := txEntry{hashAt(12), seqIn(0, 9999)} - freezeChunkBin(t, cat, 0, []txEntry{e0, e1}) - - raw, err := os.ReadFile(cat.layout.TxHashBinPath(0)) - require.NoError(t, err) - - // §6.1: 8-byte header + N * 20-byte entries. - const ( - hdrSize = 8 - keyW = 16 // streamhash.MinKeySize - seqW = 4 - entryW = keyW + seqW // 20 bytes exactly - wantCount = 2 - ) - require.Equal(t, txhash.ColdKeySize, keyW, "spec pins the .bin key to 16 bytes") - require.Equal(t, streamhash.MinKeySize, keyW, "16-byte key == streamhash routing-key width") - require.Len(t, raw, hdrSize+wantCount*entryW, "header + 20-byte entries") - - count := binary.LittleEndian.Uint64(raw[:hdrSize]) - require.Equal(t, uint64(wantCount), count, "uint64-LE entry-count header") - - // Each entry: 16-byte truncated key, then a uint32-LE absolute seq. Entries - // are written sorted lex by key, so locate each by its known key prefix. 
- wantSeqByKey := map[[keyW]byte]uint32{} - for _, e := range []txEntry{e0, e1} { - var k [keyW]byte - copy(k[:], e.hash[:keyW]) - wantSeqByKey[k] = e.seq - } - for i := 0; i < wantCount; i++ { - off := hdrSize + i*entryW - var k [keyW]byte - copy(k[:], raw[off:off+keyW]) - gotSeq := binary.LittleEndian.Uint32(raw[off+keyW : off+entryW]) - require.Equal(t, wantSeqByKey[k], gotSeq, "entry %d: 16-byte key then uint32-LE seq", i) - } -} - -// TestStreamingIdx_MatchesSpecFormat asserts the .idx the streaming rebuild -// writes matches gettransaction §6.2 — the merged #728/#780 cold-index format — -// read back through the real streamhash reader and the cold metadata codec: -// 16-byte routing key, 3-byte payload (ledgerSeq - MinLedger), 1-byte -// fingerprint, [MinLedger, MaxLedger] in the user-metadata slot. -func TestStreamingIdx_MatchesSpecFormat(t *testing.T) { - // Pin the spec constants themselves (a config change that moved a width - // would break the bench-transferred figures, so fail here too). - require.Equal(t, 3, txhash.ColdPayloadSize, "§6.2: 3-byte payload at the default window") - require.Equal(t, 1, txhash.ColdFingerprintSize, "§6.2: 1-byte fingerprint default") - require.Equal(t, 16, txhash.ColdKeySize, "§6.1/§6.2: 16-byte routing key") - - cat, _ := smallWindowCatalog(t, 4) - cfg := testBuildConfig(cat) - - e0 := txEntry{hashAt(21), seqIn(0, 5)} - e1 := txEntry{hashAt(22), seqIn(1, 4242)} - freezeChunkBin(t, cat, 0, []txEntry{e0}) - freezeChunkBin(t, cat, 1, []txEntry{e1}) - - require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 1, cfg)) - frozen, ok, err := cat.FrozenCoverage(0) - require.NoError(t, err) - require.True(t, ok) - - idx, err := streamhash.OpenPayload(cat.layout.IndexFilePath(frozen)) - require.NoError(t, err) - t.Cleanup(func() { _ = idx.Close() }) - - // Payload, fingerprint, metadata as written by the build. 
- require.Equal(t, txhash.ColdPayloadSize, idx.PayloadSize(), "3-byte payload on disk") - require.Equal(t, txhash.ColdFingerprintSize, idx.Stats().FingerprintSize, "1-byte fingerprint on disk") - require.Equal(t, uint64(2), idx.NumKeys(), "one key per indexed transaction") - - gotMin, gotMax, err := txhash.ParseLedgerRange(idx.UserMetadata()) - require.NoError(t, err) - require.Equal(t, chunk.ID(0).FirstLedger(), gotMin, "MinLedger anchor = lo.FirstLedger") - require.Equal(t, chunk.ID(1).LastLedger(), gotMax, "MaxLedger = hi.LastLedger") - - // The 3-byte payload is the seq's offset from MinLedger, recovered as the - // absolute seq by the reader. - reader, err := txhash.OpenColdReader(cat.layout.IndexFilePath(frozen)) - require.NoError(t, err) - t.Cleanup(func() { _ = reader.Close() }) - for _, e := range []txEntry{e0, e1} { - got, gerr := reader.Get(e.hash) - require.NoError(t, gerr) - require.Equal(t, e.seq, got, "payload decodes to absolute seq (offset + MinLedger)") - } -} - -// --------------------------------------------------------------------------- -// Sizing: bytes-per-tx consistent with the design's Part-4 number. -// --------------------------------------------------------------------------- - -// TestColdIndexSizing_ConsistentWithPart4 asserts the .idx the streaming -// rebuild writes lands near the design's Part-4 ≈4.2 B/tx figure (PERF.md). The -// MPHF's per-key overhead has a fixed component that dominates at small key -// counts, so this is a small-N sanity band, not the asymptotic figure — at the -// dense full window (~3e9 keys) the bench harness measures ≈4.2 B/tx, and the -// width pins above guarantee the per-key payload+fingerprint contribution (4 B) -// is identical here. The band exists to catch a gross regression (e.g. a -// payload or fingerprint width change, or an MPHF parameter blow-up), not to -// re-measure the asymptote. 
-func TestColdIndexSizing_ConsistentWithPart4(t *testing.T) { - const nKeys = 20_000 - - cat, _ := smallWindowCatalog(t, 4) - cfg := testBuildConfig(cat) - - // Spread nKeys across chunks 0..2, each seq inside its chunk's range. - perChunk := nKeys / 3 - var n uint64 - for c := chunk.ID(0); c <= 2; c++ { - entries := make([]txEntry, 0, perChunk) - for i := 0; i < perChunk; i++ { - //nolint:gosec // small test offsets, well within the chunk - entries = append(entries, txEntry{hashAt(uint64(c)<<40 | uint64(i)), seqIn(c, uint32(i)+1)}) - } - freezeChunkBin(t, cat, c, entries) - n += uint64(len(entries)) - } - - require.NoError(t, buildTxhashIndex(context.Background(), 0, 0, 2, cfg)) - frozen, ok, err := cat.FrozenCoverage(0) - require.NoError(t, err) - require.True(t, ok) - - info, err := os.Stat(cat.layout.IndexFilePath(frozen)) - require.NoError(t, err) - bytesPerTx := float64(info.Size()) / float64(n) - t.Logf("cold .idx: %d bytes over %d keys = %.3f B/tx (design Part-4 asymptote ≈4.2 B/tx at the dense window)", info.Size(), n, bytesPerTx) - - // The per-key contribution is 4 B (3-byte payload + 1-byte fingerprint) plus - // the MPHF structure; at small N the fixed header + block overhead inflates - // B/tx, so allow a generous upper band and a hard floor (payload+fingerprint - // alone is 4 B, so anything <4 means a width regressed away). - require.GreaterOrEqual(t, bytesPerTx, 4.0, - "payload (3B) + fingerprint (1B) is an inviolable 4 B/tx floor") - require.LessOrEqual(t, bytesPerTx, 8.0, - "small-N .idx should stay within a small multiple of the ≈4.2 B/tx asymptote") -} diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/recovery.go b/cmd/stellar-rpc/internal/fullhistory/streaming/recovery.go index a76528cd7..e7cf594a3 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/recovery.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/recovery.go @@ -314,7 +314,8 @@ func RunSurgicalRecovery( // would be a programmer error. 
if cfg.Backfill.ChunksPerTxhashIndex == nil { return RecoveryPlan{}, errors.New( - "streaming: surgical recovery: chunks_per_txhash_index unresolved (WithDefaults not applied)") + "streaming: surgical recovery: chunks_per_txhash_index unresolved (WithDefaults not applied)", + ) } windows, err := NewWindows(*cfg.Backfill.ChunksPerTxhashIndex) if err != nil { diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/recovery_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/recovery_test.go index fa8cc350c..178e9c221 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/recovery_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/recovery_test.go @@ -35,6 +35,8 @@ func mustHotState(t *testing.T, cat *Catalog, c chunk.ID) HotState { } // mustIndexState reads one coverage key's State by re-scanning its window. +// +//nolint:unparam // window varies across recovery scenarios; the helper is general func mustIndexState(t *testing.T, cat *Catalog, w WindowID, lo, hi chunk.ID) State { t.Helper() v, ok, err := cat.Get(indexKey(w, lo, hi)) diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/txindex.go b/cmd/stellar-rpc/internal/fullhistory/streaming/txindex.go index 5c72602ab..6b799c465 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/txindex.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/txindex.go @@ -6,9 +6,8 @@ import ( "fmt" "os" - "github.com/stellar/streamhash" - supportlog "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/streamhash" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" @@ -80,6 +79,8 @@ func (cfg BuildConfig) validate() error { // covered by the four-step ordering: a crash before step 4 leaves the // predecessor frozen and the new coverage as "freezing" debris; a crash after // leaves the new coverage frozen and the demoted keys as "pruning" sweep work. 
+// +//nolint:cyclop // the four-step build + crash-recovery branches are one unit func buildTxhashIndex(ctx context.Context, w WindowID, lo, hi chunk.ID, cfg BuildConfig) error { if err := cfg.validate(); err != nil { return err @@ -233,7 +234,8 @@ func (c *Catalog) txhashBinInputs(w WindowID, lo, hi chunk.ID) ([]string, error) if state != StateFrozen { return nil, fmt.Errorf( "streaming: buildTxhashIndex precondition violated: window %s chunk %s txhash is %q, want %q", - w, cid, state, StateFrozen) + w, cid, state, StateFrozen, + ) } inputs = append(inputs, c.layout.TxHashBinPath(cid)) if cid == hi { // guard against chunk.ID wraparound at the top of the range diff --git a/cmd/stellar-rpc/internal/fullhistory/streaming/txindex_test.go b/cmd/stellar-rpc/internal/fullhistory/streaming/txindex_test.go index ca971d413..6e0cf5029 100644 --- a/cmd/stellar-rpc/internal/fullhistory/streaming/txindex_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/streaming/txindex_test.go @@ -11,6 +11,8 @@ import ( "github.com/stretchr/testify/require" + "github.com/stellar/streamhash" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" @@ -84,6 +86,8 @@ func seqIn(chunkID chunk.ID, offset uint32) uint32 { // assertCoverageQueryable opens the window's unique frozen coverage's .idx and // asserts every (hash, seq) resolves and an unseen hash misses. 
+// +//nolint:unparam // window varies; the helper is general func assertCoverageQueryable(t *testing.T, cat *Catalog, w WindowID, want []txEntry) { t.Helper() frozen, ok, err := cat.FrozenCoverage(w) @@ -389,7 +393,44 @@ func TestBuildCrashMatrix_AfterMarkBeforeCommit(t *testing.T) { require.NoError(t, err) require.Len(t, keys, 1, "exactly one coverage after recovery") require.Equal(t, indexKey(0, 0, 2), keys[0].Key) - assertCoverageQueryable(t, cat, 0, []txEntry{{hashAt(500), seqIn(0, 2)}, {hashAt(501), seqIn(1, 2)}, {hashAt(502), seqIn(2, 2)}}) + assertCoverageQueryable(t, cat, 0, []txEntry{ + {hashAt(500), seqIn(0, 2)}, {hashAt(501), seqIn(1, 2)}, {hashAt(502), seqIn(2, 2)}, + }) +} + +// TestBuildTxhashIndex_SameWindowKeyCollisionFailsLoud asserts that two distinct +// tx hashes whose first ColdKeySize bytes collide within one window abort the +// build LOUDLY (streamhash.ErrDuplicateKey) rather than silently dropping one. +// The cold .idx routes on exactly that 16-byte prefix (gettransaction §6.2), so a +// shared prefix maps two transactions to one slot; the build must reject it, not +// pick a winner. (gettransaction §6 "fingerprint + uniqueness" / issue #814.) +func TestBuildTxhashIndex_SameWindowKeyCollisionFailsLoud(t *testing.T) { + cat, _ := smallWindowCatalog(t, 1) // window 0 == chunk 0 + cfg := testBuildConfig(cat) + + // Two distinct full hashes sharing their first ColdKeySize bytes: the cold + // index keys on exactly that prefix, so they collide as one routing key. 
+ a := hashAt(1) + b := a + b[txhash.ColdKeySize] ^= 0xFF // differ only AFTER the 16-byte routing prefix + require.Equal(t, a[:txhash.ColdKeySize], b[:txhash.ColdKeySize], "the routing prefixes collide") + require.NotEqual(t, a, b, "the full hashes are distinct") + + freezeChunkBin(t, cat, 0, []txEntry{ + {hash: a, seq: seqIn(0, 0)}, + {hash: b, seq: seqIn(0, 1)}, + }) + + err := buildTxhashIndex(context.Background(), 0, 0, 0, cfg) + require.Error(t, err, "a same-window prefix collision must fail the build") + require.ErrorIs(t, err, streamhash.ErrDuplicateKey, + "the collision surfaces as ErrDuplicateKey, never a silent drop") + + // And no frozen coverage was left: the window is unbuildable until the + // collision is resolved, never papered over with a half-built index. + _, ok, ferr := cat.FrozenCoverage(0) + require.NoError(t, ferr) + require.False(t, ok, "a failed build must leave no frozen coverage") } // Row "after step 4, before the eager sweep": the commit batch landed (new