diff --git a/cmd/stellar-rpc/internal/events/extract.go b/cmd/stellar-rpc/internal/events/extract.go index b7055c4de..1c1c1b865 100644 --- a/cmd/stellar-rpc/internal/events/extract.go +++ b/cmd/stellar-rpc/internal/events/extract.go @@ -34,7 +34,8 @@ import ( // (ingest.ExtractLedgerEvents — one TxProcessing walk yields hash + events // together). This function adds only the RPC-specific Payload shape, the // Stage→(TxIdx, OpIdx) cursor-sentinel mapping, EventIdx, and the cursor -// ordering. +// ordering — all in PayloadsFromLedgerEvents, over which this is the thin +// view-reading wrapper. func LCMViewToPayloads(lcm xdr.LedgerCloseMetaView) ([]Payload, error) { ledgerSeq, err := lcm.LedgerSequence() if err != nil { @@ -44,11 +45,26 @@ func LCMViewToPayloads(lcm xdr.LedgerCloseMetaView) ([]Payload, error) { if err != nil { return nil, err } - txEvents, err := ingest.ExtractLedgerEvents(lcm) if err != nil { return nil, err } + return PayloadsFromLedgerEvents(txEvents, ledgerSeq, ledgerClosedAt) +} + +// PayloadsFromLedgerEvents shapes an already-extracted per-transaction event +// slice (ingest.ExtractLedgerEvents output) into cursor-ordered Payloads. It is +// the body of LCMViewToPayloads minus the SDK walk, so a caller that already +// holds the txEvents — the hot ingest path, which also needs the paired tx +// hashes (txEvents[i].Hash) — can feed BOTH txhash and events from ONE +// ExtractLedgerEvents call instead of walking TxProcessing twice. ledgerSeq and +// ledgerClosedAt are the view's header values (cheap reads, not a walk). The +// cursor ordering and EventIdx assignment are IDENTICAL to what LCMViewToPayloads +// produced inline, so event IDs are unchanged across the refactor. 
+func PayloadsFromLedgerEvents( + txEvents []ingest.LedgerTransactionEvents, ledgerSeq uint32, ledgerClosedAt int64, +) ([]Payload, error) { + var err error at := func(i int) (uint32, xdr.Hash) { return uint32(i) + 1, xdr.Hash(txEvents[i].Hash) //nolint:gosec // 1-based, matching ingest reader's tx.Index } diff --git a/cmd/stellar-rpc/internal/fullhistory/backfill/hotsource_test.go b/cmd/stellar-rpc/internal/fullhistory/backfill/hotsource_test.go new file mode 100644 index 000000000..fc67d74b1 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/backfill/hotsource_test.go @@ -0,0 +1,85 @@ +package backfill + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger" +) + +// seedReadyHotChunk brackets a "ready" hot DB for c (transient -> create -> ready) +// and commits ONE ledgers-CF entry at seq `top` so MaxCommittedSeq reads back +// `top`. It writes just the ledgers CF (the only CF the completeness gate reads) +// and closes the store — hygiene, not a lock requirement: a read-only open takes +// no RocksDB LOCK and would succeed against a writer-held DB too. The daemon opens +// this exact on-disk DB by its Layout path. 
+func seedReadyHotChunk(t *testing.T, cat *catalog.Catalog, c chunk.ID, top uint32) { + t.Helper() + require.NoError(t, cat.PutHotTransient(c)) + store, err := rocksdb.New(rocksdb.Config{ + Path: cat.Layout().HotChunkPath(c), + ColumnFamilies: hotchunk.ColumnFamilies(), + Logger: silentLogger(), + }) + require.NoError(t, err) + h := ledger.NewWithStore(store) + require.NoError(t, store.Batch(func(b *rocksdb.BatchWriter) error { + return h.AddLedgerToBatch(b, ledger.Entry{Seq: top, Bytes: []byte("ledger")}) + })) + require.NoError(t, store.Close()) + require.NoError(t, cat.FlipHotReady(c)) +} + +// TestBackfillSource_HotComplete: a "ready" hot DB whose committed frontier +// reaches the chunk's last ledger IS the source — backfillSource returns it with +// NO backend configured, so success alone proves the hot branch was taken. +func TestBackfillSource_HotComplete(t *testing.T) { + cat, _ := testCatalog(t) + cfg := testProcessConfig(t, cat) // no Backend + + c := chunk.ID(0) + seedReadyHotChunk(t, cat, c, c.LastLedger()) // complete: maxSeq == last ledger + + src, closeSrc, err := backfillSource(context.Background(), c, catalog.AllArtifacts(), cfg) + require.NoError(t, err, "complete hot tier is used; no bulk backend needed") + require.NotNil(t, src) + require.NoError(t, closeSrc()) +} + +// TestBackfillSource_HotIncompleteFallsThrough: a "ready" but incomplete hot DB is +// staleness — backfillSource falls past it. With no pack and no backend, that +// fall-through surfaces as the "no bulk backend" error (not a hot-tier error). 
+func TestBackfillSource_HotIncompleteFallsThrough(t *testing.T) { + cat, _ := testCatalog(t) + cfg := testProcessConfig(t, cat) // no Backend, no frozen pack + + c := chunk.ID(0) + seedReadyHotChunk(t, cat, c, c.FirstLedger()) // incomplete: maxSeq < last ledger + + _, _, err := backfillSource(context.Background(), c, catalog.AllArtifacts(), cfg) + require.Error(t, err) + require.Contains(t, err.Error(), "no bulk backend", + "an incomplete hot tier falls through; it is not itself an error") +} + +// TestBackfillSource_HotReadyButDirMissing: a "ready" key whose hot DB won't open +// (dir gone) is an ordinary restartable error — the read-only open never +// auto-heals it into a fresh empty DB. +func TestBackfillSource_HotReadyButDirMissing(t *testing.T) { + cat, _ := testCatalog(t) + cfg := testProcessConfig(t, cat) + + c := chunk.ID(0) + require.NoError(t, cat.PutHotTransient(c)) + require.NoError(t, cat.FlipHotReady(c)) // ready key, NO dir on disk + + _, _, err := backfillSource(context.Background(), c, catalog.AllArtifacts(), cfg) + require.Error(t, err) + require.Contains(t, err.Error(), "won't open") +} diff --git a/cmd/stellar-rpc/internal/fullhistory/backfill/process.go b/cmd/stellar-rpc/internal/fullhistory/backfill/process.go index d688737bc..3e059b688 100644 --- a/cmd/stellar-rpc/internal/fullhistory/backfill/process.go +++ b/cmd/stellar-rpc/internal/fullhistory/backfill/process.go @@ -17,6 +17,7 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/ingest" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger" ) @@ -84,11 +85,12 @@ func processChunk(ctx context.Context, chunkID chunk.ID, artifacts catalog.Artif // Choose the source before marking 
"freezing": a source error (a missing pack // or a coverage timeout) must not leave "freezing" debris for a chunk we then - // refuse to produce. - src, err := backfillSource(ctx, chunkID, artifacts, cfg) + // refuse to produce. closeSource releases any opened hot DB after the pass. + src, closeSource, err := backfillSource(ctx, chunkID, artifacts, cfg) if err != nil { return err } + defer func() { _ = closeSource() }() // The one-write protocol, straight-line (see catalog_protocol.go header). The // // one-write: labels keep the four steps greppable without a wrapper. @@ -101,9 +103,9 @@ func processChunk(ctx context.Context, chunkID chunk.ID, artifacts catalog.Artif // one-write:create — materialize this chunk's cold artifacts from the resolved // source's raw ledger iterator. WriteColdChunk is source-blind. dirs := ingest.ColdDirs{ - Ledgers: layout.LedgersRoot(), - Txhash: layout.TxHashRawRoot(), - Events: layout.EventsRoot(), + LedgerPack: layout.LedgerPackPath(chunkID), + TxhashBin: layout.TxHashBinPath(chunkID), + EventsDir: layout.EventsBucketDir(chunkID), } raw := src.RawLedgers(ctx, ledgerbackend.BoundedRange(chunkID.FirstLedger(), chunkID.LastLedger())) if rerr := ingest.WriteColdChunk( @@ -130,37 +132,53 @@ func processChunk(ctx context.Context, chunkID chunk.ID, artifacts catalog.Artif return nil } -// backfillSource picks a chunk's ledger source as a bare ledgerbackend.LedgerStream: -// 1. the frozen local .pack, unless ledgers is itself requested (circular); -// 2. the bulk backend (cfg.Backend), gated by a bounded waitForCoverage on its Tip. -// -// The local pack needs no coverage wait (it is complete) and no close (its reader -// is opened and closed per RawLedgers call). The bulk backend is caller-owned (the -// daemon Closes it), so backfillSource returns no closer either. +// backfillSource picks a chunk's ledger source (+ a closer for an opened hot DB; +// no-op otherwise), in preference order: +// 1. 
a ready, COMPLETE hot tier (decision (a): maxCommittedSeq >= last ledger); +// incomplete-but-present is staleness that falls through (re-derivation +// recovers it); a "ready" DB that won't open is an ordinary restartable error +// (read-only open, never auto-healed); +// 2. the frozen local .pack, unless ledgers is itself requested (circular); +// 3. the bulk backend, gated by a bounded waitForCoverage on its Tip. func backfillSource( ctx context.Context, chunkID chunk.ID, artifacts catalog.ArtifactSet, cfg ProcessConfig, -) (ledgerbackend.LedgerStream, error) { +) (ledgerbackend.LedgerStream, func() error, error) { + noClose := func() error { return nil } cat := cfg.Catalog layout := cat.Layout() + // (1) Hot branch: only when the hot key is "ready". A "transient" key (mid-op + // or recovery-demoted) is not a read source; an absent key falls through. + src, closer, used, herr := resolveHotSource(chunkID, cfg) + if herr != nil { + return nil, noClose, herr // hot-DB open failure — restartable, never auto-healed + } + if used { + cfg.Logger.Debugf("backfillSource: chunk %s from complete hot tier", chunkID) + return src, closer, nil + } + + // (2) Frozen local .pack, only when ledgers is not requested (producing ledgers + // from the pack we'd write would be circular). ledgersState, err := cat.State(chunkID, geometry.KindLedgers) if err != nil { - return nil, fmt.Errorf("read ledgers state chunk %s: %w", chunkID, err) + return nil, noClose, fmt.Errorf("read ledgers state chunk %s: %w", chunkID, err) } if ledgersState == geometry.StateFrozen && !artifacts.Has(geometry.KindLedgers) { packPath := layout.LedgerPackPath(chunkID) if _, serr := os.Stat(packPath); serr == nil { cfg.Logger.Debugf("backfillSource: chunk %s re-derived from frozen .pack", chunkID) - return ledger.NewPackStream(packPath), nil + return ledger.NewPackStream(packPath), noClose, nil } // frozen ⇒ file exists; a missing pack is a bug, not a re-download trigger. 
- return nil, fmt.Errorf( + return nil, noClose, fmt.Errorf( "chunk %s ledgers is %q but pack file is missing at %s", chunkID, geometry.StateFrozen, packPath) } + // (3) Bulk backend — the only source for a chunk with no local copy. if cfg.Backend == nil { - return nil, fmt.Errorf( + return nil, noClose, fmt.Errorf( "chunk %s has no local copy and no bulk backend is configured", chunkID) } // The coverage wait is mandatory before reading the bulk backend: the freeze @@ -169,8 +187,59 @@ func backfillSource( if werr := waitForCoverage( ctx, cfg.Backend, chunkID.LastLedger(), defaultCoveragePollInterval, defaultCoverageTimeout, ); werr != nil { - return nil, werr + return nil, noClose, werr } cfg.Logger.Debugf("backfillSource: chunk %s from bulk backend", chunkID) - return cfg.Backend, nil + return cfg.Backend, noClose, nil +} + +// resolveHotSource applies the hot branch end to end: it reads the hot key and, +// only when "ready", tries the hot tier. used=true → src/closer are the hot +// source; used=false → no "ready" key or present-but-incomplete (caller falls +// through); err → a "ready" DB that won't open (restartable). Keeps backfillSource's +// hot branch flat. +func resolveHotSource( + chunkID chunk.ID, cfg ProcessConfig, +) (ledgerbackend.LedgerStream, func() error, bool, error) { + hotState, err := cfg.Catalog.HotState(chunkID) + if err != nil { + return nil, nil, false, fmt.Errorf("read hot state chunk %s: %w", chunkID, err) + } + if hotState != geometry.HotReady { + return nil, nil, false, nil // "transient"/absent: not a read source + } + return tryHotSource(chunkID, cfg) +} + +// tryHotSource handles the hot branch under a "ready" key: it opens the chunk's +// shared hot DB read-only (never auto-healed) straight from its Layout path. 
+// used=true when present AND complete; used=false when present-but-incomplete +// (staleness, caller falls through); err when a "ready" DB is absent or unopenable +// — an ordinary restartable error, detected lazily on the open. +func tryHotSource(chunkID chunk.ID, cfg ProcessConfig) (ledgerbackend.LedgerStream, func() error, bool, error) { + dir := cfg.Catalog.Layout().HotChunkPath(chunkID) + // Open the chunk's shared multi-CF DB READ-ONLY: the freeze reads its ledgers to + // re-derive the cold artifacts and must never mutate it (the read-only open + // replays any un-synced WAL into memtables but persists nothing). An absent or + // gutted "ready" DB fails the open — restartable, never auto-created. + hot, err := hotchunk.OpenReadOnly(dir, chunkID, cfg.Logger) + if err != nil { + return nil, nil, false, fmt.Errorf("chunk %s is ready but its hot DB won't open: %w", chunkID, err) + } + maxSeq, present, merr := hot.MaxCommittedSeq() + if merr != nil { + _ = hot.Close() + // A read error against an opened DB: the DB opened but cannot answer its + // own progress. Surface it (restartable), don't treat as staleness. + return nil, nil, false, fmt.Errorf("chunk %s: read hot max committed seq: %w", chunkID, merr) + } + // decision (a): complete iff the single DB's maxCommittedSeq reaches the chunk's + // last ledger. An empty DB (present==false) cannot be complete. + if present && maxSeq >= chunkID.LastLedger() { + return hot.Source(), hot.Close, true, nil + } + // Present but incomplete: legitimate staleness — caller falls through. 
+ cfg.Logger.Debugf("backfillSource: chunk %s hot tier present but incomplete; falling through", chunkID) + _ = hot.Close() + return nil, nil, false, nil } diff --git a/cmd/stellar-rpc/internal/fullhistory/backfill/process_test.go b/cmd/stellar-rpc/internal/fullhistory/backfill/process_test.go index 78cf6540c..5c917c005 100644 --- a/cmd/stellar-rpc/internal/fullhistory/backfill/process_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/backfill/process_test.go @@ -330,8 +330,9 @@ func TestBackfillSource_PrefersFrozenPackWhenLFSNotRequested(t *testing.T) { cfg.Backend = bulk set := catalog.NewArtifactSet(geometry.KindEvents, geometry.KindTxHash) // ledgers NOT requested - src, err := backfillSource(context.Background(), chunkID, set, cfg) + src, closeSrc, err := backfillSource(context.Background(), chunkID, set, cfg) require.NoError(t, err) + defer func() { require.NoError(t, closeSrc()) }() // It is a pack stream (re-derivation without download); the bulk backend was // not consulted. require.IsType(t, ledger.NewPackStream(""), src) @@ -354,8 +355,9 @@ func TestBackfillSource_DoesNotUsePackWhenLFSRequested(t *testing.T) { // ledgers IS requested — the pack branch is skipped (circular), so it goes to // the bulk backend (whose tip covers the chunk, so the wait passes). 
- src, err := backfillSource(context.Background(), chunkID, catalog.AllArtifacts(), cfg) + src, closeSrc, err := backfillSource(context.Background(), chunkID, catalog.AllArtifacts(), cfg) require.NoError(t, err) + defer func() { require.NoError(t, closeSrc()) }() require.Same(t, bulk, src) } @@ -369,7 +371,7 @@ func TestBackfillSource_BulkCoverageErrorAborts(t *testing.T) { chunkID := chunk.ID(0) cfg.Backend = &fakeBackend{t: t, gen: zeroTxLCMBytes, tipErr: errors.New("boom")} - _, err := backfillSource(context.Background(), chunkID, catalog.AllArtifacts(), cfg) + _, _, err := backfillSource(context.Background(), chunkID, catalog.AllArtifacts(), cfg) require.Error(t, err) require.Contains(t, err.Error(), "backend tip query") } @@ -379,7 +381,7 @@ func TestBackfillSource_NoBackendConfigured(t *testing.T) { cfg := testProcessConfig(t, cat) cfg.Backend = nil - _, err := backfillSource(context.Background(), chunk.ID(0), catalog.AllArtifacts(), cfg) + _, _, err := backfillSource(context.Background(), chunk.ID(0), catalog.AllArtifacts(), cfg) require.Error(t, err) require.Contains(t, err.Error(), "no bulk backend") } @@ -453,7 +455,7 @@ func writeRealPack(t *testing.T, cat *catalog.Catalog, chunkID chunk.ID) { stream := &fullChunkStream{t: t, gen: zeroTxLCMBytes} raw := stream.RawLedgers(context.Background(), ledgerbackend.BoundedRange(chunkID.FirstLedger(), chunkID.LastLedger())) - dirs := ingest.ColdDirs{Ledgers: cat.Layout().LedgersRoot()} + dirs := ingest.ColdDirs{LedgerPack: cat.Layout().LedgerPackPath(chunkID)} require.NoError(t, ingest.WriteColdChunk( context.Background(), silentLogger(), chunkID, raw, dirs, ingest.NopSink{}, ingest.Config{Ledgers: true})) diff --git a/cmd/stellar-rpc/internal/fullhistory/backfill/recorder_test.go b/cmd/stellar-rpc/internal/fullhistory/backfill/recorder_test.go index 23d440123..74261ef90 100644 --- a/cmd/stellar-rpc/internal/fullhistory/backfill/recorder_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/backfill/recorder_test.go 
@@ -41,7 +41,11 @@ func (r *recordingMetrics) Prune(count int, d time.Duration) { r.prune = append(r.prune, pruneRec{count, d}) } -func (*recordingMetrics) LastCommitted(uint32, uint32) {} -func (*recordingMetrics) BackfillPass(time.Duration) {} +func (*recordingMetrics) LastCommitted(uint32) {} +func (*recordingMetrics) RetentionFloor(uint32) {} +func (*recordingMetrics) ChunkBoundary() {} +func (*recordingMetrics) BackfillPass(time.Duration) {} +func (*recordingMetrics) LiveHotChunks(int) {} +func (*recordingMetrics) Discard(int, time.Duration) {} var _ observability.Metrics = (*recordingMetrics)(nil) diff --git a/cmd/stellar-rpc/internal/fullhistory/backfill/resolve.go b/cmd/stellar-rpc/internal/fullhistory/backfill/resolve.go index 8ea961990..6814e66c0 100644 --- a/cmd/stellar-rpc/internal/fullhistory/backfill/resolve.go +++ b/cmd/stellar-rpc/internal/fullhistory/backfill/resolve.go @@ -30,11 +30,6 @@ type coverageRange struct { Lo, Hi chunk.ID } -// covers reports whether this range fully contains other (other ⊆ this). -func (r coverageRange) covers(other coverageRange) bool { - return r.Lo <= other.Lo && r.Hi >= other.Hi -} - // resolve diffs the desired state (every artifact of [rangeStart, rangeEnd] durable) // against the catalog, emitting a Plan. A pure read — recomputes from durable keys // every run, so a restart re-plans cleanly. @@ -98,12 +93,11 @@ func resolveTxHashIndex( Hi: min(txLayout.LastChunk(w), rangeEnd), // capped by range end } - frozen, hasFrozen, err := cat.FrozenTxHashIndex(w) + covered, err := cat.FrozenIndexCoversRange(w, desired.Lo, desired.Hi) if err != nil { return IndexBuild{}, false, err } - stored := coverageRange{Lo: frozen.Lo, Hi: frozen.Hi} - if hasFrozen && stored.covers(desired) { + if covered { // Frozen coverage already spans desired, so no rebuild is due — steady state, a // risen floor, or a finalized window. 
Any non-frozen leftover a crashed build // stranded (a superseded "pruning"/"freezing" coverage or a demoted .bin) is the diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog.go index cd63ac64c..bbdf9f27f 100644 --- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog.go +++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog.go @@ -3,12 +3,13 @@ package catalog import ( "errors" "fmt" + "slices" "strconv" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" ) // Catalog is the streaming daemon's view of durable state. It WRAPS @@ -55,6 +56,20 @@ func (c *Catalog) State(chunkID chunk.ID, kind geometry.Kind) (geometry.State, e return geometry.State(v), nil } +// HotState returns the HotState of a chunk's hot-DB key, or empty (key absent). +// The key's mere existence (any value) marks the chunk as owned by ingestion, and +// most consumers branch on the value: the freeze source and last-committed +// derivation treat only "ready" as usable (see ReadyHotChunkKeys), and +// openHotDBForChunk picks its recovery action from it. Only the discard scan is +// value-blind (any state means "a hot dir may exist, sweep it"). +func (c *Catalog) HotState(chunkID chunk.ID) (geometry.HotState, error) { + v, ok, err := c.get(geometry.HotChunkKey(chunkID)) + if err != nil || !ok { + return "", err + } + return geometry.HotState(v), nil +} + // --------------------------------------------------------------------------- // Scans. Every "find work" operation iterates keys via PrefixScan; nothing // lists a directory. 
Results are returned sorted so callers need no second @@ -84,6 +99,19 @@ func (c *Catalog) TxHashIndexKeys(w geometry.TxHashIndexID) ([]geometry.TxHashIn return c.txhashIndexKeysByPrefix(geometry.TxHashIndexPrefixFor(w)) } +// HotChunkKeys returns every hot-DB chunk id (value-blind), sorted ascending. +// The highest is the live chunk — the ingestion/lifecycle partition boundary. +func (c *Catalog) HotChunkKeys() ([]chunk.ID, error) { + return c.hotChunkKeysWith(nil) +} + +// ReadyHotChunkKeys returns only the chunks whose hot-DB key is "ready", sorted +// ascending. The last-committed ledger counts only these — a "transient" key never advances +// the bound, which lets recovery demote any hot key without disturbing it. +func (c *Catalog) ReadyHotChunkKeys() ([]chunk.ID, error) { + return c.hotChunkKeysWith(func(s geometry.HotState) bool { return s == geometry.HotReady }) +} + // AllTxHashIndexKeys is TxHashIndexKeys across all indexes. func (c *Catalog) AllTxHashIndexKeys() ([]geometry.TxHashIndexCoverage, error) { return c.txhashIndexKeysByPrefix(geometry.TxHashIndexPrefix) @@ -118,6 +146,29 @@ func (c *Catalog) FrozenTxHashIndex(w geometry.TxHashIndexID) (geometry.TxHashIn return frozen, found, nil } +// FrozenIndexCoversRange reports whether index w's UNIQUE frozen coverage spans +// the whole inclusive [lo, hi] chunk range. It reads through FrozenTxHashIndex, +// so INV-2 (at most one frozen coverage per index) is asserted on every call. +// This is the single "covered by a frozen index" predicate the resolve diff +// (backfill), the discard eligibility scan, and the watermark derivation all +// share, so they can never disagree about the same catalog snapshot. Reports +// false (no error) when the index has no frozen coverage yet. 
+func (c *Catalog) FrozenIndexCoversRange(w geometry.TxHashIndexID, lo, hi chunk.ID) (bool, error) { + frozen, ok, err := c.FrozenTxHashIndex(w) + if err != nil { + return false, err + } + return ok && frozen.Lo <= lo && hi <= frozen.Hi, nil +} + +// FrozenIndexCovers reports whether chunk ch's OWN index window has a frozen +// coverage containing it. A chunk belongs to exactly one window, so its own +// window is the only one that can cover it — the degenerate single-chunk case of +// FrozenIndexCoversRange. +func (c *Catalog) FrozenIndexCovers(ch chunk.ID) (bool, error) { + return c.FrozenIndexCoversRange(c.txhashIndex.TxHashIndexID(ch), ch, ch) +} + // --------------------------------------------------------------------------- // Config pins. Written once on first start, immutable thereafter. // --------------------------------------------------------------------------- @@ -171,6 +222,28 @@ func (c *Catalog) has(key string) (bool, error) { return ok, err } +// hotChunkKeysWith returns the chunks whose hot-DB key matches keep, sorted +// ascending. A nil keep matches every value (value-blind). +func (c *Catalog) hotChunkKeysWith(keep func(geometry.HotState) bool) ([]chunk.ID, error) { + var ids []chunk.ID + for e, err := range c.store.PrefixScan(geometry.HotChunkPrefix) { + if err != nil { + return nil, err + } + id, ok := geometry.ParseHotChunkKey(e.Key) + if !ok { + return nil, fmt.Errorf("malformed hot key %q", e.Key) + } + if keep == nil || keep(geometry.HotState(e.Value)) { + ids = append(ids, id) + } + } + // PrefixScan yields byte-lex order == numeric under the 8-digit padding, so + // the slice is already ascending; sort defensively against a width change. + slices.Sort(ids) + return ids, nil +} + // txhashIndexKeysByPrefix scans coverage keys under prefix, attaching each scanned // value as State. 
func (c *Catalog) txhashIndexKeysByPrefix(prefix string) ([]geometry.TxHashIndexCoverage, error) { diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol.go index 31d7e0f86..85d3f44dd 100644 --- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol.go +++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol.go @@ -3,9 +3,9 @@ package catalog import ( "errors" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" ) // The one write protocol — mark-then-write. Every durable artifact (per-chunk @@ -151,3 +151,24 @@ func (c *Catalog) txhashIndexChunkKeysPresent(lo, hi chunk.ID) ([]string, error) } return keys, nil } + +// --- Hot-DB key bracket: the file protocol's transient/ready bracket applied to +// the chunk's hot directory. --- + +// PutHotTransient marks a hot-DB key "transient" — the open end, written before +// the dir is created or a discard begins removing it. A crash mid-operation is +// detectable from this value alone. +func (c *Catalog) PutHotTransient(chunkID chunk.ID) error { + return c.store.Put(geometry.HotChunkKey(chunkID), string(geometry.HotTransient)) +} + +// FlipHotReady marks a hot-DB key "ready" (dir exists and usable). The caller +// MUST have fsynced the dir (and its parent on creation) first. +func (c *Catalog) FlipHotReady(chunkID chunk.ID) error { + return c.store.Put(geometry.HotChunkKey(chunkID), string(geometry.HotReady)) +} + +// DeleteHotKey removes a hot-DB key — the close end, after rmdir. Idempotent. 
+func (c *Catalog) DeleteHotKey(chunkID chunk.ID) error { + return c.store.Delete(geometry.HotChunkKey(chunkID)) +} diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol_test.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol_test.go index 2bb384a34..f70e9e536 100644 --- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_protocol_test.go @@ -5,8 +5,8 @@ import ( "github.com/stretchr/testify/require" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) // --------------------------------------------------------------------------- @@ -43,6 +43,29 @@ func TestCommitIndexPromoteAndDemote(t *testing.T) { require.Equal(t, geometry.StateFrozen, states[geometry.TxHashIndexKey(5, 5100, 5350)]) } +// TestFrozenIndexCoversRange_AssertsUniqueness pins that the shared "covered by a +// frozen index" predicate (#37) propagates the INV-2 assertion FrozenTxHashIndex +// makes: two frozen coverages in one window must make EVERY read error, so +// watermark derivation (progress), discard eligibility, and the resolve diff can +// never disagree — one silently tolerating the duplicate while another aborts. +func TestFrozenIndexCoversRange_AssertsUniqueness(t *testing.T) { + cat, _ := testCatalog(t) + + // Plant two frozen coverages in window 5, bypassing the promote/demote commit + // path (which never leaves two frozen) to stage the corrupt snapshot directly. 
+ require.NoError(t, cat.store.Put(geometry.TxHashIndexKey(5, 5100, 5349), string(geometry.StateFrozen))) + require.NoError(t, cat.store.Put(geometry.TxHashIndexKey(5, 5100, 5350), string(geometry.StateFrozen))) + + _, rangeErr := cat.FrozenIndexCoversRange(5, 5100, 5349) + require.Error(t, rangeErr, "the range predicate must surface the uniqueness violation") + require.Contains(t, rangeErr.Error(), "two frozen coverages") + + // The per-chunk convenience form resolves a chunk to its window and inherits + // the same assertion. + _, chunkErr := cat.FrozenIndexCovers(5100) + require.Error(t, chunkErr, "the per-chunk predicate inherits the uniqueness assertion") +} + func TestCommitIndexTerminalDemotesTxhashKeys(t *testing.T) { cat, _ := testCatalog(t) diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep.go index 9eb1825e0..02f1f4bbb 100644 --- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep.go +++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep.go @@ -1,8 +1,13 @@ package catalog import ( - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" + "fmt" + "os" + "path/filepath" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" ) // Key-driven sweeps — the ONLY two deletion bodies in the system, one per key @@ -85,3 +90,35 @@ func (c *Catalog) SweepTxHashIndexKey(cov geometry.TxHashIndexCoverage) error { geometry.RmdirIfEmpty(dir) // best-effort; an empty dir is not an artifact return nil } + +// DiscardHotChunk retires a chunk's hot DB once its cold artifacts are durable +// (or it fell past retention), following the same crash order as the two sweeps +// above: mark "transient" -> rmdir -> fsync(parent) -> delete key. 
The key +// outlives the durable rmdir, so a crash anywhere leaves the key "transient" for +// the next scan to finish — idempotent, and an absent key is a no-op. The caller +// MUST have closed the chunk's hot write handle (discard runs after the freeze). +func (c *Catalog) DiscardHotChunk(chunkID chunk.ID) error { + state, err := c.HotState(chunkID) + if err != nil { + return fmt.Errorf("read hot key chunk %s: %w", chunkID, err) + } + if state == "" { + return nil + } + if err := c.PutHotTransient(chunkID); err != nil { + return fmt.Errorf("mark hot transient chunk %s: %w", chunkID, err) + } + dir := c.layout.HotChunkPath(chunkID) + if err := os.RemoveAll(dir); err != nil { + return fmt.Errorf("rmdir hot dir %s: %w", dir, err) + } + // rmdir durable BEFORE the key delete: the key outlives the dir, so a crash + // re-runs the discard rather than leaving a key-less dir. + if err := geometry.FsyncDir(filepath.Dir(dir)); err != nil { + return fmt.Errorf("fsync hot parent dir %s: %w", filepath.Dir(dir), err) + } + if err := c.DeleteHotKey(chunkID); err != nil { + return fmt.Errorf("delete hot key chunk %s: %w", chunkID, err) + } + return nil +} diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep_test.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep_test.go index 2a287f61b..762e48ba8 100644 --- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_sweep_test.go @@ -1,11 +1,13 @@ package catalog import ( + "os" "testing" "github.com/stretchr/testify/require" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) // --------------------------------------------------------------------------- @@ -104,3 +106,32 @@ func TestSweepEmptyRefsNoop(t *testing.T) { cat, _ := testCatalog(t) require.NoError(t, cat.SweepChunkArtifacts(nil)) } + +// 
TestDiscardHotChunkResumesTransient mirrors the sweep siblings' crash-resume +// coverage for the hot-DB discard: a "transient" key (a discard that crashed after +// marking transient but before deleting the key) plus a leftover dir must be +// finished by the next DiscardHotChunk — the dir removed and the key deleted. +func TestDiscardHotChunkResumesTransient(t *testing.T) { + cat, _ := testCatalog(t) + c := chunk.ID(4) + + // The mid-discard crash state: a "transient" key + a real leftover dir. + require.NoError(t, cat.PutHotTransient(c)) + dir := cat.layout.HotChunkPath(c) + require.NoError(t, os.MkdirAll(dir, 0o755)) + + require.NoError(t, cat.DiscardHotChunk(c)) + + // The resume completed it: key gone, dir gone. + state, err := cat.HotState(c) + require.NoError(t, err) + require.Equal(t, geometry.HotState(""), state, "transient key finished") + require.NoDirExists(t, dir, "leftover hot dir swept") +} + +// TestDiscardHotChunkAbsentKeyNoop: an absent hot key is a clean no-op (nothing +// to finish). +func TestDiscardHotChunkAbsentKeyNoop(t *testing.T) { + cat, _ := testCatalog(t) + require.NoError(t, cat.DiscardHotChunk(chunk.ID(9))) +} diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_test.go b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_test.go index 3f3ccaeab..c9b986eb9 100644 --- a/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/catalog/catalog_test.go @@ -5,8 +5,8 @@ import ( "github.com/stretchr/testify/require" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) // PinEarliestLedger writes the sole config pin; EarliestLedger reads it back. 
@@ -27,6 +27,27 @@ func TestConfigPins(t *testing.T) { require.Equal(t, uint32(2), el) } +// --------------------------------------------------------------------------- +// Scans: HotChunkKeys (value-blind) vs ReadyHotChunkKeys (ready-only). +// --------------------------------------------------------------------------- + +func TestHotChunkKeysValueBlindVsReadyOnly(t *testing.T) { + cat, _ := testCatalog(t) + + require.NoError(t, cat.PutHotTransient(3)) + require.NoError(t, cat.FlipHotReady(5)) + require.NoError(t, cat.PutHotTransient(9)) + require.NoError(t, cat.FlipHotReady(12)) + + all, err := cat.HotChunkKeys() + require.NoError(t, err) + require.Equal(t, []chunk.ID{3, 5, 9, 12}, all, "value-blind: every hot key") + + ready, err := cat.ReadyHotChunkKeys() + require.NoError(t, err) + require.Equal(t, []chunk.ID{5, 12}, ready, "ready-only excludes transient") +} + func TestChunkArtifactKeys(t *testing.T) { cat, _ := testCatalog(t) diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/helpers_test.go b/cmd/stellar-rpc/internal/fullhistory/catalog/helpers_test.go index 29906238b..3f34f3291 100644 --- a/cmd/stellar-rpc/internal/fullhistory/catalog/helpers_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/catalog/helpers_test.go @@ -13,9 +13,9 @@ import ( supportlog "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" ) func silentLogger() *supportlog.Entry { diff --git a/cmd/stellar-rpc/internal/fullhistory/catalog/keys_roundtrip_test.go b/cmd/stellar-rpc/internal/fullhistory/catalog/keys_roundtrip_test.go index 03c16009f..3669aac91 100644 --- a/cmd/stellar-rpc/internal/fullhistory/catalog/keys_roundtrip_test.go +++ 
b/cmd/stellar-rpc/internal/fullhistory/catalog/keys_roundtrip_test.go @@ -5,8 +5,8 @@ import ( "github.com/stretchr/testify/require" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) // --------------------------------------------------------------------------- diff --git a/cmd/stellar-rpc/internal/fullhistory/config.go b/cmd/stellar-rpc/internal/fullhistory/config.go index 6ad1e2c16..3f30a5842 100644 --- a/cmd/stellar-rpc/internal/fullhistory/config.go +++ b/cmd/stellar-rpc/internal/fullhistory/config.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "os" - "path/filepath" "runtime" "github.com/pelletier/go-toml" @@ -88,10 +87,27 @@ type BackfillConfig struct { BSB ledgerbackend.BufferedStorageBackendConfig `toml:"bsb"` } -// IngestionConfig is [ingestion] — the live-network ingestion settings. +// IngestionConfig is [ingestion] — the live-network ingestion (captive-core) +// settings. The captive-core config FILE is the single source of truth for what +// it can hold (notably NETWORK_PASSPHRASE, read back at startup); the remaining +// keys are the things that don't live in that file — the plain history-archive +// URLs (the file's [HISTORY.*] entries are shell commands, not the URLs the SDK's +// archive client needs), and, optionally, the stellar-core binary path and the +// captive-core storage directory. type IngestionConfig struct { - // Path to the CaptiveStellarCore config file. Required. + // CaptiveCoreConfig is the path to the CaptiveStellarCore (stellar-core) config + // file. Required for live ingestion. Must define NETWORK_PASSPHRASE. CaptiveCoreConfig string `toml:"captive_core_config"` + // HistoryArchiveURLs are the plain history-archive URLs the SDK reads + // checkpoints from. 
Required for live ingestion (not derivable from the + // captive-core file's [HISTORY.*] get-commands). + HistoryArchiveURLs []string `toml:"history_archive_urls"` + // StellarCoreBinaryPath is the path to the stellar-core binary. Optional — + // defaults to the "stellar-core" found on PATH. + StellarCoreBinaryPath string `toml:"stellar_core_binary_path"` + // CaptiveCoreStoragePath is captive core's BUCKET_DIR_PATH base; optional, + // defaults to {default_data_dir}/captive-core. + CaptiveCoreStoragePath string `toml:"captive_core_storage_path"` } // LoggingConfig is [logging]. @@ -187,23 +203,27 @@ type Paths struct { // ResolvePaths fills every storage path, defaulting under default_data_dir. // Relative overrides are kept relative (resolved against the caller's working -// dir); only the defaults are joined to the data dir. +// dir); only the defaults are joined to the data dir. The default tree is spelled +// ONCE, by geometry.NewLayout — production flows through here and every package's +// test helpers through NewLayout, so a rename to the tree can't leave the two +// disagreeing. 
func (cfg Config) ResolvePaths() Paths { dataDir := cfg.Service.DefaultDataDir - pick := func(override, def string) string { + def := geometry.NewLayout(dataDir) + pick := func(override, defPath string) string { if override != "" { return override } - return def + return defPath } return Paths{ DataDir: dataDir, - Catalog: pick(cfg.Storage.Catalog, filepath.Join(dataDir, "catalog", "rocksdb")), - Ledgers: pick(cfg.Storage.Ledgers, filepath.Join(dataDir, "ledgers")), - Events: pick(cfg.Storage.Events, filepath.Join(dataDir, "events")), - TxhashRaw: pick(cfg.Storage.TxhashRaw, filepath.Join(dataDir, "txhash", "raw")), - TxhashIndex: pick(cfg.Storage.TxhashIndex, filepath.Join(dataDir, "txhash", "index")), - HotStorage: pick(cfg.Storage.Hot, filepath.Join(dataDir, "hot")), + Catalog: pick(cfg.Storage.Catalog, def.CatalogPath()), + Ledgers: pick(cfg.Storage.Ledgers, def.LedgersRoot()), + Events: pick(cfg.Storage.Events, def.EventsRoot()), + TxhashRaw: pick(cfg.Storage.TxhashRaw, def.TxHashRawRoot()), + TxhashIndex: pick(cfg.Storage.TxhashIndex, def.TxHashIndexRoot()), + HotStorage: pick(cfg.Storage.Hot, def.HotRoot()), } } diff --git a/cmd/stellar-rpc/internal/fullhistory/daemon.go b/cmd/stellar-rpc/internal/fullhistory/daemon.go index 8b0dbc059..1e5937bb1 100644 --- a/cmd/stellar-rpc/internal/fullhistory/daemon.go +++ b/cmd/stellar-rpc/internal/fullhistory/daemon.go @@ -4,11 +4,16 @@ import ( "context" "errors" "fmt" + "os" + "os/exec" + "path/filepath" "time" + "github.com/pelletier/go-toml" "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" supportlog "github.com/stellar/go-stellar-sdk/support/log" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/daemon/interfaces" @@ -34,6 +39,11 @@ type daemonOptions struct { // frontfill-only daemon when no datastore is configured). Tests inject a fakeBackend. 
Backend backfill.Backend + // Core starts captive core at the resume ledger and yields the live getter the + // ingestion loop polls. nil ⇒ runDaemonWith builds a captiveCoreOpener from + // [ingestion] (a complete production opener). Tests inject a fake getter. + Core CoreOpener + // ServeReads launches the RPC read server; it must return promptly, not block. // nil ⇒ the #772 no-op placeholder (reads still come from the v1 SQLite daemon). ServeReads func(ctx context.Context) error @@ -49,6 +59,11 @@ type daemonOptions struct { // IngestSink is the per-type cold-path ingest sink; nil ⇒ a *ingest.PrometheusSink. IngestSink ingest.MetricSink + + // chunksPerTxhashIndex overrides the tx-hash index width (test-only). 0 ⇒ the + // fixed geometry.ChunksPerTxhashIndex. Tests set it to 1 so a single chunk's + // freeze is a terminal index (exercising the fold+prune path cheaply). + chunksPerTxhashIndex uint32 } const defaultRestartBackoff = 5 * time.Second @@ -88,7 +103,11 @@ func runDaemonWith(ctx context.Context, configPath string, opts daemonOptions) e } defer func() { _ = store.Close() }() - txLayout, err := geometry.NewTxHashIndexLayout(geometry.ChunksPerTxhashIndex) + cpi := geometry.ChunksPerTxhashIndex + if opts.chunksPerTxhashIndex != 0 { + cpi = opts.chunksPerTxhashIndex + } + txLayout, err := geometry.NewTxHashIndexLayout(cpi) if err != nil { return err } @@ -130,8 +149,21 @@ func runDaemonWith(ctx context.Context, configPath string, opts daemonOptions) e registry := prometheus.NewRegistry() metrics, sink := buildSinks(opts, registry) + // Resolve the captive-core opener: injected (tests) or built from + // [ingestion].captive_core_config (a complete production opener) — done after + // validateConfig so config errors surface first. 
+ core := opts.Core + if core == nil { + built, cerr := newCaptiveCoreOpener(cfg.Ingestion, cfg.Service.DefaultDataDir, logger) + if cerr != nil { + return cerr + } + core = built + } + // --- Assemble the StartConfig and run the supervised run loop. --- - start := startConfig(cfg, cat, logger, backend, networkTip, serveReads, metrics, sink, tipBackoff, tipMaxAttempts) + start := startConfig( + cfg, cat, logger, backend, networkTip, core, serveReads, metrics, sink, tipBackoff, tipMaxAttempts) backoff := opts.RestartBackoff if backoff <= 0 { @@ -140,10 +172,12 @@ func runDaemonWith(ctx context.Context, configPath string, opts daemonOptions) e return supervise(ctx, start, logger, backoff) } -// startConfig assembles the StartConfig run consumes. +// startConfig assembles the StartConfig run consumes. run() builds the +// lifecycle.Config from Exec + RetentionChunks, so backfill and the lifecycle +// goroutine share ONE catalog, worker pool, and retention floor by construction. func startConfig( cfg Config, cat *catalog.Catalog, logger *supportlog.Entry, - backend backfill.Backend, networkTip NetworkTipBackend, serveReads func(context.Context) error, + backend backfill.Backend, networkTip NetworkTipBackend, core CoreOpener, serveReads func(context.Context) error, metrics observability.Metrics, sink ingest.MetricSink, tipBackoff time.Duration, tipMaxAttempts int, ) StartConfig { exec := backfill.ExecConfig{ @@ -161,6 +195,7 @@ func startConfig( Exec: exec, RetentionChunks: deref(cfg.Retention.RetentionChunks), NetworkTip: networkTip, + Core: core, ServeReads: serveReads, TipBackoff: tipBackoff, TipMaxAttempts: tipMaxAttempts, @@ -181,9 +216,11 @@ func buildSinks(opts daemonOptions, registry *prometheus.Registry) (observabilit return metrics, sink } -// supervise restarts run on a restartable error after a backoff ("startup is the -// recovery path"); a clean shutdown or ctx cancel returns nil; ErrFirstStartNoTip -// is fatal and surfaces up. 
+// supervise is the daemon's clean-vs-restart decision point ("startup is the +// recovery path"): nil or a ctx cancel is a clean shutdown, everything else is +// warned and retried after a backoff. There is deliberately no fatal-and-exit +// class — genuine loss presents as a crash-loop with a clear warn line. The +// never-auto-heal guarantee lives in the must-exist open (openHotDBForChunk), not here. func supervise( ctx context.Context, start StartConfig, logger *supportlog.Entry, backoff time.Duration, ) error { @@ -195,10 +232,6 @@ func supervise( if ctx.Err() != nil { return nil //nolint:nilerr // ctx canceled is a clean shutdown, not a run failure } - // Unrecoverable: a fresh start cannot heal it, so don't spin restarting. - if errors.Is(err, ErrFirstStartNoTip) { - return err - } logger.WithError(err).Warnf("daemon run failed; restarting in %s", backoff) if sleepCtx(ctx, backoff) != nil { return nil //nolint:nilerr // ctx canceled mid-backoff is a clean shutdown, not a failure @@ -207,7 +240,7 @@ func supervise( } // sleepCtx blocks for d or until ctx is canceled, returning ctx.Err() if canceled -// first and nil otherwise. supervise's three-way clean/fatal/restart loop can't be +// first and nil otherwise. supervise's clean-vs-restart loop can't be // a backoff.Retry, so it keeps a hand-rolled sleep — but shares this one helper // rather than re-rolling the timer/select (and its easy-to-forget timer.Stop). func sleepCtx(ctx context.Context, d time.Duration) error { @@ -244,6 +277,106 @@ func buildBackfillBackend( return backend, cleanup, nil } +// --------------------------------------------------------------------------- +// Production captive-core opener (the live ingestion source). +// --------------------------------------------------------------------------- + +// captiveCoreOpener is the production CoreOpener. 
It holds a resolved +// CaptiveCoreConfig and hands back a captive-core LedgerStream that builds a FRESH +// core per run (each supervised restart reopens core anew) — the stream owns the +// process lifecycle, so there is no eager prepare or explicit closer here. +// Construction mirrors the RPC daemon's newCaptiveCore so the full-history daemon +// runs captive core and the ledgerbackend the same way (#772 can unify them at +// the cutover). +type captiveCoreOpener struct { + config ledgerbackend.CaptiveCoreConfig +} + +// newCaptiveCoreOpener resolves the captive-core config, treating the +// captive_core_config FILE as the single source of truth: NETWORK_PASSPHRASE is +// read back from it, and the stellar-core binary defaults to the one on PATH. +// Only the plain history-archive URLs (not derivable from the file's [HISTORY.*] +// get-commands) come from [ingestion].history_archive_urls. The toml params +// mirror the RPC daemon (strict, unified events, soroban diagnostic/meta +// enforcement) so the ingested meta is what the events + txhash stores need. +func newCaptiveCoreOpener(ing IngestionConfig, dataDir string, logger *supportlog.Entry) (*captiveCoreOpener, error) { + if ing.CaptiveCoreConfig == "" { + return nil, errors.New("[ingestion].captive_core_config is required for live ingestion") + } + if len(ing.HistoryArchiveURLs) == 0 { + return nil, errors.New("[ingestion].history_archive_urls is required for live ingestion") + } + + // NETWORK_PASSPHRASE lives in the captive-core file; read it back so the + // operator configures it in one place. (go-toml v1 ignores the other fields.) 
+ data, err := os.ReadFile(ing.CaptiveCoreConfig) + if err != nil { + return nil, fmt.Errorf("read captive_core_config %q: %w", ing.CaptiveCoreConfig, err) + } + var peek struct { + NetworkPassphrase string `toml:"NETWORK_PASSPHRASE"` + } + if perr := toml.Unmarshal(data, &peek); perr != nil { + return nil, fmt.Errorf("parse captive_core_config %q: %w", ing.CaptiveCoreConfig, perr) + } + if peek.NetworkPassphrase == "" { + return nil, fmt.Errorf("captive_core_config %q must define NETWORK_PASSPHRASE", ing.CaptiveCoreConfig) + } + + // stellar-core binary: explicit path, else the one on PATH (RPC daemon default). + binaryPath := ing.StellarCoreBinaryPath + if binaryPath == "" { + found, lerr := exec.LookPath("stellar-core") + if lerr != nil { + return nil, fmt.Errorf( + "[ingestion].stellar_core_binary_path unset and stellar-core not found on PATH: %w", lerr) + } + binaryPath = found + } + + storagePath := ing.CaptiveCoreStoragePath + if storagePath == "" { + storagePath = filepath.Join(dataDir, "captive-core") + } + + // Build the toml from the bytes already read, not the path — re-reading via + // NewCaptiveCoreTomlFromFile would parse the file twice and, worse, could + // observe a different NETWORK_PASSPHRASE than the one peeked above if the file + // changed between the two reads (surfacing as the SDK's confusing mismatch error). 
+ coreToml, err := ledgerbackend.NewCaptiveCoreTomlFromData(data, ledgerbackend.CaptiveCoreTomlParams{ + HistoryArchiveURLs: ing.HistoryArchiveURLs, + NetworkPassphrase: peek.NetworkPassphrase, + Strict: true, + EnforceSorobanDiagnosticEvents: true, + EnforceSorobanTransactionMetaExtV1: true, + EmitUnifiedEvents: true, + CoreBinaryPath: binaryPath, + }) + if err != nil { + return nil, fmt.Errorf("invalid captive-core toml %q: %w", ing.CaptiveCoreConfig, err) + } + + return &captiveCoreOpener{ + config: ledgerbackend.CaptiveCoreConfig{ + BinaryPath: binaryPath, + StoragePath: storagePath, + NetworkPassphrase: peek.NetworkPassphrase, + HistoryArchiveURLs: ing.HistoryArchiveURLs, + Log: logger.WithField("subservice", "stellar-core"), + Toml: coreToml, + UserAgent: "stellar-rpc-fullhistory", + }, + }, nil +} + +// OpenCore returns the live ingestion stream backed by captive stellar-core. A +// fresh core per run keeps supervised restarts clean. +func (c *captiveCoreOpener) OpenCore(ctx context.Context) (ledgerbackend.LedgerStream, error) { + cfg := c.config + cfg.Context = ctx + return ledgerbackend.NewCaptiveCoreStream(cfg, c.config.Log), nil +} + // resolveNetworkTip adapts the backfill backend to backfill's tip sampler — its Tip // frontier (so the tip and the freeze's coverage frontier are one source) — or the // not-configured placeholder for a frontfill-only daemon (nil backend). @@ -286,6 +419,7 @@ func newLogger(cfg LoggingConfig) (*supportlog.Entry, error) { // compile-time interface checks. 
var ( + _ CoreOpener = (*captiveCoreOpener)(nil) _ NetworkTipBackend = notConfiguredTip{} _ NetworkTipBackend = backendTip{} ) diff --git a/cmd/stellar-rpc/internal/fullhistory/daemon_test.go b/cmd/stellar-rpc/internal/fullhistory/daemon_test.go index d5f09bff9..ddb9bc47b 100644 --- a/cmd/stellar-rpc/internal/fullhistory/daemon_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/daemon_test.go @@ -59,26 +59,34 @@ format = "text" // runDaemonWith — the full entrypoint flow against an injected backend. // --------------------------------------------------------------------------- -// Happy path pins earliest_ledger and serves reads once. The injected backend's -// young-network tip (inside chunk 0) ⇒ no-op backfill, no LedgerStream needed. +// Happy path pins earliest_ledger, serves reads once, then ingests. The injected +// backend's young-network tip (inside chunk 0) ⇒ no-op backfill; the injected core +// blocks until ctx cancel (the daemon's steady state), and a ctx cancel is a clean +// shutdown. No LedgerStream needed. func TestRunDaemon_LoadValidateWireStartCleanShutdown(t *testing.T) { configPath, dataDir := writeTempConfig(t, "") var served atomic.Int32 opts := daemonOptions{ Backend: &fakeBackend{tip: chunk.FirstLedgerSeq + 10}, + Core: &fakeCore{}, // default getter blocks until ctx cancel ServeReads: func(context.Context) error { served.Add(1); return nil }, Logger: silentLogger(), } + ctx, cancel := context.WithCancel(context.Background()) errCh := make(chan error, 1) - go func() { errCh <- runDaemonWith(context.Background(), configPath, opts) }() + go func() { errCh <- runDaemonWith(ctx, configPath, opts) }() + + // ServeReads is called after backfill, just before the (blocking) ingestion loop. 
+ require.Eventually(t, func() bool { return served.Load() == 1 }, 3*time.Second, 5*time.Millisecond) + cancel() select { case err := <-errCh: - require.NoError(t, err, "cold backfill + serve returns cleanly") + require.NoError(t, err, "a ctx-canceled ingestion loop is a clean shutdown") case <-time.After(3 * time.Second): - t.Fatal("runDaemonWith did not return") + t.Fatal("runDaemonWith did not return after ctx cancel") } assert.Equal(t, int32(1), served.Load(), "reads served once") @@ -106,7 +114,8 @@ func someTxBackend(t *testing.T) *fakeBackend { if seq%2500 != 0 { return zeroTxLCMBytes(t, seq) } - return oneTxLCMBytes(t, seq, src) + raw, _ := oneTxLCMBytes(t, seq, src) + return raw } return &fakeBackend{ LedgerStream: &fullChunkStream{t: t, gen: gen}, @@ -116,8 +125,10 @@ func someTxBackend(t *testing.T) *fakeBackend { } // oneTxLCMBytes is zeroTxLCMBytes plus one tx (per-seq SeqNum ⇒ unique hash) so -// ExtractTxHashes yields exactly one key for seq. -func oneTxLCMBytes(t *testing.T, seq uint32, src xdr.MuxedAccount) []byte { +// ExtractTxHashes yields exactly one key for seq. Returns the wire bytes and the +// real, network-hashed transaction hash (the hash the daemon commits for seq), so +// callers can assert a getTransaction-style hash→seq lookup. 
+func oneTxLCMBytes(t *testing.T, seq uint32, src xdr.MuxedAccount) ([]byte, [32]byte) { t.Helper() envelope := xdr.TransactionEnvelope{ Type: xdr.EnvelopeTypeEnvelopeTypeTx, @@ -169,7 +180,7 @@ func oneTxLCMBytes(t *testing.T, seq uint32, src xdr.MuxedAccount) []byte { } raw, err := lcm.MarshalBinary() require.NoError(t, err) - return raw + return raw, hash } // #815 acceptance: one TOML boots the daemon and it backfills the complete chunk @@ -182,23 +193,36 @@ func TestRunDaemon_BackfillMaterializesAllColdTypesAndIndex(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + // ServeReads runs after backfill completes, just before the blocking ingestion + // loop — so it is the "backfill done" signal. The injected core then blocks until + // the ctx cancel below, and a ctx-canceled ingestion loop is a clean shutdown. + servedCh := make(chan struct{}, 1) errCh := make(chan error, 1) go func() { errCh <- runDaemonWith(ctx, configPath, daemonOptions{ // Backend's tip is chunk 0's last ledger ⇒ chunk 0 complete, backfill freezes it. // The network tip is derived from this same backend's Tip. 
Backend: someTxBackend(t), - ServeReads: func(context.Context) error { return nil }, + Core: &fakeCore{}, // default getter blocks until ctx cancel + ServeReads: func(context.Context) error { servedCh <- struct{}{}; return nil }, Logger: silentLogger(), }) }() select { + case <-servedCh: // backfill complete; the daemon is now parked in ingestion case err := <-errCh: - require.NoError(t, err, "daemon backfills to tip then exits cleanly (no-op ServeReads)") + t.Fatalf("daemon returned before backfill completed: %v", err) case <-time.After(60 * time.Second): cancel() t.Fatal("runDaemonWith did not finish backfill within 60s (regressed into a hang/restart loop?)") } + cancel() // request a clean shutdown of the parked ingestion loop + select { + case err := <-errCh: + require.NoError(t, err, "a ctx-canceled ingestion loop is a clean shutdown") + case <-time.After(10 * time.Second): + t.Fatal("runDaemonWith did not return after ctx cancel") + } // Read the catalog back after the daemon released locks + closed its store. store, err := openMetaAt(t, filepath.Join(dataDir, "catalog", "rocksdb")) @@ -380,7 +404,7 @@ func TestSupervise_RetriesThenCleanShutdown(t *testing.T) { var attempts atomic.Int32 tip := &fakeTipBackend{tips: []uint32{chunk.FirstLedgerSeq + 10}} // young: no backfill - start := startTestConfig(t, cat, tip, nil) + start := startTestConfig(t, cat, tip, &fakeCore{}, nil) // An always-erroring ServeReads makes each attempt a restartable failure. start.ServeReads = func(context.Context) error { attempts.Add(1) @@ -406,16 +430,33 @@ func TestSupervise_RetriesThenCleanShutdown(t *testing.T) { assert.GreaterOrEqual(t, attempts.Load(), int32(2), "restarted on the transient failure") } -// Fatal sentinels surface up, not retried (a fresh start cannot heal them). 
-func TestSupervise_FatalSentinelSurfaces(t *testing.T) { +// A first start with no reachable tip is now RESTARTABLE (previously a fatal +// sentinel): supervise retries it on a backoff rather than surfacing it, and a +// ctx cancel returns clean. Loss/misconfig can't be told from a transient inside +// the process, so there is no fatal-and-exit class. +func TestSupervise_FirstStartNoTipRetries(t *testing.T) { cat, _ := testCatalog(t) pinGenesis(t, cat) - // Unreachable tip + no local progress ⇒ fatal ErrFirstStartNoTip. + // Unreachable tip + no local progress: every run fails the first-start check. tip := &fakeTipBackend{err: errors.New("unreachable"), errFirst: 99} - start := startTestConfig(t, cat, tip, nil) + start := startTestConfig(t, cat, tip, &fakeCore{}, nil) + start.TipMaxAttempts = 1 // one tip poll per run, so callCount tracks restart count + + ctx, cancel := context.WithCancel(context.Background()) + errCh := make(chan error, 1) + go func() { errCh <- supervise(ctx, start, silentLogger(), 5*time.Millisecond) }() - err := supervise(context.Background(), start, silentLogger(), time.Hour) - require.ErrorIs(t, err, ErrFirstStartNoTip, "fatal sentinel surfaces immediately, no retry") + require.Eventually(t, func() bool { + return tip.callCount() >= 2 + }, 3*time.Second, 5*time.Millisecond, "first-start-no-tip is retried, not surfaced as fatal") + cancel() + + select { + case err := <-errCh: + require.NoError(t, err, "ctx cancel returns clean, even though runs kept failing") + case <-time.After(3 * time.Second): + t.Fatal("supervise did not return after cancel") + } } // --------------------------------------------------------------------------- diff --git a/cmd/stellar-rpc/internal/fullhistory/e2e_test.go b/cmd/stellar-rpc/internal/fullhistory/e2e_test.go new file mode 100644 index 000000000..a2f754297 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/e2e_test.go @@ -0,0 +1,520 @@ +package fullhistory + +// 
============================================================================= +// In-process end-to-end integration of the full-history daemon. +// +// WHAT IS REAL HERE +// Everything inside the process is the real production code path: +// - runDaemonWith (the true daemon entrypoint): TOML load + form-validate, +// per-root flock, meta-store open + Catalog bind, the stateful +// validateConfig gate (pins the floor), and the supervised run loop. +// - run → backfillToTip → openHotDBForChunk → runIngestionLoop (the real +// atomic per-ledger WriteBatch across all CFs of the real per-chunk +// hotchunk RocksDB), the real boundary handoff, the real boundary signal. +// - lifecycle.Loop / runLifecycle: the real resolve + executePlan +// freeze (cold artifacts derived FROM the live hot DB), the real txhash +// index fold (a real streamhash .idx on disk), the real discard + prune. +// - The real txhash stores on both sides of a getTransaction-style hash→seq +// lookup: the cold ColdReader over the frozen .idx and the live hot CF. +// +// WHAT IS FAKED (the two EXTERNAL boundaries the daemon injects on purpose) +// - The ledger SOURCE. Production drives ingestion from captive +// stellar-core and backfill from a bulk object-store backend. Here both +// cross their injected interfaces (CoreOpener / backfill.Backend) and are +// fed synthetic-but-well-formed LedgerCloseMeta. No captive core, no +// object store, no network. +// - ServeReads is a no-op recorder (the read cutover is #772). The read PATH +// exercised is the txhash index lookup getTransaction will sit on. +// +// cpi=1 (the chunksPerTxhashIndex test seam) makes every one-chunk window +// terminal the instant its chunk freezes, so the freeze→fold→discard→prune +// sequence completes on a boundary tick without ingesting 1000 chunks. 
+// ============================================================================= + +import ( + "context" + "fmt" + "iter" + "os" + "path/filepath" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" + "github.com/stellar/go-stellar-sdk/keypair" + "github.com/stellar/go-stellar-sdk/xdr" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/lifecycle" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/observability" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" +) + +// e2eCore is the CoreOpener handing back a fresh e2eStream per daemon run (a +// restart opens core anew). frames is the seq→raw backlog every stream serves; +// the atomics aggregate observations across opens for the restart assertions. +type e2eCore struct { + frames map[uint32][]byte + fromSeen atomic.Uint32 + delivered atomic.Uint32 + opens atomic.Int32 +} + +func (c *e2eCore) OpenCore(context.Context) (ledgerbackend.LedgerStream, error) { + c.opens.Add(1) + return &e2eStream{core: c}, nil +} + +// e2eStream is the FAKE captive-core LedgerStream the ingestion loop consumes: it +// yields the backlog frames contiguously from the range's From() and, once it runs +// past the synthetic backlog, blocks until ctx is canceled (a live tip stream ends +// only on shutdown). 
It records (into its core) the FIRST seq it was asked for +// (the range From), so the restart step can assert the daemon re-derived the +// watermark and resumed with no gap. +type e2eStream struct { + core *e2eCore + sawFrom atomic.Bool +} + +var _ ledgerbackend.LedgerStream = (*e2eStream)(nil) + +func (s *e2eStream) RawLedgers( + ctx context.Context, r ledgerbackend.Range, _ ...ledgerbackend.StreamOption, +) iter.Seq2[[]byte, error] { + return func(yield func([]byte, error) bool) { + if s.sawFrom.CompareAndSwap(false, true) { + s.core.fromSeen.Store(r.From()) + } + for seq := r.From(); ; seq++ { + if ctx.Err() != nil { + yield(nil, ctx.Err()) + return + } + if raw, ok := s.core.frames[seq]; ok { + s.core.delivered.Store(seq) + if !yield(raw, nil) { + return + } + continue + } + // Past the synthetic backlog: a live tip blocks until shutdown so the loop + // does not see an error that would look like a core crash. + <-ctx.Done() + yield(nil, ctx.Err()) + return + } + } +} + +// e2eMetrics is a concurrency-safe observability.Metrics that records the +// lifecycle signals this test waits on. 
+type e2eMetrics struct { + observability.NopMetrics + + mu sync.Mutex + boundaries int + freezes int + discarded int + pruned int +} + +func (m *e2eMetrics) ChunkBoundary() { + m.mu.Lock() + defer m.mu.Unlock() + m.boundaries++ +} + +func (m *e2eMetrics) Freeze(time.Duration) { + m.mu.Lock() + defer m.mu.Unlock() + m.freezes++ +} + +func (m *e2eMetrics) Discard(count int, _ time.Duration) { + m.mu.Lock() + defer m.mu.Unlock() + m.discarded += count +} + +func (m *e2eMetrics) Prune(count int, _ time.Duration) { + m.mu.Lock() + defer m.mu.Unlock() + m.pruned += count +} + +func (m *e2eMetrics) boundaryCount() int { + m.mu.Lock() + defer m.mu.Unlock() + return m.boundaries +} + +func (m *e2eMetrics) snapshotFreezeCount() int { + m.mu.Lock() + defer m.mu.Unlock() + return m.freezes +} + +func (m *e2eMetrics) discardedCount() int { + m.mu.Lock() + defer m.mu.Unlock() + return m.discarded +} + +func (m *e2eMetrics) prunedCount() int { + m.mu.Lock() + defer m.mu.Unlock() + return m.pruned +} + +// e2eConfigPath writes a daemon TOML for an in-process E2E: genesis floor (no +// tip needed to validate/start) and the given retention width. captive_core_config +// is a stub path the test's injected CoreOpener replaces, never opening a real core. +// The one-chunk index window is set via the chunksPerTxhashIndex test seam, not config. +func e2eConfigPath(t *testing.T, dataDir string, retentionChunks uint32) string { + t.Helper() + cfgPath := filepath.Join(t.TempDir(), "daemon.toml") + body := fmt.Sprintf(` +[service] +default_data_dir = %q + +[retention] +earliest_ledger = "genesis" +retention_chunks = %d + +[ingestion] +captive_core_config = "/dev/null" + +[logging] +level = "error" +format = "text" +`, dataDir, retentionChunks) + require.NoError(t, os.WriteFile(cfgPath, []byte(body), 0o644)) + return cfgPath +} + +// runDaemonInBackground starts runDaemonWith on a cancellable ctx and returns a +// cancel func plus a channel carrying its (clean-shutdown) return. 
A young-network +// tip (inside chunk 0) means backfill is a no-op and first-start ingests directly +// from genesis via the fake core. +func runDaemonInBackground( + t *testing.T, cfgPath string, core *e2eCore, served *atomic.Int32, metrics observability.Metrics, +) (context.CancelFunc, <-chan error) { + t.Helper() + ctx, cancelFn := context.WithCancel(context.Background()) + errCh := make(chan error, 1) + opts := daemonOptions{ + Backend: &fakeBackend{tip: chunk.FirstLedgerSeq + 5}, // young: no backfill + Core: core, + ServeReads: func(context.Context) error { served.Add(1); return nil }, + Logger: silentLogger(), + Metrics: metrics, + RestartBackoff: 10 * time.Millisecond, + chunksPerTxhashIndex: 1, + } + go func() { errCh <- runDaemonWith(ctx, cfgPath, opts) }() + return cancelFn, errCh +} + +// waitClean cancels the daemon and requires a clean (nil) shutdown. +func waitClean(t *testing.T, cancel context.CancelFunc, done <-chan error) { + t.Helper() + cancel() + select { + case err := <-done: + require.NoError(t, err, "ctx cancel is a clean daemon shutdown") + case <-time.After(60 * time.Second): + // Post-cancel shutdown joins one in-flight lifecycle unit; a mid-flight + // freeze's Finalize fsync + index build is unpreemptible and slow under + // -race + contention — the same reason the boundary-cross budget is 600s. + t.Fatal("daemon did not shut down cleanly after ctx cancel") + } +} + +// hotKeyExists reports whether chunk c's hot:chunk key is present (any non-empty state). +func hotKeyExists(cat *catalog.Catalog, c chunk.ID) (bool, error) { + st, err := cat.HotState(c) + if err != nil { + return false, err + } + return st != geometry.HotState(""), nil +} + +// hashAt builds a deterministic 32-byte hash from n (for the never-committed miss). 
func hashAt(n uint64) [32]byte {
	var h [32]byte
	// Little-endian: byte i carries bits [8i, 8i+8) of n. Only the first 8 bytes
	// are written; bytes 8..31 keep their zero value.
	for i := range 8 {
		h[i] = byte(n >> (8 * i))
	}
	return h
}

// TestE2E_DaemonLifecycle_FirstStartIngestFreezeLookupRestartPrune drives the
// whole daemon lifecycle in one process against the real stores and the fake
// ledger source:
//
//	first start (genesis, young-network tip ⇒ direct ingest) →
//	ingest chunks 0 and 1 in FULL + cross into chunk 2 (two real boundary handoffs) →
//	lifecycle tick freezes each just-closed chunk + folds its terminal txhash
//	index + discards its hot tier →
//	getTransaction-style hash→seq lookup resolves from the cold .idx (chunk 0)
//	AND from the live hot CF (chunk 2) →
//	clean shutdown →
//	RESTART: re-derive the watermark, resume at exactly watermark+1 (no gap) →
//	re-run with retention_chunks=1 to prune chunk 0, confirm a pruned read is not-found.
//
// Correctness is asserted at every step. The daemon is started three times in
// total (first start, restart, prune run), each time via runDaemonInBackground
// and each time shut down via waitClean before the on-disk state is inspected.
//
//nolint:funlen // one linear end-to-end scenario asserted step by step
func TestE2E_DaemonLifecycle_FirstStartIngestFreezeLookupRestartPrune(t *testing.T) {
	if testing.Short() {
		t.Skip("e2e ingests a full 10k-ledger chunk; skipped in -short")
	}

	// One data dir shared by all three daemon runs, so each run sees the durable
	// state the previous one left behind.
	dataDir := t.TempDir()

	const c0 = chunk.ID(0)
	const c1 = chunk.ID(1)
	const c2 = chunk.ID(2)

	// Cross TWO chunk boundaries so chunks 0 AND 1 both freeze, leaving chunk 2 as
	// the live (un-frozen) chunk. That layout lets a later retention_chunks=1 run
	// prune chunk 0 (wholly below the floor) while chunk 1 survives.
	c0First := c0.FirstLedger()
	c1First := c1.FirstLedger()
	c2First := c2.FirstLedger()

	// One shared source account; the per-seq SeqNum makes each tx hash unique.
	src := xdr.MustMuxedAddress(keypair.MustRandom().Address())
	coldRaw, coldHash := oneTxLCMBytes(t, c0First, src) // → frozen cold .idx (chunk 0)
	hotRaw, hotHash := oneTxLCMBytes(t, c2First, src)   // → live hot CF (chunk 2)
	// Chunk 1's first ledger also carries a tx so its txhash .bin is non-empty —
	// streamhash refuses to build a cold index over zero keys (ErrEmptyBuildSet).
	c1Raw, _ := oneTxLCMBytes(t, c1First, src)

	// frames is the fake core's ledger supply, keyed by sequence. Capacity is two
	// full chunks plus the two chunk-2 ledgers appended below.
	frames := make(map[uint32][]byte, 2*int(chunk.LedgersPerChunk)+2)
	// appendLedger stores the one-tx frame at each chunk's first ledger and a
	// zero-tx frame everywhere else.
	appendLedger := func(seq uint32) {
		switch seq {
		case c0First:
			frames[seq] = coldRaw
		case c1First:
			frames[seq] = c1Raw
		case c2First:
			frames[seq] = hotRaw
		default:
			frames[seq] = zeroTxLCMBytes(t, seq)
		}
	}
	// Chunks 0 and 1 in full (both freeze), then chunk 2's first two ledgers.
	for seq := c0First; seq <= c1.LastLedger(); seq++ {
		appendLedger(seq)
	}
	appendLedger(c2First)
	appendLedger(c2First + 1)

	// core and served are shared by every daemon run below; metrics belongs to
	// the first run only (later runs get their own e2eMetrics).
	core := &e2eCore{frames: frames}
	var served atomic.Int32
	metrics := &e2eMetrics{}

	// =====================================================================
	// STEP 1 — first start: config → lock → validate (pin genesis) → start →
	// direct ingest across the chunk-0 AND chunk-1 boundaries, the lifecycle
	// freezing, folding, and discarding each just-closed chunk off the doorbell.
	// =====================================================================
	cfgPath := e2eConfigPath(t, dataDir, 0) // retention 0 (full history) for now
	cancel, done := runDaemonInBackground(t, cfgPath, core, &served, metrics)

	// Wait until ingestion crosses BOTH boundaries and commits into chunk 2.
	// Delivering c2First proves both boundary handoffs fired (chunks 0 and 1
	// closed, chunk 2 opened) and seeds the live hot-CF lookup. 600s absorbs the
	// worst-case contended -race path (per-ledger synced WriteBatches racing the
	// freezes that re-read 10k ledgers each).
	require.Eventually(t, func() bool {
		return core.delivered.Load() >= c2First
	}, 600*time.Second, 200*time.Millisecond, "ingestion must cross both boundaries into chunk 2")

	// Delivery alone does not prove the lifecycle caught up: also wait for the
	// Discard metric to account for both closed chunks before shutting down.
	require.Eventually(t, func() bool {
		return metrics.discardedCount() >= 2
	}, 60*time.Second, 50*time.Millisecond, "the boundary ticks must freeze+fold+discard chunks 0 and 1")

	require.GreaterOrEqual(t, served.Load(), int32(1), "reads were served")
	require.Equal(t, c0First, core.fromSeen.Load(),
		"first start resumes the ingestion stream at genesis (watermark+1)")

	// =====================================================================
	// STEP 2 — clean shutdown. The supervised loop returns nil on ctx cancel.
	// =====================================================================
	waitClean(t, cancel, done)

	// Bind a fresh inspection catalog on the (now lock-free) data dir for the
	// post-shutdown reads. It MUST be closed before the restart reopens the metastore.
	postCat, closePost := e2eReadCatalog(t, dataDir)
	// w0 is the txhash index window containing chunk 0; it is reused in STEP 6 to
	// confirm that window's coverage was pruned.
	w0 := postCat.TxHashIndexLayout().TxHashIndexID(c0)

	// --- Correctness: chunks 0 and 1 per-chunk cold artifacts (ledgers + events) froze. ---
	for _, c := range []chunk.ID{c0, c1} {
		for _, kind := range []geometry.Kind{geometry.KindLedgers, geometry.KindEvents} {
			st, err := postCat.State(c, kind)
			require.NoError(t, err)
			assert.Equal(t, geometry.StateFrozen, st, "chunk %s %s is frozen", c, kind)
		}
		has, err := hotKeyExists(postCat, c)
		require.NoError(t, err)
		assert.False(t, has, "chunk %s hot key is discarded", c)
	}
	// The window's txhash index is a frozen, terminal coverage (the .idx the cold
	// getTransaction read resolves against).
	frozenCov, ok, err := postCat.FrozenTxHashIndex(w0)
	require.NoError(t, err)
	require.True(t, ok, "chunk 0's window has a frozen txhash coverage")
	require.True(t, postCat.TxHashIndexLayout().IsTerminalCoverage(frozenCov), "a one-chunk (cpi=1) window is terminal")

	// =====================================================================
	// STEP 3 — getTransaction-style hash→seq lookup, cold tier.
	// =====================================================================

	// Cold .idx — the exact reader getTransaction will sit on for frozen history.
	coldReader, err := txhash.OpenColdReader(postCat.Layout().TxHashIndexFilePath(frozenCov))
	require.NoError(t, err)
	gotSeq, err := coldReader.Get(coldHash)
	require.NoError(t, err, "the chunk-0 tx hash must resolve from the frozen cold index")
	assert.Equal(t, c0First, gotSeq, "cold lookup returns the ledger the tx was committed in")
	// A hash that was never committed misses (not-found, not a wrong answer).
	_, missErr := coldReader.Get(hashAt(0xE2EDEADBEEF))
	require.ErrorIs(t, missErr, stores.ErrNotFound, "an uncommitted hash misses the cold index")
	require.NoError(t, coldReader.Close())

	// Observability: the daemon emitted the boundary + freeze phase signals.
	assert.GreaterOrEqual(t, metrics.boundaryCount(), 1, "at least one chunk boundary was signaled")
	assert.GreaterOrEqual(t, metrics.snapshotFreezeCount(), 1, "at least one freeze stage ran")

	// =====================================================================
	// STEP 4 — hot lookup and restart watermark.
	// =====================================================================
	// Derived from durable state while the daemon is stopped; STEP 5 compares the
	// restarted daemon's resume point against this value + 1.
	wmBeforeRestart := mustDeriveWatermark(t, postCat)
	require.GreaterOrEqual(t, wmBeforeRestart, c2First, "watermark advanced into chunk 2")

	// Live hot CF — now the daemon has stopped, chunk 2 (still the un-frozen live
	// chunk) is reopenable. Resolve the chunk-2 tx hash through the txhash CF — the
	// read path getTransaction uses for live history before a chunk freezes.
	hotState, err := postCat.HotState(c2)
	require.NoError(t, err)
	require.Equal(t, geometry.HotReady, hotState, "chunk 2 is the un-frozen live chunk")
	c2lfs, err := postCat.State(c2, geometry.KindLedgers)
	require.NoError(t, err)
	require.Equal(t, geometry.State(""), c2lfs, "the live chunk has no cold artifacts yet")

	// Retry the open: RocksDB's process-level LOCK can linger momentarily after the
	// writer closed (the same transient a production reader retries through).
	var liveDB *hotchunk.DB
	require.Eventually(t, func() bool {
		db, oerr := hotchunk.Open(postCat.Layout().HotChunkPath(c2), c2, silentLogger())
		if oerr != nil {
			return false
		}
		liveDB = db
		return true
	}, 10*time.Second, 50*time.Millisecond, "chunk 2's hot DB must be reopenable after shutdown")
	hotSeq, err := liveDB.Txhash().Get(hotHash)
	require.NoError(t, err, "the chunk-2 tx hash must resolve from the live hot CF")
	assert.Equal(t, c2First, hotSeq, "hot lookup returns the live tx's ledger")
	require.NoError(t, liveDB.Close()) // release before the restart reopens it as the live writer
	// Captured now, while postCat is still open; STEP 6 checks this path exists
	// before the prune run and is gone after it.
	prunedIdxPath := postCat.Layout().TxHashIndexFilePath(frozenCov)

	// =====================================================================
	// STEP 5 — RESTART. A fresh runDaemonWith re-opens everything, re-derives the
	// watermark from durable state, and resumes captive core at watermark+1 with no gap.
	// =====================================================================
	closePost() // release the inspection metastore handle before the daemon reopens it
	// Zero the fake core's counters so the polls below can only be satisfied by
	// the restarted run, not by values left over from the first start.
	core.opens.Store(0)
	core.fromSeen.Store(0)
	cancel2, done2 := runDaemonInBackground(t, cfgPath, core, &served, &e2eMetrics{})

	require.Eventually(t, func() bool { return core.opens.Load() >= 1 }, 30*time.Second, 20*time.Millisecond,
		"the restarted daemon re-opened captive core")
	require.Eventually(t, func() bool { return core.fromSeen.Load() != 0 }, 30*time.Second, 20*time.Millisecond,
		"the restarted ingestion loop requested a resume range")

	wantResume := wmBeforeRestart + 1
	assert.Equal(t, wantResume, core.fromSeen.Load(),
		"restart streams from the re-derived watermark+1 — the durable frontier, re-derived not stored, no gap")

	waitClean(t, cancel2, done2)

	// =====================================================================
	// STEP 6 — retention prune. Re-run with retention_chunks = 1: the floor anchors
	// at chunk 1, so chunk 0 (frozen + folded) falls WHOLLY below it and the prune
	// scan sweeps its files + keys, while chunk 1 (the floor chunk) survives. A read
	// of a pruned chunk-0 hash is then not-found (no coverage to resolve it).
	// =====================================================================
	prunedCfg := e2eConfigPath(t, dataDir, 1) // retain ~1 chunk
	require.FileExists(t, prunedIdxPath, "chunk 0's cold index exists before the prune")

	// Fresh metrics so prunedCount reflects this run alone.
	pruneMetrics := &e2eMetrics{}
	cancel3, done3 := runDaemonInBackground(t, prunedCfg, core, &served, pruneMetrics)

	// The prune scan runs on the first lifecycle tick (the at-start doorbell ring).
	require.Eventually(t, func() bool {
		return pruneMetrics.prunedCount() > 0
	}, 60*time.Second, 50*time.Millisecond, "retention prune scan must sweep chunk 0")

	waitClean(t, cancel3, done3)
	// No further daemon run follows, so this inspection catalog can stay open
	// until the test returns.
	pruneCat, closePrune := e2eReadCatalog(t, dataDir)
	defer closePrune()

	// Chunk 0's per-chunk artifact keys (ledgers + events) vanished.
	ledgers, err := pruneCat.State(c0, geometry.KindLedgers)
	require.NoError(t, err)
	ev, err := pruneCat.State(c0, geometry.KindEvents)
	require.NoError(t, err)
	assert.Equal(t, geometry.State(""), ledgers, "chunk 0 ledgers key is pruned")
	assert.Equal(t, geometry.State(""), ev, "chunk 0 events key is pruned")

	// Chunk 1 (the floor chunk) is WITHIN retention and survives the prune.
	c1lfs, err := pruneCat.State(c1, geometry.KindLedgers)
	require.NoError(t, err)
	assert.Equal(t, geometry.StateFrozen, c1lfs, "chunk 1 is at the retention floor and survives")

	// The on-disk cold index file is gone too (prune unlinks the files, not just keys).
	require.Eventually(t, func() bool {
		_, statErr := os.Stat(prunedIdxPath)
		return os.IsNotExist(statErr)
	}, 10*time.Second, 50*time.Millisecond, "the pruned cold index file is unlinked")

	// "pruned read is not-found": after prune the window has no frozen coverage
	// (ok=false) — the read layer's "no coverage ⇒ not-found" gate.
	_, covOK, err := pruneCat.FrozenTxHashIndex(w0)
	require.NoError(t, err)
	assert.False(t, covOK, "chunk 0's window coverage is pruned ⇒ a chunk-0 hash read is not-found")
}

// e2eReadCatalog binds a Catalog over a SEPARATE metastore handle on the daemon's
// data dir, with the same one-chunk window the daemon's test seam uses, for
// read-only inspection BETWEEN daemon runs (the metastore is RocksDB-primary, so
// this MUST be closed via the returned close func before the next daemon run).
+func e2eReadCatalog(t *testing.T, dataDir string) (*catalog.Catalog, func()) { + t.Helper() + paths := Config{Service: ServiceConfig{DefaultDataDir: dataDir}}.WithDefaults().ResolvePaths() + store, err := openMetaAt(t, paths.Catalog) + require.NoError(t, err) + windows, err := geometry.NewTxHashIndexLayout(1) // matches chunksPerTxhashIndex = 1 + require.NoError(t, err) + return catalog.NewCatalog(store, NewLayoutFromPaths(paths), windows), func() { _ = store.Close() } +} + +// mustDeriveWatermark derives the durable watermark with the read-only hot-DB +// refinement (passing a logger opens the highest ready hot DB by its Layout path). +func mustDeriveWatermark(t *testing.T, cat *catalog.Catalog) uint32 { + t.Helper() + wm, err := lifecycle.LastCommittedLedger(cat, silentLogger()) + require.NoError(t, err) + return wm +} diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith.go b/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith.go new file mode 100644 index 000000000..cb7437de1 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith.go @@ -0,0 +1,46 @@ +package geometry + +import "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + +// Signed pre-genesis chunk arithmetic — the single home for the chunk↔ledger maps +// that run in int64 so the pre-genesis sentinel (-1 = "nothing complete") never +// underflows the uint32 domain. Keeping all of it here (rather than split across +// lifecycle progress and this package) means there is one -1 convention, not two. + +// PreGenesisLedger is the last-committed ledger when nothing is complete +// (FirstLedgerSeq-1) — the ledger-domain image of the -1 chunk sentinel. +const PreGenesisLedger uint32 = chunk.FirstLedgerSeq - 1 + +// CompleteThrough maps a signed chunk index to its "complete through" last ledger: +// c < 0 ⇒ PreGenesisLedger; c >= 0 ⇒ chunk.ID(c).LastLedger(). 
+func CompleteThrough(c int64) uint32 { + if c < 0 { + return PreGenesisLedger + } + return chunk.ID(c).LastLedger() //nolint:gosec // c >= 0 and bounded by real chunk ids +} + +// ChunkIDOfLedger maps a ledger to its chunk, signed so a sub-genesis ledger +// yields -1 instead of panicking. +func ChunkIDOfLedger(ledger uint32) int64 { + if ledger < chunk.FirstLedgerSeq { + return -1 + } + return int64(chunk.IDFromLedger(ledger)) +} + +// LastCompleteChunkAt is the inverse of chunk.ID.LastLedger: the largest chunk +// whose last ledger is <= ledger. Returns SIGNED int64 so a sub-genesis ledger +// (the sub-genesis sentinel) maps to -1 ("before the first chunk") rather than +// wrapping; the cast-before-subtract keeps it in int64 (uint32 ledger-1 would +// underflow for ledger 0). +func LastCompleteChunkAt(ledger uint32) int64 { + return (int64(ledger)+1-int64(chunk.FirstLedgerSeq))/int64(chunk.LedgersPerChunk) - 1 +} + +// ChunkFirstLedger maps a non-negative signed chunk index to its first ledger. +// It is the signed-domain companion of chunk.ID.FirstLedger used after a +// max(..., 0) clamp. +func ChunkFirstLedger(c int64) uint32 { + return chunk.ID(c).FirstLedger() //nolint:gosec // c >= 0 (clamped) and bounded by real chunk ids +} diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith_test.go b/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith_test.go new file mode 100644 index 000000000..e784494f9 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/geometry/chunk_arith_test.go @@ -0,0 +1,57 @@ +package geometry + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// --------------------------------------------------------------------------- +// CompleteThrough — sentinel-safe signed->ledger map. 
+// +// ALIASING TRAP: a guard-less impl wraps -1 to exactly PreGenesisLedger anyway +// (MaxUint32+1 overflows to 0), so a -1-only test is blind to a dropped guard. +// The -2/-100 rows are the load-bearing ones (they wrap to large, distinct values +// the guard must squash). +// --------------------------------------------------------------------------- + +func TestCompleteThrough(t *testing.T) { + tests := []struct { + name string + in int64 + want uint32 + }{ + {"pre-genesis sentinel -1 => FirstLedgerSeq-1, not MaxUint32 (aliases the wrap)", -1, PreGenesisLedger}, + {"sentinel -2 does NOT alias the wrap (guard-less would yield 4294957297)", -2, PreGenesisLedger}, + {"deeply negative still pre-genesis", -100, PreGenesisLedger}, + {"chunk 0 last ledger", 0, chunk.ID(0).LastLedger()}, + {"chunk 5 last ledger", 5, chunk.ID(5).LastLedger()}, + } + require.Equal(t, uint32(1), PreGenesisLedger, "FirstLedgerSeq-1 == 1 (the doc's chunkLastLedger(-1))") + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.want, CompleteThrough(tc.in)) + }) + } + + // Assert the aliasing trap directly so the comment above can't rot: -1 wraps to + // PreGenesisLedger, -2 does not. Computed from chunk arithmetic, not hardcoded. + guardlessWrap := func(c int64) uint32 { + return chunk.ID(uint32(c)).LastLedger() + } + require.Equal(t, PreGenesisLedger, guardlessWrap(-1), + "-1 aliases PreGenesisLedger under the wrap — the coincidence this test must not rely on") + require.NotEqual(t, PreGenesisLedger, guardlessWrap(-2), + "-2 must NOT alias — proving the guard (not a coincidence) is what makes CompleteThrough(-2) safe") +} + +// ChunkIDOfLedger maps a ledger to its containing chunk, signed so a sub-genesis +// ledger yields -1 rather than panicking. 
+func TestChunkIDOfLedger(t *testing.T) { + require.Equal(t, int64(-1), ChunkIDOfLedger(chunk.FirstLedgerSeq-1), "sub-genesis => -1 sentinel") + require.Equal(t, int64(0), ChunkIDOfLedger(chunk.FirstLedgerSeq), "genesis => chunk 0") + require.Equal(t, int64(0), ChunkIDOfLedger(chunk.ID(0).LastLedger()), "chunk 0's last ledger => chunk 0") + require.Equal(t, int64(1), ChunkIDOfLedger(chunk.ID(1).FirstLedger()), "chunk 1's first ledger => chunk 1") +} diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/keys.go b/cmd/stellar-rpc/internal/fullhistory/geometry/keys.go index ba672301a..f8d054f3a 100644 --- a/cmd/stellar-rpc/internal/fullhistory/geometry/keys.go +++ b/cmd/stellar-rpc/internal/fullhistory/geometry/keys.go @@ -27,6 +27,19 @@ const ( StatePruning State = "pruning" ) +// HotState is a hot-DB key's value. One key per chunk brackets the chunk's hot +// RocksDB directory; the column families inside carry no individual key. +type HotState string + +const ( + // HotTransient — a dir operation is in flight (create/delete) or recovery + // demoted the key. Recovery is identical either way: open wipes+recreates, + // discard re-runs the scan. + HotTransient HotState = "transient" + // HotReady — the dir exists and is usable. + HotReady HotState = "ready" +) + // Kind is a per-chunk artifact kind. Each maps to one meta-store key suffix // and one set of on-disk files. type Kind string @@ -65,7 +78,8 @@ func (i TxHashIndexID) String() string { return fmt.Sprintf("%08d", uint32(i)) } const ( ChunkPrefix = "chunk:" - TxHashIndexPrefix = "txhash_index:" + HotChunkPrefix = "hot:chunk:" + TxHashIndexPrefix = "index:" // ConfigEarliestLedger is the sole config pin key. (chunks_per_txhash_index is // the fixed ChunksPerTxhashIndex constant, not a pin.) @@ -77,7 +91,13 @@ func ChunkKey(c chunk.ID, kind Kind) string { return ChunkPrefix + c.String() + ":" + string(kind) } -// TxHashIndexKey returns the index coverage key txhash_index:{idx:08d}:{lo:08d}:{hi:08d}. 
+// HotChunkKey returns the hot-DB key hot:chunk:{chunk:08d}. One key per chunk +// brackets the hot RocksDB dir; the value is a HotState. +func HotChunkKey(c chunk.ID) string { + return HotChunkPrefix + c.String() +} + +// TxHashIndexKey returns the index coverage key index:{idx:08d}:{lo:08d}:{hi:08d}. // The coverage [lo, hi] lives in the key NAME; the value is pure lifecycle // state. lo > hi is a programmer error, surfaced loudly via panic. func TxHashIndexKey(idx TxHashIndexID, lo, hi chunk.ID) string { @@ -87,7 +107,7 @@ func TxHashIndexKey(idx TxHashIndexID, lo, hi chunk.ID) string { return TxHashIndexPrefix + idx.String() + ":" + lo.String() + ":" + hi.String() } -// TxHashIndexPrefixFor returns the scan prefix txhash_index:{idx:08d}: that enumerates +// TxHashIndexPrefixFor returns the scan prefix index:{idx:08d}: that enumerates // all coverage keys of one index. func TxHashIndexPrefixFor(idx TxHashIndexID) string { return TxHashIndexPrefix + idx.String() + ":" @@ -129,7 +149,21 @@ func ParseChunkKey(key string) (chunk.ID, Kind, bool) { return chunk.ID(n), kind, true } -// ParseTxHashIndexKey decodes txhash_index:{idx:08d}:{lo:08d}:{hi:08d}. State is not part +// ParseHotChunkKey decodes hot:chunk:{chunk:08d}. ok is false for any key that +// is not a well-formed hot-chunk key. +func ParseHotChunkKey(key string) (chunk.ID, bool) { + rest, found := strings.CutPrefix(key, HotChunkPrefix) + if !found { + return 0, false + } + n, err := ParsePadded(rest) + if err != nil { + return 0, false + } + return chunk.ID(n), true +} + +// ParseTxHashIndexKey decodes index:{idx:08d}:{lo:08d}:{hi:08d}. State is not part // of the key; callers fill TxHashIndexCoverage.State from the scanned value. 
func ParseTxHashIndexKey(key string) (TxHashIndexCoverage, bool) { rest, found := strings.CutPrefix(key, TxHashIndexPrefix) diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/keys_test.go b/cmd/stellar-rpc/internal/fullhistory/geometry/keys_test.go index 17685323a..424ca0dff 100644 --- a/cmd/stellar-rpc/internal/fullhistory/geometry/keys_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/geometry/keys_test.go @@ -16,7 +16,7 @@ func TestKeyConstructorsMatchSpec(t *testing.T) { require.Equal(t, "chunk:00005350:ledgers", ChunkKey(5350, KindLedgers)) require.Equal(t, "chunk:00005350:events", ChunkKey(5350, KindEvents)) require.Equal(t, "chunk:00005350:txhash", ChunkKey(5350, KindTxHash)) - require.Equal(t, "txhash_index:00000005:00005100:00005349", TxHashIndexKey(5, 5100, 5349)) + require.Equal(t, "index:00000005:00005100:00005349", TxHashIndexKey(5, 5100, 5349)) } func TestChunkKeyBijection(t *testing.T) { @@ -62,12 +62,12 @@ func TestKeyToPathBijection(t *testing.T) { func TestParseRejectsMalformed(t *testing.T) { bad := []string{ - "chunk:5350:ledgers", // not 8-digit padded - "chunk:00005350:bogus", // unknown kind - "chunk:00005350", // missing kind - "txhash_index:00000005:00005100", // too few segments - "txhash_index:5:5100:5349", // not padded - "unrelated:key", // wrong family + "chunk:5350:ledgers", // not 8-digit padded + "chunk:00005350:bogus", // unknown kind + "chunk:00005350", // missing kind + "index:00000005:00005100", // too few segments + "index:5:5100:5349", // not padded + "unrelated:key", // wrong family } for _, key := range bad { _, _, okChunk := ParseChunkKey(key) @@ -77,10 +77,19 @@ func TestParseRejectsMalformed(t *testing.T) { // Specific rejections. 
_, _, ok := ParseChunkKey("chunk:00005350:bogus") require.False(t, ok) - _, ok2 := ParseTxHashIndexKey("txhash_index:00000005:00005349:00005100") // lo > hi + _, ok2 := ParseTxHashIndexKey("index:00000005:00005349:00005100") // lo > hi require.False(t, ok2) } func TestIndexKeyPanicsOnLoGreaterThanHi(t *testing.T) { require.Panics(t, func() { TxHashIndexKey(5, 5349, 5100) }) } + +func TestHotKeyBijection(t *testing.T) { + for _, id := range []chunk.ID{0, 7, 5350} { + key := HotChunkKey(id) + got, ok := ParseHotChunkKey(key) + require.True(t, ok) + require.Equal(t, id, got) + } +} diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/paths.go b/cmd/stellar-rpc/internal/fullhistory/geometry/paths.go index 58eb6752b..4f3331dd6 100644 --- a/cmd/stellar-rpc/internal/fullhistory/geometry/paths.go +++ b/cmd/stellar-rpc/internal/fullhistory/geometry/paths.go @@ -82,10 +82,18 @@ func (l Layout) LedgerPackPath(c chunk.ID) string { return filepath.Join(l.ledgersRoot, c.BucketID(), ledger.PackName(c)) } +// EventsBucketDir is a chunk's events cold-segment directory — the bucket dir the +// three events files (pack, index-pack, index-hash) live under, and the single +// path the cold events ingester writes into. Sharing it with EventsPaths keeps +// the events tree's shape defined once. +func (l Layout) EventsBucketDir(c chunk.ID) string { + return filepath.Join(l.eventsRoot, c.BucketID()) +} + // EventsPaths are a chunk's three events cold-segment files. Leaves owned by // eventstore.*. 
func (l Layout) EventsPaths(c chunk.ID) []string { - dir := filepath.Join(l.eventsRoot, c.BucketID()) + dir := l.EventsBucketDir(c) return []string{ filepath.Join(dir, eventstore.EventsPackName(c)), filepath.Join(dir, eventstore.IndexPackName(c)), diff --git a/cmd/stellar-rpc/internal/fullhistory/geometry/txhash_index.go b/cmd/stellar-rpc/internal/fullhistory/geometry/txhash_index.go index 14f7a99f0..b63164925 100644 --- a/cmd/stellar-rpc/internal/fullhistory/geometry/txhash_index.go +++ b/cmd/stellar-rpc/internal/fullhistory/geometry/txhash_index.go @@ -80,19 +80,3 @@ func (l TxHashIndexLayout) LastChunk(id TxHashIndexID) chunk.ID { func (l TxHashIndexLayout) IsTerminalCoverage(cov TxHashIndexCoverage) bool { return cov.Hi == l.LastChunk(cov.Index) } - -// LastCompleteChunkAt is the inverse of chunk.ID.LastLedger: the largest chunk -// whose last ledger is <= ledger. Returns SIGNED int64 so a sub-genesis ledger -// (the sub-genesis sentinel) maps to -1 ("before the first chunk") rather than -// wrapping; the cast-before-subtract keeps it in int64 (uint32 ledger-1 would -// underflow for ledger 0). -func LastCompleteChunkAt(ledger uint32) int64 { - return (int64(ledger)+1-int64(chunk.FirstLedgerSeq))/int64(chunk.LedgersPerChunk) - 1 -} - -// ChunkFirstLedger maps a non-negative signed chunk index to its first ledger. -// It is the signed-domain companion of chunk.ID.FirstLedger used by -// retentionFloorChunk after the max(..., 0) clamp. 
-func ChunkFirstLedger(c int64) uint32 { - return chunk.ID(c).FirstLedger() //nolint:gosec // c >= 0 (clamped) and bounded by real chunk ids -} diff --git a/cmd/stellar-rpc/internal/fullhistory/helpers_test.go b/cmd/stellar-rpc/internal/fullhistory/helpers_test.go index d7dc16241..7d6b3da31 100644 --- a/cmd/stellar-rpc/internal/fullhistory/helpers_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/helpers_test.go @@ -93,21 +93,30 @@ func newRecordingMetrics() *recordingMetrics { return &recordingMetrics{gaugesSet: map[string]int{}} } -func (r *recordingMetrics) LastCommitted(uint32, uint32) { +func (r *recordingMetrics) LastCommitted(uint32) { r.mu.Lock() defer r.mu.Unlock() r.gaugesSet["last_committed"]++ } +func (r *recordingMetrics) RetentionFloor(uint32) { + r.mu.Lock() + defer r.mu.Unlock() + r.gaugesSet["retention_floor"]++ +} + func (r *recordingMetrics) BackfillPass(time.Duration) { r.mu.Lock() defer r.mu.Unlock() r.backfillPasses++ } -func (*recordingMetrics) Freeze(time.Duration) {} -func (*recordingMetrics) Rebuild(time.Duration) {} -func (*recordingMetrics) Prune(int, time.Duration) {} +func (*recordingMetrics) ChunkBoundary() {} +func (*recordingMetrics) Freeze(time.Duration) {} +func (*recordingMetrics) Rebuild(time.Duration) {} +func (*recordingMetrics) Prune(int, time.Duration) {} +func (*recordingMetrics) LiveHotChunks(int) {} +func (*recordingMetrics) Discard(int, time.Duration) {} var _ observability.Metrics = (*recordingMetrics)(nil) diff --git a/cmd/stellar-rpc/internal/fullhistory/hotloop.go b/cmd/stellar-rpc/internal/fullhistory/hotloop.go new file mode 100644 index 000000000..3b1ac2487 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/hotloop.go @@ -0,0 +1,210 @@ +package fullhistory + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" + supportlog "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/go-stellar-sdk/xdr" + + 
"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/ingest" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/observability" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" +) + +// The hot-DB ingestion loop (decision (a)). One goroutine consumes a single +// sequence-validated ledger stream into the per-chunk shared multi-CF hot DB, +// committing each ledger as one atomic synced WriteBatch across all CFs. It keeps +// NO progress variable — the last synced batch IS the last-committed ledger, +// re-derived at startup. Its only coupling to the lifecycle is the boundary +// signal: at each boundary it publishes the just-completed chunk id (the two +// goroutines share no memory). Clean-shutdown vs crash is decided at the daemon +// top level (a ctx-canceled return is clean). + +// openHotDBForChunk opens/recovers/creates the chunk's shared hot DB, keyed on +// the durable hot:chunk state: +// - "ready": open it must-exist (create-if-missing OFF). A missing or gutted DB +// FAILS the open — never auto-heal into a fresh empty DB (which would silently +// regress the watermark). The open failure is an ordinary restartable error: +// a transient self-heals on the next attempt, genuine loss becomes a +// supervised crash-loop with the wrapped context. +// - "transient" or absent: wipe any leftover dir and create fresh +// (transient -> fsync dir+parent -> ready), so a crash mid-create can't +// fabricate a "ready but DB gone" open failure above. 
+func openHotDBForChunk(cat *catalog.Catalog, chunkID chunk.ID, logger *supportlog.Entry) (*hotchunk.DB, error) { + dir := cat.Layout().HotChunkPath(chunkID) + + state, err := cat.HotState(chunkID) + if err != nil { + return nil, fmt.Errorf("read hot state chunk %s: %w", chunkID, err) + } + + if state == geometry.HotReady { + db, openErr := hotchunk.OpenExisting(dir, chunkID, logger) + if openErr != nil { + return nil, fmt.Errorf("chunk %s is %q but its hot DB won't open: %w", chunkID, geometry.HotReady, openErr) + } + return db, nil + } + + // "transient" or absent: wipe any leftover dir, then create fresh under the bracket. + if rmErr := os.RemoveAll(dir); rmErr != nil { + return nil, fmt.Errorf("wipe leftover hot dir %s: %w", dir, rmErr) + } + if putErr := cat.PutHotTransient(chunkID); putErr != nil { + return nil, fmt.Errorf("mark hot transient chunk %s: %w", chunkID, putErr) + } + + db, openErr := hotchunk.Open(dir, chunkID, logger) + if openErr != nil { + return nil, fmt.Errorf("create hot DB chunk %s: %w", chunkID, openErr) + } + + // The dir + dirent must be durable BEFORE the key flips to "ready", else a + // crash between the flip and the dir's durability fabricates the "ready but + // dir missing" won't-open error above for a DB that was actually fine. FsyncNewDirs + // syncs the leaf then its parent dirent (the one audited barrier for a + // freshly created dir). + if syncErr := geometry.FsyncNewDirs(filepath.Dir(dir), dir); syncErr != nil { + _ = db.Close() + return nil, fmt.Errorf("fsync hot dir %s: %w", dir, syncErr) + } + if flipErr := cat.FlipHotReady(chunkID); flipErr != nil { + _ = db.Close() + return nil, fmt.Errorf("flip hot ready chunk %s: %w", chunkID, flipErr) + } + return db, nil +} + +// boundaryPublisher is the ingestion loop's handoff sink: it publishes the +// just-completed chunk id to the lifecycle at each boundary. +// *lifecycle.BoundarySignal is the production impl; tests inject a recorder. 
type boundaryPublisher interface {
	// Publish announces that chunk c is complete. runIngestionLoop calls it with
	// the chunk whose last ledger it just committed, and only after the next
	// chunk's hot DB has been opened.
	Publish(c chunk.ID)
}

// ingestionLoopConfig bundles the ingestion loop's dependencies. run() opens the
// resume chunk's hot DB (HotDB) BEFORE serving reads — so a broken hot tier fails
// startup instead of serving behind a crash-looping loop — and hands the open
// handle in; the loop's first deferred statement takes ownership of the close, and
// it reopens the DB itself at every boundary (Catalog + Logger).
type ingestionLoopConfig struct {
	// Stream supplies the raw ledgers; the loop ranges over
	// Stream.RawLedgers(ctx, ledgerbackend.UnboundedRange(Resume)).
	Stream ledgerbackend.LedgerStream
	// Resume is the first ledger sequence requested from Stream and the starting
	// value of the loop's local sequence counter.
	Resume uint32
	// HotDB is the already-open hot DB the loop starts writing into; the loop
	// closes it (at the first boundary or on exit).
	HotDB *hotchunk.DB
	// Catalog is passed to openHotDBForChunk when the loop opens the next chunk's
	// hot DB at a boundary.
	Catalog *catalog.Catalog
	// Boundary receives each completed chunk id via Publish.
	Boundary boundaryPublisher
	// Logger is passed to openHotDBForChunk and emits the per-boundary log line.
	Logger *supportlog.Entry
	// Metrics receives LastCommitted after every ingested ledger and ChunkBoundary
	// at every boundary. The loop wraps it with observability.MetricsOrNop —
	// presumably so a nil value is tolerated; confirm against that helper.
	Metrics observability.Metrics
	// Sink is handed to ingest.NewHotService together with the live hot DB, both
	// at loop start and again after each boundary reopen.
	Sink ingest.MetricSink
}

// runIngestionLoop is the hot tier's OWNER: the single goroutine that opens,
// writes, closes, and hands off the per-chunk hot DBs. It consumes ONE continuous
// sequence-validated ledger stream from Resume (the stream owns the captive-core
// process — started on the first pull, torn down when this loop exits), commits
// each ledger as one atomic synced WriteBatch (decision (a)), and at each chunk
// boundary closes the just-filled DB, opens the next, and publishes the completed
// chunk to the lifecycle.
Those opens are read-only, which takes no RocksDB LOCK, so +// writer/reader separation is a construction invariant here, not a lock readers +// rely on. +func runIngestionLoop(ctx context.Context, cfg ingestionLoopConfig) (err error) { + metrics := observability.MetricsOrNop(cfg.Metrics) + + // Take ownership of the resume hot DB run() opened (before serving reads) as the + // loop's FIRST statement, so the deferred close sits ahead of any early return — + // no ownership gap for a transient failure to leak the handle (and its RocksDB + // LOCK) through. The loop is this DB's single writer and reopens it at every + // boundary; the defer closes whatever handle is live on any exit (the boundary + // handoff already closed every prior chunk's DB), and no writer races the close + // (the loop has stopped on every exit path). + hotDB := cfg.HotDB + defer func() { + if hotDB != nil { + if cerr := hotDB.Close(); cerr != nil && err == nil { + err = fmt.Errorf("close live hot DB: %w", cerr) + } + } + }() + + // hotService binds the metrics sink to THIS hotDB instance; the boundary handoff + // rebuilds it for the reopened chunk DB below. + hotService := ingest.NewHotService(hotDB, cfg.Sink) + + // One continuous stream from the resume ledger, consumed on a local sequence + // counter. The in-order contract is enforced at the SOURCE — captive core (and + // every SDK backend) validates its own output — so the loop trusts the counter + // rather than re-parsing each view's sequence. A stream / decode error ends the + // loop for the daemon to classify. + seq := cfg.Resume + for raw, verr := range cfg.Stream.RawLedgers(ctx, ledgerbackend.UnboundedRange(cfg.Resume)) { + if verr != nil { + return fmt.Errorf("ingestion stream: %w", verr) + } + + // One atomic synced WriteBatch across all hot CFs (via hotDB.IngestLedger). 
+ if ierr := hotService.Ingest(ctx, seq, xdr.LedgerCloseMetaView(raw)); ierr != nil { + return fmt.Errorf("ingest ledger %d: %w", seq, ierr) + } + // The ingestion loop owns the last-committed gauge: this is the TRUE + // committed ledger (mid-chunk included), one atomic gauge set per ledger. + // The tick must not touch it — its chunk-aligned value would regress it. + metrics.LastCommitted(seq) + + // Chunk boundary: this seq is the chunk's last ledger. + if closed := chunk.IDFromLedger(seq); seq == closed.LastLedger() { + next := closed + 1 + // Handoff fence: close the write handle BEFORE the next chunk's key is + // created (that key is what makes THIS chunk complete to a tick, which may + // then freeze and discard its hot DB — no writer may hold it then). + if cerr := hotDB.Close(); cerr != nil { + hotDB = nil // closed (failed) — do not double-close in defer + return fmt.Errorf("close hot DB at boundary chunk %s: %w", closed, cerr) + } + hotDB = nil // released; reopen below republishes it for the defer + + nextDB, oerr := openHotDBForChunk(cfg.Catalog, next, cfg.Logger) + if oerr != nil { + return fmt.Errorf("open hot DB for chunk %s at boundary: %w", next, oerr) + } + hotDB = nextDB + hotService = ingest.NewHotService(hotDB, cfg.Sink) + // next's key (created inside openHotDBForChunk) moved the partition; only + // now publish the completed chunk to the lifecycle. + cfg.Boundary.Publish(closed) + + // Boundary observability (the woken tick reports the freeze/discard/prune). + metrics.ChunkBoundary() + cfg.Logger.WithField("closed_chunk", closed.String()). + WithField("next_chunk", next.String()). + WithField("last_ledger", seq). + Info("streaming: ingestion chunk boundary — handed off to lifecycle") + } + seq++ + } + // The unbounded production stream ends only on ctx cancellation or a source + // error, both surfaced as the cursor's error element above. 
Falling through here + // means the source stopped WITHOUT an error while the daemon ctx is still live — + // unexpected for captive core; surface it as a restartable error rather than a + // nil return, which supervise would read as a clean shutdown and silently stop + // ingesting. + return errors.New("ingestion stream ended unexpectedly (source stopped with no error)") +} diff --git a/cmd/stellar-rpc/internal/fullhistory/hotloop_test.go b/cmd/stellar-rpc/internal/fullhistory/hotloop_test.go new file mode 100644 index 000000000..6f7b62ec1 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/hotloop_test.go @@ -0,0 +1,416 @@ +package fullhistory + +import ( + "context" + "errors" + "iter" + "os" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" +) + +// --------------------------------------------------------------------------- +// fakeCoreStream — an injectable ledgerbackend.LedgerStream the ingestion loop +// consumes (the design's raw captive-core stream). RawLedgers yields programmed +// frames contiguously from the range's From(); once it runs past the last +// programmed seq it either blocks until ctx is canceled (a live tip stream that +// only ends on shutdown) or yields endErr (a crashed backend). It records the +// FIRST seq it was asked for (the loop's resume point) and a per-seq consideration +// count so a test can wait for the loop to reach the blocking pull. 
+// --------------------------------------------------------------------------- + +type fakeCoreStream struct { + frames map[uint32][]byte // seq -> raw LCM bytes + blockOnCtx bool // past the last frame, block until ctx.Done + endErr error // past the last frame, yield this (when not blocking) + yieldErrAt uint32 // if non-zero, yield errAt at this seq instead of bytes + errAt error + + calls atomic.Int32 // seqs yielded by the stream + firstSeen atomic.Uint32 + sawFirst atomic.Bool +} + +var _ ledgerbackend.LedgerStream = (*fakeCoreStream)(nil) + +func (s *fakeCoreStream) RawLedgers( + ctx context.Context, r ledgerbackend.Range, _ ...ledgerbackend.StreamOption, +) iter.Seq2[[]byte, error] { + return func(yield func([]byte, error) bool) { + if s.sawFirst.CompareAndSwap(false, true) { + s.firstSeen.Store(r.From()) + } + for seq := r.From(); ; seq++ { + s.calls.Add(1) + if ctx.Err() != nil { + yield(nil, ctx.Err()) + return + } + if s.yieldErrAt != 0 && seq == s.yieldErrAt { + yield(nil, s.errAt) + return + } + if raw, ok := s.frames[seq]; ok { + if !yield(raw, nil) { + return + } + continue + } + // Past the programmed frames. + if s.blockOnCtx { + <-ctx.Done() + yield(nil, ctx.Err()) + return + } + if s.endErr != nil { + yield(nil, s.endErr) + return + } + yield(nil, errors.New("fakeCoreStream: no frame for seq")) + return + } + } +} + +// streamForSeqs builds a fakeCoreStream with zero-tx LCM frames for [from,to]. +func streamForSeqs(t *testing.T, from, to uint32) *fakeCoreStream { + t.Helper() + s := &fakeCoreStream{frames: map[uint32][]byte{}} + for seq := from; seq <= to; seq++ { + s.frames[seq] = zeroTxLCMBytes(t, seq) + } + return s +} + +// recordingBoundary is a test boundaryPublisher capturing the completed chunk ids +// the loop publishes at each boundary, so a test can assert the handoff without +// wiring a real lifecycle Loop. 
+type recordingBoundary struct { + mu sync.Mutex + ids []chunk.ID +} + +func (r *recordingBoundary) Publish(c chunk.ID) { + r.mu.Lock() + defer r.mu.Unlock() + r.ids = append(r.ids, c) +} + +func (r *recordingBoundary) list() []chunk.ID { + r.mu.Lock() + defer r.mu.Unlock() + return append([]chunk.ID(nil), r.ids...) +} + +// loopConfig builds an ingestionLoopConfig for a test: the stream + resume point + +// a recording boundary, and opens the resume chunk's hot DB the way run() does now +// (the loop takes ownership and closes it). The test must hold no other handle on +// that dir while the loop runs (a second read-write open would contend the LOCK). +func loopConfig( + t *testing.T, stream ledgerbackend.LedgerStream, cat *catalog.Catalog, resume uint32, +) (ingestionLoopConfig, *recordingBoundary) { + t.Helper() + rec := &recordingBoundary{} + db, err := openHotDBForChunk(cat, chunk.IDFromLedger(resume), silentLogger()) + require.NoError(t, err) + return ingestionLoopConfig{ + Stream: stream, + Resume: resume, + HotDB: db, + Catalog: cat, + Boundary: rec, + Logger: silentLogger(), + }, rec +} + +// impliedResume is the resume point a hot DB's durable watermark implies — one past +// its last committed ledger, or the chunk's first ledger when empty. Production no +// longer derives this in the loop (it trusts the resume run() passes it), but tests +// still assert that a restart's durable watermark matches what startup would derive. +func impliedResume(t *testing.T, db *hotchunk.DB) uint32 { + t.Helper() + maxSeq, ok, err := db.MaxCommittedSeq() + require.NoError(t, err) + if !ok { + return db.ChunkID().FirstLedger() + } + return maxSeq + 1 +} + +// openLiveHotDB opens (and brackets ready) the live hot DB for a chunk via the +// production opener, returning the handle and the catalog it lives under. 
+func openLiveHotDB(t *testing.T, cat *catalog.Catalog, c chunk.ID) *hotchunk.DB { + t.Helper() + db, err := openHotDBForChunk(cat, c, silentLogger()) + require.NoError(t, err) + return db +} + +// seedWatermark commits real zero-tx LCMs for [FirstLedger, seq] into chunk c's +// hot DB through the production IngestLedger path (the events CF requires strict +// ledger contiguity from the chunk's first ledger), then CLOSES the handle — +// leaving the chunk "ready" on disk with NO open handle, so the loop can open it +// itself. Returns the resume point (seq+1) a boundary test drives the loop from. +// Seeding a near-full chunk costs one synced commit per ledger, so its callers run +// t.Parallel(). +func seedWatermark(t *testing.T, cat *catalog.Catalog, c chunk.ID, seq uint32) uint32 { + t.Helper() + db := openLiveHotDB(t, cat, c) + for s := c.FirstLedger(); s <= seq; s++ { + _, err := db.IngestLedger(s, zeroTxLCMBytes(t, s)) + require.NoError(t, err) + } + require.NoError(t, db.Close()) + return seq + 1 +} + +// --------------------------------------------------------------------------- +// openHotDBForChunk — the bracket's open end. +// --------------------------------------------------------------------------- + +// TestOpenHotTier_CreatesBracketAndDir: a fresh open writes the dir and flips +// the key "ready"; the returned DB is empty (resume at FirstLedger). 
+func TestOpenHotTier_CreatesBracketAndDir(t *testing.T) { + cat, _ := testCatalog(t) + c := chunk.ID(3) + + db, err := openHotDBForChunk(cat, c, silentLogger()) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) + + state, err := cat.HotState(c) + require.NoError(t, err) + assert.Equal(t, geometry.HotReady, state, "open flips the key ready") + + _, statErr := os.Stat(cat.Layout().HotChunkPath(c)) + require.NoError(t, statErr, "the dir exists") + + assert.Equal(t, c.FirstLedger(), impliedResume(t, db), "an empty resume DB resumes at the chunk's first ledger") +} + +// TestOpenHotTier_ReadyButDirMissingFailsOpen: a "ready" key whose DB is gone +// FAILS the must-exist open (never auto-healed into a fresh empty DB). The error +// is ordinary/restartable — no sentinel. +func TestOpenHotTier_ReadyButDirMissingFailsOpen(t *testing.T) { + cat, _ := testCatalog(t) + c := chunk.ID(5) + require.NoError(t, cat.PutHotTransient(c)) + require.NoError(t, cat.FlipHotReady(c)) // key says ready, but no dir created + + _, err := openHotDBForChunk(cat, c, silentLogger()) + require.Error(t, err) +} + +// TestOpenHotTier_TransientRecreatesFresh: a "transient" key (crashed +// create/discard) is recovered by wiping any leftover and recreating. +func TestOpenHotTier_TransientRecreatesFresh(t *testing.T) { + cat, _ := testCatalog(t) + c := chunk.ID(2) + require.NoError(t, cat.PutHotTransient(c)) // a crash left a transient key + + db, err := openHotDBForChunk(cat, c, silentLogger()) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) + + state, err := cat.HotState(c) + require.NoError(t, err) + assert.Equal(t, geometry.HotReady, state) +} + +// --------------------------------------------------------------------------- +// runIngestionLoop — atomic landing. 
+// --------------------------------------------------------------------------- + +// TestRunIngestionLoop_LedgerLandsAcrossAllCFs: polling a short contiguous +// prefix lands each ledger atomically across the ledgers, txhash, and events +// CFs — the single watermark advances to the last committed seq, and every CF +// is readable. The getter then errs (backend crash), which the loop returns. +func TestRunIngestionLoop_LedgerLandsAcrossAllCFs(t *testing.T) { + cat, _ := testCatalog(t) + c := chunk.ID(0) + first := c.FirstLedger() + + // A short contiguous prefix from the chunk's first ledger (events require + // strict contiguity from FirstLedger), then the stream runs dry and errs. The + // loop opens the empty chunk 0 itself and resumes at its first ledger. + stream := streamForSeqs(t, first, first+2) + stream.endErr = errors.New("backend crashed") + cfg, _ := loopConfig(t, stream, cat, first) + + err := runIngestionLoop(context.Background(), cfg) + require.Error(t, err, "stream ran past the prefix and errored") + + // Reopen the (loop-closed) DB and assert every CF advanced together. + reopened, err := hotchunk.Open(cat.Layout().HotChunkPath(c), c, silentLogger()) + require.NoError(t, err) + t.Cleanup(func() { _ = reopened.Close() }) + + maxSeq, ok, err := reopened.MaxCommittedSeq() + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, first+2, maxSeq, "the single watermark is the last committed seq") + + raw, err := reopened.Ledgers().GetLedgerRaw(first + 2) + require.NoError(t, err) + assert.NotEmpty(t, raw) + assert.Equal(t, uint32(0), eventCount(t, reopened.Events()), "zero-tx ledgers carry no events") +} + +// --------------------------------------------------------------------------- +// runIngestionLoop — boundary notifications carry the completed chunk id. 
+// --------------------------------------------------------------------------- + +// TestRunIngestionLoop_BoundaryNotifiesCompletedChunk: crossing the chunk 0 -> 1 +// boundary publishes chunk 0 to the lifecycle. The watermark is seeded just below +// the boundary so the stream crosses it in one step. +func TestRunIngestionLoop_BoundaryNotifiesCompletedChunk(t *testing.T) { + t.Parallel() // seeds a near-full chunk (one synced commit per ledger) + cat, _ := testCatalog(t) + c := chunk.ID(0) + c1 := c + 1 + resume := seedWatermark(t, cat, c, c.LastLedger()-1) // == c.LastLedger() + + stream := &fakeCoreStream{frames: map[uint32][]byte{ + c.LastLedger(): zeroTxLCMBytes(t, c.LastLedger()), // boundary 0->1 + c1.FirstLedger(): zeroTxLCMBytes(t, c1.FirstLedger()), // a ledger in chunk 1 + }, endErr: errors.New("end")} + cfg, rec := loopConfig(t, stream, cat, resume) + + done := make(chan error, 1) + go func() { + done <- runIngestionLoop(context.Background(), cfg) + }() + + select { + case err := <-done: + require.Error(t, err, "stream ran dry") + case <-time.After(10 * time.Second): + t.Fatal("ingestion loop deadlocked") + } + + assert.Equal(t, []chunk.ID{c}, rec.list(), "the completed chunk id was published at the boundary") +} + +// --------------------------------------------------------------------------- +// runIngestionLoop — clean shutdown vs crash (classified at the daemon top +// level: ctx-canceled return is clean, any other error is restartable). +// --------------------------------------------------------------------------- + +// TestRunIngestionLoop_CtxCancelReturnsCtxErr: a ctx cancellation while the stream +// is blocking on the tip makes RawLedgers yield ctx.Err(); the loop returns that +// (the daemon top level classifies a ctx-canceled return as a clean shutdown). 
+func TestRunIngestionLoop_CtxCancelReturnsCtxErr(t *testing.T) { + cat, _ := testCatalog(t) + c := chunk.ID(0) + first := c.FirstLedger() + + stream := streamForSeqs(t, first, first+1) + stream.blockOnCtx = true // after the frames, behave like a live tip stream + cfg, _ := loopConfig(t, stream, cat, first) + ctx, cancel := context.WithCancel(context.Background()) + + done := make(chan error, 1) + go func() { + done <- runIngestionLoop(ctx, cfg) + }() + + require.Eventually(t, func() bool { + return stream.calls.Load() >= 3 // ingested 2 frames, blocked on the 3rd + }, 5*time.Second, 5*time.Millisecond) + cancel() + + select { + case err := <-done: + require.Error(t, err) + require.ErrorIs(t, err, context.Canceled, "the loop surfaces the ctx-canceled stream error") + case <-time.After(10 * time.Second): + t.Fatal("ingestion loop did not stop on ctx cancellation") + } +} + +// TestRunIngestionLoop_StreamErrorReturnsError: a stream error (not a shutdown) +// propagates as a restartable failure. +func TestRunIngestionLoop_StreamErrorReturnsError(t *testing.T) { + cat, _ := testCatalog(t) + c := chunk.ID(0) + first := c.FirstLedger() + + boom := errors.New("backend exploded") + stream := streamForSeqs(t, first, first) + stream.yieldErrAt = first + 1 + stream.errAt = boom + cfg, _ := loopConfig(t, stream, cat, first) + + err := runIngestionLoop(context.Background(), cfg) + require.Error(t, err) + require.ErrorIs(t, err, boom) +} + +// --------------------------------------------------------------------------- +// runIngestionLoop — restart resumes idempotently from the derived watermark. 
+// --------------------------------------------------------------------------- + +// TestRunIngestionLoop_RestartResumesFromWatermark: after a first run commits a +// prefix and exits, a second run over a FRESH open of the SAME hot dir resumes at +// watermark+1 (asserted via the FIRST seq the stream is asked for) — the stream +// range starts at the derived resume, and the final watermark is exactly the last +// delivered seq. +func TestRunIngestionLoop_RestartResumesFromWatermark(t *testing.T) { + cat, _ := testCatalog(t) + c := chunk.ID(0) + first := c.FirstLedger() + + // First run: loopConfig opens empty chunk 0 (resumes at first), the loop commits + // [first, first+2], then the stream errs. + stream1 := streamForSeqs(t, first, first+2) + stream1.endErr = errors.New("end") + cfg1, _ := loopConfig(t, stream1, cat, first) + err := runIngestionLoop(context.Background(), cfg1) + require.Error(t, err) + assert.Equal(t, first, stream1.firstSeen.Load(), "first run resumed at the chunk's first ledger") + + // The durable watermark now implies resume first+3 — exactly what startup would + // derive on restart. Close the handle before the loop reopens the dir. + db2, err := openHotDBForChunk(cat, c, silentLogger()) + require.NoError(t, err) + resume := impliedResume(t, db2) + assert.Equal(t, first+3, resume, "restart resumes one past the durable watermark") + require.NoError(t, db2.Close()) + + // Second run resumes at the derived watermark and commits two more ledgers. 
+ stream2 := streamForSeqs(t, first+3, first+5) + stream2.endErr = errors.New("end") + cfg2, _ := loopConfig(t, stream2, cat, resume) + err = runIngestionLoop(context.Background(), cfg2) + require.Error(t, err) + assert.Equal(t, first+3, stream2.firstSeen.Load(), "second run resumed at watermark+1") + + reopened, err := hotchunk.Open(cat.Layout().HotChunkPath(c), c, silentLogger()) + require.NoError(t, err) + t.Cleanup(func() { _ = reopened.Close() }) + maxSeq, ok, err := reopened.MaxCommittedSeq() + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, first+5, maxSeq) +} + +// eventCount reads the hot events store's committed event count, failing the +// test on the (close-only) error the Reader contract allows. +func eventCount(t *testing.T, r interface{ EventCount() (uint32, error) }) uint32 { + t.Helper() + n, err := r.EventCount() + require.NoError(t, err) + return n +} diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/config.go b/cmd/stellar-rpc/internal/fullhistory/ingest/config.go index 139f70d43..014c554c6 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/config.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/config.go @@ -3,8 +3,7 @@ package ingest import "errors" // Config selects which data types the ingest drivers write. At least one of -// Ledgers/Txhash/Events must be enabled. Per-ledger hot fan-out is always -// parallel; that is not configurable. +// Ledgers/Txhash/Events must be enabled. // // The view-based event path derives payloads from the LedgerCloseMetaView and // needs no network passphrase, so Config carries no passphrase. 
diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/doc.go b/cmd/stellar-rpc/internal/fullhistory/ingest/doc.go index 5667214d9..4eeb79f70 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/doc.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/doc.go @@ -8,12 +8,12 @@ // Two tiers share the per-ledger extraction but differ in everything // else: // -// - Hot (RunHot): one chunk into the long-lived, caller-owned hot -// stores, from an injected ledgerbackend.LedgerStream. The stores -// are INJECTED and never opened or closed here, and neither is the -// stream; each ledger is durable before the next is pulled. -// Per-ledger fan-out across the enabled ingesters is concurrent -// (HotService). +// - Hot (HotService): one ledger at a time into the long-lived, +// caller-owned per-chunk hot DB, driven by the daemon's live +// ingestion loop. The DB is INJECTED and never opened or closed +// here. Each ledger is written as ONE atomic synced WriteBatch +// across all column families (decision (a) — no per-type fan-out), +// so a ledger is fully present or absent before the next is pulled. // - Cold (WriteColdChunk): one chunk into per-chunk cold artifacts // (ledger .pack, txhash .bin, events pack+index). It is // SOURCE-BLIND — the caller resolves the chunk's ledger source and @@ -58,7 +58,7 @@ // // Inputs are borrowed: every Ingest receives a view over the source // stream's buffer, valid only until the next ledger is pulled, and -// each ingester copies what it retains (see HotIngester). The raw +// each ingester copies what it retains (see ColdIngester). The raw // ledger iterator's contract includes yielding an error on ctx // cancellation — the drain loop relies on it for cancellation rather // than polling ctx itself. 
Metrics flow through MetricSink (Prometheus in prod, diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go b/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go index 417cc2d37..7c73ad0f3 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/driver.go @@ -7,80 +7,19 @@ import ( "iter" "time" - "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" supportlog "github.com/stellar/go-stellar-sdk/support/log" "github.com/stellar/go-stellar-sdk/xdr" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" ) -// HotStores holds the long-lived, caller-owned hot stores injected into RunHot. -// The caller (the daemon) opens and closes these; RunHot only borrows them to -// build the per-type hot ingesters. A field left nil for an enabled data type is -// a configuration error caught by RunHot. Every hot store is chunk-bound (each -// instance accumulates exactly one chunk before being frozen into cold -// artifacts), so each injected store must already be bound to the chunk being -// ingested — RunHot rejects a mismatch up front. -type HotStores struct { - Ledgers *ledger.HotStore - Txhash *txhash.HotStore - Events *eventstore.HotStore -} - -// buildHotIngesters constructs one HotIngester per data type enabled in cfg, in -// canonical ledgers→txhash→events order, from the injected stores. It errors if -// an enabled type's store is nil. 
-func buildHotIngesters(stores HotStores, sink MetricSink, cfg Config) ([]HotIngester, error) { - var ings []HotIngester - if cfg.Ledgers { - if stores.Ledgers == nil { - return nil, errors.New("ingest: Ledgers enabled but HotStores.Ledgers is nil") - } - ings = append(ings, NewLedgerHotIngester(stores.Ledgers, sink)) - } - if cfg.Txhash { - if stores.Txhash == nil { - return nil, errors.New("ingest: Txhash enabled but HotStores.Txhash is nil") - } - ings = append(ings, NewTxhashHotIngester(stores.Txhash, sink)) - } - if cfg.Events { - if stores.Events == nil { - return nil, errors.New("ingest: Events enabled but HotStores.Events is nil") - } - ings = append(ings, NewEventsHotIngester(stores.Events, sink)) - } - return ings, nil -} - -// errColdBuildAborted is the synthetic error recorded against an -// already-built cold ingester's metric when a LATER constructor fails and the -// build is rolled back. Without it, closing a fully-built ingester would emit -// a clean (nil-err, 0-items) ColdIngest — a phantom "success" for a chunk that -// never actually ingested anything. -var errColdBuildAborted = errors.New("ingest: cold ingester build aborted (sibling constructor failed)") - -// coldAborter is implemented by the concrete cold ingesters so the -// constructor-rollback path can mark their per-chunk metric as aborted before -// Close emits it, turning what would be a phantom success into a recorded -// abort. Optional: an ingester that does not implement it just gets its normal -// Close emission. -type coldAborter interface { - abortMetric(err error) -} - // closeColdAll closes every cold ingester built so far, joining each Close error -// into err. Used when a LATER constructor fails mid-build: the already-built -// ingesters never ingested anything, so each one's metric is first marked -// aborted (so the deferred Close emit is not a phantom success). +// into err. Used when a LATER constructor fails mid-build. 
The already-built +// ingesters never ingested or finalized, and Close no longer emits a per-ingester +// ColdIngest, so a rolled-back build produces no phantom-success sample — no +// abort bookkeeping needed here. func closeColdAll(ings []ColdIngester, err error) error { for _, ing := range ings { - if a, ok := ing.(coldAborter); ok { - a.abortMetric(errColdBuildAborted) - } if cerr := ing.Close(); cerr != nil { err = errors.Join(err, fmt.Errorf("close: %w", cerr)) } @@ -88,105 +27,29 @@ func closeColdAll(ings []ColdIngester, err error) error { return err } -// RunHot feeds each ledger of chunkID (as a view) from the injected stream to a -// HotService over the enabled hot ingesters, built from the INJECTED, -// caller-owned stores in hotStores. Ingest errors abort fast; HotService.Ingest -// waits for all ingesters before the loop pulls again so the borrowed view is -// never read past its lifetime. The hot stores are NOT closed here, and neither -// is the stream — the caller owns both lifecycles. -func RunHot( - ctx context.Context, - logger *supportlog.Entry, - stream ledgerbackend.LedgerStream, - chunkID chunk.ID, - hotStores HotStores, - sink MetricSink, - cfg Config, -) error { - if verr := cfg.validate(); verr != nil { - return verr - } - // Every hot store is chunk-bound — each instance accumulates exactly one - // chunk's data before being frozen into the chunk's cold artifacts — and - // records its chunk at open time. An injected store bound to a different - // chunk than we're ingesting would silently interleave two chunks' data - // (ledgers, txhash) or fail every per-ledger write with an out-of-range - // offset (events, whose LedgerOffsets are chunk-relative), so catch the - // mismatch up front with a clear message. Nil stores are skipped here: - // buildHotIngesters rejects a nil store for an enabled type with a more - // specific error. 
- checkBinding := func(name string, got chunk.ID) error { - if got != chunkID { - return fmt.Errorf("ingest: RunHot chunk %d but injected %s store is bound to chunk %d", - uint32(chunkID), name, uint32(got)) - } - return nil - } - if cfg.Ledgers && hotStores.Ledgers != nil { - if err := checkBinding("Ledgers", hotStores.Ledgers.ChunkID()); err != nil { - return err - } - } - if cfg.Txhash && hotStores.Txhash != nil { - if err := checkBinding("Txhash", hotStores.Txhash.ChunkID()); err != nil { - return err - } - } - if cfg.Events && hotStores.Events != nil { - if err := checkBinding("Events", hotStores.Events.ChunkID()); err != nil { - return err - } - } - ings, berr := buildHotIngesters(hotStores, sink, cfg) - if berr != nil { - return berr - } - logger.Debugf("RunHot: ingesting chunk %d [%d, %d]", uint32(chunkID), chunkID.FirstLedger(), chunkID.LastLedger()) - service := NewHotService(ings, sink) - raw := stream.RawLedgers(ctx, ledgerbackend.BoundedRange(chunkID.FirstLedger(), chunkID.LastLedger())) - return drain(ctx, raw, chunkID, service) -} - -// drain pulls the chunk's raw ledgers from the iterator and feeds each (as a view) -// to the service, then verifies the full [first,last] range was consumed. For the -// cold path this completeness check runs before Finalize, so a short stream never -// produces a finalized truncated artifact. The caller passes an iterator already -// bounded to the chunk's range; cancellation is the iterator's job (RawLedgers -// yields an error once ctx is canceled), so the loop needs no ctx poll of its own. -func drain(ctx context.Context, ledgers iter.Seq2[[]byte, error], chunkID chunk.ID, ing HotIngester) error { +// drain feeds each of the chunk's raw ledgers (as a borrowed view) to the +// service on a local sequence counter, then verifies the full [first,last] range +// was consumed — for cold this runs before Finalize, so a short stream never +// finalizes a truncated artifact. 
The in-order contract is enforced at the SOURCE +// (packStream reads positionally by key; hotLedgerStream key-checks its own +// keyspace; the SDK backends validate their own output), so drain trusts the +// counter rather than re-parsing every view's sequence. Cancellation is the +// iterator's job (RawLedgers errors on a canceled ctx), so there is no ctx poll +// here. +func drain(ctx context.Context, ledgers iter.Seq2[[]byte, error], chunkID chunk.ID, svc *ColdService) error { first, last := chunkID.FirstLedger(), chunkID.LastLedger() seq := first for raw, serr := range ledgers { if serr != nil { - return fmt.Errorf("RawLedgers(%d): %w", seq, serr) + return fmt.Errorf("ingest: stream for chunk %d: %w", uint32(chunkID), serr) } - // Reject a stream that runs PAST the chunk before ingesting anything - // out-of-chunk. Without this, an in-order overrun would only trip the - // post-loop count check after the extra ledgers were durably ingested - // (the ledger and txhash hot stores accept any sequence). All in-repo - // sources bound themselves; this guards custom iterators. + // Reject a stream that runs PAST the chunk before ingesting out-of-chunk. + // All in-repo sources self-bound; this guards a custom iterator. if seq > last { return fmt.Errorf("ingest: stream for chunk %d yielded a ledger past %d (chunk overrun)", uint32(chunkID), last) } - lcm := xdr.LedgerCloseMetaView(raw) - // Validate the actual ledger sequence before ingesting. The final - // count check below only catches a short/long stream; a source that - // yields a duplicate or out-of-order ledger with the right total - // count would otherwise pass silently (e.g. on the txhash and - // ledger-hot paths, which key on the LCM's own seq). 
- actual, aerr := lcm.LedgerSequence() - if aerr != nil { - return fmt.Errorf("ingest: stream for chunk %d: ledger sequence at expected %d: %w", - uint32(chunkID), seq, aerr) - } - if actual != seq { - return fmt.Errorf("ingest: stream for chunk %d yielded ledger %d, expected %d", - uint32(chunkID), actual, seq) - } - // seq is now VALIDATED as lcm's sequence — pass it through so the - // ingesters consume it instead of each re-deriving it from the view. - if err := ing.Ingest(ctx, seq, lcm); err != nil { + if err := svc.Ingest(ctx, seq, xdr.LedgerCloseMetaView(raw)); err != nil { return err } seq++ @@ -197,36 +60,40 @@ func drain(ctx context.Context, ledgers iter.Seq2[[]byte, error], chunkID chunk. return nil } -// ColdDirs is the per-type output root for one chunk's cold artifacts. An empty -// field for an enabled type is a config error. +// ColdDirs holds ONE chunk's RESOLVED cold-artifact destinations, derived by the +// caller from geometry.Layout so the ingesters write exactly where the freeze +// barrier and the sweeps resolve — the path formula lives in Layout alone, never +// re-derived here. LedgerPack and TxhashBin are the chunk's full file paths; +// EventsDir is its events bucket dir. An empty field for an enabled type is a +// config error. type ColdDirs struct { - Ledgers string - Txhash string - Events string + LedgerPack string + TxhashBin string + EventsDir string } -// buildColdIngesters opens one ColdIngester per enabled type under its dirs field. +// buildColdIngesters opens one ColdIngester per enabled type at its resolved path. // Single definition site of the ctor table, order, and rollback. 
func buildColdIngesters(dirs ColdDirs, chunkID chunk.ID, sink MetricSink, cfg Config) ([]ColdIngester, error) { ctors := []struct { enabled bool dataType string - dir string + path string open func(string, chunk.ID, MetricSink) (ColdIngester, error) }{ - {cfg.Ledgers, dataTypeLedgers, dirs.Ledgers, NewLedgerColdIngester}, - {cfg.Txhash, dataTypeTxhash, dirs.Txhash, NewTxhashColdIngester}, - {cfg.Events, dataTypeEvents, dirs.Events, NewEventsColdIngester}, + {cfg.Ledgers, dataTypeLedgers, dirs.LedgerPack, NewLedgerColdIngester}, + {cfg.Txhash, dataTypeTxhash, dirs.TxhashBin, NewTxhashColdIngester}, + {cfg.Events, dataTypeEvents, dirs.EventsDir, NewEventsColdIngester}, } ings := make([]ColdIngester, 0, len(ctors)) for _, c := range ctors { if !c.enabled { continue } - if c.dir == "" { - return nil, closeColdAll(ings, fmt.Errorf("ingest: %s enabled but ColdDirs.%s is empty", c.dataType, c.dataType)) + if c.path == "" { + return nil, closeColdAll(ings, fmt.Errorf("ingest: %s enabled but its ColdDirs path is empty", c.dataType)) } - ing, err := c.open(c.dir, chunkID, sink) + ing, err := c.open(c.path, chunkID, sink) if err != nil { return nil, closeColdAll(ings, fmt.Errorf("open %s cold ingester: %w", c.dataType, err)) } @@ -235,8 +102,8 @@ func buildColdIngesters(dirs ColdDirs, chunkID chunk.ID, sink MetricSink, cfg Co return ings, nil } -// WriteColdChunk materializes ONE chunk's cold artifacts into the roots named by -// dirs, in a single pass, from the already-opened raw ledger iterator. It is +// WriteColdChunk materializes ONE chunk's cold artifacts at the resolved paths +// named by dirs, in a single pass, from the already-opened raw ledger iterator. 
It is // SOURCE-BLIND: the caller (backfill) resolves the chunk's ledger source — the // local frozen .pack or the bulk backend — and hands its RawLedgers iterator here, // so the cold materializer never learns where the bytes came from and is faked in @@ -275,8 +142,8 @@ func WriteColdChunk( ings, berr := buildColdIngesters(dirs, chunkID, sink, cfg) if berr != nil { - // A constructor failure is still a chunk attempt - // (closeColdAll only emitted the per-ingester aborts). + // A constructor failure is still a chunk attempt: emit the aggregate + // (closeColdAll rolled back the built ingesters with no per-ingester emit). sink.ColdChunkTotal(time.Since(start)) return berr } diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/events.go b/cmd/stellar-rpc/internal/fullhistory/ingest/events.go index 6bf9268b9..98be9f62e 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/events.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/events.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "math" - "path/filepath" "time" "github.com/stellar/go-stellar-sdk/xdr" @@ -25,53 +24,6 @@ func eventPayloads(seq uint32, lcm xdr.LedgerCloseMetaView) ([]events.Payload, e return payloads, nil } -// ───────────────────────── Hot ingester ───────────────────────── - -// eventsHot derives []events.Payload from the view (events.LCMViewToPayloads) and -// writes them with IngestLedgerEvents. Each call is one atomic RocksDB batch -// (sync=true) plus an in-memory mirror update. The store is INJECTED, already -// bound to a chunk, and owned by the caller. -// -// IngestLedgerEvents is called on every ledger, including ones with zero -// payloads — LedgerOffsets.Append requires a contiguous sequence and would -// reject the next non-empty ledger if an empty one were skipped. -type eventsHot struct { - store *eventstore.HotStore - sink MetricSink -} - -// NewEventsHotIngester returns a HotIngester writing contract events into the -// injected, caller-owned store (already bound to a chunk). 
-func NewEventsHotIngester(store *eventstore.HotStore, sink MetricSink) HotIngester { - return &eventsHot{store: store, sink: orNop(sink)} -} - -func (e *eventsHot) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error { - m := newHotMetrics(e.sink, dataTypeEvents) - var err error - defer func() { m.emit(err) }() - - estart := time.Now() - payloads, eerr := eventPayloads(seq, lcm) - if eerr != nil { - err = eerr - return err - } - e.sink.IngestStage(dataTypeEvents, tierHot, stageExtract, time.Since(estart), len(payloads)) - // IngestLedgerEvents marshals each payload into a scratch buffer that - // RocksDB copies synchronously, so the borrowed ContractEventBytes (aliasing - // the view) is safe to pass. Term indexing happens inside the store call, - // so the write stage here covers term derivation + the RocksDB batch. - wstart := time.Now() - if ierr := e.store.IngestLedgerEvents(seq, payloads); ierr != nil { - err = fmt.Errorf("IngestLedgerEvents(seq=%d, n=%d): %w", seq, len(payloads), ierr) - return err - } - e.sink.IngestStage(dataTypeEvents, tierHot, stageWrite, time.Since(wstart), len(payloads)) - m.items = len(payloads) - return nil -} - // ───────────────────────── Cold ingester ───────────────────────── // eventsCold models the backfill path: per-ledger view → payloads → term-index @@ -95,11 +47,11 @@ type eventsCold struct { failed bool } -// NewEventsColdIngester opens a per-chunk events.pack cold writer under coldDir -// and returns a ColdIngester that owns it. The writer uses its zero-value -// options; driver-level tuning is a follow-up via Config. 
-func NewEventsColdIngester(coldDir string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) { - bucketDir := filepath.Join(coldDir, chunkID.BucketID()) +// NewEventsColdIngester opens a per-chunk events.pack cold writer in bucketDir — +// the caller's geometry.Layout.EventsBucketDir(chunkID), so the write path is +// Layout's single derivation — and returns a ColdIngester that owns it. The +// writer uses its zero-value options; driver-level tuning is a follow-up via Config. +func NewEventsColdIngester(bucketDir string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) { w, err := eventstore.NewColdWriter(chunkID, bucketDir, eventstore.ColdWriterOptions{}) if err != nil { return nil, fmt.Errorf("eventstore.NewColdWriter: %w", err) @@ -117,11 +69,12 @@ func NewEventsColdIngester(coldDir string, chunkID chunk.ID, sink MetricSink) (C func (e *eventsCold) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error { start := time.Now() n, ierr := e.ingestSeq(seq, lcm) + e.metrics.observe(time.Since(start), n, ierr) // terminal on err: observe emits the per-ingester signal if ierr != nil { - e.failed = true + e.failed = true // refuse a post-failure Finalize + return ierr } - e.metrics.observe(time.Since(start), n, ierr) - return ierr + return nil } // Finalize writes the events.pack trailer (Finish) + materializes the cold @@ -135,9 +88,9 @@ func (e *eventsCold) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMe func (e *eventsCold) Finalize(ctx context.Context) error { start := time.Now() if e.failed { - err := fmt.Errorf("events cold ingester for chunk %s: Finalize after failed Ingest", e.chunkID) - e.metrics.emit(time.Since(start), err) - return err + // Ingest already metered and latched this failure; refuse to finalize a + // chunk whose mirror/pack may be ahead of the offsets commit point. 
+ return fmt.Errorf("events cold ingester for chunk %s: Finalize after failed Ingest", e.chunkID) } if err := e.writer.Finish(e.offsets); err != nil { err = fmt.Errorf("events ColdWriter.Finish: %w", err) @@ -153,32 +106,29 @@ func (e *eventsCold) Finalize(ctx context.Context) error { e.metrics.emit(time.Since(start), err) return err } - e.metrics.sink.IngestStage(dataTypeEvents, tierCold, stageFinalize, time.Since(start), 0) + e.metrics.sink.IngestStage(dataTypeEvents, stageFinalize, time.Since(start), 0) e.metrics.emit(time.Since(start), nil) return nil } -// Close drops the partial events.pack when Finalize never ran, and emits the -// cold metrics if Finalize did not already (the failure path). The writer.Close -// error is folded into the emitted metric so a close-time failure (e.g. ENOSPC -// on the partial-drop) is counted in errors_total. emit is a no-op after a -// successful Finalize. Error propagation is unchanged: the writer.Close error is -// still returned. +// Close drops the partial events.pack when Finalize never ran. It does NOT emit +// the cold metric: a terminal Ingest error or Finalize already emitted it, and an +// ingester that never got that far (a rolled-back build) must produce no phantom +// sample. The writer.Close error is returned unchanged. func (e *eventsCold) Close() error { - cerr := e.writer.Close() - e.metrics.emit(0, cerr) - return cerr + return e.writer.Close() } // ingestSeq writes one ledger's events and returns the count written. The -// pre-Soroban (V0) policy lives in eventPayloads, shared with the hot tier. +// pre-Soroban (V0) policy lives in events.LCMViewToPayloads, shared with the +// hot tier. 
func (e *eventsCold) ingestSeq(seq uint32, lcm xdr.LedgerCloseMetaView) (int, error) { estart := time.Now() payloads, err := eventPayloads(seq, lcm) if err != nil { return 0, err } - e.metrics.sink.IngestStage(dataTypeEvents, tierCold, stageExtract, time.Since(estart), len(payloads)) + e.metrics.sink.IngestStage(dataTypeEvents, stageExtract, time.Since(estart), len(payloads)) startID := e.offsets.TotalEvents() if uint64(startID)+uint64(len(payloads)) > math.MaxUint32 { @@ -216,7 +166,7 @@ func (e *eventsCold) ingestSeq(seq uint32, lcm xdr.LedgerCloseMetaView) (int, er } writeDur += time.Since(wstart) } - e.metrics.sink.IngestStage(dataTypeEvents, tierCold, stageTermIndex, termDur, len(payloads)) + e.metrics.sink.IngestStage(dataTypeEvents, stageTermIndex, termDur, len(payloads)) // offsets.Append LAST — it is the commit point for the ledger. Its cost folds // into the write stage (rather than landing in the per-chunk total but in no @@ -227,13 +177,9 @@ func (e *eventsCold) ingestSeq(seq uint32, lcm xdr.LedgerCloseMetaView) (int, er //nolint:gosec // the overflow guard above proved startID+len(payloads) fits in uint32 oerr := e.offsets.Append(seq, uint32(len(payloads))) writeDur += time.Since(wstart) - e.metrics.sink.IngestStage(dataTypeEvents, tierCold, stageWrite, writeDur, len(payloads)) + e.metrics.sink.IngestStage(dataTypeEvents, stageWrite, writeDur, len(payloads)) if oerr != nil { return 0, fmt.Errorf("offsets append seq %d: %w", seq, oerr) } return len(payloads), nil } - -// abortMetric records a synthetic abort error so a subsequent Close emit does -// not look like a clean success. Used by the constructor-rollback path. 
-func (e *eventsCold) abortMetric(err error) { e.metrics.recordErr(err) } diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go b/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go index e98898302..033ea5f45 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/ingest_test.go @@ -7,13 +7,13 @@ import ( "iter" "os" "path/filepath" - "strconv" "sync" "testing" "time" "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" @@ -25,6 +25,7 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/events" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" ) @@ -35,10 +36,10 @@ const testPassphrase = "Public Global Stellar Network ; September 2015" // ───────────────────────── test metric sink ───────────────────────── -type hotCall struct { - dataType string - items int - err error +type hotPhaseCall struct { + phase hotchunk.Phase + items int + err error } type coldCall struct { @@ -49,26 +50,24 @@ type coldCall struct { type stageCall struct { dataType string - tier string stage string items int } // testSink records every MetricSink call for assertions. Safe for concurrent -// use (HotIngest fires from the per-ledger fan-out goroutines). +// use (the hot methods fire from the per-ledger ingestion goroutine). 
type testSink struct { mu sync.Mutex - hotIngests []hotCall + hotPhases []hotPhaseCall coldIngests []coldCall stages []stageCall - hotLedgerTotals int coldChunkTotals int } -func (s *testSink) HotIngest(dataType string, _ time.Duration, items int, err error) { +func (s *testSink) HotPhase(phase hotchunk.Phase, _ time.Duration, items int, err error) { s.mu.Lock() defer s.mu.Unlock() - s.hotIngests = append(s.hotIngests, hotCall{dataType, items, err}) + s.hotPhases = append(s.hotPhases, hotPhaseCall{phase, items, err}) } func (s *testSink) ColdIngest(dataType string, _ time.Duration, items int, err error) { @@ -77,45 +76,52 @@ func (s *testSink) ColdIngest(dataType string, _ time.Duration, items int, err e s.coldIngests = append(s.coldIngests, coldCall{dataType, items, err}) } -func (s *testSink) HotLedgerTotal(time.Duration) { - s.mu.Lock() - defer s.mu.Unlock() - s.hotLedgerTotals++ -} - func (s *testSink) ColdChunkTotal(time.Duration) { s.mu.Lock() defer s.mu.Unlock() s.coldChunkTotals++ } -func (s *testSink) IngestStage(dataType, tier, stage string, _ time.Duration, items int) { +func (s *testSink) IngestStage(dataType, stage string, _ time.Duration, items int) { s.mu.Lock() defer s.mu.Unlock() - s.stages = append(s.stages, stageCall{dataType, tier, stage, items}) + s.stages = append(s.stages, stageCall{dataType, stage, items}) } -// stageCounts counts IngestStage calls keyed "dataType/tier/stage". +// stageCounts counts cold IngestStage calls keyed "dataType/stage". func (s *testSink) stageCounts() map[string]int { s.mu.Lock() defer s.mu.Unlock() m := map[string]int{} for _, c := range s.stages { - m[c.dataType+"/"+c.tier+"/"+c.stage]++ + m[c.dataType+"/"+c.stage]++ } return m } -func (s *testSink) hotDataTypes() map[string]int { +// hotPhaseItems returns the items reported per hot phase, keyed by phase. 
+func (s *testSink) hotPhaseItems() map[hotchunk.Phase]int { s.mu.Lock() defer s.mu.Unlock() - m := map[string]int{} - for _, c := range s.hotIngests { - m[c.dataType]++ + m := map[hotchunk.Phase]int{} + for _, c := range s.hotPhases { + m[c.phase] += c.items } return m } +// hotPhaseErr returns the phase that carried a non-nil error, or (0,false) if none. +func (s *testSink) hotPhaseErr() (hotchunk.Phase, bool) { + s.mu.Lock() + defer s.mu.Unlock() + for _, c := range s.hotPhases { + if c.err != nil { + return c.phase, true + } + } + return 0, false +} + func (s *testSink) coldDataTypes() map[string]int { s.mu.Lock() defer s.mu.Unlock() @@ -191,13 +197,16 @@ func packPath(ledgersRoot string, c chunk.ID) string { return filepath.Join(ledgersRoot, c.BucketID(), ledger.PackName(c)) } -// coldDirsAt derives the three per-type cold roots under one dir — the fixed -// layout the removed RunCold used, convenient for single-tmpdir tests. -func coldDirsAt(dir string) ColdDirs { +// coldDirsAt resolves chunk c's three cold-artifact paths under one dir's per-type +// roots — mirroring what geometry.Layout derives in production, so the readback +// helpers (packPath/txhashBinPath) find what the ingesters wrote. +// +//nolint:unparam // chunk-general helper; every current caller uses chunk 0 +func coldDirsAt(dir string, c chunk.ID) ColdDirs { return ColdDirs{ - Ledgers: filepath.Join(dir, dataTypeLedgers), - Txhash: filepath.Join(dir, dataTypeTxhash), - Events: filepath.Join(dir, dataTypeEvents), + LedgerPack: packPath(filepath.Join(dir, dataTypeLedgers), c), + TxhashBin: txhashBinPath(filepath.Join(dir, dataTypeTxhash)), + EventsDir: filepath.Join(dir, dataTypeEvents, c.BucketID()), } } @@ -384,29 +393,6 @@ func marshalV0LCM(t *testing.T, seq uint32) []byte { return raw } -// seqStream is a ledgerbackend.LedgerStream that yields LCMs for an explicit -// list of ledger sequences (in order), regardless of the requested range. 
It -// models a backend that hands back a duplicate / out-of-order / wrong-but- -// right-count sequence, exercising the drain seq guard. -type seqStream struct { - t *testing.T - seqs []uint32 -} - -var _ ledgerbackend.LedgerStream = (*seqStream)(nil) - -func (s *seqStream) RawLedgers( - _ context.Context, _ ledgerbackend.Range, _ ...ledgerbackend.StreamOption, -) iter.Seq2[[]byte, error] { - return func(yield func([]byte, error) bool) { - for _, seq := range s.seqs { - if !yield(marshalLCM(s.t, seq), nil) { - return - } - } - } -} - // errAtSeqStream yields valid LCMs until it reaches errAtSeq, where it yields // (nil, err) — modeling a backend that fails mid-stream. Used to exercise the // drain RawLedgers error path. @@ -436,68 +422,6 @@ func (s *errAtSeqStream) RawLedgers( // ───────────────────────── per-ingester unit tests ───────────────────────── -// TestLedgerHotIngester_Readback ingests one ledger via the hot ledger ingester -// (injected store) and reads the bytes back. -func TestLedgerHotIngester_Readback(t *testing.T) { - seq := chunk.ID(0).FirstLedger() - raw := marshalLCM(t, seq) - dir := t.TempDir() - logger := testLogger() - - store, err := ledger.OpenHotStore(dir, chunk.ID(0), logger) - require.NoError(t, err) - defer func() { require.NoError(t, store.Close()) }() - - ing := NewLedgerHotIngester(store, nil) - require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw))) - - got, err := store.GetLedgerRaw(seq) - require.NoError(t, err) - require.Equal(t, raw, got) -} - -// TestTxhashHotIngester_Lookup ingests an event/tx-bearing ledger via the hot -// txhash ingester and looks the hash up. 
-func TestTxhashHotIngester_Lookup(t *testing.T) { - seq := chunk.ID(0).FirstLedger() - raw, hash, _ := marshalLCMWithEvent(t, seq) - dir := t.TempDir() - logger := testLogger() - - store, err := txhash.NewHotStore(dir, chunk.ID(0), logger) - require.NoError(t, err) - defer func() { require.NoError(t, store.Close()) }() - - ing := NewTxhashHotIngester(store, nil) - require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw))) - - got, err := store.Get(hash) - require.NoError(t, err) - require.Equal(t, seq, got) -} - -// TestEventsHotIngester_Query ingests an event-bearing ledger via the hot events -// ingester and resolves the term. -func TestEventsHotIngester_Query(t *testing.T) { - chunkID := chunk.ID(0) - seq := chunkID.FirstLedger() - raw, _, term := marshalLCMWithEvent(t, seq) - dir := t.TempDir() - logger := testLogger() - - store, err := eventstore.OpenHotStore(dir, chunkID, logger) - require.NoError(t, err) - defer func() { require.NoError(t, store.Close()) }() - - ing := NewEventsHotIngester(store, nil) - require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(raw))) - - bm, err := store.Lookup(context.Background(), term) - require.NoError(t, err) - require.NotNil(t, bm) - require.Equal(t, uint64(1), bm.GetCardinality()) -} - // TestLedgerColdIngester_Readback ingests one ledger via the cold ledger // ingester, finalizes, and reads back through the cold reader. 
func TestLedgerColdIngester_Readback(t *testing.T) { @@ -506,7 +430,7 @@ func TestLedgerColdIngester_Readback(t *testing.T) { raw := marshalLCM(t, seq) coldDir := t.TempDir() - ing, err := NewLedgerColdIngester(coldDir, chunkID, nil) + ing, err := NewLedgerColdIngester(packPath(coldDir, chunkID), chunkID, nil) require.NoError(t, err) defer func() { require.NoError(t, ing.Close()) }() @@ -535,7 +459,7 @@ func TestTxhashColdIngester_Bin(t *testing.T) { first := chunkID.FirstLedger() coldDir := t.TempDir() - ing, err := NewTxhashColdIngester(coldDir, chunkID, nil) + ing, err := NewTxhashColdIngester(txhashBinPath(coldDir), chunkID, nil) require.NoError(t, err) defer func() { require.NoError(t, ing.Close()) }() @@ -557,7 +481,7 @@ func TestEventsColdIngester_Readback(t *testing.T) { first := chunkID.FirstLedger() coldDir := t.TempDir() - ing, err := NewEventsColdIngester(coldDir, chunkID, nil) + ing, err := NewEventsColdIngester(filepath.Join(coldDir, chunkID.BucketID()), chunkID, nil) require.NoError(t, err) defer func() { require.NoError(t, ing.Close()) }() @@ -584,28 +508,6 @@ func TestEventsColdIngester_Readback(t *testing.T) { // ───────────────────────── V0 (pre-Soroban) events handling ───────────────────────── -// TestEventsHotIngester_V0AsEmpty asserts the hot events ingester treats a V0 -// LCM as a zero-event ledger (no error) rather than failing the range, and that -// the store records the empty ledger (its event count is unchanged). 
-func TestEventsHotIngester_V0AsEmpty(t *testing.T) { - chunkID := chunk.ID(0) - seq := chunkID.FirstLedger() - dir := t.TempDir() - logger := testLogger() - - store, err := eventstore.OpenHotStore(dir, chunkID, logger) - require.NoError(t, err) - defer func() { require.NoError(t, store.Close()) }() - - ing := NewEventsHotIngester(store, nil) - require.NoError(t, ing.Ingest(context.Background(), seq, xdr.LedgerCloseMetaView(marshalV0LCM(t, seq))), - "V0 ledger must ingest as zero events, not error") - - cnt, err := store.EventCount() - require.NoError(t, err) - require.Equal(t, uint32(0), cnt, "V0 ledger contributes no events") -} - // TestEventsColdIngester_V0KeepsOffsetsContiguous ingests a V0 ledger followed by // an event-bearing V2 ledger and asserts: the V0 ledger does not error, and the // LedgerOffsets stay contiguous (both ledgers present, the event-bearing one's @@ -615,7 +517,7 @@ func TestEventsColdIngester_V0KeepsOffsetsContiguous(t *testing.T) { first := chunkID.FirstLedger() coldDir := t.TempDir() - ing, err := NewEventsColdIngester(coldDir, chunkID, nil) + ing, err := NewEventsColdIngester(filepath.Join(coldDir, chunkID.BucketID()), chunkID, nil) require.NoError(t, err) defer func() { require.NoError(t, ing.Close()) }() @@ -674,7 +576,7 @@ func TestWriteColdChunk_EventlessChunk_FullyReadable(t *testing.T) { // Every ledger in the chunk is a V0 (pre-Soroban) ledger → zero events. 
require.NoError(t, WriteColdChunk( context.Background(), logger, chunkID, rawChunk(fullStream(t, chunkID, marshalV0LCM), chunkID), - coldDirsAt(coldDir), sink, Config{Events: true}, + coldDirsAt(coldDir, chunkID), sink, Config{Events: true}, )) bucketDir := filepath.Join(coldDir, dataTypeEvents, chunkID.BucketID()) @@ -706,92 +608,6 @@ func TestWriteColdChunk_EventlessChunk_FullyReadable(t *testing.T) { require.Zero(t, sink.coldErrorTypes()[dataTypeEvents], "eventless chunk is not an error") } -// ───────────────────────── HotService tests ───────────────────────── - -// TestHotService_AllTypes_FanOut runs HotService with all three hot ingesters -// over event/tx-bearing ledgers and reads each store back, asserting the -// aggregate HotLedgerTotal and per-ingester signals fired. -func TestHotService_AllTypes_FanOut(t *testing.T) { - chunkID := chunk.ID(0) - first := chunkID.FirstLedger() - logger := testLogger() - dir := t.TempDir() - - ls, err := ledger.OpenHotStore(filepath.Join(dir, "ledgers"), chunkID, logger) - require.NoError(t, err) - defer func() { require.NoError(t, ls.Close()) }() - ts, err := txhash.NewHotStore(filepath.Join(dir, "txhash"), chunkID, logger) - require.NoError(t, err) - defer func() { require.NoError(t, ts.Close()) }() - es, err := eventstore.OpenHotStore(filepath.Join(dir, "events"), chunkID, logger) - require.NoError(t, err) - defer func() { require.NoError(t, es.Close()) }() - - sink := &testSink{} - service := NewHotService([]HotIngester{ - NewLedgerHotIngester(ls, sink), - NewTxhashHotIngester(ts, sink), - NewEventsHotIngester(es, sink), - }, sink) - - rawA, hashA, termA := marshalLCMWithEvent(t, first) - rawB, hashB, _ := marshalLCMWithEvent(t, first+1) - require.NoError(t, service.Ingest(context.Background(), first, xdr.LedgerCloseMetaView(rawA))) - require.NoError(t, service.Ingest(context.Background(), first+1, xdr.LedgerCloseMetaView(rawB))) - - // All three stores retained the data. 
- gotRawA, err := ls.GetLedgerRaw(first) - require.NoError(t, err) - require.Equal(t, rawA, gotRawA) - gotA, err := ts.Get(hashA) - require.NoError(t, err) - require.Equal(t, first, gotA) - gotB, err := ts.Get(hashB) - require.NoError(t, err) - require.Equal(t, first+1, gotB) - bm, err := es.Lookup(context.Background(), termA) - require.NoError(t, err) - require.Equal(t, uint64(2), bm.GetCardinality()) - - // Aggregate + per-ingester signals. - require.Equal(t, 2, sink.hotLedgerTotals, "one HotLedgerTotal per ledger") - dt := sink.hotDataTypes() - require.Equal(t, 2, dt[dataTypeLedgers]) - require.Equal(t, 2, dt[dataTypeTxhash]) - require.Equal(t, 2, dt[dataTypeEvents]) - - // Per-stage signals: each ledger fired the hot extract/write stages its - // data type defines (ledgers has no extract — it writes the view verbatim). - st := sink.stageCounts() - require.Equal(t, 2, st[dataTypeLedgers+"/"+tierHot+"/"+stageWrite]) - require.Equal(t, 2, st[dataTypeTxhash+"/"+tierHot+"/"+stageExtract]) - require.Equal(t, 2, st[dataTypeTxhash+"/"+tierHot+"/"+stageWrite]) - require.Equal(t, 2, st[dataTypeEvents+"/"+tierHot+"/"+stageExtract]) - require.Equal(t, 2, st[dataTypeEvents+"/"+tierHot+"/"+stageWrite]) -} - -// TestHotService_EnabledSubset runs HotService with only the ledger ingester and -// asserts only that type's signals fire. 
-func TestHotService_EnabledSubset(t *testing.T) { - seq := chunk.ID(0).FirstLedger() - logger := testLogger() - dir := t.TempDir() - - ls, err := ledger.OpenHotStore(dir, chunk.ID(0), logger) - require.NoError(t, err) - defer func() { require.NoError(t, ls.Close()) }() - - sink := &testSink{} - service := NewHotService([]HotIngester{NewLedgerHotIngester(ls, sink)}, sink) - require.NoError(t, service.Ingest(context.Background(), seq, viewOf(t, seq))) - - require.Equal(t, 1, sink.hotLedgerTotals) - dt := sink.hotDataTypes() - require.Equal(t, 1, dt[dataTypeLedgers]) - require.Zero(t, dt[dataTypeTxhash]) - require.Zero(t, dt[dataTypeEvents]) -} - // ───────────────────────── ColdService tests ───────────────────────── // TestColdService_Success drives ledger+txhash+events cold ingesters through a @@ -802,7 +618,8 @@ func TestColdService_Success(t *testing.T) { coldDir := t.TempDir() sink := &testSink{} - ings, err := buildColdIngesters(coldDirsAt(coldDir), chunkID, sink, Config{Ledgers: true, Txhash: true, Events: true}) + ings, err := buildColdIngesters( + coldDirsAt(coldDir, chunkID), chunkID, sink, Config{Ledgers: true, Txhash: true, Events: true}) require.NoError(t, err) service := NewColdService(ings, sink) defer func() { require.NoError(t, service.Close()) }() @@ -855,14 +672,14 @@ func TestColdService_Success(t *testing.T) { // events now emits term_index/write for every ledger, and txhash's extract // spans its whole per-ledger Ingest. 
require.Equal(t, map[string]int{ - dataTypeLedgers + "/" + tierCold + "/" + stageWrite: 2, - dataTypeLedgers + "/" + tierCold + "/" + stageFinalize: 1, - dataTypeTxhash + "/" + tierCold + "/" + stageExtract: 2, - dataTypeTxhash + "/" + tierCold + "/" + stageFinalize: 1, - dataTypeEvents + "/" + tierCold + "/" + stageExtract: 2, - dataTypeEvents + "/" + tierCold + "/" + stageTermIndex: 2, - dataTypeEvents + "/" + tierCold + "/" + stageWrite: 2, - dataTypeEvents + "/" + tierCold + "/" + stageFinalize: 1, + dataTypeLedgers + "/" + stageWrite: 2, + dataTypeLedgers + "/" + stageFinalize: 1, + dataTypeTxhash + "/" + stageExtract: 2, + dataTypeTxhash + "/" + stageFinalize: 1, + dataTypeEvents + "/" + stageExtract: 2, + dataTypeEvents + "/" + stageTermIndex: 2, + dataTypeEvents + "/" + stageWrite: 2, + dataTypeEvents + "/" + stageFinalize: 1, }, sink.stageCounts()) // No double-emit: the deferred Close (after this body) must not add a second @@ -887,22 +704,21 @@ func (f *failingCold) Ingest(context.Context, uint32, xdr.LedgerCloseMetaView) e func (f *failingCold) Finalize(context.Context) error { f.finalized = true; return nil } func (f *failingCold) Close() error { f.closed = true; return nil } -// TestColdService_FailurePath_NoArtifact uses a real ledger cold ingester plus a +// TestColdService_FailurePath_NoArtifact uses two real cold ingesters plus a // failing sibling: ColdService.Ingest returns the sibling's error, Finalize is // not called, the deferred Close drops the partial ledger pack, and no finalized -// artifact remains. It also asserts the cold metrics still fire on this failure -// path: each real ingester emits exactly one ColdIngest and the service emits one -// aggregate ColdChunkTotal — driven from Close, since Finalize never ran. +// artifact remains. 
It asserts the aggregate ColdChunkTotal still fires for the +// attempt, but the two real ingesters emit NO per-ingester ColdIngest: each +// ingested cleanly (no terminal error of its own) and never finalized, and Close +// no longer emits — so a chunk abandoned by a sibling leaves no phantom sample. func TestColdService_FailurePath_NoArtifact(t *testing.T) { chunkID := chunk.ID(0) coldDir := t.TempDir() sink := &testSink{} - // Two real cold ingesters (ledger + events) plus a failing sibling, so we can - // assert each real ingester emits its per-chunk ColdIngest from Close. - realLedger, err := NewLedgerColdIngester(filepath.Join(coldDir, dataTypeLedgers), chunkID, sink) + realLedger, err := NewLedgerColdIngester(packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID), chunkID, sink) require.NoError(t, err) - realEvents, err := NewEventsColdIngester(filepath.Join(coldDir, dataTypeEvents), chunkID, sink) + realEvents, err := NewEventsColdIngester(filepath.Join(coldDir, dataTypeEvents, chunkID.BucketID()), chunkID, sink) require.NoError(t, err) failing := &failingCold{} service := NewColdService([]ColdIngester{realLedger, realEvents, failing}, sink) @@ -913,19 +729,16 @@ func TestColdService_FailurePath_NoArtifact(t *testing.T) { require.ErrorIs(t, err, errFailingCold) require.False(t, failing.finalized, "Finalize must not run on the failure path") - // Before Close, no cold metric has fired (emission is deferred to Close on the - // failure path). - require.Empty(t, sink.coldDataTypes(), "no ColdIngest before Close on failure path") - require.Zero(t, sink.coldChunkTotals, "no ColdChunkTotal before Close on failure path") + // Nothing has emitted: the real ingesters ingested cleanly (no terminal error) + // and never finalized; the mock sibling records nothing. 
+ require.Empty(t, sink.coldDataTypes(), "no per-ingester ColdIngest on the sibling-failure path") + require.Zero(t, sink.coldChunkTotals, "no ColdChunkTotal before Close") - // Close drops partials and drives the deferred metric emissions. + // Close drops partials and emits the aggregate only. require.NoError(t, service.Close()) require.True(t, failing.closed) - // Each real ingester emitted exactly one ColdIngest; the aggregate fired once. - cdt := sink.coldDataTypes() - require.Equal(t, 1, cdt[dataTypeLedgers], "ledger cold ingester emits once on failure path") - require.Equal(t, 1, cdt[dataTypeEvents], "events cold ingester emits once on failure path") + require.Empty(t, sink.coldDataTypes(), "a chunk abandoned by a sibling emits no per-ingester ColdIngest") require.Equal(t, 1, sink.coldChunkTotals, "exactly one aggregate ColdChunkTotal") // No finalized ledger pack must exist. @@ -938,24 +751,27 @@ func TestColdService_FailurePath_NoArtifact(t *testing.T) { // so its OWN Ingest fails (recording firstErr), then Close. The failure is an // out-of-order seq: the per-chunk ColdWriter expects the chunk's first ledger, // so AppendLedger rejects a later one. Per #765 a failed cold chunk must record -// a per-ingester error count and an aggregate duration sample. Emission happens -// exactly once (from Close), with the accumulated error carried. +// a per-ingester error count and an aggregate duration sample. A terminal Ingest +// error emits the single per-ingester ColdIngest right there (Close no longer +// emits), so the error-carrying sample is present after Ingest returns. 
func TestColdIngester_Failure_RecordsErrorMetric(t *testing.T) { chunkID := chunk.ID(0) coldDir := t.TempDir() sink := &testSink{} - realLedger, err := NewLedgerColdIngester(filepath.Join(coldDir, dataTypeLedgers), chunkID, sink) + realLedger, err := NewLedgerColdIngester(packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID), chunkID, sink) require.NoError(t, err) service := NewColdService([]ColdIngester{realLedger}, sink) // An out-of-order seq makes the writer's own AppendLedger fail inside the - // ingester's Ingest, so it records its firstErr. (drain would never feed - // this — the test targets the ingester's metric path directly.) + // ingester's Ingest, so it records its firstErr and emits the error-carrying + // ColdIngest. (drain would never feed this — the test targets the ingester's + // metric path directly.) wrongSeq := chunkID.FirstLedger() + 5 require.Error(t, service.Ingest(context.Background(), wrongSeq, viewOf(t, wrongSeq))) + require.Equal(t, 1, sink.coldDataTypes()[dataTypeLedgers], "the failed Ingest emits its ColdIngest immediately") - // Finalize is skipped on this path; Close drives the single emission. + // Finalize is skipped on this path; Close emits nothing more. require.NoError(t, service.Close()) // Exactly one ColdIngest for ledgers, carrying the error, plus one aggregate. @@ -972,13 +788,16 @@ func TestPrometheusSink_Smoke(t *testing.T) { reg := prometheus.NewRegistry() require.NotPanics(t, func() { sink := NewPrometheusSink(reg, "test") - sink.HotIngest(dataTypeLedgers, time.Millisecond, 1, nil) - sink.HotIngest(dataTypeEvents, time.Millisecond, 3, errFailingCold) + // The five hot per-ledger phases: extract/commit carry no items, the write + // phases carry per-type volume; the commit phase exercises the error dimension. 
+ sink.HotPhase(hotchunk.PhaseExtract, time.Millisecond, 0, nil) + sink.HotPhase(hotchunk.PhaseLedgers, time.Millisecond, 1, nil) + sink.HotPhase(hotchunk.PhaseTxhash, time.Millisecond, 5, nil) + sink.HotPhase(hotchunk.PhaseEvents, time.Millisecond, 3, nil) + sink.HotPhase(hotchunk.PhaseCommit, time.Millisecond, 0, errFailingCold) sink.ColdIngest(dataTypeTxhash, time.Second, 100, nil) - sink.HotLedgerTotal(time.Millisecond) sink.ColdChunkTotal(time.Second) - sink.IngestStage(dataTypeEvents, tierHot, stageExtract, time.Millisecond, 3) - sink.IngestStage(dataTypeEvents, tierCold, stageFinalize, time.Second, 0) + sink.IngestStage(dataTypeEvents, stageFinalize, time.Second, 0) }) mfs, err := reg.Gather() @@ -986,78 +805,6 @@ func TestPrometheusSink_Smoke(t *testing.T) { require.NotEmpty(t, mfs) } -// ───────────────────────── hot driver tests ───────────────────────── - -// TestRunHot_AllTypes_Readback runs the RunHot driver with injected hot stores -// over event/tx-bearing ledgers and asserts each hot store reads back. The short -// stream ends early so RunHot returns the completeness error after both ledgers -// are fully ingested. 
-func TestRunHot_AllTypes_Readback(t *testing.T) { - chunkID := chunk.ID(0) - first := chunkID.FirstLedger() - logger := testLogger() - dir := t.TempDir() - - ls, err := ledger.OpenHotStore(filepath.Join(dir, "ledgers"), chunkID, logger) - require.NoError(t, err) - defer func() { require.NoError(t, ls.Close()) }() - ts, err := txhash.NewHotStore(filepath.Join(dir, "txhash"), chunkID, logger) - require.NoError(t, err) - defer func() { require.NoError(t, ts.Close()) }() - es, err := eventstore.OpenHotStore(filepath.Join(dir, "events"), chunkID, logger) - require.NoError(t, err) - defer func() { require.NoError(t, es.Close()) }() - - evSeqA, evSeqB := first, first+1 - rawA, hashA, termA := marshalLCMWithEvent(t, evSeqA) - rawB, hashB, _ := marshalLCMWithEvent(t, evSeqB) - gen := func(tt *testing.T, seq uint32) []byte { - switch seq { - case evSeqA: - return rawA - case evSeqB: - return rawB - default: - return marshalLCM(tt, seq) - } - } - stream := &fakeStream{t: t, count: 2, gen: gen} - - stores := HotStores{Ledgers: ls, Txhash: ts, Events: es} - cfg := Config{Ledgers: true, Txhash: true, Events: true} - - err = RunHot(context.Background(), logger, stream, chunkID, stores, nil, cfg) - require.Error(t, err) - require.Contains(t, err.Error(), "ended at") - - gotRawA, err := ls.GetLedgerRaw(evSeqA) - require.NoError(t, err) - require.Equal(t, rawA, gotRawA) - - gotA, err := ts.Get(hashA) - require.NoError(t, err) - require.Equal(t, evSeqA, gotA) - gotB, err := ts.Get(hashB) - require.NoError(t, err) - require.Equal(t, evSeqB, gotB) - - bm, err := es.Lookup(context.Background(), termA) - require.NoError(t, err) - require.NotNil(t, bm) - require.Equal(t, uint64(2), bm.GetCardinality(), "both sentinel events share the term") -} - -// TestRunHot_MissingStore asserts RunHot rejects an enabled type with a nil -// injected store. 
-func TestRunHot_MissingStore(t *testing.T) { - chunkID := chunk.ID(0) - logger := testLogger() - err := RunHot(context.Background(), logger, &fakeStream{t: t, count: 1}, chunkID, - HotStores{}, nil, Config{Ledgers: true}) - require.Error(t, err) - require.Contains(t, err.Error(), "HotStores.Ledgers is nil") -} - // ───────────────────────── cold driver tests ───────────────────────── func TestWriteColdChunk_RoundTrip(t *testing.T) { @@ -1070,7 +817,8 @@ func TestWriteColdChunk_RoundTrip(t *testing.T) { sink := &testSink{} require.NoError(t, WriteColdChunk( - context.Background(), logger, chunkID, rawChunk(stream, chunkID), coldDirsAt(coldDir), sink, Config{Ledgers: true}, + context.Background(), logger, chunkID, rawChunk(stream, chunkID), + coldDirsAt(coldDir, chunkID), sink, Config{Ledgers: true}, )) path := packPath(filepath.Join(coldDir, "ledgers"), chunkID) @@ -1099,7 +847,8 @@ func TestWriteColdChunk_ShortStream_NoArtifact(t *testing.T) { short := &fakeStream{t: t, count: 3} err := WriteColdChunk( - context.Background(), logger, chunkID, rawChunk(short, chunkID), coldDirsAt(coldDir), nil, Config{Ledgers: true}, + context.Background(), logger, chunkID, rawChunk(short, chunkID), + coldDirsAt(coldDir, chunkID), nil, Config{Ledgers: true}, ) require.Error(t, err) require.Contains(t, err.Error(), "ended at") @@ -1128,7 +877,7 @@ func TestWriteColdChunk_TxhashCold_Bin(t *testing.T) { require.NoError(t, WriteColdChunk( context.Background(), logger, chunkID, rawChunk(fullStream(t, chunkID, gen), chunkID), - coldDirsAt(coldDir), nil, Config{Txhash: true}, + coldDirsAt(coldDir, chunkID), nil, Config{Txhash: true}, )) entries, err := txhash.ReadColdBin(txhashBinPath(filepath.Join(coldDir, dataTypeTxhash))) @@ -1157,7 +906,7 @@ func TestWriteColdChunk_EventsCold_Readback(t *testing.T) { require.NoError(t, WriteColdChunk( context.Background(), logger, chunkID, rawChunk(fullStream(t, chunkID, gen), chunkID), - coldDirsAt(coldDir), nil, Config{Events: true}, + 
coldDirsAt(coldDir, chunkID), nil, Config{Events: true}, )) bucketDir := filepath.Join(coldDir, "events", chunkID.BucketID()) @@ -1174,76 +923,17 @@ func TestWriteColdChunk_EventsCold_Readback(t *testing.T) { require.Equal(t, uint64(len(evSeqs)), bm.GetCardinality()) } -// ───────────────────────── drain seq guard (P0-1) ───────────────────────── - -// TestWriteColdChunk_OutOfOrderSeq_NoArtifact feeds a stream that yields a ledger out -// of expected order (the second ledger repeats the first's seq — right total -// count, wrong sequence). drain must reject it with the mismatch error before -// any Finalize, and leave no cold artifact behind. -func TestWriteColdChunk_OutOfOrderSeq_NoArtifact(t *testing.T) { - chunkID := chunk.ID(0) - first := chunkID.FirstLedger() - last := chunkID.LastLedger() - coldDir := t.TempDir() - logger := testLogger() - - // Build a full-length seq list, then corrupt the second entry to a - // duplicate of the first: same count as a valid stream, wrong order. - seqs := make([]uint32, 0, last-first+1) - for s := first; s <= last; s++ { - seqs = append(seqs, s) - } - require.GreaterOrEqual(t, len(seqs), 2) - seqs[1] = seqs[0] // duplicate/out-of-order while keeping the count intact - - stream := &seqStream{t: t, seqs: seqs} - err := WriteColdChunk( - context.Background(), logger, chunkID, rawChunk(stream, chunkID), coldDirsAt(coldDir), nil, Config{Ledgers: true}, - ) - require.Error(t, err) - require.Contains(t, err.Error(), "yielded ledger") - require.Contains(t, err.Error(), "expected") - - // No finalized artifact: the deferred Close dropped the partial pack. 
- path := packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID) - _, statErr := os.Stat(path) - require.True(t, os.IsNotExist(statErr), "expected no cold artifact at %s, stat err: %v", path, statErr) -} - -// TestDrain_TxhashSeqGuard asserts the guard also fires on the txhash path, -// where a wrong-but-right-count sequence would otherwise be silently absorbed -// (each ledger keys on its own LCM seq). -func TestDrain_TxhashSeqGuard(t *testing.T) { - chunkID := chunk.ID(0) - first := chunkID.FirstLedger() - last := chunkID.LastLedger() - coldDir := t.TempDir() - logger := testLogger() - - seqs := make([]uint32, 0, last-first+1) - for s := first; s <= last; s++ { - seqs = append(seqs, s) - } - require.GreaterOrEqual(t, len(seqs), 2) - // Corrupt the SECOND ledger so at least one valid ledger is ingested - // before the guard fires. - seqs[1] += 100 - - err := WriteColdChunk( - context.Background(), logger, chunkID, rawChunk(&seqStream{t: t, seqs: seqs}, chunkID), - coldDirsAt(coldDir), nil, Config{Txhash: true}, - ) - require.Error(t, err) - require.Contains(t, err.Error(), "yielded ledger") - - binPath := txhashBinPath(filepath.Join(coldDir, dataTypeTxhash)) - _, statErr := os.Stat(binPath) - require.True(t, os.IsNotExist(statErr), "expected no .bin at %s, stat err: %v", binPath, statErr) -} +// ───────────────────────── drain stream errors ───────────────────────── +// +// The per-seq order guard the shared cursor used to run in drain moved to the +// SOURCE (packStream reads positionally; hotLedgerStream key-checks its keyspace, +// see TestSource_RejectsGap; the SDK backends validate their own output), so drain +// keeps only its overrun + completeness checks on a local counter. The tests that +// fed an artificially mis-ordered stream to drain were deleted with the cursor. 
// TestWriteColdChunk_DrainStreamError_NoArtifact exercises the drain mid-stream error // path: the backend yields valid ledgers, then hands back (nil, err) at a seq in -// the middle of the chunk. drain must wrap the error with RawLedgers + the seq, +// the middle of the chunk. drain must propagate the error (wrapped with the chunk), // short-circuit before Finalize (so no cold artifact is committed), and the // deferred Close must drop the partial. func TestWriteColdChunk_DrainStreamError_NoArtifact(t *testing.T) { @@ -1257,12 +947,12 @@ func TestWriteColdChunk_DrainStreamError_NoArtifact(t *testing.T) { stream := &errAtSeqStream{t: t, errAtSeq: failAt, err: wantErr} err := WriteColdChunk( - context.Background(), logger, chunkID, rawChunk(stream, chunkID), coldDirsAt(coldDir), nil, Config{Ledgers: true}, + context.Background(), logger, chunkID, rawChunk(stream, chunkID), + coldDirsAt(coldDir, chunkID), nil, Config{Ledgers: true}, ) require.Error(t, err) require.ErrorIs(t, err, wantErr, "the backend error must propagate") - require.Contains(t, err.Error(), "RawLedgers", "error wraps RawLedgers") - require.Contains(t, err.Error(), strconv.FormatUint(uint64(failAt), 10), "error names the failing seq") + require.Contains(t, err.Error(), "stream for chunk", "error wraps the drained chunk") // Finalize never ran → no finalized artifact; deferred Close dropped the partial. path := packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID) @@ -1275,103 +965,60 @@ func TestWriteColdChunk_DrainStreamError_NoArtifact(t *testing.T) { // pkg/stores/txhash (cold_bin_test.go); these tests only cover the // ingester-level behavior on top of it. -// ───────────────────────── HotService failure path (P1-c) ───────────────────────── - -// failingHot is a HotIngester whose Ingest always fails. ctxObserved records -// whether the ingester's context was already canceled when it ran (used to -// show errgroup sibling cancellation in the multi-ingester path). 
-type failingHot struct { - mu sync.Mutex - ran int - ctxObserved error -} - -var errFailingHot = errors.New("failingHot: induced ingest failure") +// ───────────────────────── hot service emission ───────────────────────── -func (f *failingHot) Ingest(ctx context.Context, _ uint32, _ xdr.LedgerCloseMetaView) error { - f.mu.Lock() - f.ran++ - f.ctxObserved = ctx.Err() - f.mu.Unlock() - return errFailingHot -} - -// blockingHot blocks until its context is canceled, then reports the cancel -// error. Pairs with failingHot in the multi-ingester test to prove the first -// error cancels the siblings via the errgroup context. -type blockingHot struct { - canceled chan struct{} - once sync.Once +func hotTestLogger() *supportlog.Entry { + l := supportlog.New() + l.SetLevel(logrus.ErrorLevel) + return l } -func (b *blockingHot) Ingest(ctx context.Context, _ uint32, _ xdr.LedgerCloseMetaView) error { - <-ctx.Done() - b.once.Do(func() { close(b.canceled) }) - return ctx.Err() -} +// TestHotService_EmitsEveryPhaseOnSuccess constructs a HotService over a real hot +// DB with a recording sink and asserts one successful ingest emits every phase +// once, the write phases carry per-type volume (extract/commit carry none), and no +// phase carries an error. +func TestHotService_EmitsEveryPhaseOnSuccess(t *testing.T) { + db, err := hotchunk.Open(t.TempDir(), chunk.ID(0), hotTestLogger()) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) -// TestHotService_SingleIngesterFailure asserts the len==1 fast path returns the -// ingester error and still emits exactly one HotLedgerTotal. 
-func TestHotService_SingleIngesterFailure(t *testing.T) { sink := &testSink{} - fail := &failingHot{} - service := NewHotService([]HotIngester{fail}, sink) - - err := service.Ingest(context.Background(), chunk.ID(0).FirstLedger(), viewOf(t, chunk.ID(0).FirstLedger())) - require.ErrorIs(t, err, errFailingHot) - require.Equal(t, 1, sink.hotLedgerTotals, "HotLedgerTotal fires exactly once even on failure") -} + svc := NewHotService(db, sink) + first := chunk.ID(0).FirstLedger() + raw, _, _ := marshalLCMWithEvent(t, first) // one tx, one event + require.NoError(t, svc.Ingest(context.Background(), first, xdr.LedgerCloseMetaView(raw))) + + require.Len(t, sink.hotPhases, int(hotchunk.NumPhases), "every phase emitted once on success") + items := sink.hotPhaseItems() + assert.Equal(t, 1, items[hotchunk.PhaseLedgers], "one ledger") + assert.Equal(t, 1, items[hotchunk.PhaseTxhash], "one tx hash") + assert.Equal(t, 1, items[hotchunk.PhaseEvents], "one event") + assert.Zero(t, items[hotchunk.PhaseExtract], "extract carries no items") + assert.Zero(t, items[hotchunk.PhaseCommit], "commit carries no items") + _, hadErr := sink.hotPhaseErr() + assert.False(t, hadErr, "success path carries no phase error") +} + +// TestHotService_CommitErrorLandsOnCommitPhase asserts a commit failure (a closed +// DB) surfaces the error on the commit phase — by construction, not by a +// separately-maintained label — and emits no items on the failure path. +func TestHotService_CommitErrorLandsOnCommitPhase(t *testing.T) { + db, err := hotchunk.Open(t.TempDir(), chunk.ID(0), hotTestLogger()) + require.NoError(t, err) + require.NoError(t, db.Close()) // closed => the batch commit fails -// TestHotService_MultiIngesterFailureCancelsSiblings asserts the errgroup path -// propagates the failing ingester's error, cancels the sibling via the group -// context, and still emits exactly one HotLedgerTotal. 
-func TestHotService_MultiIngesterFailureCancelsSiblings(t *testing.T) { sink := &testSink{} - fail := &failingHot{} - block := &blockingHot{canceled: make(chan struct{})} - service := NewHotService([]HotIngester{fail, block}, sink) - - err := service.Ingest(context.Background(), chunk.ID(0).FirstLedger(), viewOf(t, chunk.ID(0).FirstLedger())) - require.ErrorIs(t, err, errFailingHot) - - // The blocking sibling only returns once its context is canceled, so a - // non-blocking Ingest return already proves cancellation propagated. - select { - case <-block.canceled: - case <-time.After(2 * time.Second): - t.Fatal("sibling ingester was not canceled by the failing ingester") + svc := NewHotService(db, sink) + first := chunk.ID(0).FirstLedger() + raw, _, _ := marshalLCMWithEvent(t, first) + require.Error(t, svc.Ingest(context.Background(), first, xdr.LedgerCloseMetaView(raw))) + + phase, hadErr := sink.hotPhaseErr() + require.True(t, hadErr, "the failure must be reported on a phase") + assert.Equal(t, hotchunk.PhaseCommit, phase, "a commit failure lands on the commit phase") + for p, n := range sink.hotPhaseItems() { + assert.Zero(t, n, "no items on the failure path (phase %v)", p) } - require.Equal(t, 1, sink.hotLedgerTotals, "HotLedgerTotal fires exactly once even on failure") -} - -// TestHotIngester_Failure_RecordsErrorMetric drives a REAL hot ingester -// (eventsHot, built via NewEventsHotIngester) with a malformed view so its own -// Ingest fails through the production hotMetrics emit path — unlike the -// failingHot/blockingHot stubs, which bypass hotMetrics entirely. Per #765 a -// failed hot Ingest must record exactly one HotIngest carrying a non-nil error -// for that data type. Mirrors the cold-side TestColdIngester_Failure_RecordsErrorMetric. 
-func TestHotIngester_Failure_RecordsErrorMetric(t *testing.T) { - chunkID := chunk.ID(0) - logger := testLogger() - dir := t.TempDir() - sink := &testSink{} - - store, err := eventstore.OpenHotStore(dir, chunkID, logger) - require.NoError(t, err) - defer func() { require.NoError(t, store.Close()) }() - - ing := NewEventsHotIngester(store, sink) - - // A truncated/garbage view makes the event extraction fail inside the real - // Ingest, so the deferred hotMetrics.emit reports the wrapped error. - bad := xdr.LedgerCloseMetaView([]byte{0x00, 0x01, 0x02}) - require.Error(t, ing.Ingest(context.Background(), chunkID.FirstLedger(), bad)) - - sink.mu.Lock() - defer sink.mu.Unlock() - require.Len(t, sink.hotIngests, 1, "exactly one HotIngest recorded") - require.Equal(t, dataTypeEvents, sink.hotIngests[0].dataType) - require.Error(t, sink.hotIngests[0].err, "the recorded HotIngest carries the ingest error") } // ───────────────────────── cold txhash .bin content (P1-d) ───────────────────────── @@ -1387,7 +1034,7 @@ func TestTxhashColdIngester_BinContent(t *testing.T) { first := chunkID.FirstLedger() coldDir := t.TempDir() - ing, err := NewTxhashColdIngester(coldDir, chunkID, nil) + ing, err := NewTxhashColdIngester(txhashBinPath(coldDir), chunkID, nil) require.NoError(t, err) defer func() { require.NoError(t, ing.Close()) }() @@ -1436,53 +1083,12 @@ func TestWriteColdChunk_CanceledContext(t *testing.T) { cancel() rerr := WriteColdChunk( ctx, logger, chunkID, rawChunk(fullStream(t, chunkID, nil), chunkID), - coldDirsAt(coldDir), sink, Config{Ledgers: true}, + coldDirsAt(coldDir, chunkID), sink, Config{Ledgers: true}, ) require.ErrorIs(t, rerr, context.Canceled) require.Equal(t, 1, sink.coldChunkTotals, "a canceled chunk attempt still emits one ColdChunkTotal") } -// ───────────────────────── RunHot chunkID cross-check (P2-e) ───────────────────────── - -// TestRunHot_ChunkIDMismatch asserts RunHot rejects ANY injected hot store -// bound to a different chunk than the one 
being ingested, with a clear -// up-front error (rather than silently interleaving chunks on the ledger and -// txhash paths, or a later per-ledger out-of-range on the events path). All -// three hot stores are chunk-bound. -func TestRunHot_ChunkIDMismatch(t *testing.T) { - ingestChunk := chunk.ID(1) - storeChunk := chunk.ID(0) - logger := testLogger() - - run := func(t *testing.T, stores HotStores, cfg Config) { - t.Helper() - err := RunHot(context.Background(), logger, &fakeStream{t: t, count: 1}, ingestChunk, - stores, nil, cfg) - require.Error(t, err) - require.Contains(t, err.Error(), "bound to chunk 0") - require.Contains(t, err.Error(), "RunHot chunk 1") - } - - t.Run("ledgers", func(t *testing.T) { - ls, err := ledger.OpenHotStore(t.TempDir(), storeChunk, logger) - require.NoError(t, err) - defer func() { require.NoError(t, ls.Close()) }() - run(t, HotStores{Ledgers: ls}, Config{Ledgers: true}) - }) - t.Run("txhash", func(t *testing.T) { - ts, err := txhash.NewHotStore(t.TempDir(), storeChunk, logger) - require.NoError(t, err) - defer func() { require.NoError(t, ts.Close()) }() - run(t, HotStores{Txhash: ts}, Config{Txhash: true}) - }) - t.Run("events", func(t *testing.T) { - es, err := eventstore.OpenHotStore(t.TempDir(), storeChunk, logger) - require.NoError(t, err) - defer func() { require.NoError(t, es.Close()) }() - run(t, HotStores{Events: es}, Config{Events: true}) - }) -} - // ───────────────────────── Config validate / guard negatives (P2-g) ───────────────────────── // TestWriteColdChunk_ConfigGuards covers the validate guard on the cold materializer: @@ -1493,15 +1099,7 @@ func TestWriteColdChunk_ConfigGuards(t *testing.T) { chunkID := chunk.ID(0) err := WriteColdChunk(context.Background(), logger, chunkID, - rawChunk(fullStream(t, chunkID, nil), chunkID), coldDirsAt(t.TempDir()), nil, Config{}) - require.Error(t, err) - require.Contains(t, err.Error(), "enables no data types") -} - -// TestRunHot_EmptyConfig asserts the hot driver also rejects 
an empty Config. -func TestRunHot_EmptyConfig(t *testing.T) { - err := RunHot(context.Background(), testLogger(), &fakeStream{t: t, count: 1}, - chunk.ID(0), HotStores{}, nil, Config{}) + rawChunk(fullStream(t, chunkID, nil), chunkID), coldDirsAt(t.TempDir(), chunkID), nil, Config{}) require.Error(t, err) require.Contains(t, err.Error(), "enables no data types") } @@ -1521,13 +1119,13 @@ func countCleanColdIngests(s *testSink) int { return n } -// TestBuildColdIngesters_RollbackNoPhantomMetric makes a LATER constructor -// (txhash) fail by planting a regular file at the txhash per-type directory, -// so the constructor's own MkdirAll fails. The earlier-built ledger ingester -// is rolled back via closeColdAll, which must NOT emit a phantom success -// ColdIngest — the recorded ledger metric (if any) must carry the abort -// error, never a clean (nil-err, 0-items) success. -func TestBuildColdIngesters_RollbackNoPhantomMetric(t *testing.T) { +// TestBuildColdIngesters_RollbackOneBuilt makes a LATER constructor (txhash) fail +// by planting a regular file at the txhash per-type directory, so the +// constructor's own MkdirAll fails. The earlier-built ledger ingester is rolled +// back via closeColdAll — which only closes it. Since Close no longer emits a +// per-ingester ColdIngest, a rolled-back ingester (built, never ingested or +// finalized) produces NO sample at all: no phantom success, no synthetic abort. +func TestBuildColdIngesters_RollbackOneBuilt(t *testing.T) { chunkID := chunk.ID(0) coldDir := t.TempDir() sink := &testSink{} @@ -1537,27 +1135,19 @@ func TestBuildColdIngesters_RollbackNoPhantomMetric(t *testing.T) { // fails its bucket-dir MkdirAll. 
require.NoError(t, os.WriteFile(filepath.Join(coldDir, dataTypeTxhash), []byte("not a dir"), 0o644)) - _, err := buildColdIngesters(coldDirsAt(coldDir), chunkID, sink, Config{Ledgers: true, Txhash: true}) + _, err := buildColdIngesters(coldDirsAt(coldDir, chunkID), chunkID, sink, Config{Ledgers: true, Txhash: true}) require.Error(t, err, "txhash constructor must fail on the planted file") - // The ledger ingester was built then rolled back. No phantom SUCCESS metric: - // any recorded ledger ColdIngest must carry an error. - cdt := sink.coldDataTypes() - if cdt[dataTypeLedgers] > 0 { - require.Equal(t, cdt[dataTypeLedgers], sink.coldErrorTypes()[dataTypeLedgers], - "rolled-back ledger ingester must not emit a phantom success ColdIngest") - } - // And the success-only assertion: there must be zero clean (nil-err) cold - // ingest signals recorded. - require.Zero(t, countCleanColdIngests(sink), "no clean ColdIngest on the rollback path") + // The ledger ingester was built then rolled back with no Ingest/Finalize, so + // it emits nothing. + require.Empty(t, sink.coldDataTypes(), "a rolled-back ingester emits no per-ingester ColdIngest") } -// TestBuildColdIngesters_RollbackLaterFailure_TxhashAborts makes the LAST -// constructor (events) fail AFTER both the ledger AND txhash ingesters were -// already built, so closeColdAll rolls back two ingesters. It asserts the txhash -// ingester (which DOES implement abortMetric) emits an error-carrying — not a -// clean-success — ColdIngest, complementing the ledger-only abort coverage above. -func TestBuildColdIngesters_RollbackLaterFailure_TxhashAborts(t *testing.T) { +// TestBuildColdIngesters_RollbackTwoBuilt makes the LAST constructor (events) +// fail AFTER both the ledger AND txhash ingesters were already built, so +// closeColdAll rolls back two ingesters. Same invariant at greater rollback +// depth: neither rolled-back ingester emits a per-ingester ColdIngest. 
+func TestBuildColdIngesters_RollbackTwoBuilt(t *testing.T) { chunkID := chunk.ID(0) coldDir := t.TempDir() sink := &testSink{} @@ -1568,19 +1158,13 @@ func TestBuildColdIngesters_RollbackLaterFailure_TxhashAborts(t *testing.T) { packPath := filepath.Join(coldDir, dataTypeEvents, chunkID.BucketID(), eventstore.EventsPackName(chunkID)) require.NoError(t, os.MkdirAll(packPath, 0o755)) - _, err := buildColdIngesters(coldDirsAt(coldDir), chunkID, sink, + _, err := buildColdIngesters(coldDirsAt(coldDir, chunkID), chunkID, sink, Config{Ledgers: true, Txhash: true, Events: true}) require.Error(t, err, "events constructor must fail on the planted directory") - // The txhash ingester was built then rolled back: its recorded ColdIngest must - // carry the abort error, never a clean success. - cdt := sink.coldDataTypes() - require.Equal(t, 1, cdt[dataTypeTxhash], "rolled-back txhash ingester emits one ColdIngest") - require.Equal(t, 1, sink.coldErrorTypes()[dataTypeTxhash], - "the rolled-back txhash ColdIngest must carry the abort error") - - // No phantom clean success on the rollback path for any ingester. - require.Zero(t, countCleanColdIngests(sink), "no clean ColdIngest on the rollback path") + // Both the ledger and txhash ingesters were built then rolled back with no + // Ingest/Finalize, so neither emits a per-ingester ColdIngest. 
+ require.Empty(t, sink.coldDataTypes(), "rolled-back ingesters emit no per-ingester ColdIngest") } // TestWriteColdChunk_ConstructorFailure_EmitsAggregate drives a constructor failure @@ -1598,7 +1182,7 @@ func TestWriteColdChunk_ConstructorFailure_EmitsAggregate(t *testing.T) { err := WriteColdChunk( context.Background(), logger, chunkID, rawChunk(fullStream(t, chunkID, nil), chunkID), - coldDirsAt(coldDir), sink, Config{Ledgers: true}, + coldDirsAt(coldDir, chunkID), sink, Config{Ledgers: true}, ) require.Error(t, err) require.Equal(t, 1, sink.coldChunkTotals, @@ -1619,7 +1203,7 @@ func TestEventsCold_FinishThenIndexFails_LeavesInertPack(t *testing.T) { first := chunkID.FirstLedger() coldDir := t.TempDir() - ing, err := NewEventsColdIngester(coldDir, chunkID, nil) + ing, err := NewEventsColdIngester(filepath.Join(coldDir, chunkID.BucketID()), chunkID, nil) require.NoError(t, err) // Ingest one event-bearing ledger so the mirror is non-empty (an empty @@ -1656,7 +1240,7 @@ func TestEventsCold_FinalizeAfterFailedIngest_Refuses(t *testing.T) { chunkID := chunk.ID(0) coldDir := t.TempDir() - ing, err := NewEventsColdIngester(coldDir, chunkID, nil) + ing, err := NewEventsColdIngester(filepath.Join(coldDir, chunkID.BucketID()), chunkID, nil) require.NoError(t, err) defer func() { require.NoError(t, ing.Close()) }() @@ -1721,13 +1305,16 @@ func TestColdService_Finalize_FirstErrorStopsRemaining(t *testing.T) { // ───────────────────────── drain overrun guard ───────────────────────── // countingIngester counts Ingest calls; used to prove the overrun guard fires -// BEFORE the out-of-chunk ledger is handed to the ingesters. +// BEFORE the out-of-chunk ledger is handed to the ingesters. It fakes the +// ColdIngester seam (a ColdService drives it), the layer drain consumes. 
type countingIngester struct{ ingested int } func (c *countingIngester) Ingest(context.Context, uint32, xdr.LedgerCloseMetaView) error { c.ingested++ return nil } +func (*countingIngester) Finalize(context.Context) error { return nil } +func (*countingIngester) Close() error { return nil } // TestDrain_OverrunPastChunk asserts a stream that keeps yielding in order // PAST the chunk's last ledger is rejected before the overrun ledger is @@ -1739,8 +1326,9 @@ func TestDrain_OverrunPastChunk(t *testing.T) { // One ledger past the chunk, still in order. stream := &fakeStream{t: t, count: ledgersInChunk + 1} counter := &countingIngester{} + service := NewColdService([]ColdIngester{counter}, nil) - err := drain(context.Background(), rawChunk(stream, chunkID), chunkID, counter) + err := drain(context.Background(), rawChunk(stream, chunkID), chunkID, service) require.Error(t, err) require.Contains(t, err.Error(), "overrun") require.Equal(t, int(ledgersInChunk), counter.ingested, @@ -1777,11 +1365,11 @@ func TestWriteColdChunk_LazySourceFirstReadError(t *testing.T) { wantErr := errors.New("induced lazy-source failure (bad config / missing object)") err := WriteColdChunk( context.Background(), logger, chunkID, rawChunk(lazyErrStream{err: wantErr}, chunkID), - coldDirsAt(coldDir), sink, Config{Ledgers: true}, + coldDirsAt(coldDir, chunkID), sink, Config{Ledgers: true}, ) require.Error(t, err) require.ErrorIs(t, err, wantErr) - require.Contains(t, err.Error(), "RawLedgers", "the error surfaces from drain's stream pull") + require.Contains(t, err.Error(), "stream for chunk", "the error surfaces from drain's stream pull") // Finalize never committed → no finalized pack (Close dropped the partial). 
path := packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID) @@ -1801,7 +1389,7 @@ func TestWriteColdChunk_EmptyStream(t *testing.T) { err := WriteColdChunk( context.Background(), logger, chunkID, rawChunk(&fakeStream{t: t, count: 0}, chunkID), - coldDirsAt(coldDir), sink, Config{Ledgers: true}, + coldDirsAt(coldDir, chunkID), sink, Config{Ledgers: true}, ) require.Error(t, err) require.Contains(t, err.Error(), "ended at", "the completeness check rejects the empty stream") @@ -1823,7 +1411,7 @@ func TestColdService_FinalizeAbort_KeepsEarlierArtifact(t *testing.T) { coldDir := t.TempDir() sink := &testSink{} - realLedger, err := NewLedgerColdIngester(filepath.Join(coldDir, dataTypeLedgers), chunkID, sink) + realLedger, err := NewLedgerColdIngester(packPath(filepath.Join(coldDir, dataTypeLedgers), chunkID), chunkID, sink) require.NoError(t, err) failErr := errors.New("induced finalize failure") failing := &finalizeErrCold{err: failErr} diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/ingester.go b/cmd/stellar-rpc/internal/fullhistory/ingest/ingester.go index d59453293..ad312520d 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/ingester.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/ingester.go @@ -6,29 +6,6 @@ import ( "github.com/stellar/go-stellar-sdk/xdr" ) -// HotIngester ingests one data type for one ledger into a long-lived hot store. -// -// Ownership: the hot store is INJECTED into the ingester's constructor and owned -// by the caller (the daemon). The ingester does NOT open the store and does NOT -// close it — Close is intentionally absent from this interface. -// -// Input: seq is the DRIVER-VALIDATED ledger sequence of lcm — the drain loop -// has already read it off the view and checked it against the chunk's expected -// position (duplicate / out-of-order / overrun), so ingesters consume it -// directly instead of each re-deriving and re-error-handling it. 
lcm is a -// zero-copy xdr.LedgerCloseMetaView (a []byte alias over the source stream's -// BORROWED buffer), valid only for the current iteration step; an ingester -// must copy any bytes it retains. The hot fan-out (HotService) waits for all -// ingesters to finish a ledger before the source pulls the next one, so -// synchronous consumption inside Ingest is safe. -// -// Concurrency: distinct HotIngester instances are run concurrently for the same -// ledger (HotService fans out via errgroup); each instance touches only its own -// store plus the read-only view. -type HotIngester interface { - Ingest(ctx context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error -} - // ColdIngester ingests one data type for one chunk into a per-chunk cold writer. // // Ownership: the ingester OPENS its own per-chunk writer in its constructor and @@ -44,7 +21,10 @@ type HotIngester interface { // artifact; implementations are encouraged to latch the failure and refuse // (eventsCold does). // -// Input: same driver-validated-seq and borrowed-view contract as HotIngester. +// Input: seq is the ledger sequence of lcm on drain's contiguous counter (the +// in-order contract is enforced at the source), and lcm is a zero-copy +// xdr.LedgerCloseMetaView over the source stream's BORROWED buffer, valid only for +// the current iteration step — an implementation must copy any bytes it retains. // ColdService drives the per-ledger Ingest calls sequentially, so each view is // fully consumed before the next. 
type ColdIngester interface { diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/ledgers.go b/cmd/stellar-rpc/internal/fullhistory/ingest/ledgers.go index f9bab63af..5acf01b91 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/ledgers.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/ledgers.go @@ -13,42 +13,6 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger" ) -// ───────────────────────── Hot ingester ───────────────────────── - -// ledgerHot writes raw ledger bytes verbatim into a long-lived ledger.HotStore. -// AddLedgers fsyncs once per call, so each ledger is durable before Ingest -// returns. The store is INJECTED and owned by the caller — ledgerHot never -// opens or closes it. -type ledgerHot struct { - store *ledger.HotStore - sink MetricSink -} - -// NewLedgerHotIngester returns a HotIngester writing raw ledger bytes into the -// injected, caller-owned store. -func NewLedgerHotIngester(store *ledger.HotStore, sink MetricSink) HotIngester { - return &ledgerHot{store: store, sink: orNop(sink)} -} - -func (h *ledgerHot) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error { - m := newHotMetrics(h.sink, dataTypeLedgers) - var err error - defer func() { m.emit(err) }() - - // ledger.HotStore.AddLedgers copies the bytes into its RocksDB batch - // synchronously, so aliasing the borrowed view buffer here is safe. - wstart := time.Now() - if aerr := h.store.AddLedgers(ledger.Entry{Seq: seq, Bytes: []byte(lcm)}); aerr != nil { - err = fmt.Errorf("AddLedgers(seq=%d): %w", seq, aerr) - return err - } - h.sink.IngestStage(dataTypeLedgers, tierHot, stageWrite, time.Since(wstart), 1) - // Set AFTER the store call so a failed write reports items=0, matching - // the MetricSink "items written" contract and the other hot ingesters. 
- m.items = 1 - return nil -} - // ───────────────────────── Cold ingester ───────────────────────── // ledgerCold writes raw ledger bytes into a per-chunk ledger.ColdWriter (one @@ -61,31 +25,28 @@ type ledgerCold struct { appended bool } -// NewLedgerColdIngester opens a per-chunk cold ledger writer under coldDir and -// returns a ColdIngester that owns it. The writer uses its zero-value options; -// driver-level tuning is a follow-up via Config. -func NewLedgerColdIngester(coldDir string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) { - // The chunk's pack lives under its %05d bucket subdirectory; ledger.PackName - // owns the per-chunk filename so the naming convention has a single owner - // shared with the cold-ledger read path (ledger.NewPackStream). - path := filepath.Join(coldDir, chunkID.BucketID(), ledger.PackName(chunkID)) - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return nil, fmt.Errorf("mkdir %s: %w", filepath.Dir(path), err) +// NewLedgerColdIngester opens a per-chunk cold ledger writer at packPath — the +// caller's geometry.Layout.LedgerPackPath(chunkID), so the write path is Layout's +// single derivation, not a second copy — and returns a ColdIngester that owns it. +// The writer uses its zero-value options; driver-level tuning is a follow-up via Config. 
+func NewLedgerColdIngester(packPath string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) { + if err := os.MkdirAll(filepath.Dir(packPath), 0o755); err != nil { + return nil, fmt.Errorf("mkdir %s: %w", filepath.Dir(packPath), err) } - w, err := ledger.NewColdWriter(path, chunkID.FirstLedger(), ledger.ColdWriterOptions{}) + w, err := ledger.NewColdWriter(packPath, chunkID.FirstLedger(), ledger.ColdWriterOptions{}) if err != nil { - return nil, fmt.Errorf("ledger.NewColdWriter %s: %w", path, err) + return nil, fmt.Errorf("ledger.NewColdWriter %s: %w", packPath, err) } - return &ledgerCold{path: path, writer: w, metrics: newColdMetrics(sink, dataTypeLedgers)}, nil + return &ledgerCold{path: packPath, writer: w, metrics: newColdMetrics(sink, dataTypeLedgers)}, nil } func (c *ledgerCold) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error { start := time.Now() if err := c.writer.AppendLedger(seq, []byte(lcm)); err != nil { - c.metrics.observe(time.Since(start), 0, err) + c.metrics.observe(time.Since(start), 0, err) // terminal: observe emits the per-ingester signal return fmt.Errorf("AppendLedger(seq=%d): %w", seq, err) } - c.metrics.sink.IngestStage(dataTypeLedgers, tierCold, stageWrite, time.Since(start), 1) + c.metrics.sink.IngestStage(dataTypeLedgers, stageWrite, time.Since(start), 1) c.appended = true c.metrics.observe(time.Since(start), 1, nil) return nil @@ -105,23 +66,15 @@ func (c *ledgerCold) Finalize(_ context.Context) error { c.metrics.emit(time.Since(start), err) return err } - c.metrics.sink.IngestStage(dataTypeLedgers, tierCold, stageFinalize, time.Since(start), 0) + c.metrics.sink.IngestStage(dataTypeLedgers, stageFinalize, time.Since(start), 0) c.metrics.emit(time.Since(start), nil) return nil } -// Close drops the partial pack when Finalize never ran, and emits the cold -// metrics if Finalize did not already (the failure path). 
The writer.Close -// error is folded into the emitted metric so a close-time failure is counted in -// errors_total. emit is a no-op after a successful Finalize, so this never -// double-counts. Error propagation is unchanged: the writer.Close error is -// still returned. +// Close drops the partial pack when Finalize never ran. It does NOT emit the cold +// metric: a terminal Ingest error or Finalize already emitted it, and an ingester +// that never got that far (a rolled-back build) must produce no phantom sample. +// The writer.Close error is returned unchanged. func (c *ledgerCold) Close() error { - cerr := c.writer.Close() - c.metrics.emit(0, cerr) - return cerr + return c.writer.Close() } - -// abortMetric records a synthetic abort error so a subsequent Close emit does -// not look like a clean success. Used by the constructor-rollback path. -func (c *ledgerCold) abortMetric(err error) { c.metrics.recordErr(err) } diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/metrics.go b/cmd/stellar-rpc/internal/fullhistory/ingest/metrics.go index 22ab631dc..8b9952e6b 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/metrics.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/metrics.go @@ -4,23 +4,20 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" ) // Data-type labels reported to a MetricSink. These match the per-type -// subdirectory names used on disk. +// subdirectory names used on disk. (The hot tier keys its per-ledger phases by +// hotchunk.Phase, not by data type — see MetricSink.HotPhase.) const ( dataTypeLedgers = "ledgers" dataTypeTxhash = "txhash" dataTypeEvents = "events" ) -// Tier labels reported to a MetricSink. -const ( - tierHot = "hot" - tierCold = "cold" -) - -// Stage labels reported via MetricSink.IngestStage. These sit at the seams +// Cold stage labels reported via MetricSink.IngestStage. 
These sit at the seams // the rpc-hack bench collectors measured (per-stage extract / term-index / // store-write samples plus a per-chunk finish), so a CSV sink can reproduce // those reports from production ingesters without re-instrumenting. @@ -31,53 +28,66 @@ const ( stageFinalize = "finalize" // per-chunk commit (pack trailer, index build, .bin write) ) +// coldStagePairs is the set of (data_type, stage) pairs the cold ingesters +// actually emit — the eight real ones, not the 3×4 cross-product. A sink +// pre-resolves exactly these, so it registers no series no code path can feed. +// +//nolint:gochecknoglobals // fixed label set, read-only +var coldStagePairs = []struct{ dataType, stage string }{ + {dataTypeLedgers, stageWrite}, + {dataTypeLedgers, stageFinalize}, + {dataTypeTxhash, stageExtract}, + {dataTypeTxhash, stageFinalize}, + {dataTypeEvents, stageExtract}, + {dataTypeEvents, stageTermIndex}, + {dataTypeEvents, stageWrite}, + {dataTypeEvents, stageFinalize}, +} + // MetricSink receives ingest timing and volume signals. Ingesters report their // own per-call latency / item counts / errors (they know the item count); the // per-tier services report aggregate per-ledger (hot) and per-chunk (cold) // wall-clock. A sink lets the same ingesters/services feed Prometheus in prod, // a CSV recorder in benchmarks, or a test recorder — interchangeably. // -// Implementations must be safe for concurrent use across ALL methods, not just -// HotIngest: the hot fan-out calls HotIngest/HotLedgerTotal from per-ledger -// goroutines, and a caller may freeze several chunks concurrently (each its own -// WriteColdChunk), so the cold methods (ColdIngest, ColdChunkTotal) can likewise -// be called from several goroutines at once. 
+// Implementations must be safe for concurrent use across ALL methods: the live +// hot ingestion loop reports HotPhase from its own goroutine while the lifecycle +// may freeze several chunks concurrently (each its own WriteColdChunk), so the +// cold methods (ColdIngest, ColdChunkTotal, IngestStage) can likewise be called +// from several goroutines at once. type MetricSink interface { - // HotIngest reports one hot ingester's per-ledger Ingest: dataType is the - // data-type label, d the wall-clock, items the number of items written - // (events, txhashes, or 1 for a ledger), err the Ingest error (nil on - // success). - HotIngest(dataType string, d time.Duration, items int, err error) + // HotPhase reports ONE phase of one hot ledger ingest — the single hot-tier + // signal family. It carries that phase's wall-clock, its item count (0 for the + // extract/commit phases, the per-type write volume for the write phases, on the + // success path), and its outcome (err is non-nil only on the phase that failed, + // so a decode failure lands on PhaseExtract and a commit failure on PhaseCommit + // by construction). The per-ledger total is the sum of the phase durations; the + // caller emits phases [0, Failed] on error and all phases on success. + HotPhase(phase hotchunk.Phase, d time.Duration, items int, err error) // ColdIngest reports one cold ingester's per-chunk total: the summed Ingest // wall-clock plus its Finalize, items the total items written for the chunk, // err the first error (nil on success). ColdIngest(dataType string, d time.Duration, items int, err error) - // HotLedgerTotal reports the per-ledger wall-clock across all hot ingesters - // (the HotService.Ingest fan-out duration). - HotLedgerTotal(d time.Duration) // ColdChunkTotal reports the per-chunk wall-clock across all cold ingesters' // ingests plus their Finalizes (the ColdService lifetime). 
ColdChunkTotal(d time.Duration) - // IngestStage reports one ingester's per-stage wall-clock INSIDE an + // IngestStage reports one COLD ingester's per-stage wall-clock inside an // Ingest/Finalize call: stage is one of the stage* constants (extract, - // term_index, write, finalize), tier "hot" or "cold", items the stage's - // natural item count (0 where none applies). The whole-call HotIngest / - // ColdIngest signals above cannot be decomposed by a sink after the - // fact, so the per-stage granularity the bench reports need is exposed - // as its own signal — a sink that doesn't want it (production - // Prometheus, optionally) can no-op it. - IngestStage(dataType, tier, stage string, d time.Duration, items int) + // term_index, write, finalize), items the stage's natural item count (0 where + // none applies). The whole-call ColdIngest signal cannot be decomposed by a + // sink after the fact, so the per-stage granularity the bench reports need is + // exposed as its own signal — a sink that doesn't want it can no-op it. + IngestStage(dataType, stage string, d time.Duration, items int) } // NopSink is a MetricSink that discards everything. It is the default when a // caller passes a nil sink to a service or ingester. type NopSink struct{} -func (NopSink) HotIngest(string, time.Duration, int, error) {} -func (NopSink) ColdIngest(string, time.Duration, int, error) {} -func (NopSink) HotLedgerTotal(time.Duration) {} -func (NopSink) ColdChunkTotal(time.Duration) {} -func (NopSink) IngestStage(string, string, string, time.Duration, int) {} +func (NopSink) HotPhase(hotchunk.Phase, time.Duration, int, error) {} +func (NopSink) ColdIngest(string, time.Duration, int, error) {} +func (NopSink) ColdChunkTotal(time.Duration) {} +func (NopSink) IngestStage(string, string, time.Duration, int) {} // orNop returns sink, or NopSink{} when sink is nil, so call sites never // nil-check before reporting. 
@@ -88,48 +98,18 @@ func orNop(sink MetricSink) MetricSink { return sink } -// hotMetrics emits a single HotIngest signal for one hot ingester's per-ledger -// Ingest. The ingester sets items as it learns the count, then a single deferred -// emit reports the wall-clock since start, the final item count, and the WRAPPED -// error captured from the named return — so every Ingest has exactly one emit -// site regardless of which return path it takes. -// -// Usage: -// -// func (h *fooHot) Ingest(...) (err error) { -// m := newHotMetrics(h.sink, dataTypeFoo) -// defer func() { m.emit(err) }() -// ... -// m.items = len(things) -// return nil -// } -type hotMetrics struct { - sink MetricSink - dataType string - start time.Time - items int -} - -func newHotMetrics(sink MetricSink, dataType string) hotMetrics { - return hotMetrics{sink: orNop(sink), dataType: dataType, start: time.Now()} -} - -// emit reports the single HotIngest signal: the wall-clock since construction, -// the accumulated item count, and the (wrapped) error from the named return. -func (m *hotMetrics) emit(err error) { - m.sink.HotIngest(m.dataType, time.Since(m.start), m.items, err) -} - // coldMetrics is the per-chunk metric accumulator shared by all three cold // ingesters. Each ingester accumulates Ingest wall-clock (accum), item count // (items), and the FIRST error it saw (firstErr) across the chunk, then emits a -// single ColdIngest signal — in Finalize if reached, otherwise in Close (the -// failure path). The emitted flag guards against a double-emit: a successful -// Finalize emits and sets emitted=true so the deferred Close is a no-op, while a -// chunk that errors before Finalize emits exactly once from Close. +// single ColdIngest signal on a TERMINAL step only: Finalize (success or error), +// or an Ingest error (which abandons the chunk). Close NEVER emits — an ingester +// that was built but never ingested/finalized (e.g. 
a sibling constructor failed +// and the build rolled back) produces NO phantom sample. The emitted flag guards +// against a double-emit so the guarantee holds even if a defensive caller drives +// the terminal steps redundantly. // -// This guarantees: failed chunk → one ColdIngest with the error recorded; -// success → exactly one ColdIngest per ingester; never both. +// This guarantees: a chunk that ingested and then failed/finalized → exactly one +// ColdIngest (error recorded on failure); a rolled-back ingester → none. type coldMetrics struct { sink MetricSink dataType string @@ -143,29 +123,25 @@ func newColdMetrics(sink MetricSink, dataType string) coldMetrics { return coldMetrics{sink: orNop(sink), dataType: dataType} } -// recordErr folds err into firstErr WITHOUT emitting. Used on the -// constructor-rollback path so the subsequent Close emit carries the abort -// error instead of looking like a clean (nil-err, 0-items) success. -func (m *coldMetrics) recordErr(err error) { - if err != nil { - m.firstErr = errOrFirst(m.firstErr, err) - } -} - -// observe records one Ingest's wall-clock and (on error) the first error. +// observe records one Ingest's wall-clock and (on error) the first error. An +// Ingest error is TERMINAL by the ColdIngester contract (the chunk is abandoned +// and the ingester is never reused), so observe emits the single per-ingester +// ColdIngest itself here — callers just observe-and-return, no hand-paired emit. func (m *coldMetrics) observe(d time.Duration, items int, err error) { m.accum += d m.items += items if err != nil { m.firstErr = errOrFirst(m.firstErr, err) + m.emit(0, nil) } } // emit reports the single ColdIngest signal for this ingester, adding extra to // the accumulated Ingest time (e.g. the Finalize wall-clock) and folding err // (if non-nil) into firstErr before reporting. It is a no-op after the first -// call, so calling it from both Finalize (success) and Close (deferred cleanup) -// emits exactly once. 
Pass a nil err when there is no stage error to record. +// call, so a redundant terminal-step call emits exactly once. Pass a nil err +// when the error is already recorded (an Ingest failure observes it) or there is +// none. func (m *coldMetrics) emit(extra time.Duration, err error) { if err != nil { m.firstErr = errOrFirst(m.firstErr, err) @@ -200,16 +176,9 @@ var ( coldStageBuckets = prometheus.ExponentialBuckets(0.001, 4, 12) ) -// ingestStages is the construction-time stage label set used to pre-resolve -// the per-(data_type, stage) children. -// -//nolint:gochecknoglobals // fixed label set, read-only -var ingestStages = []string{stageExtract, stageTermIndex, stageWrite, stageFinalize} - -// ingestCollectors bundles the pre-resolved per-(data_type, tier) children. -// The label space is fixed at construction (three data types × two tiers), so -// resolving the children once removes the per-emit label-map allocation and -// hashed vector lookups from the hot per-ledger path. +// ingestCollectors bundles the pre-resolved per-cold-data-type children. The +// label space is fixed at construction, so resolving the children once removes +// the per-emit label-map allocation and hashed vector lookup. type ingestCollectors struct { duration prometheus.Observer items prometheus.Counter @@ -234,26 +203,22 @@ func (c ingestCollectors) observe(d time.Duration, items int, err error) { // passing it into the ingest drivers) is a follow-up — there is no full-history // ingest daemon startup path yet. This type only provides the registerable sink. type PrometheusSink struct { - // Pre-resolved per-ingester children, keyed by data type, one map per - // tier (the duration histograms have per-tier buckets). 
- hot map[string]ingestCollectors + // Hot per-ledger phases — the single hot signal family, one set of children per + // hotchunk.Phase, indexed by the phase value into a fixed-size ARRAY (not a map), + // so an out-of-table phase is a bounds panic at the index rather than a silent + // nil-map emit. The per-ledger total is the sum of hotPhaseDur; commit errors are + // hotPhaseErrs[PhaseCommit]; decode errors hotPhaseErrs[PhaseExtract]. + hotPhaseDur [hotchunk.NumPhases]prometheus.Observer + hotPhaseItems [hotchunk.NumPhases]prometheus.Counter + hotPhaseErrs [hotchunk.NumPhases]prometheus.Counter + // Pre-resolved per-cold-ingester children, keyed by data type. Producers draw + // their data_type from the same constant set the map is built from, so a lookup + // can never miss — indexed directly, no on-the-fly vector fallback. cold map[string]ingestCollectors - // The vectors behind the resolved children, kept for the (unexpected) - // case of a data type outside the construction-time set — resolved on - // the fly so no signal is ever silently dropped. - hotDuration *prometheus.HistogramVec - coldDuration *prometheus.HistogramVec - ingestItems *prometheus.CounterVec - ingestErrors *prometheus.CounterVec - // Per-stage durations (IngestStage), pre-resolved per - // (data_type, stage) with per-tier buckets, keyed "dataType/stage". - hotStage map[string]prometheus.Observer - coldStage map[string]prometheus.Observer - hotStageVec *prometheus.HistogramVec - coldStageVec *prometheus.HistogramVec - // Aggregate per-tier wall-clock: hot per-ledger fan-out, cold per-chunk - // service lifetime. Separate histograms so each tier gets fitting buckets. - hotLedgerTotal prometheus.Observer + // Per-cold-stage durations, pre-resolved for the eight real (data_type, stage) + // pairs only (coldStagePairs), keyed "dataType/stage". + coldStage map[string]prometheus.Observer + // Aggregate per-chunk cold wall-clock (ColdService lifetime). 
coldChunkTotal prometheus.Observer } @@ -261,12 +226,24 @@ type PrometheusSink struct { // registry under namespace + the fullhistory_ingest subsystem. namespace is the // daemon convention value (interfaces.PrometheusNamespace). func NewPrometheusSink(registry *prometheus.Registry, namespace string) *PrometheusSink { - hotDuration := prometheus.NewHistogramVec(prometheus.HistogramOpts{ + hotPhaseDurVec := prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: metricsSubsystem, - Name: "hot_ingest_duration_seconds", - Help: "per-ingester hot Ingest wall-clock (per ledger)", + Name: "hot_phase_duration_seconds", + Help: "per-ledger phase wall-clock (extract/ledgers/txhash/events/commit; phases sum to the per-ledger total)", Buckets: hotBuckets, - }, []string{"data_type"}) + }, []string{"phase"}) + + hotPhaseItemsVec := prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, Subsystem: metricsSubsystem, + Name: "hot_phase_items_total", + Help: "items written per hot phase (the write phases carry per-type volume; extract/commit are 0)", + }, []string{"phase"}) + + hotPhaseErrsVec := prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, Subsystem: metricsSubsystem, + Name: "hot_phase_errors_total", + Help: "hot ledger failures by the phase that failed (decode->extract, commit->commit, by construction)", + }, []string{"phase"}) coldDuration := prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: metricsSubsystem, @@ -275,24 +252,17 @@ func NewPrometheusSink(registry *prometheus.Registry, namespace string) *Prometh Buckets: coldBuckets, }, []string{"data_type"}) - ingestItems := prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, Subsystem: metricsSubsystem, - Name: "items_total", - Help: "items written per ingester (events, txhashes, or ledgers)", - }, []string{"data_type", "tier"}) - - ingestErrors := prometheus.NewCounterVec(prometheus.CounterOpts{ 
+ coldItems := prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: metricsSubsystem, - Name: "errors_total", - Help: "ingester Ingest/Finalize errors", - }, []string{"data_type", "tier"}) + Name: "cold_items_total", + Help: "items written per cold ingester (events, txhashes, or ledgers)", + }, []string{"data_type"}) - hotLedgerTotal := prometheus.NewHistogram(prometheus.HistogramOpts{ + coldErrors := prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: metricsSubsystem, - Name: "hot_ledger_duration_seconds", - Help: "aggregate per-ledger wall-clock across all hot ingesters (HotService fan-out)", - Buckets: hotBuckets, - }) + Name: "cold_errors_total", + Help: "cold ingester Ingest/Finalize errors", + }, []string{"data_type"}) coldChunkTotal := prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: metricsSubsystem, @@ -301,13 +271,6 @@ func NewPrometheusSink(registry *prometheus.Registry, namespace string) *Prometh Buckets: coldBuckets, }) - hotStageVec := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, Subsystem: metricsSubsystem, - Name: "hot_stage_duration_seconds", - Help: "per-stage wall-clock inside a hot Ingest (extract, write; ledgers emits write only)", - Buckets: hotBuckets, - }, []string{"data_type", "stage"}) - coldStageVec := prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: metricsSubsystem, Name: "cold_stage_duration_seconds", @@ -316,92 +279,55 @@ func NewPrometheusSink(registry *prometheus.Registry, namespace string) *Prometh Buckets: coldStageBuckets, }, []string{"data_type", "stage"}) - registry.MustRegister(hotDuration, coldDuration, ingestItems, ingestErrors, - hotLedgerTotal, coldChunkTotal, hotStageVec, coldStageVec) + registry.MustRegister(hotPhaseDurVec, hotPhaseItemsVec, hotPhaseErrsVec, + coldDuration, coldItems, coldErrors, coldChunkTotal, coldStageVec) - hot := 
make(map[string]ingestCollectors, 3) - cold := make(map[string]ingestCollectors, 3) - hotStage := make(map[string]prometheus.Observer, 3*len(ingestStages)) - coldStage := make(map[string]prometheus.Observer, 3*len(ingestStages)) + sink := &PrometheusSink{ + cold: make(map[string]ingestCollectors, 3), + coldStage: make(map[string]prometheus.Observer, len(coldStagePairs)), + coldChunkTotal: coldChunkTotal, + } + // Hot phases: one child per phase, indexed by the phase value. + for p := range hotchunk.NumPhases { + sink.hotPhaseDur[p] = hotPhaseDurVec.WithLabelValues(p.String()) + sink.hotPhaseItems[p] = hotPhaseItemsVec.WithLabelValues(p.String()) + sink.hotPhaseErrs[p] = hotPhaseErrsVec.WithLabelValues(p.String()) + } for _, dataType := range []string{dataTypeLedgers, dataTypeTxhash, dataTypeEvents} { - hot[dataType] = ingestCollectors{ - duration: hotDuration.WithLabelValues(dataType), - items: ingestItems.WithLabelValues(dataType, tierHot), - errors: ingestErrors.WithLabelValues(dataType, tierHot), - } - cold[dataType] = ingestCollectors{ + sink.cold[dataType] = ingestCollectors{ duration: coldDuration.WithLabelValues(dataType), - items: ingestItems.WithLabelValues(dataType, tierCold), - errors: ingestErrors.WithLabelValues(dataType, tierCold), - } - for _, stage := range ingestStages { - hotStage[dataType+"/"+stage] = hotStageVec.WithLabelValues(dataType, stage) - coldStage[dataType+"/"+stage] = coldStageVec.WithLabelValues(dataType, stage) + items: coldItems.WithLabelValues(dataType), + errors: coldErrors.WithLabelValues(dataType), } } - - return &PrometheusSink{ - hot: hot, - cold: cold, - hotDuration: hotDuration, - coldDuration: coldDuration, - ingestItems: ingestItems, - ingestErrors: ingestErrors, - hotStage: hotStage, - coldStage: coldStage, - hotStageVec: hotStageVec, - coldStageVec: coldStageVec, - hotLedgerTotal: hotLedgerTotal, - coldChunkTotal: coldChunkTotal, + // Cold stages: only the eight real (data_type, stage) pairs. 
+ for _, k := range coldStagePairs { + sink.coldStage[k.dataType+"/"+k.stage] = coldStageVec.WithLabelValues(k.dataType, k.stage) } + return sink } -func (p *PrometheusSink) HotIngest(dataType string, d time.Duration, items int, err error) { - c, ok := p.hot[dataType] - if !ok { - c = ingestCollectors{ - duration: p.hotDuration.WithLabelValues(dataType), - items: p.ingestItems.WithLabelValues(dataType, tierHot), - errors: p.ingestErrors.WithLabelValues(dataType, tierHot), - } +func (p *PrometheusSink) HotPhase(phase hotchunk.Phase, d time.Duration, items int, err error) { + p.hotPhaseDur[phase].Observe(d.Seconds()) + if items > 0 { + p.hotPhaseItems[phase].Add(float64(items)) } - c.observe(d, items, err) -} - -func (p *PrometheusSink) ColdIngest(dataType string, d time.Duration, items int, err error) { - c, ok := p.cold[dataType] - if !ok { - c = ingestCollectors{ - duration: p.coldDuration.WithLabelValues(dataType), - items: p.ingestItems.WithLabelValues(dataType, tierCold), - errors: p.ingestErrors.WithLabelValues(dataType, tierCold), - } + if err != nil { + p.hotPhaseErrs[phase].Inc() } - c.observe(d, items, err) } -func (p *PrometheusSink) HotLedgerTotal(d time.Duration) { - p.hotLedgerTotal.Observe(d.Seconds()) +func (p *PrometheusSink) ColdIngest(dataType string, d time.Duration, items int, err error) { + p.cold[dataType].observe(d, items, err) } func (p *PrometheusSink) ColdChunkTotal(d time.Duration) { p.coldChunkTotal.Observe(d.Seconds()) } -// IngestStage records the per-stage duration into the tier's stage histogram. -// The per-stage item counts are not exported to Prometheus (the per-Ingest -// items_total already carries volume); they exist on the interface for the -// CSV bench sink. 
-func (p *PrometheusSink) IngestStage(dataType, tier, stage string, d time.Duration, _ int) { - resolved, vec := p.hotStage, p.hotStageVec - if tier == tierCold { - resolved, vec = p.coldStage, p.coldStageVec - } - o, ok := resolved[dataType+"/"+stage] - if !ok { - // Unexpected (data_type, stage) outside the construction-time set — - // resolve on the fly so no signal is silently dropped. - o = vec.WithLabelValues(dataType, stage) - } - o.Observe(d.Seconds()) +// IngestStage records the per-stage cold duration. The per-stage item counts are +// not exported to Prometheus (cold_items_total already carries volume); they exist +// on the interface for the CSV bench sink. +func (p *PrometheusSink) IngestStage(dataType, stage string, d time.Duration, _ int) { + p.coldStage[dataType+"/"+stage].Observe(d.Seconds()) } diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/service.go b/cmd/stellar-rpc/internal/fullhistory/ingest/service.go index 1d5430f06..ec0c317d7 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/service.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/service.go @@ -6,9 +6,9 @@ import ( "fmt" "time" - "golang.org/x/sync/errgroup" - "github.com/stellar/go-stellar-sdk/xdr" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" ) // errOrFirst returns prev if it is non-nil, else cur. Used to retain the FIRST @@ -21,49 +21,48 @@ func errOrFirst(prev, cur error) error { return cur } -// HotService fans one ledger out to a set of HotIngesters concurrently, waiting -// for all to finish before returning (so the borrowed view is safe to release), -// and emits the aggregate per-ledger wall-clock via the sink. +// HotService commits one ledger to the shared per-chunk hot DB as ONE atomic +// synced WriteBatch across all hot CFs (decision (a)) and emits the single hot +// signal family: one HotPhase per hotchunk.Phase. 
No fan-out — the three types are +// CFs of one RocksDB committing in one WriteBatch (hotchunk.DB.IngestLedger). type HotService struct { - ingesters []HotIngester - sink MetricSink + db *hotchunk.DB + sink MetricSink } -// NewHotService builds a HotService over the enabled hot ingesters. A nil sink -// defaults to NopSink. -func NewHotService(ingesters []HotIngester, sink MetricSink) *HotService { - return &HotService{ingesters: ingesters, sink: orNop(sink)} +// NewHotService builds a HotService that writes ledgers, txhash, and events into +// the shared per-chunk DB. A nil sink defaults to NopSink. +func NewHotService(db *hotchunk.DB, sink MetricSink) *HotService { + return &HotService{db: db, sink: orNop(sink)} } -// Ingest runs every hot ingester on lcm concurrently and waits for all of them. -// seq is the driver-validated sequence of lcm, passed through unchanged. The -// first ingester error is returned; the production HotIngester.Ingest -// implementations do not check ctx.Err(), so the siblings run to completion -// regardless (g.Wait still returns the first error). The single-ingester config -// skips the errgroup entirely. HotLedgerTotal is emitted with the fan-out -// wall-clock regardless of success. -func (s *HotService) Ingest(ctx context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error { - start := time.Now() - switch len(s.ingesters) { - case 0: - // No hot ingesters enabled for this tier: nothing to do. - s.sink.HotLedgerTotal(time.Since(start)) - return nil - case 1: - // Single ingester: call directly, skipping the errgroup overhead. - err := s.ingesters[0].Ingest(ctx, seq, lcm) - s.sink.HotLedgerTotal(time.Since(start)) - return err - default: - // Two or more: concurrent fan-out, waiting for all. 
- g, gctx := errgroup.WithContext(ctx) - for _, ing := range s.ingesters { - g.Go(func() error { return ing.Ingest(gctx, seq, lcm) }) +// Ingest commits lcm to the shared hot DB in one atomic synced WriteBatch +// (decision (a)) and emits one HotPhase per phase from the ledger report. Each +// phase carries its own wall-clock (the phases partition the per-ledger total), +// the write phases carry per-type item volume on success, and the outcome lands on +// the phase that failed BY CONSTRUCTION — a decode failure on PhaseExtract, a +// commit failure on PhaseCommit — so there is no mislabeled batch-scoped error. +// On failure only phases [0, Failed] ran, so only those are emitted (and with zero +// items — nothing landed durably); on success every phase is emitted. +func (s *HotService) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error { + rep, err := s.db.IngestLedger(seq, lcm) + + last := hotchunk.NumPhases - 1 + if err != nil { + last = rep.Failed + } + for p := hotchunk.Phase(0); p <= last; p++ { + items := rep.Phases[p].Items + var perr error + if err != nil { + items = 0 // the failure path committed nothing durably + if p == rep.Failed { + perr = err + } } - err := g.Wait() - s.sink.HotLedgerTotal(time.Since(start)) - return err + s.sink.HotPhase(p, rep.Phases[p].Dur, items, perr) } + return err } // ColdService drives a set of ColdIngesters for one chunk: sequential per-ledger @@ -124,11 +123,12 @@ func (s *ColdService) Finalize(ctx context.Context) error { } // Close closes every cold ingester, joining each Close error, and emits the -// aggregate ColdChunkTotal if Finalize never reached it (the failure path). Each -// ingester's own Close in turn emits that ingester's per-chunk ColdIngest if its -// Finalize never ran, so a failed chunk still produces one per-ingester signal -// and one aggregate. Idempotent: on the failure path a writer's Close drops its -// partial file; after a successful Finalize all emissions are no-ops. 
+// aggregate ColdChunkTotal if Finalize never reached it (the failure path). A +// per-ingester ColdIngest is emitted only from a TERMINAL step (a failed Ingest, +// via coldMetrics.observe, or Finalize) — never from Close, so an ingester rolled +// back before any work produces no per-ingester sample (only the aggregate here). +// Idempotent: on the failure path a writer's Close drops its partial file; after +// a successful Finalize this is a no-op for the aggregate. func (s *ColdService) Close() error { var err error for _, ing := range s.ingesters { diff --git a/cmd/stellar-rpc/internal/fullhistory/ingest/txhash.go b/cmd/stellar-rpc/internal/fullhistory/ingest/txhash.go index b80f77de5..7d98b0a70 100644 --- a/cmd/stellar-rpc/internal/fullhistory/ingest/txhash.go +++ b/cmd/stellar-rpc/internal/fullhistory/ingest/txhash.go @@ -16,51 +16,6 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash" ) -// ───────────────────────── Hot ingester ───────────────────────── - -// txhashHot extracts the ledger's transaction hashes via the SDK -// (sdkingest.ExtractTxHashes — apply order, hashes copied off the view) and -// writes (txhash, seq) tuples in one AddEntries call (one fsync per ledger). -// The store is INJECTED and owned by the caller. -type txhashHot struct { - store *txhash.HotStore - sink MetricSink -} - -// NewTxhashHotIngester returns a HotIngester writing (txhash, seq) tuples into -// the injected, caller-owned store. 
-func NewTxhashHotIngester(store *txhash.HotStore, sink MetricSink) HotIngester { - return &txhashHot{store: store, sink: orNop(sink)} -} - -func (t *txhashHot) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMetaView) error { - m := newHotMetrics(t.sink, dataTypeTxhash) - var err error - defer func() { m.emit(err) }() - - estart := time.Now() - hashes, eerr := sdkingest.ExtractTxHashes(lcm) - if eerr != nil { - err = fmt.Errorf("ExtractTxHashes seq %d: %w", seq, eerr) - return err - } - t.sink.IngestStage(dataTypeTxhash, tierHot, stageExtract, time.Since(estart), len(hashes)) - if len(hashes) > 0 { - entries := make([]txhash.Entry, len(hashes)) - for i, h := range hashes { - entries[i] = txhash.Entry{Hash: [32]byte(h), LedgerSeq: seq} - } - wstart := time.Now() - if aerr := t.store.AddEntries(entries); aerr != nil { - err = fmt.Errorf("AddEntries(seq=%d, n=%d): %w", seq, len(entries), aerr) - return err - } - t.sink.IngestStage(dataTypeTxhash, tierHot, stageWrite, time.Since(wstart), len(entries)) - } - m.items = len(hashes) - return nil -} - // ───────────────────────── Cold ingester ───────────────────────── // txhashCold accumulates (txhash[:ColdKeySize], seq) tuples per ledger; at @@ -78,19 +33,18 @@ type txhashCold struct { } // NewTxhashColdIngester returns a ColdIngester that accumulates a per-chunk -// sorted .bin under coldDir's bucket subdirectory, written at Finalize -// (overwriting any prior attempt's file — see the package doc's artifact -// model). 
-func NewTxhashColdIngester(coldDir string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) { - bucketDir := filepath.Join(coldDir, chunkID.BucketID()) - if err := os.MkdirAll(bucketDir, 0o755); err != nil { - return nil, fmt.Errorf("mkdir %s: %w", bucketDir, err) +// sorted .bin at binPath — the caller's geometry.Layout.TxHashBinPath(chunkID), +// so the write path is Layout's single derivation — written at Finalize +// (overwriting any prior attempt's file — see the package doc's artifact model). +func NewTxhashColdIngester(binPath string, chunkID chunk.ID, sink MetricSink) (ColdIngester, error) { + if err := os.MkdirAll(filepath.Dir(binPath), 0o755); err != nil { + return nil, fmt.Errorf("mkdir %s: %w", filepath.Dir(binPath), err) } // The initial cap (64Ki entries, ~1.3 MB) deliberately starts well below a // typical pubnet chunk's tx count (~3M): empty/sparse chunks stay cheap, // and a busy chunk just pays a few amortized growths. return &txhashCold{ - binPath: filepath.Join(bucketDir, txhash.ColdBinName(chunkID)), + binPath: binPath, chunkID: chunkID, entries: make([]txhash.ColdEntry, 0, 1<<16), metrics: newColdMetrics(sink, dataTypeTxhash), @@ -105,7 +59,7 @@ func (t *txhashCold) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMe // chunk that intermediate would be hundreds of MB of transient garbage. hashes, err := sdkingest.ExtractTxHashes(lcm) if err != nil { - t.metrics.observe(time.Since(start), 0, err) + t.metrics.observe(time.Since(start), 0, err) // terminal: observe emits the per-ingester signal return fmt.Errorf("ExtractTxHashes seq %d: %w", seq, err) } for i := range hashes { @@ -122,7 +76,7 @@ func (t *txhashCold) Ingest(_ context.Context, seq uint32, lcm xdr.LedgerCloseMe // write is the finalize stage; there is no separate cold write stage for // txhash.) 
d := time.Since(start) - t.metrics.sink.IngestStage(dataTypeTxhash, tierCold, stageExtract, d, len(hashes)) + t.metrics.sink.IngestStage(dataTypeTxhash, stageExtract, d, len(hashes)) t.metrics.observe(d, len(hashes), nil) return nil } @@ -139,20 +93,15 @@ func (t *txhashCold) Finalize(_ context.Context) error { }) err := txhash.WriteColdBin(t.binPath, t.entries) if err == nil { - t.metrics.sink.IngestStage(dataTypeTxhash, tierCold, stageFinalize, time.Since(start), len(t.entries)) + t.metrics.sink.IngestStage(dataTypeTxhash, stageFinalize, time.Since(start), len(t.entries)) } t.metrics.emit(time.Since(start), err) return err } -// Close emits the cold metrics if Finalize never ran (the failure path); emit is -// a no-op after Finalize. There is no open file handle to release (the .bin is -// written in Finalize). +// Close is a no-op: there is no open file handle to release (the .bin is written +// in Finalize), and the cold metric is emitted on a terminal Ingest error or in +// Finalize — never here, so a rolled-back build produces no phantom sample. func (t *txhashCold) Close() error { - t.metrics.emit(0, nil) return nil } - -// abortMetric records a synthetic abort error so a subsequent Close emit does -// not look like a clean success. Used by the constructor-rollback path. -func (t *txhashCold) abortMetric(err error) { t.metrics.recordErr(err) } diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/discard_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/discard_test.go new file mode 100644 index 000000000..fd6253929 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/discard_test.go @@ -0,0 +1,30 @@ +package lifecycle + +import ( + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// TestDiscardHotTier_RemovesDirAndKey retires the bracket: the key is deleted +// and the dir is gone. 
A second discard is a no-op. +func TestDiscardHotTier_RemovesDirAndKey(t *testing.T) { + cat, _ := testCatalog(t) + c := chunk.ID(4) + db := openLiveHotDB(t, cat, c) + require.NoError(t, db.Close()) + + require.NoError(t, cat.DiscardHotChunk(c)) + + has, err := hotKeyExists(cat, c) + require.NoError(t, err) + assert.False(t, has, "the hot key is deleted") + _, statErr := os.Stat(cat.Layout().HotChunkPath(c)) + assert.True(t, os.IsNotExist(statErr), "the dir is removed") + + require.NoError(t, cat.DiscardHotChunk(c), "second discard is a no-op") +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/eligibility.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/eligibility.go new file mode 100644 index 000000000..4db9c212c --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/eligibility.go @@ -0,0 +1,157 @@ +package lifecycle + +import ( + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// The discard and prune eligibility scans. Each returns zero-arg op closures the +// tick calls in order. Both are PURE READS — eligibility comes from durable keys +// alone, so re-running against the same snapshot yields nothing (quiescence). + +// eligibleDiscardOps returns a discard closure per hot DB the cold artifacts now +// fully serve (or that fell past retention). Per chunk: below the floor → discard; +// complete (last <= through), nothing pending, and the index covers it → discard; +// otherwise (live, or frozen awaiting coverage) → leave alone. +// catalog.DiscardHotChunk is idempotent, so a crash between freeze and discard +// self-heals next tick. 
+func eligibleDiscardOps(cat *catalog.Catalog, gate RetentionFloor, through uint32) ([]func() error, error) { + hot, err := cat.HotChunkKeys() + if err != nil { + return nil, err + } + + var ops []func() error + for _, c := range hot { + last := c.LastLedger() + switch { + case gate.Excludes(c): + ops = append(ops, func() error { return cat.DiscardHotChunk(c) }) + case last <= through: + // Coverage is read once here and passed into pendingArtifacts — the + // discard requires covers independently, so the whole predicate is + // ledgers-frozen && events-frozen && covers. + covers, cerr := cat.FrozenIndexCovers(c) + if cerr != nil { + return nil, cerr + } + pending, perr := pendingArtifacts(c, cat, covers) + if perr != nil { + return nil, perr + } + if pending.Empty() && covers { + ops = append(ops, func() error { return cat.DiscardHotChunk(c) }) + } + // else: frozen awaiting coverage, or still producing — leave alone. + } + // default (last > through): the live chunk or above — ingestion's, not ours. + } + return ops, nil +} + +// pendingArtifacts lists which outputs chunk still needs: ledgers and events must +// be frozen; txhash/.bin is exempt when the window's index already covers the +// chunk (covers, computed by the caller — after finalization the chunk:c:txhash +// key is demoted/swept, so regenerating the .bin would orphan it). 
+func pendingArtifacts(c chunk.ID, cat *catalog.Catalog, covers bool) (catalog.ArtifactSet, error) { + var need catalog.ArtifactSet + for _, kind := range []geometry.Kind{geometry.KindLedgers, geometry.KindEvents} { + state, err := cat.State(c, kind) + if err != nil { + return need, err + } + if state != geometry.StateFrozen { + need = need.Add(kind) + } + } + txState, err := cat.State(c, geometry.KindTxHash) + if err != nil { + return need, err + } + if txState != geometry.StateFrozen && !covers { + need = need.Add(geometry.KindTxHash) + } + return need, nil +} + +// eligiblePruneOps is the system's only file-deleter, key-driven, covering both +// key families. It returns sweep closures (SweepTxHashIndexKey per index key, one +// batched SweepChunkArtifacts for the chunk family). "Below the floor" is the +// gate predicate shared with the discard scan and read path, so prune deletes +// exactly what the reader has stopped admitting. +// The second return is the total number of artifacts the ops will sweep (one per +// index-key op plus every ref in the single batched chunk sweep), so the caller +// meters Prune in artifacts — the same unit the Phase 1 sweep reports — rather +// than in op closures (the chunk family collapses N artifacts into one op). +func eligiblePruneOps(cat *catalog.Catalog, gate RetentionFloor) ([]func() error, int, error) { + var ops []func() error + artifacts := 0 + + // Index family: transient debris from any window, plus frozen keys below the floor. + idxKeys, err := cat.AllTxHashIndexKeys() + if err != nil { + return nil, 0, err + } + for _, cov := range idxKeys { + switch { + case cov.State == geometry.StateFreezing || cov.State == geometry.StatePruning: + // Transient debris (a crashed build or unfinished demotion). Safe only + // because no build is in flight when this scan runs (it follows + // executePlan's return, and backfill finishes before the loop starts). 
+ ops = append(ops, func() error { return cat.SweepTxHashIndexKey(cov) }) + artifacts++ + case gate.Excludes(cat.TxHashIndexLayout().LastChunk(cov.Index)): + // Frozen index key below the floor; the sweep demotes it first. + ops = append(ops, func() error { return cat.SweepTxHashIndexKey(cov) }) + artifacts++ + } + } + + // Chunk family: swept in one batch. + refs, err := cat.ChunkArtifactKeys() + if err != nil { + return nil, 0, err + } + var sweep []catalog.ArtifactRef + for _, ref := range refs { + switch { + case gate.Excludes(ref.Chunk): + // Past retention: any state goes. + sweep = append(sweep, ref) + case ref.State == geometry.StatePruning: + // In-retention .bin demoted by its window's terminal commit batch. + sweep = append(sweep, ref) + case ref.Kind == geometry.KindTxHash: + // A frozen/freezing chunk:c:txhash inside a FINALIZED window: re-derived + // (or left mid-write) by a widening backfill that crashed before its + // terminal rebuild, then abandoned when retention narrowed. The terminal + // .idx provably covers the chunk and is never re-materialized, so it's + // redundant. + redundant, rerr := txhashRedundantInFinalizedWindow(cat, ref.Chunk) + if rerr != nil { + return nil, 0, rerr + } + if redundant { + sweep = append(sweep, ref) + } + } + } + if len(sweep) > 0 { + ops = append(ops, func() error { return cat.SweepChunkArtifacts(sweep) }) + artifacts += len(sweep) + } + return ops, artifacts, nil +} + +// txhashRedundantInFinalizedWindow reports whether c's window has a TERMINAL +// frozen index coverage (Hi == the window's last chunk) — the branch that makes +// INV-2's no-leftover-txhash-keys clause self-healing, not merely auditable. 
+func txhashRedundantInFinalizedWindow(cat *catalog.Catalog, c chunk.ID) (bool, error) { + w := cat.TxHashIndexLayout().TxHashIndexID(c) + fk, ok, err := cat.FrozenTxHashIndex(w) + if err != nil { + return false, err + } + return ok && cat.TxHashIndexLayout().IsTerminalCoverage(fk), nil +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/helpers_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/helpers_test.go new file mode 100644 index 000000000..09bc2bad1 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/helpers_test.go @@ -0,0 +1,156 @@ +package lifecycle + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" + + supportlog "github.com/stellar/go-stellar-sdk/support/log" + "github.com/stellar/go-stellar-sdk/xdr" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/metastore" +) + +// This file provides the shared test scaffolding the lifecycle tests need. The +// catalog/fixture helpers are copied verbatim from the root fullhistory package's +// helpers_test.go (which still serves the root tests). The hot-tier helpers +// (openHotDBForChunk / openLiveHotDB) create the SAME on-disk "ready" hot DBs the +// real daemon does, so the lifecycle tick freezes and the watermark refinement +// read the genuine hot DBs by path (the way production does after #22). + +// testCPI is the tx-hash index width tests build layouts with; equals the +// production constant so on-disk geometry reads back identically. 
+const testCPI = geometry.ChunksPerTxhashIndex + +func silentLogger() *supportlog.Entry { + var buf bytes.Buffer + log := supportlog.New() + log.SetLevel(logrus.DebugLevel) + log.SetOutput(&buf) + return log +} + +// newTestCatalog builds a Catalog over a real metastore on temp dirs with +// cpi-wide tx-hash indexes; returns the catalog and artifact root (the store is +// closed via t.Cleanup). +func newTestCatalog(t *testing.T, cpi uint32) (*catalog.Catalog, string) { + t.Helper() + metaDir := t.TempDir() + artifactRoot := t.TempDir() + + store, err := metastore.New(filepath.Join(metaDir, "rocksdb"), silentLogger()) + require.NoError(t, err) + t.Cleanup(func() { _ = store.Close() }) + + idxLayout, err := geometry.NewTxHashIndexLayout(cpi) + require.NoError(t, err) + + return catalog.NewCatalog(store, geometry.NewLayout(artifactRoot), idxLayout), artifactRoot +} + +// testCatalog builds a catalog with the default (wide) tx-hash index, returning it +// and the artifact root. +func testCatalog(t *testing.T) (*catalog.Catalog, string) { + t.Helper() + cat, root := newTestCatalog(t, testCPI) + return cat, root +} + +// smallTxHashIndexCatalog builds a test catalog whose indexes are cpi chunks +// wide, so a "terminal" (full-index) build needs only a few chunks. Returns the +// catalog and the artifact root. +func smallTxHashIndexCatalog(t *testing.T, cpi uint32) (*catalog.Catalog, string) { + t.Helper() + cat, root := newTestCatalog(t, cpi) + return cat, root +} + +// freezeKinds flips the given per-chunk kinds to "frozen" via the one-write protocol. +func freezeKinds(t *testing.T, cat *catalog.Catalog, chunkID chunk.ID, kinds ...geometry.Kind) { + t.Helper() + require.NoError(t, cat.MarkChunkFreezing(chunkID, kinds...)) + require.NoError(t, cat.FlipChunkFrozen(chunkID, kinds...)) +} + +// freezeCoverage marks and commits a frozen index coverage [lo, hi] for index w. 
+func freezeCoverage(t *testing.T, cat *catalog.Catalog, w geometry.TxHashIndexID, lo, hi chunk.ID) { + t.Helper() + cov, err := cat.MarkTxHashIndexFreezing(w, lo, hi) + require.NoError(t, err) + require.NoError(t, cat.CommitTxHashIndex(cov)) +} + +// zeroTxLCMBytes builds wire bytes of a minimal valid zero-tx V2 LedgerCloseMeta; +// zero-tx keeps a full 10k-ledger chunk pass cheap. +func zeroTxLCMBytes(t *testing.T, seq uint32) []byte { + t.Helper() + lcm := xdr.LedgerCloseMeta{ + V: 2, + V2: &xdr.LedgerCloseMetaV2{ + LedgerHeader: xdr.LedgerHeaderHistoryEntry{ + Header: xdr.LedgerHeader{ + ScpValue: xdr.StellarValue{CloseTime: xdr.TimePoint(0)}, + LedgerSeq: xdr.Uint32(seq), + }, + }, + TxSet: xdr.GeneralizedTransactionSet{ + V: 1, + V1TxSet: &xdr.TransactionSetV1{Phases: nil}, + }, + TxProcessing: nil, + }, + } + raw, err := lcm.MarshalBinary() + require.NoError(t, err) + return raw +} + +// --------------------------------------------------------------------------- +// Hot-tier test scaffolding: a test-local equivalent of the root package's hot +// DB opener (hotloop.go's openHotDBForChunk). It uses only the public +// hotchunk/catalog APIs the production code uses, so a lifecycle test creates the +// SAME on-disk "ready" hot DB the real daemon would — which the freeze and the +// watermark refinement then open by Layout path, exactly as production does. +// --------------------------------------------------------------------------- + +// openHotDBForChunk creates a "ready" shared hot DB for chunkID under the +// hot:chunk bracket (transient -> create -> ready) and returns an open handle the +// caller owns. The test equivalent of the production opener, trimmed to the +// create branch the lifecycle tests need (no crash-recovery / fsync — those edges +// are covered by the root hotloop_test.go opener tests). 
+func openHotDBForChunk(cat *catalog.Catalog, chunkID chunk.ID, logger *supportlog.Entry) (*hotchunk.DB, error) { + dir := cat.Layout().HotChunkPath(chunkID) + if err := os.RemoveAll(dir); err != nil { + return nil, fmt.Errorf("wipe leftover hot dir %s: %w", dir, err) + } + if err := cat.PutHotTransient(chunkID); err != nil { + return nil, fmt.Errorf("mark hot transient chunk %s: %w", chunkID, err) + } + db, err := hotchunk.Open(dir, chunkID, logger) + if err != nil { + return nil, fmt.Errorf("create hot DB chunk %s: %w", chunkID, err) + } + if err := cat.FlipHotReady(chunkID); err != nil { + _ = db.Close() + return nil, fmt.Errorf("flip hot ready chunk %s: %w", chunkID, err) + } + return db, nil +} + +// openLiveHotDB opens (and brackets ready) the live hot DB for a chunk via the +// test opener, returning the handle. +func openLiveHotDB(t *testing.T, cat *catalog.Catalog, c chunk.ID) *hotchunk.DB { + t.Helper() + db, err := openHotDBForChunk(cat, c, silentLogger()) + require.NoError(t, err) + return db +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/hotkeys_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/hotkeys_test.go new file mode 100644 index 000000000..e0fb16c79 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/hotkeys_test.go @@ -0,0 +1,54 @@ +package lifecycle + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// writeArtifact writes a placeholder artifact file at path (creating parents), +// so a test can assert presence/absence around the catalog protocol. 
+func writeArtifact(t *testing.T, path string) { + t.Helper() + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile(path, []byte("artifact"), 0o644)) +} + +// hotKeyExists reports whether chunk c has a hot:chunk key (any value). The +// catalog's key existence read is unexported; this is the lifecycle-package test +// shim over the public HotState ("" ⇒ absent). +func hotKeyExists(cat *catalog.Catalog, c chunk.ID) (bool, error) { + s, err := cat.HotState(c) + return s != "", err +} + +func TestRoundTripHotKeys(t *testing.T) { + cat, _ := testCatalog(t) + + state, err := cat.HotState(7) + require.NoError(t, err) + require.Equal(t, geometry.HotState(""), state) + + require.NoError(t, cat.PutHotTransient(7)) + state, err = cat.HotState(7) + require.NoError(t, err) + require.Equal(t, geometry.HotTransient, state) + + require.NoError(t, cat.FlipHotReady(7)) + state, err = cat.HotState(7) + require.NoError(t, err) + require.Equal(t, geometry.HotReady, state) + + require.NoError(t, cat.DeleteHotKey(7)) + state, err = cat.HotState(7) + require.NoError(t, err) + require.Equal(t, geometry.HotState(""), state) + // Idempotent on a missing key. 
+ require.NoError(t, cat.DeleteHotKey(7)) +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle.go new file mode 100644 index 000000000..3046a034c --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle.go @@ -0,0 +1,228 @@ +package lifecycle + +import ( + "context" + "fmt" + "sync/atomic" + "time" + + "github.com/cenkalti/backoff/v4" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/backfill" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/observability" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// The lifecycle tick runs three stages in order: (1) plan-and-execute (the same +// resolve+executePlan as backfill, over [floor, lastChunk]); (2) discard scan; +// (3) prune scan. The tick is a pure function of the catalog — the two goroutines +// share no state. +// +// The retention floor has two roles with OPPOSITE safe directions (design +// "Lifecycle"): as a RETENTION boundary erring low is harmless (an extra chunk +// lingers, or a read returns not-found via the missing-file rule); as a +// PRODUCTION boundary erring low would in principle plan a build below existing +// storage — but producibility is enforced lazily per chunk in resolve, so the +// plan simply spans [floor, lastChunk] and extending the bottom is backfill's job. + +// Config bundles the tick/loop dependencies. It composes the scheduler's +// ExecConfig (shared postconditions + worker pool with backfill) plus the +// retention knob. +type Config struct { + backfill.ExecConfig + + // RetentionChunks bounds the sliding retention floor's width. 0 disables the + // sliding floor (the fixed earliest-ledger floor alone applies). 
+ RetentionChunks uint32 + + // OpRetryAttempts / OpRetryBackoff bound the per-op retry the discard/prune + // sweeps use (see runOps). Zero values fall back to defaults in + // WithLifecycleDefaults. + OpRetryAttempts int + OpRetryBackoff time.Duration +} + +const ( + defaultOpRetryAttempts = 3 + defaultOpRetryBackoff = 5 * time.Second +) + +// WithLifecycleDefaults returns a copy with the embedded ExecConfig defaults and +// the op-retry defaults applied. Called once at startup before launching the loop. +func (cfg Config) WithLifecycleDefaults() Config { + cfg.ExecConfig = cfg.WithDefaults() + if cfg.OpRetryAttempts < 1 { + cfg.OpRetryAttempts = defaultOpRetryAttempts + } + if cfg.OpRetryBackoff <= 0 { + cfg.OpRetryBackoff = defaultOpRetryBackoff + } + return cfg +} + +// runOps runs each op in order, retrying a failed op a bounded number of times with +// a fixed pause before giving up. The discard/prune ops are idempotent file +// deletions, so a transient failure (a busy file, a slow fsync) is exactly the +// retryable kind — retrying in place avoids canceling ingestion through the shared +// errgroup and forcing a whole-daemon restart (which relaunches captive core) for +// a retryable file operation. It checks ctx between ops (and the backoff aborts on +// ctx cancellation) so a shutdown mid-scan stops promptly; the ctx error surfaces +// up through Loop for supervise to classify as clean. +func runOps(ctx context.Context, cfg Config, ops []func() error) error { + // A zero-value Config (no WithLifecycleDefaults, e.g. a test harness) runs each + // op exactly once. + attempts := max(cfg.OpRetryAttempts, 1) + for _, op := range ops { + if err := ctx.Err(); err != nil { + return err + } + // attempts total tries == 1 initial + (attempts-1) retries, fixed pause. 
+ //nolint:gosec // attempts >= 1, so attempts-1 >= 0 + bo := backoff.WithMaxRetries(backoff.NewConstantBackOff(cfg.OpRetryBackoff), uint64(attempts-1)) + if err := backoff.Retry(op, backoff.WithContext(bo, ctx)); err != nil { + return err + } + } + return nil +} + +// runLifecycle runs one tick over the three stages for just-completed chunk +// lastChunk. through = lastChunk.LastLedger() is the single snapshot every stage +// shares, so a boundary committing mid-tick can't make stages contradict (it's +// next tick's work). Plan range is [floor, lastChunk] (start raised to storage); +// discard/prune key off through. +// +// It returns the first stage error WITHOUT classifying it: Loop propagates it to +// run's errgroup and supervise decides clean-vs-restart (a canceled ctx surfaces +// as a ctx error supervise treats as a clean shutdown). +func runLifecycle(ctx context.Context, cfg Config, cat *catalog.Catalog, lastChunk chunk.ID) error { + metrics := observability.MetricsOrNop(cfg.Metrics) + logger := cfg.Logger + + // The one snapshot every stage shares. earliest and the retention gate are read + // and computed ONCE here (not re-derived per scan), then passed to both scans. + through := lastChunk.LastLedger() + + earliest, _, err := cat.EarliestLedger() + if err != nil { + return fmt.Errorf("read earliest ledger: %w", err) + } + floorLedger := EffectiveRetentionFloor(through, cfg.RetentionChunks, earliest) + gate := RetentionFloorAt(floorLedger) + + // Retention-floor gauge only. The last-committed gauge is owned by the ingestion + // loop (which holds the true, possibly mid-chunk value); re-emitting it here from + // the chunk-aligned `through` would regress it on every tick. + metrics.RetentionFloor(floorLedger) + logger.WithField("through", through). + WithField("floor_chunk", gate.FirstChunk().String()). 
+ Debug("streaming: lifecycle tick — derived snapshot") + + // Stage 1 — plan-and-execute (freeze + index fold) over [floor, lastChunk], via + // the same entry point backfill uses (resolve → executePlan → Freeze metric, + // recorded internally). A canceled ctx makes RunBackfill return ctx.Err(), which + // propagates up for supervise to treat as a clean shutdown. lastChunk is always + // a completed chunk (boundary fence + post-backfill seed), so the only guard + // needed is the empty-range check (floor above lastChunk when retention outran + // production). An empty range emits no Freeze sample — the Discard/Prune samples + // below carry empty-tick visibility. + if start := gate.FirstChunk(); start <= lastChunk { + if eerr := backfill.RunBackfill(ctx, cfg.ExecConfig, start, lastChunk); eerr != nil { + return fmt.Errorf("run backfill [%s,%s]: %w", start, lastChunk, eerr) + } + } + + // Stage 2 — discard scan. + discardStart := time.Now() + discardOps, err := eligibleDiscardOps(cat, gate, through) + if err != nil { + return fmt.Errorf("eligible discard ops: %w", err) + } + if err := runOps(ctx, cfg, discardOps); err != nil { + return fmt.Errorf("discard op: %w", err) + } + metrics.Discard(len(discardOps), time.Since(discardStart)) + if len(discardOps) > 0 { + logger.WithField("discarded", len(discardOps)).Info("streaming: lifecycle discard stage complete") + } + + // Live hot-chunk gauge after the discard stage. + hot, err := cat.HotChunkKeys() + if err != nil { + return fmt.Errorf("read hot chunk keys: %w", err) + } + metrics.LiveHotChunks(len(hot)) + + // Stage 3 — prune scan. 
+ pruneStart := time.Now() + pruneOps, prunedArtifacts, err := eligiblePruneOps(cat, gate) + if err != nil { + return fmt.Errorf("eligible prune ops: %w", err) + } + if err := runOps(ctx, cfg, pruneOps); err != nil { + return fmt.Errorf("prune op: %w", err) + } + metrics.Prune(prunedArtifacts, time.Since(pruneStart)) + if prunedArtifacts > 0 { + logger.WithField("pruned", prunedArtifacts).Info("streaming: lifecycle prune stage complete") + } + return nil +} + +// BoundarySignal couples ingestion (the producer) to the lifecycle Loop (the +// consumer): ingestion stores the latest completed chunk id and pings a +// 1-buffered wake; the Loop blocks on the wake, then reads the latest id. A +// latest-CELL (not a queue) means a slow lifecycle can never fall behind — one +// tick over [floor, latest] subsumes every skipped boundary — so there is no +// bounded buffer to overflow and thus no "fell behind" fatal path. Safe for one +// producer and one consumer. +type BoundarySignal struct { + latest atomic.Uint32 + wake chan struct{} +} + +// NewBoundarySignal returns a ready signal with an empty latest cell. +func NewBoundarySignal() *BoundarySignal { + return &BoundarySignal{wake: make(chan struct{}, 1)} +} + +// Publish records c as the latest completed chunk and wakes the Loop. The wake is +// non-blocking: a pending wake already covers this boundary (the Loop will read +// the newest latest when it runs), so a full buffer is dropped, never blocked on. +func (s *BoundarySignal) Publish(c chunk.ID) { + s.latest.Store(uint32(c)) + select { + case s.wake <- struct{}{}: + default: + } +} + +// latestChunk returns the most recently published completed chunk id. A wake is +// only ever sent by Publish, AFTER it stores the cell, so a received wake proves a +// value is present — no separate "was anything published" flag is needed. +func (s *BoundarySignal) latestChunk() chunk.ID { + return chunk.ID(s.latest.Load()) +} + +// Loop is the event-driven lifecycle goroutine. 
It blocks on the boundary signal's +// wake, reads the latest completed chunk id, and runs one tick over +// [floor, lastChunk] (which subsumes every boundary skipped while it was busy). It +// selects on ctx.Done() too, so it never blocks past shutdown. +// +// It returns the first tick error to its caller (run() joins it with ingestion in +// an errgroup, so supervise decides clean-vs-restart). A cancellation observed at +// the select returns nil; a cancellation mid-tick returns the tick's wrapped ctx +// error — both are clean, since supervise keys off the daemon ctx, not this return. +func Loop(ctx context.Context, cfg Config, cat *catalog.Catalog, sig *BoundarySignal) error { + for { + select { + case <-ctx.Done(): + return nil + case <-sig.wake: + if err := runLifecycle(ctx, cfg, cat, sig.latestChunk()); err != nil { + return err + } + } + } +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_arith_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_arith_test.go new file mode 100644 index 000000000..84d230749 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_arith_test.go @@ -0,0 +1,94 @@ +package lifecycle + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// --------------------------------------------------------------------------- +// Arithmetic: geometry.LastCompleteChunkAt, EffectiveRetentionFloor. 
+// --------------------------------------------------------------------------- + +func TestLastCompleteChunkAt(t *testing.T) { + tests := []struct { + name string + ledger uint32 + want int64 + }{ + {"below first chunk's last ledger => sentinel -1", chunk.ID(0).LastLedger() - 1, -1}, + {"genesis sentinel (FirstLedgerSeq-1) => -1", chunk.FirstLedgerSeq - 1, -1}, + {"ledger 0 does not underflow => -1", 0, -1}, + {"chunk 0's last ledger => 0", chunk.ID(0).LastLedger(), 0}, + {"chunk 0's last ledger + 1 (into chunk 1) => still 0", chunk.ID(0).LastLedger() + 1, 0}, + {"chunk 5's last ledger => 5", chunk.ID(5).LastLedger(), 5}, + {"the doc's example 10_001 => 0", 10_001, 0}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.want, geometry.LastCompleteChunkAt(tc.ledger)) + }) + } +} + +func TestEffectiveRetentionFloor(t *testing.T) { + genesis := uint32(chunk.FirstLedgerSeq) + tests := []struct { + name string + upperBound uint32 + retentionChunks uint32 + earliest uint32 + want uint32 + }{ + { + name: "no sliding (retention 0): earliest floor wins", + upperBound: chunk.ID(100).LastLedger(), + retentionChunks: 0, + earliest: chunk.ID(10).FirstLedger(), + want: chunk.ID(10).FirstLedger(), + }, + { + name: "no sliding, no earliest pin: genesis", + upperBound: chunk.ID(100).LastLedger(), + retentionChunks: 0, + earliest: 0, + want: genesis, + }, + { + name: "sliding floor leads when above earliest", + upperBound: chunk.ID(100).LastLedger(), // last complete chunk = 100 + retentionChunks: 10, // floor chunk = 100-10+1 = 91 + earliest: 0, + want: chunk.ID(91).FirstLedger(), + }, + { + name: "earliest floor leads when above the sliding floor", + upperBound: chunk.ID(100).LastLedger(), + retentionChunks: 10, // sliding floor chunk = 91 + earliest: chunk.ID(95).FirstLedger(), // higher + want: chunk.ID(95).FirstLedger(), + }, + { + name: "retention wider than history clamps to chunk 0, never wraps", + upperBound: 
chunk.ID(3).LastLedger(), + retentionChunks: 1000, // sliding chunk = 3-1000+1 < 0 => clamp to chunk 0 + earliest: 0, + want: chunk.ID(0).FirstLedger(), + }, + { + name: "young store (upperBound below first chunk) clamps to chunk 0", + upperBound: chunk.FirstLedgerSeq + 5, // no complete chunk yet + retentionChunks: 5, + earliest: 0, + want: chunk.ID(0).FirstLedger(), + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.want, EffectiveRetentionFloor(tc.upperBound, tc.retentionChunks, tc.earliest)) + }) + } +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_helpers_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_helpers_test.go new file mode 100644 index 000000000..72c8471f6 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_helpers_test.go @@ -0,0 +1,192 @@ +package lifecycle + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/stellar/go-stellar-sdk/keypair" + "github.com/stellar/go-stellar-sdk/network" + "github.com/stellar/go-stellar-sdk/xdr" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/backfill" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// lifecyclePassphrase is the network passphrase the one-tx fixture hashes +// against (any stable value works; the index only needs deterministic hashes). 
+const lifecyclePassphrase = network.PublicNetworkPassphrase + +// oneTxLCMRand builds the wire bytes of a V2 LedgerCloseMeta carrying ONE +// transaction for seq, so a chunk ingested with at least one such ledger yields +// a NON-empty txhash .bin — streamhash refuses to build a cold index over zero +// keys (txhash.ErrEmptyBuildSet), so a fully zero-tx chunk cannot exercise the +// real index fold. Mirrors ingest_test's buildLCMReturningHashes, trimmed to one +// tx. +func oneTxLCMRand(t *testing.T, seq uint32) []byte { + t.Helper() + envelope := xdr.TransactionEnvelope{ + Type: xdr.EnvelopeTypeEnvelopeTypeTx, + V1: &xdr.TransactionV1Envelope{ + Tx: xdr.Transaction{ + SourceAccount: xdr.MustMuxedAddress(keypair.MustRandom().Address()), + Ext: xdr.TransactionExt{V: 1, SorobanData: &xdr.SorobanTransactionData{}}, + }, + }, + } + hash, err := network.HashTransactionInEnvelope(envelope, lifecyclePassphrase) + require.NoError(t, err) + + comp := []xdr.TxSetComponent{{ + Type: xdr.TxSetComponentTypeTxsetCompTxsMaybeDiscountedFee, + TxsMaybeDiscountedFee: &xdr.TxSetComponentTxsMaybeDiscountedFee{ + Txs: []xdr.TransactionEnvelope{envelope}, + }, + }} + opResults := []xdr.OperationResult{} + lcm := xdr.LedgerCloseMeta{ + V: 2, + V2: &xdr.LedgerCloseMetaV2{ + LedgerHeader: xdr.LedgerHeaderHistoryEntry{ + Header: xdr.LedgerHeader{ + ScpValue: xdr.StellarValue{CloseTime: xdr.TimePoint(0)}, + LedgerSeq: xdr.Uint32(seq), + }, + }, + TxSet: xdr.GeneralizedTransactionSet{ + V: 1, + V1TxSet: &xdr.TransactionSetV1{Phases: []xdr.TransactionPhase{{V: 0, V0Components: &comp}}}, + }, + TxProcessing: []xdr.TransactionResultMetaV1{{ + TxApplyProcessing: xdr.TransactionMeta{ + V: 4, + V4: &xdr.TransactionMetaV4{Operations: []xdr.OperationMetaV2{}}, + }, + Result: xdr.TransactionResultPair{ + TransactionHash: hash, + Result: xdr.TransactionResult{ + FeeCharged: 100, + Result: xdr.TransactionResultResult{Code: xdr.TransactionResultCodeTxSuccess, Results: &opResults}, + }, + }, + }}, + }, + 
} + raw, err := lcm.MarshalBinary() + require.NoError(t, err) + return raw +} + +// ingestFullHotChunk creates a "ready" hot DB for chunk c and ingests every +// ledger in the chunk (all CFs, contiguous from FirstLedger), then closes the +// write handle — the post-boundary state the lifecycle freezes from. The hot +// key is left "ready" and the dir is on disk, as the boundary handoff leaves it. +func ingestFullHotChunk(t *testing.T, cat *catalog.Catalog, c chunk.ID) { + t.Helper() + db := openLiveHotDB(t, cat, c) + for seq := c.FirstLedger(); seq <= c.LastLedger(); seq++ { + // The first ledger carries one tx so the chunk's txhash .bin is non-empty + // (streamhash refuses a zero-key index); the rest stay zero-tx for speed. + var raw []byte + if seq == c.FirstLedger() { + raw = oneTxLCMRand(t, seq) + } else { + raw = zeroTxLCMBytes(t, seq) + } + _, err := db.IngestLedger(seq, xdr.LedgerCloseMetaView(raw)) + require.NoError(t, err) + } + require.NoError(t, db.Close()) // release the write handle (boundary handoff) +} + +// lifecycleTestConfig wires a Config over the real production primitives. The +// freeze reads the hot tier by opening the chunk's real on-disk DB (created by +// ingestFullHotChunk) straight from its Layout path — the same open production +// does after #22. A tick failure now surfaces as runLifecycle's returned error +// (no Fatalf), so tests assert on that error rather than a recorder. +func lifecycleTestConfig(t *testing.T, cat *catalog.Catalog, retentionChunks uint32) Config { + t.Helper() + return Config{ + ExecConfig: backfill.ExecConfig{ + Catalog: cat, + Logger: silentLogger(), + Workers: 2, + Process: backfill.ProcessConfig{}, + }, + RetentionChunks: retentionChunks, + } +} + +// lastCompleteChunkAtID maps geometry.LastCompleteChunkAt to a chunk.ID (ok=false +// on a negative result). Was a production helper until #25 (the tick now plans +// [floor, lastChunk] without it); it lives here for the tick-mirroring helpers. 
+func lastCompleteChunkAtID(ledger uint32) (chunk.ID, bool) { + c := geometry.LastCompleteChunkAt(ledger) + if c < 0 { + return 0, false + } + return chunk.ID(c), true +} + +// runTickForCatalog runs one lifecycle tick the way ingestion would drive it: it +// derives the highest complete chunk from the catalog (the chunk id ingestion +// hands over at a boundary) and passes it as lastChunk, returning the tick's +// error. On a young network with no complete chunk it runs no tick (returns nil) — +// mirroring production, where the boundary/seed guard upstream never triggers the +// Loop in that state. +func runTickForCatalog(ctx context.Context, t *testing.T, cfg Config, cat *catalog.Catalog) error { + t.Helper() + through, err := deriveCompleteThrough(cat) + require.NoError(t, err) + last, ok := lastCompleteChunkAtID(through) + if !ok { + return nil + } + return runLifecycle(ctx, cfg, cat, last) +} + +// makeReadyHotDirNoData opens and closes a real (empty) hot DB for c so its dir +// exists on disk and its key is "ready" — the state a discard scan inspects +// without needing a full ingest. +func makeReadyHotDirNoData(t *testing.T, cat *catalog.Catalog, c chunk.ID) { + t.Helper() + db, err := openHotDBForChunk(cat, c, silentLogger()) + require.NoError(t, err) + require.NoError(t, db.Close()) +} + +// gateFor builds the retention gate the tick passes into the eligibility scans, +// from the same (through, retention, earliest) snapshot runLifecycle uses. +func gateFor(t *testing.T, cfg Config, cat *catalog.Catalog, through uint32) RetentionFloor { + t.Helper() + earliest, _, err := cat.EarliestLedger() + require.NoError(t, err) + return NewRetentionFloor(through, cfg.RetentionChunks, earliest) +} + +// assertQuiescent re-runs the tick's three derivations against the SAME through +// snapshot and asserts none schedule work — the quiescence postcondition. 
+func assertQuiescent(t *testing.T, cfg Config, cat *catalog.Catalog, through uint32) { + t.Helper() + earliest, _, err := cat.EarliestLedger() + require.NoError(t, err) + gate := NewRetentionFloor(through, cfg.RetentionChunks, earliest) + start := gate.FirstChunk() + if rangeEnd, ok := lastCompleteChunkAtID(through); ok && start <= rangeEnd { + // At quiescence resolve finds an empty plan, so RunBackfill (resolve + + // executePlan) is a no-op that returns nil — even with no Backend wired, + // since an empty plan never reaches backfillSource. + perr := backfill.RunBackfill(context.Background(), cfg.ExecConfig, start, rangeEnd) + assert.NoError(t, perr, "re-running backfill schedules no work at quiescence") + } + dops, err := eligibleDiscardOps(cat, gate, through) + require.NoError(t, err) + assert.Empty(t, dops, "re-scan finds no discard work at quiescence") + pops, _, err := eligiblePruneOps(cat, gate) + require.NoError(t, err) + assert.Empty(t, pops, "re-scan finds no prune work at quiescence") +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_loop_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_loop_test.go new file mode 100644 index 000000000..d8702a11e --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_loop_test.go @@ -0,0 +1,114 @@ +package lifecycle + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// --------------------------------------------------------------------------- +// Loop: selects on BOTH ctx.Done and the boundary signal's wake; reads the +// most-recent published chunk id from the latest-cell. 
+// --------------------------------------------------------------------------- + +// TestLifecycleLoop_RunsTickPerNotifyThenStopsOnCtx: a boundary signal (a completed +// chunk id) runs a tick; a ctx cancellation returns the loop. The loop never +// blocks forever and never fatals on shutdown. +func TestLifecycleLoop_RunsTickPerNotifyThenStopsOnCtx(t *testing.T) { + cat, _ := smallTxHashIndexCatalog(t, 1) + cfg := lifecycleTestConfig(t, cat, 0) + + // Make the tick observable WITHOUT a slow full ingest: chunk 0 is already + // fully frozen and folded into its (terminal, cpi=1) window, with a leftover + // "ready" hot DB on disk. The plan stage is a no-op; the discard scan retires + // chunk 0's hot DB. A live chunk 1 keeps chunk 0 below the partition. + freezeKinds(t, cat, 0, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash) + freezeCoverage(t, cat, cat.TxHashIndexLayout().TxHashIndexID(0), 0, 0) // terminal coverage of chunk 0 + makeReadyHotDirNoData(t, cat, 0) + live := openLiveHotDB(t, cat, 1) + t.Cleanup(func() { _ = live.Close() }) + + sig := NewBoundarySignal() + ctx, cancel := context.WithCancel(context.Background()) + done := make(chan error, 1) + go func() { done <- Loop(ctx, cfg, cat, sig) }() + + sig.Publish(chunk.ID(0)) // ingestion hands over the just-completed chunk 0 + require.Eventually(t, func() bool { + has, err := hotKeyExists(cat, 0) + return err == nil && !has + }, 10*time.Second, 20*time.Millisecond, "the signal ran a tick that discarded chunk 0") + + cancel() + select { + case err := <-done: + require.NoError(t, err, "a ctx-canceled Loop is a clean return") + case <-time.After(5 * time.Second): + t.Fatal("the loop did not return on ctx cancellation") + } +} + +// TestLifecycleLoop_DrainsToMostRecent: the latest-cell coalesces rapid +// boundaries — publishing 0 then 1 lands a tick over the most-recent (chunk 1) +// that subsumes chunk 0. 
With chunks 0 and 1 both frozen+covered and a live chunk +// 2, both are discarded (whether that takes one coalesced tick or two). +func TestLifecycleLoop_DrainsToMostRecent(t *testing.T) { + cat, _ := smallTxHashIndexCatalog(t, 1) + cfg := lifecycleTestConfig(t, cat, 0) + + for c := chunk.ID(0); c <= 1; c++ { + freezeKinds(t, cat, c, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash) + freezeCoverage(t, cat, cat.TxHashIndexLayout().TxHashIndexID(c), c, c) + makeReadyHotDirNoData(t, cat, c) + } + live := openLiveHotDB(t, cat, 2) + t.Cleanup(func() { _ = live.Close() }) + + sig := NewBoundarySignal() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + done := make(chan error, 1) + go func() { done <- Loop(ctx, cfg, cat, sig) }() + + sig.Publish(chunk.ID(0)) + sig.Publish(chunk.ID(1)) // latest-cell coalesces: a tick over [floor, 1] discards both + require.Eventually(t, func() bool { + h0, e0 := hotKeyExists(cat, 0) + h1, e1 := hotKeyExists(cat, 1) + return e0 == nil && e1 == nil && !h0 && !h1 + }, 10*time.Second, 20*time.Millisecond, "one drained tick discarded both completed chunks") + + cancel() + select { + case err := <-done: + require.NoError(t, err, "a ctx-canceled Loop is a clean return") + case <-time.After(5 * time.Second): + t.Fatal("the loop did not return on ctx cancellation") + } +} + +// TestLifecycleLoop_ReturnsImmediatelyOnAlreadyCancelledCtx: an already-canceled +// ctx makes the loop return without running any tick (never blocks on the +// channel forever). 
+func TestLifecycleLoop_ReturnsImmediatelyOnAlreadyCancelledCtx(t *testing.T) { + cat, _ := smallTxHashIndexCatalog(t, 1) + cfg := lifecycleTestConfig(t, cat, 0) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + sig := NewBoundarySignal() // never published to + done := make(chan error, 1) + go func() { done <- Loop(ctx, cfg, cat, sig) }() + select { + case err := <-done: + require.NoError(t, err, "an already-canceled ctx is a clean return") + case <-time.After(5 * time.Second): + t.Fatal("the loop blocked instead of observing the canceled ctx") + } +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_test.go new file mode 100644 index 000000000..3d3398d11 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/lifecycle_test.go @@ -0,0 +1,209 @@ +package lifecycle + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// --------------------------------------------------------------------------- +// End-to-end tick harness: real catalog + real hotchunk DBs. +// --------------------------------------------------------------------------- + +// TestRunLifecycleTick_BoundaryFreezesFoldsDiscards is the "one boundary, end to +// end" walk: chunk 0 just closed (its full hot DB is on disk, ready), chunk 1 is +// the new live chunk. One tick must: +// - freeze chunk 0's cold artifacts FROM its hot DB (via processChunk's hot +// branch), +// - fold chunk 0 into its window's index (terminal coverage, cpi=1), +// - discard chunk 0's hot DB (cold artifacts now fully serve it), +// - leave the live chunk 1 untouched. +// +// Then re-running the tick is a no-op (quiescence). 
func TestRunLifecycleTick_BoundaryFreezesFoldsDiscards(t *testing.T) {
	// full-chunk ingest on an isolated TempDir/catalog; overlaps the other heavy
	// tests to fit the gate's go-test timeout.
	t.Parallel()
	cat, _ := smallTxHashIndexCatalog(t, 1) // window w == chunk w; a one-chunk window finalizes immediately
	cfg := lifecycleTestConfig(t, cat, 0)

	// Chunk 0: just-closed, full hot DB on disk. Chunk 1: the new live chunk.
	ingestFullHotChunk(t, cat, 0)
	live := openLiveHotDB(t, cat, 1) // the live chunk's hot DB (held open by "ingestion")
	t.Cleanup(func() { _ = live.Close() })

	require.NoError(t, runTickForCatalog(context.Background(), t, cfg, cat), "a healthy tick never fails")

	// Chunk 0's cold ledgers and events artifacts are frozen (txhash is checked
	// through the index coverage just below, not through its per-chunk state).
	for _, kind := range []geometry.Kind{geometry.KindLedgers, geometry.KindEvents} {
		state, err := cat.State(0, kind)
		require.NoError(t, err)
		assert.Equal(t, geometry.StateFrozen, state, "chunk 0 %s frozen", kind)
	}
	// The window's index is terminal and covers chunk 0.
	covered, err := cat.FrozenIndexCovers(0)
	require.NoError(t, err)
	assert.True(t, covered, "the window index folded chunk 0 in")
	fk, ok, err := cat.FrozenTxHashIndex(cat.TxHashIndexLayout().TxHashIndexID(0))
	require.NoError(t, err)
	require.True(t, ok)
	assert.True(t, cat.TxHashIndexLayout().IsTerminalCoverage(fk), "a one-chunk window is terminal")

	// Chunk 0's hot DB is discarded (cold artifacts fully serve it).
	has, err := hotKeyExists(cat, 0)
	require.NoError(t, err)
	assert.False(t, has, "chunk 0's hot key is gone")

	// The live chunk 1 is untouched: its hot key still "ready", no cold artifacts.
	hotState, err := cat.HotState(1)
	require.NoError(t, err)
	assert.Equal(t, geometry.HotReady, hotState, "the live chunk's hot key is untouched")
	lfs1, err := cat.State(1, geometry.KindLedgers)
	require.NoError(t, err)
	assert.Equal(t, geometry.State(""), lfs1, "the live chunk is not frozen")

	// Quiescence: re-running the tick produces no work.
	through, err := deriveCompleteThrough(cat)
	require.NoError(t, err)
	assertQuiescent(t, cfg, cat, through)
}

// TestRunLifecycleTick_DiscardGatedOnIndexCoverage: a complete chunk whose cold
// ledgers+events are frozen but whose window index does NOT yet cover it keeps its
// hot DB (it still serves tx lookups). Only once a frozen coverage includes the
// chunk does the discard fire. cpi=2 so a single chunk does NOT finalize the
// window.
func TestRunLifecycleTick_DiscardGatedOnIndexCoverage(t *testing.T) {
	cat, _ := smallTxHashIndexCatalog(t, 2) // window 0 = chunks [0,1]
	cfg := lifecycleTestConfig(t, cat, 0)

	// Pre-freeze chunk 0's ledgers+events+txhash directly (no hot dependence), and
	// leave it with a "ready" hot DB on disk. The window is NOT finalized (cpi=2,
	// only chunk 0 present), so no terminal coverage exists.
	freezeKinds(t, cat, 0, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash)
	makeReadyHotDirNoData(t, cat, 0)
	// A live chunk 1 above it so chunk 0 is below the partition boundary.
	require.NoError(t, cat.PutHotTransient(1))

	through := chunk.ID(0).LastLedger() // chunk 0 complete via cold
	// txhash is frozen, ledgers/events frozen, but the window has no FROZEN coverage
	// yet => indexCovers(0) is false => NOT discarded (still needed for lookups via
	// its .bin/hot DB until the index folds it in).
	ops, err := eligibleDiscardOps(cat, gateFor(t, cfg, cat, through), through)
	require.NoError(t, err)
	require.Empty(t, ops, "no index coverage yet: the hot DB stays")

	// Now finalize the window's index so it covers chunk 0 (terminal needs chunk
	// 1's .bin too; build a non-terminal-but-covering frozen coverage [0,0]).
	freezeCoverage(t, cat, 0, 0, 0)
	covered, err := cat.FrozenIndexCovers(0)
	require.NoError(t, err)
	require.True(t, covered)

	// Same gate and snapshot as before; only the coverage changed.
	ops, err = eligibleDiscardOps(cat, gateFor(t, cfg, cat, through), through)
	require.NoError(t, err)
	require.Len(t, ops, 1, "covered + nothing pending => discard eligible")
	require.NoError(t, ops[0]())

	has, err := hotKeyExists(cat, 0)
	require.NoError(t, err)
	assert.False(t, has, "the now-covered chunk's hot DB is discarded")
}

// TestRunLifecycleTick_PastFloorPrune: a chunk wholly below the effective
// retention floor has its artifact files and hot DB swept, regardless of state.
func TestRunLifecycleTick_PastFloorPrune(t *testing.T) {
	cat, _ := smallTxHashIndexCatalog(t, 1)
	cfg := lifecycleTestConfig(t, cat, 2) // retain ~2 chunks

	// CompleteThrough will be chunk 5's last ledger (positional: live chunk 6).
	// floor = geometry.LastCompleteChunkAt(through)-retention+1 = 5-2+1 = chunk 4's first
	// ledger. So chunks 0..3 are wholly past the floor and must be swept.
	for c := chunk.ID(0); c <= 5; c++ {
		freezeKinds(t, cat, c, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash)
		writeArtifact(t, cat.Layout().LedgerPackPath(c))
		freezeCoverage(t, cat, cat.TxHashIndexLayout().TxHashIndexID(c), c, c) // each one-chunk window terminal
	}
	// A past-floor hot DB too (chunk 1).
	makeReadyHotDirNoData(t, cat, 1)
	live := openLiveHotDB(t, cat, 6) // live chunk
	t.Cleanup(func() { _ = live.Close() })

	// Pin the fixture's arithmetic before running the tick.
	through, err := deriveCompleteThrough(cat)
	require.NoError(t, err)
	require.Equal(t, chunk.ID(5).LastLedger(), through)
	floor := EffectiveRetentionFloor(through, cfg.RetentionChunks, 0)
	require.Equal(t, chunk.ID(4).FirstLedger(), floor, "floor anchors 2 chunks back")

	require.NoError(t, runTickForCatalog(context.Background(), t, cfg, cat), "prune tick never fails")

	// Chunks 0..3 (wholly below the floor) are gone: keys and files.
	for c := chunk.ID(0); c <= 3; c++ {
		ledgers, serr := cat.State(c, geometry.KindLedgers)
		require.NoError(t, serr)
		assert.Equal(t, geometry.State(""), ledgers, "chunk %s ledgers key swept", c)
		assert.NoFileExists(t, cat.Layout().LedgerPackPath(c), "chunk %s pack swept", c)
		has, herr := hotKeyExists(cat, c)
		require.NoError(t, herr)
		assert.False(t, has, "chunk %s hot key swept", c)
	}
	// Chunk 4 (the floor chunk) and 5 are within retention and survive.
	for c := chunk.ID(4); c <= 5; c++ {
		ledgers, serr := cat.State(c, geometry.KindLedgers)
		require.NoError(t, serr)
		assert.Equal(t, geometry.StateFrozen, ledgers, "chunk %s in retention survives", c)
	}

	assertQuiescent(t, cfg, cat, through)
}

// TestRunLifecycleTick_PrunesTransientIndexDebris: a "freezing" index key (a
// crashed build attempt) is swept regardless of window, even within retention.
func TestRunLifecycleTick_PrunesTransientIndexDebris(t *testing.T) {
	cat, _ := smallTxHashIndexCatalog(t, 2)
	cfg := lifecycleTestConfig(t, cat, 0)

	// A crashed build left a "freezing" coverage key (no commit).
	_, err := cat.MarkTxHashIndexFreezing(0, 0, 0)
	require.NoError(t, err)

	through, err := deriveCompleteThrough(cat)
	require.NoError(t, err)
	// The scan reports both the ops to run and how many artifacts they sweep.
	ops, artifacts, err := eligiblePruneOps(cat, gateFor(t, cfg, cat, through))
	require.NoError(t, err)
	require.Len(t, ops, 1, "the freezing debris is swept")
	require.Equal(t, 1, artifacts, "one index artifact swept")
	require.NoError(t, ops[0]())

	covs, err := cat.AllTxHashIndexKeys()
	require.NoError(t, err)
	require.Empty(t, covs, "the freezing index key is gone")
}

// ---------------------------------------------------------------------------
// ERROR PLUMBING: a failing tick RETURNS its error (no Fatalf / os.Exit).
// supervise — not the tick — classifies ctx-cancel-is-clean vs restart (tested at
// the daemon level: TestRunDaemon_LoadValidateWireStartCleanShutdown, TestSupervise_*).
// ---------------------------------------------------------------------------

// TestRunLifecycleTick_FailureReturnsError: when a plan op fails, runLifecycle
// returns the wrapped error rather than aborting the process — so Loop can
// propagate it up through the errgroup to supervise. The chunk-0 build is
// GENUINELY unproducible: chunk 0 sits below a READY live chunk 1 (so it counts as
// complete and the plan range [0,0] is non-empty), has no frozen artifacts, and
// its hot key is "transient" (not a ready read source). With no bulk Backend
// configured, backfillSource has no source for chunk 0 and RunBackfill fails;
// MaxRetries defaults to 0, so it fails fast.
+func TestRunLifecycleTick_FailureReturnsError(t *testing.T) { + cat, _ := smallTxHashIndexCatalog(t, 1) + cfg := lifecycleTestConfig(t, cat, 0) // hot tier read by path, no Backend + readyHot(t, cat, 1) // ready live chunk => through = chunk 0 last ledger + require.NoError(t, cat.PutHotTransient(0)) // chunk 0 below live, no frozen artifacts, not a ready source + + err := runLifecycle(context.Background(), cfg, cat, 0) // plan range [0,0], the failing build + require.Error(t, err, "a genuine op failure surfaces up the call stack") +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress.go new file mode 100644 index 000000000..d25e62224 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress.go @@ -0,0 +1,163 @@ +package lifecycle + +import ( + "fmt" + + supportlog "github.com/stellar/go-stellar-sdk/support/log" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" +) + +// Progress is derived, never stored. "Highest complete chunk" arithmetic runs in +// int64 (-1 = "nothing complete") to avoid uint32 wraparound on the pre-genesis +// sentinel; geometry.CompleteThrough is the chokepoint (the signed chunk↔ledger +// maps live in geometry so there is one -1 convention across the daemon). + +// LastCommittedLedger is the single highest-durably-committed-ledger derivation. +// It maxes three terms, each in the signed domain so a fresh/young store never +// underflows to MaxUint32: +// +// - COLD — highest chunk with all artifacts durable (highestDurableChunk; -1 on +// a fresh start). Leads at startup before any hot key exists. 
+// - HOT — only when hot > cold, over "ready" keys: one read-only MaxCommittedSeq +// read of the highest ready hot DB (empty DB ⇒ positional CompleteThrough(hot-1)). +// The read-only open takes no RocksDB LOCK, so it never contends with a writer; +// in practice it runs before ingestion opens the live chunk anyway. +// - FLOOR — EarliestLedger()-1 as int64(earliest)-1, so an absent/zero pin +// yields the pre-genesis sentinel rather than underflowing. +// +// logger is required (hotchunk.OpenReadOnly needs it); there is no logger-less +// mode — the tick derives the frontier the same way startup does. +func LastCommittedLedger(cat *catalog.Catalog, logger *supportlog.Entry) (uint32, error) { + cold, err := highestDurableChunk(cat) + if err != nil { + return 0, err + } + through := geometry.CompleteThrough(cold) + + hot, err := highestReadyChunkSigned(cat) + if err != nil { + return 0, err + } + if hot > cold { + // One refinement read of the highest ready hot DB; loss detected lazily on + // this open (no eager scan over every ready key). + refined, rerr := refineWithHotDB(cat, logger, hot) + if rerr != nil { + return 0, rerr + } + through = max(through, refined) + } + + earliest, ok, err := cat.EarliestLedger() + if err != nil { + return 0, err + } + if ok { + // int64 before the -1 so a zero/genesis pin does not underflow. + floor := max(int64(earliest)-1, 0) + through = max(through, uint32(floor)) //nolint:gosec // floor in [0, MaxUint32), fits uint32 + } + + return through, nil +} + +// refineWithHotDB opens the highest ready hot chunk read-only straight from its +// Layout path and returns its MaxCommittedSeq, or CompleteThrough(live-1) on an +// empty DB. A "ready" key whose dir/DB is gone surfaces as an ordinary +// (restartable) error — the read-only open never auto-heals it into a fresh empty +// DB. A read-only open replays any crash-left synced WAL into memtables, so +// MaxCommittedSeq is correct even after an ungraceful crash. 
+func refineWithHotDB(cat *catalog.Catalog, logger *supportlog.Entry, live int64) (uint32, error) { + id := chunk.ID(live) //nolint:gosec // live > cold >= -1, so live >= 0 + hot, openErr := hotchunk.OpenReadOnly(cat.Layout().HotChunkPath(id), id, logger) + if openErr != nil { + return 0, fmt.Errorf("chunk %s is %q but its hot DB won't open: %w", id, geometry.HotReady, openErr) + } + defer func() { _ = hot.Close() }() + + maxSeq, present, seqErr := hot.MaxCommittedSeq() + if seqErr != nil { + return 0, fmt.Errorf("chunk %s: read hot max committed seq: %w", id, seqErr) + } + if present { + return maxSeq, nil + } + // Empty live DB: positional fallback (everything below it). + return geometry.CompleteThrough(live - 1), nil +} + +// highestReadyChunkSigned returns the highest "ready" hot chunk id as int64, or -1 +// when none. The signed return lets CompleteThrough compute the positional term +// without a uint32 underflow when the live chunk is chunk 0. +func highestReadyChunkSigned(cat *catalog.Catalog) (int64, error) { + ready, err := cat.ReadyHotChunkKeys() + if err != nil { + return 0, err + } + if len(ready) == 0 { + return -1, nil + } + // Sorted ascending; the last is the highest. + return int64(ready[len(ready)-1]), nil +} + +// highestDurableChunk returns the highest chunk id with all artifacts durable +// (ledgers AND events frozen AND (txhash frozen OR covered by a frozen index)), +// or -1 on a fresh start. A partially-frozen tip chunk is excluded; backfill +// repairs it. +func highestDurableChunk(cat *catalog.Catalog) (int64, error) { + refs, err := cat.ChunkArtifactKeys() + if err != nil { + return 0, err + } + + // Frozen per-kind state per chunk. 
+ type kinds struct{ ledgers, events, txhash bool } + frozen := map[chunk.ID]*kinds{} + for _, ref := range refs { + if ref.State != geometry.StateFrozen { + continue + } + k := frozen[ref.Chunk] + if k == nil { + k = &kinds{} + frozen[ref.Chunk] = k + } + switch ref.Kind { + case geometry.KindLedgers: + k.ledgers = true + case geometry.KindEvents: + k.events = true + case geometry.KindTxHash: + k.txhash = true + } + } + + highest := int64(-1) + for c, k := range frozen { + if !k.ledgers || !k.events { + continue + } + // A frozen index coverage satisfies txhash even after the .bin was demoted. + // The shared catalog predicate asserts INV-2 (one frozen coverage per window) + // on every read, so watermark derivation, discard eligibility, and resolve + // can never disagree about the same snapshot. + if !k.txhash { + covered, err := cat.FrozenIndexCovers(c) + if err != nil { + return 0, err + } + if !covered { + continue + } + } + if id := int64(c); id > highest { + highest = id + } + } + return highest, nil +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_realdb_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_realdb_test.go new file mode 100644 index 000000000..394816e2b --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_realdb_test.go @@ -0,0 +1,144 @@ +package lifecycle + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger" +) + +// seedLedgersCF reopens a CLOSED chunk hot DB raw and 
commits sparse ledgers-CF +// entries in one batch via the production AddLedgerToBatch. These fixtures need +// arbitrary frontier heights without the events CF's contiguity requirement, so +// they write the one CF the watermark refinement reads (MaxCommittedSeq only +// looks at the ledgers CF's last key; the payload bytes are never decoded). +func seedLedgersCF(t *testing.T, cat *catalog.Catalog, c chunk.ID, entries ...ledger.Entry) { + t.Helper() + store, err := rocksdb.New(rocksdb.Config{ + Path: cat.Layout().HotChunkPath(c), + ColumnFamilies: hotchunk.ColumnFamilies(), + Logger: silentLogger(), + }) + require.NoError(t, err) + h := ledger.NewWithStore(store) + require.NoError(t, store.Batch(func(b *rocksdb.BatchWriter) error { + for _, e := range entries { + if berr := h.AddLedgerToBatch(b, e); berr != nil { + return berr + } + } + return nil + })) + require.NoError(t, store.Close()) +} + +// seedReadyLiveDB brackets a "ready" hot DB for chunk c (via the production +// opener) and commits a single ledgers-CF entry at seq `top` so MaxCommittedSeq +// reads back `top`. top==0 leaves the DB empty (present=false). It closes the DB +// as hygiene — a read-only reopen takes no RocksDB LOCK, so this isn't required +// for the refinement to open, but it keeps the fixtures single-handle. +func seedReadyLiveDB(t *testing.T, cat *catalog.Catalog, c chunk.ID, top uint32) { + t.Helper() + db := openLiveHotDB(t, cat, c) // ready key + real dir + empty DB + require.NoError(t, db.Close()) + if top > 0 { + seedLedgersCF(t, cat, c, ledger.Entry{Seq: top, Bytes: []byte("ledger")}) + } +} + +// TestDeriveWatermark_RealHotDB_RefinementIsNotStale exercises the watermark +// refinement against a REAL per-chunk hotchunk DB opened read-only by its Layout +// path (the same open production does). 
It proves the single-DB MaxCommittedSeq +// refinement reads the actual committed ledger frontier (the ledgers CF's last +// key) and is not a stale/constant value: the bound rises to exactly the highest +// seq committed to the live chunk's real DB. +func TestDeriveWatermark_RealHotDB_RefinementIsNotStale(t *testing.T) { + cat, _ := testCatalog(t) + + live := chunk.ID(5) + // Production bracket: creates the hot dir, opens the SINGLE shared multi-CF + // DB, flips the hot key "ready". This is exactly what ingestion does. + db := openLiveHotDB(t, cat, live) + // Close the live writer before seeding — hygiene (the refinement's read-only + // reopen takes no RocksDB LOCK), keeping the fixture single-handle. + require.NoError(t, db.Close()) + + // Commit two real ledgers into the ledgers CF (the CF MaxCommittedSeq reads). + first := live.FirstLedger() + committedTop := first + 200 + seedLedgersCF(t, cat, live, + ledger.Entry{Seq: first, Bytes: []byte("ledger-A")}, + ledger.Entry{Seq: committedTop, Bytes: []byte("ledger-B")}, + ) + + // Sanity: positional baseline (live chunk 5 ⇒ everything below 5) is chunk 4's + // last ledger, strictly below the committed top — so the assertion below can + // only pass if the refinement actually read the real DB. + baseline := geometry.CompleteThrough(int64(live) - 1) + require.Equal(t, chunk.ID(4).LastLedger(), baseline) + require.Greater(t, committedTop, baseline, "fixture must put the real frontier above the baseline") + + got, err := deriveWatermark(cat, silentLogger()) + require.NoError(t, err) + require.Equal(t, committedTop, got, + "watermark must equal the REAL ledgers-CF last key, not the positional baseline") +} + +// TestDeriveWatermark_RealHotDB_OpensHighestReady proves the refinement opens the +// HIGHEST ready chunk (the live chunk), not just any ready chunk. Two ready chunks +// have independent real hot DBs with DIFFERENT committed frontiers; the watermark +// must reflect the higher chunk's DB. 
Only opening the real per-chunk DB by its +// Layout path distinguishes the two — an "open ready[0] instead of ready[len-1]" +// regression would land on the wrong frontier. +func TestDeriveWatermark_RealHotDB_OpensHighestReady(t *testing.T) { + cat, _ := testCatalog(t) + + lower, higher := chunk.ID(4), chunk.ID(7) + + // Lower ready chunk: a real DB committed near the TOP of chunk 4. If the + // refinement wrongly opened the lower chunk, the bound would land here. + lowDB := openLiveHotDB(t, cat, lower) + require.NoError(t, lowDB.Close()) + lowTop := lower.FirstLedger() + 9000 + seedLedgersCF(t, cat, lower, ledger.Entry{Seq: lowTop, Bytes: []byte("low")}) + + // Higher ready chunk (the live chunk): committed mid-chunk 7. + highDB := openLiveHotDB(t, cat, higher) + require.NoError(t, highDB.Close()) + highMid := higher.FirstLedger() + 1234 + seedLedgersCF(t, cat, higher, ledger.Entry{Seq: highMid, Bytes: []byte("high")}) + + // The two frontiers must be unambiguous: chunk 7 mid-seq is far above chunk 4's + // top, so reading the wrong chunk yields a strictly different (lower) answer. + require.Greater(t, highMid, lowTop) + + got, err := deriveWatermark(cat, silentLogger()) + require.NoError(t, err) + require.Equal(t, highMid, got, + "refinement must open the HIGHEST ready chunk (7), reading its committed mid-seq") +} + +// TestDeriveWatermark_RealHotDB_EmptyLiveFallsBack is the count-only-ready case +// against a real DB: a "ready" live chunk whose real hot DB has NO committed +// ledger (MaxCommittedSeq present=false) must fall back to the positional baseline, not +// fabricate a frontier. Read through a real read-only open by Layout path. 
+func TestDeriveWatermark_RealHotDB_EmptyLiveFallsBack(t *testing.T) { + cat, _ := testCatalog(t) + makeChunkDurable(t, cat, 0) // cold term => chunk 0 last ledger + + live := chunk.ID(3) + db := openLiveHotDB(t, cat, live) // ready key + real dir, but NOTHING committed + require.NoError(t, db.Close()) + + // A read-only open of the empty ledgers CF: present=false, so no refinement (positional fallback). + got, err := deriveWatermark(cat, silentLogger()) + require.NoError(t, err) + require.Equal(t, chunk.ID(2).LastLedger(), got, + "empty live DB ⇒ positional baseline (max ready 3 - 1 = chunk 2), no fabricated frontier") +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_shim_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_shim_test.go new file mode 100644 index 000000000..271b0c282 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_shim_test.go @@ -0,0 +1,22 @@ +package lifecycle + +import ( + supportlog "github.com/stellar/go-stellar-sdk/support/log" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" +) + +// Test-only aliases for the single progress derivation, LastCommittedLedger. +// There is no logger-less mode: when a "ready" hot key leads the cold term the +// derivation always opens that DB read-only, so both aliases pass a real logger. +// deriveCompleteThrough names the cold/floor/positional-selection intent (its +// callers seed no ready-above-cold hot key, or seed an empty real hot DB whose +// refinement falls back to the positional term); deriveWatermark names the +// refinement-value intent. Production callers use LastCommittedLedger directly. 
+func deriveCompleteThrough(cat *catalog.Catalog) (uint32, error) { + return LastCommittedLedger(cat, silentLogger()) +} + +func deriveWatermark(cat *catalog.Catalog, logger *supportlog.Entry) (uint32, error) { + return LastCommittedLedger(cat, logger) +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_test.go new file mode 100644 index 000000000..c4bd5f50d --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/progress_test.go @@ -0,0 +1,233 @@ +package lifecycle + +import ( + "os" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// --------------------------------------------------------------------------- +// progress derivation test helpers. +// --------------------------------------------------------------------------- + +// makeChunkDurable freezes ledgers+events+txhash for a chunk — the durable state +// highestDurableChunk counts. +func makeChunkDurable(t *testing.T, cat *catalog.Catalog, c chunk.ID) { + t.Helper() + freezeKinds(t, cat, c, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash) +} + +// makeHotDir creates the on-disk hot dir for a chunk. The refinement opens only +// the HIGHEST ready chunk, so a lower ready key needs only its dir present, not a +// real DB (readyHot pairs this with the key); the highest ready chunk in a +// positional-term test needs a real empty DB via seedReadyLiveDB. 
+func makeHotDir(t *testing.T, cat *catalog.Catalog, c chunk.ID) { + t.Helper() + require.NoError(t, os.MkdirAll(cat.Layout().HotChunkPath(c), 0o755)) +} + +// readyHot marks a chunk's hot key "ready" AND creates its dir, the production +// pairing deriveWatermark expects (a ready key whose dir is missing is loss). +func readyHot(t *testing.T, cat *catalog.Catalog, c chunk.ID) { + t.Helper() + require.NoError(t, cat.PutHotTransient(c)) + require.NoError(t, cat.FlipHotReady(c)) + makeHotDir(t, cat, c) +} + +// --------------------------------------------------------------------------- +// LastCommittedLedger — chunk-granularity terms (cold / positional / floor); ready-hot cases seed an EMPTY real DB so the read-only refinement falls back. +// (CompleteThrough / ChunkIDOfLedger arithmetic is tested in geometry.) +// --------------------------------------------------------------------------- + +func TestLastCommittedLedger(t *testing.T) { + t.Run("fresh store => pre-genesis sentinel, never MaxUint32", func(t *testing.T) { + // Every term is -1; the signed domain must yield FirstLedgerSeq-1, not wrap. + cat, _ := testCatalog(t) + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, geometry.PreGenesisLedger, got) + }) + + t.Run("cold term leads: highest fully-durable chunk", func(t *testing.T) { + cat, _ := testCatalog(t) + makeChunkDurable(t, cat, 0) + makeChunkDurable(t, cat, 1) + makeChunkDurable(t, cat, 2) + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, chunk.ID(2).LastLedger(), got) + }) + + t.Run("incompletely-frozen tip degrades the bound (ledgers frozen, events freezing)", func(t *testing.T) { + cat, _ := testCatalog(t) + makeChunkDurable(t, cat, 0) + makeChunkDurable(t, cat, 1) + // Chunk 2 mid-freeze (events only "freezing") must NOT count: bound stays at 1. 
+ freezeKinds(t, cat, 2, geometry.KindLedgers, geometry.KindTxHash) + require.NoError(t, cat.MarkChunkFreezing(2, geometry.KindEvents)) + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, chunk.ID(1).LastLedger(), got) + }) + + t.Run("txhash satisfied by a frozen index coverage (post-finalization demote)", func(t *testing.T) { + cat, _ := testCatalog(t) + // Chunk 7: txhash demoted but a frozen index coverage spans it ⇒ still durable. + freezeKinds(t, cat, 7, geometry.KindLedgers, geometry.KindEvents) + freezeCoverage(t, cat, cat.TxHashIndexLayout().TxHashIndexID(7), 0, 999) // window 0 covers chunk 7 + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, chunk.ID(7).LastLedger(), got) + }) + + t.Run("chunk NOT covered by any frozen index and no frozen txhash does not count", func(t *testing.T) { + cat, _ := testCatalog(t) + makeChunkDurable(t, cat, 0) + // Chunk 1: ledgers+events frozen, no txhash, no covering index. + freezeKinds(t, cat, 1, geometry.KindLedgers, geometry.KindEvents) + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, chunk.ID(0).LastLedger(), got, "chunk 1 not durable; bound stays at chunk 0") + }) + + t.Run("positional term leads in steady state: everything below the live chunk", func(t *testing.T) { + cat, _ := testCatalog(t) + // No cold artifacts yet (steady state: chunks complete before cold exists). + // Ready hot keys 3,4,5 => live chunk is 5 => everything below 5 complete. Only + // the highest (5) is opened; empty DB ⇒ positional fallback CompleteThrough(4). 
+ readyHot(t, cat, 3) + readyHot(t, cat, 4) + seedReadyLiveDB(t, cat, 5, 0) + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, chunk.ID(4).LastLedger(), got, "max ready (5) - 1 = chunk 4's last ledger") + }) + + t.Run("transient hot key does NOT advance the positional term", func(t *testing.T) { + cat, _ := testCatalog(t) + seedReadyLiveDB(t, cat, 3, 0) // highest ready, empty DB ⇒ positional CompleteThrough(2) + // A transient key above the highest ready one must be excluded. + require.NoError(t, cat.PutHotTransient(9)) + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, chunk.ID(2).LastLedger(), got, "max READY (3) - 1, ignoring transient 9") + }) + + t.Run("live chunk 0 => positional term is pre-genesis, NOT MaxUint32", func(t *testing.T) { + // The exact uint32-underflow trap: max ready = 0, so 0-1 must be the + // pre-genesis sentinel, not ID(4294967295).LastLedger(). + cat, _ := testCatalog(t) + seedReadyLiveDB(t, cat, 0, 0) // ready chunk 0, empty DB ⇒ positional fallback + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, geometry.PreGenesisLedger, got) + }) + + t.Run("earliest pin floor leads when above cold/positional terms", func(t *testing.T) { + cat, _ := testCatalog(t) + // Floor pinned mid-chain, no chunks durable, no hot keys. 
+ const floor = 50000 + require.NoError(t, cat.PinEarliestLedger(floor)) + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, uint32(floor-1), got) + }) + + t.Run("earliest pin == genesis (2) does not underflow", func(t *testing.T) { + cat, _ := testCatalog(t) + require.NoError(t, cat.PinEarliestLedger(chunk.FirstLedgerSeq)) + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, geometry.PreGenesisLedger, got, "earliest 2 - 1 = 1, not MaxUint32") + }) + + t.Run("max of all three terms", func(t *testing.T) { + cat, _ := testCatalog(t) + makeChunkDurable(t, cat, 0) // cold => chunk 0 last ledger + seedReadyLiveDB(t, cat, 4, 0) // positional (empty DB) => chunk 3 last ledger (highest) + require.NoError(t, cat.PinEarliestLedger(2)) + got, err := deriveCompleteThrough(cat) + require.NoError(t, err) + require.Equal(t, chunk.ID(3).LastLedger(), got) + }) +} + +// --------------------------------------------------------------------------- +// deriveWatermark — deriveCompleteThrough + one read-only refinement of the +// highest ready hot DB, opened lazily by its Layout path. These read REAL +// per-chunk hot DBs; the sub-chunk-precision / opens-highest / empty-fallback +// value cases are covered against real DBs in progress_realdb_test.go. +// --------------------------------------------------------------------------- + +func TestDeriveWatermark(t *testing.T) { + t.Run("no ready hot keys => equals deriveCompleteThrough, no open", func(t *testing.T) { + cat, _ := testCatalog(t) + makeChunkDurable(t, cat, 0) + // No ready key above the cold term ⇒ the hot>cold gate skips the open entirely. 
+ got, err := deriveWatermark(cat, silentLogger()) + require.NoError(t, err) + require.Equal(t, chunk.ID(0).LastLedger(), got) + }) + + t.Run("boundary-crash under-count recovered by refinement", func(t *testing.T) { + // Live chunk crashed at a boundary and was demoted to "transient": the + // highest READY key is the just-completed predecessor (chunk 4), whose + // completion no key advertises (positional term = chunk 3). The refinement + // opens chunk 4's real DB and reads its full committed seq = chunk 4's last + // ledger, recovering the frontier the positional term under-counted. + cat, _ := testCatalog(t) + chunk4Last := chunk.ID(4).LastLedger() + seedReadyLiveDB(t, cat, 4, chunk4Last) + require.NoError(t, cat.PutHotTransient(5)) // the crashed live chunk + // The positional term alone (highest ready 4, minus 1) under-counts to chunk 3; + // only the refinement below, opening chunk 4's real DB, recovers chunk 4's frontier. + require.Equal(t, chunk.ID(3).LastLedger(), geometry.CompleteThrough(3), + "positional term alone under-counts to chunk 3") + + got, err := deriveWatermark(cat, silentLogger()) + require.NoError(t, err) + require.Equal(t, chunk4Last, got, "refinement recovers the chunk-4 frontier") + }) + + t.Run("LAZY loss (item R2-6): only the highest ready chunk is opened; a lower"+ + " ready key's missing dir is NOT eagerly flagged", func(t *testing.T) { + cat, _ := testCatalog(t) + // Two ready keys; the LOWER one's dir is missing. Under the design's lazy + // detection (no eager all-ready-keys scan) only the HIGHEST ready chunk is + // opened, so the lower key's missing dir is not surfaced here — it surfaces + // later, when ingestion/discard reaches that chunk via openHotDBForChunk. 
+ require.NoError(t, cat.PutHotTransient(2)) + require.NoError(t, cat.FlipHotReady(2)) // ready key 2, NO dir (not opened here) + highSeq := chunk.ID(5).FirstLedger() + 10 + seedReadyLiveDB(t, cat, 5, highSeq) // highest ready key 5 WITH real DB (opened) + got, err := deriveWatermark(cat, silentLogger()) + require.NoError(t, err) + require.Equal(t, highSeq, got, "refined to the highest ready chunk's seq") + }) + + t.Run("errors: a ready HIGHEST chunk whose dir is missing (lazy detection on open)", func(t *testing.T) { + cat, _ := testCatalog(t) + // The highest ready chunk's dir is missing: the one open the derivation + // performs surfaces an ordinary (restartable) error — the read-only open + // never auto-heals it into a fresh empty DB. + require.NoError(t, cat.PutHotTransient(5)) + require.NoError(t, cat.FlipHotReady(5)) // ready key 5, NO dir + _, err := deriveWatermark(cat, silentLogger()) + require.Error(t, err) + require.Contains(t, err.Error(), "00000005") + }) + + t.Run("live chunk 0 ready, empty DB => pre-genesis, no underflow", func(t *testing.T) { + cat, _ := testCatalog(t) + seedReadyLiveDB(t, cat, 0, 0) // ready + real dir, nothing committed + got, err := deriveWatermark(cat, silentLogger()) + require.NoError(t, err) + require.Equal(t, geometry.PreGenesisLedger, got) + }) +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/retention.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/retention.go new file mode 100644 index 000000000..c96b65678 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/retention.go @@ -0,0 +1,54 @@ +package lifecycle + +import ( + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" +) + +// RetentionFloor is the lowest chunk still within retention; anything below is +// eligible for discard/prune. 
It is the reader-side retention contract (design +// "Reader retention contract", gettx §8.2 / §8.5): availability is decided by +// retention, not the on-disk file set, so prune/sweep can unlink a chunk the +// instant it passes the floor without coordinating with the index lifecycle. The +// floor may err LOW harmlessly (a wrongly-retained chunk still hits the reader's +// missing-file rule), so it anchors on the live CompleteThrough; widening history +// is backfill's job, not the floor's. +type RetentionFloor struct { + chunk chunk.ID // lowest in-retention chunk +} + +// NewRetentionFloor pins the floor for one (through, retentionChunks, earliest) +// snapshot. A shortened retentionChunks raises the floor at once. +func NewRetentionFloor(through, retentionChunks, earliest uint32) RetentionFloor { + return RetentionFloorAt(EffectiveRetentionFloor(through, retentionChunks, earliest)) +} + +// RetentionFloorAt pins the floor from an already-computed floor ledger, so the +// tick derives EffectiveRetentionFloor once and shares it between the gauge and +// the gate rather than recomputing it per scan. +func RetentionFloorAt(floorLedger uint32) RetentionFloor { + return RetentionFloor{chunk: chunk.IDFromLedger(floorLedger)} +} + +// Excludes reports whether chunk c is below the floor (past retention). The scans +// use it on a chunk directly and, since an index is below the floor exactly when +// its last chunk is, as Excludes(layout.LastChunk(idx)) for a whole index. +func (f RetentionFloor) Excludes(c chunk.ID) bool { return c < f.chunk } + +// FirstChunk is the lowest in-retention chunk — the single floor→chunk boundary +// definition shared by prune (the gate), the lifecycle plan range, and startup +// backfill, so the three can never disagree on where retention begins. 
+func (f RetentionFloor) FirstChunk() chunk.ID { return f.chunk } + +// EffectiveRetentionFloor is the chunk-aligned lower bound of the retention +// window: the HIGHER of the sliding floor (retentionChunks back from the last +// complete chunk) and the fixed earliest_ledger. slidingChunk is signed so a +// young store / large retentionChunks clamps to chunk 0 instead of underflowing. +func EffectiveRetentionFloor(upperBound, retentionChunks, earliest uint32) uint32 { + sliding := uint32(chunk.FirstLedgerSeq) // GenesisLedger + if retentionChunks > 0 { + slidingChunk := geometry.LastCompleteChunkAt(upperBound) - int64(retentionChunks) + 1 + sliding = geometry.ChunkFirstLedger(max(slidingChunk, 0)) + } + return max(sliding, earliest) +} diff --git a/cmd/stellar-rpc/internal/fullhistory/retention_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/retention_test.go similarity index 56% rename from cmd/stellar-rpc/internal/fullhistory/retention_test.go rename to cmd/stellar-rpc/internal/fullhistory/lifecycle/retention_test.go index e3defe955..7ced429e6 100644 --- a/cmd/stellar-rpc/internal/fullhistory/retention_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/retention_test.go @@ -1,4 +1,4 @@ -package fullhistory +package lifecycle import ( "testing" @@ -6,8 +6,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) // --------------------------------------------------------------------------- @@ -18,7 +18,7 @@ import ( // --------------------------------------------------------------------------- // through = chunk 100's last ledger, retain 10 chunks ⇒ floor = chunk 91 -// (retentionFloorChunk: 100-10+1 = 91). Anything below chunk 91 is excluded. 
+// (EffectiveRetentionFloor: 100-10+1 = 91). Anything below chunk 91 is excluded. func TestRetentionFloor_ExcludesBelow(t *testing.T) { floor := NewRetentionFloor(chunk.ID(100).LastLedger(), 10, 0) @@ -44,8 +44,8 @@ func TestRetentionFloor_ShorteningRaisesFloorImmediately(t *testing.T) { // A whole tx-hash index is below the floor exactly when its last chunk is, so // callers test Excludes(layout.LastChunk(idx)) — no index-specific method needed. func TestRetentionFloor_ExcludesIndexByLastChunk(t *testing.T) { - layout, err := geometry.NewTxHashIndexLayout(4) // indexes: 0=[0,3], 1=[4,7], 2=[8,11] - require.NoError(t, err) + cat, _ := smallTxHashIndexCatalog(t, 4) // indexes: 0=[0,3], 1=[4,7], 2=[8,11] + layout := cat.TxHashIndexLayout() // through = chunk 11's last ledger, retain 4 chunks ⇒ floor = chunk 8 // (11-4+1 = 8). Index 2 ([8,11]) starts at the floor. @@ -94,3 +94,71 @@ func TestRetentionFloor_YoungStoreClampsToGenesis(t *testing.T) { floor := NewRetentionFloor(chunk.ID(3).LastLedger(), 1000, 0) assert.False(t, floor.Excludes(0), "chunk 0 is at the clamped floor, not below it") } + +// --------------------------------------------------------------------------- +// Scenario: a window STRADDLING the floor serves in-range seqs and not-found +// below. A finalized window's frozen .idx covers [lo, hi] including chunks the +// floor has since risen past; the gate masks those below-floor chunks. This is +// the stale-.idx case gettransaction §8.5 tolerates because the reader gate +// makes below-floor reads not-found regardless of what the .idx resolves. +// --------------------------------------------------------------------------- + +func TestReaderRetention_WindowStraddlingFloorServesInRangeNotBelow(t *testing.T) { + cat, _ := smallTxHashIndexCatalog(t, 4) // window 0 = chunks [0,3] + wins := cat.TxHashIndexLayout() + + // Window 0 was finalized at terminal coverage [0,3] when the floor sat at + // genesis. 
Its frozen .idx hashes chunks 0..3 — a static, stale-lo artifact. + for c := chunk.ID(0); c <= 3; c++ { + freezeKinds(t, cat, c, geometry.KindLedgers, geometry.KindEvents) + } + freezeCoverage(t, cat, 0, 0, 3) + fk, ok, err := cat.FrozenTxHashIndex(0) + require.NoError(t, err) + require.True(t, ok) + require.True(t, wins.IsTerminalCoverage(fk), "window 0 is finalized") + + // The floor later rose to chunk 2 (its first ledger). Window 0 now STRADDLES + // the floor: chunks 0,1 below it, chunks 2,3 in range. The .idx still claims + // lo=0, but the reader gate is the source of truth. + through := chunk.ID(3).LastLedger() + // Pick retentionChunks so the sliding floor lands on chunk 2: + // geometry.LastCompleteChunkAt(through)=3, floor chunk = 3-retention+1 = 2 ⇒ retention=2. + floor := NewRetentionFloor(through, 2, 0) + + // (The seq-level reader masking — a below-floor read is not-found even though + // the stale .idx still hashes chunks 0,1 — returns with the read path, #772; + // RetentionFloor here exposes only the chunk-granularity prune predicate.) + + // The straddling window's frozen .idx is NOT swept: the window is not wholly + // below the floor (its last chunk, 3, is in range), so only its below-floor + // chunk artifacts (chunks 0,1) are pruned. + assert.False(t, floor.Excludes(wins.LastChunk(0)), + "a straddling window is not wholly below the floor — its .idx is kept") + cfg := lifecycleTestConfig(t, cat, 2) + pops, _, err := eligiblePruneOps(cat, gateFor(t, cfg, cat, through)) + require.NoError(t, err) + for _, op := range pops { + require.NoError(t, op()) + } + + // The window's frozen .idx coverage survives the prune (index family). + survives, ok, err := cat.FrozenTxHashIndex(0) + require.NoError(t, err) + require.True(t, ok, "the straddling window keeps its frozen coverage") + require.Equal(t, fk.Key, survives.Key) + + // The below-floor chunks 0,1 ARE pruned (chunk family); the in-range chunks + // 2,3 survive — exactly the data the gate admits. 
+ for c := chunk.ID(0); c <= 1; c++ { + ledgers, serr := cat.State(c, geometry.KindLedgers) + require.NoError(t, serr) + assert.Equal(t, geometry.State(""), ledgers, "below-floor chunk %s pruned", c) + } + for c := chunk.ID(2); c <= 3; c++ { + ledgers, serr := cat.State(c, geometry.KindLedgers) + require.NoError(t, serr) + assert.Equal(t, geometry.StateFrozen, ledgers, "in-range chunk %s survives", c) + } + assertQuiescent(t, cfg, cat, through) +} diff --git a/cmd/stellar-rpc/internal/fullhistory/lifecycle/runops_test.go b/cmd/stellar-rpc/internal/fullhistory/lifecycle/runops_test.go new file mode 100644 index 000000000..a3f3eb8b1 --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/lifecycle/runops_test.go @@ -0,0 +1,56 @@ +package lifecycle + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// runOps retries a failed (idempotent) op a bounded number of times on a fixed +// pause before giving up, so a transient sweep failure doesn't cancel ingestion +// and force a whole-daemon restart. 
+ +func TestRunOps_RetriesTransientThenSucceeds(t *testing.T) { + cfg := Config{OpRetryAttempts: 3, OpRetryBackoff: time.Millisecond} + calls := 0 + op := func() error { + calls++ + if calls < 3 { + return errors.New("busy file") + } + return nil + } + require.NoError(t, runOps(context.Background(), cfg, []func() error{op})) + require.Equal(t, 3, calls, "two transient failures retried, third try succeeds") +} + +func TestRunOps_GivesUpAfterAttempts(t *testing.T) { + cfg := Config{OpRetryAttempts: 2, OpRetryBackoff: time.Millisecond} + calls := 0 + op := func() error { calls++; return errors.New("permanent") } + require.Error(t, runOps(context.Background(), cfg, []func() error{op})) + require.Equal(t, 2, calls, "attempts total tries (1 initial + 1 retry), then gives up") +} + +func TestRunOps_CtxCancelStopsBeforeOp(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + cfg := Config{OpRetryAttempts: 3, OpRetryBackoff: time.Hour} + calls := 0 + op := func() error { calls++; return errors.New("x") } + require.ErrorIs(t, runOps(ctx, cfg, []func() error{op}), context.Canceled) + require.Zero(t, calls, "a canceled ctx stops before running the op") +} + +// A zero-value Config (no WithLifecycleDefaults) runs each op exactly once — no +// retry, no panic on the zero backoff — so a test harness that builds Config +// directly keeps the pre-retry behavior. 
+func TestRunOps_ZeroConfigRunsOnce(t *testing.T) { + calls := 0 + op := func() error { calls++; return errors.New("boom") } + require.Error(t, runOps(context.Background(), Config{}, []func() error{op})) + require.Equal(t, 1, calls, "zero-config = single attempt") +} diff --git a/cmd/stellar-rpc/internal/fullhistory/observability/observability.go b/cmd/stellar-rpc/internal/fullhistory/observability/observability.go index d6972fe10..cb5e0c0ef 100644 --- a/cmd/stellar-rpc/internal/fullhistory/observability/observability.go +++ b/cmd/stellar-rpc/internal/fullhistory/observability/observability.go @@ -10,9 +10,26 @@ import ( // per-phase wall-clock timings; distinct from the per-data-type ingest.MetricSink. // All methods must be safe for concurrent use. type Metrics interface { - // LastCommitted sets the derived last-committed ledger and the effective - // retention floor (the two advance together each backfill pass). - LastCommitted(lastCommitted, retentionFloor uint32) + // LastCommitted sets the derived last-committed ledger gauge. Owned by the two + // call sites that know the TRUE value: startup/backfill (as history advances) + // and the ingestion loop (one atomic gauge set per committed ledger). The tick + // must NOT set it — its chunk-aligned lastChunk.LastLedger() would regress the + // gauge below a mid-chunk refined watermark on every restart. + LastCommitted(lastCommitted uint32) + + // RetentionFloor sets the effective retention floor gauge (lowest in-window + // ledger). Owned by startup/backfill and the lifecycle tick; the floor depends + // only on the last complete chunk, so it does not regress in the tick's window. + RetentionFloor(retentionFloor uint32) + + // ChunkBoundary counts one ingestion chunk-boundary handoff. The closed chunk + // id is logged at the call site; this metric is a plain counter. + ChunkBoundary() + + // LiveHotChunks sets the count of hot-chunk DBs currently on disk (the + // hot:chunk key count). 
Reported by every lifecycle tick after the discard + // stage so the gauge tracks the live + awaiting-discard set. + LiveHotChunks(count int) // BackfillPass records one completed backfill pass's wall-clock. BackfillPass(d time.Duration) @@ -20,6 +37,8 @@ type Metrics interface { Freeze(d time.Duration) // Rebuild records one index rebuild's wall-clock. Rebuild(d time.Duration) + // Discard counts the hot DBs a tick retired and records the stage wall-clock. + Discard(count int, d time.Duration) // Prune counts swept artifacts and records the sweep's wall-clock. Prune(count int, d time.Duration) } @@ -27,11 +46,15 @@ type Metrics interface { // NopMetrics discards every signal — the default when a config carries no Metrics. type NopMetrics struct{} -func (NopMetrics) LastCommitted(uint32, uint32) {} -func (NopMetrics) BackfillPass(time.Duration) {} -func (NopMetrics) Freeze(time.Duration) {} -func (NopMetrics) Rebuild(time.Duration) {} -func (NopMetrics) Prune(int, time.Duration) {} +func (NopMetrics) LastCommitted(uint32) {} +func (NopMetrics) RetentionFloor(uint32) {} +func (NopMetrics) ChunkBoundary() {} +func (NopMetrics) LiveHotChunks(int) {} +func (NopMetrics) BackfillPass(time.Duration) {} +func (NopMetrics) Freeze(time.Duration) {} +func (NopMetrics) Rebuild(time.Duration) {} +func (NopMetrics) Discard(int, time.Duration) {} +func (NopMetrics) Prune(int, time.Duration) {} // MetricsOrNop returns m, or NopMetrics{} when nil, so call sites never nil-check. func MetricsOrNop(m Metrics) Metrics { @@ -56,9 +79,12 @@ type PrometheusMetrics struct { // Gauges — absolute, last-write-wins. lastCommitted prometheus.Gauge retentionFloor prometheus.Gauge + liveHotChunks prometheus.Gauge - // Counter — monotonic tally. - pruned prometheus.Counter + // Counters — monotonic tallies. + chunkBoundaries prometheus.Counter + discarded prometheus.Counter + pruned prometheus.Counter // Durations — per-phase wall-clock histogram, keyed by phase label. 
phaseDuration *prometheus.HistogramVec @@ -69,6 +95,7 @@ const ( phaseBackfillPass = "backfill_pass" phaseFreeze = "freeze" phaseRebuild = "rebuild" + phaseDiscard = "discard" phasePrune = "prune" ) @@ -79,14 +106,19 @@ func NewPrometheusMetrics(registry *prometheus.Registry, namespace string) *Prom Namespace: namespace, Subsystem: subsystem, Name: name, Help: help, }) } + counter := func(name, help string) prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, Subsystem: subsystem, Name: name, Help: help, + }) + } m := &PrometheusMetrics{ - lastCommitted: gauge("last_committed_ledger", "highest ledger durably committed"), - retentionFloor: gauge("retention_floor_ledger", "effective retention floor — lowest in-window ledger"), - pruned: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, Subsystem: subsystem, - Name: "pruned_ops_total", Help: "artifacts swept after an index build", - }), + lastCommitted: gauge("last_committed_ledger", "highest ledger durably committed"), + retentionFloor: gauge("retention_floor_ledger", "effective retention floor — lowest in-window ledger"), + liveHotChunks: gauge("live_hot_chunks", "count of hot-chunk DBs currently on disk"), + chunkBoundaries: counter("chunk_boundaries_total", "ingestion chunk-boundary handoffs"), + discarded: counter("discarded_hot_chunks_total", "hot DBs retired by the discard stage"), + pruned: counter("pruned_artifacts_total", "artifacts swept by the prune stage (below the retention floor)"), phaseDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, Name: "phase_duration_seconds", Help: "wall-clock of a daemon phase action", @@ -94,15 +126,26 @@ func NewPrometheusMetrics(registry *prometheus.Registry, namespace string) *Prom }, []string{"phase"}), } - registry.MustRegister(m.lastCommitted, m.retentionFloor, m.pruned, m.phaseDuration) + registry.MustRegister( + m.lastCommitted, m.retentionFloor, 
m.liveHotChunks, + m.chunkBoundaries, m.discarded, m.pruned, + m.phaseDuration, + ) return m } -func (m *PrometheusMetrics) LastCommitted(lastCommitted, retentionFloor uint32) { +func (m *PrometheusMetrics) LastCommitted(lastCommitted uint32) { m.lastCommitted.Set(float64(lastCommitted)) +} + +func (m *PrometheusMetrics) RetentionFloor(retentionFloor uint32) { m.retentionFloor.Set(float64(retentionFloor)) } +func (m *PrometheusMetrics) ChunkBoundary() { m.chunkBoundaries.Inc() } + +func (m *PrometheusMetrics) LiveHotChunks(count int) { m.liveHotChunks.Set(float64(count)) } + func (m *PrometheusMetrics) BackfillPass(d time.Duration) { m.phaseDuration.WithLabelValues(phaseBackfillPass).Observe(d.Seconds()) } @@ -115,6 +158,13 @@ func (m *PrometheusMetrics) Rebuild(d time.Duration) { m.phaseDuration.WithLabelValues(phaseRebuild).Observe(d.Seconds()) } +func (m *PrometheusMetrics) Discard(count int, d time.Duration) { + if count > 0 { + m.discarded.Add(float64(count)) + } + m.phaseDuration.WithLabelValues(phaseDiscard).Observe(d.Seconds()) +} + func (m *PrometheusMetrics) Prune(count int, d time.Duration) { if count > 0 { m.pruned.Add(float64(count)) diff --git a/cmd/stellar-rpc/internal/fullhistory/observability/observability_test.go b/cmd/stellar-rpc/internal/fullhistory/observability/observability_test.go index 6ebe0310a..336dfcff5 100644 --- a/cmd/stellar-rpc/internal/fullhistory/observability/observability_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/observability/observability_test.go @@ -17,10 +17,14 @@ import ( func TestMetricsOrNop_NilNeverPanics(t *testing.T) { m := MetricsOrNop(nil) require.NotNil(t, m) - m.LastCommitted(5, 2) + m.LastCommitted(5) + m.RetentionFloor(2) + m.ChunkBoundary() + m.LiveHotChunks(3) m.BackfillPass(time.Second) m.Freeze(time.Second) m.Rebuild(time.Second) + m.Discard(1, time.Second) m.Prune(2, time.Second) } @@ -34,10 +38,15 @@ func TestPrometheusMetrics_RegistersAndRecords(t *testing.T) { reg := prometheus.NewRegistry() m := 
NewPrometheusMetrics(reg, "test_ns") - m.LastCommitted(58, 12) + m.LastCommitted(58) + m.RetentionFloor(12) + m.LiveHotChunks(4) + m.ChunkBoundary() + m.ChunkBoundary() m.BackfillPass(250 * time.Millisecond) m.Freeze(100 * time.Millisecond) m.Rebuild(50 * time.Millisecond) + m.Discard(3, 20*time.Millisecond) m.Prune(2, 5*time.Millisecond) families, err := reg.Gather() @@ -61,10 +70,13 @@ func TestPrometheusMetrics_RegistersAndRecords(t *testing.T) { assert.InDelta(t, float64(58), values["test_ns_fullhistory_streaming_last_committed_ledger"], 0) assert.InDelta(t, float64(12), values["test_ns_fullhistory_streaming_retention_floor_ledger"], 0) - assert.InDelta(t, float64(2), values["test_ns_fullhistory_streaming_pruned_ops_total"], 0) + assert.InDelta(t, float64(4), values["test_ns_fullhistory_streaming_live_hot_chunks"], 0) + assert.InDelta(t, float64(2), values["test_ns_fullhistory_streaming_chunk_boundaries_total"], 0) + assert.InDelta(t, float64(3), values["test_ns_fullhistory_streaming_discarded_hot_chunks_total"], 0) + assert.InDelta(t, float64(2), values["test_ns_fullhistory_streaming_pruned_artifacts_total"], 0) - // Phase-duration histogram saw backfill_pass + freeze + rebuild + prune = 4 observations. - assert.Equal(t, uint64(4), counts["test_ns_fullhistory_streaming_phase_duration_seconds"]) + // Phase-duration histogram saw backfill_pass + freeze + rebuild + discard + prune = 5 observations. + assert.Equal(t, uint64(5), counts["test_ns_fullhistory_streaming_phase_duration_seconds"]) } // Double-registration on the same registry panics (one sink per registry). 
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb.go b/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb.go index f172587e6..0f1b56552 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb.go @@ -58,6 +58,22 @@ type Config struct { // "inherit the pinned defaults"; see CFOptions docstring for // the per-knob inherit/override semantics. PerCFOptions map[string]CFOptions + + // ReadOnly opens the store read-only (dir never created, no writes, no + // flush-on-close). An un-flushed WAL IS recovered into in-memory memtables + // on open (RocksDB OpenForReadOnly semantics; nothing is persisted), so + // reads see every synced write, not just SST/MANIFEST state. Used by the + // freeze source. + ReadOnly bool + + // MustExist opens read-WRITE but with create-if-missing OFF, so opening a + // missing or gutted DB fails instead of silently fabricating a fresh empty one + // — the "never auto-heal" hot-DB open under a "ready" key, a DB the filesystem + // should already hold. (RocksDB's env layer may still leave a stub leaf dir with + // a LOG file behind on the failed open; correctness holds — every retry still + // fails on the missing CURRENT — but no usable DB is created.) Ignored when + // ReadOnly is set (read-only never creates regardless). + MustExist bool } // Store is the Layer-1 RocksDB handle. Concrete struct: one impl, @@ -292,26 +308,12 @@ func (s *Store) Iterate(cf string, prefix []byte) iter.Seq2[Entry, error] { } } -// FirstKey returns the smallest key in cf. If cf has no keys this is not -// an error: it returns (nil, false, nil), so callers detect emptiness via -// ok. (cf == "" selects the default column family; an unregistered cf name -// returns ErrCFNotFound.) -// Cheap: a single boundary seek (no scan). -func (s *Store) FirstKey(cf string) ([]byte, bool, error) { - return s.edgeKey(cf, false) -} - // LastKey returns the largest key in cf. 
If cf has no keys this is not an // error: it returns (nil, false, nil), so callers detect emptiness via ok. // (cf == "" selects the default column family; an unregistered cf name // returns ErrCFNotFound.) // Cheap: a single boundary seek (no scan). func (s *Store) LastKey(cf string) ([]byte, bool, error) { - return s.edgeKey(cf, true) -} - -//nolint:funcorder // helper grouped with FirstKey/LastKey for readability -func (s *Store) edgeKey(cf string, last bool) ([]byte, bool, error) { s.mu.RLock() defer s.mu.RUnlock() @@ -325,11 +327,7 @@ func (s *Store) edgeKey(cf string, last bool) ([]byte, bool, error) { it := s.db.NewIteratorCF(s.ro, cfh) defer it.Close() - if last { - it.SeekToLast() - } else { - it.SeekToFirst() - } + it.SeekToLast() if !it.Valid() { // Empty CF (it.Err() is nil) or a mid-seek RocksDB error. return nil, false, it.Err() @@ -425,8 +423,13 @@ func (s *Store) Close() error { return nil } - if err := s.doFlush(); err != nil { - s.cfg.Logger.WithError(err).Warnf("rocksdb: graceful close Flush failed at %s; next Open will replay WAL", s.cfg.Path) + // A read-only store has nothing to flush (and the RocksDB read-only handle + // would reject it); only a writable store flushes its memtable on close. + if !s.cfg.ReadOnly { + if err := s.doFlush(); err != nil { + s.cfg.Logger.WithError(err).Warnf( + "rocksdb: graceful close Flush failed at %s; next Open will replay WAL", s.cfg.Path) + } } for _, cfh := range s.cfHandles { @@ -494,14 +497,20 @@ func (s *Store) constructAndOpen() error { if err != nil { return fmt.Errorf("rocksdb: canonicalize path %s: %w", s.cfg.Path, err) } - if err := os.MkdirAll(abs, dirPerm); err != nil { - return fmt.Errorf("rocksdb: mkdir %s: %w", abs, err) + // Read-only and must-exist opens require a pre-existing DB; neither creates + // the directory. Only a plain read-write open (create-if-missing) does. 
+ if !s.cfg.ReadOnly && !s.cfg.MustExist { + if err := os.MkdirAll(abs, dirPerm); err != nil { + return fmt.Errorf("mkdir %s: %w", abs, err) + } } cfNames := resolveCFNames(s.cfg) opts := grocksdb.NewDefaultOptions() - opts.SetCreateIfMissing(true) - opts.SetCreateIfMissingColumnFamilies(true) + if !s.cfg.ReadOnly && !s.cfg.MustExist { + opts.SetCreateIfMissing(true) + opts.SetCreateIfMissingColumnFamilies(true) + } cfOpts := make([]*grocksdb.Options, len(cfNames)) for i := range cfOpts { @@ -511,7 +520,18 @@ func (s *Store) constructAndOpen() error { s.applyTuning(opts, cfNames, cfOpts) start := time.Now() - db, cfHandles, err := grocksdb.OpenDbColumnFamilies(opts, abs, cfNames, cfOpts) + var ( + db *grocksdb.DB + cfHandles []*grocksdb.ColumnFamilyHandle + ) + if s.cfg.ReadOnly { + // errorIfWalFileExists=false: a cleanly-closed DB has no WAL; if a crash ever + // left one, the open recovers it into in-memory memtables (see Config.ReadOnly) + // rather than failing, so reads still see every synced write. + db, cfHandles, err = grocksdb.OpenDbForReadOnlyColumnFamilies(opts, abs, cfNames, cfOpts, false) + } else { + db, cfHandles, err = grocksdb.OpenDbColumnFamilies(opts, abs, cfNames, cfOpts) + } elapsed := time.Since(start) if err != nil { opts.Destroy() @@ -548,7 +568,7 @@ func (s *Store) constructAndOpen() error { // WAL on + per-write Sync on — non-negotiable across every // fullhistory store, so pinned here on the shared wo rather // than exposed via Tuning. The streaming ingestion contract - // requires "AddEntries returned nil" to mean "durable on disk"; + // requires "the ledger batch committed" to mean "durable on disk"; // one fsync per Put/Batch regardless of size. 
s.wo.DisableWAL(false) s.wo.SetSync(true) diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb_test.go index 999803b75..f1a726875 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb/rocksdb_test.go @@ -48,6 +48,47 @@ func openTestStore(t *testing.T, cfNames []string) *Store { return s } +// TestNew_MustExist_EmptyReadyDBReopens pins that a must-exist read-write open of +// an already-created but EMPTY DB succeeds: the mode refuses only to CREATE, it +// never requires committed data. This is the "ready" hot-chunk reopen path (an +// ingester that crashed before committing its first ledger must still reopen). +func TestNew_MustExist_EmptyReadyDBReopens(t *testing.T) { + path := t.TempDir() + cf := []string{"c0"} + + // Create an empty DB the normal way (create-if-missing), then close it. + s, err := New(Config{Path: path, ColumnFamilies: cf, Logger: silentLogger()}) + require.NoError(t, err) + require.NoError(t, s.Close()) + + // Reopen must-exist: succeeds against the existing empty DB. + reopened, err := New(Config{Path: path, ColumnFamilies: cf, Logger: silentLogger(), MustExist: true}) + require.NoError(t, err, "must-exist reopen of an empty ready DB succeeds") + require.NoError(t, reopened.Close()) +} + +// TestNew_MustExist_GuttedDirFailsOpen pins that a must-exist open of a directory +// that exists but holds no valid RocksDB (no CURRENT) FAILS. The daemon depends on +// this: a "ready" hot key whose DB was wiped must never silently auto-heal into a +// fresh empty DB, which would regress the watermark. +func TestNew_MustExist_GuttedDirFailsOpen(t *testing.T) { + path := t.TempDir() + cf := []string{"c0"} + + // Create a real DB, close it, then gut the dir (remove every file, keep the dir). 
+ s, err := New(Config{Path: path, ColumnFamilies: cf, Logger: silentLogger()}) + require.NoError(t, err) + require.NoError(t, s.Close()) + entries, err := os.ReadDir(path) + require.NoError(t, err) + for _, e := range entries { + require.NoError(t, os.RemoveAll(filepath.Join(path, e.Name()))) + } + + _, err = New(Config{Path: path, ColumnFamilies: cf, Logger: silentLogger(), MustExist: true}) + require.Error(t, err, "must-exist open of a gutted dir (no CURRENT) fails, never auto-heals") +} + func TestMain(m *testing.M) { if os.Getenv("ROCKSDB_LOCK_PROBE") == "1" { _, err := New(Config{ @@ -141,27 +182,19 @@ func TestStore_PutGet_DefaultCF(t *testing.T) { assert.False(t, found3) } -func TestStore_FirstLastKey(t *testing.T) { +func TestStore_LastKey(t *testing.T) { s := openTestStore(t, nil) - // Empty default CF: ok=false, no error, at both ends. - _, ok, err := s.FirstKey("") - require.NoError(t, err) - require.False(t, ok) - _, ok, err = s.LastKey("") + // Empty default CF: ok=false, no error. + _, ok, err := s.LastKey("") require.NoError(t, err) require.False(t, ok) // EncodeUint32 is big-endian, so byte-lex key order is numeric order: - // insert out of order and expect the min/max back. + // insert out of order and expect the max back. for _, n := range []uint32{500, 1, 9999, 42} { require.NoError(t, s.Put("", EncodeUint32(n), []byte{byte(n)})) } - first, ok, err := s.FirstKey("") - require.NoError(t, err) - require.True(t, ok) - require.Equal(t, uint32(1), DecodeUint32(first)) - last, ok, err := s.LastKey("") require.NoError(t, err) require.True(t, ok) @@ -169,28 +202,21 @@ func TestStore_FirstLastKey(t *testing.T) { // Unknown CF surfaces ErrCFNotFound (distinct from ok=false on an // empty-but-configured CF). 
- _, _, err = s.FirstKey("not-configured") - require.ErrorIs(t, err, ErrCFNotFound) _, _, err = s.LastKey("not-configured") require.ErrorIs(t, err, ErrCFNotFound) - // Non-default CF: FirstKey/LastKey resolve the requested CF - // independently of the default CF. + // Non-default CF: LastKey resolves the requested CF independently of the default. const altCF = "alt" sAlt := openTestStore(t, []string{altCF}) for _, n := range []uint32{7, 3, 8} { require.NoError(t, sAlt.Put(altCF, EncodeUint32(n), []byte{byte(n)})) } - first, ok, err = sAlt.FirstKey(altCF) - require.NoError(t, err) - require.True(t, ok) - require.Equal(t, uint32(3), DecodeUint32(first)) last, ok, err = sAlt.LastKey(altCF) require.NoError(t, err) require.True(t, ok) require.Equal(t, uint32(8), DecodeUint32(last)) // The default CF of the same store is untouched → ok=false. - _, ok, err = sAlt.FirstKey("") + _, ok, err = sAlt.LastKey("") require.NoError(t, err) require.False(t, ok) } @@ -373,7 +399,6 @@ func TestStore_OpsAfterCloseFailWithErrStoreClosed(t *testing.T) { }{ {"Put", func() error { return s.Put(defaultCFName, []byte("k"), []byte("v")) }}, {"Get", func() error { _, _, err := s.Get(defaultCFName, []byte("k")); return err }}, - {"FirstKey", func() error { _, _, err := s.FirstKey(defaultCFName); return err }}, {"LastKey", func() error { _, _, err := s.LastKey(defaultCFName); return err }}, {"Delete", func() error { return s.Delete(defaultCFName, []byte("k")) }}, {"Iterate", func() error { diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/cold_index.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/cold_index.go index b730b9986..70ad7fb78 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/cold_index.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/cold_index.go @@ -50,15 +50,9 @@ import ( // hit a slow (*Bitmap).lazyOR path at query time and K≥12 regresses // catastrophically. 
// -// Two callers produce bitmaps: -// -// - Cold backfill builds a Bitmaps single-threaded via per-event -// events.TermsFor + Bitmaps.AddTo, hands it directly to this -// function. -// - The live-chunk freeze path calls hotStore.Index().Snapshot() to -// materialize a uniquely-owned Bitmaps from the concurrent live -// mirror; that Snapshot Clones each bitmap so this function may -// mutate them freely. +// Both cold backfill and the live-chunk freeze build a Bitmaps single-threaded by +// re-deriving terms from raw LCMs (per-event events.TermsFor + Bitmaps.AddTo) and +// hand it directly here. // // index.hash is the MPHF serialized via buildMPHF. // @@ -133,9 +127,9 @@ func WriteColdIndex(ctx context.Context, chunkID chunk.ID, bitmaps events.Bitmap } var fp [IndexRecordFingerprintLen]byte copy(fp[:], term[:IndexRecordFingerprintLen]) - // Mutate in place — bitmaps is uniquely owned by the caller - // (built single-threaded for cold backfill, or Cloned via - // ConcurrentBitmaps.Snapshot for the live-chunk freeze path). + // Mutate in place — bitmaps is uniquely owned by the caller, built + // single-threaded either way: cold backfill from the .pack, or the freeze + // from the read-only hot DB. 
bitmap.RunOptimize() entries = append(entries, indexEntry{slot: slot, fp: fp, bitmap: bitmap}) } diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store.go index 0b95fc8ef..ea905d0a3 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store.go @@ -7,24 +7,15 @@ import ( "fmt" "iter" "math" - "os" - "path/filepath" "github.com/RoaringBitmap/roaring/v2" "github.com/linxGnu/grocksdb" - supportlog "github.com/stellar/go-stellar-sdk/support/log" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/events" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb" ) -// HotDirName is the subdirectory under EventsFullHistoryDataDir that -// contains one DB per active hot chunk (the current_hot_chunk plus -// any chunk currently being frozen). -const HotDirName = "hot" - // Column-family names used inside one chunk's hot RocksDB DB. The // per-Chunk DB directory encodes the chunk ID, so the CF names // themselves carry no chunk suffix. @@ -34,22 +25,6 @@ const ( OffsetsCF = "events_offsets" ) -// HotChunkDir returns the on-disk path of chunkID's per-Chunk hot DB -// rooted at dataDir. -func HotChunkDir(dataDir string, chunkID chunk.ID) string { - return filepath.Join(dataDir, HotDirName, chunkID.String()) -} - -// RemoveHotChunkDir deletes chunkID's hot DB directory. Idempotent — -// returns nil when the directory is already absent. -// -// The caller MUST close chunkID's HotStore before calling this; -// otherwise RocksDB's LOCK file is still held and the on-disk state -// will be inconsistent. 
-func RemoveHotChunkDir(dataDir string, chunkID chunk.ID) error { - return os.RemoveAll(HotChunkDir(dataDir, chunkID)) -} - // Per-CF tuning for the hot store, passed via rocksdb.Config.PerCFOptions: // // - DataCF holds XDR-encoded event payloads: compressible (zstd @@ -79,26 +54,13 @@ func hotStoreCFOptions() map[string]rocksdb.CFOptions { } } -// openHotChunk opens (or creates) chunkID's per-Chunk hot RocksDB DB -// at HotChunkDir(dataDir, chunkID). The three per-Chunk CFs are -// configured at New so they auto-create on a fresh DB and are -// rediscovered on a reopen. -// -// Unexported: OpenHotStore is the only caller and is the public way -// to open a per-Chunk hot DB (since the warmup step is mandatory -// before the store is usable). -func openHotChunk(dataDir string, chunkID chunk.ID, logger *supportlog.Entry) (*rocksdb.Store, error) { - store, err := rocksdb.New(rocksdb.Config{ - Path: HotChunkDir(dataDir, chunkID), - ColumnFamilies: []string{DataCF, IndexCF, OffsetsCF}, - Logger: logger, - PerCFOptions: hotStoreCFOptions(), - }) - if err != nil { - return nil, fmt.Errorf("events: open hot chunk %s: %w", chunkID, err) - } - return store, nil -} +// CFNames returns the three CFs this facade owns. Exported so the hotchunk +// shared-DB opener can register them alongside the other CFs (decision (a)). +func CFNames() []string { return []string{DataCF, IndexCF, OffsetsCF} } + +// CFOptions returns this facade's per-CF options. Exported so the hotchunk +// opener merges them into the shared per-chunk DB's PerCFOptions. 
+func CFOptions() map[string]rocksdb.CFOptions { return hotStoreCFOptions() } const ( dataKeyLen = 4 // event_id (chunk encoded by per-Chunk DB directory) @@ -107,47 +69,35 @@ const ( offsetValLen = 4 // per-ledger event count (uint32 BE) ) -// ErrLedgerOutOfRange is returned by IngestLedgerEvents when the +// ErrLedgerOutOfRange is returned by IngestLedgerToBatch when the // supplied ledger sequence falls outside the chunk's [FirstLedger, // LastLedger] window. var ErrLedgerOutOfRange = errors.New("events: ledger outside chunk range") -// ErrLedgerOutOfOrder is returned by IngestLedgerEvents when the +// ErrLedgerOutOfOrder is returned by IngestLedgerToBatch when the // supplied ledger sequence is not the next-expected one. Catches // duplicate ingest of an already-committed ledger as well as gaps // (skipping ahead). Both would silently corrupt the per-ledger // offset chain if not rejected up front. var ErrLedgerOutOfOrder = errors.New("events: ledger out of order") -// HotStore wraps one chunk's hot RocksDB DB plus the in-memory term -// mirror and ledger-offset cache that feed the query path. Reads and -// writes share the same struct; every HotStore owns its chunkStore -// exclusively and Close releases it. +// HotStore wraps one chunk's hot RocksDB DB plus the in-memory term mirror and +// ledger-offset cache that feed the query path. // -// Atomicity model: the per-Chunk DB is the source of truth. -// IngestLedgerEvents commits data + index + offsets to chunkStore in one -// atomic batch and then updates the in-memory mirrors. Warmup on next -// startup reconstructs the mirrors from the chunk's on-disk CFs. +// Atomicity: the per-Chunk DB is the source of truth. IngestLedgerToBatch queues +// data + index + offsets into one atomic batch, then (post-commit) the apply +// hook updates the in-memory mirrors; warmup reconstructs them from the on-disk +// CFs on next startup. 
// -// Concurrency model: +// Concurrency: // -// - Writes (IngestLedgerEvents) follow a single-writer contract — -// the orchestrator drives ingest from one goroutine per chunk. -// The in-memory mirror and offsets have their own concurrency -// primitives for the single-writer-vs-multi-reader pattern. -// - Reads (Lookup, FetchEvents, All) take NO HotStore-level lock. -// They fast-path-guard via h.chunkStore.IsClosed() and rely on -// the in-memory primitives' internal locks (for the mirror) and -// RocksDB's own thread-safety (for chunkStore). -// - Metadata accessors split by Close behavior: -// ChunkID, NextEventID, Index — infallible, return their cached -// value forever (usable for post-Close logging). -// EventCount, Offsets — return ErrClosed after Close, matching -// the ColdReader and Reader-interface contract. -// - Close delegates to chunkStore.Close, which is itself idempotent -// via rocksdb.Store's own atomic.Bool + CompareAndSwap. The -// in-memory mirror has no separate close step — it is dropped -// implicitly when HotStore is GC'd. +// - Writes (IngestLedgerToBatch) are single-writer (one goroutine per chunk). +// - Reads (Lookup, FetchEvents, All) take NO HotStore-level lock — they guard +// via chunkStore.IsClosed() and rely on the mirror's internal locks and +// RocksDB's thread-safety. +// - Metadata split after the caller-owned store is closed: ChunkID is +// infallible (cached, usable post-close); EventCount and +// Offsets return ErrClosed after close (Reader-interface contract). type HotStore struct { chunkStore *rocksdb.Store chunkID chunk.ID @@ -158,60 +108,31 @@ type HotStore struct { // Compile-time guard: *HotStore satisfies Reader. var _ Reader = (*HotStore)(nil) -// OpenHotStore opens (or creates) chunkID's hot DB at -// HotChunkDir(dataDir, chunkID), warms up the in-memory mirror and -// offsets from disk, and returns a ready-to-use HotStore. The -// returned store owns its chunkStore; Close releases it. 
-func OpenHotStore( - dataDir string, - chunkID chunk.ID, - logger *supportlog.Entry, -) (*HotStore, error) { - if dataDir == "" { - return nil, errors.New("events: OpenHotStore requires a data dir") - } - if logger == nil { - return nil, errors.New("events: OpenHotStore requires a logger") - } - - chunkStore, err := openHotChunk(dataDir, chunkID, logger) +// NewWithStore wraps an ALREADY-OPEN rocksdb.Store as an events HotStore on the +// three events CFs (CFNames()), running the mandatory warmup to rebuild the +// in-memory mirror + offsets. The store is owned by the caller — in production, +// hotchunk.DB composes this facade over the shared per-chunk DB and closes that +// DB once. The store must have CFNames() registered + CFOptions() applied. +// A warmup failure returns the error WITHOUT closing the caller-owned store. +func NewWithStore(store *rocksdb.Store, chunkID chunk.ID) (*HotStore, error) { + mirror, offsets, err := warmup(store, chunkID) if err != nil { - return nil, err - } - mirror, offsets, err := warmup(chunkStore, chunkID) - if err != nil { - _ = chunkStore.Close() return nil, fmt.Errorf("events: warmup chunk %s: %w", chunkID, err) } return &HotStore{ - chunkStore: chunkStore, + chunkStore: store, chunkID: chunkID, mirror: mirror, offsets: offsets, }, nil } -// Close releases the underlying chunk store. Idempotent — delegates -// to chunkStore.Close, which is itself idempotent via its own -// atomic.Bool + CompareAndSwap. The in-memory mirror is dropped -// implicitly when HotStore is GC'd. -// -// Concurrency: must not be called concurrently with in-flight read -// methods on the same HotStore (Lookup, FetchEvents, All). Callers -// drain those reads before invoking Close. The single-writer ingest -// contract means there is no concurrent IngestLedgerEvents call to -// race with either; chunkStore's IsClosed check inside -// IngestLedgerEvents fast-fails any post-Close ingest attempt. 
-func (h *HotStore) Close() error { - return h.chunkStore.Close() -} - // ChunkID returns the chunk this store serves. func (h *HotStore) ChunkID() chunk.ID { return h.chunkID } // EventCount is the total number of events committed to this Chunk -// so far. Equal to the next event-id IngestLedgerEvents would assign. -// Returns (0, ErrClosed) after Close. The Reader interface signature +// so far. Equal to the next event-id IngestLedgerToBatch would assign. +// Returns (0, ErrClosed) after the caller-owned store is closed. The Reader interface signature // is fallible to accommodate ColdReader's lazy metadata load; on the // hot side the value is always live and the error is only ErrClosed. func (h *HotStore) EventCount() (uint32, error) { @@ -221,20 +142,13 @@ func (h *HotStore) EventCount() (uint32, error) { return h.offsets.TotalEvents(), nil } -// NextEventID is the next chunk-relative event ID IngestLedgerEvents -// will assign. Returns the same value as EventCount on the hot side -// and is exposed under both names for the ingest-side and reader-side -// mental models. Infallible at the type level (hot-only API, not on -// the Reader interface). -func (h *HotStore) NextEventID() uint32 { return h.offsets.TotalEvents() } - // Offsets returns a point-in-time view of the ledger-offset cache. // The coordinator uses this to stitch a multi-ledger query range // into chunk-relative event-id ranges (see Reader.Offsets). // // Implementation: returns a *LedgerOffsets sharing the live // backing array, capped at the count visible at call time -// (~24-byte allocation per Query). Concurrent IngestLedgerEvents +// (~24-byte allocation per Query). A concurrent IngestLedgerToBatch // may extend the backing past the cap, but the returned view's // slice stays bounded to what was visible when Offsets returned. 
// Callers (Query) take the view once at entry and pass it through @@ -244,7 +158,7 @@ func (h *HotStore) NextEventID() uint32 { return h.offsets.TotalEvents() } // with the live backing array. Calling Append on the view would // silently fork it from the live data; the contract is read-only. // -// Returns (nil, ErrClosed) after Close. +// Returns (nil, ErrClosed) after the caller-owned store is closed. func (h *HotStore) Offsets() (*events.LedgerOffsets, error) { if h.chunkStore.IsClosed() { return nil, ErrClosed @@ -252,13 +166,6 @@ func (h *HotStore) Offsets() (*events.LedgerOffsets, error) { return h.offsets.View(), nil } -// Index returns the in-memory term mirror. Used by the freezer to -// snapshot every (events.TermKey, bitmap) pair into WriteColdIndex -// without rebuilding from RocksDB. Callers should typically call -// h.Index().Snapshot() to get a uniquely owned Bitmaps for -// serialization. -func (h *HotStore) Index() *events.ConcurrentBitmaps { return h.mirror } - // Lookup returns the bitmap of event IDs in this Chunk that match // the given term. The returned bitmap is an immutable snapshot of // the live mirror — writers publish new pointers via atomic.Store @@ -266,7 +173,7 @@ func (h *HotStore) Index() *events.ConcurrentBitmaps { return h.mirror } // bitmap. Callers MUST NOT mutate it themselves. See Reader.Lookup // and ConcurrentBitmaps.Get for the full contract. Returns // (nil, ErrTermNotFound) when the term has no matching events. -// Returns (nil, ErrClosed) after Close. +// Returns (nil, ErrClosed) after the caller-owned store is closed. // // ctx is checked as a fast guard but the hot path does no blocking // I/O — the bitmap comes from the in-memory mirror. @@ -336,7 +243,7 @@ func (h *HotStore) LookupKeys(ctx context.Context, keys []events.TermKey) ([]*ro // RocksDB also has them. A miss indicates corruption or a // writer/reader mismatch, not a normal not-found case. // -// After Close, returns ErrClosed. 
+// After the caller-owned store is closed, returns ErrClosed. func (h *HotStore) FetchEvents(ctx context.Context, eventIDs []uint32) ([]events.Payload, error) { if h.chunkStore.IsClosed() { return nil, ErrClosed @@ -392,7 +299,7 @@ func (h *HotStore) FetchEvents(ctx context.Context, eventIDs []uint32) ([]events // Yielded Payloads are borrowed: ContractEventBytes aliases the iteration // buffer and is valid only until the next step — clone to retain. // -// After Close, yields (zero Payload, ErrClosed) and stops. +// After the caller-owned store is closed, yields (zero Payload, ErrClosed) and stops. // ctx is checked at entry and between iterator steps — // rocksdb.Store.IterateRange does not itself accept a ctx, so a // very slow Next() can block past a cancellation until the next @@ -400,11 +307,11 @@ func (h *HotStore) FetchEvents(ctx context.Context, eventIDs []uint32) ([]events // // Out-of-range arguments yield an error and stop: // - count == 0 is a natural no-op (no yields). -// - start+count > NextEventID (overflow-safe via uint64) yields a -// wrapped out-of-bounds error. +// - start+count > the committed event count (overflow-safe via uint64) +// yields a wrapped out-of-bounds error. // - A short scan (fewer DataCF rows than count) yields a wrapped // error after the partial stream — the CF should be dense in -// [0, NextEventID), so a hole indicates corruption. +// [0, committed count), so a hole indicates corruption. 
func (h *HotStore) FetchRange(ctx context.Context, start, count uint32) iter.Seq2[events.Payload, error] { return func(yield func(events.Payload, error) bool) { if h.chunkStore.IsClosed() { @@ -418,7 +325,7 @@ func (h *HotStore) FetchRange(ctx context.Context, start, count uint32) iter.Seq if count == 0 { return } - if err := validateFetchRange(start, count, h.NextEventID(), h.chunkID); err != nil { + if err := validateFetchRange(start, count, h.offsets.TotalEvents(), h.chunkID); err != nil { yield(events.Payload{}, err) return } @@ -463,16 +370,16 @@ func (h *HotStore) FetchRange(ctx context.Context, start, count uint32) iter.Seq // ColdWriter without buffering. Thin wrapper over FetchRange; its // yielded Payloads are likewise borrowed (valid only for the step). // -// NextEventID is read inside the returned closure body, so a +// The committed event count is read inside the returned closure body, so a // concurrent ingest between r.All(ctx) returning the Seq2 and the // consumer's first range step is included in the snapshot. // -// After Close, yields (zero Payload, ErrClosed) and stops. +// After the caller-owned store is closed, yields (zero Payload, ErrClosed) and stops. func (h *HotStore) All(ctx context.Context) iter.Seq2[events.Payload, error] { return func(yield func(events.Payload, error) bool) { // FetchRange stops iterating after yielding an error; we // just forward whatever it yields and exit on the same step. - for p, err := range h.FetchRange(ctx, 0, h.NextEventID()) { + for p, err := range h.FetchRange(ctx, 0, h.offsets.TotalEvents()) { if !yield(p, err) { return } @@ -480,138 +387,104 @@ func (h *HotStore) All(ctx context.Context) iter.Seq2[events.Payload, error] { } } -// IngestLedgerEvents commits one ledger's events to the chunk store -// atomically and then updates the in-memory mirrors. 
+// IngestLedgerToBatch validates one ledger's events, marshals them, and queues +// their CF Puts into the SHARED batch b, returning the post-commit apply hook the +// caller runs AFTER b commits (decision (a)). Validation + term derivation happen +// before any Put; on any error Store.Batch discards the whole WriteBatch, so a +// rejected ledger never leaves committed rows behind. // // payloads is produced by events.LCMViewToPayloads, which emits each ledger's -// events in ascending getEvents cursor order — write order here IS the -// cursor contract (event IDs are assigned by arrival position). Terms are -// derived internally via events.TermsForBytes on each payload's -// ContractEventBytes. -// -// Sequence validation is performed up front, before any RocksDB -// write or mirror mutation: -// -// - ledgerSeq must lie within [chunkID.FirstLedger(), -// chunkID.LastLedger()] — out-of-range returns ErrLedgerOutOfRange. -// - ledgerSeq == the next expected ledger (StartLedger + LedgerCount) -// is appended normally. -// - ledgerSeq < expected (an already-ingested ledger) is an idempotent -// no-op returning nil, so a restarted ingester can blindly re-deliver -// the in-flight ledger; the re-delivered events are not re-verified. -// - ledgerSeq > expected (a gap) returns ErrLedgerOutOfOrder. -// -// A rejected call (out-of-range or gap) completes its checks before -// marshaling, leaving the chunk store and in-memory mirrors untouched. -// -// Post-batch atomicity: once the RocksDB batch commits, the in-memory -// mirror + offsets updates are infallible by construction. Any -// failure there panics rather than returning an error, because a -// returned error would leave on-disk state ahead of in-memory state -// with no clean recovery short of close + reopen. 
-// -//nolint:cyclop // sequential pipeline: validate -> marshal -> batch -> mirror updates -func (h *HotStore) IngestLedgerEvents(ledgerSeq uint32, payloads []events.Payload) error { - if h.chunkStore.IsClosed() { - return ErrClosed - } - - // Validate ledger sequence BEFORE any disk write or mirror mutation. - // Failing the offsets.Append check after the RocksDB batch has - // committed would leave events orphaned under a bad ledger key. +// events in ascending getEvents cursor order — write order here IS the cursor +// contract (event IDs are assigned by arrival position). Terms are derived via +// events.TermsForBytes on each payload's ContractEventBytes. +// +// Sequence validation, before any Put or mirror mutation: +// +// - ledgerSeq must lie within [chunkID.FirstLedger(), chunkID.LastLedger()] — +// out-of-range returns ErrLedgerOutOfRange. +// - ledgerSeq must equal the next expected ledger (StartLedger + LedgerCount). +// Under decision (a) resume is always MaxCommittedSeq+1, so a non-expected +// ledger is a mis-sequencing source (the ingestion loop's seq guard should +// have caught it) — an error (ErrLedgerOutOfOrder), never silent tolerance. +// +// Post-batch atomicity: once the batch commits, the apply hook's in-memory +// mirror + offsets updates are infallible by construction. Any failure there +// panics rather than returning an error, because a returned error would leave +// on-disk state ahead of in-memory state with no clean recovery short of +// close + reopen. +func (h *HotStore) IngestLedgerToBatch( + b *rocksdb.BatchWriter, ledgerSeq uint32, payloads []events.Payload, +) (func(), error) { + // Validate BEFORE any Put. On error Store.Batch discards the whole WriteBatch, + // so a mid-loop failure never orphans rows — no separate staging buffer needed. 
if ledgerSeq < h.chunkID.FirstLedger() || ledgerSeq > h.chunkID.LastLedger() { - return fmt.Errorf("%w: ledger %d not in chunk %s [%d, %d]", + return nil, fmt.Errorf("%w: ledger %d not in chunk %s [%d, %d]", ErrLedgerOutOfRange, ledgerSeq, h.chunkID, h.chunkID.FirstLedger(), h.chunkID.LastLedger()) } expected := h.offsets.StartLedger() + uint32(h.offsets.LedgerCount()) //nolint:gosec - if ledgerSeq < expected { - // Already ingested: idempotent retry no-op. A restarted ingester - // can blindly re-deliver an already-committed ledger; drop it - // rather than erroring or double-appending. The re-delivered - // events are not re-verified, so a re-delivery carrying different - // events for an already-ingested ledger is silently ignored. - return nil - } - if ledgerSeq > expected { - return fmt.Errorf("%w: expected ledger %d, got %d", + if ledgerSeq != expected { + return nil, fmt.Errorf("%w: expected ledger %d, got %d", ErrLedgerOutOfOrder, expected, ledgerSeq) } - // Pre-derive term keys per payload so the post-commit mirror - // update doesn't re-hash. Surfacing TermsForBytes errors here - // (pre-batch) cleanly rejects the ledger commit without touching disk — - // a decode failure on stellar-core-validated XDR is a corruption - // signal worth aborting on. + // Derive term keys per payload up front (a TermsForBytes error rejects the + // ledger without any Put) and retain them for the post-commit mirror update. 
termKeys := make([][]events.TermKey, len(payloads)) for i := range payloads { keys, err := events.TermsForBytes(payloads[i].ContractEventBytes) if err != nil { - return fmt.Errorf("events: derive terms for payload %d in ledger %d: %w", i, ledgerSeq, err) + return nil, fmt.Errorf("derive terms for payload %d in ledger %d: %w", i, ledgerSeq, err) } termKeys[i] = keys } startID := h.offsets.TotalEvents() if uint64(startID)+uint64(len(payloads)) > math.MaxUint32 { - return fmt.Errorf("events: chunk %s would overflow uint32 event-id space at ledger %d", + return nil, fmt.Errorf("chunk %s would overflow uint32 event-id space at ledger %d", h.chunkID, ledgerSeq) } - // Atomic batch on the per-Chunk DB. Each payload is marshaled into one - // reused scratch buffer: BatchWriter.Put copies the value into the write - // batch synchronously, so the scratch is free to reuse on the next - // iteration — no per-payload allocation. A marshal error returns from - // the callback, which aborts the batch so nothing commits. + // Marshal + queue each event directly into b. BatchWriter.Put copies + // synchronously, so ONE reused scratch buffer serves every event — the caller + // opens exactly one batch per ledger, so no row must outlive this call. var scratch []byte - err := h.chunkStore.Batch(func(b *rocksdb.BatchWriter) error { - for i := range payloads { - eventID := startID + uint32(i) - blob, err := payloads[i].MarshalInto(scratch[:0]) - if err != nil { - return fmt.Errorf("events: marshal payload %d for ledger %d: %w", i, ledgerSeq, err) - } - scratch = blob - b.Put(DataCF, encodeDataKey(eventID), blob) - for _, key := range termKeys[i] { - b.Put(IndexCF, encodeIndexKey(key, eventID), nil) - } + for i := range payloads { + blob, err := payloads[i].MarshalInto(scratch[:0]) + if err != nil { + return nil, fmt.Errorf("marshal payload %d for ledger %d: %w", i, ledgerSeq, err) } - // On-disk shape matches the in-memory API: per-ledger event - // count, not cumulative. 
Warmup replays directly via - // offsets.Append(eventCount) — no delta arithmetic. - //nolint:gosec // bounds-checked above - eventCount := uint32(len(payloads)) - b.Put(OffsetsCF, encodeOffsetKey(ledgerSeq), encodeLedgerEventCount(eventCount)) - return nil - }) - if err != nil { - return fmt.Errorf("events: commit ledger %d to chunk %s: %w", ledgerSeq, h.chunkID, err) - } - - // Phase 3: the batch is durable — apply it to the in-memory cache. - // Infallible given the validation above (ledgerSeq == expected and - // in-chunk, single writer): mirror.AddTo cannot fail and offsets.Append - // appends at the already-validated next slot, so the only - // non-completion is a crash, after which warmup rebuilds the cache from - // disk. - // - // Ordering invariant: mirror BEFORE offsets. A concurrent Query - // that captures offsets via h.offsets.Snapshot() then later calls - // mirror.Get for the same key sees either the previous state - // (offsets count N-1, mirror without ledger-N events) or a - // consistent later one (offsets count ≥N, mirror with ledger-N - // events). Reversing the order would let a reader observe an - // offsets count that includes IDs the mirror hasn't published - // yet — Query would then ask FetchEvents for IDs not yet - // indexed; the bitmap intersection would simply miss them, with - // no error surface. - // - // Batch by key so each ConcurrentBitmaps.AddTo call clones at most - // once per (key, ledger), not once per (key, event). For popular - // terms that receive many events in one ledger this turns N COW - // clones into 1. Initial capacity 64 ≈ a few × unique-terms per - // typical ledger; the map grows correctly past that. 
+ scratch = blob + eventID := startID + uint32(i) + b.Put(DataCF, encodeDataKey(eventID), blob) + for _, key := range termKeys[i] { + b.Put(IndexCF, encodeIndexKey(key, eventID), nil) + } + } + //nolint:gosec // len bounded by the overflow guard above + b.Put(OffsetsCF, encodeOffsetKey(ledgerSeq), encodeLedgerEventCount(uint32(len(payloads)))) + + return func() { h.applyLedger(startID, termKeys) }, nil +} + +// index returns the in-memory term mirror. Test-only write hook: no production +// path reads it. Kept unexported until #772 decides whether the v2 read path +// hooks into it. +func (h *HotStore) index() *events.ConcurrentBitmaps { return h.mirror } + +// applyLedger updates the mirror + offsets for a ledger whose rows are durable. +// Infallible by construction (IngestLedgerToBatch validated seq under the +// single-writer contract); the only non-completion is a crash, after which warmup +// rebuilds. +// +// Ordering invariant: mirror BEFORE offsets. A concurrent Query that snapshots +// offsets then reads the mirror must see either the prior state or a consistent +// later one. Reversing it would let a reader see an offsets count including IDs +// the mirror hasn't published — FetchEvents would then miss them, silently. +func (h *HotStore) applyLedger(startID uint32, termKeys [][]events.TermKey) { + // Batch by key so each AddTo clones at most once per (key, ledger), not per + // (key, event) — turns N COW clones into 1 for popular terms. Cap 64 ≈ a few + // × unique-terms per ledger; the map grows past that. perKeyIDs := make(map[events.TermKey][]uint32, 64) for i, keys := range termKeys { eventID := startID + uint32(i) @@ -622,14 +495,13 @@ func (h *HotStore) IngestLedgerEvents(ledgerSeq uint32, payloads []events.Payloa for key, ids := range perKeyIDs { h.mirror.AddTo(key, ids...) 
} - //nolint:gosec // len bounded by the overflow check above - h.offsets.Append(uint32(len(payloads))) - return nil + //nolint:gosec // len bounded by IngestLedgerToBatch's overflow guard + h.offsets.Append(uint32(len(termKeys))) } // ────────────────────────────────────────────────────────────────── // Warmup — reconstructs the in-memory mirror + offsets from the -// per-Chunk DB's on-disk CFs. Called only by OpenHotStore. +// per-Chunk DB's on-disk CFs. Called by NewWithStore. // ────────────────────────────────────────────────────────────────── // warmup rebuilds the in-memory mirrors for chunkID by prefix-scanning diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store_test.go index ea5d3ce7d..bda698c7f 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_store_test.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "iter" + "path/filepath" "sync" "testing" @@ -19,6 +20,7 @@ import ( "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/events" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb" ) // silentLogger returns a logger whose output is buffered into an @@ -37,6 +39,7 @@ func silentLogger() *supportlog.Entry { type hotStoreHarness struct { dataDir string store *HotStore + raw *rocksdb.Store } // openHotStoreForTest opens a fresh per-Chunk hot DB for chunkID @@ -48,11 +51,39 @@ func openHotStoreForTest(t *testing.T, chunkID chunk.ID) *hotStoreHarness { t.Helper() dir := t.TempDir() - hot, err := OpenHotStore(dir, chunkID, silentLogger()) + hot, raw := openHotStoreForTestAt(t, dir, chunkID) + return &hotStoreHarness{dataDir: dir, store: hot, raw: raw} +} + +func openHotStoreForTestAt(t *testing.T, dir string, chunkID chunk.ID) (*HotStore, 
*rocksdb.Store) { + t.Helper() + hot, raw, err := tryOpenHotStoreForTest(t, dir, chunkID) require.NoError(t, err) - t.Cleanup(func() { _ = hot.Close() }) + return hot, raw +} + +func tryOpenHotStoreForTest(t *testing.T, dir string, chunkID chunk.ID) (*HotStore, *rocksdb.Store, error) { + t.Helper() + raw := openRawHotChunkForTest(t, dir, chunkID) + hot, err := NewWithStore(raw, chunkID) + if err != nil { + _ = raw.Close() + return nil, nil, err + } + t.Cleanup(func() { _ = raw.Close() }) + return hot, raw, nil +} - return &hotStoreHarness{dataDir: dir, store: hot} +func openRawHotChunkForTest(t *testing.T, dir string, chunkID chunk.ID) *rocksdb.Store { + t.Helper() + raw, err := rocksdb.New(rocksdb.Config{ + Path: filepath.Join(dir, chunkID.String()), + ColumnFamilies: CFNames(), + Logger: silentLogger(), + PerCFOptions: CFOptions(), + }) + require.NoError(t, err) + return raw } func makePayload(symbol string) (events.Payload, []events.TermKey) { @@ -105,23 +136,12 @@ func dataSym(t *testing.T, p events.Payload) string { return string(*eventOf(p).Body.V0.Data.Sym) } -func TestOpenHotStore_RequiresDataDirAndLogger(t *testing.T) { - dir := t.TempDir() - - _, err := OpenHotStore("", 0, silentLogger()) - require.Error(t, err, "missing dataDir") - - _, err = OpenHotStore(dir, 0, nil) - require.Error(t, err, "missing logger") -} - func TestHotStore_FreshChunkHasEmptyState(t *testing.T) { const chunkID = chunk.ID(0) h := openHotStoreForTest(t, chunkID) assert.Equal(t, chunkID, h.store.ChunkID()) assert.Equal(t, uint32(0), mustEventCount(t, h.store)) - assert.Equal(t, uint32(0), h.store.NextEventID()) assert.Equal(t, chunkID.FirstLedger(), mustOffsets(t, h.store).StartLedger()) } @@ -130,7 +150,7 @@ func TestHotStore_IngestLedgerWritesAllCFs(t *testing.T) { h := openHotStoreForTest(t, chunkID) p, keys := makePayload("transfer") - require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p})) + require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p})) 
// events_data row exists. got, found, err := h.store.chunkStore.Get(DataCF, encodeDataKey(0)) @@ -159,7 +179,7 @@ func TestHotStore_IngestLedgerWritesAllCFs(t *testing.T) { require.NotNil(t, bm) assert.True(t, bm.Contains(0)) - assert.Equal(t, uint32(1), h.store.NextEventID()) + assert.Equal(t, uint32(1), mustEventCount(t, h.store)) } func TestHotStore_EventIDsAreMonotonic(t *testing.T) { @@ -169,24 +189,24 @@ func TestHotStore_EventIDsAreMonotonic(t *testing.T) { p1, _ := makePayload("a") p2, _ := makePayload("b") - require.NoError(t, h.store.IngestLedgerEvents(first, []events.Payload{p1, p2})) + require.NoError(t, ingestLedgerEvents(h.store, first, []events.Payload{p1, p2})) p3, _ := makePayload("c") - require.NoError(t, h.store.IngestLedgerEvents(first+1, []events.Payload{p3})) + require.NoError(t, ingestLedgerEvents(h.store, first+1, []events.Payload{p3})) for id := range uint32(3) { _, found, err := h.store.chunkStore.Get(DataCF, encodeDataKey(id)) require.NoError(t, err) assert.True(t, found, "missing event id %d", id) } - assert.Equal(t, uint32(3), h.store.NextEventID()) + assert.Equal(t, uint32(3), mustEventCount(t, h.store)) } func TestHotStore_EmptyLedgerStillWritesOffsetsAndState(t *testing.T) { const chunkID = chunk.ID(0) h := openHotStoreForTest(t, chunkID) - require.NoError(t, h.store.IngestLedgerEvents(2, nil)) + require.NoError(t, ingestLedgerEvents(h.store, 2, nil)) val, found, err := h.store.chunkStore.Get(OffsetsCF, encodeOffsetKey(2)) require.NoError(t, err) @@ -209,7 +229,7 @@ func TestHotStore_LookupReturnsImmutableSnapshot(t *testing.T) { // Promote to dense mode so we exercise the bm.Load path (sparse // mode allocates a fresh bitmap per Get). 
for i := range uint32(70) { - require.NoError(t, h.store.IngestLedgerEvents(2+i, []events.Payload{p})) + require.NoError(t, ingestLedgerEvents(h.store, 2+i, []events.Payload{p})) } first, err := h.store.Lookup(context.Background(), keys[0]) @@ -218,7 +238,7 @@ func TestHotStore_LookupReturnsImmutableSnapshot(t *testing.T) { // New ingest publishes a new snapshot. The old pointer must // remain unchanged (it's the previous snapshot). - require.NoError(t, h.store.IngestLedgerEvents(72, []events.Payload{p})) + require.NoError(t, ingestLedgerEvents(h.store, 72, []events.Payload{p})) assert.Equal(t, cardBefore, first.GetCardinality(), "prior Lookup result must be an immutable snapshot — later IngestLedgerEvents must not mutate it") @@ -235,9 +255,9 @@ func TestHotStore_FetchEventsRoundTrip(t *testing.T) { p1, _ := makePayload("a") p2, _ := makePayload("b") - require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p1, p2})) + require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p1, p2})) p3, _ := makePayload("c") - require.NoError(t, h.store.IngestLedgerEvents(3, []events.Payload{p3})) + require.NoError(t, ingestLedgerEvents(h.store, 3, []events.Payload{p3})) fetched, err := h.store.FetchEvents(context.Background(), []uint32{0, 1, 2}) require.NoError(t, err) @@ -251,7 +271,7 @@ func TestHotStore_FetchEventsErrorsOnMissingID(t *testing.T) { const chunkID = chunk.ID(0) h := openHotStoreForTest(t, chunkID) p, _ := makePayload("only") - require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p})) + require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p})) _, err := h.store.FetchEvents(context.Background(), []uint32{99}) assert.Error(t, err) @@ -272,7 +292,7 @@ func TestHotStore_FetchEventsLargeBatch(t *testing.T) { p, _ := makePayload(fmt.Sprintf("evt-%03d", i)) payloads[i] = p } - require.NoError(t, h.store.IngestLedgerEvents(2, payloads)) + require.NoError(t, ingestLedgerEvents(h.store, 2, payloads)) ids := make([]uint32, n) 
for i := range n { @@ -297,7 +317,7 @@ func TestHotStore_FetchEventsHonorsContext(t *testing.T) { const chunkID = chunk.ID(0) h := openHotStoreForTest(t, chunkID) p, _ := makePayload("only") - require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p})) + require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p})) ctx, cancel := context.WithCancel(context.Background()) cancel() @@ -320,7 +340,7 @@ func TestHotStore_FetchEventsRejectsUnsortedInput(t *testing.T) { p2.LedgerSequence = 2 p3, _ := makePayload("c") p3.LedgerSequence = 2 - require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p1, p2, p3})) + require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p1, p2, p3})) _, err := h.store.FetchEvents(context.Background(), []uint32{2, 0}) require.ErrorIs(t, err, ErrUnsortedEventIDs, "out-of-order input must error") @@ -336,10 +356,10 @@ func TestHotStore_AllStreamsInEventIDOrder(t *testing.T) { p1.LedgerSequence = 2 p2, _ := makePayload("b") p2.LedgerSequence = 2 - require.NoError(t, h.store.IngestLedgerEvents(2, []events.Payload{p1, p2})) + require.NoError(t, ingestLedgerEvents(h.store, 2, []events.Payload{p1, p2})) p3, _ := makePayload("c") p3.LedgerSequence = 3 - require.NoError(t, h.store.IngestLedgerEvents(3, []events.Payload{p3})) + require.NoError(t, ingestLedgerEvents(h.store, 3, []events.Payload{p3})) got := make([]string, 0, 3) gotLedgers := make([]uint32, 0, 3) @@ -364,8 +384,8 @@ func TestHotStore_AllEmptyChunkYieldsNothing(t *testing.T) { func TestHotStore_CloseRejectsWrites(t *testing.T) { h := openHotStoreForTest(t, 0) - require.NoError(t, h.store.Close()) - err := h.store.IngestLedgerEvents(2, nil) + require.NoError(t, h.raw.Close()) + err := ingestLedgerEvents(h.store, 2, nil) assert.ErrorIs(t, err, ErrClosed) } @@ -379,8 +399,8 @@ func TestHotStore_PostCloseReadsError(t *testing.T) { h := openHotStoreForTest(t, chunkID) p, keys := makePayload("seed") - require.NoError(t, 
h.store.IngestLedgerEvents(chunkID.FirstLedger(), []events.Payload{p})) - require.NoError(t, h.store.Close()) + require.NoError(t, ingestLedgerEvents(h.store, chunkID.FirstLedger(), []events.Payload{p})) + require.NoError(t, h.raw.Close()) // Lookup must error rather than silently returning the cached bitmap. bm, err := h.store.Lookup(context.Background(), keys[0]) @@ -403,45 +423,45 @@ func TestHotStore_PostCloseReadsError(t *testing.T) { require.ErrorIs(t, err, ErrClosed) } -// TestHotStore_IngestLedgerEvents_DuplicateLedgerIsNoOp pins the -// idempotency contract: re-ingesting an already-committed ledger is a -// no-op (returns nil) that leaves state untouched — it neither advances -// eventID/offsets nor writes the re-delivered payload, and the original -// ledger's events remain intact. A restarted ingester can blindly -// re-deliver the in-flight ledger. -func TestHotStore_IngestLedgerEvents_DuplicateLedgerIsNoOp(t *testing.T) { +// TestHotStore_IngestLedgerEvents_DuplicateLedgerErrors pins the sequencing +// contract after the staging collapse (#30): re-ingesting an already-committed +// ledger is NOT a silent no-op — it is a mis-sequencing error (ErrLedgerOutOfOrder) +// that leaves state untouched (Store.Batch discards the WriteBatch on the error). +// Under decision (a) the ingestion loop always resumes at MaxCommittedSeq+1 and +// the shared cursor validates contiguity, so a duplicate can only mean a broken +// source — an error, never silent tolerance. +func TestHotStore_IngestLedgerEvents_DuplicateLedgerErrors(t *testing.T) { const chunkID = chunk.ID(0) h := openHotStoreForTest(t, chunkID) first := chunkID.FirstLedger() p1, _ := makePayload("a") - require.NoError(t, h.store.IngestLedgerEvents(first, []events.Payload{p1})) + require.NoError(t, ingestLedgerEvents(h.store, first, []events.Payload{p1})) countBefore := mustEventCount(t, h.store) - nextBefore := h.store.NextEventID() - // Re-ingesting the same ledger is an idempotent no-op. 
+ // Re-ingesting the same ledger errors (expected is now first+1). p2, _ := makePayload("b") - require.NoError(t, h.store.IngestLedgerEvents(first, []events.Payload{p2})) + err := ingestLedgerEvents(h.store, first, []events.Payload{p2}) + require.ErrorIs(t, err, ErrLedgerOutOfOrder, "a re-delivered committed ledger must error, not no-op") - assert.Equal(t, countBefore, mustEventCount(t, h.store), "EventCount must not advance on duplicate ingest") - assert.Equal(t, nextBefore, h.store.NextEventID(), "NextEventID must not advance on duplicate ingest") + assert.Equal(t, countBefore, mustEventCount(t, h.store), "event count must not advance on the rejected ingest") - // The original ledger's event is untouched (not overwritten by p2). + // The original ledger's event is untouched, and the rejected batch committed + // nothing (Store.Batch discards the WriteBatch on the callback error). got, err := h.store.FetchEvents(context.Background(), []uint32{0}) require.NoError(t, err) require.Len(t, got, 1) - assert.Equal(t, "a", dataSym(t, got[0]), "original event must survive the no-op") + assert.Equal(t, "a", dataSym(t, got[0]), "original event must survive the rejected re-ingest") - // The dropped payload must not reach the mirror. makePayload emits + // The rejected payload must not reach the mirror. makePayload emits // [contractID, topic0, ...]; contractID is shared across symbols - // (hardcoded 0xab), so we check topic0 (index 1), which is - // symbol-specific. + // (hardcoded 0xab), so we check topic0 (index 1), which is symbol-specific. 
_, secondKeys := makePayload("b") require.GreaterOrEqual(t, len(secondKeys), 2, "test fixture expected to have a topic0 term") bm, lookupErr := h.store.Lookup(context.Background(), secondKeys[1]) require.ErrorIs(t, lookupErr, ErrTermNotFound, - "the no-op'd payload's topic0 term must not appear in the mirror") + "the rejected payload's topic0 term must not appear in the mirror") assert.Nil(t, bm) } @@ -453,18 +473,16 @@ func TestHotStore_IngestLedgerEvents_RejectsLedgerGap(t *testing.T) { first := chunkID.FirstLedger() p1, _ := makePayload("a") - require.NoError(t, h.store.IngestLedgerEvents(first, []events.Payload{p1})) + require.NoError(t, ingestLedgerEvents(h.store, first, []events.Payload{p1})) countBefore := mustEventCount(t, h.store) - nextBefore := h.store.NextEventID() // Skip first+1; jump directly to first+2. p2, _ := makePayload("c") - err := h.store.IngestLedgerEvents(first+2, []events.Payload{p2}) + err := ingestLedgerEvents(h.store, first+2, []events.Payload{p2}) require.ErrorIs(t, err, ErrLedgerOutOfOrder) assert.Equal(t, countBefore, mustEventCount(t, h.store)) - assert.Equal(t, nextBefore, h.store.NextEventID()) } // TestHotStore_IngestLedgerEvents_RejectsOutOfRangeLedger pins the @@ -476,22 +494,21 @@ func TestHotStore_IngestLedgerEvents_RejectsOutOfRangeLedger(t *testing.T) { p, _ := makePayload("a") // Below range (chunk 0's FirstLedger is FirstLedgerSeq == 2). - err := h.store.IngestLedgerEvents(1, []events.Payload{p}) + err := ingestLedgerEvents(h.store, 1, []events.Payload{p}) require.ErrorIs(t, err, ErrLedgerOutOfRange, "ledger below chunk range") // Above range — well past chunk 0's LastLedger. - err = h.store.IngestLedgerEvents(chunkID.LastLedger()+1, []events.Payload{p}) + err = ingestLedgerEvents(h.store, chunkID.LastLedger()+1, []events.Payload{p}) require.ErrorIs(t, err, ErrLedgerOutOfRange, "ledger above chunk range") // State must be unchanged after both rejections. 
assert.Equal(t, uint32(0), mustEventCount(t, h.store)) - assert.Equal(t, uint32(0), h.store.NextEventID()) } func TestHotStore_CloseIsIdempotent(t *testing.T) { h := openHotStoreForTest(t, 0) - require.NoError(t, h.store.Close()) - assert.NoError(t, h.store.Close()) + require.NoError(t, h.raw.Close()) + assert.NoError(t, h.raw.Close()) } func TestHotStore_ReopenRecoversState(t *testing.T) { @@ -501,21 +518,18 @@ func TestHotStore_ReopenRecoversState(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) + hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID) p1, _ := makePayload("before") - require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1})) - require.NoError(t, hot1.Close()) + require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1})) + require.NoError(t, raw1.Close()) - hot2, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = hot2.Close() }) + hot2, _ := openHotStoreForTestAt(t, dir, chunkID) - assert.Equal(t, uint32(1), hot2.NextEventID(), "warmup recovered offsets") + assert.Equal(t, uint32(1), mustEventCount(t, hot2), "warmup recovered offsets") p2, _ := makePayload("after") - require.NoError(t, hot2.IngestLedgerEvents(3, []events.Payload{p2})) - assert.Equal(t, uint32(2), hot2.NextEventID()) + require.NoError(t, ingestLedgerEvents(hot2, 3, []events.Payload{p2})) + assert.Equal(t, uint32(2), mustEventCount(t, hot2)) } func TestHotStore_SatisfiesReader(t *testing.T) { @@ -542,7 +556,7 @@ func TestHotStore_ConcurrentIngestAndLookup(t *testing.T) { go func() { defer wg.Done() for i := range uint32(N) { - if err := h.store.IngestLedgerEvents(2+i, []events.Payload{p}); err != nil { + if err := ingestLedgerEvents(h.store, 2+i, []events.Payload{p}); err != nil { t.Errorf("ingest %d: %v", i, err) return } @@ -560,7 +574,7 @@ func TestHotStore_ConcurrentIngestAndLookup(t *testing.T) { } }() wg.Wait() - 
assert.Equal(t, uint32(N), h.store.NextEventID()) + assert.Equal(t, uint32(N), mustEventCount(t, h.store)) } // fetchRangePayloads fully drains FetchRange into a slice for tests @@ -608,7 +622,7 @@ func TestHotStore_FetchRangeMidRange(t *testing.T) { p, _ := makePayload(fmt.Sprintf("evt-%d", i)) payloads[i] = p } - require.NoError(t, h.store.IngestLedgerEvents(first, payloads)) + require.NoError(t, ingestLedgerEvents(h.store, first, payloads)) got, err := fetchRangePayloads(t, h.store, 1, 3) require.NoError(t, err) @@ -630,7 +644,7 @@ func TestHotStore_FetchRangeOutOfBoundsErrors(t *testing.T) { const chunkID = chunk.ID(0) h := openHotStoreForTest(t, chunkID) p, _ := makePayload("only") - require.NoError(t, h.store.IngestLedgerEvents(chunkID.FirstLedger(), []events.Payload{p})) + require.NoError(t, ingestLedgerEvents(h.store, chunkID.FirstLedger(), []events.Payload{p})) _, err := fetchRangePayloads(t, h.store, 0, 2) // count > EventCount require.Error(t, err) @@ -641,7 +655,7 @@ func TestHotStore_FetchRangeOutOfBoundsErrors(t *testing.T) { func TestHotStore_FetchRangePostCloseYieldsErrClosed(t *testing.T) { const chunkID = chunk.ID(0) h := openHotStoreForTest(t, chunkID) - require.NoError(t, h.store.Close()) + require.NoError(t, h.raw.Close()) require.ErrorIs(t, firstIterError(h.store.FetchRange(context.Background(), 0, 1)), ErrClosed) } @@ -655,7 +669,7 @@ func TestHotStore_AllMatchesFetchRange(t *testing.T) { p, _ := makePayload(fmt.Sprintf("e%d", i)) payloads[i] = p } - require.NoError(t, h.store.IngestLedgerEvents(first, payloads)) + require.NoError(t, ingestLedgerEvents(h.store, first, payloads)) allSyms := make([]string, 0, len(payloads)) for p, err := range h.store.All(context.Background()) { @@ -689,3 +703,24 @@ func mustOffsets(t *testing.T, r Reader) *events.LedgerOffsets { require.NotNil(t, o) return o } + +// ingestLedgerEvents commits one ledger's events through IngestLedgerToBatch in +// a test-owned batch and runs the post-commit apply hook — the 
production +// write shape, reduced to a test seeding call. +func ingestLedgerEvents(h *HotStore, ledgerSeq uint32, payloads []events.Payload) error { + if h.chunkStore.IsClosed() { + return ErrClosed + } + var apply func() + if err := h.chunkStore.Batch(func(b *rocksdb.BatchWriter) error { + a, aerr := h.IngestLedgerToBatch(b, ledgerSeq, payloads) + apply = a + return aerr + }); err != nil { + return err + } + if apply != nil { + apply() + } + return nil +} diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_warmup_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_warmup_test.go index bae6da25a..5dd71349e 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_warmup_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/hot_warmup_test.go @@ -13,11 +13,11 @@ import ( ) // These tests exercise the (unexported) warmup() function indirectly -// through OpenHotStore, which is the only production caller. They +// through NewWithStore over an explicitly opened RocksDB store. They // document the "fresh chunk → empty caches", "ingested chunk → // reconstructed caches" contract. -func TestWarmup_FreshChunkProducesEmptyMirrorsViaOpenHotStore(t *testing.T) { +func TestWarmup_FreshChunkProducesEmptyMirrorsViaNewWithStore(t *testing.T) { const chunkID = chunk.ID(0) h := openHotStoreForTest(t, chunkID) @@ -37,11 +37,10 @@ func TestWarmup_RebuildsMirrorFromIngestedRows(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) + hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID) p1, _ := makePayload("alpha") p2, _ := makePayload("beta") - require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2})) + require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2})) // Snapshot the mirror state before close. Snapshot returns a // uniquely-owned Bitmaps the test can iterate freely. 
@@ -49,12 +48,10 @@ func TestWarmup_RebuildsMirrorFromIngestedRows(t *testing.T) { for term, bm := range hot1.mirror.Snapshot() { expected[term] = bm.GetCardinality() } - require.NoError(t, hot1.Close()) + require.NoError(t, raw1.Close()) // Reopen — warmup replays events_index into a fresh mirror. - hot2, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = hot2.Close() }) + hot2, _ := openHotStoreForTestAt(t, dir, chunkID) got := make(map[events.TermKey]uint64) for term, bm := range hot2.mirror.Snapshot() { @@ -67,17 +64,14 @@ func TestWarmup_RestoresEventIDsForRepeatedTerm(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) + hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID) p1, _ := makePayload("shared") p2, _ := makePayload("shared") p3, _ := makePayload("shared") - require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2, p3})) - require.NoError(t, hot1.Close()) + require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2, p3})) + require.NoError(t, raw1.Close()) - hot2, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = hot2.Close() }) + hot2, _ := openHotStoreForTestAt(t, dir, chunkID) contractTermKey := events.ComputeTermKey(eventOf(p1).ContractId[:], events.FieldContractID) bm, err := hot2.Lookup(context.Background(), contractTermKey) @@ -93,18 +87,15 @@ func TestWarmup_OffsetsReconstructedAcrossLedgers(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) + hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID) p1, _ := makePayload("a") p2, _ := makePayload("b") - require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2})) + require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2})) p3, _ := makePayload("c") - 
require.NoError(t, hot1.IngestLedgerEvents(3, []events.Payload{p3})) - require.NoError(t, hot1.Close()) + require.NoError(t, ingestLedgerEvents(hot1, 3, []events.Payload{p3})) + require.NoError(t, raw1.Close()) - hot2, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = hot2.Close() }) + hot2, _ := openHotStoreForTestAt(t, dir, chunkID) assert.Equal(t, uint32(3), mustEventCount(t, hot2)) @@ -127,8 +118,7 @@ func TestWarmup_OffsetsReconstructedAcrossLedgers(t *testing.T) { //nolint:unparam // chunkID kept as a param for call-site clarity; today every caller uses 0 func corruptHotChunk(t *testing.T, dir string, chunkID chunk.ID, mutate func(raw *rocksdb.Store)) { t.Helper() - raw, err := openHotChunk(dir, chunkID, silentLogger()) - require.NoError(t, err) + raw := openRawHotChunkForTest(t, dir, chunkID) defer func() { require.NoError(t, raw.Close()) }() // release LOCK even if mutate fails mutate(raw) } @@ -137,12 +127,11 @@ func TestWarmup_RejectsDataEventBeyondOffsets(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) + hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID) p1, _ := makePayload("a") p2, _ := makePayload("b") - require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2})) // total = 2 - require.NoError(t, hot1.Close()) + require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2})) // total = 2 + require.NoError(t, raw1.Close()) // An orphan data row well beyond total (id 7, total = 2): proves the // check catches any id >= total, not just one past the boundary. 
@@ -150,7 +139,7 @@ func TestWarmup_RejectsDataEventBeyondOffsets(t *testing.T) { require.NoError(t, raw.Put(DataCF, encodeDataKey(7), []byte("orphan"))) }) - _, err = OpenHotStore(dir, chunkID, silentLogger()) + _, _, err := tryOpenHotStoreForTest(t, dir, chunkID) // Branch-specific substring: every corruption shares "corrupt chunk", // so assert the data-orphan message to prove this branch fired. require.ErrorContains(t, err, "data present at id >= committed count") @@ -160,13 +149,12 @@ func TestWarmup_RejectsOffsetsGap(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) + hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID) for _, seq := range []uint32{2, 3, 4} { p, _ := makePayload("x") - require.NoError(t, hot1.IngestLedgerEvents(seq, []events.Payload{p})) + require.NoError(t, ingestLedgerEvents(hot1, seq, []events.Payload{p})) } - require.NoError(t, hot1.Close()) + require.NoError(t, raw1.Close()) // Drop ledger 3's offset row: warmup then iterates 2, 4 and must // reject the gap. 
This is the sequence check that moved out of @@ -175,7 +163,7 @@ func TestWarmup_RejectsOffsetsGap(t *testing.T) { require.NoError(t, raw.Delete(OffsetsCF, encodeOffsetKey(3))) }) - _, err = OpenHotStore(dir, chunkID, silentLogger()) + _, _, err := tryOpenHotStoreForTest(t, dir, chunkID) require.ErrorContains(t, err, "expected ledger 3, got 4") } @@ -183,13 +171,12 @@ func TestWarmup_RejectsOffsetsOverflow(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) + hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID) for _, seq := range []uint32{2, 3} { p, _ := makePayload("x") - require.NoError(t, hot1.IngestLedgerEvents(seq, []events.Payload{p})) + require.NoError(t, ingestLedgerEvents(hot1, seq, []events.Payload{p})) } - require.NoError(t, hot1.Close()) + require.NoError(t, raw1.Close()) // Overwrite the offset rows with counts that sum past uint32: warmup // must reject the cumulative overflow rather than silently wrapping. @@ -198,7 +185,7 @@ func TestWarmup_RejectsOffsetsOverflow(t *testing.T) { require.NoError(t, raw.Put(OffsetsCF, encodeOffsetKey(3), encodeLedgerEventCount(2_000_000_000))) }) - _, err = OpenHotStore(dir, chunkID, silentLogger()) + _, _, err := tryOpenHotStoreForTest(t, dir, chunkID) require.ErrorContains(t, err, "cumulative event count overflow") } @@ -206,9 +193,8 @@ func TestWarmup_RejectsOrphanInEmptyChunk(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) - require.NoError(t, hot1.Close()) // total = 0, nothing committed + _, raw1 := openHotStoreForTestAt(t, dir, chunkID) + require.NoError(t, raw1.Close()) // total = 0, nothing committed // A data row in a chunk that committed nothing: total == 0, so the // tail Get is skipped and the orphan scan must fire from id 0. 
@@ -216,7 +202,7 @@ func TestWarmup_RejectsOrphanInEmptyChunk(t *testing.T) { require.NoError(t, raw.Put(DataCF, encodeDataKey(0), []byte("orphan"))) }) - _, err = OpenHotStore(dir, chunkID, silentLogger()) + _, _, err := tryOpenHotStoreForTest(t, dir, chunkID) require.ErrorContains(t, err, "data present at id >= committed count 0") } @@ -224,12 +210,11 @@ func TestWarmup_RejectsMissingTailDataEvent(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) + hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID) p1, _ := makePayload("a") p2, _ := makePayload("b") - require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2})) // total = 2 - require.NoError(t, hot1.Close()) + require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2})) // total = 2 + require.NoError(t, raw1.Close()) // Drop the last data row (event id total-1 == 1) while offsets still // count 2. @@ -237,7 +222,7 @@ func TestWarmup_RejectsMissingTailDataEvent(t *testing.T) { require.NoError(t, raw.Delete(DataCF, encodeDataKey(1))) }) - _, err = OpenHotStore(dir, chunkID, silentLogger()) + _, _, err := tryOpenHotStoreForTest(t, dir, chunkID) require.ErrorContains(t, err, "missing from data") } @@ -245,12 +230,11 @@ func TestWarmup_RejectsIndexBeyondCommitted(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) + hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID) p1, _ := makePayload("a") p2, _ := makePayload("b") - require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p1, p2})) // total = 2 - require.NoError(t, hot1.Close()) + require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p1, p2})) // total = 2 + require.NoError(t, raw1.Close()) // An index row at exactly total (id 2): the tightest "beyond // committed" case, pinning the > (not >=) bound — valid ids are 0..1. 
@@ -260,7 +244,7 @@ func TestWarmup_RejectsIndexBeyondCommitted(t *testing.T) { require.NoError(t, raw.Put(IndexCF, encodeIndexKey(term, 2), nil)) }) - _, err = OpenHotStore(dir, chunkID, silentLogger()) + _, _, err := tryOpenHotStoreForTest(t, dir, chunkID) require.ErrorContains(t, err, "index references event 2 but only 2 committed") } @@ -268,16 +252,13 @@ func TestWarmup_OffsetsHandleEmptyTrailingLedger(t *testing.T) { const chunkID = chunk.ID(0) dir := t.TempDir() - hot1, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) + hot1, raw1 := openHotStoreForTestAt(t, dir, chunkID) p, _ := makePayload("only") - require.NoError(t, hot1.IngestLedgerEvents(2, []events.Payload{p})) - require.NoError(t, hot1.IngestLedgerEvents(3, nil)) - require.NoError(t, hot1.Close()) + require.NoError(t, ingestLedgerEvents(hot1, 2, []events.Payload{p})) + require.NoError(t, ingestLedgerEvents(hot1, 3, nil)) + require.NoError(t, raw1.Close()) - hot2, err := OpenHotStore(dir, chunkID, silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = hot2.Close() }) + hot2, _ := openHotStoreForTestAt(t, dir, chunkID) assert.Equal(t, uint32(1), mustEventCount(t, hot2)) assert.Equal(t, 2, mustOffsets(t, hot2).LedgerCount()) diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/query_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/query_test.go index 88b48c48a..d3d7fd08d 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/query_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/query_test.go @@ -104,7 +104,7 @@ func newQueryFixture(t *testing.T) *queryFixture { require.NoError(t, err) first := chunkID.FirstLedger() - require.NoError(t, fx.store.IngestLedgerEvents(first, []events.Payload{ + require.NoError(t, ingestLedgerEvents(fx.store, first, []events.Payload{ payloadFor(t, fx.contractA, "evt-a-ab", fx.t0a, fx.t0b), payloadFor(t, fx.contractA, "evt-a-ac", fx.t0a, fx.t0c), 
payloadFor(t, fx.contractB, "evt-b-ab", fx.t0a, fx.t0b), @@ -354,7 +354,7 @@ func TestQuery_ManyFiltersAtCallerCap(t *testing.T) { contracts[i][0] = byte(i + 1) payloads[i] = payloadFor(t, contracts[i], fmt.Sprintf("evt-%02d", i)) } - require.NoError(t, h.store.IngestLedgerEvents(first, payloads)) + require.NoError(t, ingestLedgerEvents(h.store, first, payloads)) filters := make([]Filter, n) for i := range n { @@ -377,7 +377,7 @@ func newMultiLedgerQueryFixture(t *testing.T) *queryFixture { t.Helper() fx := newQueryFixture(t) first := chunk.ID(0).FirstLedger() - require.NoError(t, fx.store.IngestLedgerEvents(first+1, []events.Payload{ + require.NoError(t, ingestLedgerEvents(fx.store, first+1, []events.Payload{ payloadFor(t, fx.contractA, "evt-extra-0", fx.t0a), payloadFor(t, fx.contractA, "evt-extra-1", fx.t0a), })) @@ -544,7 +544,7 @@ func TestQuery_PostFilterRejectsTermHashCollision(t *testing.T) { // ConcurrentBitmaps.AddTo is the writer-side API the ingest path uses // to register (term, eventID) pairs. No concurrent ingest is running // in this test, so the single-writer contract is satisfied. - fx.store.Index().AddTo(gammaKey, 4) + fx.store.index().AddTo(gammaKey, 4) after, err := fx.store.Lookup(context.Background(), gammaKey) require.NoError(t, err) @@ -609,7 +609,7 @@ func TestQuery_ChunkWithLedgersButZeroEvents(t *testing.T) { // Ingest three empty ledgers — recorded in offsets, no events. for i := range uint32(3) { - require.NoError(t, h.store.IngestLedgerEvents(first+i, nil)) + require.NoError(t, ingestLedgerEvents(h.store, first+i, nil)) } require.Equal(t, uint32(0), mustEventCount(t, h.store)) @@ -704,8 +704,8 @@ func TestQuery_EmptyLeadingLedgerRangeStaysEmpty(t *testing.T) { // real events. 
After ingest the chunk's offsets read: // [first] → [0, 0) (empty) // [first+1] → [0, 5) (5 events) - require.NoError(t, h.store.IngestLedgerEvents(first, nil)) - require.NoError(t, h.store.IngestLedgerEvents(first+1, []events.Payload{ + require.NoError(t, ingestLedgerEvents(h.store, first, nil)) + require.NoError(t, ingestLedgerEvents(h.store, first+1, []events.Payload{ makeSimplePayload(t, "evt-0"), makeSimplePayload(t, "evt-1"), makeSimplePayload(t, "evt-2"), @@ -804,7 +804,7 @@ func makeSimplePayload(t *testing.T, dataSymbol string) events.Payload { // Walks the hot store one ledger at a time using its Offsets snapshot // (which tracks the ingest-time ledger sequence) rather than reading // LedgerSequence off each Payload — the test fixture's payloadFor -// builder doesn't set Payload.LedgerSequence, and HotStore.IngestLedgerEvents +// builder doesn't set Payload.LedgerSequence, and IngestLedgerToBatch // stores them verbatim, so the per-event field is the zero value and // can't be used to recover ledger boundaries. // diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/reader.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/reader.go index 77f306d4c..41b5ad63e 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/reader.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore/reader.go @@ -83,8 +83,8 @@ type Reader interface { // // Implementations: // - HotStore allocates a fresh Snapshot from the live - // ConcurrentLedgerOffsets per call. Concurrent - // IngestLedgerEvents may extend the underlying state after + // ConcurrentLedgerOffsets per call. A concurrent + // IngestLedgerToBatch may extend the underlying state after // Offsets returns, but the returned snapshot reflects what // was visible at call time. Callers (Query) take the // snapshot once at entry and pass it through their helpers. 
@@ -183,12 +183,6 @@ type Reader interface { // Each events.Payload carries its LedgerSequence, so consumers can // track ledger boundaries without separate signaling. All(ctx context.Context) iter.Seq2[events.Payload, error] - - // Close releases any resources the Reader holds. Idempotent. - // After Close, Lookup / FetchEvents / FetchRange / All return - // ErrClosed. Metadata accessors (ChunkID, EventCount, Offsets) - // survive Close — see each impl's docstring for details. - Close() error } // validateSortedEventIDs returns a wrapped ErrUnsortedEventIDs if diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk.go new file mode 100644 index 000000000..477b31ecf --- /dev/null +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk.go @@ -0,0 +1,388 @@ +// Package hotchunk implements decision (a): the per-chunk hot tier is ONE +// RocksDB holding the union of every hot data type's CFs (ledger + 3 events + 1 +// txhash), and each ledger commits as ONE atomic synced WriteBatch +// across ALL of them — so a ledger is fully present or fully absent, with a +// SINGLE per-chunk last-committed ledger (max committed seq, from the ledgers CF's last key) +// and no per-store frontiers / min-of-three. The three typed facades +// (ledger/txhash/eventstore HotStore) are composed over the shared store via +// NewWithStore; their write paths queue Puts into the one shared batch. 
package hotchunk

import (
	"context"
	"fmt"
	"iter"
	"slices"
	"time"

	sdkingest "github.com/stellar/go-stellar-sdk/ingest"
	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
	supportlog "github.com/stellar/go-stellar-sdk/support/log"
	"github.com/stellar/go-stellar-sdk/xdr"

	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/events"
	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores"
	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore"
	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger"
	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash"
)

// DB is one chunk's hot tier: a single multi-CF rocksdb.Store plus the three
// typed facades composed over it. It owns the store (Close closes it once); the
// facades wrap it without owning it.
//
// Concurrency: ingestion is single-writer; IngestLedger is not safe to call
// concurrently with itself. Reads via the facades follow each facade's own
// contract and are safe alongside the single writer.
type DB struct {
	// store is the shared RocksDB handle every facade below is built over; it
	// is the only resource Close releases.
	store *rocksdb.Store
	// chunkID is the chunk this DB was opened for (reported by ChunkID and used
	// in error messages).
	chunkID chunk.ID

	// Typed facades over store, one per hot data type.
	ledger *ledger.HotStore
	txhash *txhash.HotStore
	events *eventstore.HotStore
}

// ColumnFamilies is the full CF list for the shared per-chunk DB (ledger + 3
// events + 1 txhash), assembled from each facade's CFNames() — one idiom, so
// callers (including tests) never hand-stitch the union. Names are non-colliding
// across the facades. The result is a freshly concatenated slice in the order
// ledger, events, txhash.
func ColumnFamilies() []string {
	return slices.Concat(ledger.CFNames(), eventstore.CFNames(), txhash.CFNames())
}

// config builds the shared store's rocksdb.Config: events' per-CF options (ZSTD
// on DataCF, tuned block sizes) plus the txhash workload's Tuning.
Tuning's +// per-CF fields apply to every CF — a benign over-application (ledger/events CFs +// just gain a bloom + larger write buffer); the per-CF overrides keep events +// distinct. +func config(path string, logger *supportlog.Entry, readOnly, mustExist bool) rocksdb.Config { + return rocksdb.Config{ + Path: path, + ColumnFamilies: ColumnFamilies(), + Logger: logger, + Tuning: txhash.Tuning(), + PerCFOptions: eventstore.CFOptions(), + ReadOnly: readOnly, + MustExist: mustExist, + } +} + +// Open opens (or creates) the chunk's shared multi-CF hot DB read-WRITE +// (ingestion's handle for a NEW chunk) and composes the three facades over it. On +// any facade-construction failure the shared store is closed before returning. +func Open(path string, chunkID chunk.ID, logger *supportlog.Entry) (*DB, error) { + return open(path, chunkID, logger, false, false) +} + +// OpenExisting opens an EXISTING hot DB read-WRITE with create-if-missing OFF — +// ingestion's handle for a chunk whose "ready" key promises the DB already exists. +// A missing or gutted DB fails the open instead of silently fabricating a fresh +// empty one (the "never auto-heal" rule); the caller treats that failure as an +// ordinary restartable error. +func OpenExisting(path string, chunkID chunk.ID, logger *supportlog.Entry) (*DB, error) { + return open(path, chunkID, logger, false, true) +} + +// OpenReadOnly opens an EXISTING hot DB read-only — the freeze source's view AND +// the startup watermark refiner's. RocksDB's read-only open replays the +// synced-but-unflushed WAL into in-memory memtables (persisting nothing), so a +// reader sees every synced write even after an ungraceful crash — the watermark +// refinement DEPENDS on that replay to read a correct MaxCommittedSeq. (An +// unsynced tail is exactly what a crash loses, and is not recovered.) Composing +// the facades only reads. 
+func OpenReadOnly(path string, chunkID chunk.ID, logger *supportlog.Entry) (*DB, error) { + return open(path, chunkID, logger, true, false) +} + +func open(path string, chunkID chunk.ID, logger *supportlog.Entry, readOnly, mustExist bool) (*DB, error) { + if path == "" { + return nil, stores.ErrInvalidConfig + } + if logger == nil { + return nil, stores.ErrInvalidConfig + } + store, err := rocksdb.New(config(path, logger, readOnly, mustExist)) + if err != nil { + return nil, fmt.Errorf("open chunk %s: %w", chunkID, err) + } + + es, err := eventstore.NewWithStore(store, chunkID) + if err != nil { + _ = store.Close() + return nil, fmt.Errorf("compose events facade for chunk %s: %w", chunkID, err) + } + return &DB{ + store: store, + chunkID: chunkID, + ledger: ledger.NewWithStore(store), + txhash: txhash.NewWithStore(store), + events: es, + }, nil +} + +// ChunkID returns the chunk this DB is bound to. +func (d *DB) ChunkID() chunk.ID { return d.chunkID } + +// Ledgers returns the ledger read/write facade over the shared store. +func (d *DB) Ledgers() *ledger.HotStore { return d.ledger } + +// Txhash returns the txhash read/write facade over the shared store. +// Write side feeds the ingestion loop; the read side has no production +// caller yet — it's the intended hot read seam for the v2 cutover (#772), +// exercised by tests until then. +func (d *DB) Txhash() *txhash.HotStore { return d.txhash } + +// Events returns the events read/write facade over the shared store. +// Same status as Txhash: writes feed ingestion, reads are the #772 seam. +func (d *DB) Events() *eventstore.HotStore { return d.events } + +// Source streams the chunk's LCMs from the ledgers CF as a ledgerbackend.LedgerStream +// the cold writer (backfill's WriteColdChunk) drains, so a just-closed chunk freezes +// straight from its hot DB without a refetch. The freeze opens the DB read-only. 
+func (d *DB) Source() ledgerbackend.LedgerStream { + return &hotLedgerStream{store: d.ledger} +} + +// Close releases the shared store exactly once. Idempotent. Must not be called +// concurrently with in-flight reads/writes. +func (d *DB) Close() error { return d.store.Close() } + +// MaxCommittedSeq returns the single authoritative per-chunk last-committed ledger: the +// highest seq durably committed, from the ledgers CF's last key. Under decision +// (a) this one value pins EVERY CF's frontier. ok=false on an empty DB. +func (d *DB) MaxCommittedSeq() (uint32, bool, error) { + return d.ledger.LastSeq() +} + +// Phase enumerates the ordered phases of one IngestLedger call. It is a typed +// index into a fixed-size array (LedgerReport.Phases), so an out-of-table phase is +// unrepresentable — no string label to mistype and no map lookup to nil-panic in a +// sink. The phases partition the per-ledger wall-clock: +// - PhaseExtract: the shared ExtractLedgerEvents walk + txhash-entry build + +// event shaping (all pre-batch — every decode failure lands here by construction); +// - PhaseLedgers/PhaseTxhash/PhaseEvents: each facade's queue-into-batch step; +// - PhaseCommit: the RocksDB batch write (WAL append + fsync + memtable) = the +// whole Batch call minus the three queue steps — the fsync wait pprof can't see. +type Phase uint8 + +const ( + PhaseExtract Phase = iota + PhaseLedgers + PhaseTxhash + PhaseEvents + PhaseCommit + // NumPhases is the array size; it is not itself a phase. + NumPhases +) + +// String is the metric label for a phase. +func (p Phase) String() string { + switch p { + case PhaseExtract: + return "extract" + case PhaseLedgers: + return "ledgers" + case PhaseTxhash: + return "txhash" + case PhaseEvents: + return "events" + case PhaseCommit: + return "commit" + default: + return "unknown" + } +} + +// PhaseSample is one phase's wall-clock and item count (Items is 0 where a phase +// handles no per-type volume — extract and commit). 
+type PhaseSample struct { + Dur time.Duration + Items int +} + +// LedgerReport is the single result of IngestLedger: the per-phase samples, plus +// the phase that failed when the call returns a non-nil error. Phases that never +// ran (after a failure) keep their zero sample; the caller emits phases up to and +// including Failed on error, and all phases on success. +type LedgerReport struct { + Phases [NumPhases]PhaseSample + // Failed is meaningful only when IngestLedger returns a non-nil error. + Failed Phase +} + +// IngestLedger commits ONE ledger as a SINGLE atomic synced WriteBatch across all +// hot CFs (decision (a)): queue ledgers, txhash, and events rows into one +// BatchWriter, commit once, and only then apply the events in-memory mirror/offsets +// update. +// +// lcm is a borrowed zero-copy view; every extractor copies what it retains, so +// the view need not outlive this call. Store.Batch's lifecycle RLock + checkOpen +// is the authoritative closed-store guard, so there is no separate pre-check here. +func (d *DB) IngestLedger(seq uint32, lcm xdr.LedgerCloseMetaView) (LedgerReport, error) { + var rep LedgerReport + + // Pre-extract anything that can fail BEFORE opening the batch, so a decode + // error rejects the ledger without a half-built batch. + // + // ONE TxProcessing walk feeds BOTH hot data types: ExtractLedgerEvents yields, + // per transaction in apply order, the tx hash AND its contract events. txhash + // reads each element's Hash and events shapes the same slice + // (PayloadsFromLedgerEvents), so the two share one walk instead of the two + // (ExtractTxHashes + LCMViewToPayloads-internal ExtractLedgerEvents) they used + // to each run — halving per-ledger extraction. Shaping the already-extracted + // slice (not re-walking) keeps the event-ID assignment order identical to + // LCMViewToPayloads. 
The atomic batch below serializes only the commit; the + // extractors are independent and could run concurrently into the same batch if + // catch-up profiling ever demands it — sequential is right at live cadence. + // Every failure below stamps the failed phase's PARTIAL duration before + // returning — a phase that blocked and then failed is signal (mirrors + // RunBackfill's "reported even on failure"), so the error is never emitted with + // a zero-duration sample. + extractStart := time.Now() + txEvents, err := sdkingest.ExtractLedgerEvents(lcm) + if err != nil { + rep.Phases[PhaseExtract].Dur = time.Since(extractStart) + rep.Failed = PhaseExtract + return rep, fmt.Errorf("extract ledger events seq %d: %w", seq, err) + } + txEntries := make([]txhash.Entry, len(txEvents)) + for i := range txEvents { + txEntries[i] = txhash.Entry{Hash: txEvents[i].Hash, LedgerSeq: seq} + } + + closedAt, err := lcm.LedgerCloseTime() + if err != nil { + rep.Phases[PhaseExtract].Dur = time.Since(extractStart) + rep.Failed = PhaseExtract + return rep, fmt.Errorf("ledger close time seq %d: %w", seq, err) + } + // A pre-Soroban ledger yields zero payloads, no error. + payloads, err := events.PayloadsFromLedgerEvents(txEvents, seq, closedAt) + if err != nil { + rep.Phases[PhaseExtract].Dur = time.Since(extractStart) + rep.Failed = PhaseExtract + return rep, fmt.Errorf("shape events seq %d: %w", seq, err) + } + rep.Phases[PhaseExtract].Dur = time.Since(extractStart) + // Per-type write volume lives on the write phases (emitted on success). + rep.Phases[PhaseLedgers].Items = 1 + rep.Phases[PhaseTxhash].Items = len(txEntries) + rep.Phases[PhaseEvents].Items = len(payloads) + + // The events facade validates + marshals inside the batch callback (so a + // rejected ledger never leaves committed rows) and returns the post-commit + // apply hook. Under decision (a) resume is always MaxCommittedSeq+1, so seq is + // never a duplicate — the hook is always non-nil on success. 
Each facade's queue + // step is timed individually; Commit (below) is the whole Batch minus those — + // the RocksDB write (WAL append + fsync + memtable). + var applyEvents func() + // A batch error not attributed to a specific queue step below is the commit + // itself (the RocksDB write); a queue-step error narrows Failed to its phase. + failed := PhaseCommit + batchStart := time.Now() + cerr := d.store.Batch(func(b *rocksdb.BatchWriter) error { + ls := time.Now() + if err := d.ledger.AddLedgerToBatch(b, ledger.Entry{Seq: seq, Bytes: []byte(lcm)}); err != nil { + rep.Phases[PhaseLedgers].Dur = time.Since(ls) + failed = PhaseLedgers + return fmt.Errorf("queue ledger seq %d: %w", seq, err) + } + rep.Phases[PhaseLedgers].Dur = time.Since(ls) + + ts := time.Now() + if len(txEntries) > 0 { + if err := d.txhash.AddEntriesToBatch(b, txEntries); err != nil { + rep.Phases[PhaseTxhash].Dur = time.Since(ts) + failed = PhaseTxhash + return fmt.Errorf("queue tx hashes seq %d: %w", seq, err) + } + } + rep.Phases[PhaseTxhash].Dur = time.Since(ts) + + es := time.Now() + apply, err := d.events.IngestLedgerToBatch(b, seq, payloads) + if err != nil { + rep.Phases[PhaseEvents].Dur = time.Since(es) + failed = PhaseEvents + return fmt.Errorf("queue events seq %d: %w", seq, err) + } + rep.Phases[PhaseEvents].Dur = time.Since(es) + applyEvents = apply + return nil + }) + // Commit is the whole Batch call minus the three queue steps: the RocksDB write + // (WAL append + fsync + memtable). Stamp it whether the batch succeeded or the + // commit itself failed (all queue steps ran) — a slow-then-failed commit is + // signal. A queue-step failure already stamped its own partial above. 
+ if failed == PhaseCommit { + rep.Phases[PhaseCommit].Dur = time.Since(batchStart) - + rep.Phases[PhaseLedgers].Dur - rep.Phases[PhaseTxhash].Dur - rep.Phases[PhaseEvents].Dur + } + if cerr != nil { + rep.Failed = failed + return rep, fmt.Errorf("commit ledger %d to chunk %s: %w", seq, d.chunkID, cerr) + } + + // Batch is durable — now and only now apply the events mirror/offsets update. + applyEvents() + return rep, nil +} + +// hotLedgerStream is a ledgerbackend.LedgerStream over a ledger.HotStore, so the +// source-blind cold pipeline freezes a just-closed chunk from its hot DB. +type hotLedgerStream struct { + store *ledger.HotStore +} + +var _ ledgerbackend.LedgerStream = (*hotLedgerStream)(nil) + +// RawLedgers yields the range's wire bytes from the hot store. IterateLedgers +// yields BORROWED buffers (valid only to the next step); the drain loop consumes +// each fully before the next yield, so the borrow is safe. ctx cancellation is +// observed between ledgers (the LedgerStream contract drain relies on). +// +// It enforces the LedgerStream in-order contract at the source (so the shared +// cursor could be deleted): the hot store is the SOLE writer of recent history, so +// a gap in its keyspace is a real defect, caught here by a key-derived seq check +// (no XDR parse). An unbounded range self-bounds at the store's committed frontier +// (LastSeq), mirroring packStream, so callers can pass UnboundedRange(from). 
+func (st *hotLedgerStream) RawLedgers(
+	ctx context.Context, r ledgerbackend.Range, _ ...ledgerbackend.StreamOption,
+) iter.Seq2[[]byte, error] {
+	return func(yield func([]byte, error) bool) {
+		// Inclusive upper bound: the range's own end when bounded, otherwise
+		// the store's last committed sequence.
+		upper := r.To()
+		if !r.Bounded() {
+			frontier, found, err := st.store.LastSeq()
+			switch {
+			case err != nil:
+				yield(nil, fmt.Errorf("hotLedgerStream: read committed frontier: %w", err))
+				return
+			case !found:
+				return // empty store: nothing to yield
+			}
+			upper = frontier
+		}
+
+		from := r.From()
+		want := from // the sequence the next stored key must carry
+		for entry, iterErr := range st.store.IterateLedgers(from, upper) {
+			// Cancellation is reported ahead of any iterator error.
+			if ctxErr := ctx.Err(); ctxErr != nil {
+				yield(nil, ctxErr)
+				return
+			}
+			if iterErr != nil {
+				yield(nil, iterErr)
+				return
+			}
+			// Key-derived contiguity check: a skipped sequence is an error.
+			if entry.Seq != want {
+				yield(nil, fmt.Errorf("hotLedgerStream: gap at seq %d, expected %d", entry.Seq, want))
+				return
+			}
+			if !yield(entry.Bytes, nil) {
+				return
+			}
+			want++
+		}
+	}
+}
diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk_test.go
new file mode 100644
index 000000000..7485979e8
--- /dev/null
+++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/hotchunk/hotchunk_test.go
@@ -0,0 +1,582 @@
+package hotchunk
+
+import (
+	"context"
+	"testing"
+
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/stellar/go-stellar-sdk/ingest/ledgerbackend"
+	"github.com/stellar/go-stellar-sdk/keypair"
+	"github.com/stellar/go-stellar-sdk/network"
+	supportlog "github.com/stellar/go-stellar-sdk/support/log"
+	"github.com/stellar/go-stellar-sdk/xdr"
+
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/events"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/eventstore"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger"
+	"github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash"
+)
+
+const testPassphrase = "Public Global Stellar Network ; September 2015"
+
+// silentLogger returns a logger whose level is raised to Error, so the tests
+// run without lower-severity log output.
+func silentLogger() *supportlog.Entry {
+	quiet := supportlog.New()
+	quiet.SetLevel(logrus.ErrorLevel)
+	return quiet
+}
+
+// openTestDB opens a hot DB for chunk 0 in a fresh temp dir and registers a
+// best-effort Close as test cleanup.
+func openTestDB(t *testing.T) *DB {
+	t.Helper()
+	hot, openErr := Open(t.TempDir(), chunk.ID(0), silentLogger())
+	require.NoError(t, openErr)
+	t.Cleanup(func() { _ = hot.Close() })
+	return hot
+}
+
+// assertWriteItems checks the per-phase item counts carried on a LedgerReport:
+// exactly one ledger item, exactly one event item, and `txhash` tx-hash items
+// (the only count that varies between callers).
+func assertWriteItems(t *testing.T, rep LedgerReport, txhash int) {
+	t.Helper()
+	phases := rep.Phases
+	assert.Equal(t, 1, phases[PhaseLedgers].Items, "ledgers items")
+	assert.Equal(t, txhash, phases[PhaseTxhash].Items, "txhash items")
+	assert.Equal(t, 1, phases[PhaseEvents].Items, "events items")
+}
+
+func TestOpen_ValidatesInputs(t *testing.T) {
+	// An empty path is rejected.
+	_, pathErr := Open("", chunk.ID(0), silentLogger())
+	require.ErrorIs(t, pathErr, stores.ErrInvalidConfig)
+
+	// A nil logger is rejected.
+	_, loggerErr := Open(t.TempDir(), chunk.ID(0), nil)
+	require.ErrorIs(t, loggerErr, stores.ErrInvalidConfig)
+}
+
+func TestColumnFamilies_UnionIsNonColliding(t *testing.T) {
+	union := ColumnFamilies()
+	// The union must be exactly as large as the three facades' lists combined.
+	wantLen := len(ledger.CFNames()) + len(eventstore.CFNames()) + len(txhash.CFNames())
+	require.Len(t, union, wantLen)
+
+	seen := make(map[string]bool, len(union))
+	for _, name := range union {
+		require.False(t, seen[name], "CF name %q collides across facades", name)
+		seen[name] = true
+	}
+
+	// Every facade's CFs appear in the union.
+	require.Contains(t, seen, ledger.LedgersCF)
+	for _, name := range eventstore.CFNames() {
+		require.Contains(t, seen, name)
+	}
+	for _, name := range txhash.CFNames() {
+		require.Contains(t, seen, name)
+	}
+}
+
+// TestIngestLedger_AllCFsAdvanceTogether is the happy path: two IngestLedger
+// calls write the ledger, tx hash, and event of each ledger into the one shared
+// DB, every CF reads them back, and the watermark equals the last ingested seq.
+func TestIngestLedger_AllCFsAdvanceTogether(t *testing.T) {
+	first := chunk.ID(0).FirstLedger()
+	second := first + 1
+	hot := openTestDB(t)
+
+	// Empty DB: no watermark.
+	_, ok, err := hot.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.False(t, ok)
+
+	rawA, hashA, termA := lcmWithEvent(t, first)
+	rawB, hashB, _ := lcmWithEvent(t, second)
+
+	repA, err := hot.IngestLedger(first, xdr.LedgerCloseMetaView(rawA))
+	require.NoError(t, err)
+	assertWriteItems(t, repA, 1)
+
+	repB, err := hot.IngestLedger(second, xdr.LedgerCloseMetaView(rawB))
+	require.NoError(t, err)
+	assertWriteItems(t, repB, 1)
+
+	// Ledgers: the first ledger's bytes read back verbatim.
+	storedA, err := hot.Ledgers().GetLedgerRaw(first)
+	require.NoError(t, err)
+	assert.Equal(t, rawA, storedA)
+
+	// Tx hashes: each hash resolves to the ledger that carried it.
+	ledgerOfA, err := hot.Txhash().Get(hashA)
+	require.NoError(t, err)
+	assert.Equal(t, first, ledgerOfA)
+	ledgerOfB, err := hot.Txhash().Get(hashB)
+	require.NoError(t, err)
+	assert.Equal(t, second, ledgerOfB)
+
+	// Events: the shared term indexes both ledgers' events.
+	bitmap, err := hot.Events().Lookup(context.Background(), termA)
+	require.NoError(t, err)
+	require.NotNil(t, bitmap)
+	assert.Equal(t, uint64(2), bitmap.GetCardinality(), "both ledgers share the event term")
+	assert.Equal(t, uint32(2), eventCount(t, hot.Events()))
+
+	// The watermark equals the last committed seq.
+	watermark, ok, err := hot.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.True(t, ok)
+	assert.Equal(t, second, watermark)
+}
+
+// TestIngestLedger_RejectedLedgerPersistsNothingAcrossAnyCF is the atomicity
+// guarantee: a ledger rejected with eventstore.ErrLedgerOutOfRange (a seq above
+// the chunk's range) must leave the ledgers, txhash, and events CFs untouched
+// and must not advance the watermark.
+func TestIngestLedger_RejectedLedgerPersistsNothingAcrossAnyCF(t *testing.T) {
+	hot := openTestDB(t)
+
+	// One past the chunk's last ledger.
+	outOfRange := chunk.ID(0).LastLedger() + 1
+	raw, hash, term := lcmWithEvent(t, outOfRange)
+
+	_, ingestErr := hot.IngestLedger(outOfRange, xdr.LedgerCloseMetaView(raw))
+	require.Error(t, ingestErr)
+	require.ErrorIs(t, ingestErr, eventstore.ErrLedgerOutOfRange)
+
+	// Nothing persisted in any CF.
+	_, ledgerErr := hot.Ledgers().GetLedgerRaw(outOfRange)
+	require.ErrorIs(t, ledgerErr, stores.ErrNotFound)
+	_, hashErr := hot.Txhash().Get(hash)
+	require.ErrorIs(t, hashErr, stores.ErrNotFound)
+	_, termErr := hot.Events().Lookup(context.Background(), term)
+	require.ErrorIs(t, termErr, eventstore.ErrTermNotFound)
+	assert.Equal(t, uint32(0), eventCount(t, hot.Events()))
+
+	// The watermark is still empty.
+	_, ok, err := hot.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.False(t, ok, "a rejected ledger must not advance the watermark")
+}
+
+// TestIngestLedger_MidBatchCommitFailurePersistsNothing ingests into a store
+// that has already been closed, so the ingest fails, and asserts after a
+// reopen that the failed ledger left no trace in any CF while the earlier
+// committed ledger is intact.
+func TestIngestLedger_MidBatchCommitFailurePersistsNothing(t *testing.T) {
+	chunkID := chunk.ID(0)
+	first := chunkID.FirstLedger()
+	dir := t.TempDir()
+
+	writer, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+
+	// Commit one good ledger to establish a known watermark, then close.
+	rawGood, hashGood, _ := lcmWithEvent(t, first)
+	_, err = writer.IngestLedger(first, xdr.LedgerCloseMetaView(rawGood))
+	require.NoError(t, err)
+	require.NoError(t, writer.Close())
+
+	// Reopen: the watermark survived the close.
+	reopened, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = reopened.Close() })
+
+	maxSeq, ok, err := reopened.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.True(t, ok)
+	require.Equal(t, first, maxSeq, "the committed ledger is durable across reopen")
+
+	// Close it and ingest the NEXT ledger through the closed handle: it must fail.
+	require.NoError(t, reopened.Close())
+	rawNext, hashNext, _ := lcmWithEvent(t, first+1)
+	_, err = reopened.IngestLedger(first+1, xdr.LedgerCloseMetaView(rawNext))
+	require.Error(t, err)
+
+	// Third open: the failed ledger is absent and the watermark is unchanged.
+	verify, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = verify.Close() })
+
+	maxSeq, ok, err = verify.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.True(t, ok)
+	assert.Equal(t, first, maxSeq, "the failed ledger did not advance the watermark")
+
+	// Exactly one event (the good ledger's) is committed.
+	assert.Equal(t, uint32(1), eventCount(t, verify.Events()),
+		"the failed ledger's event must not be committed to the events CFs")
+
+	// Failed ledger: absent from the ledgers and txhash CFs.
+	_, missErr := verify.Ledgers().GetLedgerRaw(first + 1)
+	require.ErrorIs(t, missErr, stores.ErrNotFound)
+	_, missErr = verify.Txhash().Get(hashNext)
+	require.ErrorIs(t, missErr, stores.ErrNotFound)
+
+	// Good ledger: still fully readable.
+	storedGood, err := verify.Ledgers().GetLedgerRaw(first)
+	require.NoError(t, err)
+	assert.Equal(t, rawGood, storedGood)
+	_, err = verify.Txhash().Get(hashGood)
+	require.NoError(t, err)
+}
+
+// TestSharedBatch_DirectRocksAbortAcrossCFs is the lower-level atomicity proof:
+// queue Puts into DIFFERENT CFs of the shared store, then return an error from
+// the batch callback — RocksDB applies NONE of them. Pins the property the
+// IngestLedger path relies on (intra-store cross-CF atomicity of one
+// WriteBatch).
+func TestSharedBatch_DirectRocksAbortAcrossCFs(t *testing.T) {
+	hot := openTestDB(t)
+
+	const seq = 2
+	hash := [32]byte{0xa0}
+	abort := assert.AnError
+
+	// Queue one Put per facade CF, then fail the callback.
+	batchErr := storeOf(hot).Batch(func(w *rocksdb.BatchWriter) error {
+		w.Put(ledger.LedgersCF, rocksdb.EncodeUint32(seq), []byte("ledger-row"))
+		w.Put(txhash.CFNames()[0], hash[:], rocksdb.EncodeUint32(seq))
+		w.Put(eventstore.DataCF, []byte{0, 0, 0, 0}, []byte("event-row"))
+		return abort // abort: nothing should commit
+	})
+	require.ErrorIs(t, batchErr, abort)
+
+	// The aborted writes are not readable and the watermark is still empty.
+	_, ledgerErr := hot.Ledgers().GetLedgerRaw(seq)
+	require.ErrorIs(t, ledgerErr, stores.ErrNotFound)
+	_, hashErr := hot.Txhash().Get(hash)
+	require.ErrorIs(t, hashErr, stores.ErrNotFound)
+	_, committed, seqErr := hot.MaxCommittedSeq()
+	require.NoError(t, seqErr)
+	require.False(t, committed)
+}
+
+// storeOf returns the DB's underlying shared rocksdb.Store so the direct-batch
+// atomicity test can write to it (same package, so the unexported field is
+// reachable without a production accessor).
+func storeOf(db *DB) *rocksdb.Store {
+	return db.store
+}
+
+// TestSource_SelfBoundsUnboundedRange confirms the freeze source (hotLedgerStream)
+// yields the store's committed ledgers in order and self-bounds an UNBOUNDED range
+// at the committed frontier (mirroring packStream), so drain can pass
+// UnboundedRange(from) rather than a pre-computed bound.
+func TestSource_SelfBoundsUnboundedRange(t *testing.T) {
+	hot := openTestDB(t)
+	first := chunk.ID(0).FirstLedger()
+
+	// Commit three consecutive zero-tx ledgers.
+	for offset := range uint32(3) {
+		seq := first + offset
+		_, ingestErr := hot.IngestLedger(seq, xdr.LedgerCloseMetaView(zeroTxLCM(t, seq)))
+		require.NoError(t, ingestErr)
+	}
+
+	// Stream an unbounded range and collect each yielded ledger's header seq.
+	stream := hot.Source().RawLedgers(context.Background(), ledgerbackend.UnboundedRange(first))
+	var got []uint32
+	for raw, streamErr := range stream {
+		require.NoError(t, streamErr)
+		seq, seqErr := xdr.LedgerCloseMetaView(raw).LedgerSequence()
+		require.NoError(t, seqErr)
+		got = append(got, seq)
+	}
+	require.Equal(t, []uint32{first, first + 1, first + 2}, got, "self-bounds at the frontier, in order")
+}
+
+// TestSource_RejectsGap pins the source-side in-order guard: when the ledgers
+// CF holds first and first+2 but not first+1, streaming the bounded range must
+// surface an error mentioning "gap" instead of silently skipping.
+func TestSource_RejectsGap(t *testing.T) {
+	hot := openTestDB(t)
+	first := chunk.ID(0).FirstLedger()
+
+	// Write the two non-adjacent rows straight into the ledgers CF, bypassing
+	// IngestLedger so that only the stream's own guard is exercised.
+	seedErr := storeOf(hot).Batch(func(w *rocksdb.BatchWriter) error {
+		for _, seq := range []uint32{first, first + 2} {
+			entry := ledger.Entry{Seq: seq, Bytes: []byte("x")}
+			if addErr := hot.Ledgers().AddLedgerToBatch(w, entry); addErr != nil {
+				return addErr
+			}
+		}
+		return nil
+	})
+	require.NoError(t, seedErr)
+
+	// Capture the first error the stream yields.
+	stream := hot.Source().RawLedgers(context.Background(), ledgerbackend.BoundedRange(first, first+2))
+	var firstErr error
+	for _, streamErr := range stream {
+		if streamErr != nil {
+			firstErr = streamErr
+			break
+		}
+	}
+	require.Error(t, firstErr)
+	require.Contains(t, firstErr.Error(), "gap")
+}
+
+// TestIngestLedger_WritesEveryHotType confirms the hot tier always writes all
+// three hot data types; per-type disabling is not a supported hot DB mode.
+func TestIngestLedger_WritesEveryHotType(t *testing.T) {
+	first := chunk.ID(0).FirstLedger()
+	hot := openTestDB(t)
+
+	raw, hash, term := lcmWithEvent(t, first)
+	report, err := hot.IngestLedger(first, xdr.LedgerCloseMetaView(raw))
+	require.NoError(t, err)
+	assertWriteItems(t, report, 1)
+
+	// Ledger bytes round-trip.
+	stored, err := hot.Ledgers().GetLedgerRaw(first)
+	require.NoError(t, err)
+	assert.Equal(t, raw, stored)
+
+	// The tx hash resolves to the ingested ledger.
+	ledgerOfHash, err := hot.Txhash().Get(hash)
+	require.NoError(t, err)
+	assert.Equal(t, first, ledgerOfHash)
+
+	// The event term is indexed with exactly one entry.
+	bitmap, err := hot.Events().Lookup(context.Background(), term)
+	require.NoError(t, err)
+	require.NotNil(t, bitmap)
+	assert.Equal(t, uint64(1), bitmap.GetCardinality())
+}
+
+// TestIngestLedger_EventlessTxStillIndexesHash ingests one ledger holding two
+// applied transactions — one with a contract event, one with none — and asserts
+// that BOTH tx hashes are indexed while only one event is committed. It guards
+// txhash coverage for event-less transactions.
+func TestIngestLedger_EventlessTxStillIndexesHash(t *testing.T) {
+	first := chunk.ID(0).FirstLedger()
+	hot := openTestDB(t)
+
+	// Meta for a tx whose single operation emits one contract event.
+	withEvent := xdr.TransactionMeta{V: 4, V4: &xdr.TransactionMetaV4{
+		Operations: []xdr.OperationMetaV2{{Events: []xdr.ContractEvent{buildContractEvent("eventful")}}},
+	}}
+	// Meta for a tx whose single operation emits nothing.
+	withoutEvent := xdr.TransactionMeta{V: 4, V4: &xdr.TransactionMetaV4{
+		Operations: []xdr.OperationMetaV2{{}}, // one op, no events
+	}}
+	lcm, hashes := buildLCM(t, first, []xdr.TransactionMeta{withEvent, withoutEvent})
+	require.Len(t, hashes, 2)
+	raw, err := lcm.MarshalBinary()
+	require.NoError(t, err)
+
+	report, err := hot.IngestLedger(first, xdr.LedgerCloseMetaView(raw))
+	require.NoError(t, err)
+	assertWriteItems(t, report, 2) // both hashes indexed (event-less included); one event
+
+	// Each hash resolves to this ledger in the txhash CF.
+	for i := range hashes {
+		ledgerOfHash, getErr := hot.Txhash().Get(hashes[i])
+		require.NoError(t, getErr, "event-less tx hash must still be indexed")
+		assert.Equal(t, first, ledgerOfHash)
+	}
+	// Only the eventful tx contributed an event.
+	assert.Equal(t, uint32(1), eventCount(t, hot.Events()))
+}
+
+// TestReopen_RecoversEventsMirror confirms the events facade's warmup runs over
+// the shared store on reopen (the mirror/offsets are reconstructed from the
+// events CFs), so a reopened DB assigns event IDs continuing from disk.
+func TestReopen_RecoversEventsMirror(t *testing.T) {
+	chunkID := chunk.ID(0)
+	first := chunkID.FirstLedger()
+	dir := t.TempDir()
+
+	// First session: ingest one ledger carrying one event, then close.
+	writer, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	raw, _, _ := lcmWithEvent(t, first)
+	_, err = writer.IngestLedger(first, xdr.LedgerCloseMetaView(raw))
+	require.NoError(t, err)
+	require.NoError(t, writer.Close())
+
+	// Second session: the event count is visible again after reopen.
+	reopened, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = reopened.Close() })
+	assert.Equal(t, uint32(1), eventCount(t, reopened.Events()), "warmup recovered the events offsets")
+}
+
+// TestOpenReadOnly_ReadsCommittedAndRejectsWrites pins the read-only handle: it
+// reports the watermark a writer committed before closing, its Close succeeds,
+// and an IngestLedger issued through it fails.
+func TestOpenReadOnly_ReadsCommittedAndRejectsWrites(t *testing.T) {
+	chunkID := chunk.ID(0)
+	first := chunkID.FirstLedger()
+	dir := t.TempDir()
+
+	// Writer session: two zero-tx ledgers, then close.
+	writer, err := Open(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	for _, ledgerSeq := range []uint32{first, first + 1} {
+		_, ingestErr := writer.IngestLedger(ledgerSeq, xdr.LedgerCloseMetaView(zeroTxLCM(t, ledgerSeq)))
+		require.NoError(t, ingestErr)
+	}
+	require.NoError(t, writer.Close())
+
+	// Reader session: read-only open; its Close is asserted to succeed.
+	reader, err := OpenReadOnly(dir, chunkID, silentLogger())
+	require.NoError(t, err)
+	t.Cleanup(func() { require.NoError(t, reader.Close()) })
+
+	watermark, ok, err := reader.MaxCommittedSeq()
+	require.NoError(t, err)
+	require.True(t, ok)
+	assert.Equal(t, first+1, watermark, "read-only handle sees the committed data")
+
+	// Writing through the read-only handle must fail.
+	_, err = reader.IngestLedger(first+2, xdr.LedgerCloseMetaView(zeroTxLCM(t, first+2)))
+	require.Error(t, err, "read-only DB must reject writes")
+}
+
+// TestIngestLedger_ClosedDBFails confirms that ingesting into a DB that has
+// already been closed fails with rocksdb.ErrStoreClosed.
+func TestIngestLedger_ClosedDBFails(t *testing.T) {
+	chunkID := chunk.ID(0)
+	first := chunkID.FirstLedger()
+
+	closed, err := Open(t.TempDir(), chunkID, silentLogger())
+	require.NoError(t, err)
+	require.NoError(t, closed.Close())
+
+	raw := zeroTxLCM(t, first)
+	_, err = closed.IngestLedger(first, xdr.LedgerCloseMetaView(raw))
+	require.ErrorIs(t, err, rocksdb.ErrStoreClosed)
+}
+
+// ──────────────────────────── LCM fixtures ────────────────────────────
+
+// lcmWithEvent builds a V2 LCM with one transaction carrying one contract event
+// (topic="hotchunk_test"). Returns the wire bytes, the tx hash, and the event's
+// term key.
+func lcmWithEvent(t *testing.T, seq uint32) ([]byte, [32]byte, events.TermKey) {
+	t.Helper()
+	event := buildContractEvent("hotchunk_test")
+	ops := []xdr.OperationMetaV2{{Events: []xdr.ContractEvent{event}}}
+	meta := xdr.TransactionMeta{V: 4, V4: &xdr.TransactionMetaV4{Operations: ops}}
+
+	lcm, hash := buildLCMWithTx(t, seq, meta)
+	wire, err := lcm.MarshalBinary()
+	require.NoError(t, err)
+
+	// Derive the term keys from the event's own encoding; the first is returned.
+	eventWire, err := event.MarshalBinary()
+	require.NoError(t, err)
+	terms, err := events.TermsForBytes(eventWire)
+	require.NoError(t, err)
+	require.NotEmpty(t, terms)
+	return wire, hash, terms[0]
+}
+
+// zeroTxLCM returns the wire bytes of a V2 LCM at seq that carries no
+// transactions.
+func zeroTxLCM(t *testing.T, seq uint32) []byte {
+	t.Helper()
+	lcm, _ := buildLCM(t, seq, nil)
+	wire, err := lcm.MarshalBinary()
+	require.NoError(t, err)
+	return wire
+}
+
+// buildContractEvent returns a contract-type event with a fixed contract ID
+// (0xab, 0xcd, then zeros) whose single topic and data are both the symbol
+// `topic`.
+func buildContractEvent(topic string) xdr.ContractEvent {
+	var id xdr.ContractId
+	id[0], id[1] = 0xab, 0xcd
+
+	symbol := xdr.ScSymbol(topic)
+	body := &xdr.ContractEventV0{
+		Topics: []xdr.ScVal{{Type: xdr.ScValTypeScvSymbol, Sym: &symbol}},
+		Data:   xdr.ScVal{Type: xdr.ScValTypeScvSymbol, Sym: &symbol},
+	}
+	return xdr.ContractEvent{
+		ContractId: &id,
+		Type:       xdr.ContractEventTypeContract,
+		Body:       xdr.ContractEventBody{V: 0, V0: body},
+	}
+}
+
+// successResult returns a TxSuccess transaction result with a fee of 100 and
+// an empty (non-nil) operation-result list.
+func successResult() xdr.TransactionResult {
+	noOps := []xdr.OperationResult{}
+	inner := xdr.TransactionResultResult{
+		Code:    xdr.TransactionResultCodeTxSuccess,
+		Results: &noOps,
+	}
+	return xdr.TransactionResult{FeeCharged: 100, Result: inner}
+}
+
+// buildLCMWithTx is the single-transaction form of buildLCM: it returns the
+// LCM together with that one transaction's hash.
+func buildLCMWithTx(t *testing.T, seq uint32, meta xdr.TransactionMeta) (xdr.LedgerCloseMeta, [32]byte) {
+	t.Helper()
+	lcm, txHashes := buildLCM(t, seq, []xdr.TransactionMeta{meta})
+	require.Len(t, txHashes, 1)
+	return lcm, txHashes[0]
+}
+
+// buildLCM assembles a V2 LedgerCloseMeta at seq with one transaction per entry
+// of txMetas. Each transaction gets a random source account, its own tx-set
+// phase, and a successful result; the returned hashes are in txMetas order.
+func buildLCM(t *testing.T, seq uint32, txMetas []xdr.TransactionMeta) (xdr.LedgerCloseMeta, [][32]byte) {
+	t.Helper()
+	count := len(txMetas)
+	phases := make([]xdr.TransactionPhase, 0, count)
+	txProcessing := make([]xdr.TransactionResultMetaV1, 0, count)
+	hashes := make([][32]byte, 0, count)
+
+	for i := range txMetas {
+		tx := xdr.Transaction{
+			SourceAccount: xdr.MustMuxedAddress(keypair.MustRandom().Address()),
+			Ext: xdr.TransactionExt{
+				V:           1,
+				SorobanData: &xdr.SorobanTransactionData{},
+			},
+		}
+		envelope := xdr.TransactionEnvelope{
+			Type: xdr.EnvelopeTypeEnvelopeTypeTx,
+			V1:   &xdr.TransactionV1Envelope{Tx: tx},
+		}
+		hash, err := network.HashTransactionInEnvelope(envelope, testPassphrase)
+		require.NoError(t, err)
+		hashes = append(hashes, hash)
+
+		// Apply-side record: the caller's meta paired with a success result.
+		txProcessing = append(txProcessing, xdr.TransactionResultMetaV1{
+			TxApplyProcessing: txMetas[i],
+			Result: xdr.TransactionResultPair{
+				TransactionHash: hash,
+				Result:          successResult(),
+			},
+		})
+
+		// Tx-set side: one phase holding just this envelope.
+		components := []xdr.TxSetComponent{{
+			Type: xdr.TxSetComponentTypeTxsetCompTxsMaybeDiscountedFee,
+			TxsMaybeDiscountedFee: &xdr.TxSetComponentTxsMaybeDiscountedFee{
+				Txs: []xdr.TransactionEnvelope{envelope},
+			},
+		}}
+		phases = append(phases, xdr.TransactionPhase{V: 0, V0Components: &components})
+	}
+
+	header := xdr.LedgerHeaderHistoryEntry{
+		Header: xdr.LedgerHeader{
+			ScpValue:  xdr.StellarValue{CloseTime: xdr.TimePoint(0)},
+			LedgerSeq: xdr.Uint32(seq),
+		},
+	}
+	txSet := xdr.GeneralizedTransactionSet{
+		V:       1,
+		V1TxSet: &xdr.TransactionSetV1{Phases: phases},
+	}
+	lcm := xdr.LedgerCloseMeta{
+		V: 2,
+		V2: &xdr.LedgerCloseMetaV2{
+			LedgerHeader: header,
+			TxSet:        txSet,
+			TxProcessing: txProcessing,
+		},
+	}
+	return lcm, hashes
+}
+
+// eventCount reads the hot events store's committed event count, failing the
+// test on the (close-only) error the Reader contract allows.
+func eventCount(t *testing.T, r interface{ EventCount() (uint32, error) }) uint32 { + t.Helper() + n, err := r.EventCount() + require.NoError(t, err) + return n +} diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store.go index 2ba7afd4f..b860a93cb 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store.go @@ -9,144 +9,74 @@ import ( "iter" "sync" - supportlog "github.com/stellar/go-stellar-sdk/support/log" - - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/zstd" ) -// Entry — one (sequence, uncompressed ledger bytes) pair. Both -// hot and cold stores compress on write and decompress on read, -// so callers always pass and receive raw ledger bytes here. +// LedgersCF is the column family the hot ledger data lives in. Registered in the +// shared per-chunk multi-CF DB (decision (a)). +const LedgersCF = "ledgers" + +// CFNames returns the CFs this facade owns, so the hotchunk shared-DB opener +// assembles the union the same way it does for txhash and eventstore (every +// facade exports CFNames()). +func CFNames() []string { return []string{LedgersCF} } + +// Entry — one (sequence, uncompressed ledger bytes) pair. Compression is +// internal to the store, so callers pass and receive raw bytes here. type Entry struct { Seq uint32 Bytes []byte } -// HotStore — RocksDB-backed hot ledger store. Default-CF only; -// keys are 4-byte big-endian sequences; values are zstd-compressed -// ledger bytes. Compression is internal: callers see raw bytes on -// the boundary. +// HotStore — RocksDB-backed hot ledger store. 
Keys are 4-byte BE sequences; +// values are zstd-compressed (internal). It accumulates one chunk's ledgers +// before freezing; it does not itself range-check writes (the driver's drain loop +// already validates every sequence against the chunk). // -// Like every hot store, a HotStore instance is chunk-bound: it -// accumulates exactly one chunk's ledgers before being frozen into -// the chunk's cold artifacts. The binding is recorded at open time -// (ChunkID) so the ingest driver can reject a store bound to a -// different chunk than it is ingesting; the store does not itself -// range-check writes (the driver's drain loop already validates -// every sequence against the chunk). -// -// Concurrency: all methods, including Close, are safe for concurrent -// use. rocksdb.Store.Close CAS-marks the store closed and then drains -// in-flight ops (each holds an RLock for its duration) before releasing -// resources; a read/write racing Close either completes first or -// observes the closed store and returns stores.ErrStoreClosed. Close is -// idempotent. HotStore adds no unguarded state of its own — the -// compressor pool and decompressor are both concurrent-safe. +// Concurrency: all methods are safe for concurrent use, including use alongside +// the caller-owned rocksdb.Store.Close. A read/write racing Close either completes +// first or observes the closed store and returns stores.ErrStoreClosed. HotStore +// adds no unguarded state of its own — the compressor pool and decompressor are +// both concurrent-safe. type HotStore struct { - store *rocksdb.Store - chunkID chunk.ID - dec *zstd.Decompressor - // compPool — per-store pool of zstd.Compressors. Each - // concurrent AddLedgers borrows one for the duration of its - // Encode call; the pool's GC finalizer (set inside - // zstd.NewCompressor) frees the C context when the compressor - // is dropped between GC cycles. 
+ store *rocksdb.Store + dec *zstd.Decompressor + // compPool — per-store pool of zstd.Compressors; each concurrent + // AddLedgerToBatch borrows one for its Encode call. compPool sync.Pool } -// OpenHotStore validates inputs and returns an open HotStore bound -// to chunkID (see the HotStore doc on chunk binding). path and -// logger are both required; logger is forwarded to the -// pkg/rocksdb wrapper (rocksdb writes the on-open state line and -// the close-time Flush warning through it). HotStore itself does -// not emit any logs — the cold store, by contrast, takes no -// logger because packfile is silent. Rides on RocksDB defaults — -// no explicit block cache (RocksDB's per-CF default plus OS page -// cache cover range scans), no bloom filter (callers know in -// advance which sequences this store holds, so it is never asked -// for a key it doesn't have), no WAL cap (graceful Close flushes -// the memtable; ungraceful WAL replay at this scale is sub-second). -// Re-tune only with a workload measurement. -func OpenHotStore(path string, chunkID chunk.ID, logger *supportlog.Entry) (*HotStore, error) { - if path == "" { - return nil, stores.ErrInvalidConfig - } - if logger == nil { - return nil, stores.ErrInvalidConfig - } - store, err := rocksdb.New(rocksdb.Config{ - Path: path, - Logger: logger, - }) - if err != nil { - return nil, err - } +// NewWithStore wraps an ALREADY-OPEN rocksdb.Store as a ledger HotStore on +// LedgersCF. The store is owned by the caller — in production, hotchunk.DB +// composes this facade over the shared multi-CF DB and closes that DB once. The +// store must have LedgersCF registered. +func NewWithStore(store *rocksdb.Store) *HotStore { return &HotStore{ - store: store, - chunkID: chunkID, - dec: zstd.NewDecompressor(), + store: store, + dec: zstd.NewDecompressor(), compPool: sync.Pool{ New: func() any { return zstd.NewCompressor() }, }, - }, nil + } } -// Close releases the underlying RocksDB store. 
Idempotent — -// delegates to rocksdb.Store.Close. Must not be called concurrently -// with in-flight reads/writes on this HotStore. -func (h *HotStore) Close() error { return h.store.Close() } - -// ChunkID returns the chunk this store is bound to (constructor-supplied; -// never reads the store). -func (h *HotStore) ChunkID() chunk.ID { return h.chunkID } - -// AddLedgers writes (seq, raw-bytes) entries to rocksdb. Bytes is -// the uncompressed ledger payload; AddLedgers compresses each -// entry with zstd before write. Variadic so callers can pass -// individual entries (h.AddLedgers(e)), a literal batch -// (h.AddLedgers(e1, e2, e3)), or a slice (h.AddLedgers(entries...)). -// Zero entries is a no-op; one entry uses Store.Put; multiple -// entries use Store.Batch (one atomic write, one fsync — versus N -// fsyncs for N Put calls). -func (h *HotStore) AddLedgers(entries ...Entry) error { - if h.store.IsClosed() { - return stores.ErrStoreClosed - } - if len(entries) == 0 { - return nil - } +// AddLedgerToBatch compresses one ledger and queues its Put into b on LedgersCF +// — the building block hotchunk uses to fold the ledger write into the one +// shared per-ledger WriteBatch (decision (a)). Does not commit (caller owns the +// batch). Compresses into a fresh buffer BatchWriter.Put copies, so e.Bytes need +// not outlive this call. The caller runs inside Store.Batch, whose lifecycle +// RLock + checkOpen is the authoritative closed-store guard, so this adds none. 
+func (h *HotStore) AddLedgerToBatch(b *rocksdb.BatchWriter, e Entry) error { c, _ := h.compPool.Get().(*zstd.Compressor) defer h.compPool.Put(c) - - if len(entries) == 1 { - e := entries[0] - compressed, err := c.Encode(nil, e.Bytes) - if err != nil { - return err - } - return translateRocksErr(h.store.Put("", rocksdb.EncodeUint32(e.Seq), compressed)) - } - // Multi-entry path: compress each into its own fresh slice so - // the batch can hold them all simultaneously (the compressor's - // internal buffer would otherwise be overwritten on the next - // Encode call). - compressed := make([][]byte, len(entries)) - for i, e := range entries { - out, err := c.Encode(nil, e.Bytes) - if err != nil { - return err - } - compressed[i] = out + compressed, err := c.Encode(nil, e.Bytes) + if err != nil { + return err } - return translateRocksErr(h.store.Batch(func(b *rocksdb.BatchWriter) error { - for i, e := range entries { - b.Put("", rocksdb.EncodeUint32(e.Seq), compressed[i]) - } - return nil - })) + b.Put(LedgersCF, rocksdb.EncodeUint32(e.Seq), compressed) + return nil } // GetLedgerRaw decodes the ledger stored under seq into a fresh, @@ -155,7 +85,7 @@ func (h *HotStore) AddLedgers(entries ...Entry) error { // should prefer IterateLedgers, which yields borrows without the // per-ledger decode allocation. func (h *HotStore) GetLedgerRaw(seq uint32) ([]byte, error) { - v, found, err := h.store.Get("", rocksdb.EncodeUint32(seq)) + v, found, err := h.store.Get(LedgersCF, rocksdb.EncodeUint32(seq)) if err != nil { return nil, translateRocksErr(err) } @@ -169,22 +99,12 @@ func (h *HotStore) GetLedgerRaw(seq uint32) ([]byte, error) { return out, nil } -// FirstSeq returns the lowest ledger sequence in the store, or ok=false -// if the store is empty. Cheap (a single RocksDB boundary seek): lets a -// caller learn the store's ledger range without an external chunk hint. 
-func (h *HotStore) FirstSeq() (uint32, bool, error) { return h.edgeSeq(false) } - // LastSeq returns the highest ledger sequence in the store, or ok=false -// if the store is empty. -func (h *HotStore) LastSeq() (uint32, bool, error) { return h.edgeSeq(true) } - -//nolint:funcorder // helper grouped with FirstSeq/LastSeq for readability -func (h *HotStore) edgeSeq(last bool) (uint32, bool, error) { - edge := h.store.FirstKey - if last { - edge = h.store.LastKey - } - k, ok, err := edge("") +// if the store is empty. This is the chunk's authoritative last-committed +// ledger (hotchunk.DB.MaxCommittedSeq reads it). Cheap — a single RocksDB +// boundary seek on the last key. +func (h *HotStore) LastSeq() (uint32, bool, error) { + k, ok, err := h.store.LastKey(LedgersCF) if err != nil { return 0, false, translateRocksErr(err) } @@ -213,7 +133,7 @@ func (h *HotStore) IterateLedgers(start, end uint32) iter.Seq2[Entry, error] { // it past the loop body. The read benches consume each ledger in-scope, // so this avoids a per-ledger decode allocation. 
var scratch []byte - for e, err := range h.store.IterateRange("", rocksdb.EncodeUint32(start), rocksdb.EncodeUint32(end)) { + for e, err := range h.store.IterateRange(LedgersCF, rocksdb.EncodeUint32(start), rocksdb.EncodeUint32(end)) { if err != nil { yield(Entry{}, translateRocksErr(err)) return diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store_test.go index 4a7f89ecd..fc53cab30 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/ledger/hot_store_test.go @@ -17,7 +17,7 @@ import ( supportlog "github.com/stellar/go-stellar-sdk/support/log" "github.com/stellar/go-stellar-sdk/xdr" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" ) @@ -31,41 +31,20 @@ func silentLogger() *supportlog.Entry { func openTestHotStore(t *testing.T) *HotStore { t.Helper() - h, err := OpenHotStore(t.TempDir(), chunk.ID(0), silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = h.Close() }) + h, _ := openTestHotStoreAt(t, t.TempDir()) return h } -func TestOpenHotStore_ValidatesInputs(t *testing.T) { - _, err := OpenHotStore("", chunk.ID(0), silentLogger()) - require.ErrorIs(t, err, stores.ErrInvalidConfig) - - _, err = OpenHotStore(t.TempDir(), chunk.ID(0), nil) - require.ErrorIs(t, err, stores.ErrInvalidConfig) -} - -func TestOpenHotStore_RecordsChunkBinding(t *testing.T) { - h, err := OpenHotStore(t.TempDir(), chunk.ID(7), silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = h.Close() }) - require.Equal(t, chunk.ID(7), h.ChunkID()) -} - -func TestOpenHotStore_CreatesMissingDirectory(t *testing.T) { - path := filepath.Join(t.TempDir(), "subdir-never-created") - h, err := 
OpenHotStore(path, chunk.ID(0), silentLogger()) +func openTestHotStoreAt(t *testing.T, path string) (*HotStore, *rocksdb.Store) { + t.Helper() + store, err := rocksdb.New(rocksdb.Config{ + Path: path, + ColumnFamilies: []string{LedgersCF}, + Logger: silentLogger(), + }) require.NoError(t, err) - require.NotNil(t, h) - t.Cleanup(func() { _ = h.Close() }) -} - -func TestHotStore_CloseIsIdempotent(t *testing.T) { - h, err := OpenHotStore(t.TempDir(), chunk.ID(0), silentLogger()) - require.NoError(t, err) - - require.NoError(t, h.Close()) - require.NoError(t, h.Close()) + t.Cleanup(func() { _ = store.Close() }) + return NewWithStore(store), store } func TestHotStore_AddGetRoundTripVerbatim(t *testing.T) { @@ -77,20 +56,20 @@ func TestHotStore_AddGetRoundTripVerbatim(t *testing.T) { // Single-entry write. payload := []byte("arbitrary opaque bytes the store has no opinion about") - require.NoError(t, h.AddLedgers(Entry{Seq: 42, Bytes: payload})) + require.NoError(t, addLedgers(h, Entry{Seq: 42, Bytes: payload})) got, err := h.GetLedgerRaw(42) require.NoError(t, err) assert.Equal(t, payload, got) // Overwrite. updated := []byte("different bytes") - require.NoError(t, h.AddLedgers(Entry{Seq: 42, Bytes: updated})) + require.NoError(t, addLedgers(h, Entry{Seq: 42, Bytes: updated})) got, err = h.GetLedgerRaw(42) require.NoError(t, err) assert.Equal(t, updated, got) // Zero entries — no-op, no error. 
- require.NoError(t, h.AddLedgers()) + require.NoError(t, addLedgers(h)) } // TestHotStore_AddLedgersIdempotentRetry mirrors the events store's retry @@ -103,46 +82,34 @@ func TestHotStore_AddLedgersIdempotentRetry(t *testing.T) { h := openTestHotStore(t) payload := []byte("ledger payload") - require.NoError(t, h.AddLedgers(Entry{Seq: 7, Bytes: payload})) - require.NoError(t, h.AddLedgers(Entry{Seq: 7, Bytes: payload})) // retry + require.NoError(t, addLedgers(h, Entry{Seq: 7, Bytes: payload})) + require.NoError(t, addLedgers(h, Entry{Seq: 7, Bytes: payload})) // retry got, err := h.GetLedgerRaw(7) require.NoError(t, err) assert.Equal(t, payload, got) // Still a single entry — the retry overwrote rather than appended. - first, ok, err := h.FirstSeq() - require.NoError(t, err) - require.True(t, ok) - assert.Equal(t, uint32(7), first) last, ok, err := h.LastSeq() require.NoError(t, err) require.True(t, ok) assert.Equal(t, uint32(7), last) } -func TestHotStore_FirstLastSeq(t *testing.T) { +func TestHotStore_LastSeq(t *testing.T) { h := openTestHotStore(t) // Empty store: ok=false, no error. - _, ok, err := h.FirstSeq() - require.NoError(t, err) - require.False(t, ok) - _, ok, err = h.LastSeq() + _, ok, err := h.LastSeq() require.NoError(t, err) require.False(t, ok) - // Insert seqs out of order; FirstSeq/LastSeq report the min/max present. - require.NoError(t, h.AddLedgers( + // Insert seqs out of order; LastSeq reports the max present. 
+ require.NoError(t, addLedgers(h, Entry{Seq: 105, Bytes: []byte("c")}, Entry{Seq: 100, Bytes: []byte("a")}, Entry{Seq: 103, Bytes: []byte("b")}, )) - first, ok, err := h.FirstSeq() - require.NoError(t, err) - require.True(t, ok) - assert.Equal(t, uint32(100), first) - last, ok, err := h.LastSeq() require.NoError(t, err) require.True(t, ok) @@ -157,7 +124,7 @@ func TestHotStore_AddLedgersMultipleEntries(t *testing.T) { {Seq: 101, Bytes: []byte("ledger 101 payload")}, {Seq: 102, Bytes: []byte("ledger 102 payload")}, } - require.NoError(t, h.AddLedgers(entries...)) + require.NoError(t, addLedgers(h, entries...)) for _, e := range entries { got, err := h.GetLedgerRaw(e.Seq) require.NoError(t, err) @@ -168,7 +135,7 @@ func TestHotStore_AddLedgersMultipleEntries(t *testing.T) { func TestHotStore_IterateLedgers(t *testing.T) { h := openTestHotStore(t) for _, seq := range []uint32{10, 20, 30, 40, 50} { - require.NoError(t, h.AddLedgers(Entry{Seq: seq, Bytes: []byte("v")})) + require.NoError(t, addLedgers(h, Entry{Seq: seq, Bytes: []byte("v")})) } // Full window. @@ -219,7 +186,7 @@ func TestHotStore_IterateLedgersVisibleGap(t *testing.T) { h := openTestHotStore(t) // Non-contiguous keyspace: missing 30. 
for _, seq := range []uint32{10, 20, 40, 50} { - require.NoError(t, h.AddLedgers(Entry{Seq: seq, Bytes: []byte("v")})) + require.NoError(t, addLedgers(h, Entry{Seq: seq, Bytes: []byte("v")})) } var seen []uint32 @@ -239,14 +206,11 @@ func TestHotStore_GracefulCloseAndReopen(t *testing.T) { {Seq: 15, Bytes: []byte("payload-15")}, } - first, err := OpenHotStore(path, chunk.ID(0), silentLogger()) - require.NoError(t, err) - require.NoError(t, first.AddLedgers(seeded...)) - require.NoError(t, first.Close()) + first, firstStore := openTestHotStoreAt(t, path) + require.NoError(t, addLedgers(first, seeded...)) + require.NoError(t, firstStore.Close()) - second, err := OpenHotStore(path, chunk.ID(0), silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = second.Close() }) + second, _ := openTestHotStoreAt(t, path) for _, want := range seeded { got, err := second.GetLedgerRaw(want.Seq) @@ -256,12 +220,11 @@ func TestHotStore_GracefulCloseAndReopen(t *testing.T) { } func TestHotStore_PostCloseOps(t *testing.T) { - h, err := OpenHotStore(t.TempDir(), chunk.ID(0), silentLogger()) - require.NoError(t, err) - require.NoError(t, h.Close()) + h, store := openTestHotStoreAt(t, t.TempDir()) + require.NoError(t, store.Close()) - require.ErrorIs(t, h.AddLedgers(Entry{Seq: 1, Bytes: []byte("v")}), stores.ErrStoreClosed) - _, err = h.GetLedgerRaw(1) + require.ErrorIs(t, addLedgers(h, Entry{Seq: 1, Bytes: []byte("v")}), stores.ErrStoreClosed) + _, err := h.GetLedgerRaw(1) require.ErrorIs(t, err, stores.ErrStoreClosed) var iterErr error for _, e := range h.IterateLedgers(0, 100) { @@ -269,7 +232,7 @@ func TestHotStore_PostCloseOps(t *testing.T) { } require.ErrorIs(t, iterErr, stores.ErrStoreClosed) - require.ErrorIs(t, h.AddLedgers(), stores.ErrStoreClosed) + require.ErrorIs(t, addLedgers(h), stores.ErrStoreClosed) iterErr = nil for _, e := range h.IterateLedgers(100, 50) { @@ -279,9 +242,9 @@ func TestHotStore_PostCloseOps(t *testing.T) { } func 
TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) { - h := openTestHotStore(t) + h, store := openTestHotStoreAt(t, t.TempDir()) for i := range uint32(50) { - require.NoError(t, h.AddLedgers(Entry{Seq: i, Bytes: []byte("v")})) + require.NoError(t, addLedgers(h, Entry{Seq: i, Bytes: []byte("v")})) } var wg sync.WaitGroup @@ -290,7 +253,7 @@ func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) { for w := range workers { wg.Go(func() { for i := uint32(0); !stop.Load(); i++ { - _ = h.AddLedgers(Entry{Seq: uint32(w)*1_000_000 + i, Bytes: []byte("v")}) + _ = addLedgers(h, Entry{Seq: uint32(w)*1_000_000 + i, Bytes: []byte("v")}) } }) wg.Go(func() { @@ -310,11 +273,11 @@ func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) { } time.Sleep(50 * time.Millisecond) - require.NoError(t, h.Close()) + require.NoError(t, store.Close()) stop.Store(true) wg.Wait() - require.ErrorIs(t, h.AddLedgers(Entry{Seq: 1, Bytes: []byte("v")}), stores.ErrStoreClosed) + require.ErrorIs(t, addLedgers(h, Entry{Seq: 1, Bytes: []byte("v")}), stores.ErrStoreClosed) } // TestHotStore_AddLedgersEmptyBytes pins behavior on zero-length @@ -322,7 +285,7 @@ func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) { // and read back as empty. func TestHotStore_AddLedgersEmptyBytes(t *testing.T) { h := openTestHotStore(t) - require.NoError(t, h.AddLedgers(Entry{Seq: 1, Bytes: nil})) + require.NoError(t, addLedgers(h, Entry{Seq: 1, Bytes: nil})) got, err := h.GetLedgerRaw(1) require.NoError(t, err) assert.Empty(t, got) @@ -345,7 +308,7 @@ func TestHotToColdMigration(t *testing.T) { b, err := lcm.MarshalBinary() require.NoError(t, err) raws[i] = b - require.NoError(t, hot.AddLedgers(Entry{Seq: firstSeq + uint32(i), Bytes: b})) + require.NoError(t, addLedgers(hot, Entry{Seq: firstSeq + uint32(i), Bytes: b})) } // Stream hot → cold. No re-encoding step on the caller side. 
@@ -380,7 +343,7 @@ func TestHotStore_XDRRoundTrip(t *testing.T) { require.NoError(t, err) h := openTestHotStore(t) - require.NoError(t, h.AddLedgers(Entry{Seq: ledgerSeq, Bytes: raw})) + require.NoError(t, addLedgers(h, Entry{Seq: ledgerSeq, Bytes: raw})) gotRaw, err := h.GetLedgerRaw(ledgerSeq) require.NoError(t, err) @@ -476,3 +439,16 @@ func makeRandomLedgerCloseMeta( lcm.V1.LedgerHeader.Header.LedgerSeq = xdr.Uint32(ledgerSeq) return lcm, hashes } + +// addLedgers commits entries through AddLedgerToBatch in one batch — the +// production write shape, reduced to a test seeding call. +func addLedgers(h *HotStore, entries ...Entry) error { + return translateRocksErr(h.store.Batch(func(b *rocksdb.BatchWriter) error { + for _, e := range entries { + if err := h.AddLedgerToBatch(b, e); err != nil { + return err + } + } + return nil + })) +} diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go index 18bfa4420..1c1ed81fe 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store.go @@ -1,29 +1,16 @@ -// Package txhash holds the hot transaction-hash store (RocksDB-backed, -// 16-CF nibble-routed) and its value types. A future cold reader -// (RecSplit-backed) will live alongside the HotStore in this package. +// Package txhash holds the hot transaction-hash store (RocksDB-backed, a single +// txhash CF) and its value types. A future cold reader (RecSplit-backed) will +// live alongside the HotStore in this package. 
package txhash import ( - supportlog "github.com/stellar/go-stellar-sdk/support/log" - - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" ) -// 16 CFs — one per high-nibble bucket of byte 0 of the txhash. -// Same routing the cold RecSplit index uses. -const numCFs = 16 - -// cfNameByNibble is the precomputed (cf-0..cf-f) table indexed by -// hash[0]>>4. Single source of truth used by both cfNames (open-time -// CF list) and cfNameForTxHash (hot path). -// -//nolint:gochecknoglobals -var cfNameByNibble = [16]string{ - "cf-0", "cf-1", "cf-2", "cf-3", "cf-4", "cf-5", "cf-6", "cf-7", - "cf-8", "cf-9", "cf-a", "cf-b", "cf-c", "cf-d", "cf-e", "cf-f", -} +// txhashCF is the single column family holding every (txhash → ledgerSeq) +// entry for the chunk, per the design's hot-tier spec (one `txhash` CF). +const txhashCF = "txhash" // Entry — one (txhash → ledgerSeq) mapping. type Entry struct { @@ -31,65 +18,40 @@ type Entry struct { LedgerSeq uint32 } -// HotStore — RocksDB-backed hot transaction-hash store. 16 CFs named -// cf-0..cf-f; each hash routes to cf-{txhash[0]>>4}; ledgerSeq -// encoded big-endian. Routing, CF names, and encoding are internal. +// HotStore — RocksDB-backed hot transaction-hash store. A single txhash CF +// holding the full 32-byte hash as key and the big-endian ledgerSeq as value. +// The CF name and encoding are internal. // // Like every hot store, a HotStore instance is chunk-bound: it // accumulates exactly one chunk's (txhash → seq) tuples before being -// frozen into the chunk's cold .bin artifact. 
The binding is recorded -// at open time (ChunkID) so the ingest driver can reject a store -// bound to a different chunk than it is ingesting; the store does not -// itself range-check writes (the driver's drain loop already -// validates every ledger sequence against the chunk). +// frozen into the chunk's cold .bin artifact. The store does not itself +// range-check writes (the driver's drain loop already validates every ledger +// sequence against the chunk). type HotStore struct { - store *rocksdb.Store - chunkID chunk.ID -} - -// NewHotStore validates inputs and returns an open HotStore bound to -// chunkID (see the HotStore doc on chunk binding). -func NewHotStore(path string, chunkID chunk.ID, logger *supportlog.Entry) (*HotStore, error) { - if path == "" { - return nil, rocksdb.ErrInvalidConfig - } - if logger == nil { - return nil, rocksdb.ErrInvalidConfig - } - store, err := rocksdb.New(rocksdb.Config{ - Path: path, - ColumnFamilies: cfNames(), - Logger: logger, - Tuning: tuning(), - }) - if err != nil { - return nil, err - } - return &HotStore{store: store, chunkID: chunkID}, nil + store *rocksdb.Store } -func cfNames() []string { - out := make([]string, numCFs) - copy(out, cfNameByNibble[:]) - return out +// NewWithStore wraps an ALREADY-OPEN rocksdb.Store as a txhash HotStore on the +// single txhash CF (CFNames()). The store is owned by the caller — in production, +// hotchunk.DB composes this facade over the shared per-chunk DB and closes that DB +// once. The store must have CFNames() registered. +func NewWithStore(store *rocksdb.Store) *HotStore { + return &HotStore{store: store} } -func cfNameForTxHash(hash [32]byte) string { - return cfNameByNibble[hash[0]>>4] -} +// CFNames returns the single txhash CF name this facade owns. Exported so +// the hotchunk shared-DB opener can register it alongside the other CFs. 
+func CFNames() []string { return []string{txhashCF} } -// tuning — the hot txhash workload is write-once / point-lookup over -// 16 CFs; the cross-knob interactions below are non-obvious enough -// that they get an explicit per-stanza rationale. The other facades -// ride on RocksDB defaults by contrast — only this workload earned -// the calibration. -func tuning() rocksdb.Tuning { +// Tuning returns this facade's RocksDB tuning, applied to the shared per-chunk +// DB by the hotchunk opener. The hot txhash workload is write-once / +// point-lookup; the cross-knob interactions below are non-obvious enough that +// they get an explicit per-stanza rationale. The other facades ride on RocksDB +// defaults by contrast — only this workload earned the calibration. +func Tuning() rocksdb.Tuning { return rocksdb.Tuning{ - // Per-CF memtable budget × 16 CFs (64 MB × 16 = 1024 MB) - // matches the MaxTotalWalSizeMB cap below. Memtable-fill - // cadence and WAL-cap cadence align under uniform writes; - // either trigger fires at roughly the same time and produces - // ~64 MB SSTs. + // 64 MB memtable so one flush produces one ~64 MB SST under + // uniform writes. WriteBufferMB: 64, MaxWriteBufferNumber: 2, @@ -117,8 +79,7 @@ func tuning() rocksdb.Tuning { TargetFileSizeMB: 64, MaxBytesForLevelBaseMB: 256, - // High background-job budget for the periodic memtable - // flushes across 16 CFs. + // Background-job budget for the periodic memtable flushes. MaxBackgroundJobs: 8, MaxOpenFiles: 10_000, @@ -131,47 +92,29 @@ func tuning() rocksdb.Tuning { BlockCacheMB: 512, BloomFilterBitsPerKey: 12, - // 1 GB WAL cap matches the natural memtable budget above. - // Graceful Close auto-Flushes (see rocksdb.Store.Close), so - // this cap only bounds ungraceful-shutdown recovery (kernel - // panic, power loss, OOM kill). + // 1 GB WAL cap. 
Graceful Close auto-Flushes (see + // rocksdb.Store.Close), so this cap only bounds ungraceful-shutdown + // recovery (kernel panic, power loss, OOM kill). MaxTotalWalSizeMB: 1024, } } -func (h *HotStore) Close() error { return h.store.Close() } - -// ChunkID returns the chunk this store is bound to (constructor-supplied; -// never reads the store). -func (h *HotStore) ChunkID() chunk.ID { return h.chunkID } - -// AddEntries writes a batch of (txhash → ledgerSeq) atomically -// across however many CFs the hashes' nibbles cover. One fsync per -// call. -func (h *HotStore) AddEntries(entries []Entry) error { - if h.store.IsClosed() { - return rocksdb.ErrStoreClosed - } - switch len(entries) { - case 0: - return nil - case 1: - e := entries[0] - return h.store.Put(cfNameForTxHash(e.Hash), e.Hash[:], rocksdb.EncodeUint32(e.LedgerSeq)) - default: - return h.store.Batch(func(b *rocksdb.BatchWriter) error { - for _, e := range entries { - b.Put(cfNameForTxHash(e.Hash), e.Hash[:], rocksdb.EncodeUint32(e.LedgerSeq)) - } - return nil - }) +// AddEntriesToBatch queues each (txhash → ledgerSeq) Put into b on the txhash +// CF — the building block hotchunk uses to fold the tx-hash writes into the one +// shared per-ledger WriteBatch (decision (a)). Does not commit (caller owns the +// batch). The caller runs inside Store.Batch, whose lifecycle RLock + checkOpen +// is the authoritative closed-store guard, so this adds none. +func (h *HotStore) AddEntriesToBatch(b *rocksdb.BatchWriter, entries []Entry) error { + for _, e := range entries { + b.Put(txhashCF, e.Hash[:], rocksdb.EncodeUint32(e.LedgerSeq)) } + return nil } // Get returns the ledger sequence the hash was committed in, or -// (0, stores.ErrNotFound) on miss. Only the routed CF is queried. +// (0, stores.ErrNotFound) on miss. 
func (h *HotStore) Get(hash [32]byte) (uint32, error) { - v, found, err := h.store.Get(cfNameForTxHash(hash), hash[:]) + v, found, err := h.store.Get(txhashCF, hash[:]) if err != nil { return 0, err } diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store_test.go index c600d6141..7e0a117e0 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/hot_store_test.go @@ -2,7 +2,6 @@ package txhash import ( "bytes" - "path/filepath" "sync" "sync/atomic" "testing" @@ -14,7 +13,6 @@ import ( supportlog "github.com/stellar/go-stellar-sdk/support/log" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/rocksdb" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/stores" ) @@ -27,6 +25,8 @@ func silentLogger() *supportlog.Entry { return log } +// txhashFor builds a distinct 32-byte hash from a (high-nibble, tag) pair — +// a convenient generator of many distinct keys for the single txhash CF. 
func txhashFor(nibble, tag byte) [32]byte { var h [32]byte h[0] = nibble << 4 @@ -39,41 +39,21 @@ func txhashFor(nibble, tag byte) [32]byte { func openTestHotStore(t *testing.T) *HotStore { t.Helper() - s, err := NewHotStore(t.TempDir(), chunk.ID(0), silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = s.Close() }) + s, _ := openTestHotStoreAt(t, t.TempDir()) return s } -func TestNewHotStore_ValidatesInputs(t *testing.T) { - _, err := NewHotStore("", chunk.ID(0), silentLogger()) - require.ErrorIs(t, err, rocksdb.ErrInvalidConfig) - - _, err = NewHotStore(t.TempDir(), chunk.ID(0), nil) - require.ErrorIs(t, err, rocksdb.ErrInvalidConfig) -} - -func TestNewHotStore_RecordsChunkBinding(t *testing.T) { - s, err := NewHotStore(t.TempDir(), chunk.ID(7), silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = s.Close() }) - require.Equal(t, chunk.ID(7), s.ChunkID()) -} - -func TestNewHotStore_CreatesMissingDirectory(t *testing.T) { - path := filepath.Join(t.TempDir(), "subdir-never-created") - s, err := NewHotStore(path, chunk.ID(0), silentLogger()) - require.NoError(t, err) - require.NotNil(t, s) - t.Cleanup(func() { _ = s.Close() }) -} - -func TestHotStore_CloseIsIdempotent(t *testing.T) { - s, err := NewHotStore(t.TempDir(), chunk.ID(0), silentLogger()) +func openTestHotStoreAt(t *testing.T, path string) (*HotStore, *rocksdb.Store) { + t.Helper() + store, err := rocksdb.New(rocksdb.Config{ + Path: path, + ColumnFamilies: CFNames(), + Logger: silentLogger(), + Tuning: Tuning(), + }) require.NoError(t, err) - - require.NoError(t, s.Close()) - require.NoError(t, s.Close()) + t.Cleanup(func() { _ = store.Close() }) + return NewWithStore(store), store } func TestHotStore_AddGetRoundTrip(t *testing.T) { @@ -86,42 +66,43 @@ func TestHotStore_AddGetRoundTrip(t *testing.T) { require.ErrorIs(t, err, stores.ErrNotFound) // Single-entry AddEntries. 
- require.NoError(t, s.AddEntries([]Entry{{Hash: h, LedgerSeq: 12345}})) + require.NoError(t, addEntries(s, []Entry{{Hash: h, LedgerSeq: 12345}})) got, err := s.Get(h) require.NoError(t, err) assert.Equal(t, uint32(12345), got) // Overwrite via a second AddEntries. - require.NoError(t, s.AddEntries([]Entry{{Hash: h, LedgerSeq: 67890}})) + require.NoError(t, addEntries(s, []Entry{{Hash: h, LedgerSeq: 67890}})) got, err = s.Get(h) require.NoError(t, err) assert.Equal(t, uint32(67890), got) // Empty slice — no-op, no error. - require.NoError(t, s.AddEntries(nil)) - require.NoError(t, s.AddEntries([]Entry{})) + require.NoError(t, addEntries(s, nil)) + require.NoError(t, addEntries(s, []Entry{})) } -func TestHotStore_NibbleRoutingAcrossAllCFs(t *testing.T) { +func TestHotStore_ManyDistinctKeys(t *testing.T) { s := openTestHotStore(t) - entries := make([]Entry, numCFs) - for n := range numCFs { - entries[n] = Entry{ - Hash: txhashFor(byte(n), 1), - LedgerSeq: uint32(n) * 100, + const n = 16 + entries := make([]Entry, n) + for i := range n { + entries[i] = Entry{ + Hash: txhashFor(byte(i), 1), + LedgerSeq: uint32(i) * 100, } } - require.NoError(t, s.AddEntries(entries)) + require.NoError(t, addEntries(s, entries)) - for n := range numCFs { - got, err := s.Get(entries[n].Hash) - require.NoError(t, err, "nibble %x", n) - assert.Equal(t, uint32(n)*100, got, "nibble %x", n) + for i := range n { + got, err := s.Get(entries[i].Hash) + require.NoError(t, err, "key %d", i) + assert.Equal(t, uint32(i)*100, got, "key %d", i) } } -func TestHotStore_AddEntriesMultipleSpansCFs(t *testing.T) { +func TestHotStore_AddEntriesMultiple(t *testing.T) { s := openTestHotStore(t) entries := []Entry{ @@ -131,7 +112,7 @@ func TestHotStore_AddEntriesMultipleSpansCFs(t *testing.T) { {Hash: txhashFor(0xc, 1), LedgerSeq: 40}, {Hash: txhashFor(0xf, 1), LedgerSeq: 50}, } - require.NoError(t, s.AddEntries(entries)) + require.NoError(t, addEntries(s, entries)) for _, e := range entries { got, err := 
s.Get(e.Hash) @@ -144,7 +125,7 @@ func TestHotStore_AddEntriesMultipleSpansCFs(t *testing.T) { for i, e := range entries { updated[i] = Entry{Hash: e.Hash, LedgerSeq: e.LedgerSeq + 1000} } - require.NoError(t, s.AddEntries(updated)) + require.NoError(t, addEntries(s, updated)) for _, e := range updated { got, err := s.Get(e.Hash) require.NoError(t, err) @@ -153,36 +134,32 @@ func TestHotStore_AddEntriesMultipleSpansCFs(t *testing.T) { } func TestHotStore_PostCloseOps(t *testing.T) { - s, err := NewHotStore(t.TempDir(), chunk.ID(0), silentLogger()) - require.NoError(t, err) - require.NoError(t, s.Close()) + s, store := openTestHotStoreAt(t, t.TempDir()) + require.NoError(t, store.Close()) h := txhashFor(0x5, 1) - require.ErrorIs(t, s.AddEntries([]Entry{{Hash: h, LedgerSeq: 1}}), rocksdb.ErrStoreClosed) - _, err = s.Get(h) + require.ErrorIs(t, addEntries(s, []Entry{{Hash: h, LedgerSeq: 1}}), rocksdb.ErrStoreClosed) + _, err := s.Get(h) require.ErrorIs(t, err, rocksdb.ErrStoreClosed) - require.ErrorIs(t, s.AddEntries(nil), rocksdb.ErrStoreClosed) - require.ErrorIs(t, s.AddEntries([]Entry{}), rocksdb.ErrStoreClosed) + require.ErrorIs(t, addEntries(s, nil), rocksdb.ErrStoreClosed) + require.ErrorIs(t, addEntries(s, []Entry{}), rocksdb.ErrStoreClosed) } func TestHotStore_GracefulCloseAndReopenRoundTrips(t *testing.T) { path := t.TempDir() - first, err := NewHotStore(path, chunk.ID(0), silentLogger()) - require.NoError(t, err) - for n := range numCFs { - require.NoError(t, first.AddEntries([]Entry{ + first, firstStore := openTestHotStoreAt(t, path) + for n := range 16 { + require.NoError(t, addEntries(first, []Entry{ {Hash: txhashFor(byte(n), 1), LedgerSeq: uint32(n) + 1}, })) } - require.NoError(t, first.Close()) + require.NoError(t, firstStore.Close()) - second, err := NewHotStore(path, chunk.ID(0), silentLogger()) - require.NoError(t, err) - t.Cleanup(func() { _ = second.Close() }) + second, _ := openTestHotStoreAt(t, path) - for n := range numCFs { + for n := range 16 
{ got, err := second.Get(txhashFor(byte(n), 1)) require.NoError(t, err) assert.Equal(t, uint32(n)+1, got) @@ -190,13 +167,13 @@ func TestHotStore_GracefulCloseAndReopenRoundTrips(t *testing.T) { } func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) { - s := openTestHotStore(t) - // Pre-populate one entry per nibble. - pre := make([]Entry, numCFs) - for n := range numCFs { + s, store := openTestHotStoreAt(t, t.TempDir()) + // Pre-populate a spread of distinct keys. + pre := make([]Entry, 16) + for n := range 16 { pre[n] = Entry{Hash: txhashFor(byte(n), 1), LedgerSeq: uint32(n)} } - require.NoError(t, s.AddEntries(pre)) + require.NoError(t, addEntries(s, pre)) var wg sync.WaitGroup var stop atomic.Bool @@ -204,52 +181,31 @@ func TestHotStore_ConcurrentOpsAndCloseRaceFree(t *testing.T) { for w := range workers { wg.Go(func() { for i := byte(0); !stop.Load(); i++ { - _ = s.AddEntries([]Entry{ - {Hash: txhashFor(i%numCFs, byte(w+5)), LedgerSeq: uint32(i)}, + _ = addEntries(s, []Entry{ + {Hash: txhashFor(i%16, byte(w+5)), LedgerSeq: uint32(i)}, }) } }) wg.Go(func() { for i := byte(0); !stop.Load(); i++ { - _, _ = s.Get(txhashFor(i%numCFs, 1)) + _, _ = s.Get(txhashFor(i%16, 1)) } }) } time.Sleep(50 * time.Millisecond) - require.NoError(t, s.Close()) + require.NoError(t, store.Close()) stop.Store(true) wg.Wait() postClose := []Entry{{Hash: txhashFor(0x1, 1), LedgerSeq: 1}} - require.ErrorIs(t, s.AddEntries(postClose), rocksdb.ErrStoreClosed) + require.ErrorIs(t, addEntries(s, postClose), rocksdb.ErrStoreClosed) } -func TestCFNameForTxHash_AllHighNibbles(t *testing.T) { - cases := []struct { - topByte byte - want string - }{ - {0x00, "cf-0"}, - {0x10, "cf-1"}, - {0x20, "cf-2"}, - {0x30, "cf-3"}, - {0x40, "cf-4"}, - {0x50, "cf-5"}, - {0x60, "cf-6"}, - {0x70, "cf-7"}, - {0x80, "cf-8"}, - {0x90, "cf-9"}, - {0xa0, "cf-a"}, - {0xb0, "cf-b"}, - {0xc0, "cf-c"}, - {0xd0, "cf-d"}, - {0xe0, "cf-e"}, - {0xf0, "cf-f"}, - } - for _, c := range cases { - var h [32]byte - h[0] = 
c.topByte - assert.Equal(t, c.want, cfNameForTxHash(h)) - } +// addEntries commits entries through AddEntriesToBatch in one batch — the +// production write shape, reduced to a test seeding call. +func addEntries(h *HotStore, entries []Entry) error { + return h.store.Batch(func(b *rocksdb.BatchWriter) error { + return h.AddEntriesToBatch(b, entries) + }) } diff --git a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/read_assembly_test.go b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/read_assembly_test.go index d358b4adc..840c9697f 100644 --- a/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/read_assembly_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/pkg/stores/txhash/read_assembly_test.go @@ -346,7 +346,7 @@ func TestTxReader_HotAndColdFederation(t *testing.T) { flHot := buildLedgers(t, []uint32{hotSeq}, 1) hotStore := openTestHotStore(t) for h, seq := range flHot.byHash { - require.NoError(t, hotStore.AddEntries([]Entry{{Hash: h, LedgerSeq: seq}})) + require.NoError(t, addEntries(hotStore, []Entry{{Hash: h, LedgerSeq: seq}})) } coldSeq := chunk.ID(5).FirstLedger() diff --git a/cmd/stellar-rpc/internal/fullhistory/progress.go b/cmd/stellar-rpc/internal/fullhistory/progress.go deleted file mode 100644 index 2ab6ba375..000000000 --- a/cmd/stellar-rpc/internal/fullhistory/progress.go +++ /dev/null @@ -1,121 +0,0 @@ -package fullhistory - -import ( - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" -) - -// Progress is derived, never stored: every consumer recomputes from durable keys. -// "Highest complete chunk" arithmetic runs in int64 (-1 = "nothing complete") to -// avoid uint32 wraparound on the pre-genesis sentinel. 
- -// lastCommittedLedger derives the highest durably committed ledger: the max of the -// floor term (EarliestLedger()-1) and the cold term (the highest fully-durable -// chunk's last ledger). Computed signed so a fresh/unpinned store doesn't underflow, -// then floored at the pre-genesis base (FirstLedgerSeq-1) — the "ingest from -// genesis, nothing committed" base. -func lastCommittedLedger(cat *catalog.Catalog) (uint32, error) { - cold, err := highestDurableChunk(cat) - if err != nil { - return 0, err - } - earliest, ok, err := cat.EarliestLedger() - if err != nil { - return 0, err - } - - through := int64(chunk.FirstLedgerSeq) - 1 // pre-genesis base - if ok { - through = max(through, int64(earliest)-1) - } - if cold >= 0 { - through = max(through, int64(chunk.ID(cold).LastLedger())) //nolint:gosec // cold >= 0, a real chunk id - } - return uint32(through), nil // through >= FirstLedgerSeq-1 >= 0 -} - -// highestDurableChunk returns the highest chunk id with all artifacts durable -// (ledgers frozen AND events frozen AND (txhash frozen OR covered by a frozen -// index)), or -1 on a fresh start. A partially-frozen tip chunk is excluded — -// counting it would open reads over a partial artifact; backfill repairs it. -func highestDurableChunk(cat *catalog.Catalog) (int64, error) { - refs, err := cat.ChunkArtifactKeys() - if err != nil { - return 0, err - } - - // Frozen per-kind state per chunk. - type kinds struct{ ledgers, events, txhash bool } - frozen := map[chunk.ID]*kinds{} - for _, ref := range refs { - if ref.State != geometry.StateFrozen { - continue - } - k := frozen[ref.Chunk] - if k == nil { - k = &kinds{} - frozen[ref.Chunk] = k - } - switch ref.Kind { - case geometry.KindLedgers: - k.ledgers = true - case geometry.KindEvents: - k.events = true - case geometry.KindTxHash: - k.txhash = true - } - } - - // A frozen index coverage satisfies a chunk's txhash even after its .bin was demoted. 
- covered, err := frozenCoverageContains(cat) - if err != nil { - return 0, err - } - - highest := int64(-1) - for c, k := range frozen { - if !k.ledgers || !k.events { - continue - } - if !k.txhash && !covered(c) { - continue - } - if id := int64(c); id > highest { - highest = id - } - } - return highest, nil -} - -// frozenCoverageContains returns a predicate reporting whether a chunk falls in -// some frozen index coverage [Lo, Hi]; coverages are read once up front. -func frozenCoverageContains(cat *catalog.Catalog) (func(chunk.ID) bool, error) { - covs, err := cat.AllTxHashIndexKeys() - if err != nil { - return nil, err - } - var frozen []geometry.TxHashIndexCoverage - for _, cov := range covs { - if cov.State == geometry.StateFrozen { - frozen = append(frozen, cov) - } - } - return func(c chunk.ID) bool { - for _, cov := range frozen { - if cov.Lo <= c && c <= cov.Hi { - return true - } - } - return false - }, nil -} - -// chunkIDOfLedger maps a ledger to its chunk, signed so a sub-genesis ledger -// yields -1 instead of panicking like chunk.IDFromLedger. -func chunkIDOfLedger(ledger uint32) int64 { - if ledger < chunk.FirstLedgerSeq { - return -1 - } - return int64(chunk.IDFromLedger(ledger)) -} diff --git a/cmd/stellar-rpc/internal/fullhistory/progress_test.go b/cmd/stellar-rpc/internal/fullhistory/progress_test.go deleted file mode 100644 index 6fc469049..000000000 --- a/cmd/stellar-rpc/internal/fullhistory/progress_test.go +++ /dev/null @@ -1,105 +0,0 @@ -package fullhistory - -import ( - "testing" - - "github.com/stretchr/testify/require" - - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" -) - -// --------------------------------------------------------------------------- -// progress derivation test helpers. 
-// --------------------------------------------------------------------------- - -// makeChunkDurable freezes ledgers+events+txhash for a chunk — the durable state -// highestDurableChunk counts. -func makeChunkDurable(t *testing.T, cat *catalog.Catalog, c chunk.ID) { - t.Helper() - freezeKinds(t, cat, c, geometry.KindLedgers, geometry.KindEvents, geometry.KindTxHash) -} - -// --------------------------------------------------------------------------- -// lastCommittedLedger — chunk-granularity bound, pure catalog read. -// --------------------------------------------------------------------------- - -func TestLastCommittedLedger(t *testing.T) { - t.Run("fresh store => pre-genesis sentinel, never MaxUint32", func(t *testing.T) { - // Every term is -1; the signed domain must yield FirstLedgerSeq-1, not wrap. - cat, _ := testCatalog(t) - got, err := lastCommittedLedger(cat) - require.NoError(t, err) - require.Equal(t, preGenesisLedger, got) - }) - - t.Run("cold term leads: highest fully-durable chunk", func(t *testing.T) { - cat, _ := testCatalog(t) - makeChunkDurable(t, cat, 0) - makeChunkDurable(t, cat, 1) - makeChunkDurable(t, cat, 2) - got, err := lastCommittedLedger(cat) - require.NoError(t, err) - require.Equal(t, chunk.ID(2).LastLedger(), got) - }) - - t.Run("incompletely-frozen tip degrades the bound (ledgers frozen, events freezing)", func(t *testing.T) { - cat, _ := testCatalog(t) - makeChunkDurable(t, cat, 0) - makeChunkDurable(t, cat, 1) - // Chunk 2 mid-freeze (events only "freezing") must NOT count: bound stays at 1. 
- freezeKinds(t, cat, 2, geometry.KindLedgers, geometry.KindTxHash) - require.NoError(t, cat.MarkChunkFreezing(2, geometry.KindEvents)) - got, err := lastCommittedLedger(cat) - require.NoError(t, err) - require.Equal(t, chunk.ID(1).LastLedger(), got) - }) - - t.Run("txhash satisfied by a frozen index coverage (post-finalization demote)", func(t *testing.T) { - cat, _ := testCatalog(t) - // Chunk 7: txhash demoted but a frozen index coverage spans it ⇒ still durable. - freezeKinds(t, cat, 7, geometry.KindLedgers, geometry.KindEvents) - freezeCoverage(t, cat, cat.TxHashIndexLayout().TxHashIndexID(7), 0, 999) // window 0 covers chunk 7 - got, err := lastCommittedLedger(cat) - require.NoError(t, err) - require.Equal(t, chunk.ID(7).LastLedger(), got) - }) - - t.Run("chunk NOT covered by any frozen index and no frozen txhash does not count", func(t *testing.T) { - cat, _ := testCatalog(t) - makeChunkDurable(t, cat, 0) - // Chunk 1: ledgers+events frozen, no txhash, no covering index. - freezeKinds(t, cat, 1, geometry.KindLedgers, geometry.KindEvents) - got, err := lastCommittedLedger(cat) - require.NoError(t, err) - require.Equal(t, chunk.ID(0).LastLedger(), got, "chunk 1 not durable; bound stays at chunk 0") - }) - - t.Run("earliest pin floor leads when above the cold term", func(t *testing.T) { - cat, _ := testCatalog(t) - // Floor pinned mid-chain, no chunks durable, no hot keys. 
- const floor = 50000 - require.NoError(t, cat.PinEarliestLedger(floor)) - got, err := lastCommittedLedger(cat) - require.NoError(t, err) - require.Equal(t, uint32(floor-1), got) - }) - - t.Run("earliest pin == genesis (2) does not underflow", func(t *testing.T) { - cat, _ := testCatalog(t) - require.NoError(t, cat.PinEarliestLedger(chunk.FirstLedgerSeq)) - got, err := lastCommittedLedger(cat) - require.NoError(t, err) - require.Equal(t, preGenesisLedger, got, "earliest 2 - 1 = 1, not MaxUint32") - }) - - t.Run("max of the cold term and the earliest floor", func(t *testing.T) { - cat, _ := testCatalog(t) - makeChunkDurable(t, cat, 3) // cold => chunk 3 last ledger (the higher term) - require.NoError(t, cat.PinEarliestLedger(2)) - got, err := lastCommittedLedger(cat) - require.NoError(t, err) - require.Equal(t, chunk.ID(3).LastLedger(), got) - }) -} diff --git a/cmd/stellar-rpc/internal/fullhistory/retention.go b/cmd/stellar-rpc/internal/fullhistory/retention.go deleted file mode 100644 index 2b0462390..000000000 --- a/cmd/stellar-rpc/internal/fullhistory/retention.go +++ /dev/null @@ -1,48 +0,0 @@ -package fullhistory - -import ( - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" - "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" -) - -// RetentionFloor is the lowest chunk still within retention; any chunk below it -// is eligible for discard/prune. It is the reader-side retention contract -// (design "Reader retention contract", gettx §8.2 / §8.5): availability is -// decided by retention, not the on-disk file set, which lets prune/sweep unlink -// a chunk the instant it passes the floor without coordinating with the index -// lifecycle (a stale .idx pointing at a pruned .pack is masked). 
The floor may -// err LOW harmlessly — a wrongly-retained chunk still hits the reader's -// missing-file rule — so it anchors on the same live completeThrough the prune -// scan uses; widening history is backfill's job, not the floor's. -type RetentionFloor struct { - chunk chunk.ID // lowest in-retention chunk -} - -// NewRetentionFloor pins the floor for one (through, retentionChunks, earliest) -// snapshot. A shortened retentionChunks raises the floor at once — no per-chunk -// state to migrate. -func NewRetentionFloor(through, retentionChunks, earliest uint32) RetentionFloor { - return RetentionFloor{chunk: retentionFloorChunk(through, retentionChunks, earliest)} -} - -// Excludes reports whether chunk c is below the floor — past retention, eligible -// for discard/prune. The discard and prune scans (eligibility.go) use it on a -// chunk directly and, since an index is below the floor exactly when its last -// chunk is, as Excludes(layout.LastChunk(idx)) for a whole tx-hash index. (The -// reader's seq-level admit predicate and the ledger-seq floor for §8.2 coverage -// filtering return with the read path, #772.) -func (f RetentionFloor) Excludes(c chunk.ID) bool { return c < f.chunk } - -// retentionFloorChunk is the retention window's lower bound as a chunk id (the -// design's retentionFloorChunk): the HIGHER of the sliding floor (retentionChunks -// back from the last complete chunk) and the fixed earliest_ledger. slidingChunk is -// signed so a young store / large retentionChunks clamps to chunk 0 instead of -// underflowing. Both terms are chunk-first-ledgers, so IDFromLedger is exact. 
-func retentionFloorChunk(upperBound, retentionChunks, earliest uint32) chunk.ID { - sliding := uint32(chunk.FirstLedgerSeq) // GenesisLedger - if retentionChunks > 0 { - slidingChunk := geometry.LastCompleteChunkAt(upperBound) - int64(retentionChunks) + 1 - sliding = geometry.ChunkFirstLedger(max(slidingChunk, 0)) - } - return chunk.IDFromLedger(max(sliding, earliest)) -} diff --git a/cmd/stellar-rpc/internal/fullhistory/startup.go b/cmd/stellar-rpc/internal/fullhistory/startup.go index 34e49dd46..a38d16c20 100644 --- a/cmd/stellar-rpc/internal/fullhistory/startup.go +++ b/cmd/stellar-rpc/internal/fullhistory/startup.go @@ -7,16 +7,26 @@ import ( "time" "github.com/cenkalti/backoff/v4" + "golang.org/x/sync/errgroup" + + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/backfill" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/lifecycle" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/observability" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) -// run is the daemon's startup: backfill to the tip, then serve reads (injected). -// Returns nil only on clean shutdown; any other return is restartable -// (ErrFirstStartNoTip on a first start with no reachable backend). +// run is the daemon's startup, in two steps: (1) BACKFILL to the tip, then +// (2) SERVE + INGEST — open the resume chunk's hot DB, open the ingestion stream +// (injected; construct-only), begin serving reads (injected), then run the live +// ingestion loop (handed that hot DB) and the lifecycle loop as a joined errgroup pair (whichever returns +// first cancels the other; g.Wait surfaces the first error).
A clean shutdown +// (ctx canceled mid-run) surfaces as the ctx-canceled error, which the supervisor classifies as clean; any other return is a restartable error +// the supervisor warns on and retries with backoff (a first start with no +// reachable backend, a backfill/ingest/lifecycle failure, or a "ready" hot DB that +// won't open — none are auto-healed, all are re-attempted). func run(ctx context.Context, cfg StartConfig) error { if err := cfg.validate(); err != nil { return err @@ -36,14 +46,19 @@ func run(ctx context.Context, cfg StartConfig) error { "(validateConfig pins it before run; not done here)") } - // Derived, never stored: highest durably-committed ledger, clamped by earliest-1. - lastCommitted, err := lastCommittedLedger(cat) + // Derived, never stored: highest durably-committed ledger (frozen cold artifacts + // vs the highest ready hot DB's max committed seq), clamped by earliest-1. Passing + // the logger refines with one read-only open of the highest ready hot DB before + // ingestion opens a writer; a read-only open replays any synced WAL from an + // ungraceful crash into memtables, so MaxCommittedSeq is correct. + lastCommitted, err := lifecycle.LastCommittedLedger(cat, logger) if err != nil { return fmt.Errorf("startup derive last-committed: %w", err) } metrics := observability.MetricsOrNop(cfg.Exec.Metrics) - metrics.LastCommitted(lastCommitted, retentionFloorChunk(lastCommitted, cfg.RetentionChunks, earliest).FirstLedger()) + metrics.LastCommitted(lastCommitted) + metrics.RetentionFloor(lifecycle.EffectiveRetentionFloor(lastCommitted, cfg.RetentionChunks, earliest)) logger.WithField("last_committed", lastCommitted). WithField("earliest", earliest). WithField("pinned", pinned). @@ -56,17 +71,100 @@ func run(ctx context.Context, cfg StartConfig) error { } logger.WithField("last_committed", lastCommitted). - Info("backfill complete — handing off to the read server") + WithField("resume_chunk", chunk.IDFromLedger(lastCommitted+1).String()).
+ Info("backfill complete — opening resume hot tier and ingesting") + + // Step 2: serve + ingest. resumeLedger is one past the last-committed ledger — + // the live chunk's next un-committed ledger. + resumeLedger := lastCommitted + 1 + + // Open the resume chunk's hot DB BEFORE serving reads, so a broken hot tier (a + // "ready" key whose DB won't open) fails startup instead of serving behind a + // crash-looping ingestion loop. run() owns the close only until the loop takes + // over: loopOwnsDB flips true at the errgroup launch, after which the loop's + // deferred close owns it (and g.Wait joins before run returns, so there is no + // window where neither owns it). Restarts re-enter run() from the top, so this + // stays the single initial-open site; the loop still reopens at each boundary. + hotDB, err := openHotDBForChunk(cat, chunk.IDFromLedger(resumeLedger), logger) + if err != nil { + return fmt.Errorf("startup open resume hot tier for ledger %d: %w", resumeLedger, err) + } + loopOwnsDB := false + defer func() { + if !loopOwnsDB { + _ = hotDB.Close() // an error before the loop took ownership + } + }() + + // The live ingestion stream. It owns the captive-core process (started on the + // loop's first pull, torn down when the loop exits), so there is no eager + // prepare and no closer to defer — the loop's ctx-scoped iteration is the + // teardown. OpenCore only constructs, so a start failure surfaces as the loop's + // first stream error for the daemon to classify (and restart). (Eager core start + // before serve would need a LedgerStream.Start hook the SDK deliberately omits.) + stream, err := cfg.Core.OpenCore(ctx) + if err != nil { + return fmt.Errorf("startup open ingestion stream: %w", err) + } + + // The lifecycle goroutine runs one tick per boundary signal; ingestion Publishes + // the just-completed chunk id into a latest-cell. It shares NO in-memory state + // with ingestion — all derived from durable keys. 
+ boundary := lifecycle.NewBoundarySignal() + + // Seed the first tick with the last complete chunk at the resume point so it + // fires at once. Skipped on a young network where no chunk is complete. + if seed := geometry.LastCompleteChunkAt(lastCommitted); seed >= 0 { + boundary.Publish(chunk.ID(seed)) //nolint:gosec // seed >= 0 + } + + // The lifecycle config draws on the SAME Exec wiring backfill uses, so the two + // share one catalog/pool by construction. + lifecycleCfg := lifecycle.Config{ + ExecConfig: cfg.Exec, + RetentionChunks: cfg.RetentionChunks, + }.WithLifecycleDefaults() - // Step 2: serve (injected). Its error is restartable. + // Begin serving reads (injected) BEFORE launching the loops; it must return + // promptly (launch, not block). if err := cfg.ServeReads(ctx); err != nil { return fmt.Errorf("startup serve reads: %w", err) } - // TODO(#772): production ServeReads is a no-op until the cutover, so an immediate - // clean exit after backfill is expected, not a misconfig. - logger.WithField("last_committed", lastCommitted). - Info("read server returned — cold-only daemon shutting down cleanly") - return nil + + // Ingestion and the lifecycle run as a joined pair under errgroup.WithContext: + // gctx cancels as soon as EITHER returns — and WithContext records the returning + // goroutine's error BEFORE canceling, so g.Wait surfaces the real cause, not the + // sibling's induced context-canceled. g.Wait joins both before run returns, + // restoring the single-lifecycle-goroutine invariant across supervisor restarts. + // supervise is the one clean-vs-restart decision point; a canceled parent ctx + // classifies as clean. + g, gctx := errgroup.WithContext(ctx) + // The loop's deferred close now owns hotDB; g.Wait joins it before run returns. 
+ loopOwnsDB = true + g.Go(func() error { + err := runIngestionLoop(gctx, ingestionLoopConfig{ + Stream: stream, + Resume: resumeLedger, + HotDB: hotDB, + Catalog: cat, + Boundary: boundary, + Logger: logger, + Metrics: metrics, + Sink: cfg.Exec.Process.Sink, + }) + if err == nil { + // WithContext cancels gctx (unblocking the lifecycle sibling in g.Wait) + // ONLY on a non-nil return. runIngestionLoop upholds that — every exit is + // an error, including a clean stream end — but guard it so a future nil + // return degrades to a supervised restart, never a silent g.Wait hang. + return errors.New("ingestion loop returned nil unexpectedly") + } + return err + }) + g.Go(func() error { + return lifecycle.Loop(gctx, lifecycleCfg, cat, boundary) + }) + return g.Wait() } // backfillToTip runs the backfill loop, returning lastCommitted as backfill makes @@ -93,8 +191,12 @@ func backfillToTip(ctx context.Context, cfg StartConfig, lastCommitted, earliest tip, err := networkTip(ctx, cfg.NetworkTip, cfg.TipBackoff, cfg.TipMaxAttempts) if err != nil { if lastCommitted < earliest { - // First start, no reachable backend: FATAL — never serve incomplete history. - return 0, fmt.Errorf("%w: %w", ErrFirstStartNoTip, err) + // First start, no reachable backend: error out — the daemon must never + // serve incomplete history. Restartable: the property is enforced by + // returning an error at all (each restart re-checks lastCommitted < + // earliest), not by the exit shape, so a datastore mid-outage or a young + // lake below genesis self-heals on a later restart. + return 0, fmt.Errorf("network tip unavailable and no local history to serve: %w", err) } // Restart with local progress: serve what's below lastCommitted, skip backfill. tip = lastCommitted @@ -103,16 +205,18 @@ func backfillToTip(ctx context.Context, cfg StartConfig, lastCommitted, earliest // max() guards a lagging bulk tip: the tip alone could regress the floor below // pruning or drop a complete last-committed chunk. 
anchor := max(tip, lastCommitted) - rangeStart := retentionFloorChunk(anchor, retentionChunks, earliest) + rangeStart := chunk.IDFromLedger(lifecycle.EffectiveRetentionFloor(anchor, retentionChunks, earliest)) // Same anchor for rangeEnd: a complete last-committed chunk above a lagging tip // still folds in; chunks beyond the tip are durable and self-skip. rangeEndSigned := geometry.LastCompleteChunkAt(anchor) // Mid-chunk resume exclusion: a mid-chunk last-committed within one chunk of the tip - // leaves the partial resume chunk to ingestion. Signed so genesis reads as a boundary. + // leaves the partial resume chunk to ingestion. Under the mid-chunk precondition + // (guarded here) the last COMPLETE chunk is exactly one short of the live chunk, + // so LastCompleteChunkAt names it directly — same vocabulary as rangeEndSigned above. if withinOneChunkOfTip(tip, lastCommitted) && lastCommittedMidChunk(lastCommitted) { - rangeEndSigned = chunkIDOfLedger(lastCommitted) - 1 // one short of the live chunk + rangeEndSigned = geometry.LastCompleteChunkAt(lastCommitted) } // Break on an empty or non-advancing range. @@ -139,7 +243,8 @@ func backfillToTip(ctx context.Context, cfg StartConfig, lastCommitted, earliest metrics.BackfillPass(passDuration) // Refresh the derived gauges as last-committed advances and the floor rises with it. - metrics.LastCommitted(lastCommitted, retentionFloorChunk(lastCommitted, retentionChunks, earliest).FirstLedger()) + metrics.LastCommitted(lastCommitted) + metrics.RetentionFloor(lifecycle.EffectiveRetentionFloor(lastCommitted, retentionChunks, earliest)) logger.WithField("range_lo", rangeStart.String()). WithField("range_hi", rangeEnd.String()). WithField("last_committed", lastCommitted). @@ -156,40 +261,49 @@ func withinOneChunkOfTip(tip, lastCommitted uint32) bool { } // lastCommittedMidChunk reports whether lastCommitted falls strictly inside a chunk. 
-// The only sub-genesis value it sees is the fresh-start sentinel preGenesisLedger, -// where chunkIDOfLedger yields -1 and chunk.ID(-1).LastLedger() wraps (MaxUint32+1 -// overflows to 0) back to exactly preGenesisLedger — so the comparison reports a -// boundary (false) without a special case. +// The genesis sentinel reads as a boundary, never mid-chunk. func lastCommittedMidChunk(lastCommitted uint32) bool { - c := chunkIDOfLedger(lastCommitted) - //nolint:gosec // c is -1 (wraps to preGenesisLedger) or a real chunk id - return lastCommitted != chunk.ID(c).LastLedger() + c := geometry.ChunkIDOfLedger(lastCommitted) + return lastCommitted != geometry.CompleteThrough(c) } -// ErrFirstStartNoTip is the first-start FATAL: no local progress and no reachable -// tip. A sentinel so the supervisor owns the restart and tests can assert it. -var ErrFirstStartNoTip = errors.New("network tip unavailable and no local history to serve") - // --------------------------------------------------------------------------- // Injected external boundaries (so startup is testable with fakes). // --------------------------------------------------------------------------- // NetworkTipBackend samples the bulk backend's current network tip during backfill. +// It is consulted only during backfill; once ingestion runs, captive core is the tip. type NetworkTipBackend interface { NetworkTip(ctx context.Context) (uint32, error) } +// CoreOpener hands back the live ingestion stream the loop consumes. The stream +// OWNS its source's lifecycle (started on the first RawLedgers pull over the +// unbounded range from the loop's resume ledger, torn down when the loop exits), +// so there is no resume arg, no PrepareRange, and no closer for the caller to +// sequence. Production returns a captive-core stream; tests pass a fake +// LedgerStream. +type CoreOpener interface { + OpenCore(ctx context.Context) (ledgerbackend.LedgerStream, error) +} + // StartConfig is run's resolved dependency bundle. 
type StartConfig struct { // Exec drives backfill's RunBackfill; its Catalog/Logger are the shared ones. Exec backfill.ExecConfig - // RetentionChunks is the backfill floor's width; 0 ⇒ the earliest-ledger floor only. + // RetentionChunks bounds the sliding retention floor's width — the backfill + // floor's width too (0 ⇒ the earliest-ledger floor only). run() assembles the + // lifecycle.Config from Exec + this, so the lifecycle and backfill can never + // diverge on the catalog/pool (the invariant is structural, not by comment). RetentionChunks uint32 // NetworkTip samples the bulk backend's tip during backfill. Required. NetworkTip NetworkTipBackend + // Core opens the live ingestion stream (construct-only; the source starts on the loop's first pull). Required. + Core CoreOpener + // ServeReads begins serving reads; it must return promptly, not block. Required. ServeReads func(ctx context.Context) error @@ -207,7 +321,9 @@ const ( defaultTipMaxAttempts = 5 ) -// withDefaults fills the tip-backoff defaults and the embedded ExecConfig defaults. +// withDefaults fills the tip-backoff defaults and the embedded Exec defaults +// (Workers -> GOMAXPROCS). The lifecycle.Config is assembled from Exec + +// RetentionChunks in run().
func (cfg StartConfig) withDefaults() StartConfig { cfg.Exec = cfg.Exec.WithDefaults() if cfg.TipBackoff <= 0 { @@ -229,6 +345,9 @@ func (cfg StartConfig) validate() error { if cfg.NetworkTip == nil { return errors.New("nil StartConfig.NetworkTip") } + if cfg.Core == nil { + return errors.New("nil StartConfig.Core") + } if cfg.ServeReads == nil { return errors.New("nil StartConfig.ServeReads") } diff --git a/cmd/stellar-rpc/internal/fullhistory/startup_test.go b/cmd/stellar-rpc/internal/fullhistory/startup_test.go index 5f2e65c94..a9ba55b12 100644 --- a/cmd/stellar-rpc/internal/fullhistory/startup_test.go +++ b/cmd/stellar-rpc/internal/fullhistory/startup_test.go @@ -11,8 +11,11 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/stellar/go-stellar-sdk/ingest/ledgerbackend" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/backfill" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/catalog" + "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/geometry" "github.com/stellar/stellar-rpc/cmd/stellar-rpc/internal/fullhistory/pkg/chunk" ) @@ -75,16 +78,25 @@ func (r *recordingPlan) snapshot() [][2]chunk.ID { return out } -// startTestConfig builds a cold StartConfig over a real catalog with faked -// boundaries; a non-nil recordPlan wires the runBackfill seam to record passes. +// startTestConfig builds a StartConfig over a real catalog with faked boundaries. +// core may be nil for backfillToTip tests (which call backfillToTip directly and +// never reach validate or the ingestion path); run() tests pass a fakeCore. A +// non-nil recordPlan wires the runBackfill seam to record passes without cold I/O. 
func startTestConfig( - t *testing.T, cat *catalog.Catalog, tip *fakeTipBackend, recordPlan *recordingPlan, + t *testing.T, cat *catalog.Catalog, tip *fakeTipBackend, core *fakeCore, recordPlan *recordingPlan, ) StartConfig { t.Helper() + exec := backfill.ExecConfig{ + Catalog: cat, + Logger: silentLogger(), + Workers: 2, + Process: backfill.ProcessConfig{}, + } cfg := StartConfig{ - Exec: backfill.ExecConfig{Catalog: cat, Logger: silentLogger(), Workers: 2}, + Exec: exec, RetentionChunks: 0, NetworkTip: tip, + Core: core, ServeReads: func(context.Context) error { return nil }, TipBackoff: time.Millisecond, TipMaxAttempts: 3, @@ -98,6 +110,37 @@ func startTestConfig( return cfg } +// fakeCore is a CoreOpener handing back a programmed LedgerStream. The loop opens +// the stream at its resume ledger via RawLedgers(UnboundedRange(resume)), so the +// resume the loop started from is the stream's recorded firstSeen (resumeSeen()). +type fakeCore struct { + stream *fakeCoreStream // programmed; nil → default block-on-ctx stream + openErr error + openedCount atomic.Int32 +} + +func (c *fakeCore) OpenCore(context.Context) (ledgerbackend.LedgerStream, error) { + c.openedCount.Add(1) + if c.openErr != nil { + return nil, c.openErr + } + if c.stream == nil { + // Default: a live stream that blocks until ctx is canceled (the daemon's + // steady state). Tests that need a finite stream set c.stream. + c.stream = &fakeCoreStream{frames: map[uint32][]byte{}, blockOnCtx: true} + } + return c.stream, nil +} + +// resumeSeen returns the resume ledger the loop opened the stream at (the range's +// From()), 0 before the loop has pulled. +func (c *fakeCore) resumeSeen() uint32 { + if c.stream == nil { + return 0 + } + return c.stream.firstSeen.Load() +} + // pinGenesis pins earliest_ledger to genesis (as validateConfig does for a // "genesis" floor) so the first-start predicate classifies correctly. 
func pinGenesis(t *testing.T, cat *catalog.Catalog) { @@ -145,17 +188,17 @@ func TestNetworkTip_CtxCancelAbortsWait(t *testing.T) { // backfillToTip — backfill loop edge cases. // --------------------------------------------------------------------------- -// First start (genesis, no local history) with the tip absent is fatal. -func TestBackfill_FirstStartTipAbsentFatal(t *testing.T) { +// First start (genesis, no local history) with the tip absent errors out +// (restartable — no sentinel; the supervisor retries). +func TestBackfill_FirstStartTipAbsentErrors(t *testing.T) { cat, _ := testCatalog(t) pinGenesis(t, cat) tip := &fakeTipBackend{err: errors.New("backend unreachable"), errFirst: 99} - cfg := startTestConfig(t, cat, tip, &recordingPlan{}) + cfg := startTestConfig(t, cat, tip, nil, &recordingPlan{}) // Empty catalog ⇒ lastCommitted=1 < earliest=2 ⇒ first start with no progress. _, err := backfillToTip(context.Background(), cfg, preGenesisLedger, chunk.FirstLedgerSeq) require.Error(t, err) - require.ErrorIs(t, err, ErrFirstStartNoTip) } // First start (genesis) with the tip present computes range [chunk 0, @@ -167,7 +210,7 @@ func TestBackfill_FirstStartTipPresentComputesRange(t *testing.T) { tipLedger := chunk.ID(3).FirstLedger() + 100 rec := &recordingPlan{} tip := &fakeTipBackend{tips: []uint32{tipLedger}} - cfg := startTestConfig(t, cat, tip, rec) + cfg := startTestConfig(t, cat, tip, nil, rec) last, err := backfillToTip(context.Background(), cfg, preGenesisLedger, chunk.FirstLedgerSeq) require.NoError(t, err) @@ -186,7 +229,7 @@ func TestBackfill_YoungNetworkNoOp(t *testing.T) { // Tip inside chunk 0 (no chunk has fully closed yet). 
tip := &fakeTipBackend{tips: []uint32{chunk.FirstLedgerSeq + 50}} rec := &recordingPlan{} - cfg := startTestConfig(t, cat, tip, rec) + cfg := startTestConfig(t, cat, tip, nil, rec) last, err := backfillToTip(context.Background(), cfg, preGenesisLedger, chunk.FirstLedgerSeq) require.NoError(t, err) @@ -203,7 +246,7 @@ func TestBackfill_SteadyRestartNoOp(t *testing.T) { tipLedger := chunk.ID(3).FirstLedger() + 10 // last complete chunk == 2 rec := &recordingPlan{} tip := &fakeTipBackend{tips: []uint32{tipLedger}} - cfg := startTestConfig(t, cat, tip, rec) + cfg := startTestConfig(t, cat, tip, nil, rec) last, err := backfillToTip(context.Background(), cfg, lastCommitted, chunk.FirstLedgerSeq) require.NoError(t, err) @@ -224,7 +267,7 @@ func TestBackfill_MidChunkResumeExclusion(t *testing.T) { tipLedger := chunk.ID(5).LastLedger() // within one chunk, chunk 5 complete-at-tip rec := &recordingPlan{} tip := &fakeTipBackend{tips: []uint32{tipLedger}} - cfg := startTestConfig(t, cat, tip, rec) + cfg := startTestConfig(t, cat, tip, nil, rec) last, err := backfillToTip(context.Background(), cfg, lastCommitted, chunk.FirstLedgerSeq) require.NoError(t, err) @@ -251,7 +294,7 @@ func TestBackfill_LongDowntimeRePass(t *testing.T) { chunk.ID(6).FirstLedger() + 1, // last complete 5 }} rec := &recordingPlan{} - cfg := startTestConfig(t, cat, tip, rec) + cfg := startTestConfig(t, cat, tip, nil, rec) last, err := backfillToTip(context.Background(), cfg, preGenesisLedger, chunk.FirstLedgerSeq) require.NoError(t, err) @@ -274,7 +317,7 @@ func TestBackfill_RestartTipUnreachableDegrades(t *testing.T) { lastCommitted := chunk.ID(2).LastLedger() // local progress exists tip := &fakeTipBackend{err: errors.New("backend down"), errFirst: 99} rec := &recordingPlan{} - cfg := startTestConfig(t, cat, tip, rec) + cfg := startTestConfig(t, cat, tip, nil, rec) last, err := backfillToTip(context.Background(), cfg, lastCommitted, chunk.FirstLedgerSeq) require.NoError(t, err, "local progress means no 
fatal") @@ -295,7 +338,7 @@ func TestBackfill_LaggingBulkTipFoldsLastCommittedChunk(t *testing.T) { tipLedger := chunk.ID(3).FirstLedger() + 10 // lagging bulk tip in chunk 3 (last complete 2) rec := &recordingPlan{} tip := &fakeTipBackend{tips: []uint32{tipLedger}} - cfg := startTestConfig(t, cat, tip, rec) + cfg := startTestConfig(t, cat, tip, nil, rec) last, err := backfillToTip(context.Background(), cfg, lastCommitted, chunk.FirstLedgerSeq) require.NoError(t, err) @@ -308,57 +351,128 @@ func TestBackfill_LaggingBulkTipFoldsLastCommittedChunk(t *testing.T) { } // --------------------------------------------------------------------------- -// run — the backfill + serve flow. +// run — the backfill + serve + ingest flow. // --------------------------------------------------------------------------- -// A young-network first start does no backfill then serves reads once. -func TestRun_FirstStartBackfillThenServe(t *testing.T) { +// A young-network first start does no backfill, opens the resume hot DB, starts +// the (blocking) fake core, serves reads, and runs the ingestion loop — which +// surfaces the ctx-canceled stream error on a clean shutdown (the daemon top +// level classifies it as clean). The resume ledger is genesis (watermark+1). 
+func TestRun_FirstStartServeIngestCleanShutdown(t *testing.T) { cat, _ := testCatalog(t) pinGenesis(t, cat) served := atomic.Int32{} + core := &fakeCore{stream: &fakeCoreStream{frames: map[uint32][]byte{}, blockOnCtx: true}} tip := &fakeTipBackend{tips: []uint32{chunk.FirstLedgerSeq + 10}} // young: no backfill - cfg := startTestConfig(t, cat, tip, nil) + cfg := startTestConfig(t, cat, tip, core, nil) cfg.ServeReads = func(context.Context) error { served.Add(1); return nil } - require.NoError(t, run(context.Background(), cfg)) + ctx, cancel := context.WithCancel(context.Background()) + errCh := make(chan error, 1) + go func() { errCh <- run(ctx, cfg) }() + + // Wait until the loop has opened the hot DB, started core, served, and parked on + // the blocking stream, then request a clean shutdown. + require.Eventually(t, func() bool { return served.Load() == 1 }, 2*time.Second, 5*time.Millisecond) + cancel() + + select { + case err := <-errCh: + require.ErrorIs(t, err, context.Canceled, "clean shutdown surfaces the ctx-canceled error") + case <-time.After(3 * time.Second): + t.Fatal("run did not return after ctx cancel") + } + require.Equal(t, int32(1), served.Load(), "reads were served exactly once") + require.Equal(t, int32(1), core.openedCount.Load(), "captive core started once") + require.Equal(t, uint32(chunk.FirstLedgerSeq), core.resumeSeen(), + "resume ledger is genesis on a fresh start (watermark+1)") + + // The resume chunk's hot key is "ready" (opened, boundary never crossed). + state, err := cat.HotState(chunk.IDFromLedger(chunk.FirstLedgerSeq)) + require.NoError(t, err) + assert.Equal(t, geometry.HotReady, state) } -// run surfaces a ServeReads error wrapped, as a restartable failure. +// A ServeReads error is surfaced wrapped as a restartable failure (NOT clean). 
+// run() opens the resume hot DB and starts core BEFORE serving; a serve error +// after those returns via run()'s defer, which closes the DB (the loop never took +// ownership), so a restart can reopen it — asserted by the reopen below. func TestRun_ServeReadsErrorSurfaces(t *testing.T) { cat, _ := testCatalog(t) pinGenesis(t, cat) + core := &fakeCore{stream: &fakeCoreStream{frames: map[uint32][]byte{}, blockOnCtx: true}} tip := &fakeTipBackend{tips: []uint32{chunk.FirstLedgerSeq + 10}} - cfg := startTestConfig(t, cat, tip, nil) + cfg := startTestConfig(t, cat, tip, core, nil) cfg.ServeReads = func(context.Context) error { return errors.New("rpc bind failed") } err := run(context.Background(), cfg) require.Error(t, err) require.Contains(t, err.Error(), "serve reads") + require.NotErrorIs(t, err, context.Canceled, "a ServeReads error is restartable, not a clean shutdown") + require.Equal(t, int32(1), core.openedCount.Load(), "core was started before serving") + + // run() opened the resume hot DB before serving and closed it on the error path + // (the loop never took ownership): reopening it succeeds (LOCK released). + db, err := openHotDBForChunk(cat, chunk.IDFromLedger(chunk.FirstLedgerSeq), silentLogger()) + require.NoError(t, err, "the resume hot DB is reopenable — run released its LOCK") + require.NoError(t, db.Close()) +} + +// The resume hot DB and core are opened BEFORE reads are served (the design's +// fail-fast order): by the time ServeReads runs, the resume chunk's hot key is +// already "ready" and core has started — so a broken hot tier / core fails startup +// instead of serving behind a crash-looping loop. Asserted from inside ServeReads, +// which then errors to avoid entering the blocking loop. 
+func TestRun_OpensHotDBAndCoreBeforeServe(t *testing.T) { + cat, _ := testCatalog(t) + pinGenesis(t, cat) + resumeChunk := chunk.IDFromLedger(chunk.FirstLedgerSeq) // fresh start ⇒ resume at genesis + core := &fakeCore{stream: &fakeCoreStream{frames: map[uint32][]byte{}, blockOnCtx: true}} + tip := &fakeTipBackend{tips: []uint32{chunk.FirstLedgerSeq + 10}} // young ⇒ no backfill + cfg := startTestConfig(t, cat, tip, core, nil) + + var stateAtServe geometry.HotState + var coreAtServe int32 + cfg.ServeReads = func(context.Context) error { + st, herr := cat.HotState(resumeChunk) + require.NoError(t, herr) + stateAtServe = st + coreAtServe = core.openedCount.Load() + return errors.New("stop before the blocking loop") + } + + err := run(context.Background(), cfg) + require.Error(t, err) + require.Contains(t, err.Error(), "serve reads") + assert.Equal(t, geometry.HotReady, stateAtServe, "resume hot DB is open+ready before serve") + assert.Equal(t, int32(1), coreAtServe, "core is opened before serve") } -// run fatals with ErrFirstStartNoTip on a first start with an -// unavailable tip; reads are never served. -func TestRun_FirstStartNoTipFatal(t *testing.T) { +// run errors on a first start with an unavailable tip (restartable, no sentinel); +// reads are never served and ingestion never starts. 
+func TestRun_FirstStartNoTipErrors(t *testing.T) { cat, _ := testCatalog(t) pinGenesis(t, cat) served := atomic.Int32{} + core := &fakeCore{} tip := &fakeTipBackend{err: errors.New("unreachable"), errFirst: 99} - cfg := startTestConfig(t, cat, tip, nil) + cfg := startTestConfig(t, cat, tip, core, nil) cfg.ServeReads = func(context.Context) error { served.Add(1); return nil } err := run(context.Background(), cfg) - require.ErrorIs(t, err, ErrFirstStartNoTip) - require.Zero(t, served.Load(), "reads are never served when backfill fatals") + require.Error(t, err) + require.Zero(t, served.Load(), "reads are never served when backfill errors") + require.Zero(t, core.openedCount.Load(), "core never starts when backfill errors") } -// run surfaces a missing earliest_ledger pin loudly (a wiring error, -// not a first start to mis-classify). +// run surfaces a missing earliest_ledger pin loudly (a wiring error, not a first +// start to mis-classify). func TestRun_RequiresEarliestPin(t *testing.T) { cat, _ := testCatalog(t) // No pinGenesis. - cfg := startTestConfig(t, cat, &fakeTipBackend{tips: []uint32{50_000}}, nil) + cfg := startTestConfig(t, cat, &fakeTipBackend{tips: []uint32{50_000}}, &fakeCore{}, nil) err := run(context.Background(), cfg) require.Error(t, err) require.Contains(t, err.Error(), "earliest_ledger pinned") @@ -367,13 +481,18 @@ func TestRun_RequiresEarliestPin(t *testing.T) { // run validates its injected boundaries. 
func TestRun_ValidatesConfig(t *testing.T) { cat, _ := testCatalog(t) - base := startTestConfig(t, cat, &fakeTipBackend{tips: []uint32{50_000}}, nil) + base := startTestConfig(t, cat, &fakeTipBackend{tips: []uint32{50_000}}, &fakeCore{}, nil) t.Run("nil NetworkTip", func(t *testing.T) { cfg := base cfg.NetworkTip = nil require.Error(t, run(context.Background(), cfg)) }) + t.Run("nil Core", func(t *testing.T) { + cfg := base + cfg.Core = nil + require.Error(t, run(context.Background(), cfg)) + }) t.Run("nil ServeReads", func(t *testing.T) { cfg := base cfg.ServeReads = nil @@ -436,7 +555,7 @@ func TestBackfill_ReportsPassAndProgress(t *testing.T) { rp := &recordingPlan{} tipLedger := chunk.ID(3).LastLedger() + 5 tip := &fakeTipBackend{tips: []uint32{tipLedger}} - start := startTestConfig(t, cat, tip, rp) + start := startTestConfig(t, cat, tip, nil, rp) metrics := newRecordingMetrics() start.Exec.Metrics = metrics