Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,16 @@ scp.pending.discarded | counter | number of discarded enve
scp.pending.fetching | counter | number of incomplete envelopes
scp.pending.processed | counter | number of already processed envelopes
scp.pending.ready | counter | number of envelopes ready to process
scp.skip.externalized | counter | number of times the local node externalized a skip-ledger value
scp.skip.value-replaced | counter | number of times the ballot protocol swapped a value for a skip-ledger value
scp.sync.lost | meter | validator lost sync
scp.timeout.nominate | meter | timeouts in nomination
scp.timeout.prepare | meter | timeouts in ballot protocol
scp.timing.nominated | timer | time spent in nomination
scp.timing.externalized | timer | time spent in ballot protocol
scp.timing.first-to-self-externalize-lag | timer | delay between first externalize message and local node externalizing
scp.timing.self-to-others-externalize-lag | timer | delay between local node externalizing and later externalize messages from other nodes
scp.timing.ballot-blocked-on-txset | timer | time balloting was blocked waiting for a txset download (milliseconds)
scp.value.invalid | meter | SCP value is invalid
scp.value.valid | meter | SCP value is valid
scp.slot.values-referenced | histogram | number of values referenced per consensus round
Expand Down
6 changes: 6 additions & 0 deletions docs/stellar-core_example.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,12 @@ PEER_TIMEOUT=30
# time when authenticated.
PEER_STRAGGLER_TIMEOUT=120

# TODO: Update these docs vv
# TX_SET_DOWNLOAD_TIMEOUT (Integer) default 5000
# Time in milliseconds before a node gives up waiting on a transaction set and
# votes to skip the ledger.
TX_SET_DOWNLOAD_TIMEOUT=5000

# MAX_BATCH_WRITE_COUNT (Integer) default 1024
# How many messages can this server send at once to a peer
MAX_BATCH_WRITE_COUNT=1024
Expand Down
2 changes: 2 additions & 0 deletions src/herder/Herder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ uint32 const Herder::SCP_EXTRA_LOOKBACK_LEDGERS = 3u;
std::chrono::minutes const Herder::TX_SET_GC_DELAY(1);
std::chrono::minutes const Herder::CHECK_FOR_DEAD_NODES_MINUTES(15);
uint32 const Herder::FLOW_CONTROL_BYTES_EXTRA_BUFFER(2000);

Hash const Herder::SKIP_LEDGER_HASH{};
}
13 changes: 12 additions & 1 deletion src/herder/Herder.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,17 @@
#include <functional>
#include <memory>
#include <string>
#include <variant>

namespace stellar
{

// Returned by getTxSet to distinguish "skip" values (no real tx set)
// from "not yet downloaded" (nullptr).
struct SkipTxSet
{
};
using TxSetResult = std::variant<TxSetXDRFrameConstPtr, SkipTxSet>;
class Application;
class XDROutputFileStream;

Expand Down Expand Up @@ -79,6 +87,9 @@ class Herder

static std::chrono::minutes const TX_SET_GC_DELAY;

// TODO: Docs
static Hash const SKIP_LEDGER_HASH;

enum State
{
// Starting up, no state is known
Expand Down Expand Up @@ -147,7 +158,7 @@ class Herder
#endif
virtual void peerDoesntHave(stellar::MessageType type,
uint256 const& itemID, Peer::pointer peer) = 0;
virtual TxSetXDRFrameConstPtr getTxSet(Hash const& hash) = 0;
virtual TxSetResult getTxSet(Hash const& hash) = 0;
virtual SCPQuorumSetPtr getQSet(Hash const& qSetHash) = 0;

// We are learning about a new envelope.
Expand Down
80 changes: 68 additions & 12 deletions src/herder/HerderImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,19 @@ HerderImpl::processExternalized(uint64 slotIndex, StellarValue const& value,
slotIndex, hexAbbrev(value.txSetHash));
}

TxSetXDRFrameConstPtr externalizedSet =
mPendingEnvelopes.getTxSet(value.txSetHash);
auto result = mPendingEnvelopes.getTxSet(value.txSetHash);
TxSetXDRFrameConstPtr externalizedSet;
if (std::holds_alternative<SkipTxSet>(result))
{
auto const& ov = value.ext.originalValue();
externalizedSet = TxSetXDRFrame::makeEmpty(ov.previousLedgerHash,
ov.previousLedgerVersion);
}
else
{
externalizedSet = std::get<TxSetXDRFrameConstPtr>(result);
}
releaseAssert(externalizedSet != nullptr);

// save the SCP messages in the database
if (mApp.getConfig().MODE_STORES_HISTORY_MISC)
Expand Down Expand Up @@ -898,6 +909,9 @@ HerderImpl::recvSCPEnvelope(SCPEnvelope const& envelope)
return Herder::ENVELOPE_STATUS_SKIPPED_SELF;
}

// This call fetches everything. Will only return ENVELOPE_STATUS_READY once
// everything is fetched though! Will need a new status to allow it to
// proceed to nomination at least, I think.
auto status = mPendingEnvelopes.recvSCPEnvelope(envelope);
if (status == Herder::ENVELOPE_STATUS_READY)
{
Expand All @@ -912,10 +926,26 @@ HerderImpl::recvSCPEnvelope(SCPEnvelope const& envelope)
}
else
{
if (status == Herder::ENVELOPE_STATUS_FETCHING)
SCPStatementType type = envelope.statement.pledges.type();
// Allow parallel tx set downloading if the node is in sync and this is
// a NOMINATE or PREPARE message. Technically both of these criteria
// should be properly handled downstream, but this provides some
// additional assurance.
if (mApp.getState() == Application::State::APP_SYNCED_STATE &&
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, herder having to deal with the guts of SCP and differentiate between nomination, prepare and commit seems like an improper abstraction/separation of responsibilities situation. It also seems quite brittle - what if we change SCP in the future such that some CONFIRM messages would be allowed and some won't? or some PREPARE messages should be stalled?

It looks to me like the main problem is the weird mechanism in herder (which existed prior to your changes), which decides when to feed new messages to SCP. Does it really need to live in herder? Have you considered moving the mechanism inside of SCP, so it can automatically decide when to advance the protocol vs when to stall (then re-feed the messages back to the protocol).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I see what you mean. I'll try moving this into SCP after wrapping up the test harness work.

status == Herder::ENVELOPE_STATUS_FETCHING &&
(type == SCP_ST_NOMINATE || type == SCP_ST_PREPARE))
{
std::string txt("FETCHING");
ZoneText(txt.c_str(), txt.size());

// If we have the quorum set, then proceed without the tx set.
auto qSetHash = Slot::getCompanionQuorumSetHashFromStatement(
envelope.statement);
auto maybeQSet = mApp.getHerder().getQSet(qSetHash);
if (maybeQSet)
{
processSCPQueue(true);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this correct? wouldn't it be possible to have other message types in this queue, which would cause them to get processed as well?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's fine? Any CONFIRM or EXTERNALIZE messages in the queue would have fully downloaded tx sets and should be safe to process. The "partially ready" queue only contains nomination and PREPARE messages, which should also be safe to process.

I don't see how this is different than the processSCPQueue call in the Herder::ENVELOPE_STATUS_READY case. In either case, all messages in the queue should safe to process, even though you may end up processing messages other than the one you just received.

Do you see a scenario where it would be wrong to process other message types here?

Copy link
Copy Markdown
Contributor

@marta-lokhova marta-lokhova Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My worry is processing messages in unexpected order, such that it trips downstream invariance (e.g. value must be downloaded, but we accidentally fed the message that's not fully ready, crashing the node).

I don't see how this is different than the processSCPQueue call in the Herder::ENVELOPE_STATUS_READY case.

Do we understand why? Messages can be received in any order, so let's say we feed a ballot message with 1 value followed by a nomination message referencing 3 values that aren't all downloaded. I don't quite understand how this wouldn't trip SCP.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, is this due to isNewerStatement checks in SCP itself? If so, that seems quite brittle at the two subsystems have an implicit correctness dependency between them that isn't documented. I know this was here before, but what worries me is that we're building on top of badness, making this even harder to maintain and reason about.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value must be downloaded, but we accidentally fed the message that's not fully ready, crashing the node

If the value must be downloaded (such as for CONFIRM and EXTERNALIZE), then the envelope isn't considered ready or partially ready and isn't in either queue, so processSCPQueue won't pass it on.

Messages can be received in any order, so let's say we feed a ballot message with 1 value followed by a nomination message referencing 3 values that aren't all downloaded. I don't quite understand how this wouldn't trip SCP.

I don't understand why this would be a problem? Nomination can run concurrently with balloting. In this case, we'd treat the undownloaded values as valid in nomination. If balloting times out and hasn't set mValueOverride, it'll pick up the latest composite candidate from nomination. If it has set mValueOverride (and therefore set h), it will stick with the value from balloting. This is no different than the situation without parallel downloading.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Following up about this (and the other comments on HerderImpl) to capture some discussion from our meeting.

The plan is to try pushing much of this logic (and related logic in PendingEnvelopes) down to SCP. My first thought is that it would live in Slot. PendingEnvelopes would treat all messages with downloaded qsets as "fully ready" (there would be no "partially ready" queue anymore) and pass them on to Slot. Slot would then interface with item fetcher as needed, and manage which values get passed to nomination/ballot protocol, and which get held back (CONFIRM and EXTERNALIZE messages without downloaded qsets). Slot would also handle delivering / replaying messages as tx sets arrive, potentially via a callback registered with ItemFetcher.

The first step here is to draw out the various data flows between Herder, PendingEnvelopes, ItemFetcher, and SCP. From there we can decide how to change these interfaces.

}
}
else if (status == Herder::ENVELOPE_STATUS_PROCESSED)
{
Expand Down Expand Up @@ -1138,6 +1168,7 @@ void
HerderImpl::processSCPQueueUpToIndex(uint64 slotIndex)
{
ZoneScoped;
CLOG_TRACE(Proto, "Processing SCP queue up to index {}", slotIndex);
while (true)
{
SCPEnvelopeWrapperPtr envW = mPendingEnvelopes.pop(slotIndex);
Expand Down Expand Up @@ -1386,7 +1417,7 @@ HerderImpl::peerDoesntHave(MessageType type, uint256 const& itemID,
mPendingEnvelopes.peerDoesntHave(type, itemID, peer);
}

TxSetXDRFrameConstPtr
TxSetResult
HerderImpl::getTxSet(Hash const& hash)
{
return mPendingEnvelopes.getTxSet(hash);
Expand Down Expand Up @@ -2162,11 +2193,15 @@ HerderImpl::persistSCPState(uint64 slot)
// saves transaction sets referred by the statement
for (auto const& h : getValidatedTxSetHashes(e))
{
auto txSet = mPendingEnvelopes.getTxSet(h);
if (txSet && !mApp.getPersistentState().hasTxSet(h))
auto result = mPendingEnvelopes.getTxSet(h);
if (auto* txSetPtr = std::get_if<TxSetXDRFrameConstPtr>(&result))
{
txSets.insert(std::make_pair(h, txSet));
if (*txSetPtr && !mApp.getPersistentState().hasTxSet(h))
{
txSets.insert(std::make_pair(h, *txSetPtr));
}
}
// SkipTxSet: nothing to persist
}
Hash qsHash = Slot::getCompanionQuorumSetHashFromStatement(e.statement);
SCPQuorumSetPtr qSet = mPendingEnvelopes.getQSet(qsHash);
Expand Down Expand Up @@ -2705,11 +2740,32 @@ bool
HerderImpl::verifyStellarValueSignature(StellarValue const& sv)
{
ZoneScoped;
auto [b, _] = PubKeyUtils::verifySig(
sv.ext.lcValueSignature().nodeID, sv.ext.lcValueSignature().signature,
xdr::xdr_to_opaque(mApp.getNetworkID(), ENVELOPE_TYPE_SCPVALUE,
sv.txSetHash, sv.closeTime));
return b;
switch (sv.ext.v())
{
case STELLAR_VALUE_BASIC:
// This function should never be called with an unsigned value
releaseAssert(false);
case STELLAR_VALUE_SIGNED:
return PubKeyUtils::verifySig(sv.ext.lcValueSignature().nodeID,
sv.ext.lcValueSignature().signature,
xdr::xdr_to_opaque(mApp.getNetworkID(),
ENVELOPE_TYPE_SCPVALUE,
sv.txSetHash,
sv.closeTime))
.valid;
case STELLAR_VALUE_SKIP:
{
auto const& ov = sv.ext.originalValue();
return PubKeyUtils::verifySig(
ov.lcValueSignature.nodeID, ov.lcValueSignature.signature,
xdr::xdr_to_opaque(mApp.getNetworkID(),
ENVELOPE_TYPE_SCPVALUE, ov.txSetHash,
sv.closeTime))
.valid;
}
default:
releaseAssert(false);
}
}

StellarValue
Expand Down
4 changes: 2 additions & 2 deletions src/herder/HerderImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class HerderImpl : public Herder
bool recvTxSet(Hash const& hash, TxSetXDRFrameConstPtr txset) override;
void peerDoesntHave(MessageType type, uint256 const& itemID,
Peer::pointer peer) override;
TxSetXDRFrameConstPtr getTxSet(Hash const& hash) override;
TxSetResult getTxSet(Hash const& hash) override;
SCPQuorumSetPtr getQSet(Hash const& qSetHash) override;

// process ready SCP messages. This may trigger the node to externalze new
Expand Down Expand Up @@ -234,7 +234,7 @@ class HerderImpl : public Herder
// helper function to sign envelopes
void signEnvelope(SecretKey const& s, SCPEnvelope& envelope);

// helper function to verify SCPValues are signed
// helper function to verify SCPValues signatures
bool verifyStellarValueSignature(StellarValue const& sv);

size_t getMaxQueueSizeOps() const override;
Expand Down
Loading
Loading