22 changes: 22 additions & 0 deletions src/bucket/FutureBucket.h
@@ -133,6 +133,11 @@ template <class BucketT> class FutureBucket
// Return all hashes referenced by this future.
std::vector<std::string> getHashes() const;

// Maximum number of shadow hashes allowed during deserialization.
// Shadows were removed in protocol 12; this cap prevents OOM from
// crafted HAS JSON with enormous shadow arrays.
static constexpr size_t MAX_SHADOW_HASHES = 32;

template <class Archive>
void
load(Archive& ar)
@@ -145,9 +150,26 @@ template <class BucketT> class FutureBucket
ar(cereal::make_nvp("curr", mInputCurrBucketHash));
ar(cereal::make_nvp("snap", mInputSnapBucketHash));
ar(cereal::make_nvp("shadow", mInputShadowBucketHashes));
// Validate required fields before checkState to avoid
// releaseAssert abort on malformed archive data
if (mInputCurrBucketHash.empty() || mInputSnapBucketHash.empty())
{
throw std::runtime_error(
"FutureBucket FB_HASH_INPUTS has empty curr or snap hash");
}
if (mInputShadowBucketHashes.size() > MAX_SHADOW_HASHES)
{
throw std::runtime_error(
"FutureBucket has too many shadow hashes");
}
break;
case FB_HASH_OUTPUT:
ar(cereal::make_nvp("output", mOutputBucketHash));
if (mOutputBucketHash.empty())
{
throw std::runtime_error(
"FutureBucket FB_HASH_OUTPUT has empty output hash");
}
break;
case FB_CLEAR:
break;
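For context, here is a minimal standalone sketch of the deserialize-then-validate pattern the hunk above adds. It assumes nothing beyond cereal itself; the struct name, the main() driver, and everything except the nvp keys mirrored from the diff are hypothetical, not stellar-core code.

// Hypothetical sketch: reject malformed input inside load() so crafted
// JSON fails with a catchable exception instead of asserting later.
#include <cereal/archives/json.hpp>
#include <cereal/types/string.hpp>
#include <cereal/types/vector.hpp>
#include <cstddef>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

struct FutureBucketSketch
{
    static constexpr size_t MAX_SHADOW_HASHES = 32;
    std::string curr, snap;
    std::vector<std::string> shadow;

    template <class Archive>
    void
    load(Archive& ar)
    {
        ar(cereal::make_nvp("curr", curr));
        ar(cereal::make_nvp("snap", snap));
        ar(cereal::make_nvp("shadow", shadow));
        // Validate before anything downstream asserts on this state.
        if (curr.empty() || snap.empty())
        {
            throw std::runtime_error("empty curr or snap hash");
        }
        if (shadow.size() > MAX_SHADOW_HASHES)
        {
            throw std::runtime_error("too many shadow hashes");
        }
    }
};

int
main()
{
    // Crafted input: empty curr/snap is enough to trip the first check;
    // a real attack would instead carry an enormous shadow array.
    std::istringstream in(
        R"({"fb": {"curr": "", "snap": "", "shadow": []}})");
    cereal::JSONInputArchive ar(in);
    FutureBucketSketch fb;
    try
    {
        ar(cereal::make_nvp("fb", fb));
    }
    catch (std::runtime_error const&)
    {
        // rejected cleanly, no abort
    }
}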
117 changes: 110 additions & 7 deletions src/history/HistoryArchive.cpp
@@ -23,9 +23,11 @@
#include <Tracy.hpp>
#include <fmt/format.h>

#include <cctype>
#include <cereal/archives/json.hpp>
#include <cereal/cereal.hpp>
#include <cereal/types/vector.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <medida/meter.h>
@@ -148,10 +150,107 @@ HistoryArchiveState::toString() const
return out.str();
}

static bool
isValidHexHash(std::string const& s)
{
if (s.size() != 64)
{
return false;
}
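// Note: iterating as unsigned char matters here; passing a plain
// (possibly negative) char to std::isxdigit is undefined behavior.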
for (unsigned char c : s)
{
if (!std::isxdigit(c))
{
return false;
}
}
return true;
}

static void
validateHASAfterDeserialization(HistoryArchiveState const& has)
{
if (has.version < HistoryArchiveState::
HISTORY_ARCHIVE_STATE_VERSION_BEFORE_HOT_ARCHIVE ||
has.version >=
HistoryArchiveState::HISTORY_ARCHIVE_STATE_VERSION_LAST_ITEM)
{
CLOG_ERROR(History, "Unexpected history archive state version: {}",
has.version);
throw std::runtime_error("unexpected history archive state version");
}

if (has.currentBuckets.size() != LiveBucketList::kNumLevels)
{
throw std::runtime_error(
fmt::format(FMT_STRING("Invalid currentBuckets count: {}"),
has.currentBuckets.size()));
}

Review thread:
Contributor: are you sure this is correct in all protocols?
Author: Yep. I just downloaded a couple of very early history files to double check, still the bucket level constant.

if (has.hasHotArchiveBuckets() &&
has.hotArchiveBuckets.size() != HotArchiveBucketList::kNumLevels)
{
throw std::runtime_error(
fmt::format(FMT_STRING("Invalid hotArchiveBuckets count: {}"),
has.hotArchiveBuckets.size()));
}

// Prevent integer overflow in downstream CheckpointRange calculations
if (has.currentLedger > HistoryArchiveState::MAX_CURRENT_LEDGER)
{
throw std::runtime_error(fmt::format(
FMT_STRING("currentLedger {} is too large"), has.currentLedger));
}

// Validate all bucket hash strings are well-formed 64-character hex
auto validateHashesInBuckets = [](auto const& buckets,
std::string const& name) {
for (size_t i = 0; i < buckets.size(); ++i)
{
auto const& level = buckets[i];
if (!isValidHexHash(level.curr))
{
throw std::runtime_error(fmt::format(
FMT_STRING("Invalid {} curr hash at level {}"), name, i));
}
if (!isValidHexHash(level.snap))
{
throw std::runtime_error(fmt::format(
FMT_STRING("Invalid {} snap hash at level {}"), name, i));
}
for (auto const& h : level.next.getHashes())
{
if (!isValidHexHash(h))
{
throw std::runtime_error(fmt::format(
FMT_STRING("Invalid {} next hash at level {}"), name,
i));
}
}
}
};

validateHashesInBuckets(has.currentBuckets, "currentBuckets");
if (has.hasHotArchiveBuckets())
{
validateHashesInBuckets(has.hotArchiveBuckets, "hotArchiveBuckets");
}
}

void
HistoryArchiveState::load(std::string const& inFile)
{
ZoneScoped;

// Check file size before parsing to prevent OOM from crafted JSON
auto fileSize = std::filesystem::file_size(inFile);
if (fileSize > MAX_HAS_FILE_SIZE)
{
throw std::runtime_error(
fmt::format(FMT_STRING("HAS file size {} exceeds maximum {}"),
fileSize, MAX_HAS_FILE_SIZE));
}

std::ifstream in(inFile);
if (!in)
{
Expand All @@ -161,22 +260,26 @@ HistoryArchiveState::load(std::string const& inFile)
in.exceptions(std::ios::badbit);
cereal::JSONInputArchive ar(in);
serialize(ar);
-if (version != HISTORY_ARCHIVE_STATE_VERSION_BEFORE_HOT_ARCHIVE &&
-version != HISTORY_ARCHIVE_STATE_VERSION_WITH_HOT_ARCHIVE)
-{
-CLOG_ERROR(History, "Unexpected history archive state version: {}",
-version);
-throw std::runtime_error("unexpected history archive state version");
-}
validateHASAfterDeserialization(*this);
}

void
HistoryArchiveState::fromString(std::string const& str)
{
ZoneScoped;

// Check string size before parsing to prevent OOM from crafted JSON
if (str.size() > MAX_HAS_FILE_SIZE)
{
throw std::runtime_error(
fmt::format(FMT_STRING("HAS string size {} exceeds maximum {}"),
str.size(), MAX_HAS_FILE_SIZE));
}

std::istringstream in(str);
cereal::JSONInputArchive ar(in);
serialize(ar);
validateHASAfterDeserialization(*this);
}
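As a usage illustration, here is a hypothetical check (not in this PR, and assuming only the fromString signature shown above plus a default-constructible HistoryArchiveState): the size guard fires before cereal allocates anything for the document.

// Hypothetical sketch: an oversized payload is rejected up front, so a
// crafted document cannot force large allocations during JSON parsing.
void
rejectOversizedHAS()
{
    HistoryArchiveState has;
    std::string crafted(11 * 1024 * 1024, '['); // > 10 MB cap
    try
    {
        has.fromString(crafted); // throws before any JSON parsing
    }
    catch (std::runtime_error const&)
    {
        // expected: "HAS string size ... exceeds maximum ..."
    }
}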

std::string
26 changes: 22 additions & 4 deletions src/history/HistoryArchive.h
@@ -80,10 +80,28 @@ struct HistoryArchiveState
static constexpr size_t MAX_HISTORY_ARCHIVE_BUCKET_SIZE =
1024ull * 1024ull * 1024ull * 100ull; // 100 GB

-static inline unsigned const
-HISTORY_ARCHIVE_STATE_VERSION_BEFORE_HOT_ARCHIVE = 1;
-static inline unsigned const
-HISTORY_ARCHIVE_STATE_VERSION_WITH_HOT_ARCHIVE = 2;
// Maximum allowed size for a History Archive State (HAS) JSON
// file or string. This applies only to the HAS JSON document itself
// (stellar-history.json), not to bucket files or other archive
// contents. Checked before parsing to prevent OOM from crafted JSON
// with oversized arrays or strings. A valid HAS is typically under
// 50KB; 10MB is extremely generous.
static constexpr size_t MAX_HAS_FILE_SIZE = 10 * 1024 * 1024; // 10 MB

// Upper bound on currentLedger to prevent uint32_t overflow in
// downstream arithmetic. Callers round currentLedger up to the next
// checkpoint boundary and then step one checkpoint further, adding up
// to 128 to this value. We'll never get near this bound in practice;
// subtracting 256 leaves extra headroom against overflow.
static constexpr uint32_t MAX_CURRENT_LEDGER =
std::numeric_limits<uint32_t>::max() - 256;

Review thread:
Contributor: how did you arrive at this number? has.currentLedger = max should be a valid HAS.
Author: Later on in the call stack we round this value up to the next checkpoint boundary, then later on move that value up another checkpoint, so has.currentLedger + 128. This would cause an overflow if we allowed the actual uint32_t max. I subtracted 256 just for some buffer; I'll add a comment. This would take about 81 years to hit after switching to 600 ms ledgers, so I think we're safe. At least, it's not my problem. Making a note here for the AIs to fix in 2085.
Contributor: I think this is the wrong place to solve this issue. We should just use saturated adds, and return work failure if we hit overflow in the respective works. Hard-coding 256 here is pretty awkward, and is not future-proof (implementation changes all the time).
enum Version : unsigned
{
HISTORY_ARCHIVE_STATE_VERSION_BEFORE_HOT_ARCHIVE = 1,
HISTORY_ARCHIVE_STATE_VERSION_WITH_HOT_ARCHIVE = 2,
HISTORY_ARCHIVE_STATE_VERSION_LAST_ITEM
};

struct BucketHashReturnT
{
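For reference, a sketch of the saturating-add alternative the reviewer describes in the thread above. This is hypothetical code, not part of this PR; the 64-ledger checkpoint size and the +128 worst case are taken from the author's comment, and the rounding helper is an assumption about the call sites.

#include <cstdint>
#include <limits>

// Clamp instead of wrapping; a caller can compare the result against
// UINT32_MAX and fail the work rather than computing a bogus range.
inline uint32_t
saturatingAdd(uint32_t a, uint32_t b)
{
    return a > std::numeric_limits<uint32_t>::max() - b
               ? std::numeric_limits<uint32_t>::max()
               : a + b;
}

// Worst case from the thread: round currentLedger up to the next
// 64-ledger checkpoint boundary, then advance one more checkpoint,
// i.e. at most currentLedger + 128.
inline uint32_t
nextCheckpointPlusOne(uint32_t currentLedger)
{
    uint32_t const freq = 64;
    uint32_t rounded = saturatingAdd(currentLedger, freq - 1) / freq * freq;
    return saturatingAdd(rounded, freq); // UINT32_MAX signals overflow
}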
26 changes: 16 additions & 10 deletions src/history/HistoryManagerImpl.cpp
@@ -337,7 +337,10 @@ HistoryManagerImpl::queueCurrentHistory(uint32_t ledger, uint32_t ledgerVers)
}

CLOG_INFO(History, "Queueing publish state for ledger {}", ledger);
-mEnqueueTimes.emplace(ledger, std::chrono::steady_clock::now());
{
LOCK_GUARD(mEnqueueTimesMtx, guard);
mEnqueueTimes.emplace(ledger, std::chrono::steady_clock::now());
}

// We queue history inside ledger commit, so do not finalize the file yet
writeCheckpointFile(mApp, has, /* finalize */ false);
@@ -587,16 +590,19 @@ HistoryManagerImpl::historyPublished(
ZoneScoped;
if (success)
{
-auto iter = mEnqueueTimes.find(ledgerSeq);
-if (iter != mEnqueueTimes.end())
-{
-auto now = std::chrono::steady_clock::now();
-CLOG_INFO(
-Perf, "Published history for ledger {} in {} seconds",
-ledgerSeq,
-std::chrono::duration<double>(now - iter->second).count());
-mEnqueueToPublishTimer.Update(now - iter->second);
-mEnqueueTimes.erase(iter);
LOCK_GUARD(mEnqueueTimesMtx, guard);
auto iter = mEnqueueTimes.find(ledgerSeq);
if (iter != mEnqueueTimes.end())
{
auto now = std::chrono::steady_clock::now();
CLOG_INFO(
Perf, "Published history for ledger {} in {} seconds",
ledgerSeq,
std::chrono::duration<double>(now - iter->second).count());
mEnqueueToPublishTimer.Update(now - iter->second);
mEnqueueTimes.erase(iter);
}
}

this->mPublishSuccess.Mark();
5 changes: 4 additions & 1 deletion src/history/HistoryManagerImpl.h
@@ -6,6 +6,7 @@

#include "history/CheckpointBuilder.h"
#include "history/HistoryManager.h"
#include "util/ThreadAnnotations.h"
#include "util/TmpDir.h"
#include "work/Work.h"
#include <memory>
@@ -32,7 +33,9 @@ class HistoryManagerImpl : public HistoryManager
medida::Meter& mPublishFailure;

medida::Timer& mEnqueueToPublishTimer;
UnorderedMap<uint32_t, std::chrono::steady_clock::time_point> mEnqueueTimes;
ANNOTATED_MUTEX(mEnqueueTimesMtx);
UnorderedMap<uint32_t, std::chrono::steady_clock::time_point>
mEnqueueTimes GUARDED_BY(mEnqueueTimesMtx);
CheckpointBuilder mCheckpointBuilder;

#ifdef BUILD_TESTS
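The ANNOTATED_MUTEX / GUARDED_BY macros come from util/ThreadAnnotations.h; presumably they wrap Clang's thread-safety attributes. Below is a minimal sketch of that pattern, with assumed macro expansions that may differ from the real header. It is clang-only; build with -Wthread-safety.

// Hypothetical sketch of Clang thread-safety annotations; the real
// macros in util/ThreadAnnotations.h may be defined differently.
// compile: clang++ -Wthread-safety -c sketch.cpp
#include <mutex>

#define CAPABILITY(x) __attribute__((capability(x)))
#define GUARDED_BY(x) __attribute__((guarded_by(x)))
#define ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE(...) __attribute__((release_capability(__VA_ARGS__)))

class CAPABILITY("mutex") Mutex
{
    std::mutex mImpl;

  public:
    void lock() ACQUIRE() { mImpl.lock(); }
    void unlock() RELEASE() { mImpl.unlock(); }
};

class EnqueueTimesSketch
{
    Mutex mMtx;
    int mCount GUARDED_BY(mMtx) = 0;

  public:
    void
    bump()
    {
        mMtx.lock();
        ++mCount; // ok: capability held
        mMtx.unlock();
        // ++mCount here would be flagged by -Wthread-safety
    }
};

The payoff is that forgetting to hold mEnqueueTimesMtx around an mEnqueueTimes access becomes a compile-time warning rather than a data race found in production.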
@@ -176,7 +176,7 @@
"curr": "ae7e4814b50e176d8e3532e462e2e9db02f218adebd74603d7e349cc19f489e2",
"next": {
"state": 1,
"output": "50abed8a9d86c072cfe8388246b7a378dc355fe996fd7384a5ee57e8da2ad52"
"output": "50abed8a9d86c072cfe8388246b7a378dc355fe996fd7384a5ee57e8da2ad52d"
},
"snap": "0000000000000000000000000000000000000000000000000000000000000000"
}