From 0d8ca56146d4b1255dda0b97b8c3adbfc3d6077f Mon Sep 17 00:00:00 2001 From: JasMehta08 Date: Mon, 8 Jun 2026 19:55:47 +0530 Subject: [PATCH] [ntuple] Add RNTupleLocatorMulti class for kTypeMulti locator --- tree/ntuple/inc/ROOT/RNTupleTypes.hxx | 71 ++++++++++++++++- tree/ntuple/src/RNTupleSerialize.cxx | 42 ++++++++++- tree/ntuple/src/RNTupleTypes.cxx | 38 +++++++++- tree/ntuple/test/ntuple_serialize.cxx | 105 +++++++++++++++++++++++++- tree/ntuple/test/ntuple_test.hxx | 1 + 5 files changed, 249 insertions(+), 8 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RNTupleTypes.hxx b/tree/ntuple/inc/ROOT/RNTupleTypes.hxx index cbfb76c96b340..ff91c4dccb66a 100644 --- a/tree/ntuple/inc/ROOT/RNTupleTypes.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleTypes.hxx @@ -201,6 +201,67 @@ public: std::uint64_t GetLocation() const { return fLocation; } }; +/// RNTupleLocator payload for the kTypeMulti locator (type 0x03). Used by storage +/// backends that pack multiple pages into shared objects (e.g., S3 Mode A). +/// +/// The class stores the object identifier and byte offset as two explicit 32-bit +/// integers plus a separate 4-bit reserved field. When packed into a single 64-bit +/// value via GetLocation() (parallel to RNTupleLocatorObject64::GetLocation()), the +/// layout is: +/// bits 63..60: 4 reserved bits (for future per-locator flags) +/// bits 59..30: 30-bit object identifier (max value 2^30 - 1, i.e. ~1 Billion objects) +/// bits 29..0: 30-bit byte offset within the object (max value 2^30 - 1, i.e. 1 GiB - 1) +class RNTupleLocatorMulti { +public: + static constexpr std::uint32_t kMaxObjectId = (1U << 30) - 1; + static constexpr std::uint32_t kMaxOffset = (1U << 30) - 1; + static constexpr std::uint8_t kMaxReserved = 0xF; + +private: + static constexpr std::uint64_t kMaskReserved = 0xFULL << 60; + static constexpr std::uint64_t kMaskObjectId = 0x3FFFFFFFULL << 30; + static constexpr std::uint64_t kMaskOffset = 0x3FFFFFFFULL; + + std::uint32_t fObjectId = 0; + std::uint32_t fOffset = 0; + std::uint8_t fReserved = 0; + +public: + RNTupleLocatorMulti() = default; + /// Construct from logical object identifier and byte offset; throws if either value + /// exceeds the 30-bit range. + RNTupleLocatorMulti(std::uint32_t objectId, std::uint32_t offset); + /// Construct from a raw 64-bit packed location value, extracting the fields per + /// the layout documented above. Intended for the deserializer; user code should + /// prefer the (objectId, offset) constructor. + explicit RNTupleLocatorMulti(std::uint64_t location) + : fObjectId(static_cast((location & kMaskObjectId) >> 30)), + fOffset(static_cast(location & kMaskOffset)), + fReserved(static_cast((location & kMaskReserved) >> 60)) + { + } + + bool operator==(const RNTupleLocatorMulti &other) const + { + return fObjectId == other.fObjectId && fOffset == other.fOffset && fReserved == other.fReserved; + } + + /// Returns the raw 64-bit packed location derived from the three fields, suitable + /// for storage in RNTupleLocator::fPosition or for direct serialization. Parallel + /// to RNTupleLocatorObject64::GetLocation(). + std::uint64_t GetLocation() const + { + return (static_cast(fReserved) << 60) | (static_cast(fObjectId) << 30) | fOffset; + } + + std::uint32_t GetObjectId() const { return fObjectId; } + std::uint32_t GetOffset() const { return fOffset; } + std::uint8_t GetReserved() const { return fReserved; } + + /// Sets the 4-bit reserved field; throws if the value exceeds the 4-bit range. + void SetReserved(std::uint8_t reserved); +}; + // Workaround missing return type overloading class RNTupleLocator; namespace Internal { @@ -216,6 +277,11 @@ template <> struct RNTupleLocatorHelper { static RNTupleLocatorObject64 Get(const RNTupleLocator &loc); }; + +template <> +struct RNTupleLocatorHelper { + static RNTupleLocatorMulti Get(const RNTupleLocator &loc); +}; } // namespace Internal /// Generic information about the physical location of data. Values depend on the concrete storage type. E.g., @@ -226,6 +292,7 @@ struct RNTupleLocatorHelper { class RNTupleLocator { friend struct Internal::RNTupleLocatorHelper; friend struct Internal::RNTupleLocatorHelper; + friend struct Internal::RNTupleLocatorHelper; public: /// Values for the _Type_ field in non-disk locators. Serializable types must have the MSb == 0; see @@ -277,7 +344,8 @@ public: void SetType(ELocatorType type); void SetReserved(std::uint8_t reserved); - /// Note that for GetPosition() / SetPosition(), the locator type must correspond (kTypeFile, kTypeObject64). + /// Note that for GetPosition() / SetPosition(), the locator type must correspond + /// (kTypeFile, kTypeObject64, kTypeMulti). template T GetPosition() const @@ -287,6 +355,7 @@ public: void SetPosition(std::uint64_t position); void SetPosition(RNTupleLocatorObject64 position); + void SetPosition(RNTupleLocatorMulti position); }; namespace Internal { diff --git a/tree/ntuple/src/RNTupleSerialize.cxx b/tree/ntuple/src/RNTupleSerialize.cxx index 77a63f863c3a0..b2a4b16268d46 100644 --- a/tree/ntuple/src/RNTupleSerialize.cxx +++ b/tree/ntuple/src/RNTupleSerialize.cxx @@ -511,6 +511,44 @@ ROOT::RResult DeserializeLocatorPayloadObject64(const unsigned char *buffe return ROOT::RResult::Success(); } +std::uint32_t SerializeLocatorPayloadMulti(const ROOT::RNTupleLocator &locator, unsigned char *buffer) +{ + const auto &data = locator.GetPosition(); + const uint32_t sizeofNBytesOnStorage = (locator.GetNBytesOnStorage() > std::numeric_limits::max()) + ? sizeof(std::uint64_t) + : sizeof(std::uint32_t); + if (buffer) { + if (sizeofNBytesOnStorage == sizeof(std::uint32_t)) { + RNTupleSerializer::SerializeUInt32(locator.GetNBytesOnStorage(), buffer); + } else { + RNTupleSerializer::SerializeUInt64(locator.GetNBytesOnStorage(), buffer); + } + RNTupleSerializer::SerializeUInt64(data.GetLocation(), buffer + sizeofNBytesOnStorage); + } + return sizeofNBytesOnStorage + sizeof(std::uint64_t); +} + +ROOT::RResult DeserializeLocatorPayloadMulti(const unsigned char *buffer, std::uint32_t sizeofLocatorPayload, + ROOT::RNTupleLocator &locator) +{ + std::uint64_t packed; + if (sizeofLocatorPayload == 12) { + std::uint32_t nBytesOnStorage; + RNTupleSerializer::DeserializeUInt32(buffer, nBytesOnStorage); + locator.SetNBytesOnStorage(nBytesOnStorage); + RNTupleSerializer::DeserializeUInt64(buffer + sizeof(std::uint32_t), packed); + } else if (sizeofLocatorPayload == 16) { + std::uint64_t nBytesOnStorage; + RNTupleSerializer::DeserializeUInt64(buffer, nBytesOnStorage); + locator.SetNBytesOnStorage(nBytesOnStorage); + RNTupleSerializer::DeserializeUInt64(buffer + sizeof(std::uint64_t), packed); + } else { + return R__FAIL("invalid Multi locator payload size: " + std::to_string(sizeofLocatorPayload)); + } + locator.SetPosition(ROOT::RNTupleLocatorMulti{packed}); + return ROOT::RResult::Success(); +} + std::uint32_t SerializeAliasColumn(const ROOT::RColumnDescriptor &columnDesc, const ROOT::Internal::RNTupleSerializer::RContext &context, void *buffer) { @@ -1091,7 +1129,7 @@ ROOT::Internal::RNTupleSerializer::SerializeLocator(const RNTupleLocator &locato locatorType = 0x02; break; case RNTupleLocator::kTypeMulti: - size += SerializeLocatorPayloadObject64(locator, payloadp); + size += SerializeLocatorPayloadMulti(locator, payloadp); locatorType = 0x03; break; default: @@ -1144,7 +1182,7 @@ ROOT::RResult ROOT::Internal::RNTupleSerializer::DeserializeLocat break; case 0x03: locator.SetType(RNTupleLocator::kTypeMulti); - DeserializeLocatorPayloadObject64(bytes, payloadSize, locator); + DeserializeLocatorPayloadMulti(bytes, payloadSize, locator); break; default: locator.SetType(RNTupleLocator::kTypeUnknown); } diff --git a/tree/ntuple/src/RNTupleTypes.cxx b/tree/ntuple/src/RNTupleTypes.cxx index 62e31d289e6ef..6f531f09b924a 100644 --- a/tree/ntuple/src/RNTupleTypes.cxx +++ b/tree/ntuple/src/RNTupleTypes.cxx @@ -80,11 +80,18 @@ void ROOT::RNTupleLocator::SetPosition(std::uint64_t position) void ROOT::RNTupleLocator::SetPosition(RNTupleLocatorObject64 position) { - if (GetType() != kTypeObject64 && GetType() != kTypeMulti) + if (GetType() != kTypeObject64) throw RException(R__FAIL("cannot set position as 64bit object for type " + std::to_string(GetType()))); fPosition = position.GetLocation(); } +void ROOT::RNTupleLocator::SetPosition(RNTupleLocatorMulti position) +{ + if (GetType() != kTypeMulti) + throw RException(R__FAIL("cannot set position as Multi locator for type " + std::to_string(GetType()))); + fPosition = position.GetLocation(); +} + std::uint64_t ROOT::Internal::RNTupleLocatorHelper::Get(const RNTupleLocator &loc) { if (loc.GetType() != ROOT::RNTupleLocator::kTypeFile) @@ -95,7 +102,34 @@ std::uint64_t ROOT::Internal::RNTupleLocatorHelper::Get(const RNT ROOT::RNTupleLocatorObject64 ROOT::Internal::RNTupleLocatorHelper::Get(const RNTupleLocator &loc) { - if (loc.GetType() != ROOT::RNTupleLocator::kTypeObject64 && loc.GetType() != ROOT::RNTupleLocator::kTypeMulti) + if (loc.GetType() != ROOT::RNTupleLocator::kTypeObject64) throw RException(R__FAIL("cannot retrieve position as 64bit object for type " + std::to_string(loc.GetType()))); return RNTupleLocatorObject64{loc.fPosition}; } + +ROOT::RNTupleLocatorMulti +ROOT::Internal::RNTupleLocatorHelper::Get(const RNTupleLocator &loc) +{ + if (loc.GetType() != ROOT::RNTupleLocator::kTypeMulti) + throw RException( + R__FAIL("cannot retrieve position as Multi locator for type " + std::to_string(loc.GetType()))); + return RNTupleLocatorMulti{loc.fPosition}; +} + +ROOT::RNTupleLocatorMulti::RNTupleLocatorMulti(std::uint32_t objectId, std::uint32_t offset) +{ + if (objectId > kMaxObjectId) + throw RException(R__FAIL("RNTupleLocatorMulti object id exceeds 30-bit range: " + std::to_string(objectId))); + if (offset > kMaxOffset) + throw RException(R__FAIL("RNTupleLocatorMulti offset exceeds 30-bit range: " + std::to_string(offset))); + fObjectId = objectId; + fOffset = offset; +} + +void ROOT::RNTupleLocatorMulti::SetReserved(std::uint8_t reserved) +{ + if (reserved > kMaxReserved) + throw RException( + R__FAIL("RNTupleLocatorMulti reserved value exceeds 4-bit range: " + std::to_string(reserved))); + fReserved = reserved; +} diff --git a/tree/ntuple/test/ntuple_serialize.cxx b/tree/ntuple/test/ntuple_serialize.cxx index 5a6acfb1cfc4b..67a4e4cc5f328 100644 --- a/tree/ntuple/test/ntuple_serialize.cxx +++ b/tree/ntuple/test/ntuple_serialize.cxx @@ -402,7 +402,7 @@ TEST(RNTuple, SerializeLocator) // Multi locator round-trip with 32-bit nBytesOnStorage locator = RNTupleLocator{}; locator.SetType(RNTupleLocator::kTypeMulti); - locator.SetPosition(RNTupleLocatorObject64{42U}); + locator.SetPosition(RNTupleLocatorMulti{7, 1024}); locator.SetNBytesOnStorage(1024U); locator.SetReserved(0); EXPECT_EQ(16u, RNTupleSerializer::SerializeLocator(locator, buffer).Unwrap()); @@ -411,7 +411,9 @@ TEST(RNTuple, SerializeLocator) EXPECT_EQ(locator.GetType(), RNTupleLocator::kTypeMulti); EXPECT_EQ(locator.GetNBytesOnStorage(), 1024U); EXPECT_EQ(locator.GetReserved(), 0); - EXPECT_EQ(42U, locator.GetPosition().GetLocation()); + auto multi = locator.GetPosition(); + EXPECT_EQ(7U, multi.GetObjectId()); + EXPECT_EQ(1024U, multi.GetOffset()); // Multi locator round-trip with 64-bit nBytesOnStorage and reserved bit locator.SetNBytesOnStorage(static_cast(std::numeric_limits::max()) + 1); @@ -422,7 +424,9 @@ TEST(RNTuple, SerializeLocator) EXPECT_EQ(locator.GetType(), RNTupleLocator::kTypeMulti); EXPECT_EQ(locator.GetNBytesOnStorage(), static_cast(std::numeric_limits::max()) + 1); EXPECT_EQ(locator.GetReserved(), 1); - EXPECT_EQ(42U, locator.GetPosition().GetLocation()); + multi = locator.GetPosition(); + EXPECT_EQ(7U, multi.GetObjectId()); + EXPECT_EQ(1024U, multi.GetOffset()); std::int32_t *head = reinterpret_cast(buffer); #ifndef R__BYTESWAP @@ -435,6 +439,101 @@ TEST(RNTuple, SerializeLocator) EXPECT_EQ(locator.GetType(), RNTupleLocator::kTypeUnknown); } +TEST(RNTuple, RNTupleLocatorMultiPackUnpack) +{ + // Default-constructed: all zero + RNTupleLocatorMulti zero; + EXPECT_EQ(0U, zero.GetObjectId()); + EXPECT_EQ(0U, zero.GetOffset()); + EXPECT_EQ(0U, zero.GetReserved()); + EXPECT_EQ(0ULL, zero.GetLocation()); + + // Non-trivial values within the 30-bit range + RNTupleLocatorMulti m(0x12345, 0xABCDE); + EXPECT_EQ(0x12345U, m.GetObjectId()); + EXPECT_EQ(0xABCDEU, m.GetOffset()); + EXPECT_EQ(0U, m.GetReserved()); + // GetLocation() packs the three fields per the documented layout: + // bits [63..60] reserved, [59..30] object id, [29..0] offset + EXPECT_EQ((static_cast(0x12345) << 30) | 0xABCDE, m.GetLocation()); + + // Round-trip via the raw uint64 constructor + RNTupleLocatorMulti round{m.GetLocation()}; + EXPECT_EQ(m, round); + EXPECT_EQ(m.GetObjectId(), round.GetObjectId()); + EXPECT_EQ(m.GetOffset(), round.GetOffset()); + + // Max 30-bit values + RNTupleLocatorMulti maxVals(RNTupleLocatorMulti::kMaxObjectId, RNTupleLocatorMulti::kMaxOffset); + EXPECT_EQ(0x3FFFFFFFU, maxVals.GetObjectId()); + EXPECT_EQ(0x3FFFFFFFU, maxVals.GetOffset()); + EXPECT_EQ(0U, maxVals.GetReserved()); + + // Object id only: no bit leak into offset or reserved + RNTupleLocatorMulti idOnly(RNTupleLocatorMulti::kMaxObjectId, 0U); + EXPECT_EQ(RNTupleLocatorMulti::kMaxObjectId, idOnly.GetObjectId()); + EXPECT_EQ(0U, idOnly.GetOffset()); + EXPECT_EQ(0U, idOnly.GetReserved()); + + // Offset only: no bit leak into object id or reserved + RNTupleLocatorMulti offsetOnly(0U, RNTupleLocatorMulti::kMaxOffset); + EXPECT_EQ(0U, offsetOnly.GetObjectId()); + EXPECT_EQ(RNTupleLocatorMulti::kMaxOffset, offsetOnly.GetOffset()); + EXPECT_EQ(0U, offsetOnly.GetReserved()); + + // Constructor enforces the 30-bit range on both fields + EXPECT_THROW(RNTupleLocatorMulti(1U << 30, 0), ROOT::RException); + EXPECT_THROW(RNTupleLocatorMulti(0, 1U << 30), ROOT::RException); + + // Reserved bits round-trip without corrupting the other fields + RNTupleLocatorMulti withReserved(7, 42); + withReserved.SetReserved(0xF); + EXPECT_EQ(0xF, withReserved.GetReserved()); + EXPECT_EQ(7U, withReserved.GetObjectId()); + EXPECT_EQ(42U, withReserved.GetOffset()); + + // SetReserved enforces the 4-bit range + EXPECT_THROW(withReserved.SetReserved(0x10), ROOT::RException); + + // Raw uint64 construction preserves bits without validation; the masks + // correctly carve out each field from a fully populated location. + RNTupleLocatorMulti raw{0xFFFFFFFFFFFFFFFFULL}; + EXPECT_EQ(RNTupleLocatorMulti::kMaxObjectId, raw.GetObjectId()); + EXPECT_EQ(RNTupleLocatorMulti::kMaxOffset, raw.GetOffset()); + EXPECT_EQ(RNTupleLocatorMulti::kMaxReserved, raw.GetReserved()); +} + +TEST(RNTuple, RNTupleLocatorMultiTypeEnforcement) +{ + RNTupleLocator locator; + + // SetPosition(Multi) requires kTypeMulti. + locator.SetType(RNTupleLocator::kTypeObject64); + EXPECT_THROW(locator.SetPosition(RNTupleLocatorMulti(1, 2)), ROOT::RException); + locator.SetType(RNTupleLocator::kTypeFile); + EXPECT_THROW(locator.SetPosition(RNTupleLocatorMulti(1, 2)), ROOT::RException); + + // SetPosition(Object64) no longer accepts kTypeMulti after the split. + locator = RNTupleLocator{}; + locator.SetType(RNTupleLocator::kTypeMulti); + EXPECT_THROW(locator.SetPosition(RNTupleLocatorObject64{1}), ROOT::RException); + + // Correct usage round-trip. + locator.SetPosition(RNTupleLocatorMulti(1, 2)); + auto m = locator.GetPosition(); + EXPECT_EQ(1U, m.GetObjectId()); + EXPECT_EQ(2U, m.GetOffset()); + + // GetPosition() no longer accepts kTypeMulti after the split. + EXPECT_THROW(locator.GetPosition(), ROOT::RException); + + // GetPosition() rejects non-Multi types. + locator = RNTupleLocator{}; + locator.SetType(RNTupleLocator::kTypeObject64); + locator.SetPosition(RNTupleLocatorObject64{1}); + EXPECT_THROW(locator.GetPosition(), ROOT::RException); +} + TEST(RNTuple, SerializeEnvelopeLink) { RNTupleSerializer::REnvelopeLink link; diff --git a/tree/ntuple/test/ntuple_test.hxx b/tree/ntuple/test/ntuple_test.hxx index e4cd028af99f1..8c1b737b17413 100644 --- a/tree/ntuple/test/ntuple_test.hxx +++ b/tree/ntuple/test/ntuple_test.hxx @@ -57,6 +57,7 @@ using ROOT::EExtraTypeInfoIds; using ROOT::RNTupleLocalIndex; using ROOT::RNTupleLocator; +using ROOT::RNTupleLocatorMulti; using ROOT::RNTupleLocatorObject64; using ROOT::Internal::RColumnIndex; using RClusterDescriptor = ROOT::RClusterDescriptor;