From 0c30d6cd70abfda961fd15de113e81e5a4bf3337 Mon Sep 17 00:00:00 2001 From: SANTHOSH-SACHIN Date: Sun, 21 Jun 2026 01:51:44 +0530 Subject: [PATCH] fix: seal FromBytes trait with private supertrait to prevent unsound external implementations (#10164) Seal the public unsafe trait `FromBytes` (and by extension `FromBitpacked`) with a private supertrait so that it cannot be implemented outside the `parquet` crate. ```rust pub unsafe trait FromBytes: Sized + private::Sealed { ... } ``` This is necessary because `BitReader::get_batch` casts `*mut T` pointers to `*mut u16`/u32/u64 and calls `std::slice::from_raw_parts_mut` on them, which requires the pointer to be aligned for those primitive types. The old `# Safety` contract only mentioned bit-pattern validity and did not document the alignment requirement, so a downstream crate could soundly implement `FromBytes` for a `#[repr(packed)]` type with size 2 but align 1 and trigger undefined behavior. All in-crate implementations (`u8`-`u64`, `i8`-`i64`, `f32`, `f64`, `bool`, `Int96`, `ByteArray`, `FixedLenByteArray`) already satisfy the alignment requirement because they all have natural alignment matching their size. --- parquet/src/util/bit_util.rs | 37 +++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs index d0b13072dc4e..b06fd696026e 100644 --- a/parquet/src/util/bit_util.rs +++ b/parquet/src/util/bit_util.rs @@ -36,10 +36,29 @@ fn array_from_slice(bs: &[u8]) -> Result<[u8; N]> { } } +pub(crate) mod private { + /// Sealed trait to prevent external implementations of [`FromBytes`]. + /// + /// This is required for soundness: `BitReader::get_batch` internally casts + /// `*mut T` pointers to `*mut u16`/`u32`/`u64` and must be able to assume + /// natural alignment for the primitive type matching `size_of::()`. + pub trait Sealed {} +} + /// # Safety -/// All bit patterns 00000xxxx, where there are `BIT_CAPACITY` `x`s, -/// must be valid, unless BIT_CAPACITY is 0. -pub unsafe trait FromBytes: Sized { +/// +/// Implementors must satisfy the following requirements: +/// +/// 1. All bit patterns `00000xxxx`, where there are `BIT_CAPACITY` `x`s, +/// must be valid, unless `BIT_CAPACITY` is 0. +/// 2. `align_of::()` must be at least `align_of::()` where +/// `Primitive` is the unsigned integer of the same size as `Self` +/// (i.e. `u8` for size 1, `u16` for size 2, `u32` for size 4, `u64` for size 8). +/// This ensures that `BitReader::get_batch` can safely cast `*mut Self` to +/// `*mut u16`/`u32`/`u64` in its fast path. +/// +/// This trait is sealed and cannot be implemented outside of the `parquet` crate. +pub unsafe trait FromBytes: Sized + private::Sealed { const BIT_CAPACITY: usize; type Buffer: AsMut<[u8]> + Default; fn try_from_le_slice(b: &[u8]) -> Result; @@ -60,6 +79,7 @@ pub trait FromBitpacked: FromBytes { macro_rules! from_le_bytes { ($($ty: ty),*) => { $( + impl private::Sealed for $ty {} // SAFETY: this macro is used for types for which all bit patterns are valid. unsafe impl FromBytes for $ty { const BIT_CAPACITY: usize = std::mem::size_of::<$ty>() * 8; @@ -83,6 +103,9 @@ macro_rules! from_le_bytes { from_le_bytes! { u8, u16, u32, u64, i8, i16, i32, i64 } +impl private::Sealed for f32 {} +impl private::Sealed for f64 {} + // SAFETY: all bit patterns are valid for f32 and f64. unsafe impl FromBytes for f32 { const BIT_CAPACITY: usize = 32; @@ -121,6 +144,8 @@ impl FromBitpacked for f64 { } } +impl private::Sealed for bool {} + // SAFETY: the 0000000x bit pattern is always valid for `bool`. unsafe impl FromBytes for bool { const BIT_CAPACITY: usize = 1; @@ -141,6 +166,8 @@ impl FromBitpacked for bool { } } +impl private::Sealed for Int96 {} + // SAFETY: BIT_CAPACITY is 0. unsafe impl FromBytes for Int96 { const BIT_CAPACITY: usize = 0; @@ -168,6 +195,8 @@ unsafe impl FromBytes for Int96 { } } +impl private::Sealed for ByteArray {} + // SAFETY: BIT_CAPACITY is 0. unsafe impl FromBytes for ByteArray { const BIT_CAPACITY: usize = 0; @@ -181,6 +210,8 @@ unsafe impl FromBytes for ByteArray { } } +impl private::Sealed for FixedLenByteArray {} + // SAFETY: BIT_CAPACITY is 0. unsafe impl FromBytes for FixedLenByteArray { const BIT_CAPACITY: usize = 0;