Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions src/murmur3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ impl Murmur3Hasher {
// NOTE(unsafe) avoid calling `memcpy` on a 0-3 byte copy
// self.buf.bytes[start..start+len].copy_from(buf);
for i in 0..len {
// SAFETY:
// 1. `i < len`, so `start + i` is strictly less than `start + len`, which is `<=` 4 by the
// function precondition, so `self.buf.bytes.get_unchecked_mut(start + i)` is in bounds.
// 2. `i` is within the range `0..len`, which matches the length of `buf`, so
// `buf.get_unchecked(i)` is in bounds.
unsafe {
self.buf
.bytes
Expand Down Expand Up @@ -104,6 +109,8 @@ impl crate::Hasher for Murmur3Hasher {
let mut state = match self.index {
Index::_3 => {
let mut block = 0;
// SAFETY: `self.index == 3` indicates that exactly 3 bytes
// have been written and initialized via previous `push()` or `write()` calls.
unsafe {
block ^= u32::from(self.buf.bytes[2].assume_init()) << 16;
block ^= u32::from(self.buf.bytes[1].assume_init()) << 8;
Expand All @@ -113,6 +120,8 @@ impl crate::Hasher for Murmur3Hasher {
}
Index::_2 => {
let mut block = 0;
// SAFETY: `self.index == 2` indicates that exactly 2 bytes
// have been written and initialized via previous `push()` or `write()` calls.
unsafe {
block ^= u32::from(self.buf.bytes[1].assume_init()) << 8;
block ^= u32::from(self.buf.bytes[0].assume_init());
Expand All @@ -121,6 +130,8 @@ impl crate::Hasher for Murmur3Hasher {
}
Index::_1 => {
let mut block = 0;
// SAFETY: `self.index == 1` indicates that exactly 1 byte
// has been written and initialized via previous `push()` or `write()` calls.
unsafe {
block ^= u32::from(self.buf.bytes[0].assume_init());
}
Expand Down Expand Up @@ -159,12 +170,19 @@ impl core::hash::Hasher for Murmur3Hasher {
// NOTE(unsafe) avoid panicking branch (`slice_index_len_fail`)
// let (head, body) = bytes.split_at(4 - index);
let mid = 4 - index;
// SAFETY: By condition `len + index >= 4`, we have `len >= 4 - index` (= `mid`).
// Hence `bytes` contains at least `mid` valid bytes to construct a slice from its pointer.
let head = unsafe { slice::from_raw_parts(bytes.as_ptr(), mid) };
// SAFETY: `bytes.as_ptr().add(mid)` stays within bounds of the original `bytes` slice
// because `mid <= len`. The remaining length is `len - mid`.
let body = unsafe { slice::from_raw_parts(bytes.as_ptr().add(mid), len - mid) };

// NOTE(unsafe) avoid calling `memcpy` on a 0-3 byte copy
// self.buf.bytes[index..].copy_from_slice(head);
for i in 0..4 - index {
// SAFETY:
// 1. `index + i < index + (4 - index) = 4`, so it's in bounds of `self.buf.bytes`.
// 2. `i < 4 - index = mid`, so it's in bounds of `head`.
unsafe {
self.buf
.bytes
Expand All @@ -177,7 +195,7 @@ impl core::hash::Hasher for Murmur3Hasher {

// SAFETY: the loop above just wrote bytes [index..4], and prior push() calls
// wrote bytes [0..index], so all 4 bytes are initialized.
// In the future this can be replaced with array_assume_init
// The transmute from `&[MaybeUninit<u8>; 4]` to `&[u8; 4]` is valid.
let block: &[u8; 4] = unsafe { core::mem::transmute(&self.buf.bytes) };
self.state.process_block(block);

Expand All @@ -190,12 +208,17 @@ impl core::hash::Hasher for Murmur3Hasher {

for block in body.chunks(4) {
if block.len() == 4 {
// SAFETY: By condition `block.len() == 4`, direct cast to `&[u8; 4]` is valid,
// as the slice pointer is valid for 4 contiguous readable bytes,
// and arrays of `u8` require only 1-byte alignment.
self.state
.process_block(unsafe { &*(block.as_ptr().cast::<[u8; 4]>()) });
} else {
// NOTE(unsafe) In this branch, `block.len() < 4`. For CASE 1 and CASE 2 above,
// `self.index.usize()` will be 0 here, so `self.index.usize() + block.len() < 4`.
// The condition for CASE 3 ensures that `self.index.usize() + bytes.len() < 4`.
// SAFETY:
// 1. In CASE 1 and CASE 2 above, `self.index.usize() == 0`, so `self.index.usize() + block.len() < 4`.
// 2. In CASE 3, the condition for this branch ensures that `self.index.usize() + bytes.len() < 4`,
// and since `block == body == bytes`, `self.index.usize() + block.len() < 4`.
// In all cases, the precondition for `self.push()` is upheld.
unsafe {
self.push(block);
}
Expand Down
Loading