From b310030c05eeaa9ca511fc819549fd01bae1848d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Sat, 2 May 2026 18:59:11 +0300 Subject: [PATCH] fix(storage): replace unwrap() with expect() for u32 overflow in build() PositionsWriter::build() used unwrap() on u32::try_from() conversions at 4 locations (term_count, doc_count, pos_count, term_bytes.len). These would panic if counts exceeded u32::MAX (~4B), which is effectively impossible but the panic was undocumented. Now uses expect() with descriptive messages documenting the file-format constraint that limits these counts to u32::MAX. --- .../cloudsearch-storage/src/positions_writer.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/rust/crates/cloudsearch-storage/src/positions_writer.rs b/rust/crates/cloudsearch-storage/src/positions_writer.rs index 823966b..a4469ee 100644 --- a/rust/crates/cloudsearch-storage/src/positions_writer.rs +++ b/rust/crates/cloudsearch-storage/src/positions_writer.rs @@ -49,7 +49,8 @@ impl PositionsWriter { result.push(1); // VERSION (byte 4) // 3 bytes padding (bytes 5-7) to align term_count at byte 8 result.extend_from_slice(&[0u8, 0u8, 0u8]); - let term_count = u32::try_from(self.terms.len()).unwrap(); + let term_count = u32::try_from(self.terms.len()) + .expect("term count exceeds u32::MAX, file format cannot represent this"); result.extend_from_slice(&term_count.to_le_bytes()); // term_count at bytes 8-11 // Collect body offsets by scanning terms in sorted order @@ -60,12 +61,14 @@ impl PositionsWriter { for (term, posting_list) in &self.terms { body_offsets.push((term.clone(), body.len() as u64)); // Serialize posting list to body - let doc_count = u32::try_from(posting_list.docs.len()).unwrap(); + let doc_count = u32::try_from(posting_list.docs.len()) + .expect("doc count for term exceeds u32::MAX, file format cannot represent this"); body.extend_from_slice(&doc_count.to_le_bytes()); for posting in &posting_list.docs { body.extend_from_slice(&posting.doc_id.to_le_bytes()); body.extend_from_slice(&posting.term_freq.to_le_bytes()); - let pos_count = u32::try_from(posting.positions.len()).unwrap(); + let pos_count = u32::try_from(posting.positions.len()) + .expect("position count exceeds u32::MAX, file format cannot represent this"); body.extend_from_slice(&pos_count.to_le_bytes()); for p in &posting.positions { body.extend_from_slice(&p.to_le_bytes()); @@ -76,7 +79,11 @@ impl PositionsWriter { // Term dictionary: (str_len[4], str[bytes], body_offset[8]) for (term, body_offset) in &body_offsets { let term_bytes = term.as_bytes(); - result.extend_from_slice(&u32::try_from(term_bytes.len()).unwrap().to_le_bytes()); + result.extend_from_slice( + &u32::try_from(term_bytes.len()) + .expect("term length exceeds u32::MAX, file format cannot represent this") + .to_le_bytes(), + ); result.extend_from_slice(term_bytes); result.extend_from_slice(&(*body_offset).to_le_bytes()); }