diff --git a/examples/network_streaming.rs b/examples/network_streaming.rs index 954d0a3..71ca2f9 100644 --- a/examples/network_streaming.rs +++ b/examples/network_streaming.rs @@ -2,10 +2,10 @@ use std::io::{BufReader, BufWriter}; use std::net::{TcpListener, TcpStream}; use std::thread; -use binseq::bq::{BinseqHeader, BinseqHeaderBuilder, StreamReader, StreamWriterBuilder}; +use binseq::bq::{FileHeader, FileHeaderBuilder, StreamReader, StreamWriterBuilder}; use binseq::{BinseqRecord, Policy, Result}; -fn server(header: BinseqHeader, sequence: &[u8]) -> Result<()> { +fn server(header: FileHeader, sequence: &[u8]) -> Result<()> { // Create a listener on localhost:3000 let listener = TcpListener::bind("127.0.0.1:3000").expect("Failed to bind to address"); println!("Server listening on 127.0.0.1:3000"); @@ -80,7 +80,7 @@ fn client() -> Result<()> { fn main() -> Result<()> { // Create a header for sequences of length 100 - let header = BinseqHeaderBuilder::new().slen(100).build()?; + let header = FileHeaderBuilder::new().slen(100).build()?; // Create some example sequence data let sequence = b"ACGT".repeat(25); // 100 nucleotides diff --git a/examples/streaming.rs b/examples/streaming.rs index 3efb784..001ad47 100644 --- a/examples/streaming.rs +++ b/examples/streaming.rs @@ -1,11 +1,11 @@ use std::io::{BufReader, Cursor}; -use binseq::bq::{BinseqHeaderBuilder, StreamReader, StreamWriterBuilder}; +use binseq::bq::{FileHeaderBuilder, StreamReader, StreamWriterBuilder}; use binseq::{BinseqRecord, Policy, Result}; fn main() -> Result<()> { // Create a header for sequences of length 100 - let header = BinseqHeaderBuilder::new().slen(100).build()?; + let header = FileHeaderBuilder::new().slen(100).build()?; // Create some example sequence data let sequence = b"ACGT".repeat(25); // 100 nucleotides diff --git a/src/bq/header.rs b/src/bq/header.rs index 4508ca0..42b845e 100644 --- a/src/bq/header.rs +++ b/src/bq/header.rs @@ -32,22 +32,22 @@ pub const SIZE_HEADER: usize = 32; pub const RESERVED: [u8; 17] = [42; 17]; #[derive(Debug, Clone, Copy)] -pub struct BinseqHeaderBuilder { +pub struct FileHeaderBuilder { slen: Option, xlen: Option, bitsize: Option, flags: Option, } -impl Default for BinseqHeaderBuilder { +impl Default for FileHeaderBuilder { fn default() -> Self { Self::new() } } -impl BinseqHeaderBuilder { +impl FileHeaderBuilder { #[must_use] pub fn new() -> Self { - BinseqHeaderBuilder { + FileHeaderBuilder { slen: None, xlen: None, bitsize: None, @@ -74,8 +74,8 @@ impl BinseqHeaderBuilder { self.flags = Some(flags); self } - pub fn build(self) -> Result { - Ok(BinseqHeader { + pub fn build(self) -> Result { + Ok(FileHeader { magic: MAGIC, format: FORMAT, slen: if let Some(slen) = self.slen { @@ -93,13 +93,13 @@ impl BinseqHeaderBuilder { /// Header structure for binary sequence files /// -/// The `BinseqHeader` contains metadata about the binary sequence data stored in a file, +/// The `FileHeader` contains metadata about the binary sequence data stored in a file, /// including format information, sequence lengths, and space for future extensions. /// /// The total size of this structure is 32 bytes, with a fixed layout to ensure /// consistent reading and writing across different platforms. #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct BinseqHeader { +pub struct FileHeader { /// Magic number to identify the file format /// /// 4 bytes @@ -135,7 +135,7 @@ pub struct BinseqHeader { /// 17 bytes pub reserved: [u8; 17], } -impl BinseqHeader { +impl FileHeader { /// Creates a new header with the specified sequence length /// /// This constructor initializes a standard header with the given sequence length, @@ -150,7 +150,7 @@ impl BinseqHeader { /// /// # Returns /// - /// A new `BinseqHeader` instance + /// A new `FileHeader` instance #[must_use] pub fn new(bits: BitSize, slen: u32, flags: bool) -> Self { Self { @@ -178,7 +178,7 @@ impl BinseqHeader { /// /// # Returns /// - /// A new `BinseqHeader` instance with extended sequence information + /// A new `FileHeader` instance with extended sequence information #[must_use] pub fn new_extended(bits: BitSize, slen: u32, xlen: u32, flags: bool) -> Self { Self { @@ -214,7 +214,7 @@ impl BinseqHeader { /// /// # Returns /// - /// * `Ok(BinseqHeader)` - A valid header parsed from the buffer + /// * `Ok(FileHeader)` - A valid header parsed from the buffer /// * `Err(Error)` - If the buffer contains invalid header data /// /// # Errors @@ -266,7 +266,7 @@ impl BinseqHeader { /// /// # Returns /// - /// * `Ok(BinseqHeader)` - A valid header parsed from the buffer + /// * `Ok(FileHeader)` - A valid header parsed from the buffer /// * `Err(Error)` - If the buffer is too small or contains invalid header data /// /// # Errors @@ -324,7 +324,7 @@ impl BinseqHeader { /// /// # Returns /// - /// * `Ok(BinseqHeader)` - A valid header read from the reader + /// * `Ok(FileHeader)` - A valid header read from the reader /// * `Err(Error)` - If reading from the reader failed or the header data is invalid /// /// # Errors diff --git a/src/bq/mod.rs b/src/bq/mod.rs index bb7f886..fd194f6 100644 --- a/src/bq/mod.rs +++ b/src/bq/mod.rs @@ -2,9 +2,9 @@ //! //! *.bq files are BINSEQ variants for **fixed-length** records and **does not support quality scores**. //! -//! For variable-length records and optional quality scores use the [`vbq`](crate::vbq) module. +//! For variable-length records and optional quality scores use the [`cbq`](crate::cbq) or [`vbq`](crate::vbq) modules. //! -//! This module contains the utilities for reading, writing, and interacting with BINSEQ files. +//! This module contains the utilities for reading, writing, and interacting with BQ files. //! //! For detailed information on the file format, see our [paper](https://www.biorxiv.org/content/10.1101/2025.04.08.647863v1). //! @@ -46,11 +46,11 @@ //! // Create an in-memory buffer for output //! let output_handle = Cursor::new(Vec::new()); //! -//! // Initialize our BINSEQ header (64 bp, only primary) -//! let header = bq::BinseqHeaderBuilder::new().slen(64).build().unwrap(); +//! // Initialize our BQ header (64 bp, only primary) +//! let header = bq::FileHeaderBuilder::new().slen(64).build().unwrap(); //! -//! // Initialize our BINSEQ writer -//! let mut writer = bq::BinseqWriterBuilder::default() +//! // Initialize our BQ writer +//! let mut writer = bq::WriterBuilder::default() //! .header(header) //! .build(output_handle) //! .unwrap(); @@ -79,11 +79,11 @@ //! // Create an in-memory buffer for output //! let output_handle = Cursor::new(Vec::new()); //! -//! // Initialize our BINSEQ header (64 bp and 128bp) -//! let header = bq::BinseqHeaderBuilder::new().slen(64).xlen(128).build().unwrap(); +//! // Initialize our BQ header (64 bp and 128bp) +//! let header = bq::FileHeaderBuilder::new().slen(64).xlen(128).build().unwrap(); //! -//! // Initialize our BINSEQ writer -//! let mut writer = bq::BinseqWriterBuilder::default() +//! // Initialize our BQ writer +//! let mut writer = bq::WriterBuilder::default() //! .header(header) //! .build(output_handle) //! .unwrap(); @@ -109,12 +109,12 @@ //! //! ``` //! use binseq::{Policy, Result, BinseqRecord, SequencingRecordBuilder}; -//! use binseq::bq::{BinseqHeaderBuilder, StreamReader, StreamWriterBuilder}; +//! use binseq::bq::{FileHeaderBuilder, StreamReader, StreamWriterBuilder}; //! use std::io::{BufReader, Cursor}; //! //! fn main() -> Result<()> { //! // Create a header for sequences of length 100 -//! let header = BinseqHeaderBuilder::new().slen(100).build()?; +//! let header = FileHeaderBuilder::new().slen(100).build()?; //! //! // Create a stream writer //! let mut writer = StreamWriterBuilder::default() @@ -150,7 +150,7 @@ //! //! ## BQ file format //! -//! A BINSEQ file consists of two sections: +//! A BQ file consists of two sections: //! //! 1. Fixed-size header (32 bytes) //! 2. Record data section @@ -241,6 +241,6 @@ mod header; mod reader; mod writer; -pub use header::{BinseqHeader, BinseqHeaderBuilder, SIZE_HEADER}; +pub use header::{FileHeader, FileHeaderBuilder, SIZE_HEADER}; pub use reader::{MmapReader, RefRecord, StreamReader}; -pub use writer::{BinseqWriter, BinseqWriterBuilder, Encoder, StreamWriter, StreamWriterBuilder}; +pub use writer::{Encoder, StreamWriter, StreamWriterBuilder, Writer, WriterBuilder}; diff --git a/src/bq/reader.rs b/src/bq/reader.rs index 97183b8..fc28cf9 100644 --- a/src/bq/reader.rs +++ b/src/bq/reader.rs @@ -17,7 +17,7 @@ use bitnuc::BitSize; use bytemuck::cast_slice; use memmap2::Mmap; -use super::header::{BinseqHeader, SIZE_HEADER}; +use super::header::{FileHeader, SIZE_HEADER}; use crate::{ BinseqRecord, DEFAULT_QUALITY_SCORE, Error, ParallelProcessor, ParallelReader, error::{ReadError, Result}, @@ -298,12 +298,12 @@ impl RecordConfig { /// /// # Arguments /// - /// * `header` - A reference to a `BinseqHeader` containing sequence lengths + /// * `header` - A reference to a `FileHeader` containing sequence lengths /// /// # Returns /// /// A new `RecordConfig` instance with the sequence lengths from the header - pub fn from_header(header: &BinseqHeader) -> Self { + pub fn from_header(header: &FileHeader) -> Self { Self::new( header.slen as usize, header.xlen as usize, @@ -411,7 +411,7 @@ pub struct MmapReader { mmap: Arc, /// Binary sequence file header containing format information - header: BinseqHeader, + header: FileHeader, /// Configuration defining the layout of records in the file config: RecordConfig, @@ -456,7 +456,7 @@ impl MmapReader { let mmap = unsafe { Mmap::map(&file)? }; // Read header from mapped memory - let header = BinseqHeader::from_buffer(&mmap)?; + let header = FileHeader::from_buffer(&mmap)?; // Record configuraration let config = RecordConfig::from_header(&header); @@ -491,7 +491,7 @@ impl MmapReader { /// /// The header contains format information and sequence length specifications. #[must_use] - pub fn header(&self) -> BinseqHeader { + pub fn header(&self) -> FileHeader { self.header } @@ -580,7 +580,7 @@ pub struct StreamReader { reader: R, /// Binary sequence file header containing format information - header: Option, + header: Option, /// Configuration defining the layout of records in the file config: Option, @@ -659,7 +659,7 @@ impl StreamReader { /// /// # Returns /// - /// * `Ok(&BinseqHeader)` - A reference to the validated header + /// * `Ok(&FileHeader)` - A reference to the validated header /// * `Err(Error)` - If reading or validating the header fails /// /// # Panics @@ -672,7 +672,7 @@ impl StreamReader { /// * There is an I/O error when reading from the source /// * The header data is invalid /// * End of stream is reached before the full header can be read - pub fn read_header(&mut self) -> Result<&BinseqHeader> { + pub fn read_header(&mut self) -> Result<&FileHeader> { if self.header.is_some() { return Ok(self .header @@ -687,7 +687,7 @@ impl StreamReader { // Parse header let header_slice = &self.buffer[self.buffer_pos..self.buffer_pos + SIZE_HEADER]; - let header = BinseqHeader::from_buffer(header_slice)?; + let header = FileHeader::from_buffer(header_slice)?; self.header = Some(header); self.config = Some(RecordConfig::from_header(&header)); diff --git a/src/bq/writer.rs b/src/bq/writer.rs index 52b2935..084d215 100644 --- a/src/bq/writer.rs +++ b/src/bq/writer.rs @@ -12,7 +12,7 @@ use std::io::{BufWriter, Write}; use byteorder::{LittleEndian, WriteBytesExt}; use rand::{SeedableRng, rngs::SmallRng}; -use super::BinseqHeader; +use super::FileHeader; use crate::{ Policy, RNG_SEED, SequencingRecord, error::{Result, WriteError}, @@ -66,7 +66,7 @@ pub fn write_buffer(writer: &mut W, ebuf: &[u64]) -> Result<()> { #[derive(Clone)] pub struct Encoder { /// Header containing sequence length and format information - header: BinseqHeader, + header: FileHeader, /// Buffers for storing encoded nucleotides in 2-bit format /// Each u64 can store 32 nucleotides (64 bits / 2 bits per nucleotide) @@ -95,12 +95,12 @@ impl Encoder { /// # Examples /// /// ``` - /// # use binseq::bq::{BinseqHeaderBuilder, Encoder}; - /// let header = BinseqHeaderBuilder::new().slen(100).build().unwrap(); + /// # use binseq::bq::{FileHeaderBuilder, Encoder}; + /// let header = FileHeaderBuilder::new().slen(100).build().unwrap(); /// let encoder = Encoder::new(header); /// ``` #[must_use] - pub fn new(header: BinseqHeader) -> Self { + pub fn new(header: FileHeader) -> Self { Self::with_policy(header, Policy::default()) } @@ -114,13 +114,13 @@ impl Encoder { /// # Examples /// /// ``` - /// # use binseq::bq::{BinseqHeaderBuilder, Encoder}; + /// # use binseq::bq::{FileHeaderBuilder, Encoder}; /// # use binseq::Policy; - /// let header = BinseqHeaderBuilder::new().slen(100).build().unwrap(); + /// let header = FileHeaderBuilder::new().slen(100).build().unwrap(); /// let encoder = Encoder::with_policy(header, Policy::SetToA); /// ``` #[must_use] - pub fn with_policy(header: BinseqHeader, policy: Policy) -> Self { + pub fn with_policy(header: FileHeader, policy: Policy) -> Self { Self { header, policy, @@ -225,7 +225,7 @@ impl Encoder { } } -/// Builder for creating configured `BinseqWriter` instances +/// Builder for creating configured `Writer` instances /// /// This builder provides a flexible way to create writers with various /// configurations. It follows the builder pattern, allowing for optional @@ -235,10 +235,10 @@ impl Encoder { /// /// ``` /// # use binseq::{Policy, Result}; -/// # use binseq::bq::{BinseqHeaderBuilder, BinseqWriterBuilder}; +/// # use binseq::bq::{FileHeaderBuilder, WriterBuilder}; /// # fn main() -> Result<()> { -/// let header = BinseqHeaderBuilder::new().slen(100).build()?; -/// let writer = BinseqWriterBuilder::default() +/// let header = FileHeaderBuilder::new().slen(100).build()?; +/// let writer = WriterBuilder::default() /// .header(header) /// .policy(Policy::SetToA) /// .headless(false) @@ -247,17 +247,17 @@ impl Encoder { /// # } /// ``` #[derive(Default)] -pub struct BinseqWriterBuilder { +pub struct WriterBuilder { /// Required header defining sequence lengths and format - header: Option, + header: Option, /// Optional policy for handling invalid nucleotides policy: Option, /// Optional headless mode for parallel writing scenarios headless: Option, } -impl BinseqWriterBuilder { +impl WriterBuilder { #[must_use] - pub fn header(mut self, header: BinseqHeader) -> Self { + pub fn header(mut self, header: FileHeader) -> Self { self.header = Some(header); self } @@ -274,11 +274,11 @@ impl BinseqWriterBuilder { self } - pub fn build(self, inner: W) -> Result> { + pub fn build(self, inner: W) -> Result> { let Some(header) = self.header else { return Err(WriteError::MissingHeader.into()); }; - BinseqWriter::new( + Writer::new( inner, header, self.policy.unwrap_or_default(), @@ -301,7 +301,7 @@ impl BinseqWriterBuilder { /// /// * `W` - The underlying writer type that implements `Write` #[derive(Clone)] -pub struct BinseqWriter { +pub struct Writer { /// The underlying writer for output inner: W, @@ -312,11 +312,11 @@ pub struct BinseqWriter { /// When true, the header is not written to the output headless: bool, } -impl BinseqWriter { - /// Creates a new `BinseqWriter` instance with specified configuration +impl Writer { + /// Creates a new `Writer` instance with specified configuration /// /// This is a low-level constructor. For a more convenient way to create a - /// `BinseqWriter`, use the `BinseqWriterBuilder` struct. + /// `Writer`, use the `WriterBuilder` struct. /// /// # Arguments /// @@ -327,17 +327,17 @@ impl BinseqWriter { /// /// # Returns /// - /// * `Ok(BinseqWriter)` - A new writer instance + /// * `Ok(Writer)` - A new writer instance /// * `Err(Error)` - If writing the header fails /// /// # Examples /// /// ``` - /// # use binseq::bq::{BinseqHeaderBuilder, BinseqWriter}; + /// # use binseq::bq::{FileHeaderBuilder, Writer}; /// # use binseq::{Result, Policy}; /// # fn main() -> Result<()> { - /// let header = BinseqHeaderBuilder::new().slen(100).build()?; - /// let writer = BinseqWriter::new( + /// let header = FileHeaderBuilder::new().slen(100).build()?; + /// let writer = Writer::new( /// Vec::new(), /// header, /// Policy::default(), @@ -346,7 +346,7 @@ impl BinseqWriter { /// # Ok(()) /// # } /// ``` - pub fn new(mut inner: W, header: BinseqHeader, policy: Policy, headless: bool) -> Result { + pub fn new(mut inner: W, header: FileHeader, policy: Policy, headless: bool) -> Result { if !headless { header.write_bytes(&mut inner)?; } @@ -363,7 +363,7 @@ impl BinseqWriter { } /// Returns the header of the writer - pub fn header(&self) -> BinseqHeader { + pub fn header(&self) -> FileHeader { self.encoder.header } @@ -452,11 +452,11 @@ impl BinseqWriter { /// # Examples /// /// ``` - /// # use binseq::bq::{BinseqHeaderBuilder, BinseqWriterBuilder}; + /// # use binseq::bq::{FileHeaderBuilder, WriterBuilder}; /// # use binseq::{Result, SequencingRecordBuilder}; /// # fn main() -> Result<()> { - /// let header = BinseqHeaderBuilder::new().slen(8).build()?; - /// let mut writer = BinseqWriterBuilder::default() + /// let header = FileHeaderBuilder::new().slen(8).build()?; + /// let mut writer = WriterBuilder::default() /// .header(header) /// .build(Vec::new())?; /// @@ -513,11 +513,11 @@ impl BinseqWriter { /// # Examples /// /// ``` - /// # use binseq::bq::{BinseqHeaderBuilder, BinseqWriterBuilder}; + /// # use binseq::bq::{FileHeaderBuilder, WriterBuilder}; /// # use binseq::Result; /// # fn main() -> Result<()> { - /// let header = BinseqHeaderBuilder::new().slen(100).build()?; - /// let writer = BinseqWriterBuilder::default() + /// let header = FileHeaderBuilder::new().slen(100).build()?; + /// let writer = WriterBuilder::default() /// .header(header) /// .build(Vec::new())?; /// @@ -533,7 +533,7 @@ impl BinseqWriter { /// Gets a mutable reference to the underlying writer /// /// This allows direct access to the underlying writer while retaining - /// ownership of the `BinseqWriter`. + /// ownership of the `Writer`. pub fn by_ref(&mut self) -> &mut W { &mut self.inner } @@ -591,7 +591,7 @@ impl BinseqWriter { /// /// * `Ok(())` - If the contents were successfully ingested /// * `Err(Error)` - If writing the contents failed - pub fn ingest(&mut self, other: &mut BinseqWriter>) -> Result<()> { + pub fn ingest(&mut self, other: &mut Writer>) -> Result<()> { let other_inner = other.by_ref(); self.inner.write_all(other_inner)?; other_inner.clear(); @@ -607,11 +607,11 @@ impl BinseqWriter { /// - Processing very large datasets /// - Pipeline processing /// -/// The `StreamWriter` is a specialized version of `BinseqWriter` that +/// The `StreamWriter` is a specialized version of `Writer` that /// adds internal buffering and is optimized for streaming scenarios. pub struct StreamWriter { /// The underlying writer for processing sequences - writer: BinseqWriter>, + writer: Writer>, } impl StreamWriter { @@ -631,7 +631,7 @@ impl StreamWriter { /// /// * `Ok(StreamWriter)` - A new streaming writer /// * `Err(Error)` - If initialization fails - pub fn new(inner: W, header: BinseqHeader, policy: Policy, headless: bool) -> Result { + pub fn new(inner: W, header: FileHeader, policy: Policy, headless: bool) -> Result { Self::with_capacity(inner, 8192, header, policy, headless) } @@ -655,12 +655,12 @@ impl StreamWriter { pub fn with_capacity( inner: W, capacity: usize, - header: BinseqHeader, + header: FileHeader, policy: Policy, headless: bool, ) -> Result { let buffered = BufWriter::with_capacity(capacity, inner); - let writer = BinseqWriter::new(buffered, header, policy, headless)?; + let writer = Writer::new(buffered, header, policy, headless)?; Ok(Self { writer }) } @@ -724,7 +724,7 @@ impl StreamWriter { #[derive(Default)] pub struct StreamWriterBuilder { /// Required header defining sequence lengths and format - header: Option, + header: Option, /// Optional policy for handling invalid nucleotides policy: Option, /// Optional headless mode for parallel writing scenarios @@ -736,7 +736,7 @@ pub struct StreamWriterBuilder { impl StreamWriterBuilder { /// Sets the header for the writer #[must_use] - pub fn header(mut self, header: BinseqHeader) -> Self { + pub fn header(mut self, header: FileHeader) -> Self { self.header = Some(header); self } @@ -794,13 +794,13 @@ mod testing { use std::{fs::File, io::BufWriter}; use super::*; - use crate::bq::{BinseqHeaderBuilder, SIZE_HEADER}; + use crate::bq::{FileHeaderBuilder, SIZE_HEADER}; #[test] fn test_headless() -> Result<()> { let inner = Vec::new(); - let mut writer = BinseqWriterBuilder::default() - .header(BinseqHeaderBuilder::new().slen(32).build()?) + let mut writer = WriterBuilder::default() + .header(FileHeaderBuilder::new().slen(32).build()?) .headless(true) .build(inner)?; assert!(writer.is_headless()); @@ -812,8 +812,8 @@ mod testing { #[test] fn test_not_headless() -> Result<()> { let inner = Vec::new(); - let mut writer = BinseqWriterBuilder::default() - .header(BinseqHeaderBuilder::new().slen(32).build()?) + let mut writer = WriterBuilder::default() + .header(FileHeaderBuilder::new().slen(32).build()?) .build(inner)?; assert!(!writer.is_headless()); let inner = writer.by_ref(); @@ -823,8 +823,8 @@ mod testing { #[test] fn test_stdout() -> Result<()> { - let writer = BinseqWriterBuilder::default() - .header(BinseqHeaderBuilder::new().slen(32).build()?) + let writer = WriterBuilder::default() + .header(FileHeaderBuilder::new().slen(32).build()?) .build(std::io::stdout())?; assert!(!writer.is_headless()); Ok(()) @@ -834,8 +834,8 @@ mod testing { fn test_to_path() -> Result<()> { let path = "test_to_path.file"; let inner = File::create(path).map(BufWriter::new)?; - let mut writer = BinseqWriterBuilder::default() - .header(BinseqHeaderBuilder::new().slen(32).build()?) + let mut writer = WriterBuilder::default() + .header(FileHeaderBuilder::new().slen(32).build()?) .build(inner)?; assert!(!writer.is_headless()); let inner = writer.by_ref(); @@ -851,7 +851,7 @@ mod testing { fn test_stream_writer() -> Result<()> { let inner = Vec::new(); let writer = StreamWriterBuilder::default() - .header(BinseqHeaderBuilder::new().slen(32).build()?) + .header(FileHeaderBuilder::new().slen(32).build()?) .buffer_capacity(16384) .build(inner)?; diff --git a/src/cbq/mod.rs b/src/cbq/mod.rs index 0826925..f59b90a 100644 --- a/src/cbq/mod.rs +++ b/src/cbq/mod.rs @@ -80,8 +80,6 @@ pub use core::{ BlockHeader, BlockRange, ColumnarBlock, FileHeader, FileHeaderBuilder, Index, IndexFooter, IndexHeader, RefRecord, RefRecordIter, }; -// Re-export from crate root for backwards compatibility -pub use crate::{SequencingRecord, SequencingRecordBuilder}; pub use read::{MmapReader, Reader}; pub use write::ColumnarBlockWriter; diff --git a/src/error.rs b/src/error.rs index 89f4706..1099b74 100644 --- a/src/error.rs +++ b/src/error.rs @@ -166,7 +166,7 @@ pub enum ReadError { #[error("Unable to find an expected full block at position {0}")] UnexpectedEndOfFile(usize), - /// When the file metadata doesn't match the expected VBINSEQ format + /// When the file metadata doesn't match the expected VBQ format #[error("Unexpected file metadata")] InvalidFileType, @@ -264,17 +264,17 @@ pub enum WriteError { /// When trying to ingest data with an incompatible header /// /// The first parameter is the expected header, the second is the found header - #[error("Incompatible headers found in VBinseqWriter::ingest. Found ({1:?}) Expected ({0:?})")] - IncompatibleHeaders(crate::vbq::VBinseqHeader, crate::vbq::VBinseqHeader), + #[error("Incompatible headers found in vbq::Writer::ingest. Found ({1:?}) Expected ({0:?})")] + IncompatibleHeaders(crate::vbq::FileHeader, crate::vbq::FileHeader), /// When building a `SequencingRecord` without a primary sequence #[error("SequencingRecordBuilder requires a primary sequence (s_seq)")] MissingSequence, } -/// Errors related to VBINSEQ file indexing +/// Errors related to VBQ file indexing /// -/// These errors occur when there are issues with the index of a VBINSEQ file, +/// These errors occur when there are issues with the index of a VBQ file, /// such as corruption or mismatches with the underlying file. #[derive(thiserror::Error, Debug)] pub enum IndexError { diff --git a/src/vbq/header.rs b/src/vbq/header.rs index fdca1c1..16476bd 100644 --- a/src/vbq/header.rs +++ b/src/vbq/header.rs @@ -1,10 +1,10 @@ //! # File and Block Header Definitions //! -//! This module defines the header structures used in the VBINSEQ file format. +//! This module defines the header structures used in the VBQ file format. //! -//! The VBINSEQ format consists of two primary header types: +//! The VBQ format consists of two primary header types: //! -//! 1. `VBinseqHeader` - The file header that appears at the beginning of a VBINSEQ file, +//! 1. `FileHeader` - The file header that appears at the beginning of a VBQ file, //! containing information about the overall file format and configuration. //! //! 2. `BlockHeader` - Headers that appear before each block of records, containing @@ -21,7 +21,7 @@ use crate::error::{HeaderError, ReadError, Result}; /// Magic number for file identification: "VSEQ" in ASCII (0x51455356) /// -/// This constant is used in the file header to identify VBINSEQ formatted files. +/// This constant is used in the file header to identify VBQ formatted files. #[allow(clippy::unreadable_literal)] const MAGIC: u32 = 0x51455356; @@ -63,7 +63,7 @@ pub const RESERVED_BYTES: [u8; 13] = [42; 13]; pub const RESERVED_BYTES_BLOCK: [u8; 12] = [42; 12]; #[derive(Default, Debug, Clone, Copy)] -pub struct VBinseqHeaderBuilder { +pub struct FileHeaderBuilder { qual: Option, block: Option, compressed: Option, @@ -72,7 +72,7 @@ pub struct VBinseqHeaderBuilder { headers: Option, flags: Option, } -impl VBinseqHeaderBuilder { +impl FileHeaderBuilder { #[must_use] pub fn new() -> Self { Self::default() @@ -113,8 +113,8 @@ impl VBinseqHeaderBuilder { self } #[must_use] - pub fn build(self) -> VBinseqHeader { - VBinseqHeader::with_capacity( + pub fn build(self) -> FileHeader { + FileHeader::with_capacity( self.block.unwrap_or(BLOCK_SIZE), self.qual.unwrap_or(false), self.compressed.unwrap_or(false), @@ -126,10 +126,10 @@ impl VBinseqHeaderBuilder { } } -/// File header for VBINSEQ files +/// File header for VBQ files /// /// This structure represents the 32-byte header that appears at the beginning of every -/// VBINSEQ file. It contains configuration information about the file format, including +/// VBQ file. It contains configuration information about the file format, including /// whether quality scores are included, whether blocks are compressed, and whether /// records contain paired sequences. /// @@ -143,7 +143,7 @@ impl VBinseqHeaderBuilder { /// * `paired` - Whether records contain paired sequences (1 byte boolean) /// * `reserved` - Reserved bytes for future extensions (16 bytes) #[derive(Clone, Copy, Debug, PartialEq)] -pub struct VBinseqHeader { +pub struct FileHeader { /// Magic number to identify the file format ("VSEQ") /// /// Always set to 0x51455356 (4 bytes) @@ -198,7 +198,7 @@ pub struct VBinseqHeader { /// Currently filled with placeholder values (13 bytes) pub reserved: [u8; 13], } -impl Default for VBinseqHeader { +impl Default for FileHeader { /// Creates a default header with default block size and all features disabled /// /// The default header: @@ -220,8 +220,8 @@ impl Default for VBinseqHeader { ) } } -impl VBinseqHeader { - /// Creates a new VBINSEQ header with the default block size +impl FileHeader { + /// Creates a new VBQ header with the default block size /// /// # Parameters /// @@ -234,10 +234,10 @@ impl VBinseqHeader { /// # Example /// /// ```rust - /// use binseq::vbq::VBinseqHeaderBuilder; + /// use binseq::vbq::FileHeaderBuilder; /// /// // Create header with quality scores and compression, without paired sequences - /// let header = VBinseqHeaderBuilder::new() + /// let header = FileHeaderBuilder::new() /// .qual(true) /// .compressed(true) /// .build(); @@ -256,7 +256,7 @@ impl VBinseqHeader { ) } - /// Creates a new VBINSEQ header with a custom block size + /// Creates a new VBQ header with a custom block size /// /// # Parameters /// @@ -268,10 +268,10 @@ impl VBinseqHeader { /// # Example /// /// ```rust - /// use binseq::vbq::VBinseqHeaderBuilder; + /// use binseq::vbq::FileHeaderBuilder; /// /// // Create header with a 256KB block size, with quality scores and compression - /// let header = VBinseqHeaderBuilder::new() + /// let header = FileHeaderBuilder::new() /// .block(256 * 1024) /// .qual(true) /// .compressed(true) @@ -308,7 +308,7 @@ impl VBinseqHeader { /// Creates a header from a 32-byte buffer /// - /// This function parses a raw byte buffer into a `VBinseqHeader` structure, + /// This function parses a raw byte buffer into a `FileHeader` structure, /// validating the magic number and format version. /// /// # Parameters @@ -399,7 +399,7 @@ impl VBinseqHeader { /// Reads a header from a reader /// /// This function reads 32 bytes from the provided reader and parses them into - /// a `VBinseqHeader` structure. + /// a `FileHeader` structure. /// /// # Parameters /// @@ -425,9 +425,9 @@ impl VBinseqHeader { } } -/// Block header for VBINSEQ block data +/// Block header for VBQ block data /// -/// Each block in a VBINSEQ file is preceded by a 32-byte block header that contains +/// Each block in a VBQ file is preceded by a 32-byte block header that contains /// information about the block including its size and the number of records it contains. /// /// # Fields diff --git a/src/vbq/index.rs b/src/vbq/index.rs index a9e1194..9f3418f 100644 --- a/src/vbq/index.rs +++ b/src/vbq/index.rs @@ -43,7 +43,7 @@ use byteorder::{ByteOrder, LittleEndian}; use zstd::{Decoder, Encoder}; use super::{ - BlockHeader, VBinseqHeader, + BlockHeader, FileHeader, header::{SIZE_BLOCK_HEADER, SIZE_HEADER}, }; use crate::error::{IndexError, Result}; @@ -61,13 +61,13 @@ pub const INDEX_END_MAGIC: u64 = 0x444E455845444E49; /// Index Block Reservation pub const INDEX_RESERVATION: [u8; 4] = [42; 4]; -/// Descriptor of the dimensions of a block in a VBINSEQ file +/// Descriptor of the dimensions of a block in a VBQ file /// -/// A `BlockRange` contains metadata about a single block within a VBINSEQ file, +/// A `BlockRange` contains metadata about a single block within a VBQ file, /// including its position, size, and record count. This information enables /// efficient random access to blocks without scanning the entire file. /// -/// Block ranges are stored in a `BlockIndex` to form a complete index of a VBINSEQ file. +/// Block ranges are stored in a `BlockIndex` to form a complete index of a VBQ file. /// Each range is serialized to a fixed-size 32-byte structure when stored in the embedded index. /// /// ## Format Changes (v0.7.0+) @@ -249,22 +249,22 @@ impl BlockRange { } } -/// Header for a VBINSEQ index file +/// Header for a VBQ index file /// /// The `IndexHeader` contains metadata about an index file, including a magic number /// for validation and the size of the indexed file. This allows verifying that an index -/// file matches its corresponding VBINSEQ file. +/// file matches its corresponding VBQ file. /// /// The header has a fixed size of 32 bytes to ensure compatibility across versions. #[derive(Debug, Clone, Copy)] pub struct IndexHeader { /// Magic number to designate the index file ("VBQINDEX" in ASCII) /// - /// This is used to verify that a file is indeed a VBINSEQ index file. + /// This is used to verify that a file is indeed a VBQ index file. /// (8 bytes in serialized form) magic: u64, - /// Total size of the indexed VBINSEQ file in bytes + /// Total size of the indexed VBQ file in bytes /// /// This is used to verify that the index matches the file it references. /// (8 bytes in serialized form) @@ -276,11 +276,11 @@ pub struct IndexHeader { reserved: [u8; INDEX_HEADER_SIZE - 16], } impl IndexHeader { - /// Creates a new index header for a VBINSEQ file of the specified size + /// Creates a new index header for a VBQ file of the specified size /// /// # Parameters /// - /// * `bytes` - The total size of the VBINSEQ file being indexed, in bytes + /// * `bytes` - The total size of the VBQ file being indexed, in bytes /// /// # Returns /// @@ -296,7 +296,7 @@ impl IndexHeader { /// /// This method reads 32 bytes from the provided reader and deserializes them /// into an `IndexHeader`. It validates the magic number to ensure that the file - /// is indeed a VBINSEQ index file. + /// is indeed a VBQ index file. /// /// # Parameters /// @@ -367,14 +367,14 @@ impl IndexHeader { } } -/// Complete index for a VBINSEQ file +/// Complete index for a VBQ file /// -/// A `BlockIndex` contains metadata about a VBINSEQ file and all of its blocks, +/// A `BlockIndex` contains metadata about a VBQ file and all of its blocks, /// enabling efficient random access and parallel processing. It consists of an /// `IndexHeader` and a collection of `BlockRange` entries, one for each block in /// the file. /// -/// The index can be created by scanning a VBINSEQ file or loaded from a previously +/// The index can be created by scanning a VBQ file or loaded from a previously /// created index file. Once loaded, it provides information about block locations, /// sizes, and record counts. /// @@ -384,7 +384,7 @@ impl IndexHeader { /// use binseq::vbq::{BlockIndex, MmapReader}; /// use std::path::Path; /// -/// // Create an index from a VBINSEQ file +/// // Create an index from a VBQ file /// let vbq_path = Path::new("example.vbq"); /// let index = BlockIndex::from_vbq(vbq_path).unwrap(); /// @@ -425,7 +425,7 @@ impl BlockIndex { /// /// # Returns /// - /// The number of blocks in the VBINSEQ file described by this index + /// The number of blocks in the VBQ file described by this index /// /// # Examples /// @@ -445,7 +445,7 @@ impl BlockIndex { /// Saves the index to a file /// /// This writes the index header and all block ranges to a file, which can be loaded - /// later to avoid rescanning the VBINSEQ file. The index is compressed to reduce + /// later to avoid rescanning the VBQ file. The index is compressed to reduce /// storage space. /// /// # Parameters @@ -463,7 +463,7 @@ impl BlockIndex { /// use binseq::vbq::BlockIndex; /// use std::path::Path; /// - /// // Create an index from a VBINSEQ file + /// // Create an index from a VBQ file /// let index = BlockIndex::from_vbq(Path::new("example.vbq")).unwrap(); /// /// // Save it for future use @@ -521,15 +521,15 @@ impl BlockIndex { self.ranges.push(range); } - /// Creates a new index by scanning a VBINSEQ file + /// Creates a new index by scanning a VBQ file /// - /// This method memory-maps the specified VBINSEQ file and scans it block by block + /// This method memory-maps the specified VBQ file and scans it block by block /// to create an index. The index can then be saved to a file for future use, enabling /// efficient random access without rescanning the file. /// /// # Parameters /// - /// * `path` - Path to the VBINSEQ file to index + /// * `path` - Path to the VBQ file to index /// /// # Returns /// @@ -542,7 +542,7 @@ impl BlockIndex { /// use binseq::vbq::BlockIndex; /// use std::path::Path; /// - /// // Create an index from a VBINSEQ file + /// // Create an index from a VBQ file /// let index = BlockIndex::from_vbq(Path::new("example.vbq")).unwrap(); /// /// // Save the index for future use @@ -572,7 +572,7 @@ impl BlockIndex { let _header = { let mut header_bytes = [0u8; SIZE_HEADER]; header_bytes.copy_from_slice(&mmap[..SIZE_HEADER]); - VBinseqHeader::from_bytes(&header_bytes)? + FileHeader::from_bytes(&header_bytes)? }; // Initialize position after the header diff --git a/src/vbq/mod.rs b/src/vbq/mod.rs index bdbc0b8..f5ec06b 100644 --- a/src/vbq/mod.rs +++ b/src/vbq/mod.rs @@ -1,15 +1,15 @@ -//! # VBINSEQ Format +//! # VBQ Format //! -//! VBINSEQ is a high-performance binary format for variable-length nucleotide sequences +//! VBQ is a high-performance binary format for variable-length nucleotide sequences //! that optimizes both storage efficiency and parallel processing capabilities. //! //! For more information on the format, please refer to our [preprint](https://www.biorxiv.org/content/10.1101/2025.04.08.647863v1). //! //! ## Overview //! -//! VBINSEQ extends the core principles of BINSEQ to accommodate: +//! VBQ extends the core principles of BINSEQ to accommodate: //! -//! * **Variable-length sequences**: Unlike BINSEQ which requires fixed-length reads, VBINSEQ can store +//! * **Variable-length sequences**: Unlike BINSEQ which requires fixed-length reads, VBQ can store //! sequences of any length, making it suitable for technologies like PacBio and Oxford Nanopore. //! //! * **Quality scores**: Optional storage of quality scores alongside nucleotide data when needed. @@ -30,7 +30,7 @@ //! //! ## File Structure //! -//! A VBINSEQ file consists of a 32-byte header followed by record blocks and an embedded index: +//! A VBQ file consists of a 32-byte header followed by record blocks and an embedded index: //! //! ```text //! ┌───────────────────┐ @@ -78,7 +78,7 @@ //! //! ## Performance Characteristics //! -//! VBINSEQ is designed for high-throughput parallel processing: +//! VBQ is designed for high-throughput parallel processing: //! //! * Independent blocks enable true parallel processing without synchronization //! * Memory-mapped access provides efficient I/O @@ -91,7 +91,7 @@ //! ``` //! use std::fs::File; //! use std::io::BufWriter; -//! use binseq::vbq::{VBinseqHeaderBuilder, VBinseqWriterBuilder, MmapReader}; +//! use binseq::vbq::{FileHeaderBuilder, WriterBuilder, MmapReader}; //! use binseq::{BinseqRecord, SequencingRecordBuilder}; //! //! /* @@ -99,7 +99,7 @@ //! */ //! //! // Create a header for sequences with quality scores and headers -//! let header = VBinseqHeaderBuilder::new() +//! let header = FileHeaderBuilder::new() //! .qual(true) //! .compressed(true) //! .headers(true) @@ -107,7 +107,7 @@ //! //! // Create a writer //! let file = File::create("example.vbq").unwrap(); -//! let mut writer = VBinseqWriterBuilder::default() +//! let mut writer = WriterBuilder::default() //! .header(header) //! .build(BufWriter::new(file)) //! .unwrap(); @@ -150,7 +150,7 @@ mod index; mod reader; mod writer; -pub use header::{BlockHeader, VBinseqHeader, VBinseqHeaderBuilder}; +pub use header::{BlockHeader, FileHeader, FileHeaderBuilder}; pub use index::{BlockIndex, BlockRange}; pub use reader::{MmapReader, RecordBlock, RecordBlockIter, RefRecord}; -pub use writer::{VBinseqWriter, VBinseqWriterBuilder}; +pub use writer::{Writer, WriterBuilder}; diff --git a/src/vbq/reader.rs b/src/vbq/reader.rs index bb6ee58..82426b9 100644 --- a/src/vbq/reader.rs +++ b/src/vbq/reader.rs @@ -1,6 +1,6 @@ -//! Reader implementation for VBINSEQ files +//! Reader implementation for VBQ files //! -//! This module provides functionality for reading sequence data from VBINSEQ files, +//! This module provides functionality for reading sequence data from VBQ files, //! including support for compressed blocks, quality scores, paired-end reads, and sequence headers. //! //! ## Format Changes (v0.7.0+) @@ -60,7 +60,7 @@ use memmap2::Mmap; use zstd::zstd_safe; use super::{ - BlockHeader, BlockIndex, BlockRange, VBinseqHeader, + BlockHeader, BlockIndex, BlockRange, FileHeader, header::{SIZE_BLOCK_HEADER, SIZE_HEADER}, }; use crate::DEFAULT_QUALITY_SCORE; @@ -132,9 +132,9 @@ struct RecordMetadata { has_quality: bool, } -/// A container for a block of VBINSEQ records +/// A container for a block of VBQ records /// -/// The `RecordBlock` struct represents a single block of records read from a VBINSEQ file. +/// The `RecordBlock` struct represents a single block of records read from a VBQ file. /// It stores the raw data for multiple records in vectors, allowing efficient iteration /// over the records without copying memory for each record. /// @@ -193,7 +193,7 @@ pub struct RecordBlock { impl RecordBlock { /// Creates a new empty `RecordBlock` with the specified block size /// - /// The block size should match the one specified in the VBINSEQ file header + /// The block size should match the one specified in the VBQ file header /// for proper operation. This is typically handled automatically when using /// `MmapReader::new_block()`. /// @@ -727,9 +727,9 @@ impl BinseqRecord for RefRecord<'_> { } } -/// Memory-mapped reader for VBINSEQ files +/// Memory-mapped reader for VBQ files /// -/// [`MmapReader`] provides efficient, memory-mapped access to VBINSEQ files. It allows +/// [`MmapReader`] provides efficient, memory-mapped access to VBQ files. It allows /// sequential reading of record blocks and supports parallel processing of records. /// /// ## Format Support (v0.7.0+) @@ -743,7 +743,7 @@ impl BinseqRecord for RefRecord<'_> { /// which can be more efficient than standard file I/O, especially for large files. /// /// The [`MmapReader`] is designed to be used in a multi-threaded environment, and it -/// is built around [`RecordBlock`]s which are the units of data in a VBINSEQ file. +/// is built around [`RecordBlock`]s which are the units of data in a VBQ file. /// Each one would be held by a separate thread and would load data from the shared /// [`MmapReader`] through the [`MmapReader::read_block_into`] method. However, they can /// also be used in a single-threaded environment for sequential processing. @@ -792,14 +792,14 @@ impl BinseqRecord for RefRecord<'_> { /// } /// ``` pub struct MmapReader { - /// Path to the VBINSEQ file + /// Path to the VBQ file path: PathBuf, /// Memory-mapped file contents for efficient access mmap: Arc, /// Parsed header information from the file - header: VBinseqHeader, + header: FileHeader, /// Current cursor position in the file (in bytes) pos: usize, @@ -814,10 +814,10 @@ pub struct MmapReader { default_quality_score: u8, } impl MmapReader { - /// Creates a new `MmapReader` for a VBINSEQ file + /// Creates a new `MmapReader` for a VBQ file /// /// This method opens the specified file, memory-maps its contents, reads the - /// VBINSEQ header information, and loads the embedded index. The reader is positioned + /// VBQ header information, and loads the embedded index. The reader is positioned /// at the beginning of the first record block after the header. /// /// ## Index Loading (v0.7.0+) @@ -828,7 +828,7 @@ impl MmapReader { /// /// # Parameters /// - /// * `path` - Path to the VBINSEQ file to open + /// * `path` - Path to the VBQ file to open /// /// # Returns /// @@ -838,7 +838,7 @@ impl MmapReader { /// /// * `ReadError::InvalidFileType` if the path doesn't point to a regular file /// * I/O errors if the file can't be opened or memory-mapped - /// * Header validation errors if the file doesn't contain a valid VBINSEQ header + /// * Header validation errors if the file doesn't contain a valid VBQ header /// /// # Examples /// @@ -861,7 +861,7 @@ impl MmapReader { let header = { let mut header_bytes = [0u8; SIZE_HEADER]; header_bytes.copy_from_slice(&mmap[..SIZE_HEADER]); - VBinseqHeader::from_bytes(&header_bytes)? + FileHeader::from_bytes(&header_bytes)? }; Ok(Self { @@ -924,7 +924,7 @@ impl MmapReader { /// Returns the path where the index file would be located /// /// The index file is used for random access to blocks and has the same path as - /// the VBINSEQ file with the ".vqi" extension appended. + /// the VBQ file with the ".vqi" extension appended. /// /// # Returns /// @@ -959,9 +959,9 @@ impl MmapReader { /// /// # Returns /// - /// A copy of the file's `VBinseqHeader` + /// A copy of the file's `FileHeader` #[must_use] - pub fn header(&self) -> VBinseqHeader { + pub fn header(&self) -> FileHeader { self.header } @@ -976,7 +976,7 @@ impl MmapReader { /// This method reads the next block of records from the current position in the file /// and populates the provided `RecordBlock` with the data. The block is cleared and reused /// to avoid unnecessary memory allocations. This is the primary method for sequential - /// reading of VBINSEQ files. + /// reading of VBQ files. /// /// The method automatically handles decompression if the file was written with /// compression enabled and updates the total record count as it progresses through the file. @@ -1080,12 +1080,12 @@ impl MmapReader { Ok(true) } - /// Loads or creates the block index for this VBINSEQ file + /// Loads or creates the block index for this VBQ file /// /// The block index provides metadata about each block in the file, enabling /// random access to blocks and parallel processing. This method first attempts to /// load an existing index file. If the index doesn't exist or doesn't match the - /// current file, it automatically generates a new index from the VBINSEQ file + /// current file, it automatically generates a new index from the VBQ file /// and saves it for future use. /// /// # Returns @@ -1095,7 +1095,7 @@ impl MmapReader { /// # Errors /// /// * File I/O errors when reading or creating the index - /// * Parsing errors if the VBINSEQ file has invalid format + /// * Parsing errors if the VBQ file has invalid format /// * Other index-related errors that cannot be resolved by creating a new index /// /// # Examples @@ -1114,7 +1114,7 @@ impl MmapReader { /// /// # Notes /// - /// The index file is stored with the same path as the VBINSEQ file but with a ".vqi" + /// The index file is stored with the same path as the VBQ file but with a ".vqi" /// extension appended. This allows for reusing the index across multiple runs, /// which can significantly improve startup performance for large files. pub fn load_index(&self) -> Result { @@ -1149,7 +1149,7 @@ impl MmapReader { impl ParallelReader for MmapReader { /// Processes all records in the file in parallel using multiple threads /// - /// This method provides efficient parallel processing of VBINSEQ files by distributing + /// This method provides efficient parallel processing of VBQ files by distributing /// blocks across multiple worker threads. The file's block structure is leveraged to divide /// the work evenly without requiring thread synchronization during processing, which leads /// to near-linear scaling with the number of threads. @@ -1226,7 +1226,7 @@ impl ParallelReader for MmapReader { /// } /// } /// - /// // Use the processor with a VBINSEQ file + /// // Use the processor with a VBQ file /// let reader = MmapReader::new("example.vbq").unwrap(); /// let counter = RecordCounter::new(); /// diff --git a/src/vbq/writer.rs b/src/vbq/writer.rs index 8ac056c..e451e66 100644 --- a/src/vbq/writer.rs +++ b/src/vbq/writer.rs @@ -1,9 +1,9 @@ -//! Writer implementation for VBINSEQ files +//! Writer implementation for VBQ files //! -//! This module provides functionality for writing sequence data to VBINSEQ files, +//! This module provides functionality for writing sequence data to VBQ files, //! including support for compression, quality scores, paired-end reads, and sequence headers. //! -//! The VBINSEQ writer implements a block-based approach where records are packed +//! The VBQ writer implements a block-based approach where records are packed //! into fixed-size blocks. Each block has a header containing metadata about the //! records it contains. Blocks may be optionally compressed using zstd compression. //! @@ -29,13 +29,13 @@ //! # Example //! //! ```rust,no_run -//! use binseq::vbq::{VBinseqWriterBuilder, VBinseqHeaderBuilder}; +//! use binseq::vbq::{WriterBuilder, FileHeaderBuilder}; //! use binseq::SequencingRecordBuilder; //! use std::fs::File; //! -//! // Create a VBINSEQ file writer with headers and compression +//! // Create a VBQ file writer with headers and compression //! let file = File::create("example.vbq").unwrap(); -//! let header = VBinseqHeaderBuilder::new() +//! let header = FileHeaderBuilder::new() //! .block(128 * 1024) //! .qual(true) //! .compressed(true) @@ -43,7 +43,7 @@ //! .flags(true) //! .build(); //! -//! let mut writer = VBinseqWriterBuilder::default() +//! let mut writer = WriterBuilder::default() //! .header(header) //! .build(file) //! .unwrap(); @@ -70,7 +70,7 @@ use rand::SeedableRng; use rand::rngs::SmallRng; use zstd::stream::copy_encode; -use super::header::{BlockHeader, VBinseqHeader}; +use super::header::{BlockHeader, FileHeader}; use crate::SequencingRecord; use crate::error::{Result, WriteError}; use crate::policy::{Policy, RNG_SEED}; @@ -78,23 +78,23 @@ use crate::vbq::header::{SIZE_BLOCK_HEADER, SIZE_HEADER}; use crate::vbq::index::{INDEX_END_MAGIC, IndexHeader}; use crate::vbq::{BlockIndex, BlockRange}; -/// A builder for creating configured `VBinseqWriter` instances +/// A builder for creating configured `Writer` instances /// /// This builder provides a fluent interface for configuring and creating a -/// `VBinseqWriter` with customized settings. It allows specifying the file header, +/// `Writer` with customized settings. It allows specifying the file header, /// encoding policy, and whether to operate in headless mode. /// /// # Examples /// /// ```rust,no_run -/// use binseq::vbq::{VBinseqWriterBuilder, VBinseqHeaderBuilder}; +/// use binseq::vbq::{WriterBuilder, FileHeaderBuilder}; /// use binseq::Policy; /// use std::fs::File; /// /// // Create a writer with custom settings /// let file = File::create("example.vbq").unwrap(); -/// let mut writer = VBinseqWriterBuilder::default() -/// .header(VBinseqHeaderBuilder::new() +/// let mut writer = WriterBuilder::default() +/// .header(FileHeaderBuilder::new() /// .block(65536) /// .qual(true) /// .compressed(true) @@ -106,23 +106,23 @@ use crate::vbq::{BlockIndex, BlockRange}; /// // Use the writer... /// ``` #[derive(Default)] -pub struct VBinseqWriterBuilder { +pub struct WriterBuilder { /// Header of the file - header: Option, + header: Option, /// Optional policy for encoding policy: Option, /// Optional headless mode (used in parallel writing) headless: Option, } -impl VBinseqWriterBuilder { - /// Sets the header for the VBINSEQ file +impl WriterBuilder { + /// Sets the header for the VBQ file /// /// The header defines the file format parameters such as block size, whether /// the file contains quality scores, paired-end reads, and compression settings. /// /// # Parameters /// - /// * `header` - The `VBinseqHeader` to use for the file + /// * `header` - The `FileHeader` to use for the file /// /// # Returns /// @@ -131,20 +131,20 @@ impl VBinseqWriterBuilder { /// # Examples /// /// ```rust,no_run - /// use binseq::vbq::{VBinseqWriterBuilder, VBinseqHeaderBuilder}; + /// use binseq::vbq::{WriterBuilder, FileHeaderBuilder}; /// /// // Create a header with 64KB blocks and quality scores - /// let header = VBinseqHeaderBuilder::new() + /// let header = FileHeaderBuilder::new() /// .block(65536) /// .qual(true) /// .paired(true) /// .compressed(true) /// .build(); /// - /// let builder = VBinseqWriterBuilder::default().header(header); + /// let builder = WriterBuilder::default().header(header); /// ``` #[must_use] - pub fn header(mut self, header: VBinseqHeader) -> Self { + pub fn header(mut self, header: FileHeader) -> Self { self.header = Some(header); self } @@ -166,10 +166,10 @@ impl VBinseqWriterBuilder { /// # Examples /// /// ```rust,no_run - /// use binseq::vbq::{VBinseqWriterBuilder}; + /// use binseq::vbq::{WriterBuilder}; /// use binseq::Policy; /// - /// let builder = VBinseqWriterBuilder::default().policy(Policy::IgnoreSequence); + /// let builder = WriterBuilder::default().policy(Policy::IgnoreSequence); /// ``` #[must_use] pub fn policy(mut self, policy: Policy) -> Self { @@ -194,10 +194,10 @@ impl VBinseqWriterBuilder { /// # Examples /// /// ```rust,no_run - /// use binseq::vbq::VBinseqWriterBuilder; + /// use binseq::vbq::WriterBuilder; /// /// // Create a headless writer for parallel writing - /// let builder = VBinseqWriterBuilder::default().headless(true); + /// let builder = WriterBuilder::default().headless(true); /// ``` #[must_use] pub fn headless(mut self, headless: bool) -> Self { @@ -205,9 +205,9 @@ impl VBinseqWriterBuilder { self } - /// Builds a `VBinseqWriter` with the configured settings + /// Builds a `Writer` with the configured settings /// - /// This finalizes the builder and creates a new `VBinseqWriter` instance using + /// This finalizes the builder and creates a new `Writer` instance using /// the provided writer and the configured settings. If any settings were not /// explicitly set, default values will be used. /// @@ -217,22 +217,22 @@ impl VBinseqWriterBuilder { /// /// # Returns /// - /// * `Ok(VBinseqWriter)` - A configured `VBinseqWriter` ready for use + /// * `Ok(Writer)` - A configured `Writer` ready for use /// * `Err(_)` - If an error occurred while initializing the writer /// /// # Examples /// /// ```rust,no_run - /// use binseq::vbq::VBinseqWriterBuilder; + /// use binseq::vbq::WriterBuilder; /// use std::fs::File; /// /// let file = File::create("example.vbq").unwrap(); - /// let mut writer = VBinseqWriterBuilder::default() + /// let mut writer = WriterBuilder::default() /// .build(file) /// .unwrap(); /// ``` - pub fn build(self, inner: W) -> Result> { - VBinseqWriter::new( + pub fn build(self, inner: W) -> Result> { + Writer::new( inner, self.header.unwrap_or_default(), self.policy.unwrap_or_default(), @@ -241,15 +241,15 @@ impl VBinseqWriterBuilder { } } -/// Writer for VBINSEQ format files +/// Writer for VBQ format files /// -/// The `VBinseqWriter` handles writing nucleotide sequence data to VBINSEQ files in a +/// The `Writer` handles writing nucleotide sequence data to VBQ files in a /// block-based format. It manages the file structure, compression settings, and ensures /// data is properly encoded and organized. /// /// ## File Structure /// -/// A VBINSEQ file consists of: +/// A VBQ file consists of: /// 1. A file header that defines parameters like block size and compression settings /// 2. A series of blocks, each with: /// - A block header with metadata (e.g., record count) @@ -265,18 +265,18 @@ impl VBinseqWriterBuilder { /// - Single-end sequences with or without quality scores /// - Paired-end sequences with or without quality scores /// -/// It's recommended to use the `VBinseqWriterBuilder` to create and configure a writer +/// It's recommended to use the `WriterBuilder` to create and configure a writer /// instance with the appropriate settings. /// /// ```rust,no_run -/// use binseq::vbq::{VBinseqWriterBuilder, VBinseqHeader}; +/// use binseq::vbq::{WriterBuilder, FileHeader}; /// use binseq::SequencingRecordBuilder; /// use std::fs::File; /// /// // Create a writer for single-end reads /// let file = File::create("example.vbq").unwrap(); -/// let mut writer = VBinseqWriterBuilder::default() -/// .header(VBinseqHeader::default()) +/// let mut writer = WriterBuilder::default() +/// .header(FileHeader::default()) /// .build(file) /// .unwrap(); /// @@ -290,12 +290,12 @@ impl VBinseqWriterBuilder { /// // Writer automatically flushes when dropped /// ``` #[derive(Clone)] -pub struct VBinseqWriter { +pub struct Writer { /// Inner Writer inner: W, /// Header of the file - header: VBinseqHeader, + header: FileHeader, /// Encoder for nucleotide sequences encoder: Encoder, @@ -315,8 +315,8 @@ pub struct VBinseqWriter { /// Determines if index is already written index_written: bool, } -impl VBinseqWriter { - pub fn new(inner: W, header: VBinseqHeader, policy: Policy, headless: bool) -> Result { +impl Writer { + pub fn new(inner: W, header: FileHeader, policy: Policy, headless: bool) -> Result { let mut wtr = Self { inner, header, @@ -342,7 +342,7 @@ impl VBinseqWriter { /// Initializes the writer by writing the file header /// /// This method is called automatically during creation unless headless mode is enabled. - /// It writes the `VBinseqHeader` to the underlying writer. + /// It writes the `FileHeader` to the underlying writer. /// /// # Returns /// @@ -367,15 +367,15 @@ impl VBinseqWriter { /// # Examples /// /// ```rust,no_run - /// use binseq::vbq::{VBinseqWriterBuilder, VBinseqHeader}; + /// use binseq::vbq::{WriterBuilder, FileHeader}; /// use std::fs::File; /// /// // Create a header for paired-end reads - /// let mut header = VBinseqHeader::default(); + /// let mut header = FileHeader::default(); /// header.paired = true; /// /// let file = File::create("paired_reads.vbq").unwrap(); - /// let writer = VBinseqWriterBuilder::default() + /// let writer = WriterBuilder::default() /// .header(header) /// .build(file) /// .unwrap(); @@ -387,7 +387,7 @@ impl VBinseqWriter { } /// Returns the header of the writer - pub fn header(&self) -> VBinseqHeader { + pub fn header(&self) -> FileHeader { self.header } @@ -410,15 +410,15 @@ impl VBinseqWriter { /// # Examples /// /// ```rust,no_run - /// use binseq::vbq::{VBinseqWriterBuilder, VBinseqHeader}; + /// use binseq::vbq::{WriterBuilder, FileHeader}; /// use std::fs::File; /// /// // Create a header for sequences with quality scores - /// let mut header = VBinseqHeader::default(); + /// let mut header = FileHeader::default(); /// header.qual = true; /// /// let file = File::create("reads_with_quality.vbq").unwrap(); - /// let writer = VBinseqWriterBuilder::default() + /// let writer = WriterBuilder::default() /// .header(header) /// .build(file) /// .unwrap(); @@ -487,16 +487,16 @@ impl VBinseqWriter { /// # Examples /// /// ```rust,no_run - /// use binseq::vbq::{VBinseqWriterBuilder, VBinseqHeaderBuilder}; + /// use binseq::vbq::{WriterBuilder, FileHeaderBuilder}; /// use binseq::SequencingRecordBuilder; /// use std::fs::File; /// - /// let header = VBinseqHeaderBuilder::new() + /// let header = FileHeaderBuilder::new() /// .qual(true) /// .headers(true) /// .build(); /// - /// let mut writer = VBinseqWriterBuilder::default() + /// let mut writer = WriterBuilder::default() /// .header(header) /// .build(File::create("example.vbq").unwrap()) /// .unwrap(); @@ -608,12 +608,12 @@ impl VBinseqWriter { /// # Examples /// /// ```rust,no_run - /// use binseq::vbq::{VBinseqWriterBuilder, VBinseqHeader}; + /// use binseq::vbq::{WriterBuilder, FileHeader}; /// use binseq::SequencingRecordBuilder; /// use std::fs::File; /// /// let file = File::create("example.vbq").unwrap(); - /// let mut writer = VBinseqWriterBuilder::default() + /// let mut writer = WriterBuilder::default() /// .build(file) /// .unwrap(); /// @@ -656,7 +656,7 @@ impl VBinseqWriter { &mut self.cblock } - /// Ingests data from another `VBinseqWriter` that uses a `Vec` as its inner writer + /// Ingests data from another `Writer` that uses a `Vec` as its inner writer /// /// This method is particularly useful for parallel processing, where multiple writers /// might be writing to memory buffers and need to be combined into a single file. It @@ -666,7 +666,7 @@ impl VBinseqWriter { /// /// # Parameters /// - /// * `other` - Another `VBinseqWriter` whose inner writer is a `Vec` + /// * `other` - Another `Writer` whose inner writer is a `Vec` /// /// # Returns /// @@ -682,18 +682,18 @@ impl VBinseqWriter { /// # Examples /// /// ```rust,no_run - /// use binseq::vbq::{VBinseqWriterBuilder, VBinseqHeader}; + /// use binseq::vbq::{WriterBuilder, FileHeader}; /// use binseq::SequencingRecordBuilder; /// use std::fs::File; /// /// // Create a file writer /// let file = File::create("combined.vbq").unwrap(); - /// let mut file_writer = VBinseqWriterBuilder::default() + /// let mut file_writer = WriterBuilder::default() /// .build(file) /// .unwrap(); /// /// // Create a memory writer - /// let mut mem_writer = VBinseqWriterBuilder::default() + /// let mut mem_writer = WriterBuilder::default() /// .build(Vec::new()) /// .unwrap(); /// @@ -707,7 +707,7 @@ impl VBinseqWriter { /// // Ingest data from memory writer into file writer /// file_writer.ingest(&mut mem_writer).unwrap(); /// ``` - pub fn ingest(&mut self, other: &mut VBinseqWriter>) -> Result<()> { + pub fn ingest(&mut self, other: &mut Writer>) -> Result<()> { if self.header != other.header { return Err(WriteError::IncompatibleHeaders(self.header, other.header).into()); } @@ -808,10 +808,9 @@ fn impl_flush_block( Ok(()) } -impl Drop for VBinseqWriter { +impl Drop for Writer { fn drop(&mut self) { - self.finish() - .expect("VBinseqWriter: Failed to finish writing"); + self.finish().expect("Writer: Failed to finish writing"); } } @@ -1205,18 +1204,14 @@ impl Encoder { mod tests { use super::*; use crate::SequencingRecordBuilder; - use crate::vbq::{VBinseqHeaderBuilder, header::SIZE_HEADER}; + use crate::vbq::{FileHeaderBuilder, header::SIZE_HEADER}; #[test] fn test_headless_writer() -> super::Result<()> { - let writer = VBinseqWriterBuilder::default() - .headless(true) - .build(Vec::new())?; + let writer = WriterBuilder::default().headless(true).build(Vec::new())?; assert_eq!(writer.inner.len(), 0); - let writer = VBinseqWriterBuilder::default() - .headless(false) - .build(Vec::new())?; + let writer = WriterBuilder::default().headless(false).build(Vec::new())?; assert_eq!(writer.inner.len(), SIZE_HEADER); Ok(()) @@ -1225,16 +1220,16 @@ mod tests { #[test] fn test_ingest_empty_writer() -> super::Result<()> { // Test ingesting from an empty writer - let header = VBinseqHeaderBuilder::new().build(); + let header = FileHeaderBuilder::new().build(); // Create a source writer that's empty - let mut source = VBinseqWriterBuilder::default() + let mut source = WriterBuilder::default() .header(header) .headless(true) .build(Vec::new())?; // Create a destination writer - let mut dest = VBinseqWriterBuilder::default() + let mut dest = WriterBuilder::default() .header(header) .headless(true) .build(Vec::new())?; @@ -1255,10 +1250,10 @@ mod tests { #[test] fn test_ingest_single_record() -> super::Result<()> { // Test ingesting a single record - let header = VBinseqHeaderBuilder::new().build(); + let header = FileHeaderBuilder::new().build(); // Create a source writer with a single record - let mut source = VBinseqWriterBuilder::default() + let mut source = WriterBuilder::default() .header(header) .headless(true) .build(Vec::new())?; @@ -1274,7 +1269,7 @@ mod tests { assert!(source.by_ref().is_empty()); // Create a destination writer - let mut dest = VBinseqWriterBuilder::default() + let mut dest = WriterBuilder::default() .header(header) .headless(true) .build(Vec::new())?; @@ -1304,10 +1299,10 @@ mod tests { #[test] fn test_ingest_multi_record() -> super::Result<()> { // Test ingesting a single record - let header = VBinseqHeaderBuilder::new().build(); + let header = FileHeaderBuilder::new().build(); // Create a source writer with a single record - let mut source = VBinseqWriterBuilder::default() + let mut source = WriterBuilder::default() .header(header) .headless(true) .build(Vec::new())?; @@ -1324,7 +1319,7 @@ mod tests { assert!(source.by_ref().is_empty()); // Create a destination writer - let mut dest = VBinseqWriterBuilder::default() + let mut dest = WriterBuilder::default() .header(header) .headless(true) .build(Vec::new())?; @@ -1354,10 +1349,10 @@ mod tests { #[test] fn test_ingest_block_boundary() -> super::Result<()> { // Test ingesting a single record - let header = VBinseqHeaderBuilder::new().build(); + let header = FileHeaderBuilder::new().build(); // Create a source writer with a single record - let mut source = VBinseqWriterBuilder::default() + let mut source = WriterBuilder::default() .header(header) .headless(true) .build(Vec::new())?; @@ -1375,7 +1370,7 @@ mod tests { assert!(!source.by_ref().is_empty()); // Create a destination writer - let mut dest = VBinseqWriterBuilder::default() + let mut dest = WriterBuilder::default() .header(header) .headless(true) .build(Vec::new())?; @@ -1405,11 +1400,11 @@ mod tests { #[test] fn test_ingest_with_quality_scores() -> super::Result<()> { // Test ingesting records with quality scores - let source_header = VBinseqHeaderBuilder::new().qual(true).build(); - let dest_header = VBinseqHeaderBuilder::new().qual(true).build(); + let source_header = FileHeaderBuilder::new().qual(true).build(); + let dest_header = FileHeaderBuilder::new().qual(true).build(); // Create a source writer with quality scores - let mut source = VBinseqWriterBuilder::default() + let mut source = WriterBuilder::default() .header(source_header) .headless(true) .build(Vec::new())?; @@ -1427,7 +1422,7 @@ mod tests { } // Create a destination writer - let mut dest = VBinseqWriterBuilder::default() + let mut dest = WriterBuilder::default() .header(dest_header) .headless(true) .build(Vec::new())?; @@ -1449,10 +1444,10 @@ mod tests { #[test] fn test_ingest_with_compression() -> super::Result<()> { // Test ingesting a single record - let header = VBinseqHeaderBuilder::new().compressed(true).build(); + let header = FileHeaderBuilder::new().compressed(true).build(); // Create a source writer with a single record - let mut source = VBinseqWriterBuilder::default() + let mut source = WriterBuilder::default() .header(header) .headless(true) .build(Vec::new())?; @@ -1467,7 +1462,7 @@ mod tests { } // Create a destination writer - let mut dest = VBinseqWriterBuilder::default() + let mut dest = WriterBuilder::default() .header(header) .headless(true) .build(Vec::new())?; @@ -1496,17 +1491,17 @@ mod tests { #[test] fn test_ingest_incompatible_headers() -> super::Result<()> { - let source_header = VBinseqHeaderBuilder::new().build(); - let dest_header = VBinseqHeaderBuilder::new().qual(true).build(); + let source_header = FileHeaderBuilder::new().build(); + let dest_header = FileHeaderBuilder::new().qual(true).build(); // Create a source writer with quality scores - let mut source = VBinseqWriterBuilder::default() + let mut source = WriterBuilder::default() .header(source_header) .headless(true) .build(Vec::new())?; // Create a destination writer - let mut dest = VBinseqWriterBuilder::default() + let mut dest = WriterBuilder::default() .header(dest_header) .headless(true) .build(Vec::new())?; diff --git a/src/write.rs b/src/write.rs index c7cfd23..d500b2e 100644 --- a/src/write.rs +++ b/src/write.rs @@ -240,6 +240,66 @@ impl BinseqWriterBuilder { self } + /// Sets the corresponding values for this builder given an existing BQ header + #[must_use] + pub fn from_bq_header(header: bq::FileHeader) -> Self { + Self { + format: Format::Bq, + slen: Some(header.slen), + xlen: (header.xlen > 0).then_some(header.xlen), + bitsize: Some(header.bits), + paired: header.is_paired(), + flags: header.flags, + compression: false, + headers: false, + quality: false, + compression_level: None, + block_size: None, + headless: false, + policy: None, + } + } + + /// Sets the corresponding values for this builder given an existing VBQ header + #[must_use] + pub fn from_vbq_header(header: vbq::FileHeader) -> Self { + Self { + format: Format::Vbq, + slen: None, + xlen: None, + flags: header.flags, + quality: header.qual, + paired: header.paired, + bitsize: Some(header.bits), + headers: header.headers, + compression: header.compressed, + block_size: Some(header.block as usize), + policy: None, + compression_level: None, + headless: false, + } + } + + /// Sets the corresponding values for this builder given an existing CBQ header + #[must_use] + pub fn from_cbq_header(header: cbq::FileHeader) -> Self { + Self { + format: Format::Cbq, + flags: header.has_flags(), + quality: header.has_qualities(), + headers: header.has_headers(), + paired: header.is_paired(), + block_size: Some(header.block_size as usize), + compression_level: Some(header.compression_level as i32), + compression: false, + slen: None, + xlen: None, + bitsize: None, + policy: None, + headless: false, + } + } + /// Encode FASTX file(s) to BINSEQ format /// /// This method returns a [`FastxEncoderBuilder`] that allows you to configure @@ -317,7 +377,7 @@ impl BinseqWriterBuilder { 0 }; - let mut header_builder = bq::BinseqHeaderBuilder::new().slen(slen).xlen(xlen); + let mut header_builder = bq::FileHeaderBuilder::new().slen(slen).xlen(xlen); if let Some(bitsize) = self.bitsize { header_builder = header_builder.bitsize(bitsize); @@ -327,7 +387,7 @@ impl BinseqWriterBuilder { let header = header_builder.build()?; - let inner = bq::BinseqWriterBuilder::default() + let inner = bq::WriterBuilder::default() .header(header) .policy(self.policy.unwrap_or_default()) .headless(self.headless) @@ -337,7 +397,7 @@ impl BinseqWriterBuilder { } fn build_vbq(self, writer: W) -> Result> { - let mut header_builder = vbq::VBinseqHeaderBuilder::new() + let mut header_builder = vbq::FileHeaderBuilder::new() .paired(self.paired) .qual(self.quality) .headers(self.headers) @@ -354,7 +414,7 @@ impl BinseqWriterBuilder { let header = header_builder.build(); - let inner = vbq::VBinseqWriterBuilder::default() + let inner = vbq::WriterBuilder::default() .header(header) .policy(self.policy.unwrap_or_default()) .headless(self.headless) @@ -389,9 +449,9 @@ impl BinseqWriterBuilder { /// a unified interface for writing sequence data. pub enum BinseqWriter { /// BQ format writer - Bq(bq::BinseqWriter), + Bq(bq::Writer), /// VBQ format writer - Vbq(vbq::VBinseqWriter), + Vbq(vbq::Writer), /// CBQ format writer Cbq(cbq::ColumnarBlockWriter), } @@ -520,7 +580,7 @@ impl BinseqWriter { pub fn new_headless_buffer(&self) -> Result>> { match self { Self::Bq(w) => { - let inner = bq::BinseqWriterBuilder::default() + let inner = bq::WriterBuilder::default() .header(w.header()) .policy(w.policy()) .headless(true) @@ -528,7 +588,7 @@ impl BinseqWriter { Ok(BinseqWriter::Bq(inner)) } Self::Vbq(w) => { - let inner = vbq::VBinseqWriterBuilder::default() + let inner = vbq::WriterBuilder::default() .header(w.header()) .policy(w.policy()) .headless(true)