From 443096787d5ca042042618910e82ad37d14b3f74 Mon Sep 17 00:00:00 2001 From: George Glidden-Handgis <47768122+georgeglidden@users.noreply.github.com> Date: Wed, 26 Mar 2025 13:36:02 -0700 Subject: [PATCH 1/4] prototype of behavior that only loads the suffix array once when low_memory = false --- libsufr/src/sufr_file.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libsufr/src/sufr_file.rs b/libsufr/src/sufr_file.rs index 0dccd43..586ade5 100644 --- a/libsufr/src/sufr_file.rs +++ b/libsufr/src/sufr_file.rs @@ -100,6 +100,8 @@ where /// File access wrapper to the LCP array pub lcp_file: FileAccess, + is_in_mem: bool, + /// In-memory access to the suffix array suffix_array_mem: Vec, @@ -268,6 +270,7 @@ where suffix_array_file, lcp_file, text_file, + is_in_mem: false, suffix_array_mem: vec![], suffix_array_mem_mql: None, suffix_array_rank_mem: vec![], @@ -512,6 +515,10 @@ where /// Args: /// * `max_query_len`: prefix length fn set_suffix_array_mem(&mut self, max_query_len: Option) -> Result<()> { + if self.is_in_mem { + return Ok(()) + } + let mut max_query_len = max_query_len.unwrap_or(0); // If ".sufr" file was built with a nonzero max_query_len or seed mask @@ -664,6 +671,8 @@ where } } + self.is_in_mem = true; + Ok(()) } From ae501b0409252870413d6f7b45a0b521b5b52270 Mon Sep 17 00:00:00 2001 From: George Glidden-Handgis <47768122+georgeglidden@users.noreply.github.com> Date: Wed, 26 Mar 2025 14:03:25 -0700 Subject: [PATCH 2/4] early return is already implemented; revert previous changes. why is it not triggering? --- libsufr/src/sufr_file.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/libsufr/src/sufr_file.rs b/libsufr/src/sufr_file.rs index 586ade5..ea1f224 100644 --- a/libsufr/src/sufr_file.rs +++ b/libsufr/src/sufr_file.rs @@ -100,8 +100,6 @@ where /// File access wrapper to the LCP array pub lcp_file: FileAccess, - is_in_mem: bool, - /// In-memory access to the suffix array suffix_array_mem: Vec, @@ -270,7 +268,6 @@ where suffix_array_file, lcp_file, text_file, - is_in_mem: false, suffix_array_mem: vec![], suffix_array_mem_mql: None, suffix_array_rank_mem: vec![], @@ -515,9 +512,6 @@ where /// Args: /// * `max_query_len`: prefix length fn set_suffix_array_mem(&mut self, max_query_len: Option) -> Result<()> { - if self.is_in_mem { - return Ok(()) - } let mut max_query_len = max_query_len.unwrap_or(0); @@ -671,8 +665,6 @@ where } } - self.is_in_mem = true; - Ok(()) } From 1816c6494f1f04f96da450f64d950b6aeb39d005 Mon Sep 17 00:00:00 2001 From: George Glidden-Handgis <47768122+georgeglidden@users.noreply.github.com> Date: Wed, 26 Mar 2025 14:07:49 -0700 Subject: [PATCH 3/4] always check for early return condition; set suffix_array_mem_mql to max_query_len to avoid None values causing re-loading on every call to set_suffix_array_mem --- libsufr/src/sufr_file.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/libsufr/src/sufr_file.rs b/libsufr/src/sufr_file.rs index ea1f224..02af236 100644 --- a/libsufr/src/sufr_file.rs +++ b/libsufr/src/sufr_file.rs @@ -536,6 +536,14 @@ where } } + // Do nothing if we've already loaded the correct SA/MQL + if !self.suffix_array_mem.is_empty() + && self.suffix_array_mem_mql == Some(max_query_len) + { + info!("Using existing suffix_array_mem"); + return Ok(()); + } + // The requested MQL matches how the SA was built if max_query_len == built_max_query_len { // Stuff entire SA into memory @@ -547,13 +555,6 @@ where // There will be no ranks self.suffix_array_rank_mem = vec![]; } else { - // Do nothing if we've already loaded the correct SA/MQL - if !self.suffix_array_mem.is_empty() - && self.suffix_array_mem_mql == Some(max_query_len) - { - info!("Using existing suffix_array_mem"); - return Ok(()); - } info!("Loading suffix_array_mem using max_query_len {max_query_len}"); @@ -620,7 +621,6 @@ where } else { let now = Instant::now(); let (sub_sa, sub_rank) = &self.subsample_suffix_array(max_query_len); - self.suffix_array_mem_mql = Some(max_query_len); self.suffix_array_mem = sub_sa.to_vec(); self.suffix_array_rank_mem = sub_rank.to_vec(); @@ -664,6 +664,7 @@ where } } } + self.suffix_array_mem_mql = Some(max_query_len); Ok(()) } From 2a614c7342aa75325f0ffd1f934158c485ec9ce9 Mon Sep 17 00:00:00 2001 From: George Glidden-Handgis <47768122+georgeglidden@users.noreply.github.com> Date: Wed, 26 Mar 2025 14:28:29 -0700 Subject: [PATCH 4/4] remove print statements --- libsufr/src/sufr_file.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libsufr/src/sufr_file.rs b/libsufr/src/sufr_file.rs index 02af236..5834a8c 100644 --- a/libsufr/src/sufr_file.rs +++ b/libsufr/src/sufr_file.rs @@ -542,10 +542,8 @@ where { info!("Using existing suffix_array_mem"); return Ok(()); - } - - // The requested MQL matches how the SA was built - if max_query_len == built_max_query_len { + } else if max_query_len == built_max_query_len { + // The requested MQL matches how the SA was built // Stuff entire SA into memory let now = Instant::now(); self.suffix_array_file.reset();