From 7d3b7aa6e4d0269e64bcf1e4f842ff4182d992e5 Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Fri, 13 Jun 2025 13:28:18 -0600
Subject: [PATCH 01/24] Add scraper for output

---
 Cargo.toml                      | 2 +-
 libs/output_scraper/Cargo.toml  | 6 ++++++
 libs/output_scraper/src/main.rs | 3 +++
 3 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 libs/output_scraper/Cargo.toml
 create mode 100644 libs/output_scraper/src/main.rs

diff --git a/Cargo.toml b/Cargo.toml
index 546b30f7..57dcfbe0 100755
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,2 +1,2 @@
 [workspace]
-members = ["libs/graph_analyze", "libs/super_source_and_sink","libs/FmAssemblyGraph"]
+members = ["libs/graph_analyze", "libs/super_source_and_sink","libs/FmAssemblyGraph", "libs/output_scraper"]
diff --git a/libs/output_scraper/Cargo.toml b/libs/output_scraper/Cargo.toml
new file mode 100644
index 00000000..5e142277
--- /dev/null
+++ b/libs/output_scraper/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "output_scraper"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
new file mode 100644
index 00000000..e7a11a96
--- /dev/null
+++ b/libs/output_scraper/src/main.rs
@@ -0,0 +1,3 @@
+fn main() {
+    println!("Hello, world!");
+}

From 265e8fb8033c6f5f47e8776962117febcc680325 Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Mon, 16 Jun 2025 15:29:03 -0600
Subject: [PATCH 02/24] Change needle to water (Smith-Waterman) command

---
 findviralstrains_2.smk | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/findviralstrains_2.smk b/findviralstrains_2.smk
index 1335f356..cadb0e1f 100755
--- a/findviralstrains_2.smk
+++ b/findviralstrains_2.smk
@@ -289,7 +289,7 @@ rule Rebuild_3:
     shell:
         "python3 {input.script} {input.flow} {input.swg} {params.outtemp}"
 
-# Compares our newly constructed genomes to original covid reference using Needleman-Wunsch #
+# Compares our newly constructed genomes to original covid reference using Smith-Waterman #
 rule Compare_1:
     input:
         rebuilt_genome = bd("output_genomes/{sample}/subgraph_{subgraph}/{sample}_1_of_1.fasta"),
@@ -297,7 +297,7 @@ rule Compare_1:
     output:
         compar_file = bd("output_genomes/{sample}/subgraph_{subgraph}/{sample}_1_of_1_vs_ref.txt")
     shell:
-        "needle -asequence {input.origin_covid} -bsequence {input.rebuilt_genome} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file}"
+        "water -asequence {input.origin_covid} -bsequence {input.rebuilt_genome} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file}"
 
 # Compares genomes from the two path result to the reference #
 rule Compare_2:
@@ -310,8 +310,8 @@ rule Compare_2:
         compar_file_2 = bd("output_genomes/{sample}/subgraph_{subgraph}/{sample}_2_of_2_vs_ref.txt")
     shell:
         """
-        needle -asequence {input.origin_covid} -bsequence {input.rebuilt_genome_1} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file_1}
-        needle -asequence {input.origin_covid} -bsequence {input.rebuilt_genome_2} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file_2}
+        water -asequence {input.origin_covid} -bsequence {input.rebuilt_genome_1} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file_1}
+        water -asequence {input.origin_covid} -bsequence {input.rebuilt_genome_2} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file_2}
         """
 
 # Compares genomes from the three path result to the reference #
@@ -327,7 +327,7 @@ rule Compare_3:
         compar_file_3 = bd("output_genomes/{sample}/subgraph_{subgraph}/{sample}_3_of_3_vs_ref.txt")
     shell:
         """
-        needle -asequence {input.origin_covid} -bsequence {input.rebuilt_genome_1} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file_1}
-        needle -asequence {input.origin_covid} -bsequence {input.rebuilt_genome_2} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file_2}
-        needle -asequence {input.origin_covid} -bsequence {input.rebuilt_genome_3} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file_3}
-        """
\ No newline at end of file
+        water -asequence {input.origin_covid} -bsequence {input.rebuilt_genome_1} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file_1}
+        water -asequence {input.origin_covid} -bsequence {input.rebuilt_genome_2} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file_2}
+        water -asequence {input.origin_covid} -bsequence {input.rebuilt_genome_3} -gapopen 10 -gapextend 0.5 -outfile {output.compar_file_3}
+        """

From 37ad2a26131e49f8c173cb40ad3e7567558cb5eb Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Wed, 18 Jun 2025 14:47:25 -0600
Subject: [PATCH 03/24] Add initial version of output scraper (to be changed)

---
 libs/FmAssemblyGraph            |   2 +-
 libs/output_scraper/Cargo.toml  |   1 +
 libs/output_scraper/src/main.rs | 161 +++++++++++++++++++++++++++++++-
 3 files changed, 161 insertions(+), 3 deletions(-)

diff --git a/libs/FmAssemblyGraph b/libs/FmAssemblyGraph
index affc5bc3..120c78ab 160000
--- a/libs/FmAssemblyGraph
+++ b/libs/FmAssemblyGraph
@@ -1 +1 @@
-Subproject commit affc5bc31d3fc815b0898e91e32a210e54764e6a
+Subproject commit 120c78abcc7d6363d69b46f8ca7de71c9a28668c
diff --git a/libs/output_scraper/Cargo.toml b/libs/output_scraper/Cargo.toml
index 5e142277..8a7e64fa 100644
--- a/libs/output_scraper/Cargo.toml
+++ b/libs/output_scraper/Cargo.toml
@@ -4,3 +4,4 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
+csv = "1.1"
diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index e7a11a96..8f67dbf2 100644
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -1,3 +1,160 @@
-fn main() {
-    println!("Hello, world!");
+use std::path::{Path, PathBuf};
+use std::fs::{self, File};
+use std::io::{BufRead, BufReader, Write};
+use std::collections::BTreeMap; // Changed from HashMap for ordered output
+
+#[derive(Debug)]
+struct AlignmentStats {
+    file_name: String,
+    length: usize,
+    identity_pct: f64,
+    gaps_pct: f64,
+    score: f64,
+    start_position: usize,
+    end_position: usize,
+}
+
+fn main() -> std::io::Result<()> {
+    let args: Vec<String> = std::env::args().collect();
+    if args.len() != 3 {
+        eprintln!("Usage: {} <input_directory> <output_file>", args[0]);
+        std::process::exit(1);
+    }
+
+    let input_dir = Path::new(&args[1]);
+    let output_path = Path::new(&args[2]);
+    let mut results = BTreeMap::new(); // Key: subgraph name, Value: Vec of stats
+
+    // Walk through the directory structure
+    for entry in fs::read_dir(input_dir)? {
+        let entry = entry?;
+        let path = entry.path();
+        
+        if path.is_dir() {
+            if let Some(dir_name) = path.file_name() {
+                let dir_name = dir_name.to_string_lossy();
+                if dir_name.starts_with("subgraph_") {
+                    process_subgraph_dir(&path, &dir_name, &mut results)?;
+                }
+            }
+        }
+    }
+
+    // Write formatted results
+    write_formatted_output(output_path, &results)?;
+
+    println!("Successfully processed {} subgraphs, output written to {}", 
+        results.len(), 
+        output_path.display());
+
+    Ok(())
+}
+
+fn process_subgraph_dir(dir: &Path, subgraph: &str, results: &mut BTreeMap<String, Vec<AlignmentStats>>) -> std::io::Result<()> {
+    let mut subgraph_results = Vec::new();
+    
+    for entry in fs::read_dir(dir)? {
+        let entry = entry?;
+        let path = entry.path();
+        
+        if path.is_file() {
+            if let Some(file_name) = path.file_name() {
+                let file_name = file_name.to_string_lossy();
+                if file_name.ends_with("_vs_ref.txt") && file_name.contains("1_of_1") {
+                    if let Ok(stats) = parse_alignment_file(&path) {
+                        subgraph_results.push(stats);
+                    }
+                }
+            }
+        }
+    }
+    
+    if !subgraph_results.is_empty() {
+        results.insert(subgraph.to_string(), subgraph_results);
+    }
+    Ok(())
+}
+
+fn parse_alignment_file(file_path: &Path) -> std::io::Result<AlignmentStats> {
+    let file = fs::File::open(file_path)?;
+    let reader = BufReader::new(file);
+    
+    let file_name = file_path.file_name()
+        .and_then(|n| n.to_str())
+        .unwrap_or("unknown")
+        .to_string();
+
+    let mut stats = AlignmentStats {
+        file_name,
+        length: 0,
+        identity_pct: 0.0,
+        gaps_pct: 0.0,
+        score: 0.0,
+        start_position: 0,
+        end_position: 0,
+    };
+
+    for line in reader.lines() {
+        let line = line?;
+        
+        if line.starts_with("# Length: ") {
+            stats.length = line[10..].trim().parse().unwrap_or(0);
+        } 
+        else if line.starts_with("# Identity: ") {
+            let identity_str = line[12..].trim();
+            stats.identity_pct = parse_percentage(identity_str);
+        } 
+        else if line.starts_with("# Gaps: ") {
+            let gaps_str = line[8..].trim();
+            stats.gaps_pct = parse_percentage(gaps_str);
+        } 
+        else if line.starts_with("# Score: ") {
+            stats.score = line[9..].trim().parse().unwrap_or(0.0);
+        } 
+        else if line.starts_with("NC_045512.2") {
+            let parts: Vec<&str> = line.split_whitespace().collect();
+            if parts.len() >= 2 {
+                if stats.start_position == 0 {
+                    stats.start_position = parts[1].parse().unwrap_or(0);
+                }
+                stats.end_position = parts.last().and_then(|s| s.parse().ok()).unwrap_or(0);
+            }
+        }
+    }
+
+    Ok(stats)
+}
+
+fn parse_percentage(s: &str) -> f64 {
+    s.split('(').nth(1)
+        .and_then(|s| s.split('%').next())
+        .and_then(|s| s.trim().parse().ok())
+        .unwrap_or(0.0)
+}
+
+fn write_formatted_output(output_path: &Path, results: &BTreeMap<String, Vec<AlignmentStats>>) -> std::io::Result<()> {
+    let mut file = File::create(output_path)?;
+    
+    for (subgraph, stats_vec) in results {
+        writeln!(file, "╔══════════════════════════════════════╗")?;
+        writeln!(file, "║ Subgraph: {:<26} ║", subgraph)?;
+        writeln!(file, "╠══════════════════════════════════════╣")?;
+        
+        for stats in stats_vec {
+            writeln!(file, "║ File: {:<30} ║", stats.file_name)?;
+            writeln!(file, "║   Length: {:<26} ║", stats.length)?;
+            writeln!(file, "║   Identity: {:>5.1}% {:<18} ║", 
+                stats.identity_pct, 
+                format!("({}/{})", (stats.identity_pct/100.0 * stats.length as f64) as usize, stats.length))?;
+            writeln!(file, "║   Gaps: {:>5.1}% {:<20} ║", 
+                stats.gaps_pct,
+                format!("({}/{})", (stats.gaps_pct/100.0 * stats.length as f64) as usize, stats.length))?;
+            writeln!(file, "║   Score: {:<26.1} ║", stats.score)?;
+            writeln!(file, "║   Positions: {}-{:<18} ║", stats.start_position, stats.end_position)?;
+            writeln!(file, "╠──────────────────────────────────────╣")?;
+        }
+    }
+    
+    writeln!(file, "╚══════════════════════════════════════╝")?;
+    Ok(())
 }

From 872591958954581cca71f05fcaf092d86f297208 Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Wed, 18 Jun 2025 15:22:12 -0600
Subject: [PATCH 04/24] Change output to csv

---
 libs/output_scraper/src/main.rs | 118 +++++++++++++++++++-------------
 1 file changed, 69 insertions(+), 49 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index 8f67dbf2..b64c8dae 100644
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -1,14 +1,17 @@
 use std::path::{Path, PathBuf};
 use std::fs::{self, File};
-use std::io::{BufRead, BufReader, Write};
-use std::collections::BTreeMap; // Changed from HashMap for ordered output
+use std::io::{BufRead, BufReader};
+use std::collections::BTreeMap;
+use csv::Writer;
 
 #[derive(Debug)]
 struct AlignmentStats {
-    file_name: String,
+    sample_name: String,
     length: usize,
     identity_pct: f64,
+    identity_count: usize,
     gaps_pct: f64,
+    gaps_count: usize,
     score: f64,
     start_position: usize,
     end_position: usize,
@@ -17,15 +20,15 @@ struct AlignmentStats {
 fn main() -> std::io::Result<()> {
     let args: Vec<String> = std::env::args().collect();
     if args.len() != 3 {
-        eprintln!("Usage: {} <input_directory> <output_file>", args[0]);
+        eprintln!("Usage: {} <input_directory> <output_csv>", args[0]);
         std::process::exit(1);
     }
 
     let input_dir = Path::new(&args[1]);
     let output_path = Path::new(&args[2]);
-    let mut results = BTreeMap::new(); // Key: subgraph name, Value: Vec of stats
+    let mut results = BTreeMap::new();
 
-    // Walk through the directory structure
+    // Process each subgraph directory
     for entry in fs::read_dir(input_dir)? {
         let entry = entry?;
         let path = entry.path();
@@ -34,14 +37,16 @@ fn main() -> std::io::Result<()> {
             if let Some(dir_name) = path.file_name() {
                 let dir_name = dir_name.to_string_lossy();
                 if dir_name.starts_with("subgraph_") {
-                    process_subgraph_dir(&path, &dir_name, &mut results)?;
+                    if let Some((sample_name, stats)) = process_subgraph_dir(&path)? {
+                        results.insert(dir_name.to_string(), (sample_name, stats));
+                    }
                 }
             }
         }
     }
 
-    // Write formatted results
-    write_formatted_output(output_path, &results)?;
+    // Write CSV output
+    write_csv_output(output_path, &results)?;
 
     println!("Successfully processed {} subgraphs, output written to {}", 
         results.len(), 
@@ -50,9 +55,7 @@ fn main() -> std::io::Result<()> {
     Ok(())
 }
 
-fn process_subgraph_dir(dir: &Path, subgraph: &str, results: &mut BTreeMap<String, Vec<AlignmentStats>>) -> std::io::Result<()> {
-    let mut subgraph_results = Vec::new();
-    
+fn process_subgraph_dir(dir: &Path) -> std::io::Result<Option<(String, AlignmentStats)>> {
     for entry in fs::read_dir(dir)? {
         let entry = entry?;
         let path = entry.path();
@@ -61,34 +64,30 @@ fn process_subgraph_dir(dir: &Path, subgraph: &str, results: &mut BTreeMap<Strin
             if let Some(file_name) = path.file_name() {
                 let file_name = file_name.to_string_lossy();
                 if file_name.ends_with("_vs_ref.txt") && file_name.contains("1_of_1") {
-                    if let Ok(stats) = parse_alignment_file(&path) {
-                        subgraph_results.push(stats);
-                    }
+                    // Extract sample name from filename (e.g., E1250_S84_L001 from E1250_S84_L001_1_of_1_vs_ref.txt)
+                    let sample_name = file_name.split('_')
+                        .take(3)
+                        .collect::<Vec<_>>()
+                        .join("_");
+                    return Ok(Some((sample_name, parse_alignment_file(&path)?)));
                 }
             }
         }
     }
-    
-    if !subgraph_results.is_empty() {
-        results.insert(subgraph.to_string(), subgraph_results);
-    }
-    Ok(())
+    Ok(None)
 }
 
 fn parse_alignment_file(file_path: &Path) -> std::io::Result<AlignmentStats> {
     let file = fs::File::open(file_path)?;
     let reader = BufReader::new(file);
-    
-    let file_name = file_path.file_name()
-        .and_then(|n| n.to_str())
-        .unwrap_or("unknown")
-        .to_string();
 
     let mut stats = AlignmentStats {
-        file_name,
+        sample_name: String::new(),
         length: 0,
         identity_pct: 0.0,
+        identity_count: 0,
         gaps_pct: 0.0,
+        gaps_count: 0,
         score: 0.0,
         start_position: 0,
         end_position: 0,
@@ -103,10 +102,12 @@ fn parse_alignment_file(file_path: &Path) -> std::io::Result<AlignmentStats> {
         else if line.starts_with("# Identity: ") {
             let identity_str = line[12..].trim();
             stats.identity_pct = parse_percentage(identity_str);
+            stats.identity_count = parse_count(identity_str);
         } 
         else if line.starts_with("# Gaps: ") {
             let gaps_str = line[8..].trim();
             stats.gaps_pct = parse_percentage(gaps_str);
+            stats.gaps_count = parse_count(gaps_str);
         } 
         else if line.starts_with("# Score: ") {
             stats.score = line[9..].trim().parse().unwrap_or(0.0);
@@ -132,29 +133,48 @@ fn parse_percentage(s: &str) -> f64 {
         .unwrap_or(0.0)
 }
 
-fn write_formatted_output(output_path: &Path, results: &BTreeMap<String, Vec<AlignmentStats>>) -> std::io::Result<()> {
-    let mut file = File::create(output_path)?;
-    
-    for (subgraph, stats_vec) in results {
-        writeln!(file, "╔══════════════════════════════════════╗")?;
-        writeln!(file, "║ Subgraph: {:<26} ║", subgraph)?;
-        writeln!(file, "╠══════════════════════════════════════╣")?;
-        
-        for stats in stats_vec {
-            writeln!(file, "║ File: {:<30} ║", stats.file_name)?;
-            writeln!(file, "║   Length: {:<26} ║", stats.length)?;
-            writeln!(file, "║   Identity: {:>5.1}% {:<18} ║", 
-                stats.identity_pct, 
-                format!("({}/{})", (stats.identity_pct/100.0 * stats.length as f64) as usize, stats.length))?;
-            writeln!(file, "║   Gaps: {:>5.1}% {:<20} ║", 
-                stats.gaps_pct,
-                format!("({}/{})", (stats.gaps_pct/100.0 * stats.length as f64) as usize, stats.length))?;
-            writeln!(file, "║   Score: {:<26.1} ║", stats.score)?;
-            writeln!(file, "║   Positions: {}-{:<18} ║", stats.start_position, stats.end_position)?;
-            writeln!(file, "╠──────────────────────────────────────╣")?;
-        }
+fn parse_count(s: &str) -> usize {
+    s.split('/').next()
+        .and_then(|s| s.trim().parse().ok())
+        .unwrap_or(0)
+}
+
+fn write_csv_output(output_path: &Path, results: &BTreeMap<String, (String, AlignmentStats)>) -> std::io::Result<()> {
+    let mut writer = Writer::from_path(output_path)?;
+
+    // Write header
+    writer.write_record(&[
+        "Subgraph",
+        "Sample",
+        "Length",
+        "Identity %",
+        "Identity Count",
+        "Gaps %",
+        "Gaps Count",
+        "Score",
+        "Start Position",
+        "End Position",
+        "Alignment Length",
+    ])?;
+
+    // Write data with one row per subgraph
+    for (subgraph, (sample_name, stats)) in results {
+        let alignment_length = stats.end_position - stats.start_position + 1;
+        writer.write_record(&[
+            subgraph,
+            sample_name,
+            &stats.length.to_string(),
+            &format!("{:.1}", stats.identity_pct),
+            &stats.identity_count.to_string(),
+            &format!("{:.1}", stats.gaps_pct),
+            &stats.gaps_count.to_string(),
+            &format!("{:.1}", stats.score),
+            &stats.start_position.to_string(),
+            &stats.end_position.to_string(),
+            &alignment_length.to_string(),
+        ])?;
     }
-    
-    writeln!(file, "╚══════════════════════════════════════╝")?;
+
+    writer.flush()?;
     Ok(())
 }

From cc1287b4ed51e9fd273de9864fd18cc224a9c38b Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Wed, 18 Jun 2025 15:30:15 -0600
Subject: [PATCH 05/24] Change csv to include number of paths

---
 libs/output_scraper/src/main.rs | 37 +++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index b64c8dae..97fa7a36 100644
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -7,6 +7,8 @@ use csv::Writer;
 #[derive(Debug)]
 struct AlignmentStats {
     sample_name: String,
+    part_number: usize,    // First number (1 in "1_of_3") //
+    total_parts: usize,   // Second number (3 in "1_of_3") //
     length: usize,
     identity_pct: f64,
     identity_count: usize,
@@ -28,7 +30,7 @@ fn main() -> std::io::Result<()> {
     let output_path = Path::new(&args[2]);
     let mut results = BTreeMap::new();
 
-    // Process each subgraph directory
+    // Process each subgraph directory //
     for entry in fs::read_dir(input_dir)? {
         let entry = entry?;
         let path = entry.path();
@@ -63,13 +65,17 @@ fn process_subgraph_dir(dir: &Path) -> std::io::Result<Option<(String, Alignment
         if path.is_file() {
             if let Some(file_name) = path.file_name() {
                 let file_name = file_name.to_string_lossy();
-                if file_name.ends_with("_vs_ref.txt") && file_name.contains("1_of_1") {
-                    // Extract sample name from filename (e.g., E1250_S84_L001 from E1250_S84_L001_1_of_1_vs_ref.txt)
+                if file_name.ends_with("_vs_ref.txt") {
+                    // Extract sample name from filename, to be changed later //
                     let sample_name = file_name.split('_')
                         .take(3)
                         .collect::<Vec<_>>()
                         .join("_");
-                    return Ok(Some((sample_name, parse_alignment_file(&path)?)));
+                    
+                    // Extract the subgraph numbers //
+                    let part_numbers = extract_part_numbers(&file_name);
+                    
+                    return Ok(Some((sample_name, parse_alignment_file(&path, part_numbers)?)));
                 }
             }
         }
@@ -77,12 +83,27 @@ fn process_subgraph_dir(dir: &Path) -> std::io::Result<Option<(String, Alignment
     Ok(None)
 }
 
-fn parse_alignment_file(file_path: &Path) -> std::io::Result<AlignmentStats> {
+fn extract_part_numbers(filename: &str) -> (usize, usize) {
+    let parts: Vec<&str> = filename.split('_').collect();
+    if parts.len() >= 5 {
+        if let (Ok(current), Ok(total)) = (
+            parts[3].parse::<usize>(),
+            parts[5].parse::<usize>(),
+        ) {
+            return (current, total);
+        }
+    }
+    (1, 1) // Default values if parsing fails //
+}
+
+fn parse_alignment_file(file_path: &Path, part_numbers: (usize, usize)) -> std::io::Result<AlignmentStats> {
     let file = fs::File::open(file_path)?;
     let reader = BufReader::new(file);
 
     let mut stats = AlignmentStats {
         sample_name: String::new(),
+        part_number: part_numbers.0,
+        total_parts: part_numbers.0,
         length: 0,
         identity_pct: 0.0,
         identity_count: 0,
@@ -146,6 +167,8 @@ fn write_csv_output(output_path: &Path, results: &BTreeMap<String, (String, Alig
     writer.write_record(&[
         "Subgraph",
         "Sample",
+        "Part",
+        "Total Parts",
         "Length",
         "Identity %",
         "Identity Count",
@@ -157,12 +180,14 @@ fn write_csv_output(output_path: &Path, results: &BTreeMap<String, (String, Alig
         "Alignment Length",
     ])?;
 
-    // Write data with one row per subgraph
+    // Write data with one row per subgraph //
     for (subgraph, (sample_name, stats)) in results {
         let alignment_length = stats.end_position - stats.start_position + 1;
         writer.write_record(&[
             subgraph,
             sample_name,
+            &stats.part_number.to_string(),
+            &stats.total_parts.to_string(),
             &stats.length.to_string(),
             &format!("{:.1}", stats.identity_pct),
             &stats.identity_count.to_string(),

From 1cb4ab6d3c281ee41f463c95d444cc66e28b4005 Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Thu, 19 Jun 2025 14:45:26 -0600
Subject: [PATCH 06/24] Change to run on all samples and all subgraphs

---
 libs/output_scraper/src/main.rs | 117 +++++++++++++++++++++-----------
 1 file changed, 77 insertions(+), 40 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index 97fa7a36..7553d8d3 100644
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -7,8 +7,9 @@ use csv::Writer;
 #[derive(Debug)]
 struct AlignmentStats {
     sample_name: String,
-    part_number: usize,    // First number (1 in "1_of_3") //
-    total_parts: usize,   // Second number (3 in "1_of_3") //
+    subgraph_name: String,  // Added to track subgraph directory
+    part_number: usize,
+    total_parts: usize,
     length: usize,
     identity_pct: f64,
     identity_count: usize,
@@ -28,36 +29,60 @@ fn main() -> std::io::Result<()> {
 
     let input_dir = Path::new(&args[1]);
     let output_path = Path::new(&args[2]);
-    let mut results = BTreeMap::new();
+    let mut results = Vec::new();
 
-    // Process each subgraph directory //
-    for entry in fs::read_dir(input_dir)? {
-        let entry = entry?;
-        let path = entry.path();
+    // Process each sample directory
+    for sample_entry in fs::read_dir(input_dir)? {
+        let sample_entry = sample_entry?;
+        let sample_path = sample_entry.path();
         
-        if path.is_dir() {
-            if let Some(dir_name) = path.file_name() {
-                let dir_name = dir_name.to_string_lossy();
-                if dir_name.starts_with("subgraph_") {
-                    if let Some((sample_name, stats)) = process_subgraph_dir(&path)? {
-                        results.insert(dir_name.to_string(), (sample_name, stats));
+        if sample_path.is_dir() {
+            let sample_name = sample_path.file_name()
+                .unwrap_or_default()
+                .to_string_lossy()
+                .to_string();
+
+            // Process each subgraph directory in the sample directory
+            for subgraph_entry in fs::read_dir(&sample_path)? {
+                let subgraph_entry = subgraph_entry?;
+                let subgraph_path = subgraph_entry.path();
+                
+                if subgraph_path.is_dir() {
+                    if let Some(dir_name) = subgraph_path.file_name() {
+                        let subgraph_name = dir_name.to_string_lossy().to_string();
+                        if subgraph_name.starts_with("subgraph_") {
+                            if let Some(stats_vec) = process_subgraph_dir(&subgraph_path, &sample_name, &subgraph_name)? {
+                                results.extend(stats_vec);
+                            }
+                        }
                     }
                 }
             }
+            
+            // Also check for files directly in the sample directory (like subgraph_0 might be missing)
+            if let Some(stats_vec) = process_files_in_dir(&sample_path, &sample_name, "root")? {
+                results.extend(stats_vec);
+            }
         }
     }
 
     // Write CSV output
     write_csv_output(output_path, &results)?;
 
-    println!("Successfully processed {} subgraphs, output written to {}", 
+    println!("Successfully processed {} alignment files, output written to {}", 
         results.len(), 
         output_path.display());
 
     Ok(())
 }
 
-fn process_subgraph_dir(dir: &Path) -> std::io::Result<Option<(String, AlignmentStats)>> {
+fn process_subgraph_dir(dir: &Path, sample_name: &str, subgraph_name: &str) -> std::io::Result<Option<Vec<AlignmentStats>>> {
+    process_files_in_dir(dir, sample_name, subgraph_name)
+}
+
+fn process_files_in_dir(dir: &Path, sample_name: &str, subgraph_name: &str) -> std::io::Result<Option<Vec<AlignmentStats>>> {
+    let mut stats_vec = Vec::new();
+    
     for entry in fs::read_dir(dir)? {
         let entry = entry?;
         let path = entry.path();
@@ -66,44 +91,56 @@ fn process_subgraph_dir(dir: &Path) -> std::io::Result<Option<(String, Alignment
             if let Some(file_name) = path.file_name() {
                 let file_name = file_name.to_string_lossy();
                 if file_name.ends_with("_vs_ref.txt") {
-                    // Extract sample name from filename, to be changed later //
-                    let sample_name = file_name.split('_')
-                        .take(3)
-                        .collect::<Vec<_>>()
-                        .join("_");
-                    
-                    // Extract the subgraph numbers //
+                    // Extract the part numbers
                     let part_numbers = extract_part_numbers(&file_name);
                     
-                    return Ok(Some((sample_name, parse_alignment_file(&path, part_numbers)?)));
+                    stats_vec.push(parse_alignment_file(
+                        &path, 
+                        sample_name.to_string(),
+                        subgraph_name.to_string(),
+                        part_numbers
+                    )?);
                 }
             }
         }
     }
-    Ok(None)
+    
+    if stats_vec.is_empty() {
+        Ok(None)
+    } else {
+        Ok(Some(stats_vec))
+    }
 }
 
 fn extract_part_numbers(filename: &str) -> (usize, usize) {
     let parts: Vec<&str> = filename.split('_').collect();
-    if parts.len() >= 5 {
-        if let (Ok(current), Ok(total)) = (
-            parts[3].parse::<usize>(),
-            parts[5].parse::<usize>(),
-        ) {
-            return (current, total);
+    for i in 0..parts.len() {
+        if parts[i] == "of" && i > 0 && i < parts.len() - 1 {
+            if let (Ok(current), Ok(total)) = (
+                parts[i-1].parse::<usize>(),
+                parts[i+1].parse::<usize>(),
+            ) {
+                return (current, total);
+            }
         }
     }
-    (1, 1) // Default values if parsing fails //
+    (1, 1) // Default values if parsing fails
 }
 
-fn parse_alignment_file(file_path: &Path, part_numbers: (usize, usize)) -> std::io::Result<AlignmentStats> {
+fn parse_alignment_file(
+    file_path: &Path, 
+    sample_name: String,
+    subgraph_name: String,
+    part_numbers: (usize, usize)
+) -> std::io::Result<AlignmentStats> {
     let file = fs::File::open(file_path)?;
     let reader = BufReader::new(file);
 
     let mut stats = AlignmentStats {
-        sample_name: String::new(),
+        sample_name,
+        subgraph_name,
         part_number: part_numbers.0,
-        total_parts: part_numbers.0,
+        total_parts: part_numbers.1,
         length: 0,
         identity_pct: 0.0,
         identity_count: 0,
@@ -160,13 +197,13 @@ fn parse_count(s: &str) -> usize {
         .unwrap_or(0)
 }
 
-fn write_csv_output(output_path: &Path, results: &BTreeMap<String, (String, AlignmentStats)>) -> std::io::Result<()> {
+fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::Result<()> {
     let mut writer = Writer::from_path(output_path)?;
 
     // Write header
     writer.write_record(&[
-        "Subgraph",
         "Sample",
+        "Subgraph",
         "Part",
         "Total Parts",
         "Length",
@@ -180,12 +217,12 @@ fn write_csv_output(output_path: &Path, results: &BTreeMap<String, (String, Alig
         "Alignment Length",
     ])?;
 
-    // Write data with one row per subgraph //
-    for (subgraph, (sample_name, stats)) in results {
+    // Write data
+    for stats in results {
         let alignment_length = stats.end_position - stats.start_position + 1;
         writer.write_record(&[
-            subgraph,
-            sample_name,
+            &stats.sample_name,
+            &stats.subgraph_name,
             &stats.part_number.to_string(),
             &stats.total_parts.to_string(),
             &stats.length.to_string(),

From ae9ed3390d0b243dd72fd98d9674a65cab77d5f7 Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Thu, 19 Jun 2025 14:51:28 -0600
Subject: [PATCH 07/24] Change csv header and sort output

---
 libs/output_scraper/src/main.rs | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index 7553d8d3..4a51983d 100644
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -7,7 +7,7 @@ use csv::Writer;
 #[derive(Debug)]
 struct AlignmentStats {
     sample_name: String,
-    subgraph_name: String,  // Added to track subgraph directory
+    subgraph_name: String,
     part_number: usize,
     total_parts: usize,
     length: usize,
@@ -59,13 +59,21 @@ fn main() -> std::io::Result<()> {
                 }
             }
             
-            // Also check for files directly in the sample directory (like subgraph_0 might be missing)
+            // Also check for files directly in the sample directory
             if let Some(stats_vec) = process_files_in_dir(&sample_path, &sample_name, "root")? {
                 results.extend(stats_vec);
             }
         }
     }
 
+    // Sort results by sample, then subgraph, then total parts, then part number
+    results.sort_by(|a, b| {
+        a.sample_name.cmp(&b.sample_name)
+            .then(a.subgraph_name.cmp(&b.subgraph_name))
+            .then(a.total_parts.cmp(&b.total_parts))
+            .then(a.part_number.cmp(&b.part_number))
+    });
+
     // Write CSV output
     write_csv_output(output_path, &results)?;
 
@@ -91,7 +99,6 @@ fn process_files_in_dir(dir: &Path, sample_name: &str, subgraph_name: &str) -> s
             if let Some(file_name) = path.file_name() {
                 let file_name = file_name.to_string_lossy();
                 if file_name.ends_with("_vs_ref.txt") {
-                    // Extract the part numbers
                     let part_numbers = extract_part_numbers(&file_name);
                     
                     stats_vec.push(parse_alignment_file(
@@ -200,12 +207,11 @@ fn parse_count(s: &str) -> usize {
 fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::Result<()> {
     let mut writer = Writer::from_path(output_path)?;
 
-    // Write header
     writer.write_record(&[
         "Sample",
         "Subgraph",
-        "Part",
-        "Total Parts",
+        "Path",
+        "Total Paths",
         "Length",
         "Identity %",
         "Identity Count",
@@ -217,7 +223,6 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
         "Alignment Length",
     ])?;
 
-    // Write data
     for stats in results {
         let alignment_length = stats.end_position - stats.start_position + 1;
         writer.write_record(&[

From a7da5a5e99903b253b910e8f0974102f5ea0e49b Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Thu, 19 Jun 2025 15:41:50 -0600
Subject: [PATCH 08/24] Add runtime and objective values

---
 libs/output_scraper/src/main.rs | 78 ++++++++++++++++++++++++++++++---
 1 file changed, 73 insertions(+), 5 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index 4a51983d..67430c4f 100644
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -18,6 +18,14 @@ struct AlignmentStats {
     score: f64,
     start_position: usize,
     end_position: usize,
+    runtime: f64,
+    objective_value: f64,
+}
+
+#[derive(Debug)]
+struct DecompStats {
+    runtime: f64,
+    objective_value: f64,
 }
 
 fn main() -> std::io::Result<()> {
@@ -28,6 +36,7 @@ fn main() -> std::io::Result<()> {
     }
 
     let input_dir = Path::new(&args[1]);
+    let decomp_dir = input_dir.join("../decomp_results");
     let output_path = Path::new(&args[2]);
     let mut results = Vec::new();
 
@@ -51,7 +60,8 @@ fn main() -> std::io::Result<()> {
                     if let Some(dir_name) = subgraph_path.file_name() {
                         let subgraph_name = dir_name.to_string_lossy().to_string();
                         if subgraph_name.starts_with("subgraph_") {
-                            if let Some(stats_vec) = process_subgraph_dir(&subgraph_path, &sample_name, &subgraph_name)? {
+                            if let Some(mut stats_vec) = process_subgraph_dir(&subgraph_path, &sample_name, &subgraph_name)? {
+                                add_decomp_stats(&decomp_dir, &mut stats_vec)?;
                                 results.extend(stats_vec);
                             }
                         }
@@ -60,13 +70,14 @@ fn main() -> std::io::Result<()> {
             }
             
             // Also check for files directly in the sample directory
-            if let Some(stats_vec) = process_files_in_dir(&sample_path, &sample_name, "root")? {
+            if let Some(mut stats_vec) = process_files_in_dir(&sample_path, &sample_name, "root")? {
+                add_decomp_stats(&decomp_dir, &mut stats_vec)?;
                 results.extend(stats_vec);
             }
         }
     }
 
-    // Sort results by sample, then subgraph, then total parts, then part number
+    // Sort results
     results.sort_by(|a, b| {
         a.sample_name.cmp(&b.sample_name)
             .then(a.subgraph_name.cmp(&b.subgraph_name))
@@ -84,6 +95,57 @@ fn main() -> std::io::Result<()> {
     Ok(())
 }
 
+fn add_decomp_stats(decomp_dir: &Path, stats_vec: &mut Vec<AlignmentStats>) -> std::io::Result<()> {
+    for stat in stats_vec {
+        if let Some(decomp_stats) = get_decomp_stats(decomp_dir, &stat.sample_name, &stat.subgraph_name, stat.part_number)? {
+            stat.runtime = decomp_stats.runtime;
+            stat.objective_value = decomp_stats.objective_value;
+        }
+    }
+    Ok(())
+}
+
+fn get_decomp_stats(decomp_dir: &Path, sample_name: &str, subgraph_name: &str, part_number: usize) -> std::io::Result<Option<DecompStats>> {
+    let pattern = format!("{}_{}_{}.paths", sample_name, subgraph_name, part_number);
+    
+    for entry in fs::read_dir(decomp_dir)? {
+        let entry = entry?;
+        let path = entry.path();
+        
+        if let Some(file_name) = path.file_name() {
+            let file_name = file_name.to_string_lossy();
+            if file_name.ends_with(&pattern) {
+                return parse_decomp_file(&path);
+            }
+        }
+    }
+    
+    Ok(None)
+}
+
+fn parse_decomp_file(file_path: &Path) -> std::io::Result<Option<DecompStats>> {
+    let file = File::open(file_path)?;
+    let reader = BufReader::new(file);
+
+    let mut runtime = 0.0;
+    let mut objective_value = 0.0;
+
+    for line in reader.lines() {
+        let line = line?;
+        
+        if line.starts_with("Runtime: ") {
+            runtime = line.split_whitespace().nth(1).and_then(|s| s.parse().ok()).unwrap_or(0.0);
+        } else if line.starts_with("Objective Value: ") {
+            objective_value = line.split_whitespace().nth(2).and_then(|s| s.parse().ok()).unwrap_or(0.0);
+        }
+    }
+
+    Ok(Some(DecompStats {
+        runtime,
+        objective_value,
+    }))
+}
+
 fn process_subgraph_dir(dir: &Path, sample_name: &str, subgraph_name: &str) -> std::io::Result<Option<Vec<AlignmentStats>>> {
     process_files_in_dir(dir, sample_name, subgraph_name)
 }
@@ -131,7 +193,7 @@ fn extract_part_numbers(filename: &str) -> (usize, usize) {
             }
         }
     }
-    (1, 1) // Default values if parsing fails
+    (1, 1)
 }
 
 fn parse_alignment_file(
@@ -140,7 +202,7 @@ fn parse_alignment_file(
     subgraph_name: String,
     part_numbers: (usize, usize)
 ) -> std::io::Result<AlignmentStats> {
-    let file = fs::File::open(file_path)?;
+    let file = File::open(file_path)?;
     let reader = BufReader::new(file);
 
     let mut stats = AlignmentStats {
@@ -156,6 +218,8 @@ fn parse_alignment_file(
         score: 0.0,
         start_position: 0,
         end_position: 0,
+        runtime: 0.0,
+        objective_value: 0.0,
     };
 
     for line in reader.lines() {
@@ -221,6 +285,8 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
         "Start Position",
         "End Position",
         "Alignment Length",
+        "Runtime (s)",
+        "Objective Value",
     ])?;
 
     for stats in results {
@@ -239,6 +305,8 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
             &stats.start_position.to_string(),
             &stats.end_position.to_string(),
             &alignment_length.to_string(),
+            &format!("{:.4}", stats.runtime),
+            &format!("{:.6}", stats.objective_value),
         ])?;
     }
 

From 28acbc4ef630ed8f87308294339b84972bcf4f26 Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Fri, 20 Jun 2025 10:29:55 -0600
Subject: [PATCH 09/24] Change where decomp time is pulled from

---
 libs/output_scraper/src/main.rs | 80 +++++++++++++++++++++------------
 1 file changed, 52 insertions(+), 28 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index 67430c4f..3aeb4990 100644
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -1,7 +1,7 @@
 use std::path::{Path, PathBuf};
 use std::fs::{self, File};
 use std::io::{BufRead, BufReader};
-use std::collections::BTreeMap;
+use std::collections::HashMap;
 use csv::Writer;
 
 #[derive(Debug)]
@@ -22,7 +22,7 @@ struct AlignmentStats {
     objective_value: f64,
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 struct DecompStats {
     runtime: f64,
     objective_value: f64,
@@ -40,7 +40,9 @@ fn main() -> std::io::Result<()> {
     let output_path = Path::new(&args[2]);
     let mut results = Vec::new();
 
-    // Process each sample directory
+    let decomp_stats_map = build_decomp_stats_map(&decomp_dir)?;
+
+    // Process each sample dir //
     for sample_entry in fs::read_dir(input_dir)? {
         let sample_entry = sample_entry?;
         let sample_path = sample_entry.path();
@@ -51,7 +53,7 @@ fn main() -> std::io::Result<()> {
                 .to_string_lossy()
                 .to_string();
 
-            // Process each subgraph directory in the sample directory
+            // Process each subgraph dir //
             for subgraph_entry in fs::read_dir(&sample_path)? {
                 let subgraph_entry = subgraph_entry?;
                 let subgraph_path = subgraph_entry.path();
@@ -61,7 +63,7 @@ fn main() -> std::io::Result<()> {
                         let subgraph_name = dir_name.to_string_lossy().to_string();
                         if subgraph_name.starts_with("subgraph_") {
                             if let Some(mut stats_vec) = process_subgraph_dir(&subgraph_path, &sample_name, &subgraph_name)? {
-                                add_decomp_stats(&decomp_dir, &mut stats_vec)?;
+                                add_decomp_stats(&decomp_stats_map, &mut stats_vec);
                                 results.extend(stats_vec);
                             }
                         }
@@ -69,15 +71,15 @@ fn main() -> std::io::Result<()> {
                 }
             }
             
-            // Also check for files directly in the sample directory
+            // Also check for files //
             if let Some(mut stats_vec) = process_files_in_dir(&sample_path, &sample_name, "root")? {
-                add_decomp_stats(&decomp_dir, &mut stats_vec)?;
+                add_decomp_stats(&decomp_stats_map, &mut stats_vec);
                 results.extend(stats_vec);
             }
         }
     }
 
-    // Sort results
+    // Sort results by sample, then subgraph, then total parts, then part number //
     results.sort_by(|a, b| {
         a.sample_name.cmp(&b.sample_name)
             .then(a.subgraph_name.cmp(&b.subgraph_name))
@@ -85,7 +87,7 @@ fn main() -> std::io::Result<()> {
             .then(a.part_number.cmp(&b.part_number))
     });
 
-    // Write CSV output
+    // Write csv //
     write_csv_output(output_path, &results)?;
 
     println!("Successfully processed {} alignment files, output written to {}", 
@@ -95,18 +97,8 @@ fn main() -> std::io::Result<()> {
     Ok(())
 }
 
-fn add_decomp_stats(decomp_dir: &Path, stats_vec: &mut Vec<AlignmentStats>) -> std::io::Result<()> {
-    for stat in stats_vec {
-        if let Some(decomp_stats) = get_decomp_stats(decomp_dir, &stat.sample_name, &stat.subgraph_name, stat.part_number)? {
-            stat.runtime = decomp_stats.runtime;
-            stat.objective_value = decomp_stats.objective_value;
-        }
-    }
-    Ok(())
-}
-
-fn get_decomp_stats(decomp_dir: &Path, sample_name: &str, subgraph_name: &str, part_number: usize) -> std::io::Result<Option<DecompStats>> {
-    let pattern = format!("{}_{}_{}.paths", sample_name, subgraph_name, part_number);
+fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String, String), DecompStats>> {
+    let mut map = HashMap::new();
     
     for entry in fs::read_dir(decomp_dir)? {
         let entry = entry?;
@@ -114,13 +106,41 @@ fn get_decomp_stats(decomp_dir: &Path, sample_name: &str, subgraph_name: &str, p
         
         if let Some(file_name) = path.file_name() {
             let file_name = file_name.to_string_lossy();
-            if file_name.ends_with(&pattern) {
-                return parse_decomp_file(&path);
+            if file_name.ends_with(".paths") {
+                if let Some((sample_name, subgraph_name)) = parse_decomp_filename(&file_name) {
+                    if let Some(stats) = parse_decomp_file(&path)? {
+                        map.insert((sample_name, subgraph_name), stats);
+                    }
+                }
             }
         }
     }
     
-    Ok(None)
+    Ok(map)
+}
+
+fn parse_decomp_filename(filename: &str) -> Option<(String, String)> {
+    let parts: Vec<&str> = filename.split('_').collect();
+    if parts.len() >= 4 {
+        let sample_end = parts.len() - 3;
+        let sample_name = parts[..sample_end].join("_");
+        let subgraph_name = format!("{}_{}", parts[sample_end], parts[sample_end + 1]);
+        return Some((sample_name, subgraph_name));
+    }
+    None
+}
+
+fn add_decomp_stats(
+    decomp_stats_map: &HashMap<(String, String), DecompStats>,
+    stats_vec: &mut Vec<AlignmentStats>
+) {
+    for stat in stats_vec {
+        let key = (stat.sample_name.clone(), stat.subgraph_name.clone());
+        if let Some(decomp_stats) = decomp_stats_map.get(&key) {
+            stat.runtime = decomp_stats.runtime;
+            stat.objective_value = decomp_stats.objective_value;
+        }
+    }
 }
 
 fn parse_decomp_file(file_path: &Path) -> std::io::Result<Option<DecompStats>> {
@@ -140,10 +160,14 @@ fn parse_decomp_file(file_path: &Path) -> std::io::Result<Option<DecompStats>> {
         }
     }
 
-    Ok(Some(DecompStats {
-        runtime,
-        objective_value,
-    }))
+    if runtime > 0.0 || objective_value > 0.0 {
+        Ok(Some(DecompStats {
+            runtime,
+            objective_value,
+        }))
+    } else {
+        Ok(None)
+    }
 }
 
 fn process_subgraph_dir(dir: &Path, sample_name: &str, subgraph_name: &str) -> std::io::Result<Option<Vec<AlignmentStats>>> {

From 226d9eec3995c56cc7a31447b9c9930a01633b15 Mon Sep 17 00:00:00 2001
From: joserod0704 <jose1.rodriguez@umconnect.umt.edu>
Date: Fri, 20 Jun 2025 14:40:22 -0600
Subject: [PATCH 10/24] Fixed objective value in kleast_errors.py

---
 libs/decompose/kleast_errors.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/libs/decompose/kleast_errors.py b/libs/decompose/kleast_errors.py
index 67741259..2d8a9bf2 100755
--- a/libs/decompose/kleast_errors.py
+++ b/libs/decompose/kleast_errors.py
@@ -89,7 +89,7 @@ def create_k_least_graph(graph, paths):
     
     return k_least_graph
 
-def save_paths_to_file(paths, output_path, num_paths, runtime, mip_gap, objective_value, multigraph_decomposer=None):
+def save_paths_to_file(paths, output_path, num_paths, runtime, objective_value, multigraph_decomposer=None):
     """Save path information to a text file in the specified format."""
     # Calculate total flow through all paths
     total_flow = sum(paths['weights'])
@@ -97,8 +97,7 @@ def save_paths_to_file(paths, output_path, num_paths, runtime, mip_gap, objectiv
     with open(output_path, 'w') as f:
         f.write(f"Decomposition into {num_paths} paths\n")
         f.write(f"Runtime: {runtime:.2f} seconds\n")
-        f.write(f"MIP Gap: {mip_gap:.6f}\n")
-        f.write(f"Objective Value: {objective_value:.6f}\n")
+        f.write(f"Objective Value: {objective_value}\n")
         f.write(f"Number of Paths: {num_paths}\n")
         f.write("Paths and Weights:\n")
         
@@ -299,8 +298,8 @@ def generate_output_files(base_output_path, graph, max_paths, min_paths=1, visua
         
         # Get solver statistics
         runtime = time.time() - start_time
-        mip_gap = k_least.model.MIPGap if hasattr(k_least, 'model') else 1.0
-        objective_value = k_least.model.ObjVal if hasattr(k_least, 'model') else 0.0
+        #mip_gap = k_least.model.MIPGap #if hasattr(k_least, 'model') else 1.0
+        objective_value = k_least.get_objective_value
 
 
         if visualize:
@@ -318,7 +317,6 @@ def generate_output_files(base_output_path, graph, max_paths, min_paths=1, visua
             output_path, 
             num_paths,
             runtime,
-            mip_gap,
             objective_value,
             multigraph_decomposer=decomposer
         )

From bd109ac683c96016a218a2fa9080400fff994d9f Mon Sep 17 00:00:00 2001
From: joserod0704 <jose1.rodriguez@umconnect.umt.edu>
Date: Fri, 20 Jun 2025 15:53:47 -0600
Subject: [PATCH 11/24] Fixed data type for objective value in kleast_errors.py

---
 libs/decompose/kleast_errors.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libs/decompose/kleast_errors.py b/libs/decompose/kleast_errors.py
index b047d08b..c19d22de 100755
--- a/libs/decompose/kleast_errors.py
+++ b/libs/decompose/kleast_errors.py
@@ -306,12 +306,15 @@ def generate_output_files(base_output_path, graph, max_paths, min_paths=1, visua
         k_least = fp.kLeastAbsErrors(G=graph, k=num_paths, flow_attr='flow', elements_to_ignore=edges_to_ignore)
         k_least.solve()
         paths = k_least.get_solution(remove_empty_paths=True)
-        
+
+  
 
         # Get solver statistics
         runtime = time.time() - start_time
         #mip_gap = k_least.model.MIPGap #if hasattr(k_least, 'model') else 1.0
-        objective_value = k_least.get_objective_value
+        objective_value = k_least.get_objective_value()
+
+        print(f'objective: {type(objective_value)}')
 
 
         if visualize:

From 7ce8be8bd371482f8dcedc79033a1f94e57cee81 Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Mon, 23 Jun 2025 11:36:29 -0600
Subject: [PATCH 12/24] Change permissions

---
 libs/output_scraper/src/main.rs | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 libs/output_scraper/src/main.rs

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
old mode 100644
new mode 100755

From 6002937e98c496467e6d5a453b9d0f4ef3830e22 Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Thu, 26 Jun 2025 09:24:18 -0600
Subject: [PATCH 13/24] Edit output scraper to count nodes and edges

---
 libs/output_scraper/src/main.rs | 192 +++++++++++++++++++++++++-------
 1 file changed, 149 insertions(+), 43 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index 3aeb4990..aede0bc3 100755
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -1,7 +1,7 @@
 use std::path::{Path, PathBuf};
 use std::fs::{self, File};
 use std::io::{BufRead, BufReader};
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use csv::Writer;
 
 #[derive(Debug)]
@@ -20,6 +20,10 @@ struct AlignmentStats {
     end_position: usize,
     runtime: f64,
     objective_value: f64,
+    nodes: usize,
+    edges: usize,
+    sources: usize,
+    sinks: usize,
 }
 
 #[derive(Debug, Clone)]
@@ -28,6 +32,14 @@ struct DecompStats {
     objective_value: f64,
 }
 
+#[derive(Debug, Default)]
+struct GraphData {
+    nodes: HashSet<usize>,
+    edges: usize,
+    sources: usize,
+    sinks: usize,
+}
+
 fn main() -> std::io::Result<()> {
     let args: Vec<String> = std::env::args().collect();
     if args.len() != 3 {
@@ -37,12 +49,22 @@ fn main() -> std::io::Result<()> {
 
     let input_dir = Path::new(&args[1]);
     let decomp_dir = input_dir.join("../decomp_results");
+    let graphs_dir = input_dir.join("../graphs");
     let output_path = Path::new(&args[2]);
     let mut results = Vec::new();
 
+    println!("Starting processing with:");
+    println!("- Input directory: {}", input_dir.display());
+    println!("- Decomp directory: {}", decomp_dir.display());
+    println!("- Graphs directory: {}", graphs_dir.display());
+    println!("- Output CSV: {}", output_path.display());
+
+    println!("\nBuilding decomp stats map...");
     let decomp_stats_map = build_decomp_stats_map(&decomp_dir)?;
+    println!("Found {} decomp results", decomp_stats_map.len());
 
-    // Process each sample dir //
+    // Process each sample directory //
+    println!("\nProcessing sample directories...");
     for sample_entry in fs::read_dir(input_dir)? {
         let sample_entry = sample_entry?;
         let sample_path = sample_entry.path();
@@ -53,7 +75,10 @@ fn main() -> std::io::Result<()> {
                 .to_string_lossy()
                 .to_string();
 
-            // Process each subgraph dir //
+            println!("\nProcessing sample: {}", sample_name);
+
+            // Process each subgraph directory //
+            println!("Processing subgraph directories...");
             for subgraph_entry in fs::read_dir(&sample_path)? {
                 let subgraph_entry = subgraph_entry?;
                 let subgraph_path = subgraph_entry.path();
@@ -62,7 +87,9 @@ fn main() -> std::io::Result<()> {
                     if let Some(dir_name) = subgraph_path.file_name() {
                         let subgraph_name = dir_name.to_string_lossy().to_string();
                         if subgraph_name.starts_with("subgraph_") {
-                            if let Some(mut stats_vec) = process_subgraph_dir(&subgraph_path, &sample_name, &subgraph_name)? {
+                            println!("  Processing subgraph: {}", subgraph_name);
+                            if let Some(mut stats_vec) = process_subgraph_dir(&subgraph_path, &sample_name, &subgraph_name, &graphs_dir)? {
+                                println!("    Found {} alignment files", stats_vec.len());
                                 add_decomp_stats(&decomp_stats_map, &mut stats_vec);
                                 results.extend(stats_vec);
                             }
@@ -71,8 +98,9 @@ fn main() -> std::io::Result<()> {
                 }
             }
             
-            // Also check for files //
-            if let Some(mut stats_vec) = process_files_in_dir(&sample_path, &sample_name, "root")? {
+            println!("Checking for root-level alignment files...");
+            if let Some(mut stats_vec) = process_files_in_dir(&sample_path, &sample_name, "root", &graphs_dir)? {
+                println!("  Found {} root-level alignment files", stats_vec.len());
                 add_decomp_stats(&decomp_stats_map, &mut stats_vec);
                 results.extend(stats_vec);
             }
@@ -87,19 +115,117 @@ fn main() -> std::io::Result<()> {
             .then(a.part_number.cmp(&b.part_number))
     });
 
-    // Write csv //
+    // Write CSV //
+    println!("\nWriting output to {}...", output_path.display());
     write_csv_output(output_path, &results)?;
 
-    println!("Successfully processed {} alignment files, output written to {}", 
+    println!("\nSuccessfully processed {} alignment files, output written to {}", 
         results.len(), 
         output_path.display());
 
     Ok(())
 }
 
+fn parse_graph_file(file_path: &Path) -> std::io::Result<GraphData> {
+    let file = File::open(file_path)?;
+    let reader = BufReader::new(file);
+    let mut graph_data = GraphData::default();
+
+    let mut lines = reader.lines().skip(1);
+
+    while let Some(Ok(line)) = lines.next() {
+        let parts: Vec<&str> = line.split_whitespace().collect();
+        if parts.len() >= 2 {
+            if let (Ok(from_node), Ok(to_node)) = (parts[0].parse::<usize>(), parts[1].parse::<usize>()) {
+                graph_data.nodes.insert(from_node);
+                graph_data.nodes.insert(to_node);
+                graph_data.edges += 1;
+                
+                // Count sources (edges from node 0) //
+                if from_node == 0 {
+                    graph_data.sources += 1;
+                }
+                // Count sinks (edges to node 1) //
+                if to_node == 1 {
+                    graph_data.sinks += 1;
+                }
+            }
+        }
+    }
+
+    Ok(graph_data)
+}
+
+fn process_subgraph_dir(dir: &Path, sample_name: &str, subgraph_name: &str, graphs_dir: &Path) -> std::io::Result<Option<Vec<AlignmentStats>>> {
+    process_files_in_dir(dir, sample_name, subgraph_name, graphs_dir)
+}
+
+fn process_files_in_dir(dir: &Path, sample_name: &str, subgraph_name: &str, graphs_dir: &Path) -> std::io::Result<Option<Vec<AlignmentStats>>> {
+    let mut stats_vec = Vec::new();
+    
+    println!("    Scanning directory: {}", dir.display());
+    
+    for entry in fs::read_dir(dir)? {
+        let entry = entry?;
+        let path = entry.path();
+        
+        if path.is_file() {
+            if let Some(file_name) = path.file_name() {
+                let file_name = file_name.to_string_lossy();
+                if file_name.ends_with("_vs_ref.txt") {
+                    println!("      Found alignment file: {}", file_name);
+                    let part_numbers = extract_part_numbers(&file_name);
+                    
+                    let mut stats = parse_alignment_file(
+                        &path, 
+                        sample_name.to_string(),
+                        subgraph_name.to_string(),
+                        part_numbers
+                    )?;
+                    
+                    // Find and parse graph file
+                    let subgraph_num = subgraph_name.trim_start_matches("subgraph_");
+                    let graph_file_path = graphs_dir.join(sample_name)
+                        .join("out.dbg_subgraphs")
+                        .join(format!("graph_{}_compressed.dbg", subgraph_num));
+                    
+                    if graph_file_path.exists() {
+                        println!("        Parsing graph file: {}", graph_file_path.display());
+                        let graph_data = parse_graph_file(&graph_file_path)?;
+                        stats.nodes = graph_data.nodes.len();
+                        stats.edges = graph_data.edges;
+                        stats.sources = graph_data.sources;
+                        stats.sinks = graph_data.sinks;
+                        
+                        println!("          Nodes: {}, Edges: {}, Sources (from 0): {}, Sinks (to 1): {}", 
+                            stats.nodes, stats.edges, stats.sources, stats.sinks);
+                    } else {
+                        println!("        Graph file not found: {}", graph_file_path.display());
+                    }
+                    
+                    println!("        Part {}/{}: length={}, identity={:.1}%, gaps={:.1}%", 
+                        stats.part_number, stats.total_parts, stats.length, 
+                        stats.identity_pct, stats.gaps_pct);
+                    
+                    stats_vec.push(stats);
+                }
+            }
+        }
+    }
+    
+    if stats_vec.is_empty() {
+        println!("      No alignment files found");
+        Ok(None)
+    } else {
+        Ok(Some(stats_vec))
+    }
+}
+
 fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String, String), DecompStats>> {
     let mut map = HashMap::new();
     
+    println!("Scanning decomp directory: {}", decomp_dir.display());
+    
     for entry in fs::read_dir(decomp_dir)? {
         let entry = entry?;
         let path = entry.path();
@@ -107,8 +233,11 @@ fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String,
         if let Some(file_name) = path.file_name() {
             let file_name = file_name.to_string_lossy();
             if file_name.ends_with(".paths") {
+                println!("  Found decomp file: {}", file_name);
                 if let Some((sample_name, subgraph_name)) = parse_decomp_filename(&file_name) {
+                    println!("    Sample: {}, Subgraph: {}", sample_name, subgraph_name);
                     if let Some(stats) = parse_decomp_file(&path)? {
+                        println!("    Runtime: {:.4}s, Objective: {:.6}", stats.runtime, stats.objective_value);
                         map.insert((sample_name, subgraph_name), stats);
                     }
                 }
@@ -170,41 +299,6 @@ fn parse_decomp_file(file_path: &Path) -> std::io::Result<Option<DecompStats>> {
     }
 }
 
-fn process_subgraph_dir(dir: &Path, sample_name: &str, subgraph_name: &str) -> std::io::Result<Option<Vec<AlignmentStats>>> {
-    process_files_in_dir(dir, sample_name, subgraph_name)
-}
-
-fn process_files_in_dir(dir: &Path, sample_name: &str, subgraph_name: &str) -> std::io::Result<Option<Vec<AlignmentStats>>> {
-    let mut stats_vec = Vec::new();
-    
-    for entry in fs::read_dir(dir)? {
-        let entry = entry?;
-        let path = entry.path();
-        
-        if path.is_file() {
-            if let Some(file_name) = path.file_name() {
-                let file_name = file_name.to_string_lossy();
-                if file_name.ends_with("_vs_ref.txt") {
-                    let part_numbers = extract_part_numbers(&file_name);
-                    
-                    stats_vec.push(parse_alignment_file(
-                        &path, 
-                        sample_name.to_string(),
-                        subgraph_name.to_string(),
-                        part_numbers
-                    )?);
-                }
-            }
-        }
-    }
-    
-    if stats_vec.is_empty() {
-        Ok(None)
-    } else {
-        Ok(Some(stats_vec))
-    }
-}
-
 fn extract_part_numbers(filename: &str) -> (usize, usize) {
     let parts: Vec<&str> = filename.split('_').collect();
     for i in 0..parts.len() {
@@ -244,6 +338,10 @@ fn parse_alignment_file(
         end_position: 0,
         runtime: 0.0,
         objective_value: 0.0,
+        nodes: 0,
+        edges: 0,
+        sources: 0,
+        sinks: 0,
     };
 
     for line in reader.lines() {
@@ -311,6 +409,10 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
         "Alignment Length",
         "Runtime (s)",
         "Objective Value",
+        "Nodes",
+        "Edges",
+        "Sources (from 0)",
+        "Sinks (to 1)",
     ])?;
 
     for stats in results {
@@ -331,6 +433,10 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
             &alignment_length.to_string(),
             &format!("{:.4}", stats.runtime),
             &format!("{:.6}", stats.objective_value),
+            &stats.nodes.to_string(),
+            &stats.edges.to_string(),
+            &stats.sources.to_string(),
+            &stats.sinks.to_string(),
         ])?;
     }
 

From caf34694c0e27f3c1ae85afc25b91e9da37c5c1a Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Thu, 26 Jun 2025 09:43:58 -0600
Subject: [PATCH 14/24] Change which file the scraper is looking at

---
 libs/output_scraper/src/main.rs | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index aede0bc3..2c4f38d0 100755
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -59,11 +59,12 @@ fn main() -> std::io::Result<()> {
     println!("- Graphs directory: {}", graphs_dir.display());
     println!("- Output CSV: {}", output_path.display());
 
+    // First collect all decomp stats in a lookup table
     println!("\nBuilding decomp stats map...");
     let decomp_stats_map = build_decomp_stats_map(&decomp_dir)?;
     println!("Found {} decomp results", decomp_stats_map.len());
 
-    // Process each sample directory //
+    // Process each sample directory
     println!("\nProcessing sample directories...");
     for sample_entry in fs::read_dir(input_dir)? {
         let sample_entry = sample_entry?;
@@ -77,7 +78,7 @@ fn main() -> std::io::Result<()> {
 
             println!("\nProcessing sample: {}", sample_name);
 
-            // Process each subgraph directory //
+            // Process each subgraph directory in the sample directory
             println!("Processing subgraph directories...");
             for subgraph_entry in fs::read_dir(&sample_path)? {
                 let subgraph_entry = subgraph_entry?;
@@ -98,6 +99,7 @@ fn main() -> std::io::Result<()> {
                 }
             }
             
+            // Also check for files directly in the sample directory
             println!("Checking for root-level alignment files...");
             if let Some(mut stats_vec) = process_files_in_dir(&sample_path, &sample_name, "root", &graphs_dir)? {
                 println!("  Found {} root-level alignment files", stats_vec.len());
@@ -107,7 +109,7 @@ fn main() -> std::io::Result<()> {
         }
     }
 
-    // Sort results by sample, then subgraph, then total parts, then part number //
+    // Sort results by sample, then subgraph, then total parts, then part number
     results.sort_by(|a, b| {
         a.sample_name.cmp(&b.sample_name)
             .then(a.subgraph_name.cmp(&b.subgraph_name))
@@ -115,7 +117,7 @@ fn main() -> std::io::Result<()> {
             .then(a.part_number.cmp(&b.part_number))
     });
 
-    // Write CSV //
+    // Write CSV output
     println!("\nWriting output to {}...", output_path.display());
     write_csv_output(output_path, &results)?;
 
@@ -131,6 +133,7 @@ fn parse_graph_file(file_path: &Path) -> std::io::Result<GraphData> {
     let reader = BufReader::new(file);
     let mut graph_data = GraphData::default();
 
+    // Skip header line
     let mut lines = reader.lines().skip(1);
 
     while let Some(Ok(line)) = lines.next() {
@@ -141,11 +144,11 @@ fn parse_graph_file(file_path: &Path) -> std::io::Result<GraphData> {
                 graph_data.nodes.insert(to_node);
                 graph_data.edges += 1;
                 
-                // Count sources (edges from node 0) //
+                // Count sources (edges from node 0)
                 if from_node == 0 {
                     graph_data.sources += 1;
                 }
-                // Count sinks (edges to node 1) //
+                // Count sinks (edges to node 1)
                 if to_node == 1 {
                     graph_data.sinks += 1;
                 }
@@ -183,11 +186,9 @@ fn process_files_in_dir(dir: &Path, sample_name: &str, subgraph_name: &str, grap
                         part_numbers
                     )?;
                     
-                    // Find and parse graph file
+                    // Find and parse graph file in the new format: <sample>.super_<num>.dbg
                     let subgraph_num = subgraph_name.trim_start_matches("subgraph_");
-                    let graph_file_path = graphs_dir.join(sample_name)
-                        .join("out.dbg_subgraphs")
-                        .join(format!("graph_{}_compressed.dbg", subgraph_num));
+                    let graph_file_path = graphs_dir.join(format!("{}.super_{}.dbg", sample_name, subgraph_num));
                     
                     if graph_file_path.exists() {
                         println!("        Parsing graph file: {}", graph_file_path.display());
@@ -221,6 +222,11 @@ fn process_files_in_dir(dir: &Path, sample_name: &str, subgraph_name: &str, grap
     }
 }
 
+// [Rest of the functions remain exactly the same as in previous implementation...]
+// [build_decomp_stats_map, parse_decomp_filename, add_decomp_stats, parse_decomp_file]
+// [extract_part_numbers, parse_alignment_file, parse_percentage, parse_count]
+// [write_csv_output]
+
 fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String, String), DecompStats>> {
     let mut map = HashMap::new();
     

From f3be2c94573e9257bac8b68e08af31d88af5f021 Mon Sep 17 00:00:00 2001
From: joserod0704 <jose1.rodriguez@umconnect.umt.edu>
Date: Fri, 27 Jun 2025 09:49:18 -0600
Subject: [PATCH 15/24] Cleaned up output in pipeline

---
 findviralstrains.smk                   |  2 +-
 libs/FmAssemblyGraph                   |  2 +-
 libs/compress/compress.py              |  7 -------
 libs/decompose/kleast_errors.py        | 12 +++---------
 libs/rebuild/rebuild.py                |  8 --------
 libs/super_source_and_sink/src/main.rs |  5 +----
 6 files changed, 6 insertions(+), 30 deletions(-)

diff --git a/findviralstrains.smk b/findviralstrains.smk
index 4eeac6a1..a8862bda 100755
--- a/findviralstrains.smk
+++ b/findviralstrains.smk
@@ -84,7 +84,7 @@ fastq_filenames = set(fastq_filenames) # Deletes duplicate file entrys by conver
 fastq_filenames = list(fastq_filenames)
 
 fastq_filenames = [entry for entry in fastq_filenames if entry != ""] # Remake list with only populated values #
-print(fastq_filenames)
+
 
 ######################
 ## HELPER FUNCTIONS ##
diff --git a/libs/FmAssemblyGraph b/libs/FmAssemblyGraph
index 120c78ab..affc5bc3 160000
--- a/libs/FmAssemblyGraph
+++ b/libs/FmAssemblyGraph
@@ -1 +1 @@
-Subproject commit 120c78abcc7d6363d69b46f8ca7de71c9a28668c
+Subproject commit affc5bc31d3fc815b0898e91e32a210e54764e6a
diff --git a/libs/compress/compress.py b/libs/compress/compress.py
index cd719268..6196344e 100755
--- a/libs/compress/compress.py
+++ b/libs/compress/compress.py
@@ -128,9 +128,6 @@ def merge_nodes(forward_edges, reverse_edges, edge_seqs, kmer_length):
     # Find initial merge candidates
     candidates = find_merge_candidates(forward_edges, reverse_edges)
 
-    print(f"Initial candidates: {len(candidates)}")
-    
-        
     for node in candidates:
 
         # Get the source and target nodes
@@ -233,7 +230,6 @@ def main():
     forward_edges, reverse_edges, edge_seqs, kmer_length = read_graph(input_file)
 
     # merge the nodes
-    print("Merging nodes...")
     merge_nodes(forward_edges, reverse_edges, edge_seqs, kmer_length)
     
 
@@ -242,10 +238,7 @@ def main():
   
 
     #
-    print("Writing merged graph...")
     write_merged_graph(output_file, forward_edges)
     
-    print(f"Merged graph written to {output_file}")
-
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/libs/decompose/kleast_errors.py b/libs/decompose/kleast_errors.py
index c19d22de..aef074dd 100755
--- a/libs/decompose/kleast_errors.py
+++ b/libs/decompose/kleast_errors.py
@@ -119,7 +119,7 @@ def save_paths_to_file(paths, output_path, num_paths, runtime, objective_value,
             
             f.write(f"{path_weight:.6f} {path_str}\n")
     
-    print(f"INFO: Path details saved to {output_path}")
+
 
 
 def draw_labeled_multigraph(G, attr_name, ax=None, decimal_places=2, paths=None):
@@ -267,7 +267,7 @@ def visualize_and_save_graph(graph, output_path, num_paths, base_size=10, paths
     
     visualization_file = f"{output_path}_visualization.pdf"
     plt.savefig(visualization_file, dpi=300, bbox_inches='tight')
-    print(f"INFO: Visualization saved to {visualization_file}")
+
 
 
 def get_all_edges_for_node(graph, node):
@@ -314,9 +314,6 @@ def generate_output_files(base_output_path, graph, max_paths, min_paths=1, visua
         #mip_gap = k_least.model.MIPGap #if hasattr(k_least, 'model') else 1.0
         objective_value = k_least.get_objective_value()
 
-        print(f'objective: {type(objective_value)}')
-
-
         if visualize:
             # Visualize the graph
             visualize_and_save_graph(graph, output_path, num_paths, paths = paths)
@@ -345,8 +342,5 @@ def generate_output_files(base_output_path, graph, max_paths, min_paths=1, visua
     # Read the input graph
     graph = read_graph_to_networkx(args.input, min_edge_weight=args.mincount)
 
-
     # Generate output files for all path counts from max_paths down to 1
-    generate_output_files(args.output, graph, args.maxpaths, args.minpaths, visualize=args.visualize)
-
-    print("INFO: Processing completed.")
\ No newline at end of file
+    generate_output_files(args.output, graph, args.maxpaths, args.minpaths, visualize=args.visualize)
\ No newline at end of file
diff --git a/libs/rebuild/rebuild.py b/libs/rebuild/rebuild.py
index b775193d..afcb5ebe 100755
--- a/libs/rebuild/rebuild.py
+++ b/libs/rebuild/rebuild.py
@@ -72,7 +72,6 @@ def main(path_file, edge_file, bd_outfile):
                     print(f"Skipping path - not enough nodes: {nodes}")
                     continue
 
-                print(f"\nProcessing path {counter} of {total_paths}:")
                 genome = ""
                 is_first_node = True
 
@@ -83,16 +82,13 @@ def main(path_file, edge_file, bd_outfile):
                     
                     # Check if this is a special source/sink edge
                     if (from_node, to_node) in special_edges:
-                        print(f"Edge {from_node}->{to_node}: special source/sink edge - no sequence added")
                         continue
                     
                     # Try forward direction first
                     if to_node in sequences.get(from_node, {}):
                         sequence = sequences[from_node][to_node]
-                        print(f"Edge {from_node}->{to_node}: found sequence (length {len(sequence)})")
                         if not is_first_node and len(sequence) > 27:
                             sequence = sequence[27:]
-                            print(f"  Trimmed to {len(sequence)} bases")
                         genome += sequence
                     else:
                         # Try reverse complement
@@ -100,13 +96,10 @@ def main(path_file, edge_file, bd_outfile):
                         rev_to = from_node
                         if rev_to in sequences.get(rev_from, {}):
                             sequence = reverse_complement(sequences[rev_from][rev_to])
-                            print(f"Edge {from_node}->{to_node}: found reverse complement (length {len(sequence)})")
                             if not is_first_node and len(sequence) > 27:
                                 sequence = sequence[27:]
-                                print(f"  Trimmed to {len(sequence)} bases")
                             genome += sequence
                         else:
-                            print(f"WARNING: Edge {from_node}->{to_node} not found in either direction")
                             # Add gap of Ns proportional to expected length
                             gap_size = 100 if (from_node == '0' or to_node == '1') else 30
                             genome += "N" * gap_size
@@ -117,7 +110,6 @@ def main(path_file, edge_file, bd_outfile):
                 output_file = f"{bd_outfile.rsplit('.', 1)[0]}_{counter}_of_{total_paths}.fasta"
                 with open(output_file, 'w') as out_f:
                     out_f.write(f">Weight: {weight}\n{genome}\n")
-                print(f"Generated {output_file} with {len(genome)} bases")
                 
                 counter += 1
 
diff --git a/libs/super_source_and_sink/src/main.rs b/libs/super_source_and_sink/src/main.rs
index 4d3e914f..81adbbd5 100755
--- a/libs/super_source_and_sink/src/main.rs
+++ b/libs/super_source_and_sink/src/main.rs
@@ -140,8 +140,5 @@ fn main() {
     )
     .expect("unable to create super sources and sinks");
 
-    println!(
-        "New nodes and edges with weights written to: {}",
-        output_file_path.display()
-    );
+
 }
\ No newline at end of file

From f8ed7bedd0a63164bfb1231839c7e9ba17340efd Mon Sep 17 00:00:00 2001
From: joserod0704 <jose1.rodriguez@umconnect.umt.edu>
Date: Fri, 27 Jun 2025 11:58:20 -0600
Subject: [PATCH 16/24] Added time limit arg to kleast_errors.py for solver

---
 libs/decompose/kleast_errors.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libs/decompose/kleast_errors.py b/libs/decompose/kleast_errors.py
index aef074dd..6e372740 100755
--- a/libs/decompose/kleast_errors.py
+++ b/libs/decompose/kleast_errors.py
@@ -287,7 +287,7 @@ def get_all_edges_for_node(graph, node):
     return edges
 
 
-def generate_output_files(base_output_path, graph, max_paths, min_paths=1, visualize=False):
+def generate_output_files(base_output_path, graph, time_limit, max_paths, min_paths=1, visualize=False):
     """Generate output files for all path counts from max_paths down to min_paths."""
     # Extract the base filename without extension
     base_name = os.path.splitext(base_output_path)[0]
@@ -303,7 +303,7 @@ def generate_output_files(base_output_path, graph, max_paths, min_paths=1, visua
         edges_to_ignore = get_all_edges_for_node(graph, "0") + get_all_edges_for_node(graph, "1")
     
         # Perform k-least errors analysis for current number of paths
-        k_least = fp.kLeastAbsErrors(G=graph, k=num_paths, flow_attr='flow', elements_to_ignore=edges_to_ignore)
+        k_least = fp.kLeastAbsErrors(G=graph, k=num_paths, flow_attr='flow', elements_to_ignore=edges_to_ignore, time_limit = time_limit)
         k_least.solve()
         paths = k_least.get_solution(remove_empty_paths=True)
 
@@ -343,4 +343,4 @@ def generate_output_files(base_output_path, graph, max_paths, min_paths=1, visua
     graph = read_graph_to_networkx(args.input, min_edge_weight=args.mincount)
 
     # Generate output files for all path counts from max_paths down to 1
-    generate_output_files(args.output, graph, args.maxpaths, args.minpaths, visualize=args.visualize)
\ No newline at end of file
+    generate_output_files(args.output, graph, args.timelimit, args.maxpaths, args.minpaths, visualize=args.visualize)
\ No newline at end of file

From 8b9f572a294924bf83d5d837d65dcf12811fb91b Mon Sep 17 00:00:00 2001
From: joserod0704 <jose1.rodriguez@umconnect.umt.edu>
Date: Fri, 27 Jun 2025 14:10:48 -0600
Subject: [PATCH 17/24] Added threads to be passed to flowpaths in
 kleast_errors.py

---
 libs/decompose/kleast_errors.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libs/decompose/kleast_errors.py b/libs/decompose/kleast_errors.py
index 6e372740..3974618a 100755
--- a/libs/decompose/kleast_errors.py
+++ b/libs/decompose/kleast_errors.py
@@ -287,7 +287,7 @@ def get_all_edges_for_node(graph, node):
     return edges
 
 
-def generate_output_files(base_output_path, graph, time_limit, max_paths, min_paths=1, visualize=False):
+def generate_output_files(base_output_path, graph, time_limit, threads,  max_paths, min_paths=1, visualize=False):
     """Generate output files for all path counts from max_paths down to min_paths."""
     # Extract the base filename without extension
     base_name = os.path.splitext(base_output_path)[0]
@@ -303,7 +303,7 @@ def generate_output_files(base_output_path, graph, time_limit, max_paths, min_pa
         edges_to_ignore = get_all_edges_for_node(graph, "0") + get_all_edges_for_node(graph, "1")
     
         # Perform k-least errors analysis for current number of paths
-        k_least = fp.kLeastAbsErrors(G=graph, k=num_paths, flow_attr='flow', elements_to_ignore=edges_to_ignore, time_limit = time_limit)
+        k_least = fp.kLeastAbsErrors(G=graph, k=num_paths, flow_attr='flow', elements_to_ignore=edges_to_ignore, time_limit = time_limit, threads = threads)
         k_least.solve()
         paths = k_least.get_solution(remove_empty_paths=True)
 
@@ -343,4 +343,4 @@ def generate_output_files(base_output_path, graph, time_limit, max_paths, min_pa
     graph = read_graph_to_networkx(args.input, min_edge_weight=args.mincount)
 
     # Generate output files for all path counts from max_paths down to 1
-    generate_output_files(args.output, graph, args.timelimit, args.maxpaths, args.minpaths, visualize=args.visualize)
\ No newline at end of file
+    generate_output_files(args.output, graph, args.timelimit, args.threads, args.maxpaths, args.minpaths, visualize=args.visualize)
\ No newline at end of file

From 82f090292a8af846dad6c51e64ab8b5a2cb102e1 Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Wed, 2 Jul 2025 11:45:56 -0600
Subject: [PATCH 18/24] Change scraper to grab separate decomp files

---
 libs/output_scraper/src/main.rs | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index 2c4f38d0..28e00488 100755
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -222,12 +222,7 @@ fn process_files_in_dir(dir: &Path, sample_name: &str, subgraph_name: &str, grap
     }
 }
 
-// [Rest of the functions remain exactly the same as in previous implementation...]
-// [build_decomp_stats_map, parse_decomp_filename, add_decomp_stats, parse_decomp_file]
-// [extract_part_numbers, parse_alignment_file, parse_percentage, parse_count]
-// [write_csv_output]
-
-fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String, String), DecompStats>> {
+fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String, String, usize), DecompStats>> {
     let mut map = HashMap::new();
     
     println!("Scanning decomp directory: {}", decomp_dir.display());
@@ -240,11 +235,11 @@ fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String,
             let file_name = file_name.to_string_lossy();
             if file_name.ends_with(".paths") {
                 println!("  Found decomp file: {}", file_name);
-                if let Some((sample_name, subgraph_name)) = parse_decomp_filename(&file_name) {
-                    println!("    Sample: {}, Subgraph: {}", sample_name, subgraph_name);
+                if let Some((sample_name, subgraph_name, total_parts)) = parse_decomp_filename(&file_name) {
+                    println!("    Sample: {}, Subgraph: {}, Total Parts: {}", sample_name, subgraph_name, total_parts);
                     if let Some(stats) = parse_decomp_file(&path)? {
                         println!("    Runtime: {:.4}s, Objective: {:.6}", stats.runtime, stats.objective_value);
-                        map.insert((sample_name, subgraph_name), stats);
+                        map.insert((sample_name, subgraph_name, total_parts), stats);
                     }
                 }
             }
@@ -254,23 +249,34 @@ fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String,
     Ok(map)
 }
 
-fn parse_decomp_filename(filename: &str) -> Option<(String, String)> {
+fn parse_decomp_filename(filename: &str) -> Option<(String, String, usize)> {
     let parts: Vec<&str> = filename.split('_').collect();
     if parts.len() >= 4 {
         let sample_end = parts.len() - 3;
         let sample_name = parts[..sample_end].join("_");
         let subgraph_name = format!("{}_{}", parts[sample_end], parts[sample_end + 1]);
-        return Some((sample_name, subgraph_name));
+        
+        // Extract total parts from filename (assuming format like "XXX_YYY_Z.paths")
+        let total_parts = parts.last()
+            .and_then(|s| s.split('.').next())
+            .and_then(|s| s.parse().ok())
+            .unwrap_or(1);
+            
+        return Some((sample_name, subgraph_name, total_parts));
     }
     None
 }
 
 fn add_decomp_stats(
-    decomp_stats_map: &HashMap<(String, String), DecompStats>,
+    decomp_stats_map: &HashMap<(String, String, usize), DecompStats>,
     stats_vec: &mut Vec<AlignmentStats>
 ) {
     for stat in stats_vec {
-        let key = (stat.sample_name.clone(), stat.subgraph_name.clone());
+        let key = (
+            stat.sample_name.clone(), 
+            stat.subgraph_name.clone(),
+            stat.total_parts
+        );
         if let Some(decomp_stats) = decomp_stats_map.get(&key) {
             stat.runtime = decomp_stats.runtime;
             stat.objective_value = decomp_stats.objective_value;

From b7d934714d14f5ca492861a927ddd4159e0d4e9a Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Thu, 3 Jul 2025 10:44:42 -0600
Subject: [PATCH 19/24] Create alignment visualizer

---
 libs/alignment_vis/alignment_vis.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100755 libs/alignment_vis/alignment_vis.py

diff --git a/libs/alignment_vis/alignment_vis.py b/libs/alignment_vis/alignment_vis.py
new file mode 100755
index 00000000..e69de29b

From 96d5b95da680ecf21755ab20f39dfc66064692e5 Mon Sep 17 00:00:00 2001
From: Mikhail <mckaylbing@gmail.com>
Date: Thu, 3 Jul 2025 15:20:29 -0600
Subject: [PATCH 20/24] First draft of alignment vis

---
 libs/alignment_vis/alignment_vis.py | 149 ++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)

diff --git a/libs/alignment_vis/alignment_vis.py b/libs/alignment_vis/alignment_vis.py
index e69de29b..b7e17988 100755
--- a/libs/alignment_vis/alignment_vis.py
+++ b/libs/alignment_vis/alignment_vis.py
@@ -0,0 +1,149 @@
+import re
+import os
+import matplotlib.pyplot as plt
+from matplotlib.patches import Rectangle, Patch
+from matplotlib.collections import PatchCollection
+
+def parse_alignment_file(filename):
+    """Parse one alignment file into (positions, matches, identity); on any exception print it and return ([], [], 0)."""
+    try:
+        with open(filename, 'r') as f:
+            content = f.read()
+        # Percentage from the "Identity: a/b (p%)" line; falls back to 100.0 when no such line matches
+        identity_match = re.search(r'Identity:\s+(\d+)/(\d+)\s+\(([\d.]+)%\)', content)
+        identity = float(identity_match.group(3)) if identity_match else 100.0
+
+        # Get aligned regions and positions: group 1 = reference start, group 2 = reference bases, group 3 = '|'/'.' match line
+        positions = []
+        matches = []
+        blocks = re.finditer(
+            r'NC_045512\.2\s+(\d+)\s+([ACGT]+).*?\n\s+([|.]+)\nWeight\s+\d+\s+([ACGT]+)',
+            content,
+            re.DOTALL
+        )
+        for block in blocks:
+            start = int(block.group(1))
+            ref_seq = block.group(2)
+            match_str = block.group(3)
+            end = start + len(ref_seq) - 1  # inclusive end coordinate of this block
+            positions.append((start, end))
+            matches.append(match_str)
+
+        return positions, matches, identity
+    except Exception as e:
+        print(f"Error parsing {filename}: {str(e)}")
+        return [], [], 0
+
+def find_alignments(root_dir):
+    """Map each "X_of_Y" key to [(path, subgraph), ...] for *_X_of_Y_vs_ref.txt files inside root_dir/subgraph_* dirs."""
+    alignments = {}
+
+    # Walk through subgraph directories
+    for subgraph in os.listdir(root_dir):
+        if not subgraph.startswith('subgraph_'):
+            continue
+
+        subgraph_dir = os.path.join(root_dir, subgraph)
+        if not os.path.isdir(subgraph_dir):
+            continue
+
+        # Find all alignment files in this subgraph
+        for fname in os.listdir(subgraph_dir):
+            if not fname.endswith('_vs_ref.txt'):
+                continue
+
+            # Extract the X_of_Y pattern (e.g., "1_of_1"). Splitting on '_' leaves
+            # [..., X, 'of', Y, 'vs', 'ref.txt'], so X is parts[-5] and Y is parts[-3].
+            parts = fname.split('_')
+            if len(parts) < 5 or parts[-4] != 'of':
+                continue
+            x_of_y = f"{parts[-5]}_of_{parts[-3]}"
+
+            full_path = os.path.join(subgraph_dir, fname)
+            alignments.setdefault(x_of_y, []).append((full_path, subgraph))
+
+    return alignments
+
+def plot_alignment_group(group_name, files, genome_length=29903, output_dir="."):
+    """Plot one group of alignments (e.g., all 1_of_1 files) and save it as <output_dir>/<group_name>_alignment.pdf"""
+    if not files:
+        return
+
+    fig, ax = plt.subplots(figsize=(15, 2 + len(files) * 0.5))
+
+    # Gray background for full genome
+    ax.add_patch(Rectangle((0, 0), genome_length, len(files) + 1,
+                 color='lightgray', alpha=0.3))
+
+    # Plot each alignment in the group: one row per (path, subgraph) tuple, rows numbered from 1 in sorted order
+    for i, (file_path, subgraph) in enumerate(sorted(files), 1):
+        positions, matches, identity = parse_alignment_file(file_path)
+        if not positions:
+            continue
+
+        # Create colored segments: one unit-wide rectangle per position, green for '|' and red otherwise
+        patches = []
+        for (start, end), match_str in zip(positions, matches):
+            for pos, char in zip(range(start, end + 1), match_str):
+                color = (0, 0.8, 0) if char == '|' else (0.8, 0, 0)  # Green or red
+                patches.append(Rectangle((pos, i - 0.4), 1, 0.8, color=color))
+
+        ax.add_collection(PatchCollection(patches, match_original=True))
+
+        # Add labels: subgraph/file name left of the row, identity percentage right of it
+        fname = os.path.basename(file_path).replace('_vs_ref.txt', '')
+        ax.text(-1500, i, f"{subgraph}/{fname}", ha='right', va='center', fontsize=8)
+        ax.text(genome_length + 1500, i, f"{identity:.1f}%", ha='left', va='center', fontsize=8)
+
+    # Add genome scale markers: a dotted vertical line every 5000 bp, labelled in kb except at 0
+    for x in range(0, genome_length + 1, 5000):
+        ax.axvline(x, color='gray', linestyle=':', alpha=0.5)
+        if x > 0:
+            ax.text(x, 0.2, f"{x//1000}kb", ha='center', fontsize=8)
+
+    ax.set_xlim(-2000, genome_length + 2000)
+    ax.set_ylim(0, len(files) + 1)
+    ax.set_yticks([])
+    ax.set_xlabel("Genomic Position (bp)")
+    ax.set_title(f"Alignment Group: {group_name.replace('_', ' ')}")
+
+    plt.tight_layout()
+    output_path = os.path.join(output_dir, f"{group_name}_alignment.pdf")
+    plt.savefig(output_path, dpi=300, bbox_inches='tight')
+    plt.close()
+    print(f"Saved: {output_path}")
+
+def main():
+    """Command-line entry point: group the alignment files under <input_dir> and write one plot per group."""
+    import sys
+    if len(sys.argv) < 2:
+        print("Usage: python alignment_vis.py <input_dir> [output_dir]")
+        print("Example: python alignment_vis.py path/to/E1250_S84_L001/")
+        return
+    # Positional arguments: input directory (trailing '/' stripped), then optional output directory
+    input_dir = sys.argv[1].rstrip('/')
+    output_dir = sys.argv[2] if len(sys.argv) > 2 else "alignment_plots"
+    if not os.path.exists(input_dir):
+        print(f"Error: Directory not found - {input_dir}")
+        return
+
+    # Create output directory
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Find and group all alignment files
+    alignments = find_alignments(input_dir)
+    if not alignments:
+        print(f"No valid alignment files found in subgraph directories under {input_dir}")
+        print("Please verify that:")
+        print("1. The directory contains subgraph_* folders")
+        print("2. Those subgraphs contain files matching *_X_of_Y_vs_ref.txt")
+        return
+
+    # Process each group
+    for group_name, files in alignments.items():
+        plot_alignment_group(group_name, files, output_dir=output_dir)
+
+    print(f"\nAll plots saved to: {os.path.abspath(output_dir)}")
+
+if __name__ == "__main__":
+    main()

From a595a753e5a13640bfabe634007393125d1c7648 Mon Sep 17 00:00:00 2001
From: joserod0704 <jose1.rodriguez@umconnect.umt.edu>
Date: Tue, 8 Jul 2025 10:20:47 -0600
Subject: [PATCH 21/24] Prints out total weight of all edges in a graph

---
 libs/decompose/kleast_errors.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libs/decompose/kleast_errors.py b/libs/decompose/kleast_errors.py
index 3974618a..f4e4554a 100755
--- a/libs/decompose/kleast_errors.py
+++ b/libs/decompose/kleast_errors.py
@@ -96,10 +96,14 @@ def save_paths_to_file(paths, output_path, num_paths, runtime, objective_value,
     """Save path information to a text file in the specified format."""
     # Calculate total flow through all paths
     total_flow = sum(paths['weights'])
+
+    # sum of all weights on all edges of original graph
+    total_weight_graph = sum(data['flow'] for u, v, data in graph.edges(data=True))
     
     with open(output_path, 'w') as f:
         f.write(f"Decomposition into {num_paths} paths\n")
         f.write(f"Runtime: {runtime:.2f} seconds\n")
+        f.write(f"Total Flow: {total_weight_graph}\n")
         f.write(f"Objective Value: {objective_value}\n")
         f.write(f"Number of Paths: {num_paths}\n")
         f.write("Paths and Weights:\n")
@@ -307,7 +311,9 @@ def generate_output_files(base_output_path, graph, time_limit, threads,  max_pat
         k_least.solve()
         paths = k_least.get_solution(remove_empty_paths=True)
 
-  
+        
+
+
 
         # Get solver statistics
         runtime = time.time() - start_time

From 2a9c59419e981793798cb06c52f85d33a4b97c97 Mon Sep 17 00:00:00 2001
From: joserod0704 <jose1.rodriguez@umconnect.umt.edu>
Date: Tue, 8 Jul 2025 14:41:40 -0600
Subject: [PATCH 22/24] added total flow for entire graph to the csv for stats

---
 libs/output_scraper/src/main.rs | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index 28e00488..285d9e53 100755
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -24,12 +24,14 @@ struct AlignmentStats {
     edges: usize,
     sources: usize,
     sinks: usize,
+    total_flow: f64, 
 }
 
 #[derive(Debug, Clone)]
 struct DecompStats {
     runtime: f64,
     objective_value: f64,
+    total_flow: f64, 
 }
 
 #[derive(Debug, Default)]
@@ -117,6 +119,7 @@ fn main() -> std::io::Result<()> {
             .then(a.part_number.cmp(&b.part_number))
     });
 
+    
     // Write CSV output
     println!("\nWriting output to {}...", output_path.display());
     write_csv_output(output_path, &results)?;
@@ -128,6 +131,8 @@ fn main() -> std::io::Result<()> {
     Ok(())
 }
 
+
+
 fn parse_graph_file(file_path: &Path) -> std::io::Result<GraphData> {
     let file = File::open(file_path)?;
     let reader = BufReader::new(file);
@@ -236,9 +241,12 @@ fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String,
             if file_name.ends_with(".paths") {
                 println!("  Found decomp file: {}", file_name);
                 if let Some((sample_name, subgraph_name, total_parts)) = parse_decomp_filename(&file_name) {
-                    println!("    Sample: {}, Subgraph: {}, Total Parts: {}", sample_name, subgraph_name, total_parts);
+                    println!("    Sample: {}, Subgraph: {}, Total Parts: {}", 
+                        sample_name, subgraph_name, total_parts);
+
                     if let Some(stats) = parse_decomp_file(&path)? {
-                        println!("    Runtime: {:.4}s, Objective: {:.6}", stats.runtime, stats.objective_value);
+                        println!("    Runtime: {:.4}s, Objective: {:.6}, Total Flow: {:.6}", 
+                            stats.runtime, stats.objective_value, stats.total_flow);
                         map.insert((sample_name, subgraph_name, total_parts), stats);
                     }
                 }
@@ -280,6 +288,7 @@ fn add_decomp_stats(
         if let Some(decomp_stats) = decomp_stats_map.get(&key) {
             stat.runtime = decomp_stats.runtime;
             stat.objective_value = decomp_stats.objective_value;
+            stat.total_flow = decomp_stats.total_flow;
         }
     }
 }
@@ -290,6 +299,7 @@ fn parse_decomp_file(file_path: &Path) -> std::io::Result<Option<DecompStats>> {
 
     let mut runtime = 0.0;
     let mut objective_value = 0.0;
+    let mut total_flow = 0.0; 
 
     for line in reader.lines() {
         let line = line?;
@@ -299,12 +309,16 @@ fn parse_decomp_file(file_path: &Path) -> std::io::Result<Option<DecompStats>> {
         } else if line.starts_with("Objective Value: ") {
             objective_value = line.split_whitespace().nth(2).and_then(|s| s.parse().ok()).unwrap_or(0.0);
         }
+        else if line.starts_with("Total Flow: ") {
+            total_flow = line.split_whitespace().nth(2).and_then(|s| s.parse().ok()).unwrap_or(0.0);
+        }
     }
 
-    if runtime > 0.0 || objective_value > 0.0 {
+    if runtime > 0.0 || objective_value > 0.0 || total_flow > 0.0 {
         Ok(Some(DecompStats {
             runtime,
             objective_value,
+            total_flow,
         }))
     } else {
         Ok(None)
@@ -354,6 +368,8 @@ fn parse_alignment_file(
         edges: 0,
         sources: 0,
         sinks: 0,
+        total_flow: 0.0,
+
     };
 
     for line in reader.lines() {
@@ -402,6 +418,9 @@ fn parse_count(s: &str) -> usize {
         .unwrap_or(0)
 }
 
+
+
+
 fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::Result<()> {
     let mut writer = Writer::from_path(output_path)?;
 
@@ -425,6 +444,7 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
         "Edges",
         "Sources (from 0)",
         "Sinks (to 1)",
+        "Total Flow",
     ])?;
 
     for stats in results {
@@ -449,6 +469,7 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
             &stats.edges.to_string(),
             &stats.sources.to_string(),
             &stats.sinks.to_string(),
+            &format!("{:.6}", stats.total_flow),
         ])?;
     }
 

From c43de57e3ccaf4e606b1c1bb9750e67ed7eccefe Mon Sep 17 00:00:00 2001
From: joserod0704 <jose1.rodriguez@umconnect.umt.edu>
Date: Wed, 9 Jul 2025 10:11:48 -0600
Subject: [PATCH 23/24] Added column to csv file of explained flow after
 decomposition

---
 libs/output_scraper/src/main.rs | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index 285d9e53..d6ad6984 100755
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -25,6 +25,7 @@ struct AlignmentStats {
     sources: usize,
     sinks: usize,
     total_flow: f64, 
+    explained_flow: f64, 
 }
 
 #[derive(Debug, Clone)]
@@ -32,6 +33,7 @@ struct DecompStats {
     runtime: f64,
     objective_value: f64,
     total_flow: f64, 
+    explained_flow: f64, 
 }
 
 #[derive(Debug, Default)]
@@ -245,8 +247,8 @@ fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String,
                         sample_name, subgraph_name, total_parts);
 
                     if let Some(stats) = parse_decomp_file(&path)? {
-                        println!("    Runtime: {:.4}s, Objective: {:.6}, Total Flow: {:.6}", 
-                            stats.runtime, stats.objective_value, stats.total_flow);
+                        println!("    Runtime: {:.4}s, Objective: {:.6}, Total Flow: {:.6}, Explained Flow: {:.6}", 
+                            stats.runtime, stats.objective_value, stats.total_flow, stats.explained_flow);
                         map.insert((sample_name, subgraph_name, total_parts), stats);
                     }
                 }
@@ -289,17 +291,19 @@ fn add_decomp_stats(
             stat.runtime = decomp_stats.runtime;
             stat.objective_value = decomp_stats.objective_value;
             stat.total_flow = decomp_stats.total_flow;
+            stat.explained_flow = decomp_stats.explained_flow;
+
+          
         }
     }
 }
-
 fn parse_decomp_file(file_path: &Path) -> std::io::Result<Option<DecompStats>> {
     let file = File::open(file_path)?;
     let reader = BufReader::new(file);
 
     let mut runtime = 0.0;
     let mut objective_value = 0.0;
-    let mut total_flow = 0.0; 
+    let mut total_flow = 0.0;
 
     for line in reader.lines() {
         let line = line?;
@@ -315,16 +319,22 @@ fn parse_decomp_file(file_path: &Path) -> std::io::Result<Option<DecompStats>> {
     }
 
     if runtime > 0.0 || objective_value > 0.0 || total_flow > 0.0 {
+        let explained_flow = if total_flow > 0.0 {
+            (total_flow - objective_value) / total_flow
+        } else {
+            0.0
+        };
+        
         Ok(Some(DecompStats {
             runtime,
             objective_value,
             total_flow,
+            explained_flow,
         }))
     } else {
         Ok(None)
     }
 }
-
 fn extract_part_numbers(filename: &str) -> (usize, usize) {
     let parts: Vec<&str> = filename.split('_').collect();
     for i in 0..parts.len() {
@@ -369,6 +379,7 @@ fn parse_alignment_file(
         sources: 0,
         sinks: 0,
         total_flow: 0.0,
+        explained_flow: 0.0,    
 
     };
 
@@ -445,6 +456,7 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
         "Sources (from 0)",
         "Sinks (to 1)",
         "Total Flow",
+        "Explained Flow",
     ])?;
 
     for stats in results {
@@ -470,6 +482,7 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
             &stats.sources.to_string(),
             &stats.sinks.to_string(),
             &format!("{:.6}", stats.total_flow),
+            &format!("{:.6}", stats.explained_flow),
         ])?;
     }
 

From 62e60e9fefa727d866c33aa997b9a48cafab5f75 Mon Sep 17 00:00:00 2001
From: joserod0704 <jose1.rodriguez@umconnect.umt.edu>
Date: Thu, 17 Jul 2025 11:52:23 -0600
Subject: [PATCH 24/24] Scrapes weight of paths for table of output

---
 libs/output_scraper/src/main.rs | 53 ++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/libs/output_scraper/src/main.rs b/libs/output_scraper/src/main.rs
index d6ad6984..0968081d 100755
--- a/libs/output_scraper/src/main.rs
+++ b/libs/output_scraper/src/main.rs
@@ -26,6 +26,7 @@ struct AlignmentStats {
     sinks: usize,
     total_flow: f64, 
     explained_flow: f64, 
+    weight: f64
 }
 
 #[derive(Debug, Clone)]
@@ -34,6 +35,7 @@ struct DecompStats {
     objective_value: f64,
     total_flow: f64, 
     explained_flow: f64, 
+    weight: f64
 }
 
 #[derive(Debug, Default)]
@@ -121,7 +123,6 @@ fn main() -> std::io::Result<()> {
             .then(a.part_number.cmp(&b.part_number))
     });
 
-    
     // Write CSV output
     println!("\nWriting output to {}...", output_path.display());
     write_csv_output(output_path, &results)?;
@@ -247,15 +248,14 @@ fn build_decomp_stats_map(decomp_dir: &Path) -> std::io::Result<HashMap<(String,
                         sample_name, subgraph_name, total_parts);
 
                     if let Some(stats) = parse_decomp_file(&path)? {
-                        println!("    Runtime: {:.4}s, Objective: {:.6}, Total Flow: {:.6}, Explained Flow: {:.6}", 
-                            stats.runtime, stats.objective_value, stats.total_flow, stats.explained_flow);
+                        println!("    Runtime: {:.4}s, Objective: {:.6}, Total Flow: {:.6}, Explained Flow: {:.6}, Weight: {:.6}", 
+                            stats.runtime, stats.objective_value, stats.total_flow, stats.explained_flow, stats.weight);
                         map.insert((sample_name, subgraph_name, total_parts), stats);
                     }
                 }
             }
         }
     }
-    
     Ok(map)
 }
 
@@ -292,6 +292,7 @@ fn add_decomp_stats(
             stat.objective_value = decomp_stats.objective_value;
             stat.total_flow = decomp_stats.total_flow;
             stat.explained_flow = decomp_stats.explained_flow;
+            stat.weight = decomp_stats.weight;
 
           
         }
@@ -305,18 +306,35 @@ fn parse_decomp_file(file_path: &Path) -> std::io::Result<Option<DecompStats>> {
     let mut objective_value = 0.0;
     let mut total_flow = 0.0;
 
-    for line in reader.lines() {
-        let line = line?;
-        
-        if line.starts_with("Runtime: ") {
-            runtime = line.split_whitespace().nth(1).and_then(|s| s.parse().ok()).unwrap_or(0.0);
-        } else if line.starts_with("Objective Value: ") {
-            objective_value = line.split_whitespace().nth(2).and_then(|s| s.parse().ok()).unwrap_or(0.0);
-        }
-        else if line.starts_with("Total Flow: ") {
-            total_flow = line.split_whitespace().nth(2).and_then(|s| s.parse().ok()).unwrap_or(0.0);
+
+
+    let mut path_weights = Vec::new();
+    let mut parsing_paths = false;
+
+for line in reader.lines() {
+    let line = line?;
+
+    if line.starts_with("Runtime: ") {
+        runtime = line.split_whitespace().nth(1).and_then(|s| s.parse().ok()).unwrap_or(0.0);
+    } else if line.starts_with("Objective Value: ") {
+        objective_value = line.split_whitespace().nth(2).and_then(|s| s.parse().ok()).unwrap_or(0.0);
+    } else if line.starts_with("Total Flow: ") {
+        total_flow = line.split_whitespace().nth(2).and_then(|s| s.parse().ok()).unwrap_or(0.0);
+    } else if line.starts_with("Paths and Weights:") {
+        parsing_paths = true;
+    } else if parsing_paths {
+        if line.trim().is_empty() {
+            parsing_paths = false;
+        } else {
+            // weight is the first whitespace-separated field
+            if let Some(weight_str) = line.split_whitespace().next() {
+                if let Ok(weight) = weight_str.parse::<f64>() {
+                    path_weights.push(weight);
+                }
+            }
         }
     }
+}
 
     if runtime > 0.0 || objective_value > 0.0 || total_flow > 0.0 {
         let explained_flow = if total_flow > 0.0 {
@@ -325,11 +343,15 @@ fn parse_decomp_file(file_path: &Path) -> std::io::Result<Option<DecompStats>> {
             0.0
         };
         
+        let total_weight: f64 = path_weights.iter().sum();
+
+
         Ok(Some(DecompStats {
             runtime,
             objective_value,
             total_flow,
             explained_flow,
+            weight: total_weight, 
         }))
     } else {
         Ok(None)
@@ -380,6 +402,7 @@ fn parse_alignment_file(
         sinks: 0,
         total_flow: 0.0,
         explained_flow: 0.0,    
+        weight: 0.0
 
     };
 
@@ -457,6 +480,7 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
         "Sinks (to 1)",
         "Total Flow",
         "Explained Flow",
+        "Weight",
     ])?;
 
     for stats in results {
@@ -483,6 +507,7 @@ fn write_csv_output(output_path: &Path, results: &[AlignmentStats]) -> std::io::
             &stats.sinks.to_string(),
             &format!("{:.6}", stats.total_flow),
             &format!("{:.6}", stats.explained_flow),
+            &format!("{:.6}", stats.weight),
         ])?;
     }