From 01c4bb8731d8e9b7c01be436afc769d3026c7644 Mon Sep 17 00:00:00 2001 From: Stephen <519327+stevieing@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:58:47 +0100 Subject: [PATCH 01/14] Add yaml_validator tests for validate_paths and get_file_list --- debug.txt | 4 ++++ output.fa | 4 ++++ src/lib.rs | 2 ++ src/yaml_validator.rs | 30 +++++++++++++++--------------- tests/yaml_validator.rs | 28 ++++++++++++++++++++++++++++ 5 files changed, 53 insertions(+), 15 deletions(-) create mode 100644 debug.txt create mode 100644 output.fa create mode 100644 tests/yaml_validator.rs diff --git a/debug.txt b/debug.txt new file mode 100644 index 0000000..9a7d5a3 --- /dev/null +++ b/debug.txt @@ -0,0 +1,4 @@ +>SUPER_1 + SCAFFOLD_1 -- 1 -- 9 +>SUPER_2 + SCAFFOLD_3 -- 1 -- 5 diff --git a/output.fa b/output.fa new file mode 100644 index 0000000..518e16a --- /dev/null +++ b/output.fa @@ -0,0 +1,4 @@ +>SUPER_1 +GGCATGCAT +>SUPER_2 +AGTGT diff --git a/src/lib.rs b/src/lib.rs index 9bd2841..ace2b2b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,6 @@ pub mod tpf_fasta; pub use tpf_fasta::*; +pub mod yaml_validator; +pub use yaml_validator::*; mod generics; pub use generics::*; diff --git a/src/yaml_validator.rs b/src/yaml_validator.rs index e81a79a..60c2cd9 100644 --- a/src/yaml_validator.rs +++ b/src/yaml_validator.rs @@ -10,7 +10,7 @@ pub mod yaml_validator_mod { use walkdir::WalkDir; /// A function to validate a path given as a &str - fn validate_paths(path: &str) -> String { + pub fn validate_paths(path: &str) -> String { match fs::metadata(path) { Ok(_) => format!("PASS : {}", &path), Err(_) => format!("FAIL : {}", &path), @@ -18,7 +18,7 @@ pub mod yaml_validator_mod { } // Replicate function from generate_csv - fn get_file_list(root: &str) -> Vec { + pub fn get_file_list(root: &str) -> Vec { WalkDir::new(root) .into_iter() .filter_map(|e| e.ok()) @@ -29,17 +29,17 @@ pub mod yaml_validator_mod { #[derive(Debug, Serialize, Deserialize)] // 
https://doc.rust-lang.org/std/marker/struct.PhantomData.html - struct YamlResults<'a> { - ReferenceResults: String, - CramResults: CRAMtags, - AlignerResults: String, - LongreadResults: String, - BuscoResults: String, - TelomereResults: String, - KmerProfileResults: String, - GenesetResults: Vec, - SyntenicResults: Vec, - phantom: PhantomData<&'a String>, + pub struct YamlResults<'a> { + pub ReferenceResults: String, + pub CramResults: CRAMtags, + pub AlignerResults: String, + pub LongreadResults: String, + pub BuscoResults: String, + pub TelomereResults: String, + pub KmerProfileResults: String, + pub GenesetResults: Vec, + pub SyntenicResults: Vec, + pub phantom: PhantomData<&'a String>, } impl<'a> std::fmt::Display for YamlResults<'a> { @@ -164,11 +164,11 @@ pub mod yaml_validator_mod { // This was helpful for breaking out of a function early // without having to generate some dummy files. #[derive(Debug, Serialize, Deserialize, Default)] - struct CRAMtags { + pub struct CRAMtags { header_sort_order: Vec, other_header_fields: Vec, reference_sequence: Vec, - header_read_groups: Vec, + pub header_read_groups: Vec, } impl std::fmt::Display for CRAMtags { diff --git a/tests/yaml_validator.rs b/tests/yaml_validator.rs new file mode 100644 index 0000000..47bada0 --- /dev/null +++ b/tests/yaml_validator.rs @@ -0,0 +1,28 @@ +use fasta_manipulation::yaml_validator_mod::{ validate_paths, get_file_list, YamlResults}; +use std::path::PathBuf; + +#[test] +fn check_validate_paths() { + assert!(validate_paths("test_data/yaml/test.yaml").contains("PASS")); + assert!(validate_paths("tests/data/invalid.yaml").contains("FAIL")); +} + +#[test] +fn check_get_file_list() { + let path = "test_data/iyAndFlav1".to_string(); + let expected_file_list: Vec = vec![ + PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1_subset.fa.fai"), + PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1_subset.fa"), + PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1.curated_subset.tpf"), + 
PathBuf::from("test_data/iyAndFlav1/small/small_test.fa.fai"), + PathBuf::from("test_data/iyAndFlav1/small/small_test.fa"), + PathBuf::from("test_data/iyAndFlav1/small/small_test.curated.tpf"), + PathBuf::from("test_data/iyAndFlav1/small/small_test.output.fasta"), + PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.curated.tpf"), + PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.output.fasta"), + PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.fa"), + PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.fa.fai"), + PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.debug.txt")]; + let file_list = get_file_list(&path); + assert_eq!(expected_file_list, file_list); +} \ No newline at end of file From 3478b52d4c4214804d427a9b90a6040c161456d1 Mon Sep 17 00:00:00 2001 From: Stephen <519327+stevieing@users.noreply.github.com> Date: Tue, 20 Aug 2024 15:00:23 +0100 Subject: [PATCH 02/14] chore: yaml_validator fix linting issues --- tests/yaml_validator.rs | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/tests/yaml_validator.rs b/tests/yaml_validator.rs index 47bada0..a781fa3 100644 --- a/tests/yaml_validator.rs +++ b/tests/yaml_validator.rs @@ -1,4 +1,4 @@ -use fasta_manipulation::yaml_validator_mod::{ validate_paths, get_file_list, YamlResults}; +use fasta_manipulation::yaml_validator_mod::{get_file_list, validate_paths, YamlResults}; use std::path::PathBuf; #[test] @@ -9,20 +9,21 @@ fn check_validate_paths() { #[test] fn check_get_file_list() { - let path = "test_data/iyAndFlav1".to_string(); - let expected_file_list: Vec = vec![ - PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1_subset.fa.fai"), - PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1_subset.fa"), - PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1.curated_subset.tpf"), - PathBuf::from("test_data/iyAndFlav1/small/small_test.fa.fai"), - PathBuf::from("test_data/iyAndFlav1/small/small_test.fa"), - 
PathBuf::from("test_data/iyAndFlav1/small/small_test.curated.tpf"), - PathBuf::from("test_data/iyAndFlav1/small/small_test.output.fasta"), - PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.curated.tpf"), - PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.output.fasta"), - PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.fa"), - PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.fa.fai"), - PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.debug.txt")]; - let file_list = get_file_list(&path); - assert_eq!(expected_file_list, file_list); -} \ No newline at end of file + let path = "test_data/iyAndFlav1".to_string(); + let expected_file_list: Vec = vec![ + PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1_subset.fa.fai"), + PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1_subset.fa"), + PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1.curated_subset.tpf"), + PathBuf::from("test_data/iyAndFlav1/small/small_test.fa.fai"), + PathBuf::from("test_data/iyAndFlav1/small/small_test.fa"), + PathBuf::from("test_data/iyAndFlav1/small/small_test.curated.tpf"), + PathBuf::from("test_data/iyAndFlav1/small/small_test.output.fasta"), + PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.curated.tpf"), + PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.output.fasta"), + PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.fa"), + PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.fa.fai"), + PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.debug.txt"), + ]; + let file_list = get_file_list(&path); + assert_eq!(expected_file_list, file_list); +} From d1bcdd92978e46084383e7f5e06b7a24462d481b Mon Sep 17 00:00:00 2001 From: Stephen <519327+stevieing@users.noreply.github.com> Date: Tue, 20 Aug 2024 16:04:55 +0100 Subject: [PATCH 03/14] chore: Added sort to check_get_file_list test to try and fix ci. 
--- tests/yaml_validator.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/yaml_validator.rs b/tests/yaml_validator.rs index a781fa3..0620f67 100644 --- a/tests/yaml_validator.rs +++ b/tests/yaml_validator.rs @@ -1,4 +1,4 @@ -use fasta_manipulation::yaml_validator_mod::{get_file_list, validate_paths, YamlResults}; +use fasta_manipulation::yaml_validator_mod::{get_file_list, validate_paths}; use std::path::PathBuf; #[test] @@ -10,7 +10,7 @@ fn check_validate_paths() { #[test] fn check_get_file_list() { let path = "test_data/iyAndFlav1".to_string(); - let expected_file_list: Vec = vec![ + let expected_file_list = vec![ PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1_subset.fa.fai"), PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1_subset.fa"), PathBuf::from("test_data/iyAndFlav1/full/iyAndFlav1.curated_subset.tpf"), @@ -23,7 +23,8 @@ fn check_get_file_list() { PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.fa"), PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.fa.fai"), PathBuf::from("test_data/iyAndFlav1/tiny/tiny_test.debug.txt"), - ]; - let file_list = get_file_list(&path); + ] + .sort(); + let file_list = get_file_list(&path).sort(); assert_eq!(expected_file_list, file_list); } From 3f9936712b551131bcd0a3e09c3577c97fe4f487 Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Tue, 3 Sep 2024 14:06:14 +0100 Subject: [PATCH 04/14] Refactor YamlResults attributes to use snake case --- src/yaml_validator.rs | 88 +++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/src/yaml_validator.rs b/src/yaml_validator.rs index 60c2cd9..f5e77b8 100644 --- a/src/yaml_validator.rs +++ b/src/yaml_validator.rs @@ -30,15 +30,15 @@ pub mod yaml_validator_mod { #[derive(Debug, Serialize, Deserialize)] // https://doc.rust-lang.org/std/marker/struct.PhantomData.html pub struct YamlResults<'a> { - pub ReferenceResults: String, - pub CramResults: CRAMtags, - pub AlignerResults: String, - pub 
LongreadResults: String, - pub BuscoResults: String, - pub TelomereResults: String, - pub KmerProfileResults: String, - pub GenesetResults: Vec, - pub SyntenicResults: Vec, + pub reference_results: String, + pub cram_results: CRAMtags, + pub aligner_results: String, + pub longread_results: String, + pub busco_results: String, + pub telomere_results: String, + pub kmer_profile_results: String, + pub geneset_results: Vec, + pub syntenic_results: Vec, pub phantom: PhantomData<&'a String>, } @@ -48,16 +48,16 @@ pub mod yaml_validator_mod { write!( fmt, "YamlResults:\n\tReference: {:#?}\n\tCram: {:#?}\n\tAligner: {:#?}\n\tLongread: {:#?}\n\tBusco: {:#?}\n\tTelomere: {:#?}\n\tKmerProfile: {:#?}\n\tGenesetPaths: {:#?}\n\tSyntenicPaths: {:#?}\n\t{:#?}", - &self.ReferenceResults, + &self.reference_results, &self.is_cram_valid(), - &self.AlignerResults, - &self.LongreadResults, - &self.BuscoResults, - &self.TelomereResults, - &self.KmerProfileResults, - &self.GenesetResults, - &self.SyntenicResults, - &self.CramResults, + &self.aligner_results, + &self.longread_results, + &self.busco_results, + &self.telomere_results, + &self.kmer_profile_results, + &self.geneset_results, + &self.syntenic_results, + &self.cram_results, ) } } @@ -65,7 +65,7 @@ pub mod yaml_validator_mod { impl<'a> YamlResults<'a> { fn is_cram_valid(&self) -> String { // this should add a field to the cramresults struct - if !self.CramResults.header_read_groups.is_empty() { + if !self.cram_results.header_read_groups.is_empty() { "PASS".to_string() } else { "FAIL".to_string() @@ -80,16 +80,16 @@ pub mod yaml_validator_mod { #[allow(dead_code)] fn to_file(&self, output_location: String) -> Result<(), std::io::Error> { let string_data = format!("YamlResults:\n\tReference: {:#?}\n\tCram: {:#?}\n\tAligner: {:#?}\n\tLongread: {:#?}\n\tBusco: {:#?}\n\tTelomere: {:#?}\n\tKmerProfile: {:#?}\n\tGenesetPaths: {:#?}\n\tSyntenicPaths: {:#?}\n\t{:#?}", - &self.ReferenceResults, - &self.is_cram_valid(), - 
&self.AlignerResults, - &self.LongreadResults, - &self.BuscoResults, - &self.TelomereResults, - &self.KmerProfileResults, - &self.GenesetResults, - &self.SyntenicResults, - &self.CramResults, + &self.reference_results, + &self.is_cram_valid(), + &self.aligner_results, + &self.longread_results, + &self.busco_results, + &self.telomere_results, + &self.kmer_profile_results, + &self.geneset_results, + &self.syntenic_results, + &self.cram_results, ); fs::write(output_location, string_data) } @@ -129,14 +129,14 @@ pub mod yaml_validator_mod { // will not cause a TreeVal run to fail, // may cause missing data if accidentaly ommitted. let primary_fields: Vec> = vec![ - vec!["Reference", &self.ReferenceResults], - vec!["Aligner", &self.AlignerResults], - vec!["Longread Data", &self.LongreadResults], - vec!["Busco Paths", &self.BuscoResults], - vec!["Telomere Motif", &self.TelomereResults], + vec!["Reference", &self.reference_results], + vec!["Aligner", &self.aligner_results], + vec!["Longread Data", &self.longread_results], + vec!["Busco Paths", &self.busco_results], + vec!["Telomere Motif", &self.telomere_results], ]; let secondary_fields: Vec<&Vec> = - vec![&self.GenesetResults, &self.SyntenicResults]; + vec![&self.geneset_results, &self.syntenic_results]; let failed_primaries = self.check_primaries(primary_fields); let failed_secondary = self.check_secondaries(secondary_fields); @@ -209,15 +209,15 @@ pub mod yaml_validator_mod { /// there is nothing to return but clippy with complain about the let. 
fn into_results(self) -> YamlResults<'static> { let results = YamlResults { - ReferenceResults: self.validate_fasta(), - CramResults: self.hic_data.validate_cram().1, - AlignerResults: self.hic_data.validate_aligner(), - LongreadResults: self.assem_reads.validate_longread(), - BuscoResults: self.busco.validate_busco_path(), - TelomereResults: self.telomere.validate_telomere(), - KmerProfileResults: self.validate_kmer_prof(), - GenesetResults: self.validate_genesets(), - SyntenicResults: self.validate_synteny(), + reference_results: self.validate_fasta(), + cram_results: self.hic_data.validate_cram().1, + aligner_results: self.hic_data.validate_aligner(), + longread_results: self.assem_reads.validate_longread(), + busco_results: self.busco.validate_busco_path(), + telomere_results: self.telomere.validate_telomere(), + kmer_profile_results: self.validate_kmer_prof(), + geneset_results: self.validate_genesets(), + syntenic_results: self.validate_synteny(), phantom: PhantomData, }; results From 6997f9b09bed06133512b83a631672f72eb920f3 Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Tue, 3 Sep 2024 14:33:39 +0100 Subject: [PATCH 05/14] Writing tests for is_cram_valid and check_primaries --- src/yaml_validator.rs | 13 ++++--- tests/yaml_validator.rs | 82 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 7 deletions(-) diff --git a/src/yaml_validator.rs b/src/yaml_validator.rs index f5e77b8..8d303ce 100644 --- a/src/yaml_validator.rs +++ b/src/yaml_validator.rs @@ -27,7 +27,7 @@ pub mod yaml_validator_mod { .collect() } - #[derive(Debug, Serialize, Deserialize)] + #[derive(Debug, Serialize, Deserialize, Default)] // https://doc.rust-lang.org/std/marker/struct.PhantomData.html pub struct YamlResults<'a> { pub reference_results: String, @@ -63,7 +63,7 @@ pub mod yaml_validator_mod { } impl<'a> YamlResults<'a> { - fn is_cram_valid(&self) -> String { + pub fn is_cram_valid(&self) -> String { // this should add a field to the cramresults struct 
if !self.cram_results.header_read_groups.is_empty() { "PASS".to_string() @@ -94,7 +94,8 @@ pub mod yaml_validator_mod { fs::write(output_location, string_data) } - fn check_primaries(&self, primary_list: Vec>) -> Vec { + // Might need to consider why this has been made an associated function with YamlResults + pub fn check_primaries(&self, primary_list: Vec>) -> Vec { let mut failures = Vec::new(); for i in primary_list { if !i[1].contains("PASS") { @@ -165,9 +166,9 @@ pub mod yaml_validator_mod { // without having to generate some dummy files. #[derive(Debug, Serialize, Deserialize, Default)] pub struct CRAMtags { - header_sort_order: Vec, - other_header_fields: Vec, - reference_sequence: Vec, + pub header_sort_order: Vec, + pub other_header_fields: Vec, + pub reference_sequence: Vec, pub header_read_groups: Vec, } diff --git a/tests/yaml_validator.rs b/tests/yaml_validator.rs index 0620f67..13e9627 100644 --- a/tests/yaml_validator.rs +++ b/tests/yaml_validator.rs @@ -1,4 +1,4 @@ -use fasta_manipulation::yaml_validator_mod::{get_file_list, validate_paths}; +use fasta_manipulation::yaml_validator_mod::{get_file_list, validate_paths, CRAMtags, YamlResults}; use std::path::PathBuf; #[test] @@ -28,3 +28,83 @@ fn check_get_file_list() { let file_list = get_file_list(&path).sort(); assert_eq!(expected_file_list, file_list); } + +#[test] +fn check_is_cram_valid() { + let cram_tags_not_empty = CRAMtags { + header_read_groups: vec![String::from("test")], + ..Default::default() + }; + let yaml_results_pass = YamlResults { + cram_results: cram_tags_not_empty, + ..Default::default() + }; + assert_eq!("PASS", yaml_results_pass.is_cram_valid()) +} + +#[test] +fn check_is_cram_invalid() { + let cram_tags_empty = CRAMtags { + header_read_groups: vec![], + ..Default::default() + }; + let yaml_results_fail = YamlResults { + cram_results: cram_tags_empty, + ..Default::default() + }; + assert_eq!("FAIL", yaml_results_fail.is_cram_valid()) +} + +#[test] +fn 
check_check_primaries_with_all_fails() { + let primaries = vec![ + vec!["Reference", "FAIL"], + vec!["Aligner", "FAIL"], + vec!["Longread Data", "FAIL"], + vec!["Busco Paths", "FAIL"], + vec!["Telomere Motif", "FAIL"], + ]; + let yaml_results = YamlResults { + ..Default::default() + }; + let failures = yaml_results.check_primaries(primaries); + assert_eq!(failures.first().unwrap(), "Failed on: Reference | Value: FAIL"); + assert_eq!(failures[1], "Failed on: Aligner | Value: FAIL"); + assert_eq!(failures[2], "Failed on: Longread Data | Value: FAIL"); + assert_eq!(failures[3], "Failed on: Busco Paths | Value: FAIL"); + assert_eq!(failures.last().unwrap(), "Failed on: Telomere Motif | Value: FAIL") +} + +#[test] +fn check_check_primaries_with_all_passes() { + let primaries = vec![ + vec!["Reference", "PASS"], + vec!["Aligner", "PASS"], + vec!["Longread Data", "PASS"], + vec!["Busco Paths", "PASS"], + vec!["Telomere Motif", "PASS"], + ]; + let yaml_results = YamlResults { + ..Default::default() + }; + let failures = yaml_results.check_primaries(primaries); + assert!(failures.is_empty()) +} + +#[test] +fn check_check_primaries_with_fails_and_passes() { + let primaries = vec![ + vec!["Reference", "FAIL"], + vec!["Aligner", "PASS"], + vec!["Longread Data", "PASS"], + vec!["Busco Paths", "FAIL"], + vec!["Telomere Motif", "FAIL"], + ]; + let yaml_results = YamlResults { + ..Default::default() + }; + let failures = yaml_results.check_primaries(primaries); + assert_eq!(failures.first().unwrap(), "Failed on: Reference | Value: FAIL"); + assert_eq!(failures[1], "Failed on: Busco Paths | Value: FAIL"); + assert_eq!(failures.last().unwrap(), "Failed on: Telomere Motif | Value: FAIL") +} From fe9b11e7ddb47af92eb048a2e45a2c93c1e6bf53 Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Tue, 3 Sep 2024 14:35:08 +0100 Subject: [PATCH 06/14] Cargo format --- tests/yaml_validator.rs | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git 
a/tests/yaml_validator.rs b/tests/yaml_validator.rs index 13e9627..3d176c9 100644 --- a/tests/yaml_validator.rs +++ b/tests/yaml_validator.rs @@ -1,4 +1,6 @@ -use fasta_manipulation::yaml_validator_mod::{get_file_list, validate_paths, CRAMtags, YamlResults}; +use fasta_manipulation::yaml_validator_mod::{ + get_file_list, validate_paths, CRAMtags, YamlResults, +}; use std::path::PathBuf; #[test] @@ -68,11 +70,17 @@ fn check_check_primaries_with_all_fails() { ..Default::default() }; let failures = yaml_results.check_primaries(primaries); - assert_eq!(failures.first().unwrap(), "Failed on: Reference | Value: FAIL"); + assert_eq!( + failures.first().unwrap(), + "Failed on: Reference | Value: FAIL" + ); assert_eq!(failures[1], "Failed on: Aligner | Value: FAIL"); assert_eq!(failures[2], "Failed on: Longread Data | Value: FAIL"); assert_eq!(failures[3], "Failed on: Busco Paths | Value: FAIL"); - assert_eq!(failures.last().unwrap(), "Failed on: Telomere Motif | Value: FAIL") + assert_eq!( + failures.last().unwrap(), + "Failed on: Telomere Motif | Value: FAIL" + ) } #[test] @@ -104,7 +112,13 @@ fn check_check_primaries_with_fails_and_passes() { ..Default::default() }; let failures = yaml_results.check_primaries(primaries); - assert_eq!(failures.first().unwrap(), "Failed on: Reference | Value: FAIL"); + assert_eq!( + failures.first().unwrap(), + "Failed on: Reference | Value: FAIL" + ); assert_eq!(failures[1], "Failed on: Busco Paths | Value: FAIL"); - assert_eq!(failures.last().unwrap(), "Failed on: Telomere Motif | Value: FAIL") + assert_eq!( + failures.last().unwrap(), + "Failed on: Telomere Motif | Value: FAIL" + ) } From d15a6c54fe0d3fc2fc6f1411312fa591d05bfc7d Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Tue, 3 Sep 2024 14:53:26 +0100 Subject: [PATCH 07/14] Adding tests for check_secondaries function --- src/yaml_validator.rs | 4 ++- tests/yaml_validator.rs | 59 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff 
--git a/src/yaml_validator.rs b/src/yaml_validator.rs index 8d303ce..1da54a5 100644 --- a/src/yaml_validator.rs +++ b/src/yaml_validator.rs @@ -105,7 +105,9 @@ pub mod yaml_validator_mod { failures } - fn check_secondaries(&'a self, secondary_list: Vec<&'a Vec>) -> Vec<&String> { + // Check why this function accepts a Vec<&Vec> while check_primaries accepts + // Vec> + pub fn check_secondaries(&'a self, secondary_list: Vec<&'a Vec>) -> Vec<&String> { let mut failures: Vec<&String> = Vec::new(); for i in secondary_list { let collection = i diff --git a/tests/yaml_validator.rs b/tests/yaml_validator.rs index 3d176c9..cf52f02 100644 --- a/tests/yaml_validator.rs +++ b/tests/yaml_validator.rs @@ -122,3 +122,62 @@ fn check_check_primaries_with_fails_and_passes() { "Failed on: Telomere Motif | Value: FAIL" ) } + +#[test] +fn check_check_secondaries_for_all_pass() { + let vec_one = vec!["PASS".to_string()]; + let vec_two = vec!["PASS".to_string()]; + let vec_three = vec!["PASS".to_string()]; + let secondaries = vec![&vec_one, &vec_two, &vec_three]; + let yaml_results = YamlResults { + ..Default::default() + }; + let failures = yaml_results.check_secondaries(secondaries); + assert!(failures.is_empty()) +} + +#[test] +fn check_check_secondaries_for_all_fails() { + let vec_one = vec!["FAIL".to_string()]; + let vec_two = vec!["FAIL".to_string()]; + let vec_three = vec!["FAIL".to_string()]; + let secondaries = vec![&vec_one, &vec_two, &vec_three]; + let yaml_results = YamlResults { + ..Default::default() + }; + let failures = yaml_results.check_secondaries(secondaries); + assert_eq!(*failures.first().unwrap(), "FAIL"); + assert_eq!(failures[1], "FAIL"); + assert_eq!(*failures.last().unwrap(), "FAIL"); +} + +#[test] +fn check_check_secondaries_for_all_nos() { + let vec_one = vec!["NO".to_string()]; + let vec_two = vec!["NO".to_string()]; + let vec_three = vec!["NO".to_string()]; + let secondaries = vec![&vec_one, &vec_two, &vec_three]; + let yaml_results = YamlResults { + 
..Default::default() + }; + let failures = yaml_results.check_secondaries(secondaries); + assert_eq!(*failures.first().unwrap(), "NO"); + assert_eq!(failures[1], "NO"); + assert_eq!(*failures.last().unwrap(), "NO"); +} + +#[test] +fn check_check_secondaries_for_fails_and_nos() { + let vec_one = vec!["FAIL".to_string()]; + let vec_two = vec!["NO".to_string()]; + let vec_three = vec!["FAIL".to_string()]; + let vec_four = vec!["PASS".to_string()]; + let secondaries = vec![&vec_one, &vec_two, &vec_three, &vec_four]; + let yaml_results = YamlResults { + ..Default::default() + }; + let failures = yaml_results.check_secondaries(secondaries); + assert_eq!(*failures.first().unwrap(), "FAIL"); + assert_eq!(failures[1], "NO"); + assert_eq!(*failures.last().unwrap(), "FAIL"); +} From 78ec2747f109a4c1e8e4b583adaa94dbf3bb22af Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Thu, 5 Sep 2024 13:34:03 +0100 Subject: [PATCH 08/14] [skip ci] Notes for functions --- src/yaml_validator.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/yaml_validator.rs b/src/yaml_validator.rs index 1da54a5..ffc28e3 100644 --- a/src/yaml_validator.rs +++ b/src/yaml_validator.rs @@ -72,7 +72,8 @@ pub mod yaml_validator_mod { } } - #[allow(dead_code)] + // Might worth checking the use of this function, as it can directly be invoked without + // declaring a function with println! macro. 
fn to_stdout(&self) { println!("{}", &self) } From 41cee4579ebf641b185c2046350e64d80dd39de3 Mon Sep 17 00:00:00 2001 From: Stephen <519327+stevieing@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:01:58 +0100 Subject: [PATCH 09/14] Add tests for validate_fasta and validate_csv --- src/yaml_validator.rs | 74 ++++++++++++------------ test_data/iyAndFlav1/tiny/empty_file.txt | 0 test_data/iyAndFlav1/tiny/valid_csv.csv | 2 + tests/yaml_validator.rs | 36 +++++++++++- 4 files changed, 75 insertions(+), 37 deletions(-) create mode 100644 test_data/iyAndFlav1/tiny/empty_file.txt create mode 100644 test_data/iyAndFlav1/tiny/valid_csv.csv diff --git a/src/yaml_validator.rs b/src/yaml_validator.rs index ffc28e3..b647793 100644 --- a/src/yaml_validator.rs +++ b/src/yaml_validator.rs @@ -189,20 +189,20 @@ pub mod yaml_validator_mod { } } - #[derive(Debug, Serialize, Deserialize)] - struct TreeValYaml { - assembly: Assembly, - reference_file: String, - map_order: String, - assem_reads: AssemReads, - hic_data: HicReads, - kmer_profile: KmerProfile, - alignment: Alignment, - self_comp: SelfComp, - intron: Intron, - telomere: Telomere, - synteny: Synteny, - busco: Busco, + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct TreeValYaml { + pub assembly: Assembly, + pub reference_file: String, + pub map_order: String, + pub assem_reads: AssemReads, + pub hic_data: HicReads, + pub kmer_profile: KmerProfile, + pub alignment: Alignment, + pub self_comp: SelfComp, + pub intron: Intron, + pub telomere: Telomere, + pub synteny: Synteny, + pub busco: Busco, } /// Struct functions @@ -229,7 +229,8 @@ pub mod yaml_validator_mod { #[allow(dead_code)] /// Validate that the input fasta is infact a fasta format and count records. - fn validate_fasta(&self) -> String { + /// is this checking it is a fasta format or just that it has records? 
+ pub fn validate_fasta(&self) -> String { let reader = fasta::reader::Builder.build_from_path(&self.reference_file); let mut binding = reader.expect("NO VALID HEADER / SEQUENCE PAIRS"); @@ -242,7 +243,8 @@ pub mod yaml_validator_mod { } } - fn validate_csv(&self, csv_path: &String) -> String { + /// Are there standard functions to do this? + pub fn validate_csv(&self, csv_path: &String) -> String { let file = File::open(csv_path); match file { @@ -352,16 +354,16 @@ pub mod yaml_validator_mod { } } - #[derive(Debug, Serialize, Deserialize)] - struct KmerProfile { + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct KmerProfile { kmer_length: u16, dir: String, } impl KmerProfile {} - #[derive(Debug, Serialize, Deserialize)] - struct HicReads { + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct HicReads { hic_cram: String, hic_aligner: String, } @@ -478,8 +480,8 @@ pub mod yaml_validator_mod { } } - #[derive(Debug, Serialize, Deserialize)] - struct Assembly { + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct Assembly { sample_id: String, // Anything the user wants latin_name: String, // Not in use but maybe in future, how to validate a latin name. Api call with a fallback to yes... 
it is alphabetical defined_class: String, @@ -487,8 +489,8 @@ pub mod yaml_validator_mod { project_id: String, // Can be anything the user wants, not in use } - #[derive(Debug, Serialize, Deserialize)] - struct AssemReads { + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct AssemReads { read_type: String, read_data: String, supplementary_data: String, // Not yet in use @@ -523,26 +525,26 @@ pub mod yaml_validator_mod { } } - #[derive(Debug, Serialize, Deserialize)] - struct Alignment { + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct Alignment { data_dir: String, common_name: String, // Not yet in use geneset_id: String, } - #[derive(Debug, Serialize, Deserialize)] - struct SelfComp { + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct SelfComp { motif_len: u16, mummer_chunk: u16, } - #[derive(Debug, Serialize, Deserialize)] - struct Intron { + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct Intron { size: String, } - #[derive(Debug, Serialize, Deserialize)] - struct Telomere { + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct Telomere { teloseq: String, } @@ -560,14 +562,14 @@ pub mod yaml_validator_mod { } } - #[derive(Debug, Serialize, Deserialize)] - struct Synteny { + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct Synteny { synteny_path: String, synteny_genomes: String, } - #[derive(Debug, Serialize, Deserialize)] - struct Busco { + #[derive(Debug, Serialize, Deserialize, Default)] + pub struct Busco { lineages_path: String, lineage: String, } diff --git a/test_data/iyAndFlav1/tiny/empty_file.txt b/test_data/iyAndFlav1/tiny/empty_file.txt new file mode 100644 index 0000000..e69de29 diff --git a/test_data/iyAndFlav1/tiny/valid_csv.csv b/test_data/iyAndFlav1/tiny/valid_csv.csv new file mode 100644 index 0000000..83fec2e --- /dev/null +++ b/test_data/iyAndFlav1/tiny/valid_csv.csv @@ -0,0 +1,2 @@ +a,b,c,d +1,2,3,4 \ No newline at end of file diff --git 
a/tests/yaml_validator.rs b/tests/yaml_validator.rs index cf52f02..6588db4 100644 --- a/tests/yaml_validator.rs +++ b/tests/yaml_validator.rs @@ -1,5 +1,5 @@ use fasta_manipulation::yaml_validator_mod::{ - get_file_list, validate_paths, CRAMtags, YamlResults, + get_file_list, validate_paths, CRAMtags, TreeValYaml, YamlResults, }; use std::path::PathBuf; @@ -181,3 +181,37 @@ fn check_check_secondaries_for_fails_and_nos() { assert_eq!(failures[1], "NO"); assert_eq!(*failures.last().unwrap(), "FAIL"); } + +#[test] +fn check_validate_fasta() { + let tree_val_yaml = TreeValYaml { + reference_file: "test_data/iyAndFlav1/tiny/tiny_test.fa".to_string(), + ..Default::default() + }; + + assert!(tree_val_yaml.validate_fasta().contains("PASS")); + + let tree_val_yaml = TreeValYaml { + reference_file: "test_data/iyAndFlav1/tiny/empty_file.txt".to_string(), + ..Default::default() + }; + assert!(tree_val_yaml.validate_fasta().contains("FAIL")); +} + +#[test] +fn check_validate_csv() { + let tree_val_yaml = TreeValYaml { + ..Default::default() + }; + + assert!(tree_val_yaml + .validate_csv(&"test_data/iyAndFlav1/tiny/valid_csv.csv".to_string()) + .contains("PASS")); + + let tree_val_yaml = TreeValYaml { + ..Default::default() + }; + assert!(tree_val_yaml + .validate_csv(&"test_data/iyAndFlav1/tiny/empty_file.csv".to_string()) + .contains("FAIL")); +} From 5fecfdaf83614cc21e6a33b7a56dc19a51fce93c Mon Sep 17 00:00:00 2001 From: Stephen <519327+stevieing@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:04:16 +0100 Subject: [PATCH 10/14] Refactor yaml_validator.rs to remove unnecessary code --- src/yaml_validator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yaml_validator.rs b/src/yaml_validator.rs index b647793..bca2725 100644 --- a/src/yaml_validator.rs +++ b/src/yaml_validator.rs @@ -249,7 +249,7 @@ pub mod yaml_validator_mod { match file { Ok(valid_data) => { - format!("PASS: {}", csv_path); + // format!("PASS: {}", csv_path); let name = 
&csv_path.split('/').collect::>(); let mut reader = ReaderBuilder::new() From 0626263012ba531859dde8058edeefe7519279eb Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Mon, 23 Sep 2024 15:16:54 +0100 Subject: [PATCH 11/14] Adding tests for validate_aligner function --- src/yaml_validator.rs | 6 +++--- tests/yaml_validator.rs | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/yaml_validator.rs b/src/yaml_validator.rs index bca2725..b869ff6 100644 --- a/src/yaml_validator.rs +++ b/src/yaml_validator.rs @@ -364,13 +364,13 @@ pub mod yaml_validator_mod { #[derive(Debug, Serialize, Deserialize, Default)] pub struct HicReads { - hic_cram: String, - hic_aligner: String, + pub hic_cram: String, + pub hic_aligner: String, } impl HicReads { /// Validate the aligner against a set Vec of options - fn validate_aligner(&self) -> String { + pub fn validate_aligner(&self) -> String { // Should be const let aligners = vec!["bwamem2".to_string(), "minimap2".to_string()]; if aligners.contains(&self.hic_aligner.to_string()) { diff --git a/tests/yaml_validator.rs b/tests/yaml_validator.rs index 6588db4..9789d50 100644 --- a/tests/yaml_validator.rs +++ b/tests/yaml_validator.rs @@ -1,5 +1,5 @@ use fasta_manipulation::yaml_validator_mod::{ - get_file_list, validate_paths, CRAMtags, TreeValYaml, YamlResults, + get_file_list, validate_paths, CRAMtags, HicReads, TreeValYaml, YamlResults, }; use std::path::PathBuf; @@ -215,3 +215,29 @@ fn check_validate_csv() { .validate_csv(&"test_data/iyAndFlav1/tiny/empty_file.csv".to_string()) .contains("FAIL")); } + +#[test] +fn check_validate_aligner_for_pass() { + let hic_reads_bwamem2 = HicReads { + hic_aligner: "bwamem2".to_string(), + ..Default::default() + }; + let hic_reads_minimap2 = HicReads { + hic_aligner: "minimap2".to_string(), + ..Default::default() + }; + assert_eq!("PASS : bwamem2", hic_reads_bwamem2.validate_aligner()); + assert_eq!("PASS : minimap2", 
hic_reads_minimap2.validate_aligner()); +} + +#[test] +fn check_validate_aligner_for_fail() { + let hic_reads = HicReads { + hic_aligner: "bwa".to_string(), + ..Default::default() + }; + assert_eq!( + "FAIL : bwa NOT IN [\"bwamem2\", \"minimap2\"]", + hic_reads.validate_aligner() + ); +} From e09c190657c237734aed1a387e1c5d2cffed98f5 Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Mon, 23 Sep 2024 15:41:46 +0100 Subject: [PATCH 12/14] Adding tests for validate_longread function --- src/yaml_validator.rs | 17 +++++++--- test_data/iyAndFlav1/tiny/empty.fasta.gz | 0 tests/yaml_validator.rs | 42 +++++++++++++++++++++++- 3 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 test_data/iyAndFlav1/tiny/empty.fasta.gz diff --git a/src/yaml_validator.rs b/src/yaml_validator.rs index b869ff6..77eb491 100644 --- a/src/yaml_validator.rs +++ b/src/yaml_validator.rs @@ -491,14 +491,18 @@ pub mod yaml_validator_mod { #[derive(Debug, Serialize, Deserialize, Default)] pub struct AssemReads { - read_type: String, - read_data: String, - supplementary_data: String, // Not yet in use + pub read_type: String, + pub read_data: String, + pub supplementary_data: String, // Not yet in use } impl AssemReads { + // It looks like this function is trying to validate the location of files with + // .fasta.gz extension, but it takes into account only the files that do not have + // .fasta.gz extension. 
+ // /// Validate the location of the FASTA.GZ long read files - fn validate_longread(&self) -> String { + pub fn validate_longread(&self) -> String { let main_path_check = validate_paths(&self.read_data); if main_path_check.contains("FAIL") { @@ -508,11 +512,16 @@ pub mod yaml_validator_mod { let list_of_files = get_file_list(&self.read_data); + // We might have to check + // https://doc.rust-lang.org/std/path/struct.Path.html#method.ends_with + // Might have to use .extension() to get the extension let fasta_reads = &list_of_files .into_iter() .filter(|f| !f.ends_with(".fasta.gz")) .collect::>(); + println!("{:?}", fasta_reads); + if !fasta_reads.is_empty() { format!( "PASS : {} : FASTA.GZ = {}", diff --git a/test_data/iyAndFlav1/tiny/empty.fasta.gz b/test_data/iyAndFlav1/tiny/empty.fasta.gz new file mode 100644 index 0000000..e69de29 diff --git a/tests/yaml_validator.rs b/tests/yaml_validator.rs index 9789d50..d155b48 100644 --- a/tests/yaml_validator.rs +++ b/tests/yaml_validator.rs @@ -1,5 +1,5 @@ use fasta_manipulation::yaml_validator_mod::{ - get_file_list, validate_paths, CRAMtags, HicReads, TreeValYaml, YamlResults, + get_file_list, validate_paths, AssemReads, CRAMtags, HicReads, TreeValYaml, YamlResults, }; use std::path::PathBuf; @@ -241,3 +241,43 @@ fn check_validate_aligner_for_fail() { hic_reads.validate_aligner() ); } + +#[test] +fn check_validate_longread_pass() { + let read_data = "test_data/iyAndFlav1/tiny/tiny_test.fa".to_string(); + let assem_read = AssemReads { + read_data, + ..Default::default() + }; + assert_eq!( + "PASS : test_data/iyAndFlav1/tiny/tiny_test.fa : FASTA.GZ = 1", + assem_read.validate_longread() + ); +} + +// Revise this test +#[test] +fn check_validate_longread_fail() { + let read_data = "test_data/iyAndFlav1/tiny/empty.fasta.gz".to_string(); + let assem_read = AssemReads { + read_data, + ..Default::default() + }; + assert_eq!( + "PASS : test_data/iyAndFlav1/tiny/empty.fasta.gz : FASTA.GZ = 1", + assem_read.validate_longread() 
+ ); +} + +#[test] +fn validate_longread_invalid_paths() { + let read_data = "test_data/iyAndFlav1/tiny/tiny_test1.fa".to_string(); + let assem_read = AssemReads { + read_data, + ..Default::default() + }; + assert_eq!( + "FAIL : test_data/iyAndFlav1/tiny/tiny_test1.fa", + assem_read.validate_longread() + ); +} From 56de6904e8af4b7d32760a82e4e91b40cfcc617a Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Mon, 23 Sep 2024 15:48:06 +0100 Subject: [PATCH 13/14] Removing the explicitly defined test execution as it is defined as a separate job again in the workflow. --- .github/workflows/test-lint.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test-lint.yml b/.github/workflows/test-lint.yml index aa14429..0196f49 100644 --- a/.github/workflows/test-lint.yml +++ b/.github/workflows/test-lint.yml @@ -16,8 +16,6 @@ jobs: - uses: actions/checkout@v3 - name: Build run: cargo build --verbose - - name: Run tests - run: cargo test --verbose check: name: "Cargo check" From dcafddd55ee96c7bbbbff854129112c770fcc11d Mon Sep 17 00:00:00 2001 From: Dasun Pubudumal Date: Mon, 23 Sep 2024 15:54:32 +0100 Subject: [PATCH 14/14] Adding tests for validate_telomere function --- src/yaml_validator.rs | 4 ++-- tests/yaml_validator.rs | 27 ++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/yaml_validator.rs b/src/yaml_validator.rs index 77eb491..de06e0e 100644 --- a/src/yaml_validator.rs +++ b/src/yaml_validator.rs @@ -554,13 +554,13 @@ pub mod yaml_validator_mod { #[derive(Debug, Serialize, Deserialize, Default)] pub struct Telomere { - teloseq: String, + pub teloseq: String, } impl Telomere { /// Validate whether the telomere motif is ALPHABETICAL /// No upper bound as motifs can be large. 
- fn validate_telomere(&self) -> String { + pub fn validate_telomere(&self) -> String { if self.teloseq.chars().all(char::is_alphabetic) && self.teloseq.chars().collect::>().len() > 3 { diff --git a/tests/yaml_validator.rs b/tests/yaml_validator.rs index d155b48..361933f 100644 --- a/tests/yaml_validator.rs +++ b/tests/yaml_validator.rs @@ -1,5 +1,6 @@ use fasta_manipulation::yaml_validator_mod::{ - get_file_list, validate_paths, AssemReads, CRAMtags, HicReads, TreeValYaml, YamlResults, + get_file_list, validate_paths, AssemReads, CRAMtags, HicReads, Telomere, TreeValYaml, + YamlResults, }; use std::path::PathBuf; @@ -281,3 +282,27 @@ fn validate_longread_invalid_paths() { assem_read.validate_longread() ); } + +#[test] +fn check_validate_telomere_pass() { + let telomere = Telomere { + teloseq: "AGGGTT".to_string(), + }; + assert_eq!("PASS : AGGGTT", telomere.validate_telomere()); +} + +#[test] +fn check_validate_telomere_fail_non_alphabetic() { + let telomere = Telomere { + teloseq: "TTGGAA1".to_string(), + }; + assert_eq!("FAIL : TTGGAA1", telomere.validate_telomere()); +} + +#[test] +fn check_validate_telomere_fail_character_length() { + let telomere = Telomere { + teloseq: "TTG".to_string(), + }; + assert_eq!("FAIL : TTG", telomere.validate_telomere()); +}