From cc4a44718aecf0021994b13bb407eee964181263 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 13 Nov 2025 15:19:58 +0300 Subject: [PATCH 1/3] feat: Add unittests for checking if negative correlations are handled correctly. --- src/correlations.rs | 68 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 6 deletions(-) diff --git a/src/correlations.rs b/src/correlations.rs index 29b9064..c3b4202 100644 --- a/src/correlations.rs +++ b/src/correlations.rs @@ -149,11 +149,11 @@ impl<'a> Compute<'a> { .split(self.file_delimiter) .collect::>(), ); - if ty.x_vals.len() < 4 || ty.y_vals.len() < 4 { - // minimum number of acceptable trait values for - // computing the correlations - continue; - } + if ty.x_vals.len() < 4 || ty.y_vals.len() < 4 { + // minimum number of acceptable trait values for + // computing the correlations + continue; + } let (key_name, parsed_x_val, parsed_y_val) = (ty.row_name, ty.x_vals, ty.y_vals); @@ -215,6 +215,51 @@ mod tests { assert_approx_eq!(p_val, 1.341575855, 2f64); } + #[test] + fn test_pearson_negative_rho() { + let new_pearson = Pearson::new(3); + let (corr_coeff, p_val) = + new_pearson.correlate(&[1., 2., 3., 4., 5.], &[5., 4., 3., 2., 1.]); + + assert_eq!(format!("{:.2}", corr_coeff), "-1.00"); + } + + #[test] + fn test_pearson_negative_rho_signs() { + // let new_pearson = Pearson::new(3); + + // Each pair should produce a negative correlation + let test_cases = vec![ + (vec![1., 2., 3., 4., 5.], vec![5., 4., 3., 2., 1.]), // perfect -1.0 + (vec![1., 2., 3., 4., 5.], vec![5., 4.5, 3.5, 3.0, 2.5]), // roughly -0.9 + (vec![1., 2., 3., 4., 5.], vec![4.5, 4.0, 3.8, 3.5, 3.0]), // small negative ~ -0.3 + ]; + + for (x, y) in test_cases { + let new_pearson = Pearson::new(3); + let new_spearman = Spearman::new(3); + let (corr_coeff, _p_val) = new_pearson.correlate(&x, &y); // pearson correlation + let (s_rho, p_val) = new_spearman.correlate(&x, &y); + let formatted = format!("{:.2}", corr_coeff); + let 
s_formatted = format!("{:.2}", s_rho); + + // Assert the sign is actually negative in the formatted string + assert!( + formatted.starts_with('-'), + "Expected negative correlation, got formatted value `{}` (raw {})", + formatted, + corr_coeff + ); + + assert!( + formatted.starts_with('-'), + "Expected negative correlation, got formatted value `{}` (raw {})", + s_formatted, + s_rho + ); + } + } + #[test] fn test_spearman_obj() { let new_spearman = Spearman::new(5); @@ -302,9 +347,20 @@ mod tests { assert!(corr_results.is_ok()) } - #[test] + fn test_negative_compute() { + let compute_obj = Compute::new( + ',', + "pearson", + "tests/data/mock_negative_dataset.txt", + &[12.0, 15.0, 11.0, 16.0, 11.0, 8.0, 7.0], + "./output_negative.txt", + ); + let corr_results = compute_obj.compute(); + assert!(corr_results.is_ok()) + } + #[test] fn test_parse_f64() { let data = "9. ,5. ,0. ,7. ,6. ,1. ,5. ,0.\n"; From a3101d15adaf7808977b310c2bd13a15c050cd24 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 13 Nov 2025 15:20:34 +0300 Subject: [PATCH 2/3] fix(correlation): sort rho values by absolute magnitude. 
--- src/sorter.rs | 138 ++++++++++++++++++++++++++------------------------ 1 file changed, 73 insertions(+), 65 deletions(-) diff --git a/src/sorter.rs b/src/sorter.rs index 2f9745e..63ce2b4 100644 --- a/src/sorter.rs +++ b/src/sorter.rs @@ -1,8 +1,8 @@ // implementation of extern sorter with rust //https://betterprogramming.pub/how-to-sort-a-20g-file-in-rust-12abfffbd92b -use std::fs::File; use std::cmp::Ordering; +use std::fs::File; use std::io::{BufWriter, Write}; //use extsort::*; @@ -12,16 +12,13 @@ const BUFFER_CAPACITY: usize = 4_000_000_000; #[derive(Debug, PartialEq, PartialOrd)] struct CorrResults(String, f64, f64, i32); - pub fn sort_write_to_file( filename: String, mut v: Vec<(String, f64, f64, i32)>, ) -> std::io::Result { File::create(filename.clone())?; - - - custom_float_sorter(& mut v); + custom_float_sorter(&mut v); let mut buffer = BufWriter::with_capacity(BUFFER_CAPACITY, File::create(&filename).unwrap()); for (name, rho, p_val, num_overlap) in v.iter() { @@ -36,71 +33,77 @@ pub fn create_large_file(filename: &str) { File::create(filename).unwrap(); } - pub fn custom_float_sorter(v: &mut [(String, f64, f64, i32)]) { v.sort_by(|a, b| { - match (a.1.is_nan(), b.1.is_nan()) { - (true, true) => Ordering::Equal, // Keep relative NaN order - (true, false) => Ordering::Greater, // NaN should be last - (false, true) => Ordering::Less, - (false, false) => match (a.1, b.1) { - (x, y) if x.is_infinite() && y.is_infinite() => y.partial_cmp(&x).unwrap(), - (x, _) if x == f64::INFINITY => Ordering::Less, // ∞ should be first - (_, x) if x == f64::INFINITY => Ordering::Greater, - (x, _) if x == f64::NEG_INFINITY => Ordering::Greater, // -∞ should be last - (_, x) if x == f64::NEG_INFINITY => Ordering::Less, - _ => b.1.partial_cmp(&a.1).unwrap(), // Proper decreasing order - }, + let x = a.1; + let y = b.1; + + // NaNs always last + if x.is_nan() && y.is_nan() { + return Ordering::Equal; } + if x.is_nan() { + return Ordering::Greater; + } + if y.is_nan() { + 
return Ordering::Less; + } + + // Sort by magnitude (|ρ|) descending, including ±∞ + y.abs().partial_cmp(&x.abs()).unwrap() }); } - - -pub fn float_sorter(unsorted:& mut [f64]){ +pub fn float_sorter(unsorted: &mut [f64]) { //only for test case // custom function to sort floats with nan and inf descending order - unsorted.sort_by(|&a, &b| { - match (a.is_nan()| a.is_infinite(), b.is_nan()|b.is_infinite()) { + unsorted.sort_by( + |&a, &b| match (a.is_nan() | a.is_infinite(), b.is_nan() | b.is_infinite()) { (true, true) => Ordering::Equal, (true, false) => Ordering::Greater, (false, true) => Ordering::Less, (false, false) => b.abs().partial_cmp(&a.abs()).unwrap(), - } - }); + }, + ); } +mod tests { + use super::custom_float_sorter; -mod tests{ - use super::custom_float_sorter; - #[test] - fn test_basic_sorting_desc() { + #[test] + fn test_basic_sorting_by_magnitude() { let mut data = vec![ ("a".to_string(), 3.0, 0.0, 1), ("b".to_string(), -4.5, 0.0, 2), ("c".to_string(), 1.2, 0.0, 3), ]; custom_float_sorter(&mut data); - assert_eq!(data, vec![ - ("a".to_string(), 3.0, 0.0, 1), // Largest number first - ("c".to_string(), 1.2, 0.0, 3), - ("b".to_string(), -4.5, 0.0, 2), // Smallest number last - ]); + assert_eq!( + data, + vec![ + ("b".to_string(), -4.5, 0.0, 2), + ("a".to_string(), 3.0, 0.0, 1), + ("c".to_string(), 1.2, 0.0, 3), + ] + ); } - #[test] - fn test_negative_numbers_desc() { + #[test] + fn test_negative_numbers_by_magnitude() { let mut data = vec![ ("x".to_string(), -1.0, 0.0, 1), ("y".to_string(), -5.0, 0.0, 2), ("z".to_string(), -3.0, 0.0, 3), ]; custom_float_sorter(&mut data); - assert_eq!(data, vec![ - ("x".to_string(), -1.0, 0.0, 1), // -1.0 is largest - ("z".to_string(), -3.0, 0.0, 3), - ("y".to_string(), -5.0, 0.0, 2), // -5.0 is smallest - ]); + assert_eq!( + data, + vec![ + ("y".to_string(), -5.0, 0.0, 2), + ("z".to_string(), -3.0, 0.0, 3), + ("x".to_string(), -1.0, 0.0, 1), + ] + ); } #[test] @@ -114,27 +117,30 @@ mod tests{ custom_float_sorter(&mut 
data); // NaN values should be at the end - assert!(data[2].1.is_nan()); - assert!(data[3].1.is_nan()); + assert!(data[2].1.is_nan() || data[3].1.is_nan()); + assert!(data[2].1.is_nan() || data[3].1.is_nan()); + } + + #[test] + fn test_infinity_handling() { + let mut data = vec![ + ("a".to_string(), f64::INFINITY, 0.0, 1), + ("b".to_string(), -3.0, 0.0, 2), + ("c".to_string(), f64::NEG_INFINITY, 0.0, 3), + ]; + custom_float_sorter(&mut data); + assert_eq!( + data, + vec![ + ("a".to_string(), f64::INFINITY, 0.0, 1), // largest magnitude + ("c".to_string(), f64::NEG_INFINITY, 0.0, 3), + ("b".to_string(), -3.0, 0.0, 2), + ] + ); } - #[test] - fn test_infinity_handling() { - let mut data = vec![ - ("a".to_string(), f64::INFINITY, 0.0, 1), - ("b".to_string(), -3.0, 0.0, 2), - ("c".to_string(), f64::NEG_INFINITY, 0.0, 3), - ]; - custom_float_sorter(&mut data); - assert_eq!(data, vec![ - ("a".to_string(), f64::INFINITY, 0.0, 1), // ∞ first - ("b".to_string(), -3.0, 0.0, 2), // Finite numbers next - ("c".to_string(), f64::NEG_INFINITY, 0.0, 3), // -∞ last - ]); - } - - #[test] - fn test_mixed_nan_infinity_values() { + #[test] + fn test_mixed_nan_infinity_values_by_magnitude() { let mut data = vec![ ("a".to_string(), f64::INFINITY, 0.0, 1), ("b".to_string(), 3.0, 0.0, 2), @@ -144,11 +150,13 @@ mod tests{ ("f".to_string(), f64::NAN, 0.0, 6), ]; custom_float_sorter(&mut data); - assert_eq!(data[0].1, f64::INFINITY); // ∞ first - assert_eq!(data[1].1, 3.0); - assert_eq!(data[2].1, -2.5); - assert_eq!(data[3].1, -f64::INFINITY); // -∞ should be before NaN + + // Expected by magnitude: ±∞ first, then 3.0, 2.5, NaN last + assert!(data[0].1.is_infinite()); + assert!(data[1].1.is_infinite()); + assert_eq!(data[2].1, 3.0); + assert_eq!(data[3].1, -2.5); assert!(data[4].1.is_nan()); assert!(data[5].1.is_nan()); - } + } } From 3f91fb1fb84255e2ec2bbfdc5a096f800de5fde6 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 13 Nov 2025 15:29:24 +0300 Subject: [PATCH 3/3] refactor: 
Remove redundant tests. --- src/correlations.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/correlations.rs b/src/correlations.rs index c3b4202..356bfcc 100644 --- a/src/correlations.rs +++ b/src/correlations.rs @@ -347,18 +347,6 @@ mod tests { assert!(corr_results.is_ok()) } - #[test] - fn test_negative_compute() { - let compute_obj = Compute::new( - ',', - "pearson", - "tests/data/mock_negative_dataset.txt", - &[12.0, 15.0, 11.0, 16.0, 11.0, 8.0, 7.0], - "./output_negative.txt", - ); - let corr_results = compute_obj.compute(); - assert!(corr_results.is_ok()) - } #[test] fn test_parse_f64() {