From cc4a44718aecf0021994b13bb407eee964181263 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui <alexanderkabua@gmail.com>
Date: Thu, 13 Nov 2025 15:19:58 +0300
Subject: [PATCH 1/3] feat: Add unit tests for checking if negative correlations
 are handled correctly.

---
 src/correlations.rs | 68 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 62 insertions(+), 6 deletions(-)

diff --git a/src/correlations.rs b/src/correlations.rs
index 29b9064..c3b4202 100644
--- a/src/correlations.rs
+++ b/src/correlations.rs
@@ -149,11 +149,11 @@ impl<'a> Compute<'a> {
                                 .split(self.file_delimiter)
                                 .collect::<Vec<&str>>(),
                         );
-			if ty.x_vals.len() < 4 || ty.y_vals.len() < 4 {
-			    // minimum number of acceptable trait values for
-			    // computing the correlations
-			    continue;
-			}
+                        if ty.x_vals.len() < 4 || ty.y_vals.len() < 4 {
+                            // minimum number of acceptable trait values for
+                            // computing the correlations
+                            continue;
+                        }
 
                         let (key_name, parsed_x_val, parsed_y_val) =
                             (ty.row_name, ty.x_vals, ty.y_vals);
@@ -215,6 +215,51 @@ mod tests {
         assert_approx_eq!(p_val, 1.341575855, 2f64);
     }
 
+    #[test]
+    fn test_pearson_negative_rho() {
+        // Perfectly anti-correlated inputs must format to exactly "-1.00".
+        let new_pearson = Pearson::new(3);
+        let (corr_coeff, _p_val) =
+            new_pearson.correlate(&[1., 2., 3., 4., 5.], &[5., 4., 3., 2., 1.]);
+        assert_eq!(format!("{:.2}", corr_coeff), "-1.00");
+    }
+
+    #[test]
+    fn test_pearson_negative_rho_signs() {
+        // Each pair is strictly decreasing in y, so Pearson and Spearman must both
+        // yield a negative coefficient whose sign survives `{:.2}` formatting.
+        let test_cases = vec![
+            (vec![1., 2., 3., 4., 5.], vec![5., 4., 3., 2., 1.]), // perfect -1.0
+            (vec![1., 2., 3., 4., 5.], vec![5., 4.5, 3.5, 3.0, 2.5]), // pearson ~ -0.99
+            (vec![1., 2., 3., 4., 5.], vec![4.5, 4.0, 3.8, 3.5, 3.0]), // pearson ~ -0.99
+        ];
+
+        for (x, y) in test_cases {
+            let new_pearson = Pearson::new(3);
+            let new_spearman = Spearman::new(3);
+            let (corr_coeff, _p_val) = new_pearson.correlate(&x, &y); // pearson correlation
+            let (s_rho, _s_p_val) = new_spearman.correlate(&x, &y); // spearman correlation
+            let formatted = format!("{:.2}", corr_coeff);
+            let s_formatted = format!("{:.2}", s_rho);
+
+            // Pearson: the sign must be negative in the formatted string
+            assert!(
+                formatted.starts_with('-'),
+                "Expected negative correlation, got formatted value `{}` (raw {})",
+                formatted,
+                corr_coeff
+            );
+
+            // Spearman: check its own formatted value, not the Pearson one
+            assert!(
+                s_formatted.starts_with('-'),
+                "Expected negative correlation, got formatted value `{}` (raw {})",
+                s_formatted,
+                s_rho
+            );
+        }
+    }
+
     #[test]
     fn test_spearman_obj() {
         let new_spearman = Spearman::new(5);
@@ -302,9 +347,20 @@ mod tests {
 
         assert!(corr_results.is_ok())
     }
-
     #[test]
+    fn test_negative_compute() {
+        let compute_obj = Compute::new(
+            ',',
+            "pearson",
+            "tests/data/mock_negative_dataset.txt",
+            &[12.0, 15.0, 11.0, 16.0, 11.0, 8.0, 7.0],
+            "./output_negative.txt",
+        );
+        let corr_results = compute_obj.compute();
+        assert!(corr_results.is_ok())
+    }
 
+    #[test]
     fn test_parse_f64() {
         let data = "9. ,5. ,0. ,7. ,6. ,1. ,5. ,0.\n";
 

From a3101d15adaf7808977b310c2bd13a15c050cd24 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui <alexanderkabua@gmail.com>
Date: Thu, 13 Nov 2025 15:20:34 +0300
Subject: [PATCH 2/3] fix(correlation): sort rho values by absolute magnitude.

---
 src/sorter.rs | 138 ++++++++++++++++++++++++++------------------------
 1 file changed, 73 insertions(+), 65 deletions(-)

diff --git a/src/sorter.rs b/src/sorter.rs
index 2f9745e..63ce2b4 100644
--- a/src/sorter.rs
+++ b/src/sorter.rs
@@ -1,8 +1,8 @@
 // implementation of extern sorter with rust
 
 //https://betterprogramming.pub/how-to-sort-a-20g-file-in-rust-12abfffbd92b
-use std::fs::File;
 use std::cmp::Ordering;
+use std::fs::File;
 use std::io::{BufWriter, Write};
 //use extsort::*;
 
@@ -12,16 +12,13 @@ const BUFFER_CAPACITY: usize = 4_000_000_000;
 #[derive(Debug, PartialEq, PartialOrd)]
 struct CorrResults(String, f64, f64, i32);
 
-
 pub fn sort_write_to_file(
     filename: String,
     mut v: Vec<(String, f64, f64, i32)>,
 ) -> std::io::Result<String> {
     File::create(filename.clone())?;
 
-
-
-    custom_float_sorter(& mut v);
+    custom_float_sorter(&mut v);
 
     let mut buffer = BufWriter::with_capacity(BUFFER_CAPACITY, File::create(&filename).unwrap());
     for (name, rho, p_val, num_overlap) in v.iter() {
@@ -36,71 +33,77 @@ pub fn create_large_file(filename: &str) {
     File::create(filename).unwrap();
 }
 
-
 pub fn custom_float_sorter(v: &mut [(String, f64, f64, i32)]) {
     v.sort_by(|a, b| {
-        match (a.1.is_nan(), b.1.is_nan()) {
-            (true, true) => Ordering::Equal,   // Keep relative NaN order
-            (true, false) => Ordering::Greater, // NaN should be last
-            (false, true) => Ordering::Less,
-            (false, false) => match (a.1, b.1) {
-                (x, y) if x.is_infinite() && y.is_infinite() => y.partial_cmp(&x).unwrap(),
-                (x, _) if x == f64::INFINITY => Ordering::Less, // ∞ should be first
-                (_, x) if x == f64::INFINITY => Ordering::Greater,
-                (x, _) if x == f64::NEG_INFINITY => Ordering::Greater, // -∞ should be last
-                (_, x) if x == f64::NEG_INFINITY => Ordering::Less,
-                _ => b.1.partial_cmp(&a.1).unwrap(), // Proper decreasing order
-            },
+        let x = a.1;
+        let y = b.1;
+
+        // NaNs always last
+        if x.is_nan() && y.is_nan() {
+            return Ordering::Equal;
         }
+        if x.is_nan() {
+            return Ordering::Greater;
+        }
+        if y.is_nan() {
+            return Ordering::Less;
+        }
+
+        // Sort by magnitude (|ρ|) descending, including ±∞
+        y.abs().partial_cmp(&x.abs()).unwrap()
     });
 }
 
-
-
-pub fn float_sorter(unsorted:& mut [f64]){
+pub fn float_sorter(unsorted: &mut [f64]) {
     //only  for test case
     // custom function to sort floats with nan and inf descending order
-    unsorted.sort_by(|&a, &b| {
-        match (a.is_nan()| a.is_infinite(), b.is_nan()|b.is_infinite()) {
+    unsorted.sort_by(
+        |&a, &b| match (a.is_nan() | a.is_infinite(), b.is_nan() | b.is_infinite()) {
             (true, true) => Ordering::Equal,
             (true, false) => Ordering::Greater,
             (false, true) => Ordering::Less,
             (false, false) => b.abs().partial_cmp(&a.abs()).unwrap(),
-        }
-    });
+        },
+    );
 }
 
+mod tests {
+    use super::custom_float_sorter;
 
-mod tests{
-   use super::custom_float_sorter;
-   #[test]
-    fn test_basic_sorting_desc() {
+    #[test]
+    fn test_basic_sorting_by_magnitude() {
         let mut data = vec![
             ("a".to_string(), 3.0, 0.0, 1),
             ("b".to_string(), -4.5, 0.0, 2),
             ("c".to_string(), 1.2, 0.0, 3),
         ];
         custom_float_sorter(&mut data);
-        assert_eq!(data, vec![
-            ("a".to_string(), 3.0, 0.0, 1),  // Largest number first
-            ("c".to_string(), 1.2, 0.0, 3),
-            ("b".to_string(), -4.5, 0.0, 2), // Smallest number last
-        ]);
+        assert_eq!(
+            data,
+            vec![
+                ("b".to_string(), -4.5, 0.0, 2),
+                ("a".to_string(), 3.0, 0.0, 1),
+                ("c".to_string(), 1.2, 0.0, 3),
+            ]
+        );
     }
 
-  #[test]
-    fn test_negative_numbers_desc() {
+    #[test]
+    fn test_negative_numbers_by_magnitude() {
         let mut data = vec![
             ("x".to_string(), -1.0, 0.0, 1),
             ("y".to_string(), -5.0, 0.0, 2),
             ("z".to_string(), -3.0, 0.0, 3),
         ];
         custom_float_sorter(&mut data);
-        assert_eq!(data, vec![
-            ("x".to_string(), -1.0, 0.0, 1), // -1.0 is largest
-            ("z".to_string(), -3.0, 0.0, 3),
-            ("y".to_string(), -5.0, 0.0, 2), // -5.0 is smallest
-        ]);
+        assert_eq!(
+            data,
+            vec![
+                ("y".to_string(), -5.0, 0.0, 2),
+                ("z".to_string(), -3.0, 0.0, 3),
+                ("x".to_string(), -1.0, 0.0, 1),
+            ]
+        );
     }
 
     #[test]
@@ -114,27 +117,30 @@ mod tests{
         custom_float_sorter(&mut data);
 
         // NaN values should be at the end
-        assert!(data[2].1.is_nan());
-        assert!(data[3].1.is_nan());
+        assert!(data[2].1.is_nan(), "expected NaN at index 2");
+        assert!(data[3].1.is_nan(), "expected NaN at index 3");
+    }
+
+    #[test]
+    fn test_infinity_handling() {
+        let mut data = vec![
+            ("a".to_string(), f64::INFINITY, 0.0, 1),
+            ("b".to_string(), -3.0, 0.0, 2),
+            ("c".to_string(), f64::NEG_INFINITY, 0.0, 3),
+        ];
+        custom_float_sorter(&mut data);
+        assert_eq!(
+            data,
+            vec![
+                ("a".to_string(), f64::INFINITY, 0.0, 1), // largest magnitude
+                ("c".to_string(), f64::NEG_INFINITY, 0.0, 3),
+                ("b".to_string(), -3.0, 0.0, 2),
+            ]
+        );
     }
 
-  #[test]
-  fn test_infinity_handling() {
-    let mut data = vec![
-        ("a".to_string(), f64::INFINITY, 0.0, 1),
-        ("b".to_string(), -3.0, 0.0, 2),
-        ("c".to_string(), f64::NEG_INFINITY, 0.0, 3),
-    ];
-    custom_float_sorter(&mut data);
-    assert_eq!(data, vec![
-        ("a".to_string(), f64::INFINITY, 0.0, 1),  // ∞ first
-        ("b".to_string(), -3.0, 0.0, 2),           // Finite numbers next
-        ("c".to_string(), f64::NEG_INFINITY, 0.0, 3), // -∞ last
-    ]);
-  }
-
-  #[test]
-    fn test_mixed_nan_infinity_values() {
+    #[test]
+    fn test_mixed_nan_infinity_values_by_magnitude() {
         let mut data = vec![
             ("a".to_string(), f64::INFINITY, 0.0, 1),
             ("b".to_string(), 3.0, 0.0, 2),
@@ -144,11 +150,13 @@ mod tests{
             ("f".to_string(), f64::NAN, 0.0, 6),
         ];
         custom_float_sorter(&mut data);
-        assert_eq!(data[0].1, f64::INFINITY);  // ∞ first
-        assert_eq!(data[1].1, 3.0);
-        assert_eq!(data[2].1, -2.5);
-        assert_eq!(data[3].1, -f64::INFINITY); // -∞ should be before NaN
+
+        // Expected by magnitude: ±∞ first, then 3.0, 2.5, NaN last
+        assert!(data[0].1.is_infinite());
+        assert!(data[1].1.is_infinite());
+        assert_eq!(data[2].1, 3.0);
+        assert_eq!(data[3].1, -2.5);
         assert!(data[4].1.is_nan());
         assert!(data[5].1.is_nan());
-    }   
+    }
 }

From 3f91fb1fb84255e2ec2bbfdc5a096f800de5fde6 Mon Sep 17 00:00:00 2001
From: Alexander_Kabui <alexanderkabua@gmail.com>
Date: Thu, 13 Nov 2025 15:29:24 +0300
Subject: [PATCH 3/3] refactor: Remove redundant tests.

---
 src/correlations.rs | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/src/correlations.rs b/src/correlations.rs
index c3b4202..356bfcc 100644
--- a/src/correlations.rs
+++ b/src/correlations.rs
@@ -347,18 +347,6 @@ mod tests {
 
         assert!(corr_results.is_ok())
     }
-    #[test]
-    fn test_negative_compute() {
-        let compute_obj = Compute::new(
-            ',',
-            "pearson",
-            "tests/data/mock_negative_dataset.txt",
-            &[12.0, 15.0, 11.0, 16.0, 11.0, 8.0, 7.0],
-            "./output_negative.txt",
-        );
-        let corr_results = compute_obj.compute();
-        assert!(corr_results.is_ok())
-    }
 
     #[test]
     fn test_parse_f64() {