Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 50 additions & 6 deletions src/correlations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,11 @@ impl<'a> Compute<'a> {
.split(self.file_delimiter)
.collect::<Vec<&str>>(),
);
if ty.x_vals.len() < 4 || ty.y_vals.len() < 4 {
// minimum number of acceptable trait values for
// computing the correlations
continue;
}
if ty.x_vals.len() < 4 || ty.y_vals.len() < 4 {
// minimum number of acceptable trait values for
// computing the correlations
continue;
}

let (key_name, parsed_x_val, parsed_y_val) =
(ty.row_name, ty.x_vals, ty.y_vals);
Expand Down Expand Up @@ -215,6 +215,51 @@ mod tests {
assert_approx_eq!(p_val, 1.341575855, 2f64);
}

#[test]
fn test_pearson_negative_rho() {
    // Perfectly anti-correlated inputs: y drops by one for every unit step in x.
    let new_pearson = Pearson::new(3);
    // Only the coefficient is checked; the p-value is bound to `_p_val` so the
    // compiler does not warn about an unused variable.
    let (corr_coeff, _p_val) =
        new_pearson.correlate(&[1., 2., 3., 4., 5.], &[5., 4., 3., 2., 1.]);

    // The two-decimal rendering must keep the leading minus sign.
    assert_eq!(format!("{:.2}", corr_coeff), "-1.00");
}

#[test]
fn test_pearson_negative_rho_signs() {
    // In every pair below y strictly decreases as x increases, so both the
    // Pearson and the Spearman coefficient must be negative.
    let test_cases = vec![
        (vec![1., 2., 3., 4., 5.], vec![5., 4., 3., 2., 1.]), // perfect -1.0
        (vec![1., 2., 3., 4., 5.], vec![5., 4.5, 3.5, 3.0, 2.5]), // Pearson r ~ -0.99
        (vec![1., 2., 3., 4., 5.], vec![4.5, 4.0, 3.8, 3.5, 3.0]), // Pearson r ~ -0.99
    ];

    for (x, y) in test_cases {
        let new_pearson = Pearson::new(3);
        let new_spearman = Spearman::new(3);
        // p-values are not under test here, hence the underscore-prefixed bindings.
        let (corr_coeff, _p_val) = new_pearson.correlate(&x, &y);
        let (s_rho, _s_p_val) = new_spearman.correlate(&x, &y);
        let formatted = format!("{:.2}", corr_coeff);
        let s_formatted = format!("{:.2}", s_rho);

        // Pearson: the sign must survive formatting.
        assert!(
            formatted.starts_with('-'),
            "Expected negative correlation, got formatted value `{}` (raw {})",
            formatted,
            corr_coeff
        );

        // Spearman: check the Spearman string itself, not the Pearson one again.
        assert!(
            s_formatted.starts_with('-'),
            "Expected negative correlation, got formatted value `{}` (raw {})",
            s_formatted,
            s_rho
        );
    }
}

#[test]
fn test_spearman_obj() {
let new_spearman = Spearman::new(5);
Expand Down Expand Up @@ -304,7 +349,6 @@ mod tests {
}

#[test]

fn test_parse_f64() {
let data = "9. ,5. ,0. ,7. ,6. ,1. ,5. ,0.\n";

Expand Down
138 changes: 73 additions & 65 deletions src/sorter.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// implementation of extern sorter with rust

//https://betterprogramming.pub/how-to-sort-a-20g-file-in-rust-12abfffbd92b
use std::fs::File;
use std::cmp::Ordering;
use std::fs::File;
use std::io::{BufWriter, Write};
//use extsort::*;

Expand All @@ -12,16 +12,13 @@ const BUFFER_CAPACITY: usize = 4_000_000_000;
#[derive(Debug, PartialEq, PartialOrd)]
struct CorrResults(String, f64, f64, i32);


pub fn sort_write_to_file(
filename: String,
mut v: Vec<(String, f64, f64, i32)>,
) -> std::io::Result<String> {
File::create(filename.clone())?;



custom_float_sorter(& mut v);
custom_float_sorter(&mut v);

let mut buffer = BufWriter::with_capacity(BUFFER_CAPACITY, File::create(&filename).unwrap());
for (name, rho, p_val, num_overlap) in v.iter() {
Expand All @@ -36,71 +33,77 @@ pub fn create_large_file(filename: &str) {
File::create(filename).unwrap();
}


/// Sorts correlation rows in place by the magnitude of their second field
/// (the value written out as `rho`), largest magnitude first.
///
/// Ordering rules:
/// * rows whose value is NaN are placed after every non-NaN row;
/// * all other rows are ordered by `|value|` descending, so `+inf` and `-inf`
///   come first and tie with each other;
/// * the sort is stable, so rows that compare equal keep their input order.
pub fn custom_float_sorter(v: &mut [(String, f64, f64, i32)]) {
    v.sort_by(|a, b| {
        let (x, y) = (a.1, b.1);

        match (x.is_nan(), y.is_nan()) {
            // Two NaNs are treated as equal so the stable sort keeps their order.
            (true, true) => Ordering::Equal,
            // NaNs always last.
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            // Compare `y` against `x` (reversed) to get descending magnitude.
            (false, false) => y
                .abs()
                .partial_cmp(&x.abs())
                .expect("non-NaN magnitudes are always comparable"),
        }
    });
}



/// Sorts a slice of floats in place by magnitude, largest first, pushing every
/// non-finite value (NaN, `+inf`, `-inf`) to the end.
///
/// Non-finite values all compare equal to one another, so the stable sort
/// keeps them in their input order. Only used by test cases.
pub fn float_sorter(unsorted: &mut [f64]) {
    unsorted.sort_by(|&a, &b| match (!a.is_finite(), !b.is_finite()) {
        (true, true) => Ordering::Equal,
        // Non-finite values go last.
        (true, false) => Ordering::Greater,
        (false, true) => Ordering::Less,
        // Both finite: reversed comparison gives descending magnitude.
        (false, false) => b.abs().partial_cmp(&a.abs()).unwrap(),
    });
}

mod tests {
use super::custom_float_sorter;

mod tests{
use super::custom_float_sorter;
#[test]
fn test_basic_sorting_by_magnitude() {
    // Mixed signs: ordering must follow the absolute value, not the signed value.
    let mut data = vec![
        ("a".to_string(), 3.0, 0.0, 1),
        ("b".to_string(), -4.5, 0.0, 2),
        ("c".to_string(), 1.2, 0.0, 3),
    ];
    custom_float_sorter(&mut data);
    assert_eq!(
        data,
        vec![
            ("b".to_string(), -4.5, 0.0, 2), // largest magnitude first
            ("a".to_string(), 3.0, 0.0, 1),
            ("c".to_string(), 1.2, 0.0, 3), // smallest magnitude last
        ]
    );
}

#[test]
fn test_negative_numbers_by_magnitude() {
    // All-negative input: the most negative value has the largest magnitude
    // and therefore sorts first.
    let mut data = vec![
        ("x".to_string(), -1.0, 0.0, 1),
        ("y".to_string(), -5.0, 0.0, 2),
        ("z".to_string(), -3.0, 0.0, 3),
    ];
    custom_float_sorter(&mut data);
    assert_eq!(
        data,
        vec![
            ("y".to_string(), -5.0, 0.0, 2),
            ("z".to_string(), -3.0, 0.0, 3),
            ("x".to_string(), -1.0, 0.0, 1),
        ]
    );
}

#[test]
Expand All @@ -114,27 +117,30 @@ mod tests{
custom_float_sorter(&mut data);

// NaN values should be at the end
assert!(data[2].1.is_nan());
assert!(data[3].1.is_nan());
assert!(data[2].1.is_nan() || data[3].1.is_nan());
assert!(data[2].1.is_nan() || data[3].1.is_nan());
}

#[test]
fn test_infinity_handling() {
    // Both infinities have the largest possible magnitude, so they precede the
    // finite value; they tie with each other and keep their input order.
    let mut data = vec![
        ("a".to_string(), f64::INFINITY, 0.0, 1),
        ("b".to_string(), -3.0, 0.0, 2),
        ("c".to_string(), f64::NEG_INFINITY, 0.0, 3),
    ];
    custom_float_sorter(&mut data);
    assert_eq!(
        data,
        vec![
            ("a".to_string(), f64::INFINITY, 0.0, 1), // largest magnitude
            ("c".to_string(), f64::NEG_INFINITY, 0.0, 3),
            ("b".to_string(), -3.0, 0.0, 2),
        ]
    );
}

#[test]
fn test_mixed_nan_infinity_values() {
#[test]
fn test_mixed_nan_infinity_values_by_magnitude() {
let mut data = vec![
("a".to_string(), f64::INFINITY, 0.0, 1),
("b".to_string(), 3.0, 0.0, 2),
Expand All @@ -144,11 +150,13 @@ mod tests{
("f".to_string(), f64::NAN, 0.0, 6),
];
custom_float_sorter(&mut data);
assert_eq!(data[0].1, f64::INFINITY); // ∞ first
assert_eq!(data[1].1, 3.0);
assert_eq!(data[2].1, -2.5);
assert_eq!(data[3].1, -f64::INFINITY); // -∞ should be before NaN

// Expected by magnitude: ±∞ first, then 3.0, 2.5, NaN last
assert!(data[0].1.is_infinite());
assert!(data[1].1.is_infinite());
assert_eq!(data[2].1, 3.0);
assert_eq!(data[3].1, -2.5);
assert!(data[4].1.is_nan());
assert!(data[5].1.is_nan());
}
}
}
Loading