From acceb1d7865fd298635602c95cabc2a291b4e677 Mon Sep 17 00:00:00 2001 From: Alex Knudson Date: Sat, 20 Mar 2021 13:01:16 -0700 Subject: [PATCH] fixed corkendall(RealMatrix, RealVector) corkendall now returns a matrix with one column when operating on a Matrix-Vector input, the same as cor and corspearman. The tests have been updated to reflect the change. --- src/rankcorr.jl | 15 +++++++------ test/rankcorr.jl | 58 ++++++++++++++++++++++-------------------------- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/src/rankcorr.jl b/src/rankcorr.jl index 714548d5d..0216e9e6b 100644 --- a/src/rankcorr.jl +++ b/src/rankcorr.jl @@ -143,8 +143,8 @@ function corkendall!(x::RealVector, y::RealVector, permx::AbstractVector{<:Integ if x[i - 1] == x[i] k += 1 elseif k > 0 - # Sort the corresponding chunk of y, so the rows of hcat(x,y) are - # sorted first on x, then (where x values are tied) on y. Hence + # Sort the corresponding chunk of y, so the rows of hcat(x,y) are + # sorted first on x, then (where x values are tied) on y. Hence # double ties can be counted by calling countties. sort!(view(y, (i - k - 1):(i - 1))) ntiesx += div(widen(k) * (k + 1), 2) # Must use wide integers here @@ -176,8 +176,9 @@ matrices or vectors. corkendall(x::RealVector, y::RealVector) = corkendall!(copy(x), copy(y)) function corkendall(X::RealMatrix, y::RealVector) + n = size(X, 2) permy = sortperm(y) - return([corkendall!(copy(y), X[:,i], permy) for i in 1:size(X, 2)]) + return(reshape([corkendall!(copy(y), X[:,i], permy) for i in 1:n], n, 1)) end function corkendall(x::RealVector, Y::RealMatrix) @@ -217,7 +218,7 @@ end """ countties(x::RealVector, lo::Integer, hi::Integer) -Return the number of ties within `x[lo:hi]`. Assumes `x` is sorted. +Return the number of ties within `x[lo:hi]`. Assumes `x` is sorted. """ function countties(x::AbstractVector, lo::Integer, hi::Integer) # Use of widen below prevents possible overflow errors when @@ -246,9 +247,9 @@ const SMALL_THRESHOLD = 64 # merge_sort! copied from Julia Base # (commit 28330a2fef4d9d149ba0fd3ffa06347b50067647, dated 20 Sep 2020) """ - merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVector=similar(v, 0)) + merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVector=similar(v, 0)) -Mutates `v` by sorting elements `x[lo:hi]` using the merge sort algorithm. +Mutates `v` by sorting elements `x[lo:hi]` using the merge sort algorithm. This method is a copy-paste-edit of sort! in base/sort.jl, amended to return the bubblesort distance. """ function merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVector=similar(v, 0)) @@ -300,7 +301,7 @@ midpoint(lo::Integer, hi::Integer) = midpoint(promote(lo, hi)...) """ insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer) -Mutates `v` by sorting elements `x[lo:hi]` using the insertion sort algorithm. +Mutates `v` by sorting elements `x[lo:hi]` using the insertion sort algorithm. This method is a copy-paste-edit of sort! in base/sort.jl, amended to return the bubblesort distance. """ function insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer) diff --git a/test/rankcorr.jl b/test/rankcorr.jl index 93b64449b..30a771aee 100644 --- a/test/rankcorr.jl +++ b/test/rankcorr.jl @@ -35,14 +35,14 @@ c22 = corspearman(x2, x2) @test isnan(corkendall([1,1,1], [1,2,3])) @test corkendall([-Inf,-0.0,Inf],[1,2,3]) == 1.0 -# Test, with exact equality, some known results. +# Test, with exact equality, some known results. # RealVector, RealVector @test corkendall(x1, y) == -1/sqrt(90) @test corkendall(x2, y) == -1/sqrt(72) # RealMatrix, RealVector -@test corkendall(X, y) == [-1/sqrt(90), -1/sqrt(72)] +@test corkendall(X, y) == reshape([-1/sqrt(90), -1/sqrt(72)], 2, 1) # RealVector, RealMatrix -@test corkendall(y, X) == [-1/sqrt(90) -1/sqrt(72)] +@test corkendall(y, X) == [-1/sqrt(90) -1/sqrt(72)] # n = 78_000 tests for overflow errors on 32 bit # Testing for overflow errors on 64bit would require n be too large for practicality @@ -94,19 +94,19 @@ z = [1 1 1; @test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z[:,1], z) == [1 0 1/3] -@test corkendall(z, z[:,1]) == [1; 0; 1/3] +@test corkendall(z, z[:,1]) == reshape([1; 0; 1/3], 3, 1) z = float(z) @test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z[:,1], z) == [1 0 1/3] -@test corkendall(z, z[:,1]) == [1; 0; 1/3] +@test corkendall(z, z[:,1]) == reshape([1; 0; 1/3], 3, 1) w = repeat(z, n) @test corkendall(w) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(w, w) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(w[:,1], w) == [1 0 1/3] -@test corkendall(w, w[:,1]) == [1; 0; 1/3] +@test corkendall(w, w[:,1]) == reshape([1; 0; 1/3], 3, 1) StatsBase.midpoint(1,10) == 5 StatsBase.midpoint(1,widen(10)) == 5 @@ -120,30 +120,26 @@ Ynan = copy(Y) Ynan[2,1] = NaN for f in (corspearman, corkendall) - @test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4])) - @test all(isnan, f([1.0, NaN], [1 2; 3 4])) - @test all(isnan, f([1 2; 3 4], [1.0, NaN])) - @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1]) - @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4])) - @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4])) - - @test isequal(f(Xnan, Ynan), - [f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)]) - @test isequal(f(Xnan), - [i == j ? 1.0 : f(Xnan[:,i], Xnan[:,j]) - for i in axes(Xnan, 2), j in axes(Xnan, 2)]) - for k in 1:2 - @test isequal(f(Xnan[:,k], Ynan), - [f(Xnan[:,k], Ynan[:,j]) for i in 1:1, j in axes(Ynan, 2)]) - # TODO: fix corkendall (PR#659) - if f === corspearman - @test isequal(f(Xnan, Ynan[:,k]), - [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2), j in 1:1]) - else - @test isequal(f(Xnan, Ynan[:,k]), - [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2)]) - end - end + @test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4])) + @test all(isnan, f([1.0, NaN], [1 2; 3 4])) + @test all(isnan, f([1 2; 3 4], [1.0, NaN])) + @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1]) + @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4])) + @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4])) + + @test isequal(f(Xnan, Ynan), + [f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)]) + @test isequal(f(Xnan), + [i == j ? 1.0 : f(Xnan[:,i], Xnan[:,j]) + for i in axes(Xnan, 2), j in axes(Xnan, 2)]) + for k in 1:2 + # RealVector, RealMatrix + @test isequal(f(Xnan[:,k], Ynan), + [f(Xnan[:,k], Ynan[:,j]) for i in 1:1, j in axes(Ynan, 2)]) + # RealMatrix, RealVector + @test isequal(f(Xnan, Ynan[:,k]), + [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2), j in 1:1]) + end end @@ -158,4 +154,4 @@ end @test_throws ErrorException corkendall([1], [1, 2]) @test_throws ErrorException corkendall([1], [1 2; 3 4]) @test_throws ErrorException corkendall([1 2; 3 4], [1]) -@test_throws ArgumentError corkendall([1 2; 3 4: 4 6], [1 2; 3 4]) \ No newline at end of file +@test_throws ArgumentError corkendall([1 2; 3 4: 4 6], [1 2; 3 4])