diff --git a/.github/workflows/Format.yml b/.github/workflows/Format.yml new file mode 100644 index 000000000..16ebc9e48 --- /dev/null +++ b/.github/workflows/Format.yml @@ -0,0 +1,22 @@ +name: Check formatting +on: + push: + branches: + - 'master' + - 'release-*' + tags: + - '*' + pull_request: +jobs: + runic: + name: Runic + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: julia-actions/setup-julia@v2 + with: + version: '1' + - uses: julia-actions/cache@v2 + - uses: fredrikekre/runic-action@v1 + with: + version: '1' diff --git a/README.md b/README.md index 5ac0ca041..59d2bff7a 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,10 @@ *StatsBase.jl* is a Julia package that provides basic support for statistics. Particularly, it implements a variety of statistics-related functions, such as scalar statistics, high-order moment computation, counting, ranking, covariances, sampling, and empirical density estimation. -- **Build & Testing Status:** +- **Build, Testing Status & Code Style:** [![CI](https://github.com/JuliaStats/StatsBase.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/JuliaStats/StatsBase.jl/actions/workflows/ci.yml) [![codecov](https://codecov.io/github/JuliaStats/StatsBase.jl/graph/badge.svg?token=XhM6RcXdrB)](https://codecov.io/github/JuliaStats/StatsBase.jl) + [![code style: runic](https://img.shields.io/badge/code_style-%E1%9A%B1%E1%9A%A2%E1%9A%BE%E1%9B%81%E1%9A%B2-black)](https://github.com/fredrikekre/Runic.jl) - **Documentation**: [![][docs-stable-img]][docs-stable-url] [![][docs-latest-img]][docs-latest-url] diff --git a/docs/make.jl b/docs/make.jl index a737a3de4..d0ca21538 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,22 +11,24 @@ makedocs( sitename = "StatsBase.jl", modules = [StatsBase, StatsAPI], format = Documenter.HTML(assets = ["assets/favicon.ico"]), - pages = ["index.md", - "weights.md", - "scalarstats.md", - "robust.md", - "deviation.md", - "cov.md", - "counts.md", - "ranking.md", - "sampling.md", 
- "empirical.md", - "signalcorr.md", - "multivariate.md", - "misc.md", - "statmodels.md", - "transformations.md"], - checkdocs=:exports + pages = [ + "index.md", + "weights.md", + "scalarstats.md", + "robust.md", + "deviation.md", + "cov.md", + "counts.md", + "ranking.md", + "sampling.md", + "empirical.md", + "signalcorr.md", + "multivariate.md", + "misc.md", + "statmodels.md", + "transformations.md", + ], + checkdocs = :exports ) deploydocs( diff --git a/perf/sampling.jl b/perf/sampling.jl index dc65ff7ee..9b1281796 100644 --- a/perf/sampling.jl +++ b/perf/sampling.jl @@ -22,10 +22,10 @@ mutable struct Xmultinom <: WithRep end tsample!(s::Xmultinom, a, x) = xmultinom_sample!(a, x) mutable struct Sample_WRep <: WithRep end -tsample!(s::Sample_WRep, a, x) = sample!(a, x; replace=true, ordered=false) +tsample!(s::Sample_WRep, a, x) = sample!(a, x; replace = true, ordered = false) mutable struct Sample_WRep_Ord <: WithRep end -tsample!(s::Sample_WRep_Ord, a, x) = sample!(a, x; replace=true, ordered=true) +tsample!(s::Sample_WRep_Ord, a, x) = sample!(a, x; replace = true, ordered = true) mutable struct Knuths <: NoRep end tsample!(s::Knuths, a, x) = knuths_sample!(a, x) @@ -46,22 +46,22 @@ mutable struct Seq_D <: NoRep end tsample!(s::Seq_D, a, x) = seqsample_d!(a, x) mutable struct Sample_NoRep <: NoRep end -tsample!(s::Sample_NoRep, a, x) = sample!(a, x; replace=false, ordered=false) +tsample!(s::Sample_NoRep, a, x) = sample!(a, x; replace = false, ordered = false) mutable struct Sample_NoRep_Ord <: NoRep end -tsample!(s::Sample_NoRep_Ord, a, x) = sample!(a, x; replace=false, ordered=true) +tsample!(s::Sample_NoRep_Ord, a, x) = sample!(a, x; replace = false, ordered = true) # config is in the form of (n, k) Base.string(p::SampleProc{Alg}) where {Alg} = lowercase(string(Alg)) -Base.length(p::SampleProc, cfg::Tuple{Int,Int}) = cfg[2] -Base.isvalid(p::SampleProc{<:WithRep}, cfg::Tuple{Int,Int}) = ((n, k) = cfg; n >= 1 && k >= 1) -Base.isvalid(p::SampleProc{<:NoRep}, 
cfg::Tuple{Int,Int}) = ((n, k) = cfg; n >= k >= 1) +Base.length(p::SampleProc, cfg::Tuple{Int, Int}) = cfg[2] +Base.isvalid(p::SampleProc{<:WithRep}, cfg::Tuple{Int, Int}) = ((n, k) = cfg; n >= 1 && k >= 1) +Base.isvalid(p::SampleProc{<:NoRep}, cfg::Tuple{Int, Int}) = ((n, k) = cfg; n >= k >= 1) -Base.start(p::SampleProc, cfg::Tuple{Int,Int}) = Vector{Int}(cfg[2]) -Base.run(p::SampleProc{Alg}, cfg::Tuple{Int,Int}, s::Vector{Int}) where {Alg} = tsample!(Alg(), 1:cfg[1], s) +Base.start(p::SampleProc, cfg::Tuple{Int, Int}) = Vector{Int}(cfg[2]) +Base.run(p::SampleProc{Alg}, cfg::Tuple{Int, Int}, s::Vector{Int}) where {Alg} = tsample!(Alg(), 1:cfg[1], s) Base.done(p::SampleProc, cfg, s) = nothing @@ -72,26 +72,30 @@ const ks = 2 .^ [1:16] ## with replacement -const procs1 = Proc[ SampleProc{Direct}(), - SampleProc{Sample_WRep}(), - SampleProc{Xmultinom}(), - SampleProc{Sample_WRep_Ord}() ] +const procs1 = Proc[ + SampleProc{Direct}(), + SampleProc{Sample_WRep}(), + SampleProc{Xmultinom}(), + SampleProc{Sample_WRep_Ord}(), +] const cfgs1 = vec([(n, k) for k in ks, n in ns]) -rtable1 = run(procs1, cfgs1; duration=0.2) +rtable1 = run(procs1, cfgs1; duration = 0.2) println() ## without replacement -const procs2 = Proc[ SampleProc{Knuths}(), - SampleProc{Fisher_Yates}(), - SampleProc{Self_Avoid}(), - SampleProc{Sample_NoRep}(), - SampleProc{Seq_A}(), - SampleProc{Seq_C}(), - SampleProc{Seq_D}(), - SampleProc{Sample_NoRep_Ord}() ] +const procs2 = Proc[ + SampleProc{Knuths}(), + SampleProc{Fisher_Yates}(), + SampleProc{Self_Avoid}(), + SampleProc{Sample_NoRep}(), + SampleProc{Seq_A}(), + SampleProc{Seq_C}(), + SampleProc{Seq_D}(), + SampleProc{Sample_NoRep_Ord}(), +] const cfgs2 = (Int, Int)[] for n in 5 * (2 .^ [0:11]), k in 2 .^ [1:16] @@ -100,17 +104,17 @@ for n in 5 * (2 .^ [0:11]), k in 2 .^ [1:16] end end -rtable2 = run(procs2, cfgs2; duration=0.2) +rtable2 = run(procs2, cfgs2; duration = 0.2) println() ## show results println("Sampling With Replacement") 
println("===================================") -show(rtable1; unit=:mps, cfghead="(n, k)") +show(rtable1; unit = :mps, cfghead = "(n, k)") println() println("Sampling Without Replacement") println("===================================") -show(rtable2; unit=:mps, cfghead="(n, k)") +show(rtable2; unit = :mps, cfghead = "(n, k)") println() diff --git a/perf/wsampling.jl b/perf/wsampling.jl index 30d665719..ed0d9f2da 100644 --- a/perf/wsampling.jl +++ b/perf/wsampling.jl @@ -28,28 +28,28 @@ mutable struct Direct_S <: WithRep end tsample!(s::Direct_S, wv, x) = sort!(direct_sample!(1:length(wv), wv, x)) mutable struct Sample_WRep <: WithRep end -tsample!(s::Sample_WRep, wv, x) = sample!(1:length(wv), wv, x; ordered=false) +tsample!(s::Sample_WRep, wv, x) = sample!(1:length(wv), wv, x; ordered = false) mutable struct Sample_WRep_Ord <: WithRep end -tsample!(s::Sample_WRep_Ord, wv, x) = sample!(1:length(wv), wv, x; ordered=true) +tsample!(s::Sample_WRep_Ord, wv, x) = sample!(1:length(wv), wv, x; ordered = true) # config is in the form of (n, k) Base.string(p::WSampleProc{Alg}) where {Alg} = lowercase(string(Alg)) -Base.length(p::WSampleProc, cfg::Tuple{Int,Int}) = cfg[2] -Base.isvalid(p::WSampleProc{<:WithRep}, cfg::Tuple{Int,Int}) = ((n, k) = cfg; n >= 1 && k >= 1) -Base.isvalid(p::WSampleProc{<:NoRep}, cfg::Tuple{Int,Int}) = ((n, k) = cfg; n >= k >= 1) +Base.length(p::WSampleProc, cfg::Tuple{Int, Int}) = cfg[2] +Base.isvalid(p::WSampleProc{<:WithRep}, cfg::Tuple{Int, Int}) = ((n, k) = cfg; n >= 1 && k >= 1) +Base.isvalid(p::WSampleProc{<:NoRep}, cfg::Tuple{Int, Int}) = ((n, k) = cfg; n >= k >= 1) -function Base.start(p::WSampleProc, cfg::Tuple{Int,Int}) +function Base.start(p::WSampleProc, cfg::Tuple{Int, Int}) n, k = cfg x = Vector{Int}(k) - w = weights(fill(1.0/n, n)) + w = weights(fill(1.0 / n, n)) return (w, x) end -Base.run(p::WSampleProc{Alg}, cfg::Tuple{Int,Int}, s) where {Alg} = tsample!(Alg(), s[1], s[2]) +Base.run(p::WSampleProc{Alg}, cfg::Tuple{Int, Int}, s) 
where {Alg} = tsample!(Alg(), s[1], s[2]) Base.done(p::WSampleProc, cfg, s) = nothing @@ -60,17 +60,19 @@ const ks = 2 .^ [1:16] ## with replacement -const procs1 = Proc[ WSampleProc{Direct}(), - WSampleProc{Alias}(), - WSampleProc{Xmultinom_S}(), - WSampleProc{Sample_WRep}(), - WSampleProc{Xmultinom}(), - WSampleProc{Direct_S}(), - WSampleProc{Sample_WRep_Ord}() ] +const procs1 = Proc[ + WSampleProc{Direct}(), + WSampleProc{Alias}(), + WSampleProc{Xmultinom_S}(), + WSampleProc{Sample_WRep}(), + WSampleProc{Xmultinom}(), + WSampleProc{Direct_S}(), + WSampleProc{Sample_WRep_Ord}(), +] const cfgs1 = vec([(n, k) for k in ks, n in ns]) -rtable1 = run(procs1, cfgs1; duration=0.2) +rtable1 = run(procs1, cfgs1; duration = 0.2) println() @@ -78,7 +80,5 @@ println() println("Sampling With Replacement") println("===================================") -show(rtable1; unit=:mps, cfghead="(n, k)") +show(rtable1; unit = :mps, cfghead = "(n, k)") println() - - diff --git a/src/StatsBase.jl b/src/StatsBase.jl index 02ed40a1b..cc46190ff 100644 --- a/src/StatsBase.jl +++ b/src/StatsBase.jl @@ -19,19 +19,19 @@ import IrrationalConstants import Random: rand, rand! 
import LinearAlgebra: BlasReal, BlasFloat import Statistics: mean, mean!, var, varm, varm!, std, stdm, cov, covm, - cor, corm, unscaled_covzm, quantile, sqrt!, - median, middle + cor, corm, unscaled_covzm, quantile, sqrt!, + median, middle using StatsAPI: StatisticalModel, RegressionModel import StatsAPI: pairwise, pairwise!, params, params!, - fitted, response, responsename, meanresponse, modelmatrix, - crossmodelmatrix, leverage, cooksdistance, residuals, predict, - predict!, dof_residual, coef, coefnames, coeftable, confint, - deviance, islinear, nulldeviance, loglikelihood, nullloglikelihood, - loglikelihood, loglikelihood, score, nobs, dof, mss, rss, - informationmatrix, stderror, vcov, weights, isfitted, fit, fit!, - aic, aicc, bic, r2, r², adjr2, adjr² + fitted, response, responsename, meanresponse, modelmatrix, + crossmodelmatrix, leverage, cooksdistance, residuals, predict, + predict!, dof_residual, coef, coefnames, coeftable, confint, + deviance, islinear, nulldeviance, loglikelihood, nullloglikelihood, + loglikelihood, loglikelihood, score, nobs, dof, mss, rss, + informationmatrix, stderror, vcov, weights, isfitted, fit, fit!, + aic, aicc, bic, r2, r², adjr2, adjr² - ## tackle compatibility issues +## tackle compatibility issues export @@ -48,122 +48,122 @@ export var, ## weights - AbstractWeights, # abstract type to represent any weight vector - Weights, # to represent a generic weight vector - AnalyticWeights, # to represent an analytic/precision/reliability weight vector - FrequencyWeights, # to representing a frequency/case/repeat weight vector + AbstractWeights, # abstract type to represent any weight vector + Weights, # to represent a generic weight vector + AnalyticWeights, # to represent an analytic/precision/reliability weight vector + FrequencyWeights, # to representing a frequency/case/repeat weight vector ProbabilityWeights, # to representing a probability/sampling weight vector - UnitWeights, # to representing a uniform weight vector - 
weights, # construct a generic Weights vector - aweights, # construct an AnalyticWeights vector - fweights, # construct a FrequencyWeights vector - pweights, # construct a ProbabilityWeights vector - eweights, # construct an exponential Weights vector - uweights, # construct an UnitWeights vector - wsum, # weighted sum with vector as second argument - wsum!, # weighted sum across dimensions with provided storage + UnitWeights, # to representing a uniform weight vector + weights, # construct a generic Weights vector + aweights, # construct an AnalyticWeights vector + fweights, # construct a FrequencyWeights vector + pweights, # construct a ProbabilityWeights vector + eweights, # construct an exponential Weights vector + uweights, # construct an UnitWeights vector + wsum, # weighted sum with vector as second argument + wsum!, # weighted sum across dimensions with provided storage ## moments - skewness, # (standardized) skewness - kurtosis, # (excessive) kurtosis - moment, # central moment of given order - cumulant, # cumulant of given order - mean_and_var, # (mean, var) - mean_and_std, # (mean, std) - mean_and_cov, # (mean, cov) + skewness, # (standardized) skewness + kurtosis, # (excessive) kurtosis + moment, # central moment of given order + cumulant, # cumulant of given order + mean_and_var, # (mean, var) + mean_and_std, # (mean, std) + mean_and_cov, # (mean, cov) ## scalarstats - geomean, # geometric mean - harmmean, # harmonic mean - genmean, # generalized/power mean - middle, # the mean of two real numbers - mode, # find a mode from data (the first one) - modes, # find all modes from data - - zscore, # compute Z-scores - zscore!, # compute Z-scores inplace or to a pre-allocated array - - percentile, # quantile using percentage (instead of fraction) as argument - nquantile, # quantiles at [0:n]/n - quantilerank, # quantile-position (0-1) of a value relative to a collection + geomean, # geometric mean + harmmean, # harmonic mean + genmean, # generalized/power 
mean + middle, # the mean of two real numbers + mode, # find a mode from data (the first one) + modes, # find all modes from data + + zscore, # compute Z-scores + zscore!, # compute Z-scores inplace or to a pre-allocated array + + percentile, # quantile using percentage (instead of fraction) as argument + nquantile, # quantiles at [0:n]/n + quantilerank, # quantile-position (0-1) of a value relative to a collection percentilerank, # percentile-position (0-100) of a value relative to a collection - span, # The range minimum(x):maximum(x) - variation, # ratio of standard deviation to mean - sem, # standard error of the mean, i.e. sqrt(var / n) - mad, # median absolute deviation - iqr, # interquartile range + span, # The range minimum(x):maximum(x) + variation, # ratio of standard deviation to mean + sem, # standard error of the mean, i.e. sqrt(var / n) + mad, # median absolute deviation + iqr, # interquartile range - genvar, # generalized variance - totalvar, # total variation + genvar, # generalized variance + totalvar, # total variation - entropy, # the entropy of a probability vector - renyientropy, # the Rényi (generalised) entropy of a probability vector - crossentropy, # cross entropy between two probability vectors - kldivergence, # K-L divergence between two probability vectors + entropy, # the entropy of a probability vector + renyientropy, # the Rényi (generalised) entropy of a probability vector + crossentropy, # cross entropy between two probability vectors + kldivergence, # K-L divergence between two probability vectors - summarystats, # summary statistics - describe, # print the summary statistics + summarystats, # summary statistics + describe, # print the summary statistics # deviation - counteq, # count the number of equal pairs - countne, # count the number of non-equal pairs - sqL2dist, # squared L2 distance between two arrays - L2dist, # L2 distance between two arrays - L1dist, # L1 distance between two arrays - Linfdist, # L-inf distance between 
two arrays - gkldiv, # (Generalized) Kullback-Leibler divergence between two vectors - meanad, # mean absolute deviation - maxad, # maximum absolute deviation - msd, # mean squared deviation - rmsd, # root mean squared deviation - psnr, # peak signal-to-noise ratio (in dB) + counteq, # count the number of equal pairs + countne, # count the number of non-equal pairs + sqL2dist, # squared L2 distance between two arrays + L2dist, # L2 distance between two arrays + L1dist, # L1 distance between two arrays + Linfdist, # L-inf distance between two arrays + gkldiv, # (Generalized) Kullback-Leibler divergence between two vectors + meanad, # mean absolute deviation + maxad, # maximum absolute deviation + msd, # mean squared deviation + rmsd, # root mean squared deviation + psnr, # peak signal-to-noise ratio (in dB) # cov - scattermat, # scatter matrix (i.e. unnormalized covariance) - cov2cor, # converts a covariance matrix to a correlation matrix - cor2cov, # converts a correlation matrix to a covariance matrix + scattermat, # scatter matrix (i.e. 
unnormalized covariance) + cov2cor, # converts a covariance matrix to a correlation matrix + cor2cov, # converts a correlation matrix to a covariance matrix CovarianceEstimator, # abstract type for covariance estimators - SimpleCovariance, # simple covariance estimator + SimpleCovariance, # simple covariance estimator ## counts - addcounts!, # add counts to an accumulating array or map - counts, # count integer values in given arrays - proportions, # proportions of integer values in given arrays - # (normalized version of counts) - countmap, # count distinct values and return a map - proportionmap, # proportions of distinct values returned as a map + addcounts!, # add counts to an accumulating array or map + counts, # count integer values in given arrays + proportions, # proportions of integer values in given arrays + # (normalized version of counts) + countmap, # count distinct values and return a map + proportionmap, # proportions of distinct values returned as a map ## ranking - ordinalrank, # ordinal ranking ("1234" ranking) - competerank, # competition ranking ("1 2 2 4" ranking) - denserank, # dense ranking ("1 2 2 3" ranking) - tiedrank, # tied ranking ("1 2.5 2.5 4" ranking) + ordinalrank, # ordinal ranking ("1234" ranking) + competerank, # competition ranking ("1 2 2 4" ranking) + denserank, # dense ranking ("1 2 2 3" ranking) + tiedrank, # tied ranking ("1 2.5 2.5 4" ranking) ## rankcorr - corspearman, # spearman's rank correlation - corkendall, # kendall's rank correlation + corspearman, # spearman's rank correlation + corkendall, # kendall's rank correlation ## partialcor - partialcor, # partial correlation + partialcor, # partial correlation ## signalcorr - autocov!, autocov, # auto covariance - autocor!, autocor, # auto correlation - crosscov!, crosscov, # cross covariance - crosscor!, crosscor, # cross correlation - pacf!, pacf, # partial auto-correlation + autocov!, autocov, # auto covariance + autocor!, autocor, # auto correlation + crosscov!, 
crosscov, # cross covariance + crosscor!, crosscor, # cross correlation + pacf!, pacf, # partial auto-correlation ## sampling - samplepair, # draw a pair of distinct elements    - sample, # sampling from a population - sample!, # sampling from a population, with pre-allocated output - wsample, # sampling from a population with weights - wsample!, # weighted sampling, with pre-allocated output + samplepair, # draw a pair of distinct elements    + sample, # sampling from a population + sample!, # sampling from a population, with pre-allocated output + wsample, # sampling from a population with weights + wsample!, # weighted sampling, with pre-allocated output ## empirical - ecdf, # empirical cumulative distribution function - ECDF, # type for empirical cumulative distribution function + ecdf, # empirical cumulative distribution function + ECDF, # type for empirical cumulative distribution function AbstractHistogram, Histogram, @@ -171,20 +171,20 @@ export # histrange, ## robust - trim, # trimmed set - trim!, # trimmed set - winsor, # Winsorized set - winsor!, # Winsorized set - trimvar, # variance of the mean of a trimmed set + trim, # trimmed set + trim!, # trimmed set + winsor, # Winsorized set + winsor!, # Winsorized set + trimvar, # variance of the mean of a trimmed set ## misc - rle, # run-length encoding - inverse_rle, # inverse run-length encoding - indexmap, # construct a map from element to index - levelsmap, # construct a map from n unique elements to [1, ..., n] - indicatormat, # construct indicator matrix - pairwise, # pairwise application of functions - pairwise!, # pairwise! application of functions + rle, # run-length encoding + inverse_rle, # inverse run-length encoding + indexmap, # construct a map from element to index + levelsmap, # construct a map from n unique elements to [1, ..., n] + indicatormat, # construct indicator matrix + pairwise, # pairwise application of functions + pairwise!, # pairwise! 
application of functions # statistical models CoefTable, @@ -236,11 +236,11 @@ export # data standardization standardize, AbstractDataTransform, # the type to represent a abstract data transformation - ZScoreTransform, # the type to represent a z-score data transformation - UnitRangeTransform, # the type to represent a 0-1 data transformation + ZScoreTransform, # the type to represent a z-score data transformation + UnitRangeTransform, # the type to represent a 0-1 data transformation # reliability - CronbachAlpha, # the type to represent Cronbach's alpha scores + CronbachAlpha, # the type to represent Cronbach's alpha scores cronbachalpha # function to compute Cronbach's alpha scores # source files diff --git a/src/common.jl b/src/common.jl index 4dc268c6c..c57c46b26 100644 --- a/src/common.jl +++ b/src/common.jl @@ -1,7 +1,7 @@ # common utilities function depcheck(fname::Symbol, varname::Symbol, b::Union{Bool, Nothing}) - if b === nothing + return if b === nothing msg = "$fname will default to $varname=true in the future. Use $varname=false for previous behaviour." 
Base.depwarn(msg, fname) false @@ -10,4 +10,4 @@ function depcheck(fname::Symbol, varname::Symbol, b::Union{Bool, Nothing}) end end -_add((x1, x2)::Tuple{<:Real,<:Real}, (y1, y2)::Tuple{<:Real,<:Real}) = (x1 + y1, x2 + y2) +_add((x1, x2)::Tuple{<:Real, <:Real}, (y1, y2)::Tuple{<:Real, <:Real}) = (x1 + y1, x2 + y2) diff --git a/src/counts.jl b/src/counts.jl index 43fa1d495..1926c250b 100644 --- a/src/counts.jl +++ b/src/counts.jl @@ -118,7 +118,7 @@ proportions(x::AbstractArray{<:Integer}, wv::AbstractWeights) = proportions(x, s #### functions for counting a single list of integers (2D) -function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}) +function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2, UnitRange{<:Integer}}) # add counts of pairs from zip(x,y) to r xlevels, ylevels = levels @@ -144,8 +144,10 @@ function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, y::AbstractAr return r end -function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, - levels::NTuple{2,UnitRange{<:Integer}}, wv::AbstractWeights) +function addcounts!( + r::AbstractArray, x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + levels::NTuple{2, UnitRange{<:Integer}}, wv::AbstractWeights + ) # add counts of pairs from zip(x,y) to r length(x) == length(y) == length(wv) || @@ -180,12 +182,12 @@ end # facet functions -function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}) - addcounts!(zeros(Int, length(levels[1]), length(levels[2])), x, y, levels) +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2, UnitRange{<:Integer}}) + return addcounts!(zeros(Int, length(levels[1]), length(levels[2])), x, y, levels) end -function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, 
levels::NTuple{2,UnitRange{<:Integer}}, wv::AbstractWeights) - addcounts!(zeros(eltype(wv), length(levels[1]), length(levels[2])), x, y, levels, wv) +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2, UnitRange{<:Integer}}, wv::AbstractWeights) + return addcounts!(zeros(eltype(wv), length(levels[1]), length(levels[2])), x, y, levels, wv) end counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}) = @@ -193,9 +195,9 @@ counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::UnitRan counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}, wv::AbstractWeights) = counts(x, y, (levels, levels), wv) -counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2,Integer}) = +counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2, Integer}) = counts(x, y, (1:ks[1], 1:ks[2])) -counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2,Integer}, wv::AbstractWeights) = +counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2, Integer}, wv::AbstractWeights) = counts(x, y, (1:ks[1], 1:ks[2]), wv) counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer) = counts(x, y, (1:k, 1:k)) counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer, wv::AbstractWeights) = @@ -203,14 +205,14 @@ counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer, wv: counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}) = counts(x, y, (span(x), span(y))) counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, wv::AbstractWeights) = counts(x, y, (span(x), span(y)), wv) -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}) = +proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2, UnitRange{<:Integer}}) = counts(x, y, levels) / 
length(x) -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}, wv::AbstractWeights) = +proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2, UnitRange{<:Integer}}, wv::AbstractWeights) = counts(x, y, levels, wv) / sum(wv) -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2,Integer}) = +proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2, Integer}) = proportions(x, y, (1:ks[1], 1:ks[2])) -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2,Integer}, wv::AbstractWeights) = +proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2, Integer}, wv::AbstractWeights) = proportions(x, y, (1:ks[1], 1:ks[2]), wv) proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer) = proportions(x, y, (1:k, 1:k)) proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer, wv::AbstractWeights) = @@ -231,8 +233,8 @@ proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, wv::Abstra ## auxiliary functions -function _normalize_countmap(cm::Dict{T}, s::Real) where T - r = Dict{T,Float64}() +function _normalize_countmap(cm::Dict{T}, s::Real) where {T} + r = Dict{T, Float64}() for (k, c) in cm r[k] = c / s end @@ -269,7 +271,7 @@ raw counts. 
""" addcounts!(cm::Dict, x; alg = :auto) = _addcounts!(eltype(x), cm, x, alg = alg) -function _addcounts!(::Type{T}, cm::Dict, x; alg = :auto) where T +function _addcounts!(::Type{T}, cm::Dict, x; alg = :auto) where {T} # if it's safe to be sorted using radixsort then it should be faster # albeit using more RAM if radixsort_safe(T) && (alg == :auto || alg == :radixsort) @@ -277,13 +279,13 @@ function _addcounts!(::Type{T}, cm::Dict, x; alg = :auto) where T elseif alg == :radixsort throw(ArgumentError("`alg = :radixsort` is chosen but type `radixsort_safe($T)` did not return `true`; use `alg = :auto` or `alg = :dict` instead")) else - addcounts_dict!(cm,x) + addcounts_dict!(cm, x) end return cm end """Dict-based addcounts method""" -function addcounts_dict!(cm::Dict{T}, x) where T +function addcounts_dict!(cm::Dict{T}, x) where {T} for v in x index = ht_keyindex2!(cm, v) if index > 0 @@ -304,7 +306,7 @@ function _addcounts!(::Type{Bool}, cm::Dict{Bool}, x::AbstractArray{Bool}; alg = sumx = sum(x) cm[true] = get(cm, true, 0) + sumx cm[false] = get(cm, false, 0) + length(x) - sumx - cm + return cm end # specialized for `Bool` iterator @@ -317,10 +319,10 @@ function _addcounts!(::Type{Bool}, cm::Dict{Bool}, x; alg = :ignored) end cm[true] = get(cm, true, 0) + sumx cm[false] = get(cm, false, 0) + len - sumx - cm + return cm end -function _addcounts!(::Type{T}, cm::Dict{T}, x; alg = :ignored) where T <: Union{UInt8, UInt16, Int8, Int16} +function _addcounts!(::Type{T}, cm::Dict{T}, x; alg = :ignored) where {T <: Union{UInt8, UInt16, Int8, Int16}} counts = zeros(Int, 2^(8sizeof(T))) for xi in x @@ -337,24 +339,26 @@ function _addcounts!(::Type{T}, cm::Dict{T}, x; alg = :ignored) where T <: Union end end end - cm + return cm end -const BaseRadixSortSafeTypes = Union{Int8, Int16, Int32, Int64, Int128, - UInt8, UInt16, UInt32, UInt64, UInt128, - Float32, Float64} +const BaseRadixSortSafeTypes = Union{ + Int8, Int16, Int32, Int64, Int128, + UInt8, UInt16, UInt32, UInt64, 
UInt128, + Float32, Float64, +} "Can the type be safely sorted by radixsort" -radixsort_safe(::Type{T}) where T = T<:BaseRadixSortSafeTypes +radixsort_safe(::Type{T}) where {T} = T <: BaseRadixSortSafeTypes -function _addcounts_radix_sort_loop!(cm::Dict{T}, sx::AbstractVector{T}) where T +function _addcounts_radix_sort_loop!(cm::Dict{T}, sx::AbstractVector{T}) where {T} isempty(sx) && return cm last_sx = first(sx) start_i = firstindex(sx) # now the data is sorted: can just run through and accumulate values before # adding into the Dict - for i in start_i+1:lastindex(sx) + for i in (start_i + 1):lastindex(sx) sxi = sx[i] if !isequal(last_sx, sxi) cm[last_sx] = get(cm, last_sx, 0) + i - start_i @@ -369,9 +373,9 @@ function _addcounts_radix_sort_loop!(cm::Dict{T}, sx::AbstractVector{T}) where T return cm end -function addcounts_radixsort!(cm::Dict{T}, x::AbstractArray{T}) where T +function addcounts_radixsort!(cm::Dict{T}, x::AbstractArray{T}) where {T} # sort the x using radixsort - sx = sort(vec(x), alg=Base.DEFAULT_UNSTABLE) + sx = sort(vec(x), alg = Base.DEFAULT_UNSTABLE) # Delegate the loop to a separate function since sort might not # be inferred in Julia 0.6 after SortingAlgorithms is loaded. @@ -380,13 +384,13 @@ function addcounts_radixsort!(cm::Dict{T}, x::AbstractArray{T}) where T end # fall-back for `x` an iterator -function addcounts_radixsort!(cm::Dict{T}, x) where T +function addcounts_radixsort!(cm::Dict{T}, x) where {T} cx = vec(collect(x)) sx = sort!(cx, alg = Base.DEFAULT_UNSTABLE) return _addcounts_radix_sort_loop!(cm, sx) end -function addcounts!(cm::Dict{T}, x::AbstractArray{T}, wv::AbstractVector{W}) where {T,W<:Real} +function addcounts!(cm::Dict{T}, x::AbstractArray{T}, wv::AbstractVector{W}) where {T, W <: Real} # add wv weighted counts of integers from x to cm length(x) == length(wv) || @@ -430,8 +434,8 @@ raw counts. RAM, is safe for any data type, is faster for small arrays, and is faster when there are not many duplicates. 
""" -countmap(x; alg = :auto) = addcounts!(Dict{eltype(x),Int}(), x; alg = alg) -countmap(x::AbstractArray{T}, wv::AbstractVector{W}) where {T,W<:Real} = addcounts!(Dict{T,W}(), x, wv) +countmap(x; alg = :auto) = addcounts!(Dict{eltype(x), Int}(), x; alg = alg) +countmap(x::AbstractArray{T}, wv::AbstractVector{W}) where {T, W <: Real} = addcounts!(Dict{T, W}(), x, wv) """ diff --git a/src/cov.jl b/src/cov.jl index 6796216e6..15db4cfda 100644 --- a/src/cov.jl +++ b/src/cov.jl @@ -5,20 +5,20 @@ function _symmetrize!(a::DenseMatrix) m, n = size(a) m == n || error("a must be a square matrix.") - for j = 1:n - for i = j+1:n - vl = a[i,j] - vr = a[j,i] - a[i,j] = a[j,i] = middle(vl, vr) + for j in 1:n + for i in (j + 1):n + vl = a[i, j] + vr = a[j, i] + a[i, j] = a[j, i] = middle(vl, vr) end end return a end function _scalevars(x::DenseMatrix, s::AbstractWeights, dims::Int) - dims == 1 ? Diagonal(s) * x : - dims == 2 ? x * Diagonal(s) : - error("dims should be either 1 or 2.") + return dims == 1 ? Diagonal(s) * x : + dims == 2 ? 
x * Diagonal(s) : + error("dims should be either 1 or 2.") end ## scatter matrix @@ -71,33 +71,35 @@ Finally, bias correction is applied to the covariance calculation if """ function mean_and_cov end -scattermat(x::DenseMatrix; mean=nothing, dims::Int=1) = +scattermat(x::DenseMatrix; mean = nothing, dims::Int = 1) = _scattermatm(x, mean, dims) _scattermatm(x::DenseMatrix, ::Nothing, dims::Int) = - _unscaled_covzm(x .- mean(x, dims=dims), dims) -_scattermatm(x::DenseMatrix, mean, dims::Int=1) = + _unscaled_covzm(x .- mean(x, dims = dims), dims) +_scattermatm(x::DenseMatrix, mean, dims::Int = 1) = _unscaled_covzm(x .- mean, dims) -scattermat(x::DenseMatrix, wv::AbstractWeights; mean=nothing, dims::Int=1) = +scattermat(x::DenseMatrix, wv::AbstractWeights; mean = nothing, dims::Int = 1) = _scattermatm(x, wv, mean, dims) _scattermatm(x::DenseMatrix, wv::AbstractWeights, ::Nothing, dims::Int) = - _unscaled_covzm(x .- mean(x, wv, dims=dims), wv, dims) + _unscaled_covzm(x .- mean(x, wv, dims = dims), wv, dims) _scattermatm(x::DenseMatrix, wv::AbstractWeights, mean, dims::Int) = _unscaled_covzm(x .- mean, wv, dims) ## weighted cov -covm(x::DenseMatrix, mean, w::AbstractWeights, dims::Int=1; - corrected::Union{Bool, Nothing}=nothing) = - rmul!(scattermat(x, w, mean=mean, dims=dims), varcorrection(w, depcheck(:covm, :corrected, corrected))) +covm( + x::DenseMatrix, mean, w::AbstractWeights, dims::Int = 1; + corrected::Union{Bool, Nothing} = nothing +) = + rmul!(scattermat(x, w, mean = mean, dims = dims), varcorrection(w, depcheck(:covm, :corrected, corrected))) -cov(x::DenseMatrix, w::AbstractWeights, dims::Int=1; corrected::Union{Bool, Nothing}=nothing) = - covm(x, mean(x, w, dims=dims), w, dims; corrected=depcheck(:cov, :corrected, corrected)) +cov(x::DenseMatrix, w::AbstractWeights, dims::Int = 1; corrected::Union{Bool, Nothing} = nothing) = + covm(x, mean(x, w, dims = dims), w, dims; corrected = depcheck(:cov, :corrected, corrected)) -function corm(x::DenseMatrix, mean, 
w::AbstractWeights, vardim::Int=1) - c = covm(x, mean, w, vardim; corrected=false) - s = std(x, w, vardim; mean=mean, corrected=false) - cov2cor!(c, s) +function corm(x::DenseMatrix, mean, w::AbstractWeights, vardim::Int = 1) + c = covm(x, mean, w, vardim; corrected = false) + s = std(x, w, vardim; mean = mean, corrected = false) + return cov2cor!(c, s) end """ @@ -106,17 +108,19 @@ end Compute the Pearson correlation matrix of `X` along the dimension `dims` with a weighting `w` . """ -cor(x::DenseMatrix, w::AbstractWeights, dims::Int=1) = - corm(x, mean(x, w, dims=dims), w, dims) +cor(x::DenseMatrix, w::AbstractWeights, dims::Int = 1) = + corm(x, mean(x, w, dims = dims), w, dims) -function mean_and_cov(x::DenseMatrix, dims::Int=1; corrected::Bool=true) - m = mean(x, dims=dims) - return m, covm(x, m, dims, corrected=corrected) +function mean_and_cov(x::DenseMatrix, dims::Int = 1; corrected::Bool = true) + m = mean(x, dims = dims) + return m, covm(x, m, dims, corrected = corrected) end -function mean_and_cov(x::DenseMatrix, wv::AbstractWeights, dims::Int=1; - corrected::Union{Bool, Nothing}=nothing) - m = mean(x, wv, dims=dims) - return m, cov(x, wv, dims; corrected=depcheck(:mean_and_cov, :corrected, corrected)) +function mean_and_cov( + x::DenseMatrix, wv::AbstractWeights, dims::Int = 1; + corrected::Union{Bool, Nothing} = nothing + ) + m = mean(x, wv, dims = dims) + return m, cov(x, wv, dims; corrected = depcheck(:mean_and_cov, :corrected, corrected)) end @@ -143,14 +147,14 @@ function cov2cor!(C::AbstractMatrix, s::AbstractArray = map(sqrt, view(C, diagin Base.require_one_based_indexing(C, s) n = length(s) size(C) == (n, n) || throw(DimensionMismatch("inconsistent dimensions")) - for j = 1:n + for j in 1:n sj = s[j] - for i = 1:(j-1) - C[i,j] = adjoint(C[j,i]) + for i in 1:(j - 1) + C[i, j] = adjoint(C[j, i]) end - C[j,j] = oneunit(C[j,j]) - for i = (j+1):n - C[i,j] = _clampcor(C[i,j] / (s[i] * sj)) + C[j, j] = oneunit(C[j, j]) + for i in (j + 1):n + C[i, j] = 
_clampcor(C[i, j] / (s[i] * sj)) end end return C @@ -159,24 +163,24 @@ _clampcor(x::Real) = clamp(x, -1, 1) _clampcor(x) = x # Preserve structure of Symmetric and Hermitian covariance matrices -function cov2cor!(C::Union{Symmetric{<:Real},Hermitian}, s::AbstractArray) +function cov2cor!(C::Union{Symmetric{<:Real}, Hermitian}, s::AbstractArray) n = length(s) size(C) == (n, n) || throw(DimensionMismatch("inconsistent dimensions")) A = parent(C) if C.uplo === 'U' - for j = 1:n + for j in 1:n sj = s[j] - for i = 1:(j-1) - A[i,j] = _clampcor(A[i,j] / (s[i] * sj)) + for i in 1:(j - 1) + A[i, j] = _clampcor(A[i, j] / (s[i] * sj)) end - A[j,j] = oneunit(A[j,j]) + A[j, j] = oneunit(A[j, j]) end else - for j = 1:n + for j in 1:n sj = s[j] - A[j,j] = oneunit(A[j,j]) - for i = (j+1):n - A[i,j] = _clampcor(A[i,j] / (s[i] * sj)) + A[j, j] = oneunit(A[j, j]) + for i in (j + 1):n + A[i, j] = _clampcor(A[i, j] / (s[i] * sj)) end end end @@ -207,36 +211,36 @@ function cor2cov!(C::AbstractMatrix, s::AbstractArray) size(C) == (n, n) || throw(DimensionMismatch("inconsistent dimensions")) for j in 1:n sj = s[j] - for i in 1:(j-1) - C[i,j] = adjoint(C[j,i]) + for i in 1:(j - 1) + C[i, j] = adjoint(C[j, i]) end - C[j,j] = sj^2 - for i in (j+1):n - C[i,j] *= s[i] * sj + C[j, j] = sj^2 + for i in (j + 1):n + C[i, j] *= s[i] * sj end end return C end # Preserve structure of Symmetric and Hermitian correlation matrices -function cor2cov!(C::Union{Symmetric{<:Real},Hermitian}, s::AbstractArray) +function cor2cov!(C::Union{Symmetric{<:Real}, Hermitian}, s::AbstractArray) n = length(s) size(C) == (n, n) || throw(DimensionMismatch("inconsistent dimensions")) A = parent(C) if C.uplo === 'U' for j in 1:n sj = s[j] - for i in 1:(j-1) - A[i,j] *= s[i] * sj + for i in 1:(j - 1) + A[i, j] *= s[i] * sj end - A[j,j] = sj^2 + A[j, j] = sj^2 end else for j in 1:n sj = s[j] - A[j,j] = sj^2 - for i in (j+1):n - A[i,j] *= s[i] * sj + A[j, j] = sj^2 + for i in (j + 1):n + A[i, j] *= s[i] * sj end end end @@ 
-255,7 +259,7 @@ abstract type CovarianceEstimator end Compute a variance estimate from the observation vector `x` using the estimator `ce`. """ -cov(ce::CovarianceEstimator, x::AbstractVector; mean=nothing) = +cov(ce::CovarianceEstimator, x::AbstractVector; mean = nothing) = error("cov is not defined for $(typeof(ce)) and $(typeof(x))") """ @@ -282,10 +286,10 @@ The keyword argument `mean` can be: * when `dims=2`, an `AbstractVector` of length `N` or an `AbstractMatrix` of size `(N,1)`. """ -cov(ce::CovarianceEstimator, X::AbstractMatrix; mean=nothing, dims::Int=1) = +cov(ce::CovarianceEstimator, X::AbstractMatrix; mean = nothing, dims::Int = 1) = error("cov is not defined for $(typeof(ce)) and $(typeof(X))") -cov(ce::CovarianceEstimator, X::AbstractMatrix, w::AbstractWeights; mean=nothing, dims::Int=1) = +cov(ce::CovarianceEstimator, X::AbstractMatrix, w::AbstractWeights; mean = nothing, dims::Int = 1) = error("cov is not defined for $(typeof(ce)), $(typeof(X)) and $(typeof(w))") """ @@ -349,29 +353,29 @@ where `x`, `y` are vectors, `X` is a matrix and `w` is a weighting vector. 
""" struct SimpleCovariance <: CovarianceEstimator corrected::Bool - SimpleCovariance(;corrected::Bool=false) = new(corrected) + SimpleCovariance(; corrected::Bool = false) = new(corrected) end cov(sc::SimpleCovariance, x::AbstractVector) = - cov(x; corrected=sc.corrected) + cov(x; corrected = sc.corrected) cov(sc::SimpleCovariance, x::AbstractVector, y::AbstractVector) = - cov(x, y; corrected=sc.corrected) + cov(x, y; corrected = sc.corrected) -function cov(sc::SimpleCovariance, X::AbstractMatrix; dims::Int=1, mean=nothing) +function cov(sc::SimpleCovariance, X::AbstractMatrix; dims::Int = 1, mean = nothing) dims ∈ (1, 2) || throw(ArgumentError("Argument dims can only be 1 or 2 (given: $dims)")) if mean === nothing - return cov(X; dims=dims, corrected=sc.corrected) + return cov(X; dims = dims, corrected = sc.corrected) else - return covm(X, mean, dims, corrected=sc.corrected) + return covm(X, mean, dims, corrected = sc.corrected) end end -function cov(sc::SimpleCovariance, X::AbstractMatrix, w::AbstractWeights; dims::Int=1, mean=nothing) +function cov(sc::SimpleCovariance, X::AbstractMatrix, w::AbstractWeights; dims::Int = 1, mean = nothing) dims ∈ (1, 2) || throw(ArgumentError("Argument dims can only be 1 or 2 (given: $dims)")) if mean === nothing - return cov(X, w, dims, corrected=sc.corrected) + return cov(X, w, dims, corrected = sc.corrected) else - return covm(X, mean, w, dims, corrected=sc.corrected) + return covm(X, mean, w, dims, corrected = sc.corrected) end end diff --git a/src/deprecates.jl b/src/deprecates.jl index 16810af13..dacc9dcf8 100644 --- a/src/deprecates.jl +++ b/src/deprecates.jl @@ -5,7 +5,7 @@ if !isdefined(Base, :stderr) else function (io::typeof(stderr))(obj::StatisticalModel) Base.depwarn("stderr(obj::StatisticalModel) is deprecated, use stderror(obj) instead", :stderr) - io === stderr ? stderror(obj) : throw(MethodError(io, (obj,))) + return io === stderr ? 
stderror(obj) : throw(MethodError(io, (obj,))) end end @@ -13,17 +13,21 @@ end @deprecate norepeats(a::AbstractArray) allunique(a) -@deprecate(mad!(v::AbstractArray{<:Real}, center; - constant::Real = BigFloat("1.482602218505601860547076529360423431326703202590312896536266275245674447622701")), - mad!(v, center=center, constant=constant)) +@deprecate( + mad!( + v::AbstractArray{<:Real}, center; + constant::Real = BigFloat("1.482602218505601860547076529360423431326703202590312896536266275245674447622701") + ), + mad!(v, center = center, constant = constant) +) ### Deprecated January 2019 -@deprecate scattermatm(x::DenseMatrix, mean, dims::Int) scattermat(x, mean=mean, dims=dims) -@deprecate scattermatm(x::DenseMatrix, mean, wv::AbstractWeights, dims::Int) scattermat(x, wv, mean=mean, dims=dims) -@deprecate scattermat(x::DenseMatrix, dims::Int) scattermat(x, dims=dims) -@deprecate scattermat(x::DenseMatrix, wv::AbstractWeights, dims::Int) scattermat(x, wv, dims=dims) -@deprecate mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights, dims::Int) mean!(R, A, w, dims=dims) -@deprecate mean(A::AbstractArray{T}, w::AbstractWeights{W}, dims::Int) where {T<:Number,W<:Real} mean(A, w, dims=dims) +@deprecate scattermatm(x::DenseMatrix, mean, dims::Int) scattermat(x, mean = mean, dims = dims) +@deprecate scattermatm(x::DenseMatrix, mean, wv::AbstractWeights, dims::Int) scattermat(x, wv, mean = mean, dims = dims) +@deprecate scattermat(x::DenseMatrix, dims::Int) scattermat(x, dims = dims) +@deprecate scattermat(x::DenseMatrix, wv::AbstractWeights, dims::Int) scattermat(x, wv, dims = dims) +@deprecate mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights, dims::Int) mean!(R, A, w, dims = dims) +@deprecate mean(A::AbstractArray{T}, w::AbstractWeights{W}, dims::Int) where {T <: Number, W <: Real} mean(A, w, dims = dims) @deprecate wquantile(v::AbstractVector{<:Real}, w::AbstractWeights{<:Real}, p::AbstractVector{<:Real}) quantile(v, w, p) @deprecate 
wquantile(v::AbstractVector{<:Real}, w::AbstractWeights{<:Real}, p::Number) quantile(v, w, [p])[1] @@ -32,24 +36,26 @@ end @deprecate wmedian(v::AbstractVector{<:Real}, w::AbstractWeights{<:Real}) median(v, w) @deprecate wmedian(v::AbstractVector{<:Real}, w::AbstractVector{<:Real}) median(v, weights(w)) -@deprecate quantile(v::AbstractArray{<:Real}) quantile(v, [.0, .25, .5, .75, 1.0]) +@deprecate quantile(v::AbstractArray{<:Real}) quantile(v, [0.0, 0.25, 0.5, 0.75, 1.0]) ### Deprecated September 2019 -@deprecate sum(A::AbstractArray, w::AbstractWeights, dims::Int) sum(A, w, dims=dims) +@deprecate sum(A::AbstractArray, w::AbstractWeights, dims::Int) sum(A, w, dims = dims) @deprecate values(wv::AbstractWeights) convert(Vector, wv) ### Deprecated November 2021 -@deprecate stdm(x::AbstractArray{<:Real}, w::AbstractWeights, m::Real; corrected::Union{Bool, Nothing}=nothing) std(x, w, mean=m, corrected=corrected) false -@deprecate varm(x::AbstractArray{<:Real}, w::AbstractWeights, m::Real; corrected::Union{Bool, Nothing}=nothing) var(x, w, mean=m, corrected=corrected) false -@deprecate stdm(x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing}=nothing) std(x, w, dim, mean=m, corrected=corrected) false -@deprecate varm(x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing}=nothing) var(x, w, dim, mean=m, corrected=corrected) false -@deprecate varm!(R::AbstractArray, x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing}=nothing) var!(R, x, w, dim, mean=m, corrected=corrected) false +@deprecate stdm(x::AbstractArray{<:Real}, w::AbstractWeights, m::Real; corrected::Union{Bool, Nothing} = nothing) std(x, w, mean = m, corrected = corrected) false +@deprecate varm(x::AbstractArray{<:Real}, w::AbstractWeights, m::Real; corrected::Union{Bool, Nothing} = nothing) var(x, w, mean = m, corrected = corrected) 
false +@deprecate stdm(x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing} = nothing) std(x, w, dim, mean = m, corrected = corrected) false +@deprecate varm(x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing} = nothing) var(x, w, dim, mean = m, corrected = corrected) false +@deprecate varm!(R::AbstractArray, x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing} = nothing) var!(R, x, w, dim, mean = m, corrected = corrected) false ### This was never part of the public API ### Deprecated April 2024 -function make_alias_table!(w::AbstractVector, wsum, - a::AbstractVector{Float64}, - alias::AbstractVector{Int}) +function make_alias_table!( + w::AbstractVector, wsum, + a::AbstractVector{Float64}, + alias::AbstractVector{Int} + ) Base.depwarn("make_alias_table! is both internal and deprecated, use AliasTables.jl instead", :make_alias_table!) 
# Arguments: # @@ -70,7 +76,7 @@ function make_alias_table!(w::AbstractVector, wsum, throw(DimensionMismatch("Inconsistent array lengths.")) ac = n / wsum - for i = 1:n + for i in 1:n a[i] = w[i] * ac end @@ -79,12 +85,12 @@ function make_alias_table!(w::AbstractVector, wsum, kl = 0 # actual number of larges ks = 0 # actual number of smalls - for i = 1:n + for i in 1:n ai = a[i] if ai > 1.0 - larges[kl+=1] = i # push to larges + larges[kl += 1] = i # push to larges elseif ai < 1.0 - smalls[ks+=1] = i # push to smalls + smalls[ks += 1] = i # push to smalls end end @@ -94,15 +100,15 @@ function make_alias_table!(w::AbstractVector, wsum, alias[s] = l al = a[l] = (a[l] - 1.0) + a[s] if al > 1.0 - larges[kl+=1] = l # push to larges + larges[kl += 1] = l # push to larges else - smalls[ks+=1] = l # push to smalls + smalls[ks += 1] = l # push to smalls end end # this loop should be redundant, except for rounding - for i = 1:ks + for i in 1:ks a[smalls[i]] = 1.0 end - nothing + return nothing end diff --git a/src/deviation.jl b/src/deviation.jl index cfbf96c49..4c2e706d7 100644 --- a/src/deviation.jl +++ b/src/deviation.jl @@ -170,7 +170,7 @@ Return the root mean squared deviation between two optionally normalized arrays. The root mean squared deviation is computed as `sqrt(msd(a, b))`. """ -function rmsd(a::AbstractArray{<:Number}, b::AbstractArray{<:Number}; normalize::Bool=false) +function rmsd(a::AbstractArray{<:Number}, b::AbstractArray{<:Number}; normalize::Bool = false) v = sqrt(msd(a, b)) if normalize amin, amax = isempty(a) ? (zero(eltype(a)), zero(eltype(a))) : extrema(a) diff --git a/src/empirical.jl b/src/empirical.jl index e791ec943..8f5b52fda 100644 --- a/src/empirical.jl +++ b/src/empirical.jl @@ -13,7 +13,7 @@ function (ecdf::ECDF)(x::Real) evenweights = isempty(ecdf.weights) weightsum = evenweights ? length(ecdf.sorted_values) : sum(ecdf.weights) partialsum = evenweights ? 
n : sum(view(ecdf.weights, 1:n)) - partialsum / weightsum + return partialsum / weightsum end function (ecdf::ECDF)(v::AbstractVector{<:Real}) @@ -53,19 +53,23 @@ evaluate CDF values on other samples. `extrema`, `minimum`, and `maximum` are supported to for obtaining the range over which function is inside the interval ``(0,1)``; the function is defined for the whole real line. """ -function ecdf(X::AbstractVector{<:Real}; weights::AbstractVector{<:Real}=weights(Float64[])) +function ecdf(X::AbstractVector{<:Real}; weights::AbstractVector{<:Real} = weights(Float64[])) any(isnan, X) && throw(ArgumentError("ecdf can not include NaN values")) _weights = weights isa AbstractWeights ? weights : StatsBase.weights(weights) if isempty(_weights) return ECDF(sort(X), _weights) else if length(X) != length(_weights) - throw(ArgumentError(LazyString( - "data and weight vectors must be the same size, got ", - length(X), - " and ", - length(_weights), - ))) + throw( + ArgumentError( + LazyString( + "data and weight vectors must be the same size, got ", + length(X), + " and ", + length(_weights), + ) + ) + ) end ord = sortperm(X) ECDF(X[ord], _weights[ord]) diff --git a/src/hist.jl b/src/hist.jl index b2e696938..687e098a5 100644 --- a/src/hist.jl +++ b/src/hist.jl @@ -11,20 +11,20 @@ import LinearAlgebra: norm, normalize, normalize! for j in 1:N push!(result_expr.args, :(c[$j][i])) end - result_expr + return result_expr end # Need a generated function to promote edge types, because a simple # promote_type(map(eltype, h.edges)...) isn't type stable (tested # with Julia v0.5). -@generated function _promote_edge_types(edges::NTuple{N,AbstractVector}) where N - promote_type(map(eltype, edges.parameters)...) +@generated function _promote_edge_types(edges::NTuple{N, AbstractVector}) where {N} + return promote_type(map(eltype, edges.parameters)...) 
end ## nice-valued ranges for histograms -function histrange(v::AbstractArray{T}, n::Integer, closed::Symbol=:left) where T +function histrange(v::AbstractArray{T}, n::Integer, closed::Symbol = :left) where {T} F = float(T) nv = length(v) if nv == 0 && n < 0 @@ -36,10 +36,10 @@ function histrange(v::AbstractArray{T}, n::Integer, closed::Symbol=:left) where end lo, hi = extrema(v) - histrange(F(lo), F(hi), n, closed) + return histrange(F(lo), F(hi), n, closed) end -function histrange(lo::F, hi::F, n::Integer, closed::Symbol=:left) where F +function histrange(lo::F, hi::F, n::Integer, closed::Symbol = :left) where {F} if hi == lo start = F(hi) step = one(F) @@ -61,8 +61,8 @@ function histrange(lo::F, hi::F, n::Integer, closed::Symbol=:left) where F step *= 10 end divisor = one(F) - start = step*floor(lo/step) - len = ceil((hi - start)/step) + start = step * floor(lo / step) + len = ceil((hi - start) / step) else divisor = exp10(-floor(lbw)) r = bw * divisor @@ -76,45 +76,46 @@ function histrange(lo::F, hi::F, n::Integer, closed::Symbol=:left) where F divisor /= 10 end step = one(F) - start = floor(lo*divisor) - len = ceil(hi*divisor - start) + start = floor(lo * divisor) + len = ceil(hi * divisor - start) end end # fix up endpoints if closed == :right #(,] - while lo <= start/divisor + while lo <= start / divisor start -= step end - while (start + (len-1)*step)/divisor < hi + while (start + (len - 1) * step) / divisor < hi len += one(F) end else - while lo < start/divisor + while lo < start / divisor start -= step end - while (start + (len-1)*step)/divisor <= hi + while (start + (len - 1) * step) / divisor <= hi len += one(F) end end - StepRangeLen(Base.TwicePrecision{Float64}((start, divisor)), - Base.TwicePrecision{Float64}((step, divisor)), - Int(len)) + return StepRangeLen( + Base.TwicePrecision{Float64}((start, divisor)), + Base.TwicePrecision{Float64}((step, divisor)), + Int(len) + ) end 
-histrange(vs::NTuple{N,AbstractVector},nbins::NTuple{N,Integer},closed::Symbol) where {N} = - map((v,n) -> histrange(v,n,closed),vs,nbins) -histrange(vs::NTuple{N,AbstractVector},nbins::Integer,closed::Symbol) where {N} = - map(v -> histrange(v,nbins,closed),vs) - +histrange(vs::NTuple{N, AbstractVector}, nbins::NTuple{N, Integer}, closed::Symbol) where {N} = + map((v, n) -> histrange(v, n, closed), vs, nbins) +histrange(vs::NTuple{N, AbstractVector}, nbins::Integer, closed::Symbol) where {N} = + map(v -> histrange(v, nbins, closed), vs) ## histograms ## function sturges(n) # Sturges' formula - n==0 && return one(n) - ceil(Integer, log2(n))+1 + n == 0 && return one(n) + return ceil(Integer, log2(n)) + 1 end -abstract type AbstractHistogram{T<:Real,N,E} end +abstract type AbstractHistogram{T <: Real, N, E} end # N-dimensional histogram object """ @@ -183,13 +184,15 @@ isdensity: true julia> # observe isdensity = true and weights tells us the number of observation per binsize in each bin ``` """ -mutable struct Histogram{T<:Real,N,E} <: AbstractHistogram{T,N,E} +mutable struct Histogram{T <: Real, N, E} <: AbstractHistogram{T, N, E} edges::E - weights::Array{T,N} + weights::Array{T, N} closed::Symbol isdensity::Bool - function Histogram{T,N,E}(edges::NTuple{N,AbstractArray}, weights::Array{T,N}, - closed::Symbol, isdensity::Bool=false) where {T,N,E} + function Histogram{T, N, E}( + edges::NTuple{N, AbstractArray}, weights::Array{T, N}, + closed::Symbol, isdensity::Bool = false + ) where {T, N, E} closed == :right || closed == :left || error("closed must :left or :right") isdensity && !(T <: AbstractFloat) && error("Density histogram must have float-type weights") _edges_nbins(edges) == size(weights) || error("Histogram edge vectors must be 1 longer than corresponding weight dimensions") @@ -201,39 +204,45 @@ mutable struct Histogram{T<:Real,N,E} <: AbstractHistogram{T,N,E} e isa AbstractRange && any(isequal(-0.0), e) && throw(ArgumentError("ranges containing -0.0 
not allowed in edges")) end - new{T,N,E}(edges,weights,closed,isdensity) + return new{T, N, E}(edges, weights, closed, isdensity) end end -Histogram(edges::NTuple{N,AbstractVector}, weights::AbstractArray{T,N}, - closed::Symbol=:left, isdensity::Bool=false) where {T,N} = - Histogram{T,N,typeof(edges)}(edges,weights,closed,isdensity) +Histogram( + edges::NTuple{N, AbstractVector}, weights::AbstractArray{T, N}, + closed::Symbol = :left, isdensity::Bool = false +) where {T, N} = + Histogram{T, N, typeof(edges)}(edges, weights, closed, isdensity) -Histogram(edges::NTuple{N,AbstractVector}, ::Type{T}, closed::Symbol=:left, - isdensity::Bool=false) where {T,N} = - Histogram(edges,zeros(T,_edges_nbins(edges)...),closed,isdensity) +Histogram( + edges::NTuple{N, AbstractVector}, ::Type{T}, closed::Symbol = :left, + isdensity::Bool = false +) where {T, N} = + Histogram(edges, zeros(T, _edges_nbins(edges)...), closed, isdensity) -Histogram(edges::NTuple{N,AbstractVector}, closed::Symbol=:left, - isdensity::Bool=false) where {N} = - Histogram(edges,Int,closed,isdensity) +Histogram( + edges::NTuple{N, AbstractVector}, closed::Symbol = :left, + isdensity::Bool = false +) where {N} = + Histogram(edges, Int, closed, isdensity) function show(io::IO, h::AbstractHistogram) println(io, typeof(h)) - println(io,"edges:") + println(io, "edges:") for e in h.edges - println(io," ",e) + println(io, " ", e) end - println(io,"weights: ",h.weights) - println(io,"closed: ",h.closed) - print(io,"isdensity: ",h.isdensity) + println(io, "weights: ", h.weights) + println(io, "closed: ", h.closed) + return print(io, "isdensity: ", h.isdensity) end -(==)(h1::Histogram,h2::Histogram) = (==)(h1.edges,h2.edges) && (==)(h1.weights,h2.weights) && (==)(h1.closed,h2.closed) && (==)(h1.isdensity,h2.isdensity) +(==)(h1::Histogram, h2::Histogram) = (==)(h1.edges, h2.edges) && (==)(h1.weights, h2.weights) && (==)(h1.closed, h2.closed) && (==)(h1.isdensity, h2.isdensity) -binindex(h::AbstractHistogram{T,1}, 
x::Real) where {T} = binindex(h, (x,))[1] +binindex(h::AbstractHistogram{T, 1}, x::Real) where {T} = binindex(h, (x,))[1] -binindex(h::Histogram{T,N}, xs::NTuple{N,Real}) where {T,N} = +binindex(h::Histogram{T, N}, xs::NTuple{N, Real}) where {T, N} = map((edge, x) -> _edge_binindex(edge, h.closed, x), h.edges, xs) _normalize_zero(x::AbstractFloat) = isequal(x, -0.0) ? zero(x) : x @@ -242,9 +251,9 @@ _normalize_zero(x::Any) = x # Always treat -0.0 like 0.0 @inline function _edge_binindex(edge::AbstractVector, closed::Symbol, x::Real) if closed === :right - return searchsortedfirst(edge, _normalize_zero(x), by=_normalize_zero) - 1 + return searchsortedfirst(edge, _normalize_zero(x), by = _normalize_zero) - 1 else - return searchsortedlast(edge, _normalize_zero(x), by=_normalize_zero) + return searchsortedlast(edge, _normalize_zero(x), by = _normalize_zero) end end # Passing by=_normalize_zero for ranges would have a large performance hit @@ -259,108 +268,108 @@ end end -binvolume(h::AbstractHistogram{T,1}, binidx::Integer) where {T} = binvolume(h, (binidx,)) -binvolume(::Type{V}, h::AbstractHistogram{T,1}, binidx::Integer) where {V,T} = binvolume(V, h, (binidx,)) +binvolume(h::AbstractHistogram{T, 1}, binidx::Integer) where {T} = binvolume(h, (binidx,)) +binvolume(::Type{V}, h::AbstractHistogram{T, 1}, binidx::Integer) where {V, T} = binvolume(V, h, (binidx,)) -binvolume(h::Histogram{T,N}, binidx::NTuple{N,Integer}) where {T,N} = +binvolume(h::Histogram{T, N}, binidx::NTuple{N, Integer}) where {T, N} = binvolume(_promote_edge_types(h.edges), h, binidx) -binvolume(::Type{V}, h::Histogram{T,N}, binidx::NTuple{N,Integer}) where {V,T,N} = +binvolume(::Type{V}, h::Histogram{T, N}, binidx::NTuple{N, Integer}) where {V, T, N} = prod(map((edge, i) -> _edge_binvolume(V, edge, i), h.edges, binidx)) -@inline _edge_binvolume(::Type{V}, edge::AbstractVector, i::Integer) where {V} = V(edge[i+1]) - V(edge[i]) +@inline _edge_binvolume(::Type{V}, edge::AbstractVector, i::Integer) 
where {V} = V(edge[i + 1]) - V(edge[i]) @inline _edge_binvolume(::Type{V}, edge::AbstractRange, i::Integer) where {V} = V(step(edge)) @inline _edge_binvolume(edge::AbstractVector, i::Integer) = _edge_binvolume(eltype(edge), edge, i) -@inline _edges_nbins(edges::NTuple{N,AbstractVector}) where {N} = map(_edge_nbins, edges) +@inline _edges_nbins(edges::NTuple{N, AbstractVector}) where {N} = map(_edge_nbins, edges) @inline _edge_nbins(edge::AbstractVector) = length(edge) - 1 # 1-dimensional -Histogram(edge::AbstractVector, weights::AbstractVector{T}, closed::Symbol=:left, isdensity::Bool=false) where {T} = +Histogram(edge::AbstractVector, weights::AbstractVector{T}, closed::Symbol = :left, isdensity::Bool = false) where {T} = Histogram((edge,), weights, closed, isdensity) -Histogram(edge::AbstractVector, ::Type{T}, closed::Symbol=:left, isdensity::Bool=false) where {T} = +Histogram(edge::AbstractVector, ::Type{T}, closed::Symbol = :left, isdensity::Bool = false) where {T} = Histogram((edge,), T, closed, isdensity) -Histogram(edge::AbstractVector, closed::Symbol=:left, isdensity::Bool=false) = +Histogram(edge::AbstractVector, closed::Symbol = :left, isdensity::Bool = false) = Histogram((edge,), closed, isdensity) -push!(h::AbstractHistogram{T,1}, x::Real, w::Real) where {T} = push!(h, (x,), w) -push!(h::AbstractHistogram{T,1}, x::Real) where {T} = push!(h,x,one(T)) -append!(h::AbstractHistogram{T,1}, v::AbstractVector) where {T} = append!(h, (v,)) -append!(h::AbstractHistogram{T,1}, v::AbstractVector, wv::Union{AbstractVector,AbstractWeights}) where {T} = append!(h, (v,), wv) +push!(h::AbstractHistogram{T, 1}, x::Real, w::Real) where {T} = push!(h, (x,), w) +push!(h::AbstractHistogram{T, 1}, x::Real) where {T} = push!(h, x, one(T)) +append!(h::AbstractHistogram{T, 1}, v::AbstractVector) where {T} = append!(h, (v,)) +append!(h::AbstractHistogram{T, 1}, v::AbstractVector, wv::Union{AbstractVector, AbstractWeights}) where {T} = append!(h, (v,), wv) 
-fit(::Type{Histogram{T}},v::AbstractVector, edg::AbstractVector; closed::Symbol=:left) where {T} = - fit(Histogram{T},(v,), (edg,), closed=closed) -fit(::Type{Histogram{T}},v::AbstractVector; closed::Symbol=:left, nbins=sturges(length(v))) where {T} = - fit(Histogram{T},(v,); closed=closed, nbins=nbins) -fit(::Type{Histogram{T}},v::AbstractVector, wv::AbstractWeights, edg::AbstractVector; closed::Symbol=:left) where {T} = - fit(Histogram{T},(v,), wv, (edg,), closed=closed) -fit(::Type{Histogram{T}},v::AbstractVector, wv::AbstractWeights; closed::Symbol=:left, nbins=sturges(length(v))) where {T} = - fit(Histogram{T}, (v,), wv; closed=closed, nbins=nbins) +fit(::Type{Histogram{T}}, v::AbstractVector, edg::AbstractVector; closed::Symbol = :left) where {T} = + fit(Histogram{T}, (v,), (edg,), closed = closed) +fit(::Type{Histogram{T}}, v::AbstractVector; closed::Symbol = :left, nbins = sturges(length(v))) where {T} = + fit(Histogram{T}, (v,); closed = closed, nbins = nbins) +fit(::Type{Histogram{T}}, v::AbstractVector, wv::AbstractWeights, edg::AbstractVector; closed::Symbol = :left) where {T} = + fit(Histogram{T}, (v,), wv, (edg,), closed = closed) +fit(::Type{Histogram{T}}, v::AbstractVector, wv::AbstractWeights; closed::Symbol = :left, nbins = sturges(length(v))) where {T} = + fit(Histogram{T}, (v,), wv; closed = closed, nbins = nbins) fit(::Type{Histogram}, v::AbstractVector, wv::AbstractWeights{W}, args...; kwargs...) where {W} = fit(Histogram{W}, v, wv, args...; kwargs...) # N-dimensional -function push!(h::Histogram{T,N},xs::NTuple{N,Real},w::Real) where {T,N} +function push!(h::Histogram{T, N}, xs::NTuple{N, Real}, w::Real) where {T, N} h.isdensity && error("Density histogram must have float-type weights") idx = binindex(h, xs) if checkbounds(Bool, h.weights, idx...) h.weights[idx...] 
+= w end - h + return h end -function push!(h::Histogram{T,N},xs::NTuple{N,Real},w::Real) where {T<:AbstractFloat,N} +function push!(h::Histogram{T, N}, xs::NTuple{N, Real}, w::Real) where {T <: AbstractFloat, N} idx = binindex(h, xs) if checkbounds(Bool, h.weights, idx...) h.weights[idx...] += h.isdensity ? w / binvolume(h, idx) : w end - h + return h end -push!(h::AbstractHistogram{T,N},xs::NTuple{N,Real}) where {T,N} = push!(h,xs,one(T)) +push!(h::AbstractHistogram{T, N}, xs::NTuple{N, Real}) where {T, N} = push!(h, xs, one(T)) -function append!(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}) where {T,N} +function append!(h::AbstractHistogram{T, N}, vs::NTuple{N, AbstractVector}) where {T, N} for i in eachindex(vs...) xs = _multi_getindex(i, vs...) push!(h, xs, one(T)) end - h + return h end -function append!(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}, wv::AbstractVector) where {T,N} +function append!(h::AbstractHistogram{T, N}, vs::NTuple{N, AbstractVector}, wv::AbstractVector) where {T, N} for i in eachindex(wv, vs...) xs = _multi_getindex(i, vs...) 
push!(h, xs, wv[i]) end - h + return h end # Turn kwargs nbins into a type-stable tuple of integers: -function _nbins_tuple(vs::NTuple{N,AbstractVector}, nbins) where N +function _nbins_tuple(vs::NTuple{N, AbstractVector}, nbins) where {N} template = map(length, vs) result = broadcast((t, x) -> typeof(t)(x), template, nbins) - result::typeof(template) + return result::typeof(template) end -fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, edges::NTuple{N,AbstractVector}; closed::Symbol=:left) where {T,N} = +fit(::Type{Histogram{T}}, vs::NTuple{N, AbstractVector}, edges::NTuple{N, AbstractVector}; closed::Symbol = :left) where {T, N} = append!(Histogram(edges, T, closed, false), vs) -fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}; closed::Symbol=:left, nbins=sturges(length(vs[1]))) where {T,N} = - fit(Histogram{T}, vs, histrange(vs,_nbins_tuple(vs, nbins),closed); closed=closed) +fit(::Type{Histogram{T}}, vs::NTuple{N, AbstractVector}; closed::Symbol = :left, nbins = sturges(length(vs[1]))) where {T, N} = + fit(Histogram{T}, vs, histrange(vs, _nbins_tuple(vs, nbins), closed); closed = closed) -fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::AbstractWeights{W}, edges::NTuple{N,AbstractVector}; closed::Symbol=:left) where {T,N,W} = +fit(::Type{Histogram{T}}, vs::NTuple{N, AbstractVector}, wv::AbstractWeights{W}, edges::NTuple{N, AbstractVector}; closed::Symbol = :left) where {T, N, W} = append!(Histogram(edges, T, closed, false), vs, wv) -fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::AbstractWeights; closed::Symbol=:left, nbins=sturges(length(vs[1]))) where {T,N} = - fit(Histogram{T}, vs, wv, histrange(vs,_nbins_tuple(vs, nbins),closed); closed=closed) +fit(::Type{Histogram{T}}, vs::NTuple{N, AbstractVector}, wv::AbstractWeights; closed::Symbol = :left, nbins = sturges(length(vs[1]))) where {T, N} = + fit(Histogram{T}, vs, wv, histrange(vs, _nbins_tuple(vs, nbins), closed); closed = closed) """ fit(Histogram, data[, weight][, 
edges]; closed=:left[, nbins]) @@ -411,15 +420,15 @@ h = fit(Histogram, (rand(100),rand(100)),nbins=10) ``` """ fit(::Type{Histogram}, args...; kwargs...) = fit(Histogram{Int}, args...; kwargs...) -fit(::Type{Histogram}, vs::NTuple{N,AbstractVector}, wv::AbstractWeights{W}, args...; kwargs...) where {N,W} = fit(Histogram{W}, vs, wv, args...; kwargs...) +fit(::Type{Histogram}, vs::NTuple{N, AbstractVector}, wv::AbstractWeights{W}, args...; kwargs...) where {N, W} = fit(Histogram{W}, vs, wv, args...; kwargs...) # Get a suitable high-precision type for the norm of a histogram. -norm_type(h::Histogram{T,N}) where {T,N} = +norm_type(h::Histogram{T, N}) where {T, N} = promote_type(T, _promote_edge_types(h.edges)) -norm_type(::Type{T}) where {T<:Integer} = promote_type(T, Int64) -norm_type(::Type{T}) where {T<:AbstractFloat} = promote_type(T, Float64) +norm_type(::Type{T}) where {T <: Integer} = promote_type(T, Int64) +norm_type(::Type{T}) where {T <: AbstractFloat} = promote_type(T, Float64) """ @@ -427,8 +436,8 @@ norm_type(::Type{T}) where {T<:AbstractFloat} = promote_type(T, Float64) Calculate the norm of histogram `h` as the absolute value of its integral. """ -@generated function norm(h::Histogram{T,N}) where {T,N} - quote +@generated function norm(h::Histogram{T, N}) where {T, N} + return quote edges = h.edges weights = h.weights SumT = norm_type(h) @@ -437,11 +446,11 @@ Calculate the norm of histogram `h` as the absolute value of its integral. @nloops( $N, i, weights, d -> begin - v_{$N-d+1} = v_{$N-d} * _edge_binvolume(SumT, edges[d], i_d) - s_{$N-d+1} = zero(SumT) + v_{$N - d + 1} = v_{$N - d} * _edge_binvolume(SumT, edges[d], i_d) + s_{$N - d + 1} = zero(SumT) end, d -> begin - s_{$N-d} += s_{$N-d+1} + s_{$N - d} += s_{$N - d + 1} end, begin $(Symbol("s_$(N)")) += (@nref $N weights i) * $(Symbol("v_$N")) @@ -452,10 +461,9 @@ Calculate the norm of histogram `h` as the absolute value of its integral. 
end -float(h::Histogram{T,N}) where {T<:AbstractFloat,N} = h - -float(h::Histogram{T,N}) where {T,N} = Histogram(h.edges, float(h.weights), h.closed, h.isdensity) +float(h::Histogram{T, N}) where {T <: AbstractFloat, N} = h +float(h::Histogram{T, N}) where {T, N} = Histogram(h.edges, float(h.weights), h.closed, h.isdensity) """ @@ -465,8 +473,8 @@ float(h::Histogram{T,N}) where {T,N} = Histogram(h.edges, float(h.weights), h.cl Normalize the histogram `h` and optionally scale one or more auxiliary weight arrays appropriately. See description of `normalize` for details. Returns `h`. """ -@generated function normalize!(h::Histogram{T,N}, aux_weights::Array{T,N}...; mode::Symbol=:pdf) where {T<:AbstractFloat,N} - quote +@generated function normalize!(h::Histogram{T, N}, aux_weights::Array{T, N}...; mode::Symbol = :pdf) where {T <: AbstractFloat, N} + return quote edges = h.edges weights = h.weights @@ -480,7 +488,7 @@ arrays appropriately. See description of `normalize` for details. Returns `h`. if h.isdensity if mode == :pdf || mode == :probability # histogram already represents a density, just divide weights by norm - s = 1/norm(h) + s = 1 / norm(h) weights .*= s for A in aux_weights A .*= s @@ -493,7 +501,7 @@ arrays appropriately. See description of `normalize` for details. Returns `h`. # Divide weights by bin volume, for :pdf also divide by sum of weights SumT = norm_type(h) vs_0 = (mode == :pdf) ? sum(SumT, weights) : one(SumT) - @nloops $N i weights d->(vs_{$N-d+1} = vs_{$N-d} * _edge_binvolume(SumT, edges[d], i_d)) begin + @nloops $N i weights d -> (vs_{$N - d + 1} = vs_{$N - d} * _edge_binvolume(SumT, edges[d], i_d)) begin (@nref $N weights i) /= $(Symbol("vs_$N")) for A in aux_weights (@nref $N A i) /= $(Symbol("vs_$N")) @@ -538,7 +546,7 @@ Valid values for `mode` are: Successive application of both `:probability` and `:density` normalization (in any order) is equivalent to `:pdf` normalization. 
""" -normalize(h::Histogram{T,N}; mode::Symbol=:pdf) where {T,N} = +normalize(h::Histogram{T, N}; mode::Symbol = :pdf) where {T, N} = normalize!(deepcopy(float(h)), mode = mode) @@ -551,11 +559,11 @@ uncertainties). The values of the auxiliary arrays are scaled by the same factor as the corresponding histogram weight values. Returns a tuple of the normalized histogram and scaled auxiliary weights. """ -function normalize(h::Histogram{T,N}, aux_weights::Array{T,N}...; mode::Symbol=:pdf) where {T,N} +function normalize(h::Histogram{T, N}, aux_weights::Array{T, N}...; mode::Symbol = :pdf) where {T, N} h_fltcp = deepcopy(float(h)) aux_weights_fltcp = map(x -> deepcopy(float(x)), aux_weights) normalize!(h_fltcp, aux_weights_fltcp..., mode = mode) - (h_fltcp, aux_weights_fltcp...) + return (h_fltcp, aux_weights_fltcp...) end @@ -566,8 +574,8 @@ Create a new histogram with the same binning, type and shape of weights and the same properties (`closed` and `isdensity`) as `h`, with all weights set to zero. """ -Base.zero(h::Histogram{T,N,E}) where {T,N,E} = - Histogram{T,N,E}(deepcopy(h.edges), zero(h.weights), h.closed, h.isdensity) +Base.zero(h::Histogram{T, N, E}) where {T, N, E} = + Histogram{T, N, E}(deepcopy(h.edges), zero(h.weights), h.closed, h.isdensity) """ @@ -586,7 +594,7 @@ function Base.merge!(target::Histogram, others::Histogram...) for h in others target.weights .+= h.weights end - target + return target end diff --git a/src/misc.jl b/src/misc.jl index afff59eb4..fc09d7b9d 100644 --- a/src/misc.jl +++ b/src/misc.jl @@ -18,12 +18,12 @@ julia> rle([1,1,1,2,2,3,3,3,3,2,2,2]) ([1, 2, 3, 2], [3, 2, 4, 3]) ``` """ -function rle(v::AbstractVector{T}) where T +function rle(v::AbstractVector{T}) where {T} n = length(v) vals = T[] lens = Int[] - n>0 || return (vals,lens) + n > 0 || return (vals, lens) cv = v[1] cl = 1 @@ -57,23 +57,26 @@ Reconstruct a vector from its run-length encoding (see [`rle`](@ref)). 
`vals` is a vector of the values and `lens` is a vector of the corresponding run lengths. """ -function inverse_rle(vals::AbstractVector{T}, lens::AbstractVector{<:Integer}) where T +function inverse_rle(vals::AbstractVector{T}, lens::AbstractVector{<:Integer}) where {T} m = length(vals) mlens = length(lens) - mlens == m || throw(DimensionMismatch( - "number of vals ($m) does not match the number of lens ($mlens)")) + mlens == m || throw( + DimensionMismatch( + "number of vals ($m) does not match the number of lens ($mlens)" + ) + ) n = sum(lens) n >= 0 || throw(ArgumentError("lengths must be non-negative")) r = Vector{T}(undef, n) p = 0 - for i = 1 : m + for i in 1:m j = lens[i] j >= 0 || throw(ArgumentError("lengths must be non-negative")) v = vals[i] while j > 0 - r[p+=1] = v - j -=1 + r[p += 1] = v + j -= 1 end end return r @@ -86,9 +89,9 @@ end Construct a dictionary that maps each unique value in `a` to the index of its first occurrence in `a`. """ -function indexmap(a::AbstractArray{T}) where T - d = Dict{T,Int}() - for i = 1 : length(a) +function indexmap(a::AbstractArray{T}) where {T} + d = Dict{T, Int}() + for i in 1:length(a) k = a[i] if !haskey(d, k) d[k] = i @@ -104,10 +107,10 @@ end Construct a dictionary that maps each of the `n` unique values in `a` to a number between 1 and `n`. """ -function levelsmap(a::AbstractArray{T}) where T - d = Dict{T,Int}() +function levelsmap(a::AbstractArray{T}) where {T} + d = Dict{T, Int}() index = 1 - for i = 1 : length(a) + for i in 1:length(a) k = a[i] if !haskey(d, k) d[k] = index @@ -136,8 +139,8 @@ julia> indicatormat([1 2 2], 2) 0 1 1 ``` """ -function indicatormat(x::AbstractArray{<:Integer}, k::Integer; sparse::Bool=false) - sparse ? _indicatormat_sparse(x, k) : _indicatormat_dense(x, k) +function indicatormat(x::AbstractArray{<:Integer}, k::Integer; sparse::Bool = false) + return sparse ? 
_indicatormat_sparse(x, k) : _indicatormat_dense(x, k) end @@ -148,30 +151,30 @@ Construct a boolean matrix `I` of size `(length(c), length(x))`. Let `ci` be the index of `x[i]` in `c`. Then `I[ci, i] = true` and all other elements are `false`. """ -function indicatormat(x::AbstractArray, c::AbstractArray; sparse::Bool=false) - sparse ? _indicatormat_sparse(x, c) : _indicatormat_dense(x, c) +function indicatormat(x::AbstractArray, c::AbstractArray; sparse::Bool = false) + return sparse ? _indicatormat_sparse(x, c) : _indicatormat_dense(x, c) end -indicatormat(x::AbstractArray; sparse::Bool=false) = - indicatormat(x, sort!(unique(x)); sparse=sparse) +indicatormat(x::AbstractArray; sparse::Bool = false) = + indicatormat(x, sort!(unique(x)); sparse = sparse) function _indicatormat_dense(x::AbstractArray{<:Integer}, k::Integer) n = length(x) r = zeros(Bool, k, n) - for i = 1 : n + for i in 1:n r[x[i], i] = true end return r end -function _indicatormat_dense(x::AbstractArray{T}, c::AbstractArray{T}) where T +function _indicatormat_dense(x::AbstractArray{T}, c::AbstractArray{T}) where {T} d = indexmap(c) m = length(c) n = length(x) r = zeros(Bool, m, n) o = 0 - for i = 1 : n + for i in 1:n xi = x[i] r[o + d[xi]] = true o += m @@ -181,13 +184,13 @@ end _indicatormat_sparse(x::AbstractArray{<:Integer}, k::Integer) = (n = length(x); sparse(x, 1:n, true, k, n)) -function _indicatormat_sparse(x::AbstractArray{T}, c::AbstractArray{T}) where T +function _indicatormat_sparse(x::AbstractArray{T}, c::AbstractArray{T}) where {T} d = indexmap(c) m = length(c) n = length(x) rinds = Vector{Int}(undef, n) - for i = 1 : n + for i in 1:n rinds[i] = d[x[i]] end return sparse(rinds, 1:n, true, m, n) diff --git a/src/moments.jl b/src/moments.jl index 04de97a83..ece8c7413 100644 --- a/src/moments.jl +++ b/src/moments.jl @@ -18,8 +18,10 @@ replacing ``\\frac{1}{\\sum{w}}`` with a factor dependent on the type of weights * `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals 
`count(!iszero, w)` * `Weights`: `ArgumentError` (bias correction not supported) """ -function var(v::AbstractArray{<:Real}, w::AbstractWeights; mean=nothing, - corrected::Union{Bool, Nothing}=nothing) +function var( + v::AbstractArray{<:Real}, w::AbstractWeights; mean = nothing, + corrected::Union{Bool, Nothing} = nothing + ) length(w) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) corrected = depcheck(:var, :corrected, corrected) if mean === nothing @@ -27,8 +29,10 @@ function var(v::AbstractArray{<:Real}, w::AbstractWeights; mean=nothing, end return _moment2(v, w, mean; corrected) end -function var(v::AbstractArray{<:Real}, w::UnitWeights; mean=nothing, - corrected::Union{Bool, Nothing}=nothing) +function var( + v::AbstractArray{<:Real}, w::UnitWeights; mean = nothing, + corrected::Union{Bool, Nothing} = nothing + ) length(w) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) corrected = depcheck(:var, :corrected, corrected) return var(v; mean, corrected) @@ -36,19 +40,21 @@ end ## var along dim -function var!(R::AbstractArray, A::AbstractArray{<:Real}, w::AbstractWeights, dims::Int; - mean=nothing, corrected::Union{Bool, Nothing}=nothing) +function var!( + R::AbstractArray, A::AbstractArray{<:Real}, w::AbstractWeights, dims::Int; + mean = nothing, corrected::Union{Bool, Nothing} = nothing + ) corrected = depcheck(:var!, :corrected, corrected) if mean == 0 mean = Base.reducedim_initarray(A, dims, 0, eltype(R)) elseif mean === nothing - mean = Statistics.mean(A, w, dims=dims) + mean = Statistics.mean(A, w, dims = dims) else # check size of mean - for i = 1:ndims(A) - dA = size(A,i) - dM = size(mean,i) + for i in 1:ndims(A) + dA = size(A, i) + dM = size(mean, i) if i == dims dM == 1 || throw(DimensionMismatch("Incorrect size of mean.")) else @@ -56,26 +62,34 @@ function var!(R::AbstractArray, A::AbstractArray{<:Real}, w::AbstractWeights, di end end end - return rmul!(_wsum_centralize!(R, abs2, A, convert(Vector, w), 
mean, dims, true), - varcorrection(w, corrected)) + return rmul!( + _wsum_centralize!(R, abs2, A, convert(Vector, w), mean, dims, true), + varcorrection(w, corrected) + ) end -function var(A::AbstractArray{<:Real}, w::AbstractWeights, dim::Int; mean=nothing, - corrected::Union{Bool, Nothing}=nothing) +function var( + A::AbstractArray{<:Real}, w::AbstractWeights, dim::Int; mean = nothing, + corrected::Union{Bool, Nothing} = nothing + ) corrected = depcheck(:var, :corrected, corrected) if mean === nothing z = (zero(eltype(w)) * zero(eltype(A))^2) / zero(eltype(w)) else z = (zero(eltype(w)) * zero(zero(eltype(A)) - zero(eltype(mean)))^2) / zero(eltype(w)) end - var!(similar(A, typeof(z), Base.reduced_indices(axes(A), dim)), A, w, dim; - mean=mean, corrected=corrected) + return var!( + similar(A, typeof(z), Base.reduced_indices(axes(A), dim)), A, w, dim; + mean = mean, corrected = corrected + ) end -function var(v::AbstractArray{<:Real}, w::UnitWeights, dim::Int; mean=nothing, - corrected::Union{Bool, Nothing}=nothing) +function var( + v::AbstractArray{<:Real}, w::UnitWeights, dim::Int; mean = nothing, + corrected::Union{Bool, Nothing} = nothing + ) length(w) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) corrected = depcheck(:var, :corrected, corrected) - return var(v; mean, corrected, dims=dim) + return var(v; mean, corrected, dims = dim) end ## std @@ -97,12 +111,14 @@ weights used: * `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)` * `Weights`: `ArgumentError` (bias correction not supported) """ -std(v::AbstractArray{<:Real}, w::AbstractWeights; mean=nothing, corrected::Union{Bool, Nothing}=nothing) = - sqrt.(var(v, w; mean=mean, corrected=depcheck(:std, :corrected, corrected))) +std(v::AbstractArray{<:Real}, w::AbstractWeights; mean = nothing, corrected::Union{Bool, Nothing} = nothing) = + sqrt.(var(v, w; mean = mean, corrected = depcheck(:std, :corrected, corrected))) 
-std(v::AbstractArray{<:Real}, w::AbstractWeights, dim::Int; - mean=nothing, corrected::Union{Bool, Nothing}=nothing) = - sqrt.(var(v, w, dim; mean=mean, corrected=depcheck(:std, :corrected, corrected))) +std( + v::AbstractArray{<:Real}, w::AbstractWeights, dim::Int; + mean = nothing, corrected::Union{Bool, Nothing} = nothing +) = + sqrt.(var(v, w, dim; mean = mean, corrected = depcheck(:std, :corrected, corrected))) ##### Fused statistics """ @@ -114,10 +130,10 @@ A weighting vector `w` can be specified to weight the estimates. Finally, bias correction is be applied to the variance calculation if `corrected=true`. See [`var`](@ref) documentation for more details. """ -function mean_and_var(x; corrected::Bool=true) +function mean_and_var(x; corrected::Bool = true) m = mean(x) - v = var(x, mean=m, corrected=corrected) - m, v + v = var(x, mean = m, corrected = corrected) + return m, v end """ @@ -130,51 +146,54 @@ Finally, bias correction is applied to the standard deviation calculation if `corrected=true`. See [`std`](@ref) documentation for more details. 
""" -function mean_and_std(x; corrected::Bool=true) +function mean_and_std(x; corrected::Bool = true) m = mean(x) - s = std(x, mean=m, corrected=corrected) - m, s + s = std(x, mean = m, corrected = corrected) + return m, s end -function mean_and_var(x::AbstractArray{<:Real}, w::AbstractWeights; corrected::Union{Bool, Nothing}=nothing) +function mean_and_var(x::AbstractArray{<:Real}, w::AbstractWeights; corrected::Union{Bool, Nothing} = nothing) m = mean(x, w) - v = var(x, w, mean=m, corrected=depcheck(:mean_and_var, :corrected, corrected)) - m, v + v = var(x, w, mean = m, corrected = depcheck(:mean_and_var, :corrected, corrected)) + return m, v end -function mean_and_std(x::AbstractArray{<:Real}, w::AbstractWeights; corrected::Union{Bool, Nothing}=nothing) +function mean_and_std(x::AbstractArray{<:Real}, w::AbstractWeights; corrected::Union{Bool, Nothing} = nothing) m = mean(x, w) - s = std(x, w, mean=m, corrected=depcheck(:mean_and_std, :corrected, corrected)) - m, s + s = std(x, w, mean = m, corrected = depcheck(:mean_and_std, :corrected, corrected)) + return m, s end -function mean_and_var(x::AbstractArray{<:Real}, dim::Int; corrected::Bool=true) - m = mean(x, dims=dim) - v = var(x, dims=dim, mean=m, corrected=corrected) - m, v +function mean_and_var(x::AbstractArray{<:Real}, dim::Int; corrected::Bool = true) + m = mean(x, dims = dim) + v = var(x, dims = dim, mean = m, corrected = corrected) + return m, v end -function mean_and_std(x::AbstractArray{<:Real}, dim::Int; corrected::Bool=true) - m = mean(x, dims=dim) - s = std(x, dims=dim, mean=m, corrected=corrected) - m, s +function mean_and_std(x::AbstractArray{<:Real}, dim::Int; corrected::Bool = true) + m = mean(x, dims = dim) + s = std(x, dims = dim, mean = m, corrected = corrected) + return m, s end -function mean_and_var(x::AbstractArray{<:Real}, w::AbstractWeights, dims::Int; - corrected::Union{Bool, Nothing}=nothing) - m = mean(x, w, dims=dims) - v = var(x, w, dims, mean=m, corrected=depcheck(:mean_and_var, 
:corrected, corrected)) - m, v +function mean_and_var( + x::AbstractArray{<:Real}, w::AbstractWeights, dims::Int; + corrected::Union{Bool, Nothing} = nothing + ) + m = mean(x, w, dims = dims) + v = var(x, w, dims, mean = m, corrected = depcheck(:mean_and_var, :corrected, corrected)) + return m, v end -function mean_and_std(x::AbstractArray{<:Real}, w::AbstractWeights, dims::Int; - corrected::Union{Bool, Nothing}=nothing) - m = mean(x, w, dims=dims) - s = std(x, w, dims, mean=m, corrected=depcheck(:mean_and_std, :corrected, corrected)) - m, s +function mean_and_std( + x::AbstractArray{<:Real}, w::AbstractWeights, dims::Int; + corrected::Union{Bool, Nothing} = nothing + ) + m = mean(x, w, dims = dims) + s = std(x, w, dims, mean = m, corrected = depcheck(:mean_and_std, :corrected, corrected)) + return m, s end - ##### General central moment function _moment2(v::AbstractArray{<:Real}, m::Real; corrected::Bool) n = length(v) @@ -221,7 +240,7 @@ function _moment3(v::AbstractArray{<:Real}, m::Real) end end end - s / n + return s / n end function _moment3(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real) @@ -235,9 +254,9 @@ function _moment3(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real) return zi^3 * wvi end end - s = sum(Broadcast.instantiate(broadcasted)) + s = sum(Broadcast.instantiate(broadcasted)) end - s / sum(wv) + return s / sum(wv) end function _moment4(v::AbstractArray{<:Real}, m::Real) @@ -253,7 +272,7 @@ function _moment4(v::AbstractArray{<:Real}, m::Real) end end end - s / n + return s / n end function _moment4(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real) @@ -269,7 +288,7 @@ function _moment4(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real) end s = sum(Broadcast.instantiate(broadcasted)) end - s / sum(wv) + return s / sum(wv) end function _momentk(v::AbstractArray{<:Real}, k::Int, m::Real) @@ -285,7 +304,7 @@ function _momentk(v::AbstractArray{<:Real}, k::Int, m::Real) end end end - s / n + return s / n end function 
_momentk(v::AbstractArray{<:Real}, k::Int, wv::AbstractWeights, m::Real) @@ -301,7 +320,7 @@ function _momentk(v::AbstractArray{<:Real}, k::Int, wv::AbstractWeights, m::Real end s = sum(Broadcast.instantiate(broadcasted)) end - s / sum(wv) + return s / sum(wv) end @@ -311,19 +330,19 @@ end Return the `k`th order central moment of a real-valued array `v`, optionally specifying a weighting vector `wv` and a center `m`. """ -function moment(v::AbstractArray{<:Real}, k::Int, m::Real=mean(v)) - k == 2 ? _moment2(v, m; corrected = false) : - k == 3 ? _moment3(v, m) : - k == 4 ? _moment4(v, m) : - _momentk(v, k, m) +function moment(v::AbstractArray{<:Real}, k::Int, m::Real = mean(v)) + return k == 2 ? _moment2(v, m; corrected = false) : + k == 3 ? _moment3(v, m) : + k == 4 ? _moment4(v, m) : + _momentk(v, k, m) end -function moment(v::AbstractArray{<:Real}, k::Int, wv::AbstractWeights, m::Real=mean(v, wv)) +function moment(v::AbstractArray{<:Real}, k::Int, wv::AbstractWeights, m::Real = mean(v, wv)) length(wv) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) - k == 2 ? _moment2(v, wv, m; corrected = false) : - k == 3 ? _moment3(v, wv, m) : - k == 4 ? _moment4(v, wv, m) : - _momentk(v, k, wv, m) + return k == 2 ? _moment2(v, wv, m; corrected = false) : + k == 3 ? _moment3(v, wv, m) : + k == 4 ? _moment4(v, wv, m) : + _momentk(v, k, wv, m) end function moment(v::AbstractArray{<:Real}, k::Int, wv::UnitWeights, m::Real) length(wv) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) @@ -340,7 +359,7 @@ end Compute the standardized skewness of a real-valued array `v`, optionally specifying a weighting vector `wv` and a center `m`. 
""" -function skewness(v::AbstractArray{<:Real}, m::Real=mean(v)) +function skewness(v::AbstractArray{<:Real}, m::Real = mean(v)) n = length(v) if iszero(n) z = zero(zero(eltype(v)) - m) @@ -356,13 +375,13 @@ function skewness(v::AbstractArray{<:Real}, m::Real=mean(v)) end end end - return (cm3/n) / sqrt((cm2/n)^3) # this is much faster than cm2^1.5 + return (cm3 / n) / sqrt((cm2 / n)^3) # this is much faster than cm2^1.5 end -function skewness(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real=mean(v, wv)) +function skewness(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real = mean(v, wv)) n = length(v) length(wv) == n || throw(DimensionMismatch("Inconsistent array lengths.")) - if iszero(n) + if iszero(n) z = zero(zero(eltype(v)) - m) cm2 = z^2 * zero(eltype(wv)) # empirical 2nd centered moment (variance) cm3 = cm2 * z # empirical 3rd centered moment @@ -378,7 +397,7 @@ function skewness(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real=mean(v, cm2, cm3 = reduce(_add, Broadcast.instantiate(broadcasted)) end sw = sum(wv) - return (cm3/sw) / sqrt((cm2/sw)^3) # this is much faster than cm2^1.5 + return (cm3 / sw) / sqrt((cm2 / sw)^3) # this is much faster than cm2^1.5 end function skewness(v::AbstractArray{<:Real}, wv::UnitWeights, m::Real) length(wv) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) @@ -393,7 +412,7 @@ end Compute the excess kurtosis of a real-valued array `v`, optionally specifying a weighting vector `wv` and a center `m`. 
""" -function kurtosis(v::AbstractArray{<:Real}, m::Real=mean(v)) +function kurtosis(v::AbstractArray{<:Real}, m::Real = mean(v)) n = length(v) if iszero(n) z = zero(zero(eltype(v)) - m) @@ -409,10 +428,10 @@ function kurtosis(v::AbstractArray{<:Real}, m::Real=mean(v)) end end end - return (cm4/n) / (cm2/n)^2 - 3 + return (cm4 / n) / (cm2 / n)^2 - 3 end -function kurtosis(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real=mean(v, wv)) +function kurtosis(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real = mean(v, wv)) n = length(v) length(wv) == n || throw(DimensionMismatch("Inconsistent array lengths.")) if iszero(n) @@ -433,7 +452,7 @@ function kurtosis(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real=mean(v, cm2, cm4 = reduce(_add, Broadcast.instantiate(broadcasted)) end sw = sum(wv) - return (cm4/sw) / (cm2/sw)^2 - 3 + return (cm4 / sw) / (cm2 / sw)^2 - 3 end function kurtosis(v::AbstractArray{<:Real}, wv::UnitWeights, m::Real) length(wv) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) @@ -454,8 +473,10 @@ Reference: Smith, P. J. 1995. A Recursive Formulation of the Old Problem of Obta Moments from Cumulants and Vice Versa. The American Statistician, 49(2), 217–218. 
https://doi.org/10.2307/2684642 """ -function cumulant(v::AbstractArray{<:Real}, krange::Union{Integer, AbstractRange{<:Integer}}, wv::AbstractWeights, - m::Real=mean(v, wv)) +function cumulant( + v::AbstractArray{<:Real}, krange::Union{Integer, AbstractRange{<:Integer}}, wv::AbstractWeights, + m::Real = mean(v, wv) + ) n = length(v) length(wv) == n || throw(DimensionMismatch("Inconsistent array lengths.")) kmin, kmax = extrema(krange) @@ -465,15 +486,15 @@ function cumulant(v::AbstractArray{<:Real}, krange::Union{Integer, AbstractRange cmoms = [moment(v, i, wv, m) for i in 2:kmax] cumls = Vector{eltype(cmoms)}(undef, kmax) cumls[1] = m - for i = 2:kmax - kn = cmoms[i-1] - for j = 2:(i-2) - kn -= binomial(i-1, j)*cmoms[j-1]*cumls[i-j] + for i in 2:kmax + kn = cmoms[i - 1] + for j in 2:(i - 2) + kn -= binomial(i - 1, j) * cmoms[j - 1] * cumls[i - j] end cumls[i] = kn end return cumls[krange] end -cumulant(v::AbstractArray{<:Real}, krange::Union{Integer, AbstractRange{<:Integer}}, m::Real=mean(v)) = +cumulant(v::AbstractArray{<:Real}, krange::Union{Integer, AbstractRange{<:Integer}}, m::Real = mean(v)) = cumulant(v, krange, uweights(length(v)), m) diff --git a/src/pairwise.jl b/src/pairwise.jl index c8023c6a2..f2b8d3b1a 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -11,7 +11,7 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix, x, y, symmetric::Bool end if symmetric m, n = size(dest) - for j in 1:n, i in (j+1):m + for j in 1:n, i in (j + 1):m dest[i, j] = dest[j, i] end end @@ -22,8 +22,12 @@ function check_vectors(x, y, skipmissing::Symbol) m = length(x) n = length(y) if !(all(xi -> xi isa AbstractVector, x) && all(yi -> yi isa AbstractVector, y)) - throw(ArgumentError("All entries in x and y must be vectors " * - "when skipmissing=:$skipmissing")) + throw( + ArgumentError( + "All entries in x and y must be vectors " * + "when skipmissing=:$skipmissing" + ) + ) end if m > 1 indsx = keys(first(x)) @@ -39,7 +43,7 @@ function check_vectors(x, y, 
skipmissing::Symbol) throw(ArgumentError("All input vectors must have the same indices")) end end - if m > 1 && n > 1 + return if m > 1 && n > 1 indsx == indsy || throw(ArgumentError("All input vectors must have the same indices")) end @@ -70,7 +74,7 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix, x, y, symmetric:: end if symmetric m, n = size(dest) - for j in 1:n, i in (j+1):m + for j in 1:n, i in (j + 1):m dest[i, j] = dest[j, i] end end @@ -94,14 +98,18 @@ function _pairwise!(::Val{:listwise}, f, dest::AbstractMatrix, x, y, symmetric:: # TODO: check whether wrapping views in a custom array type which asserts # that entries cannot be `missing` (similar to `skipmissing`) # could offer better performance - return _pairwise!(Val(:none), f, dest, - [view(xi, nminds′) for xi in x], - [view(yi, nminds′) for yi in y], - symmetric) + return _pairwise!( + Val(:none), f, dest, + [view(xi, nminds′) for xi in x], + [view(yi, nminds′) for yi in y], + symmetric + ) end -function _pairwise!(f, dest::AbstractMatrix, x, y; - symmetric::Bool=false, skipmissing::Symbol=:none) +function _pairwise!( + f, dest::AbstractMatrix, x, y; + symmetric::Bool = false, skipmissing::Symbol = :none + ) if !(skipmissing in (:none, :pairwise, :listwise)) throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise")) end @@ -123,7 +131,7 @@ using Base: typejoin_union_tuple # Identical to `Base.promote_typejoin` except that it uses `promote_type` # instead of `typejoin` to combine members of `Union` types -function promote_type_union(::Type{T}) where T +function promote_type_union(::Type{T}) where {T} if T === Union{} return Union{} elseif T isa UnionAll @@ -144,8 +152,10 @@ function _pairwise(::Val{skipmissing}, f, x, y, symmetric::Bool) where {skipmiss n = length(y′) T = Core.Compiler.return_type(f, Tuple{eltype(x′), eltype(y′)}) - Tsm = Core.Compiler.return_type((x, y) -> f(disallowmissing(x), disallowmissing(y)), - Tuple{eltype(x′), eltype(y′)}) + Tsm = 
Core.Compiler.return_type( + (x, y) -> f(disallowmissing(x), disallowmissing(y)), + Tuple{eltype(x′), eltype(y′)} + ) if skipmissing === :none dest = Matrix{T}(undef, m, n) @@ -158,7 +168,7 @@ function _pairwise(::Val{skipmissing}, f, x, y, symmetric::Bool) where {skipmiss # Preserve inferred element type isempty(dest) && return dest - _pairwise!(f, dest, x′, y′, symmetric=symmetric, skipmissing=skipmissing) + _pairwise!(f, dest, x′, y′, symmetric = symmetric, skipmissing = skipmissing) if isconcretetype(eltype(dest)) return dest @@ -232,14 +242,20 @@ julia> dest -0.866025 -1.0 1.0 ``` """ -function pairwise!(f, dest::AbstractMatrix, x, y=x; - symmetric::Bool=false, skipmissing::Symbol=:none) +function pairwise!( + f, dest::AbstractMatrix, x, y = x; + symmetric::Bool = false, skipmissing::Symbol = :none + ) if symmetric && x !== y - throw(ArgumentError("symmetric=true only makes sense passing " * - "a single set of variables (x === y)")) + throw( + ArgumentError( + "symmetric=true only makes sense passing " * + "a single set of variables (x === y)" + ) + ) end - return _pairwise!(f, dest, x, y, symmetric=symmetric, skipmissing=skipmissing) + return _pairwise!(f, dest, x, y, symmetric = symmetric, skipmissing = skipmissing) end """ @@ -295,10 +311,14 @@ julia> pairwise(cor, eachcol(y), skipmissing=:pairwise) -0.866025 -1.0 1.0 ``` """ -function pairwise(f, x, y=x; symmetric::Bool=false, skipmissing::Symbol=:none) +function pairwise(f, x, y = x; symmetric::Bool = false, skipmissing::Symbol = :none) if symmetric && x !== y - throw(ArgumentError("symmetric=true only makes sense passing " * - "a single set of variables (x === y)")) + throw( + ArgumentError( + "symmetric=true only makes sense passing " * + "a single set of variables (x === y)" + ) + ) end return _pairwise(Val(skipmissing), f, x, y, symmetric) @@ -307,23 +327,29 @@ end # cov(x) is faster than cov(x, x) _cov(x, y) = x === y ? 
cov(x) : cov(x, y) -pairwise!(::typeof(cov), dest::AbstractMatrix, x, y; - symmetric::Bool=false, skipmissing::Symbol=:none) = - pairwise!(_cov, dest, x, y, symmetric=symmetric, skipmissing=skipmissing) +pairwise!( + ::typeof(cov), dest::AbstractMatrix, x, y; + symmetric::Bool = false, skipmissing::Symbol = :none +) = + pairwise!(_cov, dest, x, y, symmetric = symmetric, skipmissing = skipmissing) -pairwise(::typeof(cov), x, y; symmetric::Bool=false, skipmissing::Symbol=:none) = - pairwise(_cov, x, y, symmetric=symmetric, skipmissing=skipmissing) +pairwise(::typeof(cov), x, y; symmetric::Bool = false, skipmissing::Symbol = :none) = + pairwise(_cov, x, y, symmetric = symmetric, skipmissing = skipmissing) -pairwise!(::typeof(cov), dest::AbstractMatrix, x; - symmetric::Bool=true, skipmissing::Symbol=:none) = - pairwise!(_cov, dest, x, x, symmetric=symmetric, skipmissing=skipmissing) +pairwise!( + ::typeof(cov), dest::AbstractMatrix, x; + symmetric::Bool = true, skipmissing::Symbol = :none +) = + pairwise!(_cov, dest, x, x, symmetric = symmetric, skipmissing = skipmissing) -pairwise(::typeof(cov), x; symmetric::Bool=true, skipmissing::Symbol=:none) = - pairwise(_cov, x, x, symmetric=symmetric, skipmissing=skipmissing) +pairwise(::typeof(cov), x; symmetric::Bool = true, skipmissing::Symbol = :none) = + pairwise(_cov, x, x, symmetric = symmetric, skipmissing = skipmissing) -pairwise!(::typeof(cor), dest::AbstractMatrix, x; - symmetric::Bool=true, skipmissing::Symbol=:none) = - pairwise!(cor, dest, x, x, symmetric=symmetric, skipmissing=skipmissing) +pairwise!( + ::typeof(cor), dest::AbstractMatrix, x; + symmetric::Bool = true, skipmissing::Symbol = :none +) = + pairwise!(cor, dest, x, x, symmetric = symmetric, skipmissing = skipmissing) -pairwise(::typeof(cor), x; symmetric::Bool=true, skipmissing::Symbol=:none) = - pairwise(cor, x, x, symmetric=symmetric, skipmissing=skipmissing) +pairwise(::typeof(cor), x; symmetric::Bool = true, skipmissing::Symbol = :none) = + 
pairwise(cor, x, x, symmetric = symmetric, skipmissing = skipmissing) diff --git a/src/partialcor.jl b/src/partialcor.jl index 33a3ee593..17c2a652e 100644 --- a/src/partialcor.jl +++ b/src/partialcor.jl @@ -16,12 +16,12 @@ end function _partialcor(x::AbstractVector, μx, y::AbstractVector, μy, Z::AbstractMatrix) p = size(Z, 2) p == 1 && return _partialcor(x, μx, y, μy, vec(Z)) - z₀ = view(Z, :, 1) + z₀ = view(Z, :, 1) Zmz₀ = view(Z, :, 2:p) μz₀ = mean(z₀) - rxz = _partialcor(x, μx, z₀, μz₀, Zmz₀) - rzy = _partialcor(z₀, μz₀, y, μy, Zmz₀) - rxy = _partialcor(x, μx, y, μy, Zmz₀)::typeof(rxz) + rxz = _partialcor(x, μx, z₀, μz₀, Zmz₀) + rzy = _partialcor(z₀, μz₀, y, μy, Zmz₀) + rxy = _partialcor(x, μx, y, μy, Zmz₀)::typeof(rxz) return (rxy - rxz * rzy) / (sqrt(1 - rxz^2) * sqrt(1 - rzy^2)) end diff --git a/src/rankcorr.jl b/src/rankcorr.jl index 1713300ff..4f0887ff5 100644 --- a/src/rankcorr.jl +++ b/src/rankcorr.jl @@ -31,13 +31,13 @@ function corspearman(X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}) C = Matrix{Float64}(I, n, 1) any(isnan, y) && return fill!(C, NaN) yrank = tiedrank(y) - for j = 1:n + for j in 1:n Xj = view(X, :, j) if any(isnan, Xj) - C[j,1] = NaN + C[j, 1] = NaN else Xjrank = tiedrank(Xj) - C[j,1] = cor(Xjrank, yrank) + C[j, 1] = cor(Xjrank, yrank) end end return C @@ -50,13 +50,13 @@ function corspearman(x::AbstractVector{<:Real}, Y::AbstractMatrix{<:Real}) C = Matrix{Float64}(I, 1, n) any(isnan, x) && return fill!(C, NaN) xrank = tiedrank(x) - for j = 1:n + for j in 1:n Yj = view(Y, :, j) if any(isnan, Yj) - C[1,j] = NaN + C[1, j] = NaN else Yjrank = tiedrank(Yj) - C[1,j] = cor(xrank, Yjrank) + C[1, j] = cor(xrank, Yjrank) end end return C @@ -66,23 +66,23 @@ function corspearman(X::AbstractMatrix{<:Real}) n = size(X, 2) C = Matrix{Float64}(I, n, n) anynan = Vector{Bool}(undef, n) - for j = 1:n + for j in 1:n Xj = view(X, :, j) anynan[j] = any(isnan, Xj) if anynan[j] - C[:,j] .= NaN - C[j,:] .= NaN - C[j,j] = 1 + C[:, j] .= NaN + C[j, :] .= 
NaN + C[j, j] = 1 continue end Xjrank = tiedrank(Xj) - for i = 1:(j-1) + for i in 1:(j - 1) Xi = view(X, :, i) if anynan[i] - C[i,j] = C[j,i] = NaN + C[i, j] = C[j, i] = NaN else Xirank = tiedrank(Xi) - C[i,j] = C[j,i] = cor(Xjrank, Xirank) + C[i, j] = C[j, i] = cor(Xjrank, Xirank) end end end @@ -95,20 +95,20 @@ function corspearman(X::AbstractMatrix{<:Real}, Y::AbstractMatrix{<:Real}) nr = size(X, 2) nc = size(Y, 2) C = Matrix{Float64}(undef, nr, nc) - for j = 1:nr + for j in 1:nr Xj = view(X, :, j) if any(isnan, Xj) - C[j,:] .= NaN + C[j, :] .= NaN continue end Xjrank = tiedrank(Xj) - for i = 1:nc + for i in 1:nc Yi = view(Y, :, i) if any(isnan, Yi) - C[j,i] = NaN + C[j, i] = NaN else Yirank = tiedrank(Yi) - C[j,i] = cor(Xjrank, Yirank) + C[j, i] = cor(Xjrank, Yirank) end end end @@ -125,10 +125,14 @@ end # Knight, William R. “A Computer Method for Calculating Kendall's Tau with Ungrouped Data.” # Journal of the American Statistical Association, vol. 61, no. 314, 1966, pp. 436–439. # JSTOR, www.jstor.org/stable/2282833. -function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx::AbstractArray{<:Integer}=sortperm(x)) - if any(isnan, x) || any(isnan, y) return NaN end +function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx::AbstractArray{<:Integer} = sortperm(x)) + if any(isnan, x) || any(isnan, y) + return NaN + end n = length(x) - if n != length(y) error("Vectors must have same length") end + if n != length(y) + error("Vectors must have same length") + end # Initial sorting permute!(x, permx) @@ -139,7 +143,7 @@ function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx ntiesx = ndoubleties = nswaps = widen(0) k = 0 - for i = 2:n + for i in 2:n if x[i - 1] == x[i] k += 1 elseif k > 0 @@ -148,7 +152,7 @@ function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx # double ties can be counted by calling countties. 
sort!(view(y, (i - k - 1):(i - 1))) ntiesx += div(widen(k) * (k + 1), 2) # Must use wide integers here - ndoubleties += countties(y, i - k - 1, i - 1) + ndoubleties += countties(y, i - k - 1, i - 1) k = 0 end end @@ -163,8 +167,8 @@ function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx # Calls to float below prevent possible overflow errors when # length(x) exceeds 77_936 (32 bit) or 5_107_605_667 (64 bit) - (npairs + ndoubleties - ntiesx - ntiesy - 2 * nswaps) / - sqrt(float(npairs - ntiesx) * float(npairs - ntiesy)) + return (npairs + ndoubleties - ntiesx - ntiesy - 2 * nswaps) / + sqrt(float(npairs - ntiesx) * float(npairs - ntiesy)) end """ @@ -177,23 +181,23 @@ corkendall(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}) = corkendall!(c function corkendall(X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}) permy = sortperm(y) - return([corkendall!(copy(y), X[:,i], permy) for i in 1:size(X, 2)]) + return ([corkendall!(copy(y), X[:, i], permy) for i in 1:size(X, 2)]) end function corkendall(x::AbstractVector{<:Real}, Y::AbstractMatrix{<:Real}) n = size(Y, 2) permx = sortperm(x) - return(reshape([corkendall!(copy(x), Y[:,i], permx) for i in 1:n], 1, n)) + return (reshape([corkendall!(copy(x), Y[:, i], permx) for i in 1:n], 1, n)) end function corkendall(X::AbstractMatrix{<:Real}) n = size(X, 2) C = Matrix{Float64}(I, n, n) - for j = 2:n - permx = sortperm(X[:,j]) - for i = 1:j - 1 - C[j,i] = corkendall!(X[:,j], X[:,i], permx) - C[i,j] = C[j,i] + for j in 2:n + permx = sortperm(X[:, j]) + for i in 1:(j - 1) + C[j, i] = corkendall!(X[:, j], X[:, i], permx) + C[i, j] = C[j, i] end end return C @@ -203,10 +207,10 @@ function corkendall(X::AbstractMatrix{<:Real}, Y::AbstractMatrix{<:Real}) nr = size(X, 2) nc = size(Y, 2) C = Matrix{Float64}(undef, nr, nc) - for j = 1:nr - permx = sortperm(X[:,j]) - for i = 1:nc - C[j,i] = corkendall!(X[:,j], Y[:,i], permx) + for j in 1:nr + permx = sortperm(X[:, j]) + for i in 1:nc + C[j, i] = 
corkendall!(X[:, j], Y[:, i], permx) end end return C @@ -224,7 +228,7 @@ function countties(x::AbstractVector, lo::Integer, hi::Integer) # length(x) exceeds 2^16 (32 bit) or 2^32 (64 bit) thistiecount = result = widen(0) checkbounds(x, lo:hi) - for i = (lo + 1):hi + for i in (lo + 1):hi if x[i] == x[i - 1] thistiecount += 1 elseif thistiecount > 0 @@ -236,7 +240,7 @@ function countties(x::AbstractVector, lo::Integer, hi::Integer) if thistiecount > 0 result += div(thistiecount * (thistiecount + 1), 2) end - result + return result end # Tests appear to show that a value of 64 is optimal, @@ -251,7 +255,7 @@ const SMALL_THRESHOLD = 64 Mutates `v` by sorting elements `x[lo:hi]` using the merge sort algorithm. This method is a copy-paste-edit of sort! in base/sort.jl, amended to return the bubblesort distance. """ -function merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVector=similar(v, 0)) +function merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVector = similar(v, 0)) # Use of widen below prevents possible overflow errors when # length(v) exceeds 2^16 (32 bit) or 2^32 (64 bit) nswaps = widen(0) @@ -261,7 +265,7 @@ function merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVec m = midpoint(lo, hi) (length(t) < m - lo + 1) && resize!(t, m - lo + 1) - nswaps = merge_sort!(v, lo, m, t) + nswaps = merge_sort!(v, lo, m, t) nswaps += merge_sort!(v, m + 1, hi, t) i, j = 1, lo @@ -294,7 +298,7 @@ end # insertion_sort! and midpoint copied from Julia Base # (commit 28330a2fef4d9d149ba0fd3ffa06347b50067647, dated 20 Sep 2020) -midpoint(lo::T, hi::T) where T <: Integer = lo + ((hi - lo) >>> 0x01) +midpoint(lo::T, hi::T) where {T <: Integer} = lo + ((hi - lo) >>> 0x01) midpoint(lo::Integer, hi::Integer) = midpoint(promote(lo, hi)...) """ @@ -304,9 +308,11 @@ Mutates `v` by sorting elements `x[lo:hi]` using the insertion sort algorithm. This method is a copy-paste-edit of sort! 
in base/sort.jl, amended to return the bubblesort distance. """ function insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer) - if lo == hi return widen(0) end + if lo == hi + return widen(0) + end nswaps = widen(0) - for i = lo + 1:hi + for i in (lo + 1):hi j = i x = v[i] while j > lo diff --git a/src/ranking.jl b/src/ranking.jl index 317f24186..a63ae12aa 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -11,19 +11,20 @@ function _check_randparams(rks, x, p) nx = length(x) np = length(p) nx == np == n || throw( - DimensionMismatch("lengths of x $nx and p $np do not match that of ranks $n")) + DimensionMismatch("lengths of x $nx and p $np do not match that of ranks $n") + ) return n end # ranking helper function: calls sortperm(x) and then ranking method f! -function _rank(f!, x::AbstractArray, R::Type=Int; sortkwargs...) +function _rank(f!, x::AbstractArray, R::Type = Int; sortkwargs...) rks = similar(x, R) ord = reshape(sortperm(vec(x); sortkwargs...), size(x)) return f!(rks, x, ord) end # ranking helper function for arrays with missing values -function _rank(f!, x::AbstractArray{>: Missing}, R::Type=Int; sortkwargs...) +function _rank(f!, x::AbstractArray{>:Missing}, R::Type = Int; sortkwargs...) 
inds = findall(!ismissing, vec(x)) isempty(inds) && return missings(R, size(x)) xv = disallowmissing(view(vec(x), inds)) @@ -143,7 +144,7 @@ function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::AbstractArray{<:Int if cx != v # fill average rank to s : e-1 ar = (s + e - 1) / 2 - for i = s : e-1 + for i in s:(e - 1) rks[p[i]] = ar end # switch to next range @@ -154,7 +155,7 @@ function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::AbstractArray{<:Int # the last range ar = (s + n) / 2 - for i = s : n + for i in s:n rks[p[i]] = ar end end diff --git a/src/reliability.jl b/src/reliability.jl index c96ec495a..32b14a40e 100644 --- a/src/reliability.jl +++ b/src/reliability.jl @@ -10,6 +10,7 @@ function Base.show(io::IO, x::CronbachAlpha) for (idx, val) in enumerate(x.dropped) @printf(io, "item %i: %.4f\n", idx, val) end + return end """ @@ -52,13 +53,17 @@ item 4: 0.7826 """ function cronbachalpha(covmatrix::AbstractMatrix{<:Real}) if !isposdef(covmatrix) - throw(ArgumentError("Covariance matrix must be positive definite. " * - "Maybe you passed the data matrix instead of its covariance matrix? " * - "If so, call `cronbachalpha(cov(...))` instead.")) + throw( + ArgumentError( + "Covariance matrix must be positive definite. " * + "Maybe you passed the data matrix instead of its covariance matrix? " * + "If so, call `cronbachalpha(cov(...))` instead." 
+ ) + ) end k = size(covmatrix, 2) k > 1 || throw(ArgumentError("Covariance matrix must have more than one column.")) - v = vec(sum(covmatrix, dims=1)) + v = vec(sum(covmatrix, dims = 1)) σ = sum(v) for i in axes(v, 1) v[i] -= covmatrix[i, i] @@ -67,8 +72,10 @@ function cronbachalpha(covmatrix::AbstractMatrix{<:Real}) alpha = k * (1 - σ_diag / σ) / (k - 1) if k > 2 - dropped = typeof(alpha)[(k - 1) * (1 - (σ_diag - covmatrix[i, i]) / (σ - 2*v[i] - covmatrix[i, i])) / (k - 2) - for i in 1:k] + dropped = typeof(alpha)[ + (k - 1) * (1 - (σ_diag - covmatrix[i, i]) / (σ - 2 * v[i] - covmatrix[i, i])) / (k - 2) + for i in 1:k + ] else # if k = 2 do not produce dropped; this has to be also # correctly handled in show diff --git a/src/robust.jl b/src/robust.jl index f21595395..425f94e26 100644 --- a/src/robust.jl +++ b/src/robust.jl @@ -8,7 +8,7 @@ # Trimmed set "Return the upper and lower bound elements used by `trim` and `winsor`" -function uplo(x::AbstractVector; prop::Real=0.0, count::Integer=0) +function uplo(x::AbstractVector; prop::Real = 0.0, count::Integer = 0) n = length(x) n > 0 || throw(ArgumentError("x can not be empty.")) @@ -17,15 +17,15 @@ function uplo(x::AbstractVector; prop::Real=0.0, count::Integer=0) count = floor(Int, n * prop) else prop == 0 || throw(ArgumentError("prop and count can not both be > 0.")) - 0 <= count < n/2 || throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2.")) + 0 <= count < n / 2 || throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2.")) end # indices for lowest count values x2 = Base.copymutable(x) - lo = partialsort!(x2, count+1) - up = partialsort!(x2, n-count) + lo = partialsort!(x2, count + 1) + up = partialsort!(x2, n - count) - up, lo + return up, lo end """ @@ -49,10 +49,10 @@ julia> collect(trim([5,2,4,3,1], prop=0.2)) 3 ``` """ -function trim(x::AbstractVector; prop::Real=0.0, count::Integer=0) - up, lo = uplo(x; prop=prop, count=count) +function trim(x::AbstractVector; prop::Real = 0.0, 
count::Integer = 0) + up, lo = uplo(x; prop = prop, count = count) - (xi for xi in x if lo <= xi <= up) + return (xi for xi in x if lo <= xi <= up) end """ @@ -60,9 +60,9 @@ end A variant of [`trim`](@ref) that modifies `x` in place. """ -function trim!(x::AbstractVector; prop::Real=0.0, count::Integer=0) - up, lo = uplo(x; prop=prop, count=count) - ix = (i for (i,xi) in enumerate(x) if lo > xi || xi > up) +function trim!(x::AbstractVector; prop::Real = 0.0, count::Integer = 0) + up, lo = uplo(x; prop = prop, count = count) + ix = (i for (i, xi) in enumerate(x) if lo > xi || xi > up) deleteat!(x, ix) return x end @@ -90,10 +90,10 @@ julia> collect(winsor([5,2,3,4,1], prop=0.2)) 2 ``` """ -function winsor(x::AbstractVector; prop::Real=0.0, count::Integer=0) - up, lo = uplo(x; prop=prop, count=count) +function winsor(x::AbstractVector; prop::Real = 0.0, count::Integer = 0) + up, lo = uplo(x; prop = prop, count = count) - (clamp(xi, lo, up) for xi in x) + return (clamp(xi, lo, up) for xi in x) end """ @@ -101,8 +101,8 @@ end A variant of [`winsor`](@ref) that modifies vector `x` in place. """ -function winsor!(x::AbstractVector; prop::Real=0.0, count::Integer=0) - copyto!(x, winsor(x; prop=prop, count=count)) +function winsor!(x::AbstractVector; prop::Real = 0.0, count::Integer = 0) + copyto!(x, winsor(x; prop = prop, count = count)) return x end @@ -120,7 +120,7 @@ end Compute the variance of the trimmed mean of `x`. This function uses the Winsorized variance, as described in Wilcox (2010). 
""" -function trimvar(x::AbstractVector; prop::Real=0.0, count::Integer=0) +function trimvar(x::AbstractVector; prop::Real = 0.0, count::Integer = 0) n = length(x) n > 0 || throw(ArgumentError("x can not be empty.")) @@ -128,9 +128,9 @@ function trimvar(x::AbstractVector; prop::Real=0.0, count::Integer=0) 0 <= prop < 0.5 || throw(ArgumentError("prop must satisfy 0 ≤ prop < 0.5.")) count = floor(Int, n * prop) else - 0 <= count < n/2 || throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2.")) - prop = count/n + 0 <= count < n / 2 || throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2.")) + prop = count / n end - return var(winsor(x, count=count)) / (n * (1 - 2prop)^2) + return var(winsor(x, count = count)) / (n * (1 - 2prop)^2) end diff --git a/src/sampling.jl b/src/sampling.jl index c6294c979..8bf05ad48 100644 --- a/src/sampling.jl +++ b/src/sampling.jl @@ -1,4 +1,3 @@ - ########################################################### # # (non-weighted) sampling @@ -17,11 +16,11 @@ function direct_sample!(rng::AbstractRNG, a::UnitRange, x::AbstractArray) s = Sampler(rng, 1:length(a)) b = a[1] - 1 if b == 0 - for i = 1:length(x) + for i in 1:length(x) x[i] = rand(rng, s) end else - for i = 1:length(x) + for i in 1:length(x) x[i] = b + rand(rng, s) end end @@ -43,7 +42,7 @@ function direct_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) Base.mightalias(a, x) && throw(ArgumentError("output array x must not share memory with input array a")) s = Sampler(rng, 1:length(a)) - for i = 1:length(x) + for i in 1:length(x) x[i] = a[rand(rng, s)] end return x @@ -53,12 +52,12 @@ direct_sample!(a::AbstractArray, x::AbstractArray) = direct_sample!(default_rng( # check whether we can use T to store indices 1:n exactly, and # use some heuristics to decide whether it is beneficial for k samples # (true for a subset of hardware-supported numeric types) -_storeindices(n, k, ::Type{T}) where {T<:Integer} = n ≤ typemax(T) -_storeindices(n, k, ::Type{T}) 
where {T<:Union{Float32,Float64}} = k < 22 && n ≤ maxintfloat(T) +_storeindices(n, k, ::Type{T}) where {T <: Integer} = n ≤ typemax(T) +_storeindices(n, k, ::Type{T}) where {T <: Union{Float32, Float64}} = k < 22 && n ≤ maxintfloat(T) _storeindices(n, k, ::Type{Complex{T}}) where {T} = _storeindices(n, k, T) _storeindices(n, k, ::Type{Rational{T}}) where {T} = k < 16 && _storeindices(n, k, T) _storeindices(n, k, T) = false -storeindices(n, k, ::Type{T}) where {T<:Base.HWNumber} = _storeindices(n, k, T) +storeindices(n, k, ::Type{T}) where {T <: Base.HWNumber} = _storeindices(n, k, T) storeindices(n, k, T) = false # order results of a sampler that does not order automatically @@ -72,14 +71,14 @@ function sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractArray, x::Abstra # in some cases it might be faster to check # issorted(a) to see if we can just sort x if storeindices(n, k, eltype(x)) - sort!(sampler!(rng, Base.OneTo(n), x), by=real, lt=<) - for i = 1:k + sort!(sampler!(rng, Base.OneTo(n), x), by = real, lt = <) + for i in 1:k x[i] = a[Int(x[i])] end else indices = Array{Int}(undef, k) sort!(sampler!(rng, Base.OneTo(n), indices)) - for i = 1:k + for i in 1:k x[i] = a[indices[i]] end end @@ -88,14 +87,16 @@ end # special case of a range can be done more efficiently sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractRange, x::AbstractArray) = - sort!(sampler!(rng, a, x), rev=step(a)<0) + sort!(sampler!(rng, a, x), rev = step(a) < 0) # weighted case: -sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractArray, - wv::AbstractWeights, x::AbstractArray) = +sample_ordered!( + sampler!, rng::AbstractRNG, a::AbstractArray, + wv::AbstractWeights, x::AbstractArray +) = sample_ordered!(rng, a, x) do rng, a, x - sampler!(rng, a, wv, x) - end + sampler!(rng, a, wv, x) +end ### draw a pair of distinct integers in [1:n] @@ -140,8 +141,10 @@ Reference: D. Knuth. *The Art of Computer Programming*. Vol 2, 3.4.2, p.142. This algorithm consumes `length(a)` random numbers. 
It requires no additional memory space. Suitable for the case where memory is tight. """ -function knuths_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray; - initshuffle::Bool=true) +function knuths_sample!( + rng::AbstractRNG, a::AbstractArray, x::AbstractArray; + initshuffle::Bool = true + ) 1 == firstindex(a) == firstindex(x) || throw(ArgumentError("non 1-based arrays are not supported")) Base.mightalias(a, x) && @@ -151,11 +154,11 @@ function knuths_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray; k <= n || error("length(x) should not exceed length(a)") # initialize - for i = 1:k + for i in 1:k x[i] = a[i] end if initshuffle - for j = 1:k + for j in 1:k l = rand(rng, j:k) if l != j t = x[j] @@ -167,15 +170,15 @@ function knuths_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray; # scan remaining s = Sampler(rng, 1:k) - for i = k+1:n + for i in (k + 1):n if rand(rng) * i < k # keep it with probability k / i x[rand(rng, s)] = a[i] end end return x end -knuths_sample!(a::AbstractArray, x::AbstractArray; initshuffle::Bool=true) = - knuths_sample!(default_rng(), a, x; initshuffle=initshuffle) +knuths_sample!(a::AbstractArray, x::AbstractArray; initshuffle::Bool = true) = + knuths_sample!(default_rng(), a, x; initshuffle = initshuffle) """ fisher_yates_sample!([rng], a::AbstractArray, x::AbstractArray) @@ -211,11 +214,11 @@ function fisher_yates_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArr k <= n || error("length(x) should not exceed length(a)") inds = Vector{Int}(undef, n) - for i = 1:n + for i in 1:n inds[i] = i end - for i = 1:k + for i in 1:k j = rand(rng, i:n) t = inds[j] inds[j] = inds[i] @@ -260,7 +263,7 @@ function self_avoid_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray push!(s, idx) # remaining - for i = 2:k + for i in 2:k idx = rand(rng, rgen) while idx in s idx = rand(rng, rgen) @@ -302,14 +305,14 @@ function seqsample_a!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) n -= 1 q *= (n - 
k) / n end - x[j+=1] = a[i+=1] + x[j += 1] = a[i += 1] n -= 1 k -= 1 end if k > 0 # checking k > 0 is necessary: x can be empty s = trunc(Int, n * rand(rng)) - x[j+1] = a[i+(s+1)] + x[j + 1] = a[i + (s + 1)] end return x end @@ -348,14 +351,14 @@ function seqsample_c!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) u -= 1 end s = trunc(Int, minv) + 1 - x[j+=1] = a[i+=s] + x[j += 1] = a[i += s] n -= s k -= 1 end if k > 0 s = trunc(Int, n * rand(rng)) - x[j+1] = a[i+(s+1)] + x[j + 1] = a[i + (s + 1)] end return x end @@ -383,7 +386,7 @@ function seqsample_d!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) i = 0 j = 0 - vprime = exp(-randexp(rng)/n) + vprime = exp(-randexp(rng) / n) q1 = N - n + 1 q2 = q1 / N alpha = 1 / 13 # choose alpha value @@ -391,14 +394,14 @@ function seqsample_d!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) while n > 1 && threshold < N while true - local X + local X while true X = N * (1 - vprime) s = trunc(Int, X) if s < q1 break end - vprime = exp(-randexp(rng)/n) + vprime = exp(-randexp(rng) / n) end y = rand(rng) / q2 @@ -426,15 +429,15 @@ function seqsample_d!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) top -= 1 end - if log(y) < (n - 1)*(log(N) - log(N - X)) - vprime = exp(-randexp(rng) / (n-1)) + if log(y) < (n - 1) * (log(N) - log(N - X)) + vprime = exp(-randexp(rng) / (n - 1)) break end - vprime = exp(-randexp(rng)/n) + vprime = exp(-randexp(rng) / n) end j += 1 - i += s+1 + i += s + 1 x[j] = a[i] N = N - s - 1 n -= 1 @@ -443,11 +446,11 @@ function seqsample_d!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) threshold -= alpha end - if n > 1 - seqsample_a!(rng, a[i+1:end], @view x[j+1:end]) + return if n > 1 + seqsample_a!(rng, a[(i + 1):end], @view x[(j + 1):end]) else s = trunc(Int, N * vprime) - x[j+=1] = a[i+=s+1] + x[j += 1] = a[i += s + 1] end end @@ -485,8 +488,10 @@ Optionally specify a random number generator `rng` as the first argument Output array `a` must not be the same object as `x` 
or `wv` nor share memory with them, or the result may be incorrect. """ -function sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray; - replace::Bool=true, ordered::Bool=false) +function sample!( + rng::AbstractRNG, a::AbstractArray, x::AbstractArray; + replace::Bool = true, ordered::Bool = false + ) 1 == firstindex(a) == firstindex(x) || throw(ArgumentError("non 1-based arrays are not supported")) n = length(a) @@ -523,8 +528,8 @@ function sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray; end return x end -sample!(a::AbstractArray, x::AbstractArray; replace::Bool=true, ordered::Bool=false) = - sample!(default_rng(), a, x; replace=replace, ordered=ordered) +sample!(a::AbstractArray, x::AbstractArray; replace::Bool = true, ordered::Bool = false) = + sample!(default_rng(), a, x; replace = replace, ordered = ordered) """ @@ -540,12 +545,14 @@ items appear in the same order as in `a`) should be taken. Optionally specify a random number generator `rng` as the first argument (defaults to `Random.default_rng()`). """ -function sample(rng::AbstractRNG, a::AbstractArray{T}, n::Integer; - replace::Bool=true, ordered::Bool=false) where T - sample!(rng, a, Vector{T}(undef, n); replace=replace, ordered=ordered) +function sample( + rng::AbstractRNG, a::AbstractArray{T}, n::Integer; + replace::Bool = true, ordered::Bool = false + ) where {T} + return sample!(rng, a, Vector{T}(undef, n); replace = replace, ordered = ordered) end -sample(a::AbstractArray, n::Integer; replace::Bool=true, ordered::Bool=false) = - sample(default_rng(), a, n; replace=replace, ordered=ordered) +sample(a::AbstractArray, n::Integer; replace::Bool = true, ordered::Bool = false) = + sample(default_rng(), a, n; replace = replace, ordered = ordered) """ @@ -561,12 +568,14 @@ items appear in the same order as in `a`) should be taken. Optionally specify a random number generator `rng` as the first argument (defaults to `Random.default_rng()`). 
""" -function sample(rng::AbstractRNG, a::AbstractArray{T}, dims::Dims; - replace::Bool=true, ordered::Bool=false) where T - sample!(rng, a, Array{T}(undef, dims); replace=replace, ordered=ordered) +function sample( + rng::AbstractRNG, a::AbstractArray{T}, dims::Dims; + replace::Bool = true, ordered::Bool = false + ) where {T} + return sample!(rng, a, Array{T}(undef, dims); replace = replace, ordered = ordered) end -sample(a::AbstractArray, dims::Dims; replace::Bool=true, ordered::Bool=false) = - sample(default_rng(), a, dims; replace=replace, ordered=ordered) +sample(a::AbstractArray, dims::Dims; replace::Bool = true, ordered::Bool = false) = + sample(default_rng(), a, dims; replace = replace, ordered = ordered) ################################################################ # @@ -618,8 +627,10 @@ Noting `k=length(x)` and `n=length(a)`, this algorithm: * has time complexity ``O(n k)``, as scanning the weight vector each time takes ``O(n)`` * requires no additional memory space. """ -function direct_sample!(rng::AbstractRNG, a::AbstractArray, - wv::AbstractWeights, x::AbstractArray) +function direct_sample!( + rng::AbstractRNG, a::AbstractArray, + wv::AbstractWeights, x::AbstractArray + ) Base.mightalias(a, x) && throw(ArgumentError("output array x must not share memory with input array a")) Base.mightalias(x, wv) && @@ -628,7 +639,7 @@ function direct_sample!(rng::AbstractRNG, a::AbstractArray, throw(ArgumentError("non 1-based arrays are not supported")) n = length(a) length(wv) == n || throw(DimensionMismatch("Inconsistent lengths.")) - for i = 1:length(x) + for i in 1:length(x) x[i] = a[sample(rng, wv)] end return x @@ -638,16 +649,20 @@ direct_sample!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray) = # Specialization for `UnitWeights` function direct_sample!( - rng::AbstractRNG, a::AbstractArray, wv::UnitWeights, x::AbstractArray, -) + rng::AbstractRNG, a::AbstractArray, wv::UnitWeights, x::AbstractArray, + ) if length(a) != length(wv) - 
throw(DimensionMismatch(LazyString( - "Number of samples (", - length(a), - ") and sample weights (", - length(wv), - ") must be equal.", - ))) + throw( + DimensionMismatch( + LazyString( + "Number of samples (", + length(a), + ") and sample weights (", + length(wv), + ") must be equal.", + ) + ) + ) end return direct_sample!(rng, a, x) end @@ -697,8 +712,10 @@ when the corresponding sample is picked. Noting `k=length(x)` and `n=length(a)`, this algorithm consumes ``O(k)`` random numbers, and has overall time complexity ``O(n k)``. """ -function naive_wsample_norep!(rng::AbstractRNG, a::AbstractArray, - wv::AbstractWeights, x::AbstractArray) +function naive_wsample_norep!( + rng::AbstractRNG, a::AbstractArray, + wv::AbstractWeights, x::AbstractArray + ) Base.mightalias(a, x) && throw(ArgumentError("output array x must not share memory with input array a")) Base.mightalias(x, wv) && @@ -714,12 +731,12 @@ function naive_wsample_norep!(rng::AbstractRNG, a::AbstractArray, w = Vector{Float64}(undef, n) copyto!(w, wv) - for i = 1:k + for i in 1:k u = rand(rng) * wsum j = 1 c = w[1] while c < u && j < n - c += w[j+=1] + c += w[j += 1] end x[i] = a[j] @@ -744,8 +761,10 @@ Reference: Efraimidis, P. S., Spirakis, P. G. "Weighted random sampling with a r Noting `k=length(x)` and `n=length(a)`, this algorithm takes ``O(n + k \\log k)`` processing time to draw ``k`` elements. It consumes ``n`` random numbers. 
""" -function efraimidis_a_wsample_norep!(rng::AbstractRNG, a::AbstractArray, - wv::AbstractWeights, x::AbstractArray) +function efraimidis_a_wsample_norep!( + rng::AbstractRNG, a::AbstractArray, + wv::AbstractWeights, x::AbstractArray + ) Base.mightalias(a, x) && throw(ArgumentError("output array x must not share memory with input array a")) Base.mightalias(x, wv) && @@ -760,7 +779,7 @@ function efraimidis_a_wsample_norep!(rng::AbstractRNG, a::AbstractArray, # calculate keys for all items keys = randexp(rng, n) for i in 1:n - keys[i] = wv[i]/keys[i] + keys[i] = wv[i] / keys[i] end # return items with largest keys @@ -786,8 +805,10 @@ Reference: Efraimidis, P. S., Spirakis, P. G. "Weighted random sampling with a r Noting `k=length(x)` and `n=length(a)`, this algorithm takes ``O(k \\log(k) \\log(n / k))`` processing time to draw ``k`` elements. It consumes ``n`` random numbers. """ -function efraimidis_ares_wsample_norep!(rng::AbstractRNG, a::AbstractArray, - wv::AbstractWeights, x::AbstractArray) +function efraimidis_ares_wsample_norep!( + rng::AbstractRNG, a::AbstractArray, + wv::AbstractWeights, x::AbstractArray + ) Base.mightalias(a, x) && throw(ArgumentError("output array x must not share memory with input array a")) Base.mightalias(x, wv) && @@ -801,7 +822,7 @@ function efraimidis_ares_wsample_norep!(rng::AbstractRNG, a::AbstractArray, k > 0 || return x # initialize priority queue - pq = Vector{Pair{Float64,Int}}(undef, k) + pq = Vector{Pair{Float64, Int}}(undef, k) i = 0 s = 0 for _s in 1:n @@ -810,7 +831,7 @@ function efraimidis_ares_wsample_norep!(rng::AbstractRNG, a::AbstractArray, w < 0 && error("Negative weight found in weight vector at index $s") if w > 0 i += 1 - pq[i] = (w/randexp(rng) => s) + pq[i] = (w / randexp(rng) => s) end i >= k && break end @@ -820,11 +841,11 @@ function efraimidis_ares_wsample_norep!(rng::AbstractRNG, a::AbstractArray, # set threshold threshold = pq[1].first - for i in s+1:n + for i in (s + 1):n w = wv[i] w < 0 && 
error("Negative weight found in weight vector at index $i") w > 0 || continue - key = w/randexp(rng) + key = w / randexp(rng) # if key is larger than the threshold if key > threshold @@ -859,9 +880,11 @@ Reference: Efraimidis, P. S., Spirakis, P. G. "Weighted random sampling with a r Noting `k=length(x)` and `n=length(a)`, this algorithm takes ``O(k \\log(k) \\log(n / k))`` processing time to draw ``k`` elements. It consumes ``O(k \\log(n / k))`` random numbers. """ -function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, - wv::AbstractWeights, x::AbstractArray; - ordered::Bool=false) +function efraimidis_aexpj_wsample_norep!( + rng::AbstractRNG, a::AbstractArray, + wv::AbstractWeights, x::AbstractArray; + ordered::Bool = false + ) Base.mightalias(a, x) && throw(ArgumentError("output array x must not share memory with input array a")) Base.mightalias(x, wv) && @@ -875,7 +898,7 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, k > 0 || return x # initialize priority queue - pq = Vector{Pair{Float64,Int}}(undef, k) + pq = Vector{Pair{Float64, Int}}(undef, k) i = 0 s = 0 for _s in 1:n @@ -884,7 +907,7 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, w < 0 && error("Negative weight found in weight vector at index $s") if w > 0 i += 1 - pq[i] = (w/randexp(rng) => s) + pq[i] = (w / randexp(rng) => s) end i >= k && break end @@ -893,9 +916,9 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, # set threshold threshold = pq[1].first - X = threshold*randexp(rng) + X = threshold * randexp(rng) - for i in s+1:n + for i in (s + 1):n w = wv[i] w < 0 && error("Negative weight found in weight vector at index $i") w > 0 || continue @@ -903,8 +926,8 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, X <= 0 || continue # update priority queue - t = exp(-w/threshold) - pq[1] = (-w/log(t+rand(rng)*(1-t)) => i) + t = exp(-w / threshold) + pq[1] = 
(-w / log(t + rand(rng) * (1 - t)) => i) percolate_down!(pq, 1) # update threshold @@ -913,7 +936,7 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, end if ordered # fill output array with items sorted as in a - sort!(pq, by=last) + sort!(pq, by = last) for i in 1:k x[i] = a[pq[i].second] end @@ -925,12 +948,16 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, end return x end -efraimidis_aexpj_wsample_norep!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray; - ordered::Bool=false) = - efraimidis_aexpj_wsample_norep!(default_rng(), a, wv, x; ordered=ordered) - -function sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, x::AbstractArray; - replace::Bool=true, ordered::Bool=false) +efraimidis_aexpj_wsample_norep!( + a::AbstractArray, wv::AbstractWeights, x::AbstractArray; + ordered::Bool = false +) = + efraimidis_aexpj_wsample_norep!(default_rng(), a, wv, x; ordered = ordered) + +function sample!( + rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, x::AbstractArray; + replace::Bool = true, ordered::Bool = false + ) 1 == firstindex(a) == firstindex(wv) == firstindex(x) || throw(ArgumentError("non 1-based arrays are not supported")) n = length(a) @@ -939,7 +966,7 @@ function sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, x::Abs if replace if ordered sample_ordered!(rng, a, wv, x) do rng, a, wv, x - sample!(rng, a, wv, x; replace=true, ordered=false) + sample!(rng, a, wv, x; replace = true, ordered = false) end else if n < 40 @@ -955,38 +982,52 @@ function sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, x::Abs end else k <= n || error("Cannot draw $k samples from $n samples without replacement.") - efraimidis_aexpj_wsample_norep!(rng, a, wv, x; ordered=ordered) + efraimidis_aexpj_wsample_norep!(rng, a, wv, x; ordered = ordered) end return x end -sample!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray; - replace::Bool=true, 
ordered::Bool=false) = - sample!(default_rng(), a, wv, x; replace=replace, ordered=ordered) - -sample(rng::AbstractRNG, a::AbstractArray{T}, wv::AbstractWeights, n::Integer; - replace::Bool=true, ordered::Bool=false) where {T} = - sample!(rng, a, wv, Vector{T}(undef, n); replace=replace, ordered=ordered) -sample(a::AbstractArray, wv::AbstractWeights, n::Integer; - replace::Bool=true, ordered::Bool=false) = - sample(default_rng(), a, wv, n; replace=replace, ordered=ordered) - -sample(rng::AbstractRNG, a::AbstractArray{T}, wv::AbstractWeights, dims::Dims; - replace::Bool=true, ordered::Bool=false) where {T} = - sample!(rng, a, wv, Array{T}(undef, dims); replace=replace, ordered=ordered) -sample(a::AbstractArray, wv::AbstractWeights, dims::Dims; - replace::Bool=true, ordered::Bool=false) = - sample(default_rng(), a, wv, dims; replace=replace, ordered=ordered) +sample!( + a::AbstractArray, wv::AbstractWeights, x::AbstractArray; + replace::Bool = true, ordered::Bool = false +) = + sample!(default_rng(), a, wv, x; replace = replace, ordered = ordered) + +sample( + rng::AbstractRNG, a::AbstractArray{T}, wv::AbstractWeights, n::Integer; + replace::Bool = true, ordered::Bool = false +) where {T} = + sample!(rng, a, wv, Vector{T}(undef, n); replace = replace, ordered = ordered) +sample( + a::AbstractArray, wv::AbstractWeights, n::Integer; + replace::Bool = true, ordered::Bool = false +) = + sample(default_rng(), a, wv, n; replace = replace, ordered = ordered) + +sample( + rng::AbstractRNG, a::AbstractArray{T}, wv::AbstractWeights, dims::Dims; + replace::Bool = true, ordered::Bool = false +) where {T} = + sample!(rng, a, wv, Array{T}(undef, dims); replace = replace, ordered = ordered) +sample( + a::AbstractArray, wv::AbstractWeights, dims::Dims; + replace::Bool = true, ordered::Bool = false +) = + sample(default_rng(), a, wv, dims; replace = replace, ordered = ordered) # Specialization for `UnitWeights` -function sample!(rng::AbstractRNG, a::AbstractArray, wv::UnitWeights, 
x::AbstractArray; replace::Bool=true, ordered::Bool=false) +function sample!(rng::AbstractRNG, a::AbstractArray, wv::UnitWeights, x::AbstractArray; replace::Bool = true, ordered::Bool = false) if length(a) != length(wv) - throw(DimensionMismatch(LazyString( - "Number of samples (", - length(a), - ") and sample weights (", - length(wv), - ") must be equal.", - ))) + throw( + DimensionMismatch( + LazyString( + "Number of samples (", + length(a), + ") and sample weights (", + length(wv), + ") must be equal.", + ) + ) + ) end return sample!(rng, a, x; replace, ordered) end @@ -1005,12 +1046,16 @@ items appear in the same order as in `a`) should be taken. Optionally specify a random number generator `rng` as the first argument (defaults to `Random.default_rng()`). """ -wsample!(rng::AbstractRNG, a::AbstractArray, w::AbstractVector{<:Real}, x::AbstractArray; - replace::Bool=true, ordered::Bool=false) = - sample!(rng, a, weights(w), x; replace=replace, ordered=ordered) -wsample!(a::AbstractArray, w::AbstractVector{<:Real}, x::AbstractArray; - replace::Bool=true, ordered::Bool=false) = - sample!(default_rng(), a, weights(w), x; replace=replace, ordered=ordered) +wsample!( + rng::AbstractRNG, a::AbstractArray, w::AbstractVector{<:Real}, x::AbstractArray; + replace::Bool = true, ordered::Bool = false +) = + sample!(rng, a, weights(w), x; replace = replace, ordered = ordered) +wsample!( + a::AbstractArray, w::AbstractVector{<:Real}, x::AbstractArray; + replace::Bool = true, ordered::Bool = false +) = + sample!(default_rng(), a, weights(w), x; replace = replace, ordered = ordered) """ wsample([rng], [a], w) @@ -1040,12 +1085,16 @@ items appear in the same order as in `a`) should be taken. Optionally specify a random number generator `rng` as the first argument (defaults to `Random.default_rng()`). 
""" -wsample(rng::AbstractRNG, a::AbstractArray{T}, w::AbstractVector{<:Real}, n::Integer; - replace::Bool=true, ordered::Bool=false) where {T} = - wsample!(rng, a, w, Vector{T}(undef, n); replace=replace, ordered=ordered) -wsample(a::AbstractArray, w::AbstractVector{<:Real}, n::Integer; - replace::Bool=true, ordered::Bool=false) = - wsample(default_rng(), a, w, n; replace=replace, ordered=ordered) +wsample( + rng::AbstractRNG, a::AbstractArray{T}, w::AbstractVector{<:Real}, n::Integer; + replace::Bool = true, ordered::Bool = false +) where {T} = + wsample!(rng, a, w, Vector{T}(undef, n); replace = replace, ordered = ordered) +wsample( + a::AbstractArray, w::AbstractVector{<:Real}, n::Integer; + replace::Bool = true, ordered::Bool = false +) = + wsample(default_rng(), a, w, n; replace = replace, ordered = ordered) """ wsample([rng], [a], w, dims::Dims; replace=true, ordered=false) @@ -1057,9 +1106,13 @@ weights given in `w` if `a` is present, otherwise select a random sample of size Optionally specify a random number generator `rng` as the first argument (defaults to `Random.default_rng()`). 
""" -wsample(rng::AbstractRNG, a::AbstractArray{T}, w::AbstractVector{<:Real}, dims::Dims; - replace::Bool=true, ordered::Bool=false) where {T} = - wsample!(rng, a, w, Array{T}(undef, dims); replace=replace, ordered=ordered) -wsample(a::AbstractArray, w::AbstractVector{<:Real}, dims::Dims; - replace::Bool=true, ordered::Bool=false) = - wsample(default_rng(), a, w, dims; replace=replace, ordered=ordered) +wsample( + rng::AbstractRNG, a::AbstractArray{T}, w::AbstractVector{<:Real}, dims::Dims; + replace::Bool = true, ordered::Bool = false +) where {T} = + wsample!(rng, a, w, Array{T}(undef, dims); replace = replace, ordered = ordered) +wsample( + a::AbstractArray, w::AbstractVector{<:Real}, dims::Dims; + replace::Bool = true, ordered::Bool = false +) = + wsample(default_rng(), a, w, dims; replace = replace, ordered = ordered) diff --git a/src/scalarstats.jl b/src/scalarstats.jl index bff0cec24..49b889098 100644 --- a/src/scalarstats.jl +++ b/src/scalarstats.jl @@ -53,7 +53,7 @@ Return the mode (most common number) of an array, optionally over a specified range `r` or weighted via a vector `wv`. If several modes exist, the first one (in order of appearance) is returned. """ -function mode(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer +function mode(a::AbstractArray{T}, r::UnitRange{T}) where {T <: Integer} isempty(a) && throw(ArgumentError("mode is not defined for empty collections")) len = length(a) r0 = r[1] @@ -61,7 +61,7 @@ function mode(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer cnts = zeros(Int, length(r)) mc = 0 # maximum count mv = r0 # a value corresponding to maximum count - for i = 1:len + for i in 1:len x = a[i] if r0 <= x <= r1 c = (cnts[x - r0 + 1] += 1) @@ -81,14 +81,14 @@ end Return all modes (most common numbers) of an array, optionally over a specified range `r` or weighted via vector `wv`. 
""" -function modes(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer +function modes(a::AbstractArray{T}, r::UnitRange{T}) where {T <: Integer} r0 = r[1] r1 = r[end] n = length(r) cnts = zeros(Int, n) # find the maximum count mc = 0 - for i = 1:length(a) + for i in 1:length(a) x = a[i] if r0 <= x <= r1 c = (cnts[x - r0 + 1] += 1) @@ -99,7 +99,7 @@ function modes(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer end # find all values corresponding to maximum count ms = T[] - for i = 1:n + for i in 1:n if cnts[i] == mc push!(ms, r[i]) end @@ -110,7 +110,7 @@ end # compute mode over arbitrary iterable function mode(a) isempty(a) && throw(ArgumentError("mode is not defined for empty collections")) - cnts = Dict{eltype(a),Int}() + cnts = Dict{eltype(a), Int}() # first element mc = 1 mv, st = iterate(a) @@ -136,7 +136,7 @@ end function modes(a) isempty(a) && throw(ArgumentError("mode is not defined for empty collections")) - cnts = Dict{eltype(a),Int}() + cnts = Dict{eltype(a), Int}() # first element mc = 1 x, st = iterate(a) @@ -161,7 +161,7 @@ function modes(a) end # Weighted mode of arbitrary vectors of values -function mode(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real +function mode(a::AbstractVector, wv::AbstractWeights{T}) where {T <: Real} isempty(a) && throw(ArgumentError("mode is not defined for empty collections")) isfinite(sum(wv)) || throw(ArgumentError("only finite weights are supported")) length(a) == length(wv) || @@ -183,7 +183,7 @@ function mode(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real return mv end -function modes(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real +function modes(a::AbstractVector, wv::AbstractWeights{T}) where {T <: Real} isempty(a) && throw(ArgumentError("mode is not defined for empty collections")) isfinite(sum(wv)) || throw(ArgumentError("only finite weights are supported")) length(a) == length(wv) || @@ -226,7 +226,7 @@ partition `v` into `n` subsets of nearly equal size. 
Equivalent to `quantile(x, [0:n]/n)`. For example, `nquantiles(x, 5)` returns a vector of quantiles, respectively at `[0.0, 0.2, 0.4, 0.6, 0.8, 1.0]`. """ -nquantile(x, n::Integer) = quantile(x, (0:n)/n) +nquantile(x, n::Integer) = quantile(x, (0:n) / n) """ quantilerank(itr, value; method=:inc) @@ -305,7 +305,7 @@ julia> quantilerank.(Ref(v3), [4, 8]) 0.8888888888888888 ``` """ -function quantilerank(itr, value; method::Symbol=:inc) +function quantilerank(itr, value; method::Symbol = :inc) ((value isa Number && isnan(value)) || ismissing(value)) && throw(ArgumentError("`value` cannot be NaN or missing")) any(x -> ismissing(x) || (x isa Number && isnan(x)), itr) && @@ -370,7 +370,7 @@ function quantilerank(itr, value; method::Symbol=:inc) return (count_less - 1) / (n - 1) end elseif method == :tied - return (count_less + count_equal/2) / n + return (count_less + count_equal / 2) / n elseif method == :strict return count_less / n elseif method == :weak @@ -387,7 +387,7 @@ Return the `q`th percentile of `value` in collection `itr`, i.e. [`quantilerank( See the [`quantilerank`](@ref) docstring for more details. """ -percentilerank(itr, value; method::Symbol=:inc) = quantilerank(itr, value, method=method) * 100 +percentilerank(itr, value; method::Symbol = :inc) = quantilerank(itr, value, method = method) * 100 ############################# # @@ -415,15 +415,15 @@ standard deviation to the mean. If `corrected` is `false`, then `std` is calculated with denominator `n`. Else, the `std` is calculated with denominator `n-1`. 
""" -variation(x, m; corrected::Bool=true) = stdm(x, m; corrected=corrected) / m -variation(x; corrected::Bool=true) = ((m, s) = mean_and_std(x; corrected=corrected); s/m) +variation(x, m; corrected::Bool = true) = stdm(x, m; corrected = corrected) / m +variation(x; corrected::Bool = true) = ((m, s) = mean_and_std(x; corrected = corrected); s / m) # Standard error of the mean: std / sqrt(len) # Code taken from var in the Statistics stdlib module # faster computation of real(conj(x)*y) -realXcY(x::Real, y::Real) = x*y -realXcY(x::Complex, y::Complex) = real(x)*real(y) + imag(x)*imag(y) +realXcY(x::Real, y::Real) = x * y +realXcY(x::Complex, y::Complex) = real(x) * real(y) + imag(x) * imag(y) """ sem(x; mean=nothing) @@ -447,7 +447,7 @@ The standard error is then the square root of the above quantities. Carl-Erik Särndal, Bengt Swensson, Jan Wretman (1992). Model Assisted Survey Sampling. New York: Springer. pp. 51-53. """ -function sem(x; mean=nothing) +function sem(x; mean = nothing) if isempty(x) # Return the NaN of the type that we would get for a nonempty x T = eltype(x) @@ -485,7 +485,7 @@ function sem(x; mean=nothing) return sqrt(variance / n) end -function sem(x::AbstractArray; mean=nothing) +function sem(x::AbstractArray; mean = nothing) if isempty(x) # Return the NaN of the type that we would get for a nonempty x T = eltype(x) @@ -493,31 +493,35 @@ function sem(x::AbstractArray; mean=nothing) z = abs2(zero(T) - _mean) return oftype((z + z) / 2, NaN) end - return sqrt(var(x; mean=mean, corrected=true) / length(x)) + return sqrt(var(x; mean = mean, corrected = true) / length(x)) end -function sem(x::AbstractArray, weights::UnitWeights; mean=nothing) +function sem(x::AbstractArray, weights::UnitWeights; mean = nothing) if length(x) ≠ length(weights) throw(DimensionMismatch("array and weights do not have the same length")) end - return sem(x; mean=mean) + return sem(x; mean = mean) end # Weighted methods for the above -sem(x::AbstractArray, 
weights::FrequencyWeights; mean=nothing) = - sqrt(var(x, weights; mean=mean, corrected=true) / sum(weights)) +sem(x::AbstractArray, weights::FrequencyWeights; mean = nothing) = + sqrt(var(x, weights; mean = mean, corrected = true) / sum(weights)) -function sem(x::AbstractArray, weights::ProbabilityWeights; mean=nothing) +function sem(x::AbstractArray, weights::ProbabilityWeights; mean = nothing) if isempty(x) # Return the NaN of the type that we would get for a nonempty x - return var(x, weights; mean=mean, corrected=true) / 0 + return var(x, weights; mean = mean, corrected = true) / 0 else _mean = mean === nothing ? Statistics.mean(x, weights) : mean # sum of squared errors = sse - sse = sum(Broadcast.instantiate(Broadcast.broadcasted(x, weights) do x_i, w - return abs2(w * (x_i - _mean)) - end)) + sse = sum( + Broadcast.instantiate( + Broadcast.broadcasted(x, weights) do x_i, w + return abs2(w * (x_i - _mean)) + end + ) + ) n = count(!iszero, weights) return sqrt(sse * n / (n - 1)) / sum(weights) end @@ -536,8 +540,8 @@ If `normalize` is set to `true`, the MAD is multiplied by `1 / quantile(Normal(), 3/4) ≈ 1.4826`, in order to obtain a consistent estimator of the standard deviation under the assumption that the data is normally distributed. """ -function mad(x; center=nothing, normalize::Union{Bool, Nothing}=nothing, constant=nothing) - mad!(Base.copymutable(x); center=center, normalize=normalize, constant=constant) +function mad(x; center = nothing, normalize::Union{Bool, Nothing} = nothing, constant = nothing) + return mad!(Base.copymutable(x); center = center, normalize = normalize, constant = constant) end """ @@ -550,10 +554,12 @@ If `normalize` is set to `true`, the MAD is multiplied by `1 / quantile(Normal(), 3/4) ≈ 1.4826`, in order to obtain a consistent estimator of the standard deviation under the assumption that the data is normally distributed. 
""" -function mad!(x::AbstractArray; - center=median!(x), - normalize::Union{Bool,Nothing}=true, - constant=nothing) +function mad!( + x::AbstractArray; + center = median!(x), + normalize::Union{Bool, Nothing} = true, + constant = nothing + ) isempty(x) && throw(ArgumentError("mad is not defined for empty arrays")) c = center === nothing ? median!(x) : center T = promote_type(typeof(c), eltype(x)) @@ -565,7 +571,7 @@ function mad!(x::AbstractArray; Base.depwarn("the `normalize` keyword argument will be false by default in future releases: set it explicitly to silence this deprecation", :mad) normalize = true end - if !isa(constant, Nothing) + return if !isa(constant, Nothing) Base.depwarn("keyword argument `constant` is deprecated, use `normalize` instead or apply the multiplication directly", :mad) m * constant elseif normalize @@ -582,7 +588,7 @@ end Compute the interquartile range (IQR) of collection `x`, i.e. the 75th percentile minus the 25th percentile. """ -iqr(x) = (q = quantile(x, [.25, .75]); q[2] - q[1]) +iqr(x) = (q = quantile(x, [0.25, 0.75]); q[2] - q[1]) # Generalized variance """ @@ -609,7 +615,7 @@ or other iterable, this is equivalent to the sample variance. Otherwise if `X` is a matrix, this is equivalent to the sum of the diagonal elements of the covariance matrix of `X`. 
""" -totalvar(X::AbstractMatrix) = sum(var(X, dims=1)) +totalvar(X::AbstractMatrix) = sum(var(X, dims = 1)) totalvar(itr) = var(itr) ############################# @@ -622,34 +628,36 @@ function _zscore!(Z::AbstractArray, X::AbstractArray, μ::Real, σ::Real) # Z and X are assumed to have the same size iσ = inv(σ) if μ == zero(μ) - for i = 1 : length(X) + for i in 1:length(X) Z[i] = X[i] * iσ end else - for i = 1 : length(X) + for i in 1:length(X) Z[i] = (X[i] - μ) * iσ end end return Z end -@generated function _zscore!(Z::AbstractArray{S,N}, X::AbstractArray{T,N}, - μ::AbstractArray, σ::AbstractArray) where {S,T,N} - quote +@generated function _zscore!( + Z::AbstractArray{S, N}, X::AbstractArray{T, N}, + μ::AbstractArray, σ::AbstractArray + ) where {S, T, N} + return quote # Z and X are assumed to have the same size # μ and σ are assumed to have the same size, that is compatible with size(X) siz1 = size(X, 1) - @nextract $N ud d->size(μ, d) + @nextract $N ud d -> size(μ, d) if size(μ, 1) == 1 && siz1 > 1 - @nloops $N i d->(d>1 ? (1:size(X,d)) : (1:1)) d->(j_d = ud_d ==1 ? 1 : i_d) begin + @nloops $N i d -> (d > 1 ? (1:size(X, d)) : (1:1)) d -> (j_d = ud_d == 1 ? 1 : i_d) begin v = (@nref $N μ j) c = inv(@nref $N σ j) - for i_1 = 1:siz1 + for i_1 in 1:siz1 (@nref $N Z i) = ((@nref $N X i) - v) * c end end else - @nloops $N i X d->(j_d = ud_d ==1 ? 1 : i_d) begin + @nloops $N i X d -> (j_d = ud_d == 1 ? 
1 : i_d) begin (@nref $N Z i) = ((@nref $N X i) - (@nref $N μ j)) / (@nref $N σ j) end end @@ -659,10 +667,11 @@ end function _zscore_chksize(X::AbstractArray, μ::AbstractArray, σ::AbstractArray) size(μ) == size(σ) || throw(DimensionMismatch("μ and σ should have the same size.")) - for i=1:ndims(X) - dμ_i = size(μ,i) - (dμ_i == 1 || dμ_i == size(X,i)) || throw(DimensionMismatch("X and μ have incompatible sizes.")) + for i in 1:ndims(X) + dμ_i = size(μ, i) + (dμ_i == 1 || dμ_i == size(X, i)) || throw(DimensionMismatch("X and μ have incompatible sizes.")) end + return end @@ -676,16 +685,18 @@ observation lies, i.e. ``(x - μ) / σ``. If a destination array `Z` is provided, the scores are stored in `Z` and it must have the same shape as `X`. Otherwise `X` is overwritten. """ -function zscore!(Z::AbstractArray{ZT}, X::AbstractArray{T}, μ::Real, σ::Real) where {ZT<:AbstractFloat,T<:Real} +function zscore!(Z::AbstractArray{ZT}, X::AbstractArray{T}, μ::Real, σ::Real) where {ZT <: AbstractFloat, T <: Real} size(Z) == size(X) || throw(DimensionMismatch("Z and X must have the same size.")) - _zscore!(Z, X, μ, σ) + return _zscore!(Z, X, μ, σ) end -function zscore!(Z::AbstractArray{<:AbstractFloat}, X::AbstractArray{<:Real}, - μ::AbstractArray{<:Real}, σ::AbstractArray{<:Real}) +function zscore!( + Z::AbstractArray{<:AbstractFloat}, X::AbstractArray{<:Real}, + μ::AbstractArray{<:Real}, σ::AbstractArray{<:Real} + ) size(Z) == size(X) || throw(DimensionMismatch("Z and X must have the same size.")) _zscore_chksize(X, μ, σ) - _zscore!(Z, X, μ, σ) + return _zscore!(Z, X, μ, σ) end zscore!(X::AbstractArray{<:AbstractFloat}, μ::Real, σ::Real) = _zscore!(X, X, μ, σ) @@ -705,22 +716,21 @@ above the mean that an observation lies, i.e. ``(x - μ) / σ``. In particular, when `μ` and `σ` are arrays, they should have the same size, and `size(μ, i) == 1 || size(μ, i) == size(X, i)` for each dimension. 
""" -function zscore(X::AbstractArray{T}, μ::Real, σ::Real) where T<:Real +function zscore(X::AbstractArray{T}, μ::Real, σ::Real) where {T <: Real} ZT = typeof((zero(T) - zero(μ)) / one(σ)) - _zscore!(Array{ZT}(undef, size(X)), X, μ, σ) + return _zscore!(Array{ZT}(undef, size(X)), X, μ, σ) end -function zscore(X::AbstractArray{T}, μ::AbstractArray{U}, σ::AbstractArray{S}) where {T<:Real,U<:Real,S<:Real} +function zscore(X::AbstractArray{T}, μ::AbstractArray{U}, σ::AbstractArray{S}) where {T <: Real, U <: Real, S <: Real} _zscore_chksize(X, μ, σ) ZT = typeof((zero(T) - zero(U)) / one(S)) - _zscore!(Array{ZT}(undef, size(X)), X, μ, σ) + return _zscore!(Array{ZT}(undef, size(X)), X, μ, σ) end zscore(X::AbstractArray{<:Real}) = ((μ, σ) = mean_and_std(X); zscore(X, μ, σ)) zscore(X::AbstractArray{<:Real}, dim::Int) = ((μ, σ) = mean_and_std(X, dim); zscore(X, μ, σ)) - ############################# # # entropy and friends @@ -736,8 +746,12 @@ Elements with probability 0 or 1 add 0 to the entropy. """ function entropy(p) if isempty(p) - throw(ArgumentError("empty collections are not supported since they do not " * - "represent proper probability distributions")) + throw( + ArgumentError( + "empty collections are not supported since they do not " * + "represent proper probability distributions" + ) + ) end return -sum(xlogx, p) end @@ -755,7 +769,7 @@ end Compute the Rényi (generalized) entropy of order `α` of an array `p`. 
""" -function renyientropy(p::AbstractArray{T}, α::Real) where T<:Real +function renyientropy(p::AbstractArray{T}, α::Real) where {T <: Real} α < 0 && throw(ArgumentError("Order of Rényi entropy not legal, $(α) < 0.")) s = zero(T) @@ -763,7 +777,7 @@ function renyientropy(p::AbstractArray{T}, α::Real) where T<:Real scale = sum(p) if α ≈ 0 - for i = 1:length(p) + for i in 1:length(p) pi = p[i] if pi > z s += 1 @@ -771,7 +785,7 @@ function renyientropy(p::AbstractArray{T}, α::Real) where T<:Real end s = log(s / scale) elseif α ≈ 1 - for i = 1:length(p) + for i in 1:length(p) pi = p[i] if pi > z s -= pi * log(pi) @@ -781,10 +795,10 @@ function renyientropy(p::AbstractArray{T}, α::Real) where T<:Real elseif (isinf(α)) s = -log(maximum(p)) else # a normal Rényi entropy - for i = 1:length(p) + for i in 1:length(p) pi = p[i] if pi > z - s += pi ^ α + s += pi^α end end s = log(s / scale) / (1 - α) @@ -805,7 +819,7 @@ function crossentropy(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}) if isempty(p) Base.depwarn( "support for empty collections will be removed since they do not " * - "represent proper probability distributions", + "represent proper probability distributions", :crossentropy, ) # return zero for empty arrays @@ -818,7 +832,7 @@ function crossentropy(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}) end crossentropy(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}, b::Real) = - crossentropy(p,q) / log(b) + crossentropy(p, q) / log(b) """ @@ -835,8 +849,8 @@ function kldivergence(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}) # handle empty collections if isempty(p) Base.depwarn( - "support for empty collections will be removed since they do not "* - "represent proper probability distributions", + "support for empty collections will be removed since they do not " * + "represent proper probability distributions", :kldivergence, ) # return zero for empty arrays @@ -855,7 +869,7 @@ function kldivergence(p::AbstractArray{<:Real}, 
q::AbstractArray{<:Real}) end kldivergence(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}, b::Real) = - kldivergence(p,q) / log(b) + kldivergence(p, q) / log(b) ############################# # @@ -863,7 +877,7 @@ kldivergence(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}, b::Real) = # ############################# -struct SummaryStats{T<:Union{AbstractFloat,Missing}} +struct SummaryStats{T <: Union{AbstractFloat, Missing}} mean::T sd::T min::T @@ -884,23 +898,23 @@ Compute summary statistics for a real-valued array `a`. Returns a number of missing observations, standard deviation, mean, minimum, 25th percentile, median, 75th percentile, and maximum. """ -function summarystats(a::AbstractArray{T}) where T<:Union{Real,Missing} +function summarystats(a::AbstractArray{T}) where {T <: Union{Real, Missing}} # `mean` doesn't fail on empty input but rather returns `NaN`, so we can use the # return type to populate the `SummaryStats` structure. s = T >: Missing ? collect(skipmissing(a)) : a m = mean(s) - stdev = std(s, mean=m) + stdev = std(s, mean = m) R = typeof(m) n = length(a) ns = length(s) qs = if ns == 0 R[NaN, NaN, NaN, NaN, NaN] elseif T >: Missing - quantile!(s, [0.00, 0.25, 0.50, 0.75, 1.00]) + quantile!(s, [0.0, 0.25, 0.5, 0.75, 1.0]) else - quantile(s, [0.00, 0.25, 0.50, 0.75, 1.00]) + quantile(s, [0.0, 0.25, 0.5, 0.75, 1.0]) end - SummaryStats{R}(m, stdev, qs..., n, n - ns) + return SummaryStats{R}(m, stdev, qs..., n, n - ns) end function Base.show(io::IO, ss::SummaryStats) @@ -914,7 +928,7 @@ function Base.show(io::IO, ss::SummaryStats) @printf(io, "1st Quartile: %.6f\n", ss.q25) @printf(io, "Median: %.6f\n", ss.median) @printf(io, "3rd Quartile: %.6f\n", ss.q75) - @printf(io, "Maximum: %.6f\n", ss.max) + return @printf(io, "Maximum: %.6f\n", ss.max) end @@ -926,9 +940,9 @@ the mean, minimum, 25th percentile, median, 75th percentile, and maximum. 
""" DataAPI.describe(x) = describe(stdout, x) -function DataAPI.describe(io::IO, a::AbstractArray{T}) where T<:Union{Real,Missing} +function DataAPI.describe(io::IO, a::AbstractArray{T}) where {T <: Union{Real, Missing}} show(io, summarystats(a)) - println(io, "Type: $(string(eltype(a)))") + return println(io, "Type: $(string(eltype(a)))") end function DataAPI.describe(io::IO, a::AbstractArray) println(io, "Summary Stats:") diff --git a/src/signalcorr.jl b/src/signalcorr.jl index 06c83ba1d..4843e7de3 100644 --- a/src/signalcorr.jl +++ b/src/signalcorr.jl @@ -11,27 +11,27 @@ # ####################################### -default_laglen(lx::Int) = min(lx-1, round(Int,10*log10(lx))) +default_laglen(lx::Int) = min(lx - 1, round(Int, 10 * log10(lx))) check_lags(lx::Int, lags::AbstractVector) = (maximum(lags) < lx || error("lags must be less than the sample length.")) function demean_col!(z::AbstractVector{<:Real}, x::AbstractMatrix{<:Real}, j::Int, demean::Bool) T = eltype(z) m = size(x, 1) @assert m == length(z) - b = m * (j-1) + b = m * (j - 1) if demean s = zero(T) - for i = 1 : m + for i in 1:m s += x[b + i] end mv = s / m - for i = 1 : m + for i in 1:m z[i] = x[b + i] - mv end else - copyto!(z, 1, x, b+1, m) + copyto!(z, 1, x, b + 1, m) end - z + return z end @@ -41,10 +41,10 @@ end # ####################################### -default_autolags(lx::Int) = 0 : default_laglen(lx) +default_autolags(lx::Int) = 0:default_laglen(lx) -_autodot(x::AbstractVector{<:Union{Float32, Float64}}, lx::Int, l::Int) = dot(x, 1:(lx-l), x, (1+l):lx) -_autodot(x::AbstractVector{<:Real}, lx::Int, l::Int) = dot(view(x, 1:(lx-l)), view(x, (1+l):lx)) +_autodot(x::AbstractVector{<:Union{Float32, Float64}}, lx::Int, l::Int) = dot(x, 1:(lx - l), x, (1 + l):lx) +_autodot(x::AbstractVector{<:Real}, lx::Int, l::Int) = dot(view(x, 1:(lx - l)), view(x, (1 + l):lx)) ## autocov @@ -61,7 +61,7 @@ where each column in the result will correspond to a column in `x`. The output is not normalized. 
See [`autocor!`](@ref) for a method with normalization. """ -function autocov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = length(x) m = length(lags) length(r) == m || throw(DimensionMismatch()) @@ -69,13 +69,13 @@ function autocov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, lags::Ab T = typeof(zero(eltype(x)) / 1) z::Vector{T} = demean ? x .- mean(x) : x - for k = 1 : m # foreach lag value + for k in 1:m # foreach lag value r[k] = _autodot(z, lx, lags[k]) / lx end return r end -function autocov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = size(x, 1) ns = size(x, 2) m = length(lags) @@ -84,10 +84,10 @@ function autocov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, lags::Ab T = typeof(zero(eltype(x)) / 1) z = Vector{T}(undef, lx) - for j = 1 : ns + for j in 1:ns demean_col!(z, x, j, demean) - for k = 1 : m - r[k,j] = _autodot(z, lx, lags[k]) / lx + for k in 1:m + r[k, j] = _autodot(z, lx, lags[k]) / lx end end return r @@ -110,18 +110,18 @@ When left unspecified, the lags used are the integers from 0 to The output is not normalized. See [`autocor`](@ref) for a function with normalization. 
""" -function autocov(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocov(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) out = Vector{float(eltype(x))}(undef, length(lags)) - autocov!(out, x, lags; demean=demean) + return autocov!(out, x, lags; demean = demean) end -function autocov(x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(eltype(x))}(undef, length(lags), size(x,2)) - autocov!(out, x, lags; demean=demean) +function autocov(x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) + out = Matrix{float(eltype(x))}(undef, length(lags), size(x, 2)) + return autocov!(out, x, lags; demean = demean) end -autocov(x::AbstractVecOrMat{<:Real}; demean::Bool=true) = - autocov(x, default_autolags(size(x,1)); demean=demean) +autocov(x::AbstractVecOrMat{<:Real}; demean::Bool = true) = + autocov(x, default_autolags(size(x, 1)); demean = demean) ## autocor @@ -139,7 +139,7 @@ where each column in the result will correspond to a column in `x`. The output is normalized by the variance of `x`, i.e. so that the lag 0 autocorrelation is 1. See [`autocov!`](@ref) for the unnormalized form. """ -function autocor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = length(x) m = length(lags) length(r) == m || throw(DimensionMismatch()) @@ -148,13 +148,13 @@ function autocor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, lags::Ab T = typeof(zero(eltype(x)) / 1) z::Vector{T} = demean ? 
x .- mean(x) : x zz = dot(z, z) - for k = 1 : m # foreach lag value + for k in 1:m # foreach lag value r[k] = _autodot(z, lx, lags[k]) / zz end return r end -function autocor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = size(x, 1) ns = size(x, 2) m = length(lags) @@ -163,11 +163,11 @@ function autocor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, lags::Ab T = typeof(zero(eltype(x)) / 1) z = Vector{T}(undef, lx) - for j = 1 : ns + for j in 1:ns demean_col!(z, x, j, demean) zz = dot(z, z) - for k = 1 : m - r[k,j] = _autodot(z, lx, lags[k]) / zz + for k in 1:m + r[k, j] = _autodot(z, lx, lags[k]) / zz end end return r @@ -191,18 +191,18 @@ When left unspecified, the lags used are the integers from 0 to The output is normalized by the variance of `x`, i.e. so that the lag 0 autocorrelation is 1. See [`autocov`](@ref) for the unnormalized form. 
""" -function autocor(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocor(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) out = Vector{float(eltype(x))}(undef, length(lags)) - autocor!(out, x, lags; demean=demean) + return autocor!(out, x, lags; demean = demean) end -function autocor(x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(eltype(x))}(undef, length(lags), size(x,2)) - autocor!(out, x, lags; demean=demean) +function autocor(x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) + out = Matrix{float(eltype(x))}(undef, length(lags), size(x, 2)) + return autocor!(out, x, lags; demean = demean) end -autocor(x::AbstractVecOrMat{<:Real}; demean::Bool=true) = - autocor(x, default_autolags(size(x,1)); demean=demean) +autocor(x::AbstractVecOrMat{<:Real}; demean::Bool = true) = + autocor(x, default_autolags(size(x, 1)); demean = demean) ####################################### @@ -211,20 +211,20 @@ autocor(x::AbstractVecOrMat{<:Real}; demean::Bool=true) = # ####################################### -default_crosslags(lx::Int) = (l=default_laglen(lx); -l:l) +default_crosslags(lx::Int) = (l = default_laglen(lx); -l:l) -function _crossdot(x::AbstractVector{T}, y::AbstractVector{T}, lx::Int, l::Int) where {T<:Union{Float32, Float64}} - if l >= 0 - dot(x, 1:(lx-l), y, (1+l):lx) +function _crossdot(x::AbstractVector{T}, y::AbstractVector{T}, lx::Int, l::Int) where {T <: Union{Float32, Float64}} + return if l >= 0 + dot(x, 1:(lx - l), y, (1 + l):lx) else - dot(x, (1-l):lx, y, 1:(lx+l)) + dot(x, (1 - l):lx, y, 1:(lx + l)) end end function _crossdot(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lx::Int, l::Int) - if l >= 0 - dot(view(x, 1:(lx-l)), view(y, (1+l):lx)) + return if l >= 0 + dot(view(x, 1:(lx - l)), view(y, (1 + l):lx)) else - dot(view(x, (1-l):lx), view(y, 1:(lx+l))) + dot(view(x, (1 - l):lx), view(y, 
1:(lx + l))) end end @@ -246,7 +246,7 @@ three-dimensional array of size `(length(lags), size(x, 2), size(y, 2))`. The output is not normalized. See [`crosscor!`](@ref) for a function with normalization. """ -function crosscov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = length(x) m = length(lags) (length(y) == lx && length(r) == m) || throw(DimensionMismatch()) @@ -256,13 +256,13 @@ function crosscov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, y::Abst zx::Vector{T} = demean ? x .- mean(x) : x S = typeof(zero(eltype(y)) / 1) zy::Vector{S} = demean ? y .- mean(y) : y - for k = 1 : m # foreach lag value + for k in 1:m # foreach lag value r[k] = _crossdot(zx, zy, lx, lags[k]) / lx end return r end -function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = size(x, 1) ns = size(x, 2) m = length(lags) @@ -273,16 +273,16 @@ function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, y::Abst zx = Vector{T}(undef, lx) S = typeof(zero(eltype(y)) / 1) zy::Vector{S} = demean ? 
y .- mean(y) : y - for j = 1 : ns + for j in 1:ns demean_col!(zx, x, j, demean) - for k = 1 : m - r[k,j] = _crossdot(zx, zy, lx, lags[k]) / lx + for k in 1:m + r[k, j] = _crossdot(zx, zy, lx, lags[k]) / lx end end return r end -function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = length(x) ns = size(y, 2) m = length(lags) @@ -293,16 +293,16 @@ function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, y::Abst zx::Vector{T} = demean ? x .- mean(x) : x S = typeof(zero(eltype(y)) / 1) zy = Vector{S}(undef, lx) - for j = 1 : ns + for j in 1:ns demean_col!(zy, y, j, demean) - for k = 1 : m - r[k,j] = _crossdot(zx, zy, lx, lags[k]) / lx + for k in 1:m + r[k, j] = _crossdot(zx, zy, lx, lags[k]) / lx end end return r end -function crosscov!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscov!(r::AbstractArray{<:Real, 3}, x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = size(x, 1) nx = size(x, 2) ny = size(y, 2) @@ -314,11 +314,11 @@ function crosscov!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::Abs T = typeof(zero(eltype(x)) / 1) zxs = Vector{T}[] sizehint!(zxs, nx) - for j = 1 : nx - xj = x[:,j] + for j in 1:nx + xj = x[:, j] if demean mv = mean(xj) - for i = 1 : lx + for i in 1:lx xj[i] -= mv end end @@ -327,12 +327,12 @@ function crosscov!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::Abs S = typeof(zero(eltype(y)) / 1) zy = Vector{S}(undef, lx) - for j = 1 : ny + for j in 1:ny demean_col!(zy, y, j, demean) - for i = 1 : nx + for i in 1:nx zx = zxs[i] - for k = 1 : m - r[k,i,j] = _crossdot(zx, zy, lx, lags[k]) / lx 
+ for k in 1:m + r[k, i, j] = _crossdot(zx, zy, lx, lags[k]) / lx end end end @@ -356,28 +356,28 @@ When left unspecified, the lags used are the integers from The output is not normalized. See [`crosscor`](@ref) for a function with normalization. """ -function crosscov(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscov(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) out = Vector{float(Base.promote_eltype(x, y))}(undef, length(lags)) - crosscov!(out, x, y, lags; demean=demean) + return crosscov!(out, x, y, lags; demean = demean) end -function crosscov(x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(x,2)) - crosscov!(out, x, y, lags; demean=demean) +function crosscov(x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) + out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(x, 2)) + return crosscov!(out, x, y, lags; demean = demean) end -function crosscov(x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(y,2)) - crosscov!(out, x, y, lags; demean=demean) +function crosscov(x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) + out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(y, 2)) + return crosscov!(out, x, y, lags; demean = demean) end -function crosscov(x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Array{float(Base.promote_eltype(x, y)),3}(undef, length(lags), size(x,2), size(y,2)) - crosscov!(out, x, y, lags; demean=demean) +function 
crosscov(x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) + out = Array{float(Base.promote_eltype(x, y)), 3}(undef, length(lags), size(x, 2), size(y, 2)) + return crosscov!(out, x, y, lags; demean = demean) end -crosscov(x::AbstractVecOrMat{<:Real}, y::AbstractVecOrMat{<:Real}; demean::Bool=true) = - crosscov(x, y, default_crosslags(size(x,1)); demean=demean) +crosscov(x::AbstractVecOrMat{<:Real}, y::AbstractVecOrMat{<:Real}; demean::Bool = true) = + crosscov(x, y, default_crosslags(size(x, 1)); demean = demean) ## crosscor @@ -397,7 +397,7 @@ three-dimensional array of size `(length(lags), size(x, 2), size(y, 2))`. The output is normalized by `sqrt(var(x)*var(y))`. See [`crosscov!`](@ref) for the unnormalized form. """ -function crosscor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = length(x) m = length(lags) (length(y) == lx && length(r) == m) || throw(DimensionMismatch()) @@ -408,13 +408,13 @@ function crosscor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, y::Abst S = typeof(zero(eltype(y)) / 1) zy::Vector{S} = demean ? 
y .- mean(y) : y sc = sqrt(dot(zx, zx) * dot(zy, zy)) - for k = 1 : m # foreach lag value + for k in 1:m # foreach lag value r[k] = _crossdot(zx, zy, lx, lags[k]) / sc end return r end -function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = size(x, 1) ns = size(x, 2) m = length(lags) @@ -426,17 +426,17 @@ function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, y::Abst S = typeof(zero(eltype(y)) / 1) zy::Vector{S} = demean ? y .- mean(y) : y yy = dot(zy, zy) - for j = 1 : ns + for j in 1:ns demean_col!(zx, x, j, demean) sc = sqrt(dot(zx, zx) * yy) - for k = 1 : m - r[k,j] = _crossdot(zx, zy, lx, lags[k]) / sc + for k in 1:m + r[k, j] = _crossdot(zx, zy, lx, lags[k]) / sc end end return r end -function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = length(x) ns = size(y, 2) m = length(lags) @@ -448,17 +448,17 @@ function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, y::Abst S = typeof(zero(eltype(y)) / 1) zy = Vector{S}(undef, lx) xx = dot(zx, zx) - for j = 1 : ns + for j in 1:ns demean_col!(zy, y, j, demean) sc = sqrt(xx * dot(zy, zy)) - for k = 1 : m - r[k,j] = _crossdot(zx, zy, lx, lags[k]) / sc + for k in 1:m + r[k, j] = _crossdot(zx, zy, lx, lags[k]) / sc end end return r end -function crosscor!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscor!(r::AbstractArray{<:Real, 3}, x::AbstractMatrix{<:Real}, 
y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) lx = size(x, 1) nx = size(x, 2) ny = size(y, 2) @@ -472,11 +472,11 @@ function crosscor!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::Abs sizehint!(zxs, nx) xxs = Vector{T}(undef, nx) - for j = 1 : nx - xj = x[:,j] + for j in 1:nx + xj = x[:, j] if demean mv = mean(xj) - for i = 1 : lx + for i in 1:lx xj[i] -= mv end end @@ -486,14 +486,14 @@ function crosscor!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::Abs S = typeof(zero(eltype(y)) / 1) zy = Vector{S}(undef, lx) - for j = 1 : ny + for j in 1:ny demean_col!(zy, y, j, demean) yy = dot(zy, zy) - for i = 1 : nx + for i in 1:nx zx = zxs[i] sc = sqrt(xxs[i] * yy) - for k = 1 : m - r[k,i,j] = _crossdot(zx, zy, lx, lags[k]) / sc + for k in 1:m + r[k, i, j] = _crossdot(zx, zy, lx, lags[k]) / sc end end end @@ -517,28 +517,28 @@ When left unspecified, the lags used are the integers from The output is normalized by `sqrt(var(x)*var(y))`. See [`crosscov`](@ref) for the unnormalized form. 
""" -function crosscor(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscor(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) out = Vector{float(Base.promote_eltype(x, y))}(undef, length(lags)) - crosscor!(out, x, y, lags; demean=demean) + return crosscor!(out, x, y, lags; demean = demean) end -function crosscor(x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(x,2)) - crosscor!(out, x, y, lags; demean=demean) +function crosscor(x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) + out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(x, 2)) + return crosscor!(out, x, y, lags; demean = demean) end -function crosscor(x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(y,2)) - crosscor!(out, x, y, lags; demean=demean) +function crosscor(x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) + out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(y, 2)) + return crosscor!(out, x, y, lags; demean = demean) end -function crosscor(x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Array{float(Base.promote_eltype(x, y)),3}(undef, length(lags), size(x,2), size(y,2)) - crosscor!(out, x, y, lags; demean=demean) +function crosscor(x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool = true) + out = Array{float(Base.promote_eltype(x, y)), 3}(undef, length(lags), size(x, 2), size(y, 2)) + return crosscor!(out, x, y, lags; demean = demean) end 
-crosscor(x::AbstractVecOrMat{<:Real}, y::AbstractVecOrMat{<:Real}; demean::Bool=true) = - crosscor(x, y, default_crosslags(size(x,1)); demean=demean) +crosscor(x::AbstractVecOrMat{<:Real}, y::AbstractVecOrMat{<:Real}; demean::Bool = true) = + crosscor(x, y, default_crosslags(size(x, 1)); demean = demean) ####################################### @@ -552,30 +552,31 @@ crosscor(x::AbstractVecOrMat{<:Real}, y::AbstractVecOrMat{<:Real}; demean::Bool= function pacf_regress!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}, mk::Integer) lx = size(X, 1) tmpX = ones(eltype(X), lx, mk + 1) - for j = 1 : size(X,2) - for l = 1 : mk - for i = 1+l:lx - tmpX[i,l+1] = X[i-l,j] + for j in 1:size(X, 2) + for l in 1:mk + for i in (1 + l):lx + tmpX[i, l + 1] = X[i - l, j] end end - for i = 1 : length(lags) + for i in 1:length(lags) l = lags[i] - sX = view(tmpX, 1+l:lx, 1:l+1) - r[i,j] = l == 0 ? 1 : (cholesky!(sX'sX, Val(false)) \ (sX'view(X, 1+l:lx, j)))[end] + sX = view(tmpX, (1 + l):lx, 1:(l + 1)) + r[i, j] = l == 0 ? 1 : (cholesky!(sX'sX, Val(false)) \ (sX'view(X, (1 + l):lx, j)))[end] end end - r + return r end -function pacf_yulewalker!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{T}, lags::AbstractVector{<:Integer}, mk::Integer) where T<:Union{Float32, Float64} +function pacf_yulewalker!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{T}, lags::AbstractVector{<:Integer}, mk::Integer) where {T <: Union{Float32, Float64}} tmp = Vector{T}(undef, mk) - for j = 1 : size(X,2) - acfs = autocor(X[:,j], 1:mk) - for i = 1 : length(lags) + for j in 1:size(X, 2) + acfs = autocor(X[:, j], 1:mk) + for i in 1:length(lags) l = lags[i] - r[i,j] = l == 0 ? 1 : l == 1 ? acfs[i] : -durbin!(view(acfs, 1:l), tmp)[l] + r[i, j] = l == 0 ? 1 : l == 1 ? acfs[i] : -durbin!(view(acfs, 1:l), tmp)[l] end end + return end @@ -590,12 +591,12 @@ using the Yule-Walker equations. `r` must be a matrix of size `(length(lags), size(x, 2))`. 
""" -function pacf!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{T}, lags::AbstractVector{<:Integer}; method::Symbol=:regression) where T<:Union{Float32, Float64} +function pacf!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{T}, lags::AbstractVector{<:Integer}; method::Symbol = :regression) where {T <: Union{Float32, Float64}} lx = size(X, 1) m = length(lags) minlag, maxlag = extrema(lags) (0 <= minlag && 2maxlag < lx) || error("Invalid lag value.") - size(r) == (m, size(X,2)) || throw(DimensionMismatch()) + size(r) == (m, size(X, 2)) || throw(DimensionMismatch()) if method == :regression pacf_regress!(r, X, lags, maxlag) @@ -621,11 +622,11 @@ If `x` is a vector, return a vector of the same length as `lags`. If `x` is a matrix, return a matrix of size `(length(lags), size(x, 2))`, where each column in the result corresponds to a column in `x`. """ -function pacf(X::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; method::Symbol=:regression) - out = Matrix{float(eltype(X))}(undef, length(lags), size(X,2)) - pacf!(out, float(X), lags; method=method) +function pacf(X::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; method::Symbol = :regression) + out = Matrix{float(eltype(X))}(undef, length(lags), size(X, 2)) + return pacf!(out, float(X), lags; method = method) end -function pacf(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; method::Symbol=:regression) - vec(pacf(reshape(x, length(x), 1), lags, method=method)) +function pacf(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; method::Symbol = :regression) + return vec(pacf(reshape(x, length(x), 1), lags, method = method)) end diff --git a/src/statmodels.jl b/src/statmodels.jl index b487a30e3..81bbe7ffd 100644 --- a/src/statmodels.jl +++ b/src/statmodels.jl @@ -6,19 +6,19 @@ struct PValue <: Real v::Real function PValue(v::Real) 0 <= v <= 1 || isnan(v) || error("p-values must be in [0; 1]") - new(v) + return new(v) end end PValue(p::PValue) = p function show(io::IO, pv::PValue) v = 
pv.v - if isnan(v) - @printf(io,"%d", v) - elseif v >= 1e-4 - @printf(io,"%.4f", v) + return if isnan(v) + @printf(io, "%d", v) + elseif v >= 1.0e-4 + @printf(io, "%.4f", v) else - @printf(io,"<1e%2.2d", ceil(Integer, max(nextfloat(log10(v)), -99))) + @printf(io, "<1e%2.2d", ceil(Integer, max(nextfloat(log10(v)), -99))) end end @@ -44,7 +44,7 @@ Base.hash(x::Union{TestStat, PValue}, h::UInt) = hash(x.v, h) # necessary to avoid a method ambiguity with isless(::TestStat, NaN) Base.isless(x::Union{TestStat, PValue}, y::AbstractFloat) = isless(x.v, y) -Base.isless(y::AbstractFloat, x::Union{TestStat, PValue},) = isless(y, x.v) +Base.isless(y::AbstractFloat, x::Union{TestStat, PValue}) = isless(y, x.v) Base.isequal(y::AbstractFloat, x::Union{TestStat, PValue}) = isequal(y, x.v) Base.isequal(x::Union{TestStat, PValue}, y::AbstractFloat) = isequal(x.v, y) @@ -69,24 +69,28 @@ mutable struct CoefTable rownms::Vector pvalcol::Int teststatcol::Int - function CoefTable(cols::Vector,colnms::Vector,rownms::Vector, - pvalcol::Int=0,teststatcol::Int=0) + function CoefTable( + cols::Vector, colnms::Vector, rownms::Vector, + pvalcol::Int = 0, teststatcol::Int = 0 + ) nc = length(cols) - nrs = map(length,cols) + nrs = map(length, cols) nr = nrs[1] - length(colnms) in [0,nc] || throw(ArgumentError("colnms should have length 0 or $nc")) - length(rownms) in [0,nr] || throw(ArgumentError("rownms should have length 0 or $nr")) + length(colnms) in [0, nc] || throw(ArgumentError("colnms should have length 0 or $nc")) + length(rownms) in [0, nr] || throw(ArgumentError("rownms should have length 0 or $nr")) all(nrs .== nr) || throw(ArgumentError("Elements of cols should have equal lengths, but got $nrs")) pvalcol in 0:nc || throw(ArgumentError("pvalcol should be between 0 and $nc")) teststatcol in 0:nc || throw(ArgumentError("teststatcol should be between 0 and $nc")) - new(cols,colnms,rownms,pvalcol,teststatcol) + return new(cols, colnms, rownms, pvalcol, teststatcol) end - function 
CoefTable(mat::Matrix,colnms::Vector,rownms::Vector, - pvalcol::Int=0,teststatcol::Int=0) - nc = size(mat,2) + function CoefTable( + mat::Matrix, colnms::Vector, rownms::Vector, + pvalcol::Int = 0, teststatcol::Int = 0 + ) + nc = size(mat, 2) cols = Any[mat[:, i] for i in 1:nc] - CoefTable(cols,colnms,rownms,pvalcol,teststatcol) + return CoefTable(cols, colnms, rownms, pvalcol, teststatcol) end end @@ -98,45 +102,51 @@ function Base.eltype(ct::CoefTable) types = isempty(ct.rownms) ? Tuple{eltype.(ct.cols)...} : Tuple{eltype(ct.rownms), eltype.(ct.cols)...} - NamedTuple{names, types} + return NamedTuple{names, types} end -function Base.iterate(ct::CoefTable, i::Integer=1) - if i in 1:length(ct) +function Base.iterate(ct::CoefTable, i::Integer = 1) + return if i in 1:length(ct) cols = getindex.(ct.cols, Ref(i)) nt = isempty(ct.rownms) ? eltype(ct)(tuple(cols...)) : eltype(ct)(tuple(ct.rownms[i], cols...)) - (nt, i+1) + (nt, i + 1) else nothing end end function show(io::IO, ::MIME"text/plain", ct::CoefTable) - cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms; + cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms nc = length(cols) nr = length(cols[1]) if length(rownms) == 0 - rownms = [lpad("[$i]",floor(Integer, log10(nr))+3) for i in 1:nr] - end - mat = [j == 1 ? NoQuote(rownms[i]) : - j-1 == ct.pvalcol ? NoQuote(sprint(show, PValue(cols[j-1][i]))) : - j-1 in ct.teststatcol ? TestStat(cols[j-1][i]) : - cols[j-1][i] isa AbstractString ? NoQuote(cols[j-1][i]) : cols[j-1][i] - for i in 1:nr, j in 1:nc+1] + rownms = [lpad("[$i]", floor(Integer, log10(nr)) + 3) for i in 1:nr] + end + mat = [ + j == 1 ? NoQuote(rownms[i]) : + j - 1 == ct.pvalcol ? NoQuote(sprint(show, PValue(cols[j - 1][i]))) : + j - 1 in ct.teststatcol ? TestStat(cols[j - 1][i]) : + cols[j - 1][i] isa AbstractString ? 
NoQuote(cols[j - 1][i]) : cols[j - 1][i] + for i in 1:nr, j in 1:(nc + 1) + ] # Code inspired by print_matrix in Base - io = IOContext(io, :compact=>true, :limit=>false) - A = Base.alignment(io, mat, 1:size(mat, 1), 1:size(mat, 2), - typemax(Int), typemax(Int), 3) + io = IOContext(io, :compact => true, :limit => false) + A = Base.alignment( + io, mat, 1:size(mat, 1), 1:size(mat, 2), + typemax(Int), typemax(Int), 3 + ) nmswidths = pushfirst!(length.(colnms), 0) - A = [nmswidths[i] > sum(A[i]) ? (A[i][1]+nmswidths[i]-sum(A[i]), A[i][2]) : A[i] - for i in 1:length(A)] + A = [ + nmswidths[i] > sum(A[i]) ? (A[i][1] + nmswidths[i] - sum(A[i]), A[i][2]) : A[i] + for i in 1:length(A) + ] totwidth = sum(sum.(A)) + 2 * (length(A) - 1) println(io, repeat('─', totwidth)) print(io, repeat(' ', sum(A[1]))) for j in 1:length(colnms) - print(io, " ", lpad(colnms[j], sum(A[j+1]))) + print(io, " ", lpad(colnms[j], sum(A[j + 1]))) end println(io, '\n', repeat('─', totwidth)) for i in 1:size(mat, 1) @@ -144,43 +154,49 @@ function show(io::IO, ::MIME"text/plain", ct::CoefTable) i != size(mat, 1) && println(io) end print(io, '\n', repeat('─', totwidth)) - nothing + return nothing end function show(io::IO, ::MIME"text/markdown", ct::CoefTable) - cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms; + cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms nc = length(cols) nr = length(cols[1]) if length(rownms) == 0 - rownms = [lpad("[$i]",floor(Integer, log10(nr))+3) for i in 1:nr] - end - mat = [j == 1 ? NoQuote(rownms[i]) : - j-1 == ct.pvalcol ? NoQuote(sprint(show, PValue(cols[j-1][i]))) : - j-1 in ct.teststatcol ? TestStat(cols[j-1][i]) : - cols[j-1][i] isa AbstractString ? NoQuote(cols[j-1][i]) : cols[j-1][i] - for i in 1:nr, j in 1:nc+1] + rownms = [lpad("[$i]", floor(Integer, log10(nr)) + 3) for i in 1:nr] + end + mat = [ + j == 1 ? NoQuote(rownms[i]) : + j - 1 == ct.pvalcol ? NoQuote(sprint(show, PValue(cols[j - 1][i]))) : + j - 1 in ct.teststatcol ? 
TestStat(cols[j - 1][i]) : + cols[j - 1][i] isa AbstractString ? NoQuote(cols[j - 1][i]) : cols[j - 1][i] + for i in 1:nr, j in 1:(nc + 1) + ] # Code inspired by print_matrix in Base - io = IOContext(io, :compact=>true, :limit=>false) - A = Base.alignment(io, mat, 1:size(mat, 1), 1:size(mat, 2), - typemax(Int), typemax(Int), 3) + io = IOContext(io, :compact => true, :limit => false) + A = Base.alignment( + io, mat, 1:size(mat, 1), 1:size(mat, 2), + typemax(Int), typemax(Int), 3 + ) nmswidths = pushfirst!(length.(colnms), 0) - A = [nmswidths[i] > sum(A[i]) ? (A[i][1]+nmswidths[i]-sum(A[i]), A[i][2]) : A[i] - for i in 1:length(A)] + A = [ + nmswidths[i] > sum(A[i]) ? (A[i][1] + nmswidths[i] - sum(A[i]), A[i][2]) : A[i] + for i in 1:length(A) + ] # not using Markdown stdlib here because that won't give us nice decimal # alignment (even if that is lost when rendering to HTML, it's still nice # when looking at the markdown itself) - print(io, '|', ' '^(sum(A[1])+1)) + print(io, '|', ' '^(sum(A[1]) + 1)) for j in 1:length(colnms) - print(io, " | ", lpad(colnms[j], sum(A[j+1]))) + print(io, " | ", lpad(colnms[j], sum(A[j + 1]))) end println(io, " |") - print(io, '|', rpad(':', sum(A[1])+2, '-')) + print(io, '|', rpad(':', sum(A[1]) + 2, '-')) for j in 1:length(colnms) - _pad = j-1 in [ct.teststatcol; ct.pvalcol] ? rpad : lpad - print(io, '|', _pad(':', sum(A[j+1])+2, '-')) + _pad = j - 1 in [ct.teststatcol; ct.pvalcol] ? rpad : lpad + print(io, '|', _pad(':', sum(A[j + 1]) + 2, '-')) end println(io, '|') @@ -191,7 +207,7 @@ function show(io::IO, ::MIME"text/markdown", ct::CoefTable) i != size(mat, 1) && println(io) end - nothing + return nothing end """ @@ -201,13 +217,13 @@ The fitting procedure failed to converge in `iters` number of iterations, i.e. the `lastchange` between the cost of the final and penultimate iteration was greater than specified tolerance `tol`. 
""" -struct ConvergenceException{T<:Real} <: Exception +struct ConvergenceException{T <: Real} <: Exception iters::Int lastchange::T tol::T msg::String - function ConvergenceException{T}(iters, lastchange::T, tol::T, msg::String) where T<:Real - if tol > lastchange + function ConvergenceException{T}(iters, lastchange::T, tol::T, msg::String) where {T <: Real} + return if tol > lastchange throw(ArgumentError("Change must be greater than tol.")) else new(iters, lastchange, tol, msg) @@ -215,8 +231,10 @@ struct ConvergenceException{T<:Real} <: Exception end end -ConvergenceException(iters, lastchange::T=NaN, tol::T=NaN, - msg::AbstractString="") where {T<:Real} = +ConvergenceException( + iters, lastchange::T = NaN, tol::T = NaN, + msg::AbstractString = "" +) where {T <: Real} = ConvergenceException{T}(iters, lastchange, tol, String(msg)) function Base.showerror(io::IO, ce::ConvergenceException) @@ -224,7 +242,7 @@ function Base.showerror(io::IO, ce::ConvergenceException) if !isnan(ce.lastchange) print(io, " Last change ($(ce.lastchange)) was greater than tolerance ($(ce.tol)).") end - if !isempty(ce.msg) + return if !isempty(ce.msg) print(io, ' ', ce.msg) end end diff --git a/src/toeplitzsolvers.jl b/src/toeplitzsolvers.jl index 19146bf7a..553dc2ba7 100644 --- a/src/toeplitzsolvers.jl +++ b/src/toeplitzsolvers.jl @@ -1,66 +1,70 @@ # Symmetric Toeplitz solver -function durbin!(r::AbstractVector{T}, y::AbstractVector{T}) where T<:BlasReal +function durbin!(r::AbstractVector{T}, y::AbstractVector{T}) where {T <: BlasReal} n = length(r) n <= length(y) || throw(DimensionMismatch("Auxiliary vector cannot be shorter than data vector")) y[1] = -r[1] β = one(T) α = -r[1] - for k = 1:n-1 - β *= one(T) - α*α - α = -r[k+1] - for j = 1:k - α -= r[k-j+1]*y[j] + for k in 1:(n - 1) + β *= one(T) - α * α + α = -r[k + 1] + for j in 1:k + α -= r[k - j + 1] * y[j] end α /= β - for j = 1:div(k,2) + for j in 1:div(k, 2) tmp = y[j] - y[j] += α*y[k-j+1] - y[k-j+1] += α*tmp + y[j] += α * y[k 
- j + 1] + y[k - j + 1] += α * tmp end - if isodd(k) y[div(k,2)+1] *= one(T) + α end - y[k+1] = α + if isodd(k) + y[div(k, 2) + 1] *= one(T) + α + end + y[k + 1] = α end return y end -durbin(r::AbstractVector{T}) where {T<:BlasReal} = durbin!(r, zeros(T, length(r))) +durbin(r::AbstractVector{T}) where {T <: BlasReal} = durbin!(r, zeros(T, length(r))) -function levinson!(r::AbstractVector{T}, b::AbstractVector{T}, x::AbstractVector{T}) where T<:BlasReal +function levinson!(r::AbstractVector{T}, b::AbstractVector{T}, x::AbstractVector{T}) where {T <: BlasReal} n = length(b) n == length(r) || throw(DimensionMismatch("Vectors must have same length")) n <= length(x) || throw(DimensionMismatch("Auxiliary vector cannot be shorter than data vector")) x[1] = b[1] - b[1] = -r[2]/r[1] + b[1] = -r[2] / r[1] β = one(T) - α = -r[2]/r[1] - for k = 1:n-1 - β *= one(T) - α*α - μ = b[k+1] - for j = 2:k+1 - μ -= r[j]/r[1]*x[k-j+2] + α = -r[2] / r[1] + for k in 1:(n - 1) + β *= one(T) - α * α + μ = b[k + 1] + for j in 2:(k + 1) + μ -= r[j] / r[1] * x[k - j + 2] end μ /= β - for j = 1:k - x[j] += μ*b[k-j+1] + for j in 1:k + x[j] += μ * b[k - j + 1] end - x[k+1] = μ + x[k + 1] = μ if k < n - 1 - α = -r[k+2] - for j = 2:k+1 - α -= r[j]*b[k-j+2] + α = -r[k + 2] + for j in 2:(k + 1) + α -= r[j] * b[k - j + 2] end - α /= β*r[1] - for j = 1:div(k,2) + α /= β * r[1] + for j in 1:div(k, 2) tmp = b[j] - b[j] += α*b[k-j+1] - b[k-j+1] += α*tmp + b[j] += α * b[k - j + 1] + b[k - j + 1] += α * tmp + end + if isodd(k) + b[div(k, 2) + 1] *= one(T) + α end - if isodd(k) b[div(k,2)+1] *= one(T) + α end - b[k+1] = α + b[k + 1] = α end end - for i = 1:n + for i in 1:n x[i] /= r[1] end return x end -levinson(r::AbstractVector{T}, b::AbstractVector{T}) where {T<:BlasReal} = levinson!(r, copy(b), zeros(T, length(b))) +levinson(r::AbstractVector{T}, b::AbstractVector{T}) where {T <: BlasReal} = levinson!(r, copy(b), zeros(T, length(b))) diff --git a/src/transformations.jl b/src/transformations.jl index 
387aa2bfa..65ca8d12a 100644 --- a/src/transformations.jl +++ b/src/transformations.jl @@ -51,18 +51,18 @@ reconstruct(t::AbstractDataTransform, y::AbstractVector{<:Real}) = Standardization (Z-score transformation) """ -struct ZScoreTransform{T<:Real, U<:AbstractVector{T}} <: AbstractDataTransform +struct ZScoreTransform{T <: Real, U <: AbstractVector{T}} <: AbstractDataTransform len::Int dims::Int mean::U scale::U - function ZScoreTransform(l::Int, dims::Int, m::U, s::U) where {T<:Real, U<:AbstractVector{T}} + function ZScoreTransform(l::Int, dims::Int, m::U, s::U) where {T <: Real, U <: AbstractVector{T}} lenm = length(m) lens = length(s) lenm == l || lenm == 0 || throw(DimensionMismatch("Inconsistent dimensions.")) lens == l || lens == 0 || throw(DimensionMismatch("Inconsistent dimensions.")) - new{T, U}(l, dims, m, s) + return new{T, U}(l, dims, m, s) end end @@ -108,8 +108,10 @@ julia> StatsBase.transform(dt, X) -1.0 0.0 1.0 ``` """ -function fit(::Type{ZScoreTransform}, X::AbstractMatrix{<:Real}; - dims::Union{Integer,Nothing}=nothing, center::Bool=true, scale::Bool=true) +function fit( + ::Type{ZScoreTransform}, X::AbstractMatrix{<:Real}; + dims::Union{Integer, Nothing} = nothing, center::Bool = true, scale::Bool = true + ) if dims === nothing Base.depwarn("fit(t, x) is deprecated: use fit(t, x, dims=2) instead", :fit) dims = 2 @@ -125,25 +127,29 @@ function fit(::Type{ZScoreTransform}, X::AbstractMatrix{<:Real}; else throw(DomainError(dims, "fit only accept dims to be 1 or 2.")) end - return ZScoreTransform(l, dims, (center ? vec(m) : similar(m, 0)), - (scale ? vec(s) : similar(s, 0))) + return ZScoreTransform( + l, dims, (center ? vec(m) : similar(m, 0)), + (scale ? 
vec(s) : similar(s, 0)) + ) end -function fit(::Type{ZScoreTransform}, X::AbstractVector{<:Real}; - dims::Integer=1, center::Bool=true, scale::Bool=true) +function fit( + ::Type{ZScoreTransform}, X::AbstractVector{<:Real}; + dims::Integer = 1, center::Bool = true, scale::Bool = true + ) if dims != 1 throw(DomainError(dims, "fit only accepts dims=1 over a vector. Try fit(t, x, dims=1).")) end - return fit(ZScoreTransform, reshape(X, :, 1); dims=dims, center=center, scale=scale) + return fit(ZScoreTransform, reshape(X, :, 1); dims = dims, center = center, scale = scale) end function transform!(y::AbstractMatrix{<:Real}, t::ZScoreTransform, x::AbstractMatrix{<:Real}) if t.dims == 1 l = t.len - size(x,2) == size(y,2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) - n = size(y,1) - size(x,1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) + size(x, 2) == size(y, 2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) + n = size(y, 1) + size(x, 1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) m = t.mean s = t.scale @@ -160,7 +166,7 @@ function transform!(y::AbstractMatrix{<:Real}, t::ZScoreTransform, x::AbstractMa if isempty(s) broadcast!(-, y, x, m') else - broadcast!((x,m,s)->(x-m)/s, y, x, m', s') + broadcast!((x, m, s) -> (x - m) / s, y, x, m', s') end end elseif t.dims == 2 @@ -173,9 +179,9 @@ end function reconstruct!(x::AbstractMatrix{<:Real}, t::ZScoreTransform, y::AbstractMatrix{<:Real}) if t.dims == 1 l = t.len - size(x,2) == size(y,2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) - n = size(y,1) - size(x,1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) + size(x, 2) == size(y, 2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) + n = size(y, 1) + size(x, 1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) m = t.mean s = t.scale @@ -192,7 +198,7 @@ function reconstruct!(x::AbstractMatrix{<:Real}, t::ZScoreTransform, y::Abstract if isempty(s) broadcast!(+, x, 
y, m') else - broadcast!((y,m,s)->y*s+m, x, y, m', s') + broadcast!((y, m, s) -> y * s + m, x, y, m', s') end end elseif t.dims == 2 @@ -207,19 +213,19 @@ end Unit range normalization """ -struct UnitRangeTransform{T<:Real, U<:AbstractVector} <: AbstractDataTransform +struct UnitRangeTransform{T <: Real, U <: AbstractVector} <: AbstractDataTransform len::Int dims::Int unit::Bool min::U scale::U - function UnitRangeTransform(l::Int, dims::Int, unit::Bool, min::U, max::U) where {T, U<:AbstractVector{T}} + function UnitRangeTransform(l::Int, dims::Int, unit::Bool, min::U, max::U) where {T, U <: AbstractVector{T}} lenmin = length(min) lenmax = length(max) lenmin == l || lenmin == 0 || throw(DimensionMismatch("Inconsistent dimensions.")) lenmax == l || lenmax == 0 || throw(DimensionMismatch("Inconsistent dimensions.")) - new{T, U}(l, dims, unit, min, max) + return new{T, U}(l, dims, unit, min, max) end end @@ -264,8 +270,10 @@ julia> StatsBase.transform(dt, X) 0.0 0.5 1.0 ``` """ -function fit(::Type{UnitRangeTransform}, X::AbstractMatrix{<:Real}; - dims::Union{Integer,Nothing}=nothing, unit::Bool=true) +function fit( + ::Type{UnitRangeTransform}, X::AbstractMatrix{<:Real}; + dims::Union{Integer, Nothing} = nothing, unit::Bool = true + ) if dims === nothing Base.depwarn("fit(t, x) is deprecated: use fit(t, x, dims=2) instead", :fit) dims = 2 @@ -288,8 +296,10 @@ function _compute_extrema(X::AbstractMatrix, dims::Integer) return tmin, tmax end -function fit(::Type{UnitRangeTransform}, X::AbstractVector{<:Real}; - dims::Integer=1, unit::Bool=true) +function fit( + ::Type{UnitRangeTransform}, X::AbstractVector{<:Real}; + dims::Integer = 1, unit::Bool = true + ) if dims != 1 throw(DomainError(dims, "fit only accept dims=1 over a vector. 
Try fit(t, x, dims=1).")) end @@ -301,15 +311,15 @@ end function transform!(y::AbstractMatrix{<:Real}, t::UnitRangeTransform, x::AbstractMatrix{<:Real}) if t.dims == 1 l = t.len - size(x,2) == size(y,2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) - n = size(x,1) - size(y,1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) + size(x, 2) == size(y, 2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) + n = size(x, 1) + size(y, 1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) tmin = t.min tscale = t.scale if t.unit - broadcast!((x,s,m)->(x-m)*s, y, x, tscale', tmin') + broadcast!((x, s, m) -> (x - m) * s, y, x, tscale', tmin') else broadcast!(*, y, x, tscale') end @@ -323,15 +333,15 @@ end function reconstruct!(x::AbstractMatrix{<:Real}, t::UnitRangeTransform, y::AbstractMatrix{<:Real}) if t.dims == 1 l = t.len - size(x,2) == size(y,2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) - n = size(y,1) - size(x,1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) + size(x, 2) == size(y, 2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) + n = size(y, 1) + size(x, 1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) tmin = t.min tscale = t.scale if t.unit - broadcast!((y,s,m)->y/s+m, x, y, tscale', tmin') + broadcast!((y, s, m) -> y / s + m, x, y, tscale', tmin') else broadcast!(/, x, y, tscale') end diff --git a/src/weights.jl b/src/weights.jl index c7164b33d..5f7eaf2ee 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -1,5 +1,5 @@ ##### Weight vector ##### -abstract type AbstractWeights{S<:Real, T<:Real, V<:AbstractVector{T}} <: AbstractVector{T} end +abstract type AbstractWeights{S <: Real, T <: Real, V <: AbstractVector{T}} <: AbstractVector{T} end """ @weights name @@ -9,15 +9,15 @@ and stores the `values` (`V<:AbstractVector{<:Real}`) and `sum` (`S<:Real`). 
""" macro weights(name) return quote - mutable struct $name{S<:Real, T<:Real, V<:AbstractVector{T}} <: AbstractWeights{S, T, V} + mutable struct $name{S <: Real, T <: Real, V <: AbstractVector{T}} <: AbstractWeights{S, T, V} values::V sum::S - function $(esc(name)){S, T, V}(values, sum) where {S<:Real, T<:Real, V<:AbstractVector{T}} + function $(esc(name)){S, T, V}(values, sum) where {S <: Real, T <: Real, V <: AbstractVector{T}} isfinite(sum) || throw(ArgumentError("weights cannot contain Inf or NaN values")) return new{S, T, V}(values, sum) end end - $(esc(name))(values::AbstractVector{T}, sum::S) where {S<:Real, T<:Real} = $(esc(name)){S, T, typeof(values)}(values, sum) + $(esc(name))(values::AbstractVector{T}, sum::S) where {S <: Real, T <: Real} = $(esc(name)){S, T, typeof(values)}(values, sum) $(esc(name))(values::AbstractVector{<:Real}) = $(esc(name))(values, sum(values)) end end @@ -28,7 +28,7 @@ isempty(wv::AbstractWeights) = isempty(wv.values) size(wv::AbstractWeights) = size(wv.values) Base.axes(wv::AbstractWeights) = Base.axes(wv.values) -Base.IndexStyle(::Type{<:AbstractWeights{S,T,V}}) where {S,T,V} = IndexStyle(V) +Base.IndexStyle(::Type{<:AbstractWeights{S, T, V}}) where {S, T, V} = IndexStyle(V) Base.dataids(wv::AbstractWeights) = Base.dataids(wv.values) @@ -36,13 +36,13 @@ Base.convert(::Type{Vector}, wv::AbstractWeights) = convert(Vector, wv.values) @propagate_inbounds function Base.getindex(wv::AbstractWeights, i::Integer) @boundscheck checkbounds(wv, i) - wv.values[i] + return wv.values[i] end -@propagate_inbounds function Base.getindex(wv::W, i::AbstractArray) where W <: AbstractWeights +@propagate_inbounds function Base.getindex(wv::W, i::AbstractArray) where {W <: AbstractWeights} @boundscheck checkbounds(wv, i) v = wv.values[i] - W(v, sum(v)) + return W(v, sum(v)) end Base.getindex(wv::W, ::Colon) where {W <: AbstractWeights} = W(copy(wv.values), sum(wv)) @@ -53,7 +53,7 @@ Base.getindex(wv::W, ::Colon) where {W <: AbstractWeights} = 
W(copy(wv.values), isfinite(sum) || throw(ArgumentError("weights cannot contain Inf or NaN values")) wv.values[i] = v wv.sum = sum - v + return v end """ @@ -64,7 +64,7 @@ Compute a bias correction factor for calculating `var`, `std` and `cov` with (i.e. [Bessel's correction](https://en.wikipedia.org/wiki/Bessel's_correction)), otherwise returns ``\\frac{1}{n}`` (i.e. no correction). """ -@inline varcorrection(n::Integer, corrected::Bool=false) = 1 / (n - Int(corrected)) +@inline varcorrection(n::Integer, corrected::Bool = false) = 1 / (n - Int(corrected)) @weights Weights @@ -94,10 +94,14 @@ weights(vs::AbstractVector{<:Real}) = Weights(vs) Returns ``\\frac{1}{\\sum w}`` when `corrected=false` and throws an `ArgumentError` if `corrected=true`. """ -@inline function varcorrection(w::Weights, corrected::Bool=false) - corrected && throw(ArgumentError("Weights type does not support bias correction: " * - "use FrequencyWeights, AnalyticWeights or ProbabilityWeights if applicable.")) - 1 / w.sum +@inline function varcorrection(w::Weights, corrected::Bool = false) + corrected && throw( + ArgumentError( + "Weights type does not support bias correction: " * + "use FrequencyWeights, AnalyticWeights or ProbabilityWeights if applicable." 
+ ) + ) + return 1 / w.sum end @weights AnalyticWeights @@ -129,11 +133,11 @@ aweights(vs::AbstractArray{<:Real}) = AnalyticWeights(vec(vs)) * `corrected=true`: ``\\frac{1}{\\sum w - \\sum {w^2} / \\sum w}`` * `corrected=false`: ``\\frac{1}{\\sum w}`` """ -@inline function varcorrection(w::AnalyticWeights, corrected::Bool=false) +@inline function varcorrection(w::AnalyticWeights, corrected::Bool = false) s = w.sum - if corrected - sum_sn = sum(x -> (x / s) ^ 2, w) + return if corrected + sum_sn = sum(x -> (x / s)^2, w) 1 / (s * (1 - sum_sn)) else 1 / s @@ -167,10 +171,10 @@ fweights(vs::AbstractArray{<:Real}) = FrequencyWeights(vec(vs)) * `corrected=true`: ``\\frac{1}{\\sum{w} - 1}`` * `corrected=false`: ``\\frac{1}{\\sum w}`` """ -@inline function varcorrection(w::FrequencyWeights, corrected::Bool=false) +@inline function varcorrection(w::FrequencyWeights, corrected::Bool = false) s = w.sum - if corrected + return if corrected 1 / (s - 1) else 1 / s @@ -205,10 +209,10 @@ pweights(vs::AbstractArray{<:Real}) = ProbabilityWeights(vec(vs)) * `corrected=true`: ``\\frac{n}{(n - 1) \\sum w}``, where ``n`` equals `count(!iszero, w)` * `corrected=false`: ``\\frac{1}{\\sum w}`` """ -@inline function varcorrection(w::ProbabilityWeights, corrected::Bool=false) +@inline function varcorrection(w::ProbabilityWeights, corrected::Bool = false) s = w.sum - if corrected + return if corrected n = count(!iszero, w) n / (s * (n - 1)) else @@ -282,7 +286,7 @@ eweights(n::Integer, λ::Real; kwargs...) = _eweights(1:n, λ, n; kwargs...) eweights(t::AbstractVector, r::AbstractRange, λ::Real; kwargs...) = _eweights(something.(indexin(t, r)), λ, length(r); kwargs...) 
-function _eweights(t::AbstractArray{<:Integer}, λ::Real, n::Integer; scale::Union{Bool, Nothing}=nothing) +function _eweights(t::AbstractArray{<:Integer}, λ::Real, n::Integer; scale::Union{Bool, Nothing} = nothing) 0 < λ <= 1 || throw(ArgumentError("Smoothing factor must be between 0 and 1")) f = depcheck(:eweights, :scale, scale) ? _scaled_eweight : _unscaled_eweight @@ -292,7 +296,7 @@ function _eweights(t::AbstractArray{<:Integer}, λ::Real, n::Integer; scale::Uni end s = sum(w0) - Weights(w0, s) + return Weights(w0, s) end _unscaled_eweight(i, λ, n) = λ * (1 - λ)^(1 - i) @@ -300,7 +304,7 @@ _scaled_eweight(i, λ, n) = (1 - λ)^(n - i) # NOTE: no variance correction is implemented for exponential weights -struct UnitWeights{T<:Real} <: AbstractWeights{Int, T, V where V<:Vector{T}} +struct UnitWeights{T <: Real} <: AbstractWeights{Int, T, V where {V <: Vector{T}}} len::Int end @@ -311,7 +315,7 @@ Construct a `UnitWeights` vector with length `s` and weight elements of type `T` All weight elements are identically one. 
""" UnitWeights -sum(wv::UnitWeights{T}) where T = convert(T, length(wv)) +sum(wv::UnitWeights{T}) where {T} = convert(T, length(wv)) isempty(wv::UnitWeights) = iszero(wv.len) length(wv::UnitWeights) = wv.len size(wv::UnitWeights) = tuple(length(wv)) @@ -320,19 +324,19 @@ Base.axes(wv::UnitWeights) = tuple(Base.OneTo(length(wv))) Base.dataids(::UnitWeights) = () Base.convert(::Type{Vector}, wv::UnitWeights{T}) where {T} = ones(T, length(wv)) -@propagate_inbounds function Base.getindex(wv::UnitWeights{T}, i::Integer) where T +@propagate_inbounds function Base.getindex(wv::UnitWeights{T}, i::Integer) where {T} @boundscheck checkbounds(wv, i) - one(T) + return one(T) end -@propagate_inbounds function Base.getindex(wv::UnitWeights{T}, i::AbstractArray{<:Int}) where T +@propagate_inbounds function Base.getindex(wv::UnitWeights{T}, i::AbstractArray{<:Int}) where {T} @boundscheck checkbounds(wv, i) - UnitWeights{T}(length(i)) + return UnitWeights{T}(length(i)) end -function Base.getindex(wv::UnitWeights{T}, i::AbstractArray{Bool}) where T - length(wv) == length(i) || throw(DimensionMismatch()) - UnitWeights{T}(count(i)) +function Base.getindex(wv::UnitWeights{T}, i::AbstractArray{Bool}) where {T} + length(wv) == length(i) || throw(DimensionMismatch()) + return UnitWeights{T}(count(i)) end Base.getindex(wv::UnitWeights{T}, ::Colon) where {T} = UnitWeights{T}(wv.len) @@ -359,8 +363,8 @@ julia> uweights(Float64, 3) 1.0 ``` """ -uweights(s::Int) = UnitWeights{Int}(s) -uweights(::Type{T}, s::Int) where {T<:Real} = UnitWeights{T}(s) +uweights(s::Int) = UnitWeights{Int}(s) +uweights(::Type{T}, s::Int) where {T <: Real} = UnitWeights{T}(s) """ varcorrection(w::UnitWeights, corrected=false) @@ -370,8 +374,8 @@ uweights(::Type{T}, s::Int) where {T<:Real} = UnitWeights{T}(s) This definition is equivalent to the correction applied to unweighted data. """ -@inline function varcorrection(w::UnitWeights, corrected::Bool=false) - corrected ? 
(1 / (w.len - 1)) : (1 / w.len) +@inline function varcorrection(w::UnitWeights, corrected::Bool = false) + return corrected ? (1 / (w.len - 1)) : (1 / w.len) end #### Equality tests ##### @@ -379,15 +383,15 @@ end for w in (AnalyticWeights, FrequencyWeights, ProbabilityWeights, Weights) @eval begin Base.isequal(x::$w, y::$w) = isequal(x.sum, y.sum) && isequal(x.values, y.values) - Base.:(==)(x::$w, y::$w) = (x.sum == y.sum) && (x.values == y.values) + Base.:(==)(x::$w, y::$w) = (x.sum == y.sum) && (x.values == y.values) end end Base.isequal(x::UnitWeights, y::UnitWeights) = isequal(x.len, y.len) -Base.:(==)(x::UnitWeights, y::UnitWeights) = (x.len == y.len) +Base.:(==)(x::UnitWeights, y::UnitWeights) = (x.len == y.len) Base.isequal(x::AbstractWeights, y::AbstractWeights) = false -Base.:(==)(x::AbstractWeights, y::AbstractWeights) = false +Base.:(==)(x::AbstractWeights, y::AbstractWeights) = false Base.allequal(wv::AbstractWeights) = allequal(wv.values) Base.allequal(::UnitWeights) = true @@ -404,7 +408,7 @@ Base.allunique(wv::UnitWeights) = length(wv) <= 1 Compute the weighted sum of an array `v` with weights `w`, optionally over the dimension `dim`. 
""" -wsum(v::AbstractArray, w::AbstractVector, dims::Colon=:) = transpose(w) * vec(v) +wsum(v::AbstractArray, w::AbstractVector, dims::Colon = :) = transpose(w) * vec(v) # Optimized methods (to ensure we use BLAS when possible) for W in (AnalyticWeights, FrequencyWeights, ProbabilityWeights, Weights) @@ -441,57 +445,65 @@ end ## general Cartesian-based weighted sum across dimensions -@generated function _wsum_general!(R::AbstractArray{RT}, f::supertype(typeof(abs)), - A::AbstractArray{T,N}, w::AbstractVector{WT}, dim::Int, init::Bool) where {T,RT,WT,N} - quote +@generated function _wsum_general!( + R::AbstractArray{RT}, f::supertype(typeof(abs)), + A::AbstractArray{T, N}, w::AbstractVector{WT}, dim::Int, init::Bool + ) where {T, RT, WT, N} + return quote init && fill!(R, zero(RT)) wi = zero(WT) if dim == 1 - @nextract $N sizeR d->size(R,d) + @nextract $N sizeR d -> size(R, d) sizA1 = size(A, 1) - @nloops $N i d->(d>1 ? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @nloops $N i d -> (d > 1 ? (1:size(A, d)) : (1:1)) d -> (j_d = sizeR_d == 1 ? 
1 : i_d) begin r = (@nref $N R j) - for i_1 = 1:sizA1 + for i_1 in 1:sizA1 r += f(@nref $N A i) * w[i_1] end (@nref $N R j) = r end else - @nloops $N i A d->(if d == dim - wi = w[i_d] - j_d = 1 - else - j_d = i_d - end) (@nref $N R j) += f(@nref $N A i) * wi + @nloops $N i A d -> ( + if d == dim + wi = w[i_d] + j_d = 1 + else + j_d = i_d + end + ) (@nref $N R j) += f(@nref $N A i) * wi end return R end end -@generated function _wsum_centralize!(R::AbstractArray{RT}, f::supertype(typeof(abs)), - A::AbstractArray{T,N}, w::AbstractVector{WT}, means, - dim::Int, init::Bool) where {T,RT,WT,N} - quote +@generated function _wsum_centralize!( + R::AbstractArray{RT}, f::supertype(typeof(abs)), + A::AbstractArray{T, N}, w::AbstractVector{WT}, means, + dim::Int, init::Bool + ) where {T, RT, WT, N} + return quote init && fill!(R, zero(RT)) wi = zero(WT) if dim == 1 - @nextract $N sizeR d->size(R,d) + @nextract $N sizeR d -> size(R, d) sizA1 = size(A, 1) - @nloops $N i d->(d>1 ? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @nloops $N i d -> (d > 1 ? (1:size(A, d)) : (1:1)) d -> (j_d = sizeR_d == 1 ? 1 : i_d) begin r = (@nref $N R j) m = (@nref $N means j) - for i_1 = 1:sizA1 + for i_1 in 1:sizA1 r += f((@nref $N A i) - m) * w[i_1] end (@nref $N R j) = r end else - @nloops $N i A d->(if d == dim - wi = w[i_d] - j_d = 1 - else - j_d = i_d - end) (@nref $N R j) += f((@nref $N A i) - (@nref $N means j)) * wi + @nloops $N i A d -> ( + if d == dim + wi = w[i_d] + j_d = 1 + else + j_d = i_d + end + ) (@nref $N R j) += f((@nref $N A i) - (@nref $N means j)) * wi end return R end @@ -502,7 +514,7 @@ _wsum!(R::AbstractArray, A::AbstractArray, w::AbstractVector, dim::Int, init::Bo ## wsum! 
and wsum -wsumtype(::Type{T}, ::Type{W}) where {T,W} = typeof(zero(T) * zero(W) + zero(T) * zero(W)) +wsumtype(::Type{T}, ::Type{W}) where {T, W} = typeof(zero(T) * zero(W) + zero(T) * zero(W)) """ wsum!(R::AbstractArray, A::AbstractArray, @@ -513,22 +525,22 @@ Compute the weighted sum of `A` with weights `w` over the dimension `dim` and st the result in `R`. If `init=false`, the sum is added to `R` rather than starting from zero. """ -function wsum!(R::AbstractArray, A::AbstractArray{T,N}, w::AbstractVector, dim::Int; init::Bool=true) where {T,N} +function wsum!(R::AbstractArray, A::AbstractArray{T, N}, w::AbstractVector, dim::Int; init::Bool = true) where {T, N} 1 <= dim <= N || error("dim should be within [1, $N]") ndims(R) <= N || error("ndims(R) should not exceed $N") - length(w) == size(A,dim) || throw(DimensionMismatch("Inconsistent array dimension.")) + length(w) == size(A, dim) || throw(DimensionMismatch("Inconsistent array dimension.")) # TODO: more careful examination of R's size - _wsum!(R, A, w, dim, init) + return _wsum!(R, A, w, dim, init) end -function wsum(A::AbstractArray{T}, w::AbstractVector{W}, dim::Int) where {T<:Number,W<:Real} - length(w) == size(A,dim) || throw(DimensionMismatch("Inconsistent array dimension.")) - _wsum!(similar(A, wsumtype(T,W), Base.reduced_indices(axes(A), dim)), A, w, dim, true) +function wsum(A::AbstractArray{T}, w::AbstractVector{W}, dim::Int) where {T <: Number, W <: Real} + length(w) == size(A, dim) || throw(DimensionMismatch("Inconsistent array dimension.")) + return _wsum!(similar(A, wsumtype(T, W), Base.reduced_indices(axes(A), dim)), A, w, dim, true) end function wsum(A::AbstractArray{<:Number}, w::UnitWeights, dim::Int) size(A, dim) != length(w) && throw(DimensionMismatch("Inconsistent array dimension.")) - return sum(A, dims=dim) + return sum(A, dims = dim) end ## extended sum! and wsum @@ -542,8 +554,8 @@ Compute the weighted sum of `A` with weights `w` over the dimension `dim` and st the result in `R`. 
If `init=false`, the sum is added to `R` rather than starting from zero. """ -Base.sum!(R::AbstractArray, A::AbstractArray, w::AbstractWeights{<:Real}, dim::Int; init::Bool=true) = - wsum!(R, A, w, dim; init=init) +Base.sum!(R::AbstractArray, A::AbstractArray, w::AbstractWeights{<:Real}, dim::Int; init::Bool = true) = + wsum!(R, A, w, dim; init = init) """ sum(v::AbstractArray, w::AbstractWeights{<:Real}; [dims]) @@ -551,14 +563,14 @@ Base.sum!(R::AbstractArray, A::AbstractArray, w::AbstractWeights{<:Real}, dim::I Compute the weighted sum of an array `v` with weights `w`, optionally over the dimension `dims`. """ -Base.sum(A::AbstractArray, w::AbstractWeights{<:Real}; dims::Union{Colon,Int}=:) = +Base.sum(A::AbstractArray, w::AbstractWeights{<:Real}; dims::Union{Colon, Int} = :) = wsum(A, w, dims) ##### Weighted means ##### function wmean(v::AbstractArray{<:Number}, w::AbstractVector) Base.depwarn("wmean is deprecated, use mean(v, weights(w)) instead.", :wmean) - mean(v, weights(w)) + return mean(v, weights(w)) end """ @@ -567,14 +579,14 @@ end Compute the weighted mean of array `A` with weight vector `w` (of type `AbstractWeights`) along dimension `dims`, and write results to `R`. 
""" -mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights; dims::Union{Nothing,Int}=nothing) = +mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights; dims::Union{Nothing, Int} = nothing) = _mean!(R, A, w, dims) _mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights, dims::Nothing) = throw(ArgumentError("dims argument must be provided")) _mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights, dims::Int) = rmul!(Base.sum!(R, A, w, dims), inv(sum(w))) -wmeantype(::Type{T}, ::Type{W}) where {T,W} = typeof((zero(T)*zero(W) + zero(T)*zero(W)) / one(W)) +wmeantype(::Type{T}, ::Type{W}) where {T, W} = typeof((zero(T) * zero(W) + zero(T) * zero(W)) / one(W)) """ mean(A::AbstractArray, w::AbstractWeights[, dims::Int]) @@ -591,17 +603,17 @@ w = rand(n) mean(x, weights(w)) ``` """ -mean(A::AbstractArray, w::AbstractWeights; dims::Union{Colon,Int}=:) = +mean(A::AbstractArray, w::AbstractWeights; dims::Union{Colon, Int} = :) = _mean(A, w, dims) _mean(A::AbstractArray, w::AbstractWeights, dims::Colon) = sum(A, w) / sum(w) -_mean(A::AbstractArray{T}, w::AbstractWeights{W}, dims::Int) where {T,W} = +_mean(A::AbstractArray{T}, w::AbstractWeights{W}, dims::Int) where {T, W} = _mean!(similar(A, wmeantype(T, W), Base.reduced_indices(axes(A), dims)), A, w, dims) -function mean(A::AbstractArray, w::UnitWeights; dims::Union{Colon,Int}=:) +function mean(A::AbstractArray, w::UnitWeights; dims::Union{Colon, Int} = :) a = (dims === :) ? length(A) : size(A, dims) a != length(w) && throw(DimensionMismatch("Inconsistent array dimension.")) - return mean(A, dims=dims) + return mean(A, dims = dims) end ##### Weighted quantile ##### @@ -626,7 +638,7 @@ is strictly superior to ``h``. The weighted ``p`` quantile is given by ``v_k + with ``γ = (h - S_k)/(S_{k+1} - S_k)``. In particular, when all weights are equal, the function returns the same result as the unweighted `quantile`. 
""" -function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector{<:Real}) where {V, W<:Real} +function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector{<:Real}) where {V, W <: Real} # checks isempty(v) && throw(ArgumentError("quantile of an empty array is undefined")) isempty(p) && throw(ArgumentError("empty quantile array")) @@ -634,15 +646,23 @@ function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector all(x -> 0 <= x <= 1, p) || throw(ArgumentError("input probability out of [0,1] range")) w.sum == 0 && throw(ArgumentError("weight vector cannot sum to zero")) - length(v) == length(w) || throw(ArgumentError("data and weight vectors must be the same size," * - "got $(length(v)) and $(length(w))")) + length(v) == length(w) || throw( + ArgumentError( + "data and weight vectors must be the same size," * + "got $(length(v)) and $(length(w))" + ) + ) for x in w.values x < 0 && throw(ArgumentError("weight vector cannot contain negative entries")) end isa(w, FrequencyWeights) && !(eltype(w) <: Integer) && any(!isinteger, w) && - throw(ArgumentError("The values of the vector of `FrequencyWeights` must be numerically" * - "equal to integers. Use `ProbabilityWeights` or `AnalyticWeights` instead.")) + throw( + ArgumentError( + "The values of the vector of `FrequencyWeights` must be numerically" * + "equal to integers. Use `ProbabilityWeights` or `AnalyticWeights` instead." 
+ ) + ) # ::Bool is there to prevent JET from reporting a problem on Julia 1.10 any(ismissing, v)::Bool && @@ -660,7 +680,7 @@ function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector # prepare out vector v1 = vw[1][1] - out = Vector{typeof(v1 + zero(eltype(p))*zero(W)*zero(v1))}(undef, length(p)) + out = Vector{typeof(v1 + zero(eltype(p)) * zero(W) * zero(v1))}(undef, length(p)) fill!(out, vw[end][1]) # This behavior isn't consistent with Statistics.quantile, @@ -684,8 +704,8 @@ function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector while Sk <= h k += 1 if k > N - # out was initialized with maximum v - return out + # out was initialized with maximum v + return out end Skold, vkold = Sk, vk vk, wk = vw[k] diff --git a/test/counts.jl b/test/counts.jl index cb16abe3f..6bd7445d3 100644 --- a/test/counts.jl +++ b/test/counts.jl @@ -10,34 +10,34 @@ n = 5000 x0 = deepcopy(x) w0 = deepcopy(w) - c0 = Int[count(v->v == i, x) for i in 1:5] - @test counts(x, 5) == c0 - @test counts(x .+ 1, 2:6) == c0 - @test proportions(x, 1:5) ≈ (c0 ./ n) + c0 = Int[count(v -> v == i, x) for i in 1:5] + @test counts(x, 5) == c0 + @test counts(x .+ 1, 2:6) == c0 + @test proportions(x, 1:5) ≈ (c0 ./ n) @test counts(reshape(x, 10, 50, 10), 5) == c0 - @test counts(x) == c0 - @test proportions(x) ≈ (c0 ./ n) + @test counts(x) == c0 + @test proportions(x) ≈ (c0 ./ n) @test counts(reshape(x, 10, 50, 10)) == c0 c0 = reshape(c0, 1, 5) - @test addcounts!(fill(0, 1, 5), x, 1:5) == c0 + @test addcounts!(fill(0, 1, 5), x, 1:5) == c0 @test addcounts!(fill(0, 1, 5), reshape(x, 10, 50, 10), 1:5) == c0 c0 = Float64[sum(w.values[x .== i]) for i in 1:5] - @test counts(x, 5, w) ≈ c0 - @test counts(x .+ 1, 2:6, w) ≈ c0 - @test proportions(x, 1:5, w) ≈ (c0 ./ sum(w)) + @test counts(x, 5, w) ≈ c0 + @test counts(x .+ 1, 2:6, w) ≈ c0 + @test proportions(x, 1:5, w) ≈ (c0 ./ sum(w)) @test counts(reshape(x, 10, 50, 10), 5, w) ≈ c0 # Perhaps this should not be allowed - 
@test counts(x, w) ≈ c0 - @test counts(x .+ 1, 2:6, w) ≈ c0 - @test proportions(x, w) ≈ (c0 ./ sum(w)) + @test counts(x, w) ≈ c0 + @test counts(x .+ 1, 2:6, w) ≈ c0 + @test proportions(x, w) ≈ (c0 ./ sum(w)) @test counts(reshape(x, 10, 50, 10), w) ≈ c0 # Perhaps this should not be allowed #addcounts! to row matrix c0 = reshape(c0, 1, 5) - @test addcounts!(fill(0.0, 1, 5), x, 1:5, w) ≈ c0 + @test addcounts!(fill(0.0, 1, 5), x, 1:5, w) ≈ c0 @test addcounts!(fill(0.0, 1, 5), reshape(x, 10, 50, 10), 1:5, w) ≈ c0 # Perhaps this should not be allowed @test x == x0 @@ -53,26 +53,26 @@ end y0 = deepcopy(y) w0 = deepcopy(w) - c0 = Int[count(t->t != 0, (x .== i) .& (y .== j)) for i in 1:4, j in 1:5] - @test counts(x, y, (4, 5)) == c0 - @test counts(x .+ 2, y .+ 3, (3:6, 4:8)) == c0 - @test proportions(x, y, (1:4, 1:5)) ≈ (c0 ./ n) + c0 = Int[count(t -> t != 0, (x .== i) .& (y .== j)) for i in 1:4, j in 1:5] + @test counts(x, y, (4, 5)) == c0 + @test counts(x .+ 2, y .+ 3, (3:6, 4:8)) == c0 + @test proportions(x, y, (1:4, 1:5)) ≈ (c0 ./ n) @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10), (4, 5)) == c0 - @test counts(x, y) == c0 - @test counts(x .+ 2, y .+ 3, (3:6, 4:8)) == c0 - @test proportions(x, y,) ≈ (c0 ./ n) + @test counts(x, y) == c0 + @test counts(x .+ 2, y .+ 3, (3:6, 4:8)) == c0 + @test proportions(x, y) ≈ (c0 ./ n) @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10)) == c0 c0 = Float64[sum(w.values[(x .== i) .& (y .== j)]) for i in 1:4, j in 1:5] - @test counts(x, y, (4, 5), w) ≈ c0 - @test counts(x .+ 2, y .+ 3, (3:6, 4:8), w) ≈ c0 - @test proportions(x, y, (1:4, 1:5), w) ≈ (c0 ./ sum(w)) + @test counts(x, y, (4, 5), w) ≈ c0 + @test counts(x .+ 2, y .+ 3, (3:6, 4:8), w) ≈ c0 + @test proportions(x, y, (1:4, 1:5), w) ≈ (c0 ./ sum(w)) @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10), (4, 5), w) ≈ c0 # Perhaps this should not be allowed - @test counts(x, y, w) ≈ c0 - @test counts(x .+ 2, y .+ 3, (3:6, 4:8), w) ≈ c0 - @test proportions(x, 
y, w) ≈ (c0 ./ sum(w)) + @test counts(x, y, w) ≈ c0 + @test counts(x .+ 2, y .+ 3, (3:6, 4:8), w) ≈ c0 + @test proportions(x, y, w) ≈ (c0 ./ sum(w)) @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10), w) ≈ c0 # Perhaps this should not be allowed @test x == x0 @@ -97,13 +97,13 @@ end @test cm_any_itr isa Dict{Any, Int} pm = proportionmap(x) - @test pm["a"] ≈ (1/2) - @test pm["b"] ≈ (1/3) - @test pm["c"] ≈ (1/6) + @test pm["a"] ≈ (1 / 2) + @test pm["b"] ≈ (1 / 3) + @test pm["c"] ≈ (1 / 6) # testing the radixsort branch of countmap - xx = repeat([6, 1, 3, 1], outer=100_000) + xx = repeat([6, 1, 3, 1], outer = 100_000) cm = countmap(xx) @test cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000) @@ -113,36 +113,36 @@ end @test cm_missing == cm cm_any_itr = countmap((i for i in xx)) - @test cm_any_itr isa Dict{Any,Int} # no knowledge about type + @test cm_any_itr isa Dict{Any, Int} # no knowledge about type @test cm_any_itr == cm # with multidimensional array - @test countmap(reshape(xx, 20, 100, 20, 10); alg=:radixsort) == cm - @test countmap(reshape(xx, 20, 100, 20, 10); alg=:dict) == cm + @test countmap(reshape(xx, 20, 100, 20, 10); alg = :radixsort) == cm + @test countmap(reshape(xx, 20, 100, 20, 10); alg = :dict) == cm # with empty array @test countmap(Int[]) == Dict{Int, Int}() # testing the radixsort-based addcounts - xx = repeat([6, 1, 3, 1], outer=100_000) + xx = repeat([6, 1, 3, 1], outer = 100_000) cm = Dict{Int, Int}() - StatsBase.addcounts_radixsort!(cm,xx) + StatsBase.addcounts_radixsort!(cm, xx) @test cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000) - xx2 = repeat([7, 1, 3, 1], outer=100_000) - StatsBase.addcounts_radixsort!(cm,xx2) + xx2 = repeat([7, 1, 3, 1], outer = 100_000) + StatsBase.addcounts_radixsort!(cm, xx2) @test cm == Dict(1 => 400_000, 3 => 200_000, 6 => 100_000, 7 => 100_000) # with iterator cm_missing = Dict{Int, Int}() - StatsBase.addcounts_radixsort!(cm_missing,skipmissing(xx)) + StatsBase.addcounts_radixsort!(cm_missing, 
skipmissing(xx)) @test cm_missing == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000) - StatsBase.addcounts_radixsort!(cm_missing,skipmissing(xx2)) + StatsBase.addcounts_radixsort!(cm_missing, skipmissing(xx2)) @test cm_missing == Dict(1 => 400_000, 3 => 200_000, 6 => 100_000, 7 => 100_000) # testing the Dict-based addcounts cm = Dict{Int, Int}() cm_itr = Dict{Int, Int}() - StatsBase.addcounts_dict!(cm,xx) - StatsBase.addcounts_dict!(cm_itr,skipmissing(xx)) + StatsBase.addcounts_dict!(cm, xx) + StatsBase.addcounts_dict!(cm_itr, skipmissing(xx)) @test cm_itr == cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000) @test cm_itr isa Dict{Int, Int} @@ -174,16 +174,16 @@ end end # -0.0 and NaN - @test countmap([0.0, -0.0, 0.0, -0.0, -0.0], alg=:dict) == - countmap([0.0, -0.0, 0.0, -0.0, -0.0], alg=:radixsort) == + @test countmap([0.0, -0.0, 0.0, -0.0, -0.0], alg = :dict) == + countmap([0.0, -0.0, 0.0, -0.0, -0.0], alg = :radixsort) == Dict(0.0 => 2, -0.0 => 3) - @test countmap([NaN, NaN], alg=:dict) == - countmap([NaN, NaN], alg=:radixsort) == + @test countmap([NaN, NaN], alg = :dict) == + countmap([NaN, NaN], alg = :radixsort) == Dict(NaN => 2) end @testset "views" begin - X = view([1,1,1,2,2], 1:5) + X = view([1, 1, 1, 2, 2], 1:5) @test countmap(X) == countmap(copy(X)) end @@ -191,19 +191,19 @@ end x = rand(1:5, n) w = rand(n) xw = weights(w) - y = OffsetArray(x, n÷2) - yw = weights(OffsetArray(w, n÷2)) + y = OffsetArray(x, n ÷ 2) + yw = weights(OffsetArray(w, n ÷ 2)) z = OffsetArray(x, -2n) zw = weights(OffsetArray(w, -2n)) # proportions calls counts which calls addcounts! 
- @test proportions(x) == proportions(y) == proportions(z) - @test proportions(x, xw) == proportions(y, yw) == proportions(z, zw) - @test proportionmap(x) == proportionmap(y) == proportionmap(z) + @test proportions(x) == proportions(y) == proportions(z) + @test proportions(x, xw) == proportions(y, yw) == proportions(z, zw) + @test proportionmap(x) == proportionmap(y) == proportionmap(z) @test proportionmap(x, xw) == proportionmap(y, yw) == proportionmap(z, zw) @test countmap(x) == countmap(x; alg = :dict) == countmap(x; alg = :radixsort) == - countmap(y) == countmap(y; alg = :dict) == countmap(y; alg = :radixsort) == - countmap(z) == countmap(z; alg = :dict) == countmap(z; alg = :radixsort) + countmap(y) == countmap(y; alg = :dict) == countmap(y; alg = :radixsort) == + countmap(z) == countmap(z; alg = :dict) == countmap(z; alg = :radixsort) @test proportionmap(x, xw) == proportionmap(y, yw) == proportionmap(z, zw) # countmap and proportionmap only support the :dict algorithm for weighted sums. end diff --git a/test/cov.jl b/test/cov.jl index 6cc16ebc9..536013990 100644 --- a/test/cov.jl +++ b/test/cov.jl @@ -4,329 +4,329 @@ using LinearAlgebra, Random, Test struct EmptyCovarianceEstimator <: CovarianceEstimator end @testset "StatsBase.Covariance" begin -weight_funcs = (weights, aweights, fweights, pweights) - -function test_isapprox_preserves_symherm_structure(f::F, x::AbstractMatrix, y::AbstractMatrix, args...) where F - for wrapper in (identity, x -> Symmetric(x, :U), x -> Symmetric(x, :L), x -> Hermitian(x, :U), x -> Hermitian(x, :L)) - A = wrapper(copy(x)) - fA = @inferred(f(A, args...)) - @test fA ≈ y - if f === StatsBase.cov2cor! || f === StatsBase.cor2cov! - @test fA === A - if A isa Union{Symmetric,Hermitian} - @test parent(fA) != fA # only active triangle is written to - end - else - @test fA !== A - if A isa Union{Symmetric,Hermitian} - @test fA isa (A isa Symmetric ? 
Symmetric : Hermitian) - @test fA.uplo == A.uplo + weight_funcs = (weights, aweights, fweights, pweights) + + function test_isapprox_preserves_symherm_structure(f::F, x::AbstractMatrix, y::AbstractMatrix, args...) where {F} + for wrapper in (identity, x -> Symmetric(x, :U), x -> Symmetric(x, :L), x -> Hermitian(x, :U), x -> Hermitian(x, :L)) + A = wrapper(copy(x)) + fA = @inferred(f(A, args...)) + @test fA ≈ y + if f === StatsBase.cov2cor! || f === StatsBase.cor2cov! + @test fA === A + if A isa Union{Symmetric, Hermitian} + @test parent(fA) != fA # only active triangle is written to + end + else + @test fA !== A + if A isa Union{Symmetric, Hermitian} + @test fA isa (A isa Symmetric ? Symmetric : Hermitian) + @test fA.uplo == A.uplo + end end end end -end - -@testset "$f" for f in weight_funcs - X = randn(3, 8) - - Z1 = X .- mean(X, dims = 1) - Z2 = X .- mean(X, dims = 2) - - w1 = rand(3) - w2 = rand(8) - - # varcorrection is negative if sum of weights is smaller than 1 - if f === fweights - w1[1] += 1 - w2[1] += 1 - end - - wv1 = f(w1) - wv2 = f(w2) - - Z1w = X .- mean(X, wv1, dims=1) - Z2w = X .- mean(X, wv2, dims=2) - - ## reference results - - S1 = Z1'Z1 - S2 = Z2 * Z2' - Sz1 = X'X - Sz2 = X * X' + @testset "$f" for f in weight_funcs + X = randn(3, 8) - S1w = Z1w' * Matrix(Diagonal(w1)) * Z1w - S2w = Z2w * Matrix(Diagonal(w2)) * Z2w' + Z1 = X .- mean(X, dims = 1) + Z2 = X .- mean(X, dims = 2) - Sz1w = X' * Matrix(Diagonal(w1)) * X - Sz2w = X * Matrix(Diagonal(w2)) * X' + w1 = rand(3) + w2 = rand(8) - @testset "Scattermat" begin - @test scattermat(X) ≈ S1 - @test scattermat(X, dims=2) ≈ S2 - - @test StatsBase.scattermat(X, mean=0) ≈ Sz1 - @test StatsBase.scattermat(X, mean=0, dims=2) ≈ Sz2 - - @test StatsBase.scattermat(X, mean=mean(X, dims=1)) ≈ S1 - @test StatsBase.scattermat(X, mean=mean(X, dims=2), dims=2) ≈ S2 + # varcorrection is negative if sum of weights is smaller than 1 + if f === fweights + w1[1] += 1 + w2[1] += 1 + end - @test StatsBase.scattermat(X, 
mean=zeros(1,8)) ≈ Sz1 - @test StatsBase.scattermat(X, mean=zeros(3), dims=2) ≈ Sz2 + wv1 = f(w1) + wv2 = f(w2) - @testset "Weighted" begin - @test scattermat(X, wv1) ≈ S1w - @test scattermat(X, wv2, dims=2) ≈ S2w + Z1w = X .- mean(X, wv1, dims = 1) + Z2w = X .- mean(X, wv2, dims = 2) - @test StatsBase.scattermat(X, wv1, mean=0) ≈ Sz1w - @test StatsBase.scattermat(X, wv2, mean=0, dims=2) ≈ Sz2w + ## reference results - @test StatsBase.scattermat(X, wv1, mean=mean(X, wv1, dims=1)) ≈ S1w - @test StatsBase.scattermat(X, wv2, mean=mean(X, wv2, dims=2), dims=2) ≈ S2w + S1 = Z1'Z1 + S2 = Z2 * Z2' - @test StatsBase.scattermat(X, wv1, mean=zeros(1,8)) ≈ Sz1w - @test StatsBase.scattermat(X, wv2, mean=zeros(3), dims=2) ≈ Sz2w - end - end + Sz1 = X'X + Sz2 = X * X' - @testset "Uncorrected" begin - @testset "Weighted Covariance" begin - @test cov(X, wv1; corrected=false) ≈ S1w ./ sum(wv1) - @test cov(X, wv2, 2; corrected=false) ≈ S2w ./ sum(wv2) + S1w = Z1w' * Matrix(Diagonal(w1)) * Z1w + S2w = Z2w * Matrix(Diagonal(w2)) * Z2w' - @test StatsBase.covm(X, 0, wv1, 1; corrected=false) ≈ Sz1w ./ sum(wv1) - @test StatsBase.covm(X, 0, wv2, 2; corrected=false) ≈ Sz2w ./ sum(wv2) + Sz1w = X' * Matrix(Diagonal(w1)) * X + Sz2w = X * Matrix(Diagonal(w2)) * X' - @test StatsBase.covm(X, mean(X, wv1, dims=1), wv1, 1; corrected=false) ≈ S1w ./ sum(wv1) - @test StatsBase.covm(X, mean(X, wv2, dims=2), wv2, 2; corrected=false) ≈ S2w ./ sum(wv2) + @testset "Scattermat" begin + @test scattermat(X) ≈ S1 + @test scattermat(X, dims = 2) ≈ S2 - @test StatsBase.covm(X, zeros(1,8), wv1, 1; corrected=false) ≈ Sz1w ./ sum(wv1) - @test StatsBase.covm(X, zeros(3), wv2, 2; corrected=false) ≈ Sz2w ./ sum(wv2) - end + @test StatsBase.scattermat(X, mean = 0) ≈ Sz1 + @test StatsBase.scattermat(X, mean = 0, dims = 2) ≈ Sz2 - @testset "Mean and covariance" begin - (m, C) = mean_and_cov(X; corrected=false) - @test m == mean(X, dims=1) - @test C == cov(X, dims=1, corrected=false) + @test StatsBase.scattermat(X, mean 
= mean(X, dims = 1)) ≈ S1 + @test StatsBase.scattermat(X, mean = mean(X, dims = 2), dims = 2) ≈ S2 - (m, C) = mean_and_cov(X, 1; corrected=false) - @test m == mean(X, dims=1) - @test C == cov(X, dims=1, corrected = false) + @test StatsBase.scattermat(X, mean = zeros(1, 8)) ≈ Sz1 + @test StatsBase.scattermat(X, mean = zeros(3), dims = 2) ≈ Sz2 - (m, C) = mean_and_cov(X, 2; corrected=false) - @test m == mean(X, dims=2) - @test C == cov(X, dims=2, corrected = false) + @testset "Weighted" begin + @test scattermat(X, wv1) ≈ S1w + @test scattermat(X, wv2, dims = 2) ≈ S2w - (m, C) = mean_and_cov(X, wv1; corrected=false) - @test m == mean(X, wv1, dims=1) - @test C == cov(X, wv1, 1, corrected=false) + @test StatsBase.scattermat(X, wv1, mean = 0) ≈ Sz1w + @test StatsBase.scattermat(X, wv2, mean = 0, dims = 2) ≈ Sz2w - (m, C) = mean_and_cov(X, wv1, 1; corrected=false) - @test m == mean(X, wv1, dims=1) - @test C == cov(X, wv1, 1, corrected=false) + @test StatsBase.scattermat(X, wv1, mean = mean(X, wv1, dims = 1)) ≈ S1w + @test StatsBase.scattermat(X, wv2, mean = mean(X, wv2, dims = 2), dims = 2) ≈ S2w - (m, C) = mean_and_cov(X, wv2, 2; corrected=false) - @test m == mean(X, wv2, dims=2) - @test C == cov(X, wv2, 2, corrected=false) - end - @testset "Conversions" begin - std1 = std(X, wv1, 1; corrected=false) - std2 = std(X, wv2, 2; corrected=false) - - cov1 = cov(X, wv1, 1; corrected=false) - cov2 = cov(X, wv2, 2; corrected=false) - - cor1 = cor(X, wv1, 1) - cor2 = cor(X, wv2, 2) - - @testset "cov2cor" begin - test_isapprox_preserves_symherm_structure(cov2cor, cov(X, dims = 1), cor(X, dims = 1), std(X, dims = 1)) - test_isapprox_preserves_symherm_structure(cov2cor, cov(X, dims = 2), cor(X, dims = 2), std(X, dims = 2)) - test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1) - test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2) - test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1, std1) - test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2, 
std2) - end - @testset "StatsBase.cov2cor!" begin - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov(X, dims = 1), cor(X, dims = 1), std(X, dims = 1)) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov(X, dims = 2), cor(X, dims = 2), std(X, dims = 2)) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, cor1) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, cor2) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, cor1, std1) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, cor2, std2) - end - @testset "cor2cov" begin - test_isapprox_preserves_symherm_structure(cor2cov, cor(X, dims = 1), cov(X, dims = 1), std(X, dims = 1)) - test_isapprox_preserves_symherm_structure(cor2cov, cor(X, dims = 2), cov(X, dims = 2), std(X, dims = 2)) - test_isapprox_preserves_symherm_structure(cor2cov, cor1, cov1, std1) - test_isapprox_preserves_symherm_structure(cor2cov, cor2, cov2, std2) - end - @testset "StatsBase.cor2cov!" 
begin - test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor(X, dims = 1), cov(X, dims = 1), std(X, dims = 1)) - test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor(X, dims = 2), cov(X, dims = 2), std(X, dims = 2)) - test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor1, cov1, std1) - test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor2, cov2, std2) + @test StatsBase.scattermat(X, wv1, mean = zeros(1, 8)) ≈ Sz1w + @test StatsBase.scattermat(X, wv2, mean = zeros(3), dims = 2) ≈ Sz2w end end - end - - @testset "Corrected" begin - @testset "Weighted Covariance" begin - if isa(wv1, Weights) - @test_throws ArgumentError cov(X, wv1; corrected=true) - else - var_corr1 = StatsBase.varcorrection(wv1, true) - var_corr2 = StatsBase.varcorrection(wv2, true) - @test cov(X, wv1; corrected=true) ≈ S1w .* var_corr1 - @test cov(X, wv2, 2; corrected=true) ≈ S2w .* var_corr2 + @testset "Uncorrected" begin + @testset "Weighted Covariance" begin + @test cov(X, wv1; corrected = false) ≈ S1w ./ sum(wv1) + @test cov(X, wv2, 2; corrected = false) ≈ S2w ./ sum(wv2) - @test StatsBase.covm(X, 0, wv1, 1; corrected=true) ≈ Sz1w .* var_corr1 - @test StatsBase.covm(X, 0, wv2, 2; corrected=true) ≈ Sz2w .* var_corr2 + @test StatsBase.covm(X, 0, wv1, 1; corrected = false) ≈ Sz1w ./ sum(wv1) + @test StatsBase.covm(X, 0, wv2, 2; corrected = false) ≈ Sz2w ./ sum(wv2) - @test StatsBase.covm(X, mean(X, wv1, dims=1), wv1, 1; corrected=true) ≈ S1w .* var_corr1 - @test StatsBase.covm(X, mean(X, wv2, dims=2), wv2, 2; corrected=true) ≈ S2w .* var_corr2 + @test StatsBase.covm(X, mean(X, wv1, dims = 1), wv1, 1; corrected = false) ≈ S1w ./ sum(wv1) + @test StatsBase.covm(X, mean(X, wv2, dims = 2), wv2, 2; corrected = false) ≈ S2w ./ sum(wv2) - @test StatsBase.covm(X, zeros(1,8), wv1, 1; corrected=true) ≈ Sz1w .* var_corr1 - @test StatsBase.covm(X, zeros(3), wv2, 2; corrected=true) ≈ Sz2w .* var_corr2 + @test StatsBase.covm(X, zeros(1, 8), wv1, 1; corrected = 
false) ≈ Sz1w ./ sum(wv1) + @test StatsBase.covm(X, zeros(3), wv2, 2; corrected = false) ≈ Sz2w ./ sum(wv2) end - end - @testset "Mean and covariance" begin - (m, C) = mean_and_cov(X; corrected=true) - @test m == mean(X, dims=1) - @test C == cov(X, dims=1, corrected = true) - (m, C) = mean_and_cov(X, 1; corrected=true) - @test m == mean(X, dims=1) - @test C == cov(X, dims=1, corrected = true) + @testset "Mean and covariance" begin + (m, C) = mean_and_cov(X; corrected = false) + @test m == mean(X, dims = 1) + @test C == cov(X, dims = 1, corrected = false) - (m, C) = mean_and_cov(X, 2; corrected=true) - @test m == mean(X, dims=2) - @test C == cov(X, dims=2, corrected = true) + (m, C) = mean_and_cov(X, 1; corrected = false) + @test m == mean(X, dims = 1) + @test C == cov(X, dims = 1, corrected = false) - if isa(wv1, Weights) - @test_throws ArgumentError mean_and_cov(X, wv1; corrected=true) - else - (m, C) = mean_and_cov(X, wv1; corrected=true) - @test m == mean(X, wv1, dims=1) - @test C == cov(X, wv1, 1; corrected=true) + (m, C) = mean_and_cov(X, 2; corrected = false) + @test m == mean(X, dims = 2) + @test C == cov(X, dims = 2, corrected = false) + + (m, C) = mean_and_cov(X, wv1; corrected = false) + @test m == mean(X, wv1, dims = 1) + @test C == cov(X, wv1, 1, corrected = false) - (m, C) = mean_and_cov(X, wv1, 1; corrected=true) - @test m == mean(X, wv1, dims=1) - @test C == cov(X, wv1, 1; corrected=true) + (m, C) = mean_and_cov(X, wv1, 1; corrected = false) + @test m == mean(X, wv1, dims = 1) + @test C == cov(X, wv1, 1, corrected = false) - (m, C) = mean_and_cov(X, wv2, 2; corrected=true) - @test m == mean(X, wv2, dims=2) - @test C == cov(X, wv2, 2; corrected=true) + (m, C) = mean_and_cov(X, wv2, 2; corrected = false) + @test m == mean(X, wv2, dims = 2) + @test C == cov(X, wv2, 2, corrected = false) end - end - @testset "Conversions" begin - if !isa(wv1, Weights) - std1 = std(X, wv1, 1; corrected=true) - std2 = std(X, wv2, 2; corrected=true) + @testset "Conversions" 
begin + std1 = std(X, wv1, 1; corrected = false) + std2 = std(X, wv2, 2; corrected = false) - cov1 = cov(X, wv1, 1; corrected=true) - cov2 = cov(X, wv2, 2; corrected=true) + cov1 = cov(X, wv1, 1; corrected = false) + cov2 = cov(X, wv2, 2; corrected = false) cor1 = cor(X, wv1, 1) cor2 = cor(X, wv2, 2) @testset "cov2cor" begin + test_isapprox_preserves_symherm_structure(cov2cor, cov(X, dims = 1), cor(X, dims = 1), std(X, dims = 1)) + test_isapprox_preserves_symherm_structure(cov2cor, cov(X, dims = 2), cor(X, dims = 2), std(X, dims = 2)) test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1) test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2) test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1, std1) test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2, std2) end @testset "StatsBase.cov2cor!" begin + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov(X, dims = 1), cor(X, dims = 1), std(X, dims = 1)) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov(X, dims = 2), cor(X, dims = 2), std(X, dims = 2)) test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, cor1) test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, cor2) test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, cor1, std1) test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, cor2, std2) end @testset "cor2cov" begin + test_isapprox_preserves_symherm_structure(cor2cov, cor(X, dims = 1), cov(X, dims = 1), std(X, dims = 1)) + test_isapprox_preserves_symherm_structure(cor2cov, cor(X, dims = 2), cov(X, dims = 2), std(X, dims = 2)) test_isapprox_preserves_symherm_structure(cor2cov, cor1, cov1, std1) test_isapprox_preserves_symherm_structure(cor2cov, cor2, cov2, std2) end @testset "StatsBase.cor2cov!" 
begin + test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor(X, dims = 1), cov(X, dims = 1), std(X, dims = 1)) + test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor(X, dims = 2), cov(X, dims = 2), std(X, dims = 2)) test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor1, cov1, std1) test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor2, cov2, std2) end end end - end - @testset "Correlation" begin - @test cor(X, f(ones(3)), 1) ≈ cor(X, dims = 1) - @test cor(X, f(ones(8)), 2) ≈ cor(X, dims = 2) + @testset "Corrected" begin + @testset "Weighted Covariance" begin + if isa(wv1, Weights) + @test_throws ArgumentError cov(X, wv1; corrected = true) + else + var_corr1 = StatsBase.varcorrection(wv1, true) + var_corr2 = StatsBase.varcorrection(wv2, true) - cov1 = cov(X, wv1, 1; corrected=false) - std1 = std(X, wv1, 1; corrected=false) - cov2 = cov(X, wv2, 2; corrected=false) - std2 = std(X, wv2, 2; corrected=false) - expected_cor1 = StatsBase.cov2cor!(cov1, std1) - expected_cor2 = StatsBase.cov2cor!(cov2, std2) + @test cov(X, wv1; corrected = true) ≈ S1w .* var_corr1 + @test cov(X, wv2, 2; corrected = true) ≈ S2w .* var_corr2 - @test cor(X, wv1, 1) ≈ expected_cor1 - @test cor(X, wv2, 2) ≈ expected_cor2 - end + @test StatsBase.covm(X, 0, wv1, 1; corrected = true) ≈ Sz1w .* var_corr1 + @test StatsBase.covm(X, 0, wv2, 2; corrected = true) ≈ Sz2w .* var_corr2 - @testset "Abstract covariance estimation" begin - Xm1 = mean(X, dims=1) - Xm2 = mean(X, dims=2) - - for corrected ∈ (false, true) - scc = SimpleCovariance(corrected=corrected) - @test_throws ArgumentError cov(scc, X, dims=0) - @test_throws ArgumentError cov(scc, X, wv1, dims=0) - @test cov(scc, X) ≈ cov(X, corrected=corrected) - @test cov(scc, X, mean=Xm1) ≈ StatsBase.covm(X, Xm1, corrected=corrected) - @test cov(scc, X, mean=Xm2, dims=2) ≈ StatsBase.covm(X, Xm2, 2, corrected=corrected) - if f !== weights || corrected === false - @test cov(scc, X, wv1, dims=1) ≈ cov(X, wv1, 
1, corrected=corrected) - @test cov(scc, X, wv2, dims=2) ≈ cov(X, wv2, 2, corrected=corrected) - @test cov(scc, X, wv1, mean=Xm1) ≈ StatsBase.covm(X, Xm1, wv1, corrected=corrected) - @test cov(scc, X, wv2, mean=Xm2, dims=2) ≈ StatsBase.covm(X, Xm2, wv2, 2, corrected=corrected) + @test StatsBase.covm(X, mean(X, wv1, dims = 1), wv1, 1; corrected = true) ≈ S1w .* var_corr1 + @test StatsBase.covm(X, mean(X, wv2, dims = 2), wv2, 2; corrected = true) ≈ S2w .* var_corr2 + + @test StatsBase.covm(X, zeros(1, 8), wv1, 1; corrected = true) ≈ Sz1w .* var_corr1 + @test StatsBase.covm(X, zeros(3), wv2, 2; corrected = true) ≈ Sz2w .* var_corr2 + end + end + @testset "Mean and covariance" begin + (m, C) = mean_and_cov(X; corrected = true) + @test m == mean(X, dims = 1) + @test C == cov(X, dims = 1, corrected = true) + + (m, C) = mean_and_cov(X, 1; corrected = true) + @test m == mean(X, dims = 1) + @test C == cov(X, dims = 1, corrected = true) + + (m, C) = mean_and_cov(X, 2; corrected = true) + @test m == mean(X, dims = 2) + @test C == cov(X, dims = 2, corrected = true) + + if isa(wv1, Weights) + @test_throws ArgumentError mean_and_cov(X, wv1; corrected = true) + else + (m, C) = mean_and_cov(X, wv1; corrected = true) + @test m == mean(X, wv1, dims = 1) + @test C == cov(X, wv1, 1; corrected = true) + + (m, C) = mean_and_cov(X, wv1, 1; corrected = true) + @test m == mean(X, wv1, dims = 1) + @test C == cov(X, wv1, 1; corrected = true) + + (m, C) = mean_and_cov(X, wv2, 2; corrected = true) + @test m == mean(X, wv2, dims = 2) + @test C == cov(X, wv2, 2; corrected = true) + end + end + @testset "Conversions" begin + if !isa(wv1, Weights) + std1 = std(X, wv1, 1; corrected = true) + std2 = std(X, wv2, 2; corrected = true) + + cov1 = cov(X, wv1, 1; corrected = true) + cov2 = cov(X, wv2, 2; corrected = true) + + cor1 = cor(X, wv1, 1) + cor2 = cor(X, wv2, 2) + + @testset "cov2cor" begin + test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1) + 
test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2) + test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1, std1) + test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2, std2) + end + @testset "StatsBase.cov2cor!" begin + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, cor1) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, cor2) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, cor1, std1) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, cor2, std2) + end + @testset "cor2cov" begin + test_isapprox_preserves_symherm_structure(cor2cov, cor1, cov1, std1) + test_isapprox_preserves_symherm_structure(cor2cov, cor2, cov2, std2) + end + @testset "StatsBase.cor2cov!" begin + test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor1, cov1, std1) + test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor2, cov2, std2) + end + end + end + end + + @testset "Correlation" begin + @test cor(X, f(ones(3)), 1) ≈ cor(X, dims = 1) + @test cor(X, f(ones(8)), 2) ≈ cor(X, dims = 2) + + cov1 = cov(X, wv1, 1; corrected = false) + std1 = std(X, wv1, 1; corrected = false) + cov2 = cov(X, wv2, 2; corrected = false) + std2 = std(X, wv2, 2; corrected = false) + expected_cor1 = StatsBase.cov2cor!(cov1, std1) + expected_cor2 = StatsBase.cov2cor!(cov2, std2) + + @test cor(X, wv1, 1) ≈ expected_cor1 + @test cor(X, wv2, 2) ≈ expected_cor2 + end + + @testset "Abstract covariance estimation" begin + Xm1 = mean(X, dims = 1) + Xm2 = mean(X, dims = 2) + + for corrected in (false, true) + scc = SimpleCovariance(corrected = corrected) + @test_throws ArgumentError cov(scc, X, dims = 0) + @test_throws ArgumentError cov(scc, X, wv1, dims = 0) + @test cov(scc, X) ≈ cov(X, corrected = corrected) + @test cov(scc, X, mean = Xm1) ≈ StatsBase.covm(X, Xm1, corrected = corrected) + @test cov(scc, X, mean = Xm2, dims = 2) ≈ StatsBase.covm(X, Xm2, 2, corrected = corrected) + if f !== weights 
|| corrected === false + @test cov(scc, X, wv1, dims = 1) ≈ cov(X, wv1, 1, corrected = corrected) + @test cov(scc, X, wv2, dims = 2) ≈ cov(X, wv2, 2, corrected = corrected) + @test cov(scc, X, wv1, mean = Xm1) ≈ StatsBase.covm(X, Xm1, wv1, corrected = corrected) + @test cov(scc, X, wv2, mean = Xm2, dims = 2) ≈ StatsBase.covm(X, Xm2, wv2, 2, corrected = corrected) + end end end end -end - -@testset "Abstract covariance estimation" begin - est = EmptyCovarianceEstimator() - wv = fweights(rand(2)) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0]) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], dims = 2) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, dims = 2) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], mean = nothing) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, mean = nothing) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], dims = 2, mean = nothing) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, dims = 2, mean = nothing) - @test_throws ErrorException cov(est, [1.0, 2.0], [3.0, 4.0]) - @test_throws ErrorException cov(est, [1.0, 2.0]) - - x = rand(8) - y = rand(8) - wv = fweights(rand(8)) - X = hcat(x, y) - - for corrected ∈ (false, true) - @test_throws MethodError SimpleCovariance(corrected) - scc = SimpleCovariance(corrected=corrected) - @test cov(scc, x) ≈ cov(x; corrected=corrected) - @test cov(scc, x, y) ≈ cov(x, y; corrected=corrected) - @test cov(scc, X) ≈ cov(X; corrected=corrected) - @test cov(scc, X, wv) ≈ cov(X, wv; corrected=corrected) - - @test var(scc, x) ≈ var(x; corrected=corrected) - @test std(scc, x) ≈ std(x; corrected=corrected) - - # NB That we should get the same correlation regardless of `corrected`, since it - # only affects the overall scale of the covariance. This cancels out when turning - # it into a correlation matrix. 
- @test cor(scc, x, y) ≈ cor(x, y) - @test cor(scc, X) ≈ cor(X) - @test cor(scc, X, wv) ≈ cor(X, wv) + + @testset "Abstract covariance estimation" begin + est = EmptyCovarianceEstimator() + wv = fweights(rand(2)) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0]) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], dims = 2) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, dims = 2) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], mean = nothing) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, mean = nothing) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], dims = 2, mean = nothing) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, dims = 2, mean = nothing) + @test_throws ErrorException cov(est, [1.0, 2.0], [3.0, 4.0]) + @test_throws ErrorException cov(est, [1.0, 2.0]) + + x = rand(8) + y = rand(8) + wv = fweights(rand(8)) + X = hcat(x, y) + + for corrected in (false, true) + @test_throws MethodError SimpleCovariance(corrected) + scc = SimpleCovariance(corrected = corrected) + @test cov(scc, x) ≈ cov(x; corrected = corrected) + @test cov(scc, x, y) ≈ cov(x, y; corrected = corrected) + @test cov(scc, X) ≈ cov(X; corrected = corrected) + @test cov(scc, X, wv) ≈ cov(X, wv; corrected = corrected) + + @test var(scc, x) ≈ var(x; corrected = corrected) + @test std(scc, x) ≈ std(x; corrected = corrected) + + # NB That we should get the same correlation regardless of `corrected`, since it + # only affects the overall scale of the covariance. This cancels out when turning + # it into a correlation matrix. 
+ @test cor(scc, x, y) ≈ cor(x, y) + @test cor(scc, X) ≈ cor(X) + @test cor(scc, X, wv) ≈ cor(X, wv) + end end -end end # @testset "StatsBase.Covariance" diff --git a/test/deviation.jl b/test/deviation.jl index 4ace5846d..042858ad0 100644 --- a/test/deviation.jl +++ b/test/deviation.jl @@ -3,7 +3,7 @@ using Test @testset "counting (arrays with element types $T1 and $T2)" for T1 in (Int, Float32, Float64), T2 in (Int, Float32, Float64) a = T1[1, 2, 3, 4, 5, 6, 7] - b = T2[1, 3, 3, 4, 6, 7, 8] + b = T2[1, 3, 3, 4, 6, 7, 8] a_offset = OffsetArray(a, -5:1) b_offset = OffsetArray(b, -5:1) for (a, b) in ((a, b), (a_offset, b_offset)) @@ -37,18 +37,18 @@ end b_offset = OffsetArray(b, 5, -10) for (a, b) in ((a, b), (a_offset, b_offset)) @test @inferred(sqL2dist(a, b))::T ≈ sum(abs2.(a - b)) - @test @inferred(L2dist(a, b))::T ≈ sqrt(sqL2dist(a, b)) - @test @inferred(L1dist(a, b))::T ≈ sum(abs.(a - b)) + @test @inferred(L2dist(a, b))::T ≈ sqrt(sqL2dist(a, b)) + @test @inferred(L1dist(a, b))::T ≈ sum(abs.(a - b)) @test @inferred(Linfdist(a, b))::T ≈ maximum(abs.(a - b)) @test @inferred(gkldiv(a, b))::T ≈ sum(a .* log.(a ./ b) - a + b) - @test @inferred(meanad(a, b))::T ≈ mean(abs.(a - b)) - @test @inferred(maxad(a, b))::T ≈ maximum(abs.(a - b)) - @test @inferred(msd(a, b))::T ≈ mean(abs2.(a - b)) - @test @inferred(rmsd(a, b))::T ≈ sqrt(msd(a, b)) - @test @inferred(rmsd(a, b; normalize=true))::T ≈ rmsd(a, b) / (maximum(a) - minimum(a)) + @test @inferred(meanad(a, b))::T ≈ mean(abs.(a - b)) + @test @inferred(maxad(a, b))::T ≈ maximum(abs.(a - b)) + @test @inferred(msd(a, b))::T ≈ mean(abs2.(a - b)) + @test @inferred(rmsd(a, b))::T ≈ sqrt(msd(a, b)) + @test @inferred(rmsd(a, b; normalize = true))::T ≈ rmsd(a, b) / (maximum(a) - minimum(a)) for T2 in (Int, Float32, Float64) S = promote_type(T, T2) - @test @inferred(psnr(a, b, T2(2)))::S ≈ 10 * log10(4 / msd(a, b)) + @test @inferred(psnr(a, b, T2(2)))::S ≈ 10 * log10(4 / msd(a, b)) end end @@ -67,7 +67,7 @@ end @test 
iszero(@inferred(maxad(a, b))::T) @test isnan(@inferred(msd(a, b))::T) @test isnan(@inferred(rmsd(a, b))::T) - @test isnan(@inferred(rmsd(a, b; normalize=true))::T) + @test isnan(@inferred(rmsd(a, b; normalize = true))::T) for T2 in (Int, Float32, Float64) S = promote_type(T, T2) @test isnan(@inferred(psnr(a, b, T2(2)))::S) @@ -85,7 +85,7 @@ end @test_throws err maxad(a, b) @test_throws err msd(a, b) @test_throws err rmsd(a, b) - @test_throws err rmsd(a, b; normalize=true) + @test_throws err rmsd(a, b; normalize = true) for T2 in (Int, Float32, Float64) @test_throws err psnr(a, b, T2(2)) end diff --git a/test/empirical.jl b/test/empirical.jl index 6af076611..6734708f3 100644 --- a/test/empirical.jl +++ b/test/empirical.jl @@ -5,14 +5,14 @@ using Test x = randn(10000000) fnecdf = ecdf(x) y = [-1.96, -1.644854, -1.281552, -0.6744898, 0, 0.6744898, 1.281552, 1.644854, 1.96] - @test isapprox(fnecdf(y), [0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975], atol=1e-3) - @test isapprox(fnecdf(1.96), 0.975, atol=1e-3) + @test isapprox(fnecdf(y), [0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975], atol = 1.0e-3) + @test isapprox(fnecdf(1.96), 0.975, atol = 1.0e-3) @test fnecdf(y) ≈ map(fnecdf, y) @test extrema(fnecdf) == (minimum(fnecdf), maximum(fnecdf)) == extrema(x) fnecdf = ecdf([0.5]) @test fnecdf([zeros(5000); ones(5000)]) == [zeros(5000); ones(5000)] @test extrema(fnecdf) == (minimum(fnecdf), maximum(fnecdf)) == (0.5, 0.5) - @test isnan(ecdf([1,2,3])(NaN)) + @test isnan(ecdf([1, 2, 3])(NaN)) @test_throws ArgumentError ecdf([1, NaN]) end @@ -20,27 +20,27 @@ end x = randn(10000000) w1 = rand(10000000) w2 = weights(w1) - fnecdf = ecdf(x, weights=w1) - fnecdfalt = ecdf(x, weights=w2) + fnecdf = ecdf(x, weights = w1) + fnecdfalt = ecdf(x, weights = w2) @test fnecdf.sorted_values == fnecdfalt.sorted_values @test fnecdf.weights == fnecdfalt.weights @test fnecdf.weights != w1 # check that w wasn't accidentally modified in place @test fnecdfalt.weights != w2 y = [-1.96, 
-1.644854, -1.281552, -0.6744898, 0, 0.6744898, 1.281552, 1.644854, 1.96] - @test isapprox(fnecdf(y), [0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975], atol=1e-3) - @test isapprox(fnecdf(1.96), 0.975, atol=1e-3) + @test isapprox(fnecdf(y), [0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975], atol = 1.0e-3) + @test isapprox(fnecdf(1.96), 0.975, atol = 1.0e-3) @test fnecdf(y) ≈ map(fnecdf, y) @test extrema(fnecdf) == (minimum(fnecdf), maximum(fnecdf)) == extrema(x) - fnecdf = ecdf([1.0, 0.5], weights=weights([3, 1])) + fnecdf = ecdf([1.0, 0.5], weights = weights([3, 1])) @test fnecdf(0.75) == 0.25 @test extrema(fnecdf) == (minimum(fnecdf), maximum(fnecdf)) == (0.5, 1.0) - @test_throws ArgumentError ecdf(rand(8), weights=weights(rand(10))) + @test_throws ArgumentError ecdf(rand(8), weights = weights(rand(10))) # Check frequency weights v = randn(100) r = rand(1:100, 100) vv = vcat(fill.(v, r)...) # repeat elements of v according to r fw = fweights(r) - frecdf1 = ecdf(v, weights=fw) + frecdf1 = ecdf(v, weights = fw) frecdf2 = ecdf(vv) @test frecdf1(y) ≈ frecdf2(y) # Check probability weights @@ -49,7 +49,7 @@ end b̃ = abs(10randn()) * b bw1 = pweights(b) bw2 = pweights(b̃) - precdf1 = ecdf(a, weights=bw1) - precdf2 = ecdf(a, weights=bw2) + precdf1 = ecdf(a, weights = bw1) + precdf2 = ecdf(a, weights = bw2) @test precdf1(y) ≈ precdf2(y) end diff --git a/test/hist.jl b/test/hist.jl index 5d82fe7f6..e1e0f264c 100644 --- a/test/hist.jl +++ b/test/hist.jl @@ -4,268 +4,268 @@ using LinearAlgebra, Random, Test @testset "StatsBase.Histogram" begin -@testset "Histogram binindex and binvolume" begin - edg1 = -2:0.5:9 - edg1f0 = -2:0.5f0:9 - edg2 = [-2, -1, 2, 7, 19] - h1 = Histogram(edg1) - h2 = Histogram((edg1, edg2)) - h3 = Histogram((edg1f0, edg2)) - - @test h1 == Histogram(edg1, :left, false) - - @test @inferred StatsBase.binindex(h1, -0.5) == 4 - @test @inferred StatsBase.binindex(h2, (1.5, 2)) == (8, 3) - - @test [StatsBase.binvolume(h1, i) for i in axes(h1.weights, 
1)] ≈ diff(edg1) - @test [StatsBase.binvolume(h2, (i,j)) for i in axes(h2.weights, 1), j in axes(h2.weights, 2)] ≈ diff(edg1) * diff(edg2)' - - @test typeof(@inferred(StatsBase.binvolume(h2, (1,1)))) == Float64 - @test typeof(@inferred(StatsBase.binvolume(h3, (1,1)))) == Float32 - @test typeof(@inferred(StatsBase.binvolume(Float64, h3, (1,1)))) == Float64 -end - - -@testset "Histogram append" begin - h = Histogram(0:20:100, Float64, :left, false) - @test @inferred(append!(h, 0:0.5:99.99)) == h - @test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99).weights ≈ [40,40,40,40,40] - @test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99).weights ≈ [2,2,2,2,2] - @test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99, fill(2, 200)).weights ≈ [80,80,80,80,80] - @test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99, fill(2, 200)).weights ≈ [4,4,4,4,4] -end - - -@testset "Histogram fit" begin - @test sum(fit(Histogram,[1,2,3]).weights) == 3 - @test fit(Histogram,Int[]).weights == Int[] - @test fit(Histogram,[1]).weights == [1] - @test fit(Histogram,[1,2,3],[0,2,4]) == Histogram([0,2,4],[1,2], :left) - @test fit(Histogram,[1,2,3],[0,2,4]) != Histogram([0,2,4],[1,1], :left) - @test fit(Histogram,[1,2,3],0:2:4) == Histogram(0:2:4,[1,2], :left) - @test all(fit(Histogram,[0:99;]/100,0.0:0.01:1.0).weights .==1) - @test fit(Histogram,[1,1,1,1,1]).weights[1] == 5 - @test sum(fit(Histogram,(rand(100),rand(100))).weights) == 100 - @test fit(Histogram,1:100,nbins=5,closed=:right).weights == [20,20,20,20,20] - @test fit(Histogram,1:100,nbins=5,closed=:left).weights == [19,20,20,20,20,1] - @test fit(Histogram,0:99,nbins=5,closed=:right).weights == [1,20,20,20,20,19] - @test fit(Histogram,0:99,nbins=5,closed=:left).weights == [20,20,20,20,20] - - @test fit(Histogram,(0:99,0:99),nbins=5).weights == Matrix(Diagonal([20,20,20,20,20])) - @test fit(Histogram,(0:99,0:99),nbins=(5,5)).weights == Matrix(Diagonal([20,20,20,20,20])) - - 
@test fit(Histogram,0:99,weights(ones(100)),nbins=5).weights == [20,20,20,20,20] - @test fit(Histogram,0:99,weights(2*ones(100)),nbins=5).weights == [40,40,40,40,40] - @test fit(Histogram{Int32},0:99,weights(2*ones(100)),nbins=5).weights == [40,40,40,40,40] - @test fit(Histogram{Float32},0:99,weights(2*ones(100)),nbins=5).weights == [40,40,40,40,40] - - d = collect(0:99) - v = view(d, fill(true, 100)) - @test fit(Histogram{Float32},v,weights(2*ones(100)),nbins=5).weights == [40,40,40,40,40] -end - - -@testset "Histogram element type" begin - @test eltype(@inferred(fit(Histogram,1:100,weights(ones(Int,100)),nbins=5)).weights) == Int - @test eltype(@inferred(fit(Histogram{Float32},1:100,weights(ones(Int,100)),nbins=5)).weights) == Float32 - @test eltype(@inferred(fit(Histogram,1:100,weights(ones(Float64,100)),nbins=5)).weights) == Float64 - @test eltype(@inferred(fit(Histogram{Float32},1:100,weights(ones(Float64,100)),nbins=5)).weights) == Float32 -end - - -@testset "histrange" begin - # Note: atm histrange must be qualified - @test @inferred(StatsBase.histrange(Float64[], 0, :left)) == 0.0:1.0:0.0 - @test StatsBase.histrange(Float64[1:5;], 1, :left) == 0.0:5.0:10.0 - @test StatsBase.histrange(Float64[1:10;], 1, :left) == 0.0:10.0:20.0 - @test StatsBase.histrange(1.0, 10.0, 1, :left) == 0.0:10.0:20.0 - - @test StatsBase.histrange([0.201,0.299], 10, :left) == 0.2:0.01:0.3 - @test StatsBase.histrange([0.2,0.299], 10, :left) == 0.2:0.01:0.3 - @test StatsBase.histrange([0.2,0.3], 10, :left) == 0.2:0.01:0.31 - @test StatsBase.histrange(0.2, 0.3, 10, :left) == 0.2:0.01:0.31 - @test StatsBase.histrange([0.2,0.3], 10, :right) == 0.19:0.01:0.3 - @test StatsBase.histrange(0.2, 0.3, 10, :right) == 0.19:0.01:0.3 - - @test StatsBase.histrange([200.1,299.9], 10, :left) == 200.0:10.0:300.0 - @test StatsBase.histrange([200.0,299.9], 10, :left) == 200.0:10.0:300.0 - @test StatsBase.histrange([200.0,300.0], 10, :left) == 200.0:10.0:310.0 - @test StatsBase.histrange([200.0,300.0], 10, 
:right) == 190.0:10.0:300.0 - - @test @inferred(StatsBase.histrange(Int64[1:5;], 1, :left)) == 0:5:10 - @test StatsBase.histrange(Int64[1:10;], 1, :left) == 0:10:20 - - @test StatsBase.histrange([0, 1, 2, 3], 4, :left) == 0.0:1.0:4.0 - @test StatsBase.histrange([0, 1, 1, 3], 4, :left) == 0.0:1.0:4.0 - @test StatsBase.histrange([0, 9], 4, :left) == 0.0:5.0:10.0 - @test StatsBase.histrange([0, 19], 4, :left) == 0.0:5.0:20.0 - @test StatsBase.histrange([0, 599], 4, :left) == 0.0:200.0:600.0 - @test StatsBase.histrange([-1, -1000], 4, :left) == -1000.0:500.0:0.0 - - # Base issue #13326 - l,h = extrema(StatsBase.histrange([typemin(Int),typemax(Int)], 10, :left)) - @test l <= typemin(Int) - @test h >= typemax(Int) - - # Issue 616/667 - @test StatsBase.histrange([1.0 for i in 1:100], 10, :left) == 1.0:1.0:2.0 - @test StatsBase.histrange([1.05 for i in 1:100], 10, :left) == 1.05:1.0:2.05 - - @test_throws ArgumentError StatsBase.histrange([1, 10], 0, :left) - @test_throws ArgumentError StatsBase.histrange([1, 10], -1, :left) - @test_throws ArgumentError StatsBase.histrange([1.0, 10.0], 0, :left) - @test_throws ArgumentError StatsBase.histrange([1.0, 10.0], -1, :left) - @test_throws ArgumentError StatsBase.histrange(Float64[],-1, :left) - @test_throws ArgumentError StatsBase.histrange([0.], 0, :left) -end - - -@testset "Histogram show" begin - # hist show - show_h = sprint(show, fit(Histogram,[0,1,2])) - @test occursin("edges:\n 0.0:1.0:3.0", show_h) - @test occursin("weights: $([1,1,1])", show_h) - @test occursin("closed: left", show_h) - @test occursin("isdensity: false", show_h) -end - - -@testset "Histogram norm and normalize" begin - rng = MersenneTwister(345678) - edges = ( - cumsum(rand(rng) * rand(rng, 9)), - cumsum(rand(rng, 1:10) * rand(rng, 1:100, 11)), - cumsum(5 * rand(rng) * rand(rng, 14)) - ) - - n = 100000 - - data = ( - maximum(edges[1]) .* (randn(rng, n) ./ 6 .+ 0.5), - rand(rng, 1:maximum(edges[2]), n), - maximum(edges[3]) .* rand(rng, n) - ) - - h = 
fit(Histogram, data, edges, closed = :left) - - weight_sum = sum(h.weights) - bin_vols = [ x * y * z for x in diff(edges[1]), y in diff(edges[2]), z in diff(edges[3])] - - @test norm(h) ≈ sum(h.weights .* bin_vols) - - @test @inferred(normalize(h, mode = :none)) == h - - - h_pdf = normalize(h, mode = :pdf) - @test h_pdf.weights ≈ h.weights ./ bin_vols ./ weight_sum - @test h_pdf.isdensity == true - @test @inferred(norm(h_pdf)) ≈ 1 -# @test @inferred(normalize(h_pdf, mode = :pdf)) == h_pdf - @test @inferred(normalize(h_pdf, mode = :density)) == h_pdf -# @test @inferred(normalize(h_pdf, mode = :probability)) == h_pdf - - h_density = normalize(h, mode = :density) - @test h_density.weights ≈ h.weights ./ bin_vols - @test h_density.isdensity == true - @test @inferred(norm(h_density)) ≈ weight_sum - @test @inferred(normalize(h_density, mode = :pdf)) == - Histogram(h_density.edges, h_density.weights .* (1/norm(h_density)), h_density.closed, true) - @test normalize(h_density, mode = :pdf).weights ≈ h_pdf.weights - @test normalize(h_density, mode = :density) == h_density - @test normalize(h_density, mode = :probability).weights ≈ h_pdf.weights - - h_fraction = normalize(h, mode = :probability) - @test sum(h_fraction.weights) ≈ 1 - @test h_fraction.isdensity == false - @test normalize(h_fraction, mode = :pdf).weights ≈ h_pdf.weights - @test normalize(h_fraction, mode = :density).weights ≈ h_pdf.weights - @test normalize(h_fraction, mode = :probability).weights ≈ h_fraction.weights - - h_copy = deepcopy(float(h)) - @test @inferred(normalize!(h_copy, mode = :density)) == h_copy - - h2 = deepcopy(float(h)) - mod_h2 = normalize!(h2, mode = :density) - @test mod_h2 === h2 && mod_h2.weights === h2.weights - @test h2.weights == h_density.weights - - aux_weights = sqrt.(h.weights) - divor0 = (a,b) -> (a == 0 && b == 0) ? 
0 : a/b - divor0_cmp = (a_n, a_d, b_n, b_d) -> maximum(abs.(map(divor0, a_n, a_d) - map(divor0, b_n, b_d))) < 1e-10 - - h_pdf2, h_pdf2_aux = normalize(float(h), aux_weights, mode = :pdf) - @test divor0_cmp(h_pdf2_aux, aux_weights, h_pdf2.weights, h.weights) - - h_density2, h_density2_aux = normalize(float(h), aux_weights, mode = :density) - @test divor0_cmp(h_density2_aux, aux_weights, h_density2.weights, h.weights) - - h_density3, h_density3_aux = normalize(h_density2, h_density2_aux, mode = :pdf) - @test divor0_cmp(h_density3_aux, h_density2_aux, h_density3.weights, h_density2.weights) -end - - -@testset "Histogram zero" begin - h = fit(Histogram, (rand(100), rand(100))) - h2 = @inferred zero(h) - @test all(x -> x≈0, h2.weights) - @test !(h.weights === h2.weights) - @test h.edges == h2.edges - @test h.closed == h2.closed - @test h.isdensity == h2.isdensity -end - - -@testset "Histogram merge" begin - histograms = [fit(Histogram, (rand(100), 10 * rand(100)), (0:0.1:1, 0:1:10)) for _ in 1:10] - h = zero(histograms[1]) - merge!(h, histograms ...) - @test h.weights == (+).((x->x.weights).(histograms)...) 
- @test (@inferred merge(histograms...)) == h -end - -@testset "midpoints" begin - @test StatsBase.midpoints([1, 2, 4]) == [1.5, 3.0] - @test StatsBase.midpoints(range(0, stop = 1, length = 5)) == 0.125:0.25:0.875 -end - -@testset "histogram with -0.0" begin - @test fit(Histogram, [-0.0, 1.0]) == fit(Histogram, [0.0, 1.0]) - @test fit(Histogram, [-0.0, 1.0], closed=:right) == - fit(Histogram, [0.0, 1.0], closed=:right) - @test fit(Histogram, [-0.0, -1.0]) == fit(Histogram, [0.0, -1.0]) - @test fit(Histogram, [-0.0, -1.0], closed=:right) == - fit(Histogram, [0.0, -1.0], closed=:right) - - @test fit(Histogram, [-0.0, 1.0], [-0.0, 0.5]) == - fit(Histogram, [0.0, 1.0], [0.0, 0.5]) == - fit(Histogram, [-0.0, 1.0], [0.0, 0.5]) == - fit(Histogram, [0.0, 1.0], [-0.0, 0.5]) == - fit(Histogram, [0.0, 1.0], 0.0:0.5:0.5) == - fit(Histogram, [-0.0, 1.0], 0.0:0.5:0.5) - @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0]) == - fit(Histogram, [0.0, 1.0], [-0.5, -0.0]) == - fit(Histogram, [-0.0, 1.0], [-0.5, 0.0]) == - fit(Histogram, [0.0, 1.0], [-0.5, 0.0]) == - fit(Histogram, [-0.0, 1.0], -0.5:0.5:0.0) == - fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0) - @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0], closed=:right) == - fit(Histogram, [0.0, 1.0], [-0.5, 0.0], closed=:right) == - fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0, closed=:right) - @test fit(Histogram, [-0.0, 1.0], [-0.0, 0.5], closed=:right) == - fit(Histogram, [0.0, 1.0], [0.0, 0.5], closed=:right) == - fit(Histogram, [0.0, 1.0], [-0.0, 0.5], closed=:right) == - fit(Histogram, [-0.0, 1.0], [0.0, 0.5], closed=:right) == - fit(Histogram, [0.0, 1.0], 0.0:0.5:0.5, closed=:right) == - fit(Histogram, [-0.0, 1.0], 0.0:0.5:0.5, closed=:right) - @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0], closed=:right) == - fit(Histogram, [0.0, 1.0], [-0.5, 0.0], closed=:right) == - fit(Histogram, [0.0, 1.0], [-0.5, -0.0], closed=:right) == - fit(Histogram, [-0.0, 1.0], [-0.5, 0.0], closed=:right) == - fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0, 
closed=:right) == - fit(Histogram, [-0.0, 1.0], -0.5:0.5:0.0, closed=:right) - - @test_throws ArgumentError fit(Histogram, [-0.5], LinRange(-1.0, -0.0, 3)) - @test_throws ArgumentError fit(Histogram, [-0.5], UnitRange(-0.0, 1.0)) -end + @testset "Histogram binindex and binvolume" begin + edg1 = -2:0.5:9 + edg1f0 = -2:0.5f0:9 + edg2 = [-2, -1, 2, 7, 19] + h1 = Histogram(edg1) + h2 = Histogram((edg1, edg2)) + h3 = Histogram((edg1f0, edg2)) + + @test h1 == Histogram(edg1, :left, false) + + @test @inferred StatsBase.binindex(h1, -0.5) == 4 + @test @inferred StatsBase.binindex(h2, (1.5, 2)) == (8, 3) + + @test [StatsBase.binvolume(h1, i) for i in axes(h1.weights, 1)] ≈ diff(edg1) + @test [StatsBase.binvolume(h2, (i, j)) for i in axes(h2.weights, 1), j in axes(h2.weights, 2)] ≈ diff(edg1) * diff(edg2)' + + @test typeof(@inferred(StatsBase.binvolume(h2, (1, 1)))) == Float64 + @test typeof(@inferred(StatsBase.binvolume(h3, (1, 1)))) == Float32 + @test typeof(@inferred(StatsBase.binvolume(Float64, h3, (1, 1)))) == Float64 + end + + + @testset "Histogram append" begin + h = Histogram(0:20:100, Float64, :left, false) + @test @inferred(append!(h, 0:0.5:99.99)) == h + @test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99).weights ≈ [40, 40, 40, 40, 40] + @test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99).weights ≈ [2, 2, 2, 2, 2] + @test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99, fill(2, 200)).weights ≈ [80, 80, 80, 80, 80] + @test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99, fill(2, 200)).weights ≈ [4, 4, 4, 4, 4] + end + + + @testset "Histogram fit" begin + @test sum(fit(Histogram, [1, 2, 3]).weights) == 3 + @test fit(Histogram, Int[]).weights == Int[] + @test fit(Histogram, [1]).weights == [1] + @test fit(Histogram, [1, 2, 3], [0, 2, 4]) == Histogram([0, 2, 4], [1, 2], :left) + @test fit(Histogram, [1, 2, 3], [0, 2, 4]) != Histogram([0, 2, 4], [1, 1], :left) + @test fit(Histogram, [1, 2, 3], 0:2:4) 
== Histogram(0:2:4, [1, 2], :left) + @test all(fit(Histogram, [0:99;] / 100, 0.0:0.01:1.0).weights .== 1) + @test fit(Histogram, [1, 1, 1, 1, 1]).weights[1] == 5 + @test sum(fit(Histogram, (rand(100), rand(100))).weights) == 100 + @test fit(Histogram, 1:100, nbins = 5, closed = :right).weights == [20, 20, 20, 20, 20] + @test fit(Histogram, 1:100, nbins = 5, closed = :left).weights == [19, 20, 20, 20, 20, 1] + @test fit(Histogram, 0:99, nbins = 5, closed = :right).weights == [1, 20, 20, 20, 20, 19] + @test fit(Histogram, 0:99, nbins = 5, closed = :left).weights == [20, 20, 20, 20, 20] + + @test fit(Histogram, (0:99, 0:99), nbins = 5).weights == Matrix(Diagonal([20, 20, 20, 20, 20])) + @test fit(Histogram, (0:99, 0:99), nbins = (5, 5)).weights == Matrix(Diagonal([20, 20, 20, 20, 20])) + + @test fit(Histogram, 0:99, weights(ones(100)), nbins = 5).weights == [20, 20, 20, 20, 20] + @test fit(Histogram, 0:99, weights(2 * ones(100)), nbins = 5).weights == [40, 40, 40, 40, 40] + @test fit(Histogram{Int32}, 0:99, weights(2 * ones(100)), nbins = 5).weights == [40, 40, 40, 40, 40] + @test fit(Histogram{Float32}, 0:99, weights(2 * ones(100)), nbins = 5).weights == [40, 40, 40, 40, 40] + + d = collect(0:99) + v = view(d, fill(true, 100)) + @test fit(Histogram{Float32}, v, weights(2 * ones(100)), nbins = 5).weights == [40, 40, 40, 40, 40] + end + + + @testset "Histogram element type" begin + @test eltype(@inferred(fit(Histogram, 1:100, weights(ones(Int, 100)), nbins = 5)).weights) == Int + @test eltype(@inferred(fit(Histogram{Float32}, 1:100, weights(ones(Int, 100)), nbins = 5)).weights) == Float32 + @test eltype(@inferred(fit(Histogram, 1:100, weights(ones(Float64, 100)), nbins = 5)).weights) == Float64 + @test eltype(@inferred(fit(Histogram{Float32}, 1:100, weights(ones(Float64, 100)), nbins = 5)).weights) == Float32 + end + + + @testset "histrange" begin + # Note: atm histrange must be qualified + @test @inferred(StatsBase.histrange(Float64[], 0, :left)) == 0.0:1.0:0.0 + 
@test StatsBase.histrange(Float64[1:5;], 1, :left) == 0.0:5.0:10.0 + @test StatsBase.histrange(Float64[1:10;], 1, :left) == 0.0:10.0:20.0 + @test StatsBase.histrange(1.0, 10.0, 1, :left) == 0.0:10.0:20.0 + + @test StatsBase.histrange([0.201, 0.299], 10, :left) == 0.2:0.01:0.3 + @test StatsBase.histrange([0.2, 0.299], 10, :left) == 0.2:0.01:0.3 + @test StatsBase.histrange([0.2, 0.3], 10, :left) == 0.2:0.01:0.31 + @test StatsBase.histrange(0.2, 0.3, 10, :left) == 0.2:0.01:0.31 + @test StatsBase.histrange([0.2, 0.3], 10, :right) == 0.19:0.01:0.3 + @test StatsBase.histrange(0.2, 0.3, 10, :right) == 0.19:0.01:0.3 + + @test StatsBase.histrange([200.1, 299.9], 10, :left) == 200.0:10.0:300.0 + @test StatsBase.histrange([200.0, 299.9], 10, :left) == 200.0:10.0:300.0 + @test StatsBase.histrange([200.0, 300.0], 10, :left) == 200.0:10.0:310.0 + @test StatsBase.histrange([200.0, 300.0], 10, :right) == 190.0:10.0:300.0 + + @test @inferred(StatsBase.histrange(Int64[1:5;], 1, :left)) == 0:5:10 + @test StatsBase.histrange(Int64[1:10;], 1, :left) == 0:10:20 + + @test StatsBase.histrange([0, 1, 2, 3], 4, :left) == 0.0:1.0:4.0 + @test StatsBase.histrange([0, 1, 1, 3], 4, :left) == 0.0:1.0:4.0 + @test StatsBase.histrange([0, 9], 4, :left) == 0.0:5.0:10.0 + @test StatsBase.histrange([0, 19], 4, :left) == 0.0:5.0:20.0 + @test StatsBase.histrange([0, 599], 4, :left) == 0.0:200.0:600.0 + @test StatsBase.histrange([-1, -1000], 4, :left) == -1000.0:500.0:0.0 + + # Base issue #13326 + l, h = extrema(StatsBase.histrange([typemin(Int), typemax(Int)], 10, :left)) + @test l <= typemin(Int) + @test h >= typemax(Int) + + # Issue 616/667 + @test StatsBase.histrange([1.0 for i in 1:100], 10, :left) == 1.0:1.0:2.0 + @test StatsBase.histrange([1.05 for i in 1:100], 10, :left) == 1.05:1.0:2.05 + + @test_throws ArgumentError StatsBase.histrange([1, 10], 0, :left) + @test_throws ArgumentError StatsBase.histrange([1, 10], -1, :left) + @test_throws ArgumentError StatsBase.histrange([1.0, 10.0], 0, :left) + 
@test_throws ArgumentError StatsBase.histrange([1.0, 10.0], -1, :left) + @test_throws ArgumentError StatsBase.histrange(Float64[], -1, :left) + @test_throws ArgumentError StatsBase.histrange([0.0], 0, :left) + end + + + @testset "Histogram show" begin + # hist show + show_h = sprint(show, fit(Histogram, [0, 1, 2])) + @test occursin("edges:\n 0.0:1.0:3.0", show_h) + @test occursin("weights: $([1, 1, 1])", show_h) + @test occursin("closed: left", show_h) + @test occursin("isdensity: false", show_h) + end + + + @testset "Histogram norm and normalize" begin + rng = MersenneTwister(345678) + edges = ( + cumsum(rand(rng) * rand(rng, 9)), + cumsum(rand(rng, 1:10) * rand(rng, 1:100, 11)), + cumsum(5 * rand(rng) * rand(rng, 14)), + ) + + n = 100000 + + data = ( + maximum(edges[1]) .* (randn(rng, n) ./ 6 .+ 0.5), + rand(rng, 1:maximum(edges[2]), n), + maximum(edges[3]) .* rand(rng, n), + ) + + h = fit(Histogram, data, edges, closed = :left) + + weight_sum = sum(h.weights) + bin_vols = [ x * y * z for x in diff(edges[1]), y in diff(edges[2]), z in diff(edges[3])] + + @test norm(h) ≈ sum(h.weights .* bin_vols) + + @test @inferred(normalize(h, mode = :none)) == h + + + h_pdf = normalize(h, mode = :pdf) + @test h_pdf.weights ≈ h.weights ./ bin_vols ./ weight_sum + @test h_pdf.isdensity == true + @test @inferred(norm(h_pdf)) ≈ 1 + # @test @inferred(normalize(h_pdf, mode = :pdf)) == h_pdf + @test @inferred(normalize(h_pdf, mode = :density)) == h_pdf + # @test @inferred(normalize(h_pdf, mode = :probability)) == h_pdf + + h_density = normalize(h, mode = :density) + @test h_density.weights ≈ h.weights ./ bin_vols + @test h_density.isdensity == true + @test @inferred(norm(h_density)) ≈ weight_sum + @test @inferred(normalize(h_density, mode = :pdf)) == + Histogram(h_density.edges, h_density.weights .* (1 / norm(h_density)), h_density.closed, true) + @test normalize(h_density, mode = :pdf).weights ≈ h_pdf.weights + @test normalize(h_density, mode = :density) == h_density + @test 
normalize(h_density, mode = :probability).weights ≈ h_pdf.weights + + h_fraction = normalize(h, mode = :probability) + @test sum(h_fraction.weights) ≈ 1 + @test h_fraction.isdensity == false + @test normalize(h_fraction, mode = :pdf).weights ≈ h_pdf.weights + @test normalize(h_fraction, mode = :density).weights ≈ h_pdf.weights + @test normalize(h_fraction, mode = :probability).weights ≈ h_fraction.weights + + h_copy = deepcopy(float(h)) + @test @inferred(normalize!(h_copy, mode = :density)) == h_copy + + h2 = deepcopy(float(h)) + mod_h2 = normalize!(h2, mode = :density) + @test mod_h2 === h2 && mod_h2.weights === h2.weights + @test h2.weights == h_density.weights + + aux_weights = sqrt.(h.weights) + divor0 = (a, b) -> (a == 0 && b == 0) ? 0 : a / b + divor0_cmp = (a_n, a_d, b_n, b_d) -> maximum(abs.(map(divor0, a_n, a_d) - map(divor0, b_n, b_d))) < 1.0e-10 + + h_pdf2, h_pdf2_aux = normalize(float(h), aux_weights, mode = :pdf) + @test divor0_cmp(h_pdf2_aux, aux_weights, h_pdf2.weights, h.weights) + + h_density2, h_density2_aux = normalize(float(h), aux_weights, mode = :density) + @test divor0_cmp(h_density2_aux, aux_weights, h_density2.weights, h.weights) + + h_density3, h_density3_aux = normalize(h_density2, h_density2_aux, mode = :pdf) + @test divor0_cmp(h_density3_aux, h_density2_aux, h_density3.weights, h_density2.weights) + end + + + @testset "Histogram zero" begin + h = fit(Histogram, (rand(100), rand(100))) + h2 = @inferred zero(h) + @test all(x -> x ≈ 0, h2.weights) + @test !(h.weights === h2.weights) + @test h.edges == h2.edges + @test h.closed == h2.closed + @test h.isdensity == h2.isdensity + end + + + @testset "Histogram merge" begin + histograms = [fit(Histogram, (rand(100), 10 * rand(100)), (0:0.1:1, 0:1:10)) for _ in 1:10] + h = zero(histograms[1]) + merge!(h, histograms ...) + @test h.weights == (+).((x -> x.weights).(histograms)...) 
+ @test (@inferred merge(histograms...)) == h + end + + @testset "midpoints" begin + @test StatsBase.midpoints([1, 2, 4]) == [1.5, 3.0] + @test StatsBase.midpoints(range(0, stop = 1, length = 5)) == 0.125:0.25:0.875 + end + + @testset "histogram with -0.0" begin + @test fit(Histogram, [-0.0, 1.0]) == fit(Histogram, [0.0, 1.0]) + @test fit(Histogram, [-0.0, 1.0], closed = :right) == + fit(Histogram, [0.0, 1.0], closed = :right) + @test fit(Histogram, [-0.0, -1.0]) == fit(Histogram, [0.0, -1.0]) + @test fit(Histogram, [-0.0, -1.0], closed = :right) == + fit(Histogram, [0.0, -1.0], closed = :right) + + @test fit(Histogram, [-0.0, 1.0], [-0.0, 0.5]) == + fit(Histogram, [0.0, 1.0], [0.0, 0.5]) == + fit(Histogram, [-0.0, 1.0], [0.0, 0.5]) == + fit(Histogram, [0.0, 1.0], [-0.0, 0.5]) == + fit(Histogram, [0.0, 1.0], 0.0:0.5:0.5) == + fit(Histogram, [-0.0, 1.0], 0.0:0.5:0.5) + @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0]) == + fit(Histogram, [0.0, 1.0], [-0.5, -0.0]) == + fit(Histogram, [-0.0, 1.0], [-0.5, 0.0]) == + fit(Histogram, [0.0, 1.0], [-0.5, 0.0]) == + fit(Histogram, [-0.0, 1.0], -0.5:0.5:0.0) == + fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0) + @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0], closed = :right) == + fit(Histogram, [0.0, 1.0], [-0.5, 0.0], closed = :right) == + fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0, closed = :right) + @test fit(Histogram, [-0.0, 1.0], [-0.0, 0.5], closed = :right) == + fit(Histogram, [0.0, 1.0], [0.0, 0.5], closed = :right) == + fit(Histogram, [0.0, 1.0], [-0.0, 0.5], closed = :right) == + fit(Histogram, [-0.0, 1.0], [0.0, 0.5], closed = :right) == + fit(Histogram, [0.0, 1.0], 0.0:0.5:0.5, closed = :right) == + fit(Histogram, [-0.0, 1.0], 0.0:0.5:0.5, closed = :right) + @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0], closed = :right) == + fit(Histogram, [0.0, 1.0], [-0.5, 0.0], closed = :right) == + fit(Histogram, [0.0, 1.0], [-0.5, -0.0], closed = :right) == + fit(Histogram, [-0.0, 1.0], [-0.5, 0.0], closed = :right) == + 
fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0, closed = :right) == + fit(Histogram, [-0.0, 1.0], -0.5:0.5:0.0, closed = :right) + + @test_throws ArgumentError fit(Histogram, [-0.5], LinRange(-1.0, -0.0, 3)) + @test_throws ArgumentError fit(Histogram, [-0.5], UnitRange(-0.0, 1.0)) + end end # @testset "StatsBase.Histogram" diff --git a/test/misc.jl b/test/misc.jl index 288a76f40..9f5c2b36a 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -32,65 +32,67 @@ vals, lens = rle(z) a = [1, 1, 2, 2, 2, 3, 1, 2, 2, 3, 3, 3, 3, 2] b = [true, false, false, true, false, true, true, false] -@test levelsmap(a) == Dict(2=>2, 3=>3, 1=>1) -@test levelsmap(b) == Dict(false=>2, true=>1) +@test levelsmap(a) == Dict(2 => 2, 3 => 3, 1 => 1) +@test levelsmap(b) == Dict(false => 2, true => 1) # indicatormat -II = [false true false false false; - true false false false true; - false false true true false] +II = [ + false true false false false; + true false false false true; + false false true true false +] x = [2, 1, 3, 3, 2] @test indicatormat(x, 3) == II -@test Matrix(indicatormat(x, 3; sparse=true)) == II +@test Matrix(indicatormat(x, 3; sparse = true)) == II x = ["b", "a", "c", "c", "b"] @test indicatormat(x) == II -@test Matrix(indicatormat(x; sparse=true)) == II +@test Matrix(indicatormat(x; sparse = true)) == II io = IOBuffer() describe(io, collect(1:10)) @test String(take!(io)) == """ - Summary Stats: - Length: 10 - Missing Count: 0 - Mean: 5.500000 - Std. Deviation: 3.027650 - Minimum: 1.000000 - 1st Quartile: 3.250000 - Median: 5.500000 - 3rd Quartile: 7.750000 - Maximum: 10.000000 - Type: $Int - """ + Summary Stats: + Length: 10 + Missing Count: 0 + Mean: 5.500000 + Std. 
Deviation: 3.027650 + Minimum: 1.000000 + 1st Quartile: 3.250000 + Median: 5.500000 + 3rd Quartile: 7.750000 + Maximum: 10.000000 + Type: $Int + """ -describe(io, Union{Float32,Missing}[1.0, 4.5, missing, missing, 33.1]) +describe(io, Union{Float32, Missing}[1.0, 4.5, missing, missing, 33.1]) @test String(take!(io)) == """ - Summary Stats: - Length: 5 - Missing Count: 2 - Mean: 12.866666 - Std. Deviation: 17.609751 - Minimum: 1.000000 - 1st Quartile: 2.750000 - Median: 4.500000 - 3rd Quartile: 18.799999 - Maximum: 33.099998 - Type: $(Union{Float32,Missing}) - """ + Summary Stats: + Length: 5 + Missing Count: 2 + Mean: 12.866666 + Std. Deviation: 17.609751 + Minimum: 1.000000 + 1st Quartile: 2.750000 + Median: 4.500000 + 3rd Quartile: 18.799999 + Maximum: 33.099998 + Type: $(Union{Float32, Missing}) + """ describe(io, Float64[]) @test String(take!(io)) == """ - Summary Stats: - Length: 0 - Type: Float64 - """ + Summary Stats: + Length: 0 + Type: Float64 + """ describe(io, fill("s", 3)) @test String(take!(io)) == """ - Summary Stats: - Length: 3 - Type: String - Number Unique: 1 - """ + Summary Stats: + Length: 3 + Type: String + Number Unique: 1 + """ diff --git a/test/moments.jl b/test/moments.jl index a3402b965..5b0572653 100644 --- a/test/moments.jl +++ b/test/moments.jl @@ -3,482 +3,482 @@ using Statistics using Test @testset "StatsBase.Moments" begin -weight_funcs = (weights, aweights, fweights, pweights) + weight_funcs = (weights, aweights, fweights, pweights) -##### weighted var & std + ##### weighted var & std -x = [0.57, 0.10, 0.91, 0.72, 0.46, 0.0] -xf0 = Float32.(x) -w = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] -wf0 = Float32.(w) + x = [0.57, 0.1, 0.91, 0.72, 0.46, 0.0] + xf0 = Float32.(x) + w = [3.84, 2.7, 8.29, 8.91, 9.71, 0.0] + wf0 = Float32.(w) -@testset "Uncorrected with $f (values of type $(eltype(x)), weights of type $(eltype(w)))" for f in weight_funcs, x in (x, xf0), w in (w, wf0) - TX = eltype(x) - T = promote_type(TX, eltype(w)) - wv = f(w) - m = 
@inferred(mean(x, wv))::T + @testset "Uncorrected with $f (values of type $(eltype(x)), weights of type $(eltype(w)))" for f in weight_funcs, x in (x, xf0), w in (w, wf0) + TX = eltype(x) + T = promote_type(TX, eltype(w)) + wv = f(w) + m = @inferred(mean(x, wv))::T - # expected uncorrected output - expected_var = sum(abs2.(x .- m), wv) / sum(wv) - expected_std = sqrt.(expected_var) - - @testset "Variance" begin - @test @inferred(var(x, wv; corrected=false))::T ≈ expected_var - @test @inferred(var(x, wv; mean=m, corrected=false))::T ≈ expected_var - @test @inferred(varm(x, wv, m; corrected=false))::T ≈ expected_var - end - - @testset "Standard Deviation" begin - @test @inferred(std(x, wv; corrected=false))::T ≈ expected_std - @test @inferred(std(x, wv; mean=m, corrected=false))::T ≈ expected_std - @test @inferred(stdm(x, wv, m; corrected=false))::T ≈ expected_std - end - - @testset "Mean and Variance" begin - (m, v) = @inferred(mean_and_var(x; corrected=false))::Tuple{TX,TX} - @test m == mean(x) - @test v == var(x; corrected=corrected=false) - - (m, v) = @inferred(mean_and_var(x, wv; corrected=false))::Tuple{T,T} - @test m == mean(x, wv) - @test v == var(x, wv; corrected=false) - end - - @testset "Mean and Standard Deviation" begin - (m, s) = @inferred(mean_and_std(x; corrected=false))::Tuple{TX,TX} - @test m == mean(x) - @test s == std(x; corrected=false) + # expected uncorrected output + expected_var = sum(abs2.(x .- m), wv) / sum(wv) + expected_std = sqrt.(expected_var) - (m, s) = @inferred(mean_and_std(x, wv; corrected=false))::Tuple{T,T} - @test m == mean(x, wv) - @test s == std(x, wv; corrected=false) - end -end - -# expected corrected output for (weights, aweights, fweights, pweights) -expected_var = [NaN, 0.0694434191182236, 0.05466601256158146, 0.06628969012045285] -expected_std = sqrt.(expected_var) - -@testset "Corrected with $(weight_funcs[i]) (values of type $(eltype(x)), weights of type $(eltype(w)))" for i in eachindex(weight_funcs), x in (x, xf0), w 
in (w, wf0) - TX = eltype(x) - TW = eltype(w) - T = promote_type(TX, TW) - TR = TX === Float32 || TW === Float32 ? Float32 : Float64 - wv = weight_funcs[i](w) - m = @inferred(mean(x, wv))::T - - @testset "Variance" begin - if isa(wv, Weights) - @test_throws ArgumentError var(x, wv; corrected=true) - else - @test @inferred(var(x, wv; corrected=true))::T ≈ TR(expected_var[i]) - @test @inferred(var(x, wv; mean=m, corrected=true))::T ≈ TR(expected_var[i]) - @test @inferred(varm(x, wv, m; corrected=true))::T ≈ TR(expected_var[i]) + @testset "Variance" begin + @test @inferred(var(x, wv; corrected = false))::T ≈ expected_var + @test @inferred(var(x, wv; mean = m, corrected = false))::T ≈ expected_var + @test @inferred(varm(x, wv, m; corrected = false))::T ≈ expected_var end - end - @testset "Standard Deviation" begin - if isa(wv, Weights) - @test_throws ArgumentError std(x, wv; corrected=true) - else - @test @inferred(std(x, wv; corrected=true))::T ≈ TR(expected_std[i]) - @test @inferred(std(x, wv; mean=m, corrected=true))::T ≈ TR(expected_std[i]) - @test @inferred(stdm(x, wv, m; corrected=true))::T ≈ TR(expected_std[i]) + @testset "Standard Deviation" begin + @test @inferred(std(x, wv; corrected = false))::T ≈ expected_std + @test @inferred(std(x, wv; mean = m, corrected = false))::T ≈ expected_std + @test @inferred(stdm(x, wv, m; corrected = false))::T ≈ expected_std end - end - @testset "Mean and Variance" begin - (m, v) = @inferred(mean_and_var(x; corrected=true))::Tuple{TX,TX} - @test m == mean(x) - @test v == var(x; corrected=true) + @testset "Mean and Variance" begin + (m, v) = @inferred(mean_and_var(x; corrected = false))::Tuple{TX, TX} + @test m == mean(x) + @test v == var(x; corrected = corrected = false) - if isa(wv, Weights) - @test_throws ArgumentError mean_and_var(x, wv; corrected=true) - else - (m, v) = @inferred(mean_and_var(x, wv; corrected=true))::Tuple{T,T} + (m, v) = @inferred(mean_and_var(x, wv; corrected = false))::Tuple{T, T} @test m == mean(x, wv) 
- @test v == var(x, wv; corrected=true) + @test v == var(x, wv; corrected = false) end - end - @testset "Mean and Standard Deviation" begin - (m, s) = @inferred(mean_and_std(x; corrected=true))::Tuple{TX,TX} - @test m == mean(x) - @test s == std(x; corrected=true) + @testset "Mean and Standard Deviation" begin + (m, s) = @inferred(mean_and_std(x; corrected = false))::Tuple{TX, TX} + @test m == mean(x) + @test s == std(x; corrected = false) - if isa(wv, Weights) - @test_throws ArgumentError mean_and_std(x, wv; corrected=true) - else - (m, s) = @inferred(mean_and_std(x, wv; corrected=true))::Tuple{T,T} + (m, s) = @inferred(mean_and_std(x, wv; corrected = false))::Tuple{T, T} @test m == mean(x, wv) - @test s == std(x, wv; corrected=true) + @test s == std(x, wv; corrected = false) end end -end - -x = rand(5, 6) -xf0 = Float32.(x) -w1 = [0.57, 5.10, 0.91, 1.72, 0.0] -w1f0 = Float32.(w1) -w2 = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] -w2f0 = Float32.(w2) - -@testset "Uncorrected with $f (values of type $(eltype(x)), 1st weights of type $(eltype(w1)), 2nd weights of type $(eltype(w2)))" for f in weight_funcs, x in (x, xf0), w1 in (w1, w1f0), w2 in (w2, w2f0) - TX = eltype(x) - TW1 = eltype(w1) - TW2 = eltype(w2) - T1 = promote_type(TX, TW1) - T2 = promote_type(TX, TW2) - wv1 = f(w1) - wv2 = f(w2) - m1 = @inferred(mean(x, wv1, dims=1))::Matrix{T1} - m2 = @inferred(mean(x, wv2, dims=2))::Matrix{T2} - - expected_var1 = sum(abs2.(x .- m1) .* w1, dims = 1) ./ sum(wv1) - expected_var2 = sum(abs2.(x .- m2) .* w2', dims = 2) ./ sum(wv2) - expected_std1 = sqrt.(expected_var1) - expected_std2 = sqrt.(expected_var2) - - @testset "Variance" begin - @test @inferred(var(x, wv1, 1; corrected=false))::Matrix{T1} ≈ expected_var1 - @test @inferred(var(x, wv2, 2; corrected=false))::Matrix{T2} ≈ expected_var2 - @test @inferred(var(x, wv1, 1; mean=m1, corrected=false))::Matrix{T1} ≈ expected_var1 - @test @inferred(var(x, wv2, 2; mean=m2, corrected=false))::Matrix{T2} ≈ expected_var2 - @test 
@inferred(varm(x, wv1, m1, 1; corrected=false))::Matrix{T1} ≈ expected_var1 - @test @inferred(varm(x, wv2, m2, 2; corrected=false))::Matrix{T2} ≈ expected_var2 - end - @testset "Standard Deviation" begin - @test @inferred(std(x, wv1, 1; corrected=false))::Matrix{T1} ≈ expected_std1 - @test @inferred(std(x, wv2, 2; corrected=false))::Matrix{T2} ≈ expected_std2 - @test @inferred(std(x, wv1, 1; mean=m1, corrected=false))::Matrix{T1} ≈ expected_std1 - @test @inferred(std(x, wv2, 2; mean=m2, corrected=false))::Matrix{T2} ≈ expected_std2 - @test @inferred(stdm(x, wv1, m1, 1; corrected=false))::Matrix{T1} ≈ expected_std1 - @test @inferred(stdm(x, wv2, m2, 2; corrected=false))::Matrix{T2} ≈ expected_std2 - end + # expected corrected output for (weights, aweights, fweights, pweights) + expected_var = [NaN, 0.0694434191182236, 0.05466601256158146, 0.06628969012045285] + expected_std = sqrt.(expected_var) - @testset "Mean and Variance" begin - for d in 1:2 - (m, v) = @inferred(mean_and_var(x, d; corrected=false))::Tuple{Matrix{TX},Matrix{TX}} - @test m == mean(x, dims=d) - @test v == var(x, dims=d, corrected=false) + @testset "Corrected with $(weight_funcs[i]) (values of type $(eltype(x)), weights of type $(eltype(w)))" for i in eachindex(weight_funcs), x in (x, xf0), w in (w, wf0) + TX = eltype(x) + TW = eltype(w) + T = promote_type(TX, TW) + TR = TX === Float32 || TW === Float32 ? 
Float32 : Float64 + wv = weight_funcs[i](w) + m = @inferred(mean(x, wv))::T + + @testset "Variance" begin + if isa(wv, Weights) + @test_throws ArgumentError var(x, wv; corrected = true) + else + @test @inferred(var(x, wv; corrected = true))::T ≈ TR(expected_var[i]) + @test @inferred(var(x, wv; mean = m, corrected = true))::T ≈ TR(expected_var[i]) + @test @inferred(varm(x, wv, m; corrected = true))::T ≈ TR(expected_var[i]) + end end - (m, v) = @inferred(mean_and_var(x, wv1, 1; corrected=false))::Tuple{Matrix{T1},Matrix{T1}} - @test m == mean(x, wv1, dims=1) - @test v == var(x, wv1, 1; corrected=false) + @testset "Standard Deviation" begin + if isa(wv, Weights) + @test_throws ArgumentError std(x, wv; corrected = true) + else + @test @inferred(std(x, wv; corrected = true))::T ≈ TR(expected_std[i]) + @test @inferred(std(x, wv; mean = m, corrected = true))::T ≈ TR(expected_std[i]) + @test @inferred(stdm(x, wv, m; corrected = true))::T ≈ TR(expected_std[i]) + end + end - (m, v) = @inferred(mean_and_var(x, wv2, 2; corrected=false))::Tuple{Matrix{T2},Matrix{T2}} - @test m == mean(x, wv2, dims=2) - @test v == var(x, wv2, 2; corrected=false) - end + @testset "Mean and Variance" begin + (m, v) = @inferred(mean_and_var(x; corrected = true))::Tuple{TX, TX} + @test m == mean(x) + @test v == var(x; corrected = true) - @testset "Mean and Standard Deviation" begin - for d in 1:2 - (m, s) = @inferred(mean_and_std(x, d; corrected=false))::Tuple{Matrix{TX},Matrix{TX}} - @test m == mean(x, dims=d) - @test s == std(x, dims=d; corrected=false) + if isa(wv, Weights) + @test_throws ArgumentError mean_and_var(x, wv; corrected = true) + else + (m, v) = @inferred(mean_and_var(x, wv; corrected = true))::Tuple{T, T} + @test m == mean(x, wv) + @test v == var(x, wv; corrected = true) + end end - (m, s) = @inferred(mean_and_std(x, wv1, 1; corrected=false))::Tuple{Matrix{T1},Matrix{T1}} - @test m == mean(x, wv1, dims=1) - @test s == std(x, wv1, 1; corrected=false) + @testset "Mean and Standard 
Deviation" begin + (m, s) = @inferred(mean_and_std(x; corrected = true))::Tuple{TX, TX} + @test m == mean(x) + @test s == std(x; corrected = true) - (m, s) = @inferred(mean_and_std(x, wv2, 2; corrected=false))::Tuple{Matrix{T2},Matrix{T2}} - @test m == mean(x, wv2, dims=2) - @test s == std(x, wv2, 2; corrected=false) + if isa(wv, Weights) + @test_throws ArgumentError mean_and_std(x, wv; corrected = true) + else + (m, s) = @inferred(mean_and_std(x, wv; corrected = true))::Tuple{T, T} + @test m == mean(x, wv) + @test s == std(x, wv; corrected = true) + end + end end -end - -@testset "Corrected with $f (values of type $(eltype(x)), weights of type $(eltype(w1)))" for f in weight_funcs, x in (Float32.(x), Float64.(x)), (w1, w2) in ((Float32.(w1), Float32.(w2)), (Float64.(w1), Float64.(w2))) - TX = eltype(x) - TW1 = eltype(w1) - TW2 = eltype(w2) - T1 = promote_type(TX, TW1) - T2 = promote_type(TX, TW2) - wv1 = f(w1) - wv2 = f(w2) - m1 = @inferred(mean(x, wv1, dims=1))::Matrix{T1} - m2 = @inferred(mean(x, wv2, dims=2))::Matrix{T2} - - if !isa(wv1, Weights) - expected_var1 = sum(abs2.(x .- m1) .* w1, dims = 1) .* StatsBase.varcorrection(wv1, true) - expected_var2 = sum(abs2.(x .- m2) .* w2', dims = 2) .* StatsBase.varcorrection(wv2, true) + + x = rand(5, 6) + xf0 = Float32.(x) + w1 = [0.57, 5.1, 0.91, 1.72, 0.0] + w1f0 = Float32.(w1) + w2 = [3.84, 2.7, 8.29, 8.91, 9.71, 0.0] + w2f0 = Float32.(w2) + + @testset "Uncorrected with $f (values of type $(eltype(x)), 1st weights of type $(eltype(w1)), 2nd weights of type $(eltype(w2)))" for f in weight_funcs, x in (x, xf0), w1 in (w1, w1f0), w2 in (w2, w2f0) + TX = eltype(x) + TW1 = eltype(w1) + TW2 = eltype(w2) + T1 = promote_type(TX, TW1) + T2 = promote_type(TX, TW2) + wv1 = f(w1) + wv2 = f(w2) + m1 = @inferred(mean(x, wv1, dims = 1))::Matrix{T1} + m2 = @inferred(mean(x, wv2, dims = 2))::Matrix{T2} + + expected_var1 = sum(abs2.(x .- m1) .* w1, dims = 1) ./ sum(wv1) + expected_var2 = sum(abs2.(x .- m2) .* w2', dims = 2) ./ 
sum(wv2) expected_std1 = sqrt.(expected_var1) expected_std2 = sqrt.(expected_var2) - end - @testset "Variance" begin - if isa(wv1, Weights) - @test_throws ArgumentError var(x, wv1, 1; corrected=true) - else - @test @inferred(var(x, wv1, 1; corrected=true))::Matrix{T1} ≈ expected_var1 - @test @inferred(var(x, wv2, 2; corrected=true))::Matrix{T2} ≈ expected_var2 - @test @inferred(var(x, wv1, 1; mean=m1, corrected=true))::Matrix{T1} ≈ expected_var1 - @test @inferred(var(x, wv2, 2; mean=m2, corrected=true))::Matrix{T2} ≈ expected_var2 - @test @inferred(varm(x, wv1, m1, 1; corrected=true))::Matrix{T1} ≈ expected_var1 - @test @inferred(varm(x, wv2, m2, 2; corrected=true))::Matrix{T2} ≈ expected_var2 + @testset "Variance" begin + @test @inferred(var(x, wv1, 1; corrected = false))::Matrix{T1} ≈ expected_var1 + @test @inferred(var(x, wv2, 2; corrected = false))::Matrix{T2} ≈ expected_var2 + @test @inferred(var(x, wv1, 1; mean = m1, corrected = false))::Matrix{T1} ≈ expected_var1 + @test @inferred(var(x, wv2, 2; mean = m2, corrected = false))::Matrix{T2} ≈ expected_var2 + @test @inferred(varm(x, wv1, m1, 1; corrected = false))::Matrix{T1} ≈ expected_var1 + @test @inferred(varm(x, wv2, m2, 2; corrected = false))::Matrix{T2} ≈ expected_var2 end - end - @testset "Standard Deviation" begin - if isa(wv1, Weights) - @test_throws ArgumentError std(x, wv1, 1; corrected=true) - else - @test @inferred(std(x, wv1, 1; corrected=true))::Matrix{T1} ≈ expected_std1 - @test @inferred(std(x, wv2, 2; corrected=true))::Matrix{T2} ≈ expected_std2 - @test @inferred(std(x, wv1, 1; mean=m1, corrected=true))::Matrix{T1} ≈ expected_std1 - @test @inferred(std(x, wv2, 2; mean=m2, corrected=true))::Matrix{T2} ≈ expected_std2 - @test @inferred(stdm(x, wv1, m1, 1; corrected=true))::Matrix{T1} ≈ expected_std1 - @test @inferred(stdm(x, wv2, m2, 2; corrected=true))::Matrix{T2} ≈ expected_std2 + @testset "Standard Deviation" begin + @test @inferred(std(x, wv1, 1; corrected = false))::Matrix{T1} ≈ 
expected_std1 + @test @inferred(std(x, wv2, 2; corrected = false))::Matrix{T2} ≈ expected_std2 + @test @inferred(std(x, wv1, 1; mean = m1, corrected = false))::Matrix{T1} ≈ expected_std1 + @test @inferred(std(x, wv2, 2; mean = m2, corrected = false))::Matrix{T2} ≈ expected_std2 + @test @inferred(stdm(x, wv1, m1, 1; corrected = false))::Matrix{T1} ≈ expected_std1 + @test @inferred(stdm(x, wv2, m2, 2; corrected = false))::Matrix{T2} ≈ expected_std2 end - end - @testset "Mean and Variance" begin - for d in 1:2 - (m, v) = @inferred(mean_and_var(x, d; corrected=true))::Tuple{Matrix{TX},Matrix{TX}} - @test m == mean(x, dims=d) - @test v == var(x, dims=d, corrected=true) + @testset "Mean and Variance" begin + for d in 1:2 + (m, v) = @inferred(mean_and_var(x, d; corrected = false))::Tuple{Matrix{TX}, Matrix{TX}} + @test m == mean(x, dims = d) + @test v == var(x, dims = d, corrected = false) + end + + (m, v) = @inferred(mean_and_var(x, wv1, 1; corrected = false))::Tuple{Matrix{T1}, Matrix{T1}} + @test m == mean(x, wv1, dims = 1) + @test v == var(x, wv1, 1; corrected = false) + + (m, v) = @inferred(mean_and_var(x, wv2, 2; corrected = false))::Tuple{Matrix{T2}, Matrix{T2}} + @test m == mean(x, wv2, dims = 2) + @test v == var(x, wv2, 2; corrected = false) end - if isa(wv1, Weights) - @test_throws ArgumentError mean_and_var(x, wv1, 1; corrected=true) - else - (m, v) = @inferred(mean_and_var(x, wv1, 1; corrected=true))::Tuple{Matrix{T1},Matrix{T1}} - @test m == mean(x, wv1, dims=1) - @test v == var(x, wv1, 1; corrected=true) + @testset "Mean and Standard Deviation" begin + for d in 1:2 + (m, s) = @inferred(mean_and_std(x, d; corrected = false))::Tuple{Matrix{TX}, Matrix{TX}} + @test m == mean(x, dims = d) + @test s == std(x, dims = d; corrected = false) + end + + (m, s) = @inferred(mean_and_std(x, wv1, 1; corrected = false))::Tuple{Matrix{T1}, Matrix{T1}} + @test m == mean(x, wv1, dims = 1) + @test s == std(x, wv1, 1; corrected = false) - (m, v) = @inferred(mean_and_var(x, wv2, 
2; corrected=true))::Tuple{Matrix{T2},Matrix{T2}} - @test m == mean(x, wv2, dims=2) - @test v == var(x, wv2, 2; corrected=true) + (m, s) = @inferred(mean_and_std(x, wv2, 2; corrected = false))::Tuple{Matrix{T2}, Matrix{T2}} + @test m == mean(x, wv2, dims = 2) + @test s == std(x, wv2, 2; corrected = false) end end - @testset "Mean and Standard Deviation" begin - for d in 1:2 - (m, s) = @inferred(mean_and_std(x, d; corrected=true))::Tuple{Matrix{TX},Matrix{TX}} - @test m == mean(x, dims=d) - @test s == std(x, dims=d, corrected=true) + @testset "Corrected with $f (values of type $(eltype(x)), weights of type $(eltype(w1)))" for f in weight_funcs, x in (Float32.(x), Float64.(x)), (w1, w2) in ((Float32.(w1), Float32.(w2)), (Float64.(w1), Float64.(w2))) + TX = eltype(x) + TW1 = eltype(w1) + TW2 = eltype(w2) + T1 = promote_type(TX, TW1) + T2 = promote_type(TX, TW2) + wv1 = f(w1) + wv2 = f(w2) + m1 = @inferred(mean(x, wv1, dims = 1))::Matrix{T1} + m2 = @inferred(mean(x, wv2, dims = 2))::Matrix{T2} + + if !isa(wv1, Weights) + expected_var1 = sum(abs2.(x .- m1) .* w1, dims = 1) .* StatsBase.varcorrection(wv1, true) + expected_var2 = sum(abs2.(x .- m2) .* w2', dims = 2) .* StatsBase.varcorrection(wv2, true) + expected_std1 = sqrt.(expected_var1) + expected_std2 = sqrt.(expected_var2) end - if isa(wv1, Weights) - @test_throws ArgumentError mean_and_std(x, wv1, 1; corrected=true) - else - (m, s) = @inferred(mean_and_std(x, wv1, 1; corrected=true))::Tuple{Matrix{T1},Matrix{T1}} - @test m == mean(x, wv1, dims=1) - @test s == std(x, wv1, 1; corrected=true) + @testset "Variance" begin + if isa(wv1, Weights) + @test_throws ArgumentError var(x, wv1, 1; corrected = true) + else + @test @inferred(var(x, wv1, 1; corrected = true))::Matrix{T1} ≈ expected_var1 + @test @inferred(var(x, wv2, 2; corrected = true))::Matrix{T2} ≈ expected_var2 + @test @inferred(var(x, wv1, 1; mean = m1, corrected = true))::Matrix{T1} ≈ expected_var1 + @test @inferred(var(x, wv2, 2; mean = m2, corrected = 
true))::Matrix{T2} ≈ expected_var2 + @test @inferred(varm(x, wv1, m1, 1; corrected = true))::Matrix{T1} ≈ expected_var1 + @test @inferred(varm(x, wv2, m2, 2; corrected = true))::Matrix{T2} ≈ expected_var2 + end + end - (m, s) = @inferred(mean_and_std(x, wv2, 2; corrected=true))::Tuple{Matrix{T2},Matrix{T2}} - @test m == mean(x, wv2, dims=2) - @test s == std(x, wv2, 2; corrected=true) + @testset "Standard Deviation" begin + if isa(wv1, Weights) + @test_throws ArgumentError std(x, wv1, 1; corrected = true) + else + @test @inferred(std(x, wv1, 1; corrected = true))::Matrix{T1} ≈ expected_std1 + @test @inferred(std(x, wv2, 2; corrected = true))::Matrix{T2} ≈ expected_std2 + @test @inferred(std(x, wv1, 1; mean = m1, corrected = true))::Matrix{T1} ≈ expected_std1 + @test @inferred(std(x, wv2, 2; mean = m2, corrected = true))::Matrix{T2} ≈ expected_std2 + @test @inferred(stdm(x, wv1, m1, 1; corrected = true))::Matrix{T1} ≈ expected_std1 + @test @inferred(stdm(x, wv2, m2, 2; corrected = true))::Matrix{T2} ≈ expected_std2 + end end - end -end -@testset "Skewness and Kurtosis with $f" for f in weight_funcs - for T in (Int, Float32, Float64) - for v in (T(1):T(5), collect(T, 1:5)) - s = @inferred(skewness(v)) - @test s isa float(T) - @test iszero(s) + @testset "Mean and Variance" begin + for d in 1:2 + (m, v) = @inferred(mean_and_var(x, d; corrected = true))::Tuple{Matrix{TX}, Matrix{TX}} + @test m == mean(x, dims = d) + @test v == var(x, dims = d, corrected = true) + end - k = @inferred(kurtosis(v)) - @test k isa float(T) - @test k ≈ oftype(k, -1.3) + if isa(wv1, Weights) + @test_throws ArgumentError mean_and_var(x, wv1, 1; corrected = true) + else + (m, v) = @inferred(mean_and_var(x, wv1, 1; corrected = true))::Tuple{Matrix{T1}, Matrix{T1}} + @test m == mean(x, wv1, dims = 1) + @test v == var(x, wv1, 1; corrected = true) + + (m, v) = @inferred(mean_and_var(x, wv2, 2; corrected = true))::Tuple{Matrix{T2}, Matrix{T2}} + @test m == mean(x, wv2, dims = 2) + @test v == var(x, 
wv2, 2; corrected = true) + end + end + + @testset "Mean and Standard Deviation" begin + for d in 1:2 + (m, s) = @inferred(mean_and_std(x, d; corrected = true))::Tuple{Matrix{TX}, Matrix{TX}} + @test m == mean(x, dims = d) + @test s == std(x, dims = d, corrected = true) + end + + if isa(wv1, Weights) + @test_throws ArgumentError mean_and_std(x, wv1, 1; corrected = true) + else + (m, s) = @inferred(mean_and_std(x, wv1, 1; corrected = true))::Tuple{Matrix{T1}, Matrix{T1}} + @test m == mean(x, wv1, dims = 1) + @test s == std(x, wv1, 1; corrected = true) + + (m, s) = @inferred(mean_and_std(x, wv2, 2; corrected = true))::Tuple{Matrix{T2}, Matrix{T2}} + @test m == mean(x, wv2, dims = 2) + @test s == std(x, wv2, 2; corrected = true) + end end + end + + @testset "Skewness and Kurtosis with $f" for f in weight_funcs + for T in (Int, Float32, Float64) + for v in (T(1):T(5), collect(T, 1:5)) + s = @inferred(skewness(v)) + @test s isa float(T) + @test iszero(s) + + k = @inferred(kurtosis(v)) + @test k isa float(T) + @test k ≈ oftype(k, -1.3) + end - v = T[1, 2, 2, 2, 5] - s = @inferred(skewness(v)) - @test s isa float(T) - @test s ≈ oftype(s, 1.1731251294063556) - - v = T[1, 4, 4, 4, 5] - s = @inferred(skewness(v)) - @test s isa float(T) - @test s ≈ oftype(s, -1.1731251294063556) - - v = T[1, 2, 3, 3, 2] - k = @inferred(kurtosis(v)) - @test k isa float(T) - @test k ≈ oftype(k, -1.1530612244897953) - - # Empty arrays - s = @inferred(skewness(T[])) - @test s isa float(T) - @test isnan(s) - k = @inferred(kurtosis(T[])) - @test k isa float(T) - @test isnan(k) - - for T2 in (Int, Float32, Float64) - wv = f(fill(T2(2), 5)) v = T[1, 2, 2, 2, 5] - s = @inferred(skewness(v, wv)) - @test s isa float(promote_type(T, T2)) + s = @inferred(skewness(v)) + @test s isa float(T) @test s ≈ oftype(s, 1.1731251294063556) - v = collect(T, 1:5) - k = @inferred(kurtosis(v, wv)) - @test k isa float(promote_type(T, T2)) - @test k ≈ oftype(k, -1.3) + v = T[1, 4, 4, 4, 5] + s = @inferred(skewness(v)) + 
@test s isa float(T) + @test s ≈ oftype(s, -1.1731251294063556) + + v = T[1, 2, 3, 3, 2] + k = @inferred(kurtosis(v)) + @test k isa float(T) + @test k ≈ oftype(k, -1.1530612244897953) # Empty arrays - wv = f(T2[]) - s = @inferred(skewness(T[], wv)) - @test s isa float(promote_type(T, T2)) + s = @inferred(skewness(T[])) + @test s isa float(T) @test isnan(s) - k = @inferred(kurtosis(T[], wv)) - @test k isa float(promote_type(T, T2)) + k = @inferred(kurtosis(T[])) + @test k isa float(T) @test isnan(k) - end - - # Invalid arguments - v = collect(T, 1:5) - for n in (length(x) - 1, length(x) + 1) - @test_throws DimensionMismatch("Inconsistent array lengths.") kurtosis(v, f(ones(T, n))) - @test_throws DimensionMismatch("Inconsistent array lengths.") skewness(v, f(ones(T, n))) - end - end -end -@testset "General Moments with $f" for f in weight_funcs - for T in (Int, Float32, Float64) - x = collect(T, 2:8) - for k in 2:5 - momk = @inferred(moment(x, k)) - @test momk isa float(T) - @test momk ≈ sum((x .- 5).^k) / length(x) - - # Empty array - momk = @inferred(moment(T[], k)) - @test momk isa float(T) - @test isnan(momk) - - for TM in (Int, Float32, Float64) - m = TM(4) - momk = @inferred(moment(x, k, m)) - @test momk isa float(promote_type(T, TM)) - @test momk ≈ sum((x .- 4).^k) / length(x) + for T2 in (Int, Float32, Float64) + wv = f(fill(T2(2), 5)) + v = T[1, 2, 2, 2, 5] + s = @inferred(skewness(v, wv)) + @test s isa float(promote_type(T, T2)) + @test s ≈ oftype(s, 1.1731251294063556) + + v = collect(T, 1:5) + k = @inferred(kurtosis(v, wv)) + @test k isa float(promote_type(T, T2)) + @test k ≈ oftype(k, -1.3) + + # Empty arrays + wv = f(T2[]) + s = @inferred(skewness(T[], wv)) + @test s isa float(promote_type(T, T2)) + @test isnan(s) + k = @inferred(kurtosis(T[], wv)) + @test k isa float(promote_type(T, T2)) + @test isnan(k) + end - # Empty array - momk = @inferred(moment(T[], k, zero(TM))) - @test momk isa float(promote_type(T, TM)) - @test isnan(momk) + # Invalid 
arguments + v = collect(T, 1:5) + for n in (length(x) - 1, length(x) + 1) + @test_throws DimensionMismatch("Inconsistent array lengths.") kurtosis(v, f(ones(T, n))) + @test_throws DimensionMismatch("Inconsistent array lengths.") skewness(v, f(ones(T, n))) end end + end - for T2 in (Int, Float32, Float64) - wv = f(T2[1, 1, 1, 1, 1, 0, 0]) - x2 = collect(T, 2:6) + @testset "General Moments with $f" for f in weight_funcs + for T in (Int, Float32, Float64) + x = collect(T, 2:8) for k in 2:5 - momk = @inferred(moment(x, k, wv)) - @test momk isa float(promote_type(T, T2)) - @test momk ≈ sum((x2 .- 4).^k) / 5 + momk = @inferred(moment(x, k)) + @test momk isa float(T) + @test momk ≈ sum((x .- 5) .^ k) / length(x) # Empty array - momk = @inferred(moment(T[], k, f(T2[]))) - @test momk isa float(promote_type(T, T2)) + momk = @inferred(moment(T[], k)) + @test momk isa float(T) @test isnan(momk) for TM in (Int, Float32, Float64) - m = TM(3) - momk = @inferred(moment(x, k, wv, m)) - @test momk isa float(promote_type(T, T2, TM)) - @test momk ≈ sum((x2 .- 3).^k) / 5 + m = TM(4) + momk = @inferred(moment(x, k, m)) + @test momk isa float(promote_type(T, TM)) + @test momk ≈ sum((x .- 4) .^ k) / length(x) # Empty array - momk = @inferred(moment(T[], k, f(T2[]), zero(TM))) - @test momk isa float(promote_type(T, T2, TM)) + momk = @inferred(moment(T[], k, zero(TM))) + @test momk isa float(promote_type(T, TM)) @test isnan(momk) end end - end - end -end - -@testset "Cumulants with $f" for f in weight_funcs - for T in (Int, Float32, Float64) - x = collect(T, 2:8) - for k in 1:6 - cumk = @inferred(cumulant(x, k)) - @test cumk isa float(T) - if k == 1 - @test cumk ≈ mean(x) - elseif k == 2 || k == 3 - @test cumk ≈ moment(x, k) - elseif k == 4 - @test cumk ≈ moment(x, 4) - 3*moment(x, 2)^2 - elseif k == 5 - @test cumk ≈ moment(x, 5) - 10*moment(x, 3)*moment(x, 2) - else - @assert k == 6 - @test cumk ≈ moment(x, 6) - 15*moment(x, 4)*moment(x, 2) - 10*moment(x, 3)^2 + 30*moment(x, 2)^3 + + for 
T2 in (Int, Float32, Float64) + wv = f(T2[1, 1, 1, 1, 1, 0, 0]) + x2 = collect(T, 2:6) + for k in 2:5 + momk = @inferred(moment(x, k, wv)) + @test momk isa float(promote_type(T, T2)) + @test momk ≈ sum((x2 .- 4) .^ k) / 5 + + # Empty array + momk = @inferred(moment(T[], k, f(T2[]))) + @test momk isa float(promote_type(T, T2)) + @test isnan(momk) + + for TM in (Int, Float32, Float64) + m = TM(3) + momk = @inferred(moment(x, k, wv, m)) + @test momk isa float(promote_type(T, T2, TM)) + @test momk ≈ sum((x2 .- 3) .^ k) / 5 + + # Empty array + momk = @inferred(moment(T[], k, f(T2[]), zero(TM))) + @test momk isa float(promote_type(T, T2, TM)) + @test isnan(momk) + end + end end end - cumks = @inferred(cumulant(x, 1:6)) - @test cumks isa Vector{float(T)} - @test cumks == [cumulant(x, i) for i in 1:6] + end - for TM in (Int, Float32, Float64) - m = TM(4) + @testset "Cumulants with $f" for f in weight_funcs + for T in (Int, Float32, Float64) + x = collect(T, 2:8) for k in 1:6 - cumk = @inferred(cumulant(x, k, m)) - @test cumk isa float(promote_type(T, TM)) + cumk = @inferred(cumulant(x, k)) + @test cumk isa float(T) if k == 1 - @test cumk ≈ m + @test cumk ≈ mean(x) elseif k == 2 || k == 3 - @test cumk ≈ moment(x, k, m) + @test cumk ≈ moment(x, k) elseif k == 4 - @test cumk ≈ moment(x, 4, m) - 3*moment(x, 2, m)^2 + @test cumk ≈ moment(x, 4) - 3 * moment(x, 2)^2 elseif k == 5 - @test cumk ≈ moment(x, 5, m) - 10*moment(x, 3, m)*moment(x, 2, m) + @test cumk ≈ moment(x, 5) - 10 * moment(x, 3) * moment(x, 2) else @assert k == 6 - @test cumk ≈ moment(x, 6, m) - 15*moment(x, 4, m)*moment(x, 2, m) - 10*moment(x, 3, m)^2 + 30*moment(x, 2, m)^3 + @test cumk ≈ moment(x, 6) - 15 * moment(x, 4) * moment(x, 2) - 10 * moment(x, 3)^2 + 30 * moment(x, 2)^3 end end - cumks = @inferred(cumulant(x, 1:6, m)) - @test cumks isa Vector{float(promote_type(T, TM))} - @test cumks == [cumulant(x, i, m) for i in 1:6] - end + cumks = @inferred(cumulant(x, 1:6)) + @test cumks isa Vector{float(T)} + @test 
cumks == [cumulant(x, i) for i in 1:6] - for T2 in (Int, Float32, Float64) - wv = f(T2[1, 1, 1, 1, 1, 0, 0]) - x2 = collect(T, 2:6) - for k in 1:6 - cumk = @inferred(cumulant(x, k, wv)) - @test cumk isa float(promote_type(T, T2)) - @test cumk ≈ cumulant(x2, k) rtol = cbrt(eps(typeof(cumk))) + for TM in (Int, Float32, Float64) + m = TM(4) + for k in 1:6 + cumk = @inferred(cumulant(x, k, m)) + @test cumk isa float(promote_type(T, TM)) + if k == 1 + @test cumk ≈ m + elseif k == 2 || k == 3 + @test cumk ≈ moment(x, k, m) + elseif k == 4 + @test cumk ≈ moment(x, 4, m) - 3 * moment(x, 2, m)^2 + elseif k == 5 + @test cumk ≈ moment(x, 5, m) - 10 * moment(x, 3, m) * moment(x, 2, m) + else + @assert k == 6 + @test cumk ≈ moment(x, 6, m) - 15 * moment(x, 4, m) * moment(x, 2, m) - 10 * moment(x, 3, m)^2 + 30 * moment(x, 2, m)^3 + end + end + cumks = @inferred(cumulant(x, 1:6, m)) + @test cumks isa Vector{float(promote_type(T, TM))} + @test cumks == [cumulant(x, i, m) for i in 1:6] end - cumks = @inferred(cumulant(x, 1:6, wv)) - @test cumks isa Vector{float(promote_type(T, T2))} - @test cumks == [cumulant(x, i, wv) for i in 1:6] - for TM in (Int, Float32, Float64) - m = TM(3) + for T2 in (Int, Float32, Float64) + wv = f(T2[1, 1, 1, 1, 1, 0, 0]) + x2 = collect(T, 2:6) for k in 1:6 - cumk = @inferred(cumulant(x, k, wv, m)) - @test cumk isa float(promote_type(T, T2, TM)) - @test cumk ≈ cumulant(x2, k, m) rtol = cbrt(eps(typeof(cumk))) + cumk = @inferred(cumulant(x, k, wv)) + @test cumk isa float(promote_type(T, T2)) + @test cumk ≈ cumulant(x2, k) rtol = cbrt(eps(typeof(cumk))) + end + cumks = @inferred(cumulant(x, 1:6, wv)) + @test cumks isa Vector{float(promote_type(T, T2))} + @test cumks == [cumulant(x, i, wv) for i in 1:6] + + for TM in (Int, Float32, Float64) + m = TM(3) + for k in 1:6 + cumk = @inferred(cumulant(x, k, wv, m)) + @test cumk isa float(promote_type(T, T2, TM)) + @test cumk ≈ cumulant(x2, k, m) rtol = cbrt(eps(typeof(cumk))) + end + cumks = @inferred(cumulant(x, 
1:6, wv, m)) + @test cumks isa Vector{float(promote_type(T, T2, TM))} + @test cumks == [cumulant(x, i, wv, m) for i in 1:6] end - cumks = @inferred(cumulant(x, 1:6, wv, m)) - @test cumks isa Vector{float(promote_type(T, T2, TM))} - @test cumks == [cumulant(x, i, wv, m) for i in 1:6] end - end - # Invalid arguments - @test_throws ArgumentError cumulant(x, -1) - @test_throws ArgumentError cumulant(x, 0) - @test_throws ArgumentError cumulant(x, 0:3) - @test_throws ArgumentError cumulant(x, -1:3) - @test_throws ArgumentError cumulant(x, 1:0) + # Invalid arguments + @test_throws ArgumentError cumulant(x, -1) + @test_throws ArgumentError cumulant(x, 0) + @test_throws ArgumentError cumulant(x, 0:3) + @test_throws ArgumentError cumulant(x, -1:3) + @test_throws ArgumentError cumulant(x, 1:0) - for n in (length(x) - 1, length(x) + 1), krange in (1, 1:3) - @test_throws DimensionMismatch("Inconsistent array lengths.") cumulant(x, krange, f(ones(n))) - @test_throws DimensionMismatch("Inconsistent array lengths.") cumulant(x, krange, f(ones(n)), 0.0) + for n in (length(x) - 1, length(x) + 1), krange in (1, 1:3) + @test_throws DimensionMismatch("Inconsistent array lengths.") cumulant(x, krange, f(ones(n))) + @test_throws DimensionMismatch("Inconsistent array lengths.") cumulant(x, krange, f(ones(n)), 0.0) + end end end -end end # @testset "StatsBase.Moments" diff --git a/test/pairwise.jl b/test/pairwise.jl index aad724660..c492f05da 100644 --- a/test/pairwise.jl +++ b/test/pairwise.jl @@ -33,14 +33,16 @@ arbitrary_fun(x, y) = cor(x, y) res2 = zeros(AbstractFloat, size(res)) @test pairwise!(f, res2, Any[[1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]]) === res2 @test res == res2 == - [f(xi, yi) for xi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]), - yi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0])] + [ + f(xi, yi) for xi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]), + yi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]) + ] @test res isa Matrix{Float64} @inferred pairwise(f, x, y) - @test_throws 
Union{ArgumentError,MethodError} pairwise(f, [Int[]], [Int[]]) - @test_throws Union{ArgumentError,MethodError} pairwise!(f, zeros(1, 1), [Int[]], [Int[]]) + @test_throws Union{ArgumentError, MethodError} pairwise(f, [Int[]], [Int[]]) + @test_throws Union{ArgumentError, MethodError} pairwise!(f, zeros(1, 1), [Int[]], [Int[]]) res = pairwise(f, [], []) @test size(res) == (0, 0) @@ -68,8 +70,10 @@ arbitrary_fun(x, y) = cor(x, y) @test_throws DimensionMismatch pairwise!(f, zeros(1, 2), x, y) @test_throws DimensionMismatch pairwise!(f, zeros(1, 2), [], []) - @test_throws DimensionMismatch pairwise!(f, zeros(0, 0), - [], [[1, 2], [2, 3]]) + @test_throws DimensionMismatch pairwise!( + f, zeros(0, 0), + [], [[1, 2], [2, 3]] + ) end @testset "missing values handling interface" begin @@ -83,53 +87,73 @@ arbitrary_fun(x, y) = cor(x, y) @test pairwise!(f, res2, xm, ym) === res2 @test res ≅ res2 ≅ [missing for xi in xm, yi in ym] - res = pairwise(f, xm, ym, skipmissing=:pairwise) + res = pairwise(f, xm, ym, skipmissing = :pairwise) @test res isa Matrix{Float64} res2 = zeros(Union{Float64, Missing}, size(res)) - @test pairwise!(f, res2, xm, ym, skipmissing=:pairwise) === res2 + @test pairwise!(f, res2, xm, ym, skipmissing = :pairwise) === res2 @test res ≅ res2 - @test isapprox(res, [f(collect.(skipmissings(xi, yi))...) for xi in xm, yi in ym], - rtol=1e-6) + @test isapprox( + res, [f(collect.(skipmissings(xi, yi))...) for xi in xm, yi in ym], + rtol = 1.0e-6 + ) - res = pairwise(f, ym, zm, skipmissing=:pairwise) + res = pairwise(f, ym, zm, skipmissing = :pairwise) @test res isa Matrix{Float32} res2 = zeros(Union{Float32, Missing}, size(res)) - @test pairwise!(f, res2, ym, zm, skipmissing=:pairwise) === res2 + @test pairwise!(f, res2, ym, zm, skipmissing = :pairwise) === res2 @test res ≅ res2 - @test isapprox(res, [f(collect.(skipmissings(yi, zi))...) 
for yi in ym, zi in zm], - rtol=1e-6) - - nminds = mapreduce(x -> .!ismissing.(x), - (x, y) -> x .& y, - [xm; ym]) - res = pairwise(f, xm, ym, skipmissing=:listwise) + @test isapprox( + res, [f(collect.(skipmissings(yi, zi))...) for yi in ym, zi in zm], + rtol = 1.0e-6 + ) + + nminds = mapreduce( + x -> .!ismissing.(x), + (x, y) -> x .& y, + [xm; ym] + ) + res = pairwise(f, xm, ym, skipmissing = :listwise) @test res isa Matrix{Float64} res2 = zeros(Union{Float64, Missing}, size(res)) - @test pairwise!(f, res2, xm, ym, skipmissing=:listwise) === res2 + @test pairwise!(f, res2, xm, ym, skipmissing = :listwise) === res2 @test res ≅ res2 - @test isapprox(res, [f(view(xi, nminds), view(yi, nminds)) for xi in xm, yi in ym], - rtol=1e-6) + @test isapprox( + res, [f(view(xi, nminds), view(yi, nminds)) for xi in xm, yi in ym], + rtol = 1.0e-6 + ) # inference of cor fails so use an inferrable function # to check that pairwise itself is inferrable for skipmissing in (:none, :pairwise, :listwise) - g(x, y=x) = pairwise((x, y) -> x[1] * y[1], x, y, skipmissing=skipmissing) + g(x, y = x) = pairwise((x, y) -> x[1] * y[1], x, y, skipmissing = skipmissing) @test Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}}) == - Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}, - Vector{Vector{Union{Float64, Missing}}}}) == - Matrix{<: Union{Float64, Missing}} + Core.Compiler.return_type( + g, Tuple{ + Vector{Vector{Union{Float64, Missing}}}, + Vector{Vector{Union{Float64, Missing}}}, + } + ) == + Matrix{<:Union{Float64, Missing}} if skipmissing in (:pairwise, :listwise) @test_broken Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}}) == - Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}, - Vector{Vector{Union{Float64, Missing}}}}) == - Matrix{Float64} + Core.Compiler.return_type( + g, Tuple{ + Vector{Vector{Union{Float64, Missing}}}, + Vector{Vector{Union{Float64, Missing}}}, + } + ) == + 
Matrix{Float64} end end - @test_throws ArgumentError pairwise(f, xm, ym, skipmissing=:something) - @test_throws ArgumentError pairwise!(f, zeros(Union{Float64, Missing}, - length(xm), length(ym)), xm, ym, - skipmissing=:something) + @test_throws ArgumentError pairwise(f, xm, ym, skipmissing = :something) + @test_throws ArgumentError pairwise!( + f, zeros( + Union{Float64, Missing}, + length(xm), length(ym) + ), xm, ym, + skipmissing = :something + ) # variable with only missings xm = [fill(missing, 10), rand(10)] @@ -141,17 +165,17 @@ arbitrary_fun(x, y) = cor(x, y) @test pairwise!(f, res2, xm, ym) === res2 @test res ≅ res2 ≅ [f(xi, yi) for xi in xm, yi in ym] - @test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, skipmissing=:pairwise) - @test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, skipmissing=:listwise) + @test_throws Union{ArgumentError, MethodError} pairwise(f, xm, ym, skipmissing = :pairwise) + @test_throws Union{ArgumentError, MethodError} pairwise(f, xm, ym, skipmissing = :listwise) res = zeros(Union{Float64, Missing}, length(xm), length(ym)) - @test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, skipmissing=:pairwise) - @test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, skipmissing=:listwise) + @test_throws Union{ArgumentError, MethodError} pairwise!(f, res, xm, ym, skipmissing = :pairwise) + @test_throws Union{ArgumentError, MethodError} pairwise!(f, res, xm, ym, skipmissing = :listwise) for sm in (:pairwise, :listwise) - @test_throws ArgumentError pairwise(f, [[1, 2]], [1], skipmissing=sm) - @test_throws ArgumentError pairwise(f, [1], [[1, 2]], skipmissing=sm) - @test_throws ArgumentError pairwise(f, [1], [1], skipmissing=sm) + @test_throws ArgumentError pairwise(f, [[1, 2]], [1], skipmissing = sm) + @test_throws ArgumentError pairwise(f, [1], [[1, 2]], skipmissing = sm) + @test_throws ArgumentError pairwise(f, [1], [1], skipmissing = sm) end end @@ -178,10 +202,14 @@ 
arbitrary_fun(x, y) = cor(x, y) [f(collect(xi), collect(yi)) for xi in x, yi in y] @test pairwise((x, y) -> f(collect(x), collect(y)), x) == [f(collect(xi1), collect(xi2)) for xi1 in x, xi2 in x] - @test_throws ArgumentError pairwise((x, y) -> f(collect(x), collect(y)), x, y, - skipmissing=:pairwise) - @test_throws ArgumentError pairwise((x, y) -> f(collect(x), collect(y)), x, y, - skipmissing=:listwise) + @test_throws ArgumentError pairwise( + (x, y) -> f(collect(x), collect(y)), x, y, + skipmissing = :pairwise + ) + @test_throws ArgumentError pairwise( + (x, y) -> f(collect(x), collect(y)), x, y, + skipmissing = :listwise + ) end @testset "two-argument method" begin @@ -196,58 +224,82 @@ arbitrary_fun(x, y) = cor(x, y) x = [rand(10) for _ in 1:4] y = [rand(10) for _ in 1:4] - @test pairwise(f, x, x, symmetric=true) == - pairwise(f, x, symmetric=true) == + @test pairwise(f, x, x, symmetric = true) == + pairwise(f, x, symmetric = true) == Symmetric(pairwise(f, x, x), :U) res = zeros(4, 4) res2 = zeros(4, 4) - @test pairwise!(f, res, x, x, symmetric=true) === res - @test pairwise!(f, res2, x, symmetric=true) === res2 + @test pairwise!(f, res, x, x, symmetric = true) === res + @test pairwise!(f, res2, x, symmetric = true) === res2 @test res == res2 == Symmetric(pairwise(f, x, x), :U) - @test_throws ArgumentError pairwise(f, x, y, symmetric=true) - @test_throws ArgumentError pairwise!(f, res, x, y, symmetric=true) + @test_throws ArgumentError pairwise(f, x, y, symmetric = true) + @test_throws ArgumentError pairwise!(f, res, x, y, symmetric = true) end @testset "cor corner cases" begin # Integer inputs must give a Float64 output res = pairwise(cor, [[1, 2, 3], [1, 5, 2]]) @test res isa Matrix{Float64} - @test res == [cor(xi, yi) for xi in ([1, 2, 3], [1, 5, 2]), - yi in ([1, 2, 3], [1, 5, 2])] + @test res == [ + cor(xi, yi) for xi in ([1, 2, 3], [1, 5, 2]), + yi in ([1, 2, 3], [1, 5, 2]) + ] # NaNs are ignored for the diagonal res = pairwise(cor, [[1, 2, NaN], [1, 5, 
2]]) @test res isa Matrix{Float64} - @test res ≅ [1.0 NaN - NaN 1.0] + @test res ≅ [ + 1.0 NaN + NaN 1.0 + ] # missings are ignored for the diagonal res = pairwise(cor, [[1, 2, 7], [1, 5, missing]]) @test res isa Matrix{Union{Float64, Missing}} - @test res ≅ [1.0 missing - missing 1.0] - res = pairwise(cor, Vector{Union{Int, Missing}}[[missing, missing, missing], - [missing, missing, missing]]) + @test res ≅ [ + 1.0 missing + missing 1.0 + ] + res = pairwise( + cor, Vector{Union{Int, Missing}}[ + [missing, missing, missing], + [missing, missing, missing], + ] + ) @test res isa Matrix{Union{Float64, Missing}} - @test res ≅ [1.0 missing - missing 1.0] + @test res ≅ [ + 1.0 missing + missing 1.0 + ] # except when eltype is Missing - res = pairwise(cor, [[missing, missing, missing], - [missing, missing, missing]]) + res = pairwise( + cor, [ + [missing, missing, missing], + [missing, missing, missing], + ] + ) @test res isa Matrix{Missing} - @test res ≅ [missing missing - missing missing] + @test res ≅ [ + missing missing + missing missing + ] for sm in (:pairwise, :listwise) - res = pairwise(cor, [[1, 2, NaN, 4], [1, 5, 5, missing]], skipmissing=sm) + res = pairwise(cor, [[1, 2, NaN, 4], [1, 5, 5, missing]], skipmissing = sm) @test res isa Matrix{Float64} - @test res ≅ [1.0 NaN - NaN 1.0] - @test_throws ArgumentError pairwise(cor, [[missing, missing, missing], - [missing, missing, missing]], - skipmissing=sm) + @test res ≅ [ + 1.0 NaN + NaN 1.0 + ] + @test_throws ArgumentError pairwise( + cor, [ + [missing, missing, missing], + [missing, missing, missing], + ], + skipmissing = sm + ) end end @@ -260,19 +312,21 @@ arbitrary_fun(x, y) = cor(x, y) @test StatsBase.promote_type_union(Vector) === Any @test StatsBase.promote_type_union(Union{}) === Union{} @test StatsBase.promote_type_union(Tuple{Union{Int, Float64}}) === - Tuple{Real} + Tuple{Real} end @testset "type-unstable corner case (#771)" begin - v = [rand(5) for _=1:10] + v = [rand(5) for _ in 1:10] function f(v) 
pairwise(v) do x, y - (x[1] < 0 ? nothing : - x[1] > y[1] ? 1 : 1.5, - 0) + ( + x[1] < 0 ? nothing : + x[1] > y[1] ? 1 : 1.5, + 0, + ) end end res = f(v) @test res isa Matrix{Tuple{Real, Int}} end -end \ No newline at end of file +end diff --git a/test/partialcor.jl b/test/partialcor.jl index 77ae3cba7..a59106e35 100644 --- a/test/partialcor.jl +++ b/test/partialcor.jl @@ -2,14 +2,14 @@ using StatsBase using Test wechsler = Float32[ - 7 5 9 8 - 8 8 5 6 + 7 5 9 8 + 8 8 5 6 16 18 11 9 - 8 3 7 9 - 6 3 13 9 + 8 3 7 9 + 6 3 13 9 11 8 10 10 12 7 9 8 - 8 11 9 3 + 8 11 9 3 14 12 11 4 13 13 13 6 13 9 9 9 @@ -27,7 +27,7 @@ wechsler = Float32[ 10 7 14 6 10 10 9 6 10 7 10 10 - 7 6 5 9 + 7 6 5 9 15 12 10 6 17 15 15 8 16 13 16 9 @@ -41,10 +41,12 @@ wechsler = Float32[ 14 13 14 9 ] -@test @inferred(partialcor(wechsler[:,1], wechsler[:,2], wechsler[:,3:4])) ≈ 0.7118787 rtol=1e-6 +@test @inferred(partialcor(wechsler[:, 1], wechsler[:, 2], wechsler[:, 3:4])) ≈ 0.7118787 rtol = 1.0e-6 -X = [ 2 1 0 - 4 2 0 - 15 3 1 - 20 4 1] -@test @inferred(partialcor(view(X,:,1), view(X,:,2), view(X,:,3))) ≈ 0.919145 rtol=1e-6 +X = [ + 2 1 0 + 4 2 0 + 15 3 1 + 20 4 1 +] +@test @inferred(partialcor(view(X, :, 1), view(X, :, 2), view(X, :, 3))) ≈ 0.919145 rtol = 1.0e-6 diff --git a/test/rankcorr.jl b/test/rankcorr.jl index dc0207ee1..21a1a4dc5 100644 --- a/test/rankcorr.jl +++ b/test/rankcorr.jl @@ -4,9 +4,9 @@ using Test X = Float64[1 0; 2 1; 3 0; 4 1; 5 10] Y = Float64[5 5 6; 3 4 1; 4 0 4; 2 6 1; 5 7 10] -x1 = X[:,1] -x2 = X[:,2] -y = Y[:,1] +x1 = X[:, 1] +x2 = X[:, 2] +y = Y[:, 1] # corspearman @@ -22,41 +22,41 @@ c22 = corspearman(x2, x2) @test c11 ≈ 1.0 @test c22 ≈ 1.0 @test corspearman(X, X) ≈ [c11 c12; c12 c22] -@test corspearman(X) ≈ [c11 c12; c12 c22] +@test corspearman(X) ≈ [c11 c12; c12 c22] @test corspearman(X, Y) == - [corspearman(X[:,i], Y[:,j]) for i in axes(X, 2), j in axes(Y, 2)] + [corspearman(X[:, i], Y[:, j]) for i in axes(X, 2), j in axes(Y, 2)] # corkendall # Check error, 
handling of NaN, Inf etc -@test_throws ErrorException("Vectors must have same length") corkendall([1,2,3,4], [1,2,3]) -@test isnan(corkendall([1,2], [3,NaN])) -@test isnan(corkendall([1,1,1], [1,2,3])) -@test corkendall([-Inf,-0.0,Inf],[1,2,3]) == 1.0 +@test_throws ErrorException("Vectors must have same length") corkendall([1, 2, 3, 4], [1, 2, 3]) +@test isnan(corkendall([1, 2], [3, NaN])) +@test isnan(corkendall([1, 1, 1], [1, 2, 3])) +@test corkendall([-Inf, -0.0, Inf], [1, 2, 3]) == 1.0 -# Test, with exact equality, some known results. +# Test, with exact equality, some known results. # AbstractVector{<:Real}, AbstractVector{<:Real} -@test corkendall(x1, y) == -1/sqrt(90) -@test corkendall(x2, y) == -1/sqrt(72) +@test corkendall(x1, y) == -1 / sqrt(90) +@test corkendall(x2, y) == -1 / sqrt(72) # AbstractMatrix{<:Real}, AbstractVector{<:Real} -@test corkendall(X, y) == [-1/sqrt(90), -1/sqrt(72)] +@test corkendall(X, y) == [-1 / sqrt(90), -1 / sqrt(72)] # AbstractVector{<:Real}, AbstractMatrix{<:Real} -@test corkendall(y, X) == [-1/sqrt(90) -1/sqrt(72)] +@test corkendall(y, X) == [-1 / sqrt(90) -1 / sqrt(72)] # n = 78_000 tests for overflow errors on 32 bit # Testing for overflow errors on 64bit would require n be too large for practicality # This also tests merge_sort! since n is (much) greater than SMALL_THRESHOLD. 
n = 78_000 # Test with many repeats -@test corkendall(repeat(x1, n), repeat(y, n)) ≈ -1/sqrt(90) -@test corkendall(repeat(x2, n), repeat(y, n)) ≈ -1/sqrt(72) -@test corkendall(repeat(X, n), repeat(y, n)) ≈ [-1/sqrt(90), -1/sqrt(72)] -@test corkendall(repeat(y, n), repeat(X, n)) ≈ [-1/sqrt(90) -1/sqrt(72)] -@test corkendall(repeat([0,1,1,0], n), repeat([1,0,1,0], n)) == 0.0 +@test corkendall(repeat(x1, n), repeat(y, n)) ≈ -1 / sqrt(90) +@test corkendall(repeat(x2, n), repeat(y, n)) ≈ -1 / sqrt(72) +@test corkendall(repeat(X, n), repeat(y, n)) ≈ [-1 / sqrt(90), -1 / sqrt(72)] +@test corkendall(repeat(y, n), repeat(X, n)) ≈ [-1 / sqrt(90) -1 / sqrt(72)] +@test corkendall(repeat([0, 1, 1, 0], n), repeat([1, 0, 1, 0], n)) == 0.0 # Test with no repeats, note testing for exact equality -@test corkendall(collect(1:n), collect(1:n)) == 1.0 +@test corkendall(collect(1:n), collect(1:n)) == 1.0 @test corkendall(collect(1:n), reverse(collect(1:n))) == -1.0 # All elements identical should yield NaN @@ -69,81 +69,95 @@ c22 = corkendall(x2, x2) # AbstractMatrix{<:Real}, AbstractMatrix{<:Real} @test corkendall(X, X) ≈ [c11 c12; c12 c22] # AbstractMatrix{<:Real} -@test corkendall(X) ≈ [c11 c12; c12 c22] +@test corkendall(X) ≈ [c11 c12; c12 c22] @test c11 == 1.0 @test c22 == 1.0 -@test c12 == 3/sqrt(20) +@test c12 == 3 / sqrt(20) # Finished testing for overflow, so redefine n for speedier tests n = 100 @test corkendall(repeat(X, n), repeat(X, n)) ≈ [c11 c12; c12 c22] -@test corkendall(repeat(X, n)) ≈ [c11 c12; c12 c22] +@test corkendall(repeat(X, n)) ≈ [c11 c12; c12 c22] # All eight three-element permutations -z = [1 1 1; - 1 1 2; - 1 2 2; - 1 2 2; - 1 2 1; - 2 1 2; - 1 1 2; - 2 2 2] - -@test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(z[:,1], z) == [1 0 1/3] -@test corkendall(z, z[:,1]) == [1; 0; 1/3] +z = [ + 1 1 1; + 1 1 2; + 1 2 2; + 1 2 2; + 1 2 1; + 2 1 2; + 1 1 2; + 2 2 2 +] + +@test corkendall(z) == [1 0 1 
/ 3; 0 1 0; 1 / 3 0 1] +@test corkendall(z, z) == [1 0 1 / 3; 0 1 0; 1 / 3 0 1] +@test corkendall(z[:, 1], z) == [1 0 1 / 3] +@test corkendall(z, z[:, 1]) == [1; 0; 1 / 3] z = float(z) -@test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(z[:,1], z) == [1 0 1/3] -@test corkendall(z, z[:,1]) == [1; 0; 1/3] +@test corkendall(z) == [1 0 1 / 3; 0 1 0; 1 / 3 0 1] +@test corkendall(z, z) == [1 0 1 / 3; 0 1 0; 1 / 3 0 1] +@test corkendall(z[:, 1], z) == [1 0 1 / 3] +@test corkendall(z, z[:, 1]) == [1; 0; 1 / 3] w = repeat(z, n) -@test corkendall(w) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(w, w) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(w[:,1], w) == [1 0 1/3] -@test corkendall(w, w[:,1]) == [1; 0; 1/3] +@test corkendall(w) == [1 0 1 / 3; 0 1 0; 1 / 3 0 1] +@test corkendall(w, w) == [1 0 1 / 3; 0 1 0; 1 / 3 0 1] +@test corkendall(w[:, 1], w) == [1 0 1 / 3] +@test corkendall(w, w[:, 1]) == [1; 0; 1 / 3] -StatsBase.midpoint(1,10) == 5 -StatsBase.midpoint(1,widen(10)) == 5 +StatsBase.midpoint(1, 10) == 5 +StatsBase.midpoint(1, widen(10)) == 5 # NaN handling Xnan = copy(X) -Xnan[1,1] = NaN +Xnan[1, 1] = NaN Ynan = copy(Y) -Ynan[2,1] = NaN +Ynan[2, 1] = NaN for f in (corspearman, corkendall) - @test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4])) - @test all(isnan, f([1.0, NaN], [1 2; 3 4])) - @test all(isnan, f([1 2; 3 4], [1.0, NaN])) - @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1]) - @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4])) - @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4])) - - @test isequal(f(Xnan, Ynan), - [f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)]) - @test isequal(f(Xnan), - [i == j ? 
1.0 : f(Xnan[:,i], Xnan[:,j]) - for i in axes(Xnan, 2), j in axes(Xnan, 2)]) - for k in 1:2 - @test isequal(f(Xnan[:,k], Ynan), - [f(Xnan[:,k], Ynan[:,j]) for i in 1:1, j in axes(Ynan, 2)]) - # TODO: fix corkendall (PR#659) - if f === corspearman - @test isequal(f(Xnan, Ynan[:,k]), - [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2), j in 1:1]) - else - @test isequal(f(Xnan, Ynan[:,k]), - [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2)]) - end - end + @test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4])) + @test all(isnan, f([1.0, NaN], [1 2; 3 4])) + @test all(isnan, f([1 2; 3 4], [1.0, NaN])) + @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1]) + @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4])) + @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4])) + + @test isequal( + f(Xnan, Ynan), + [f(Xnan[:, i], Ynan[:, j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)] + ) + @test isequal( + f(Xnan), + [ + i == j ? 1.0 : f(Xnan[:, i], Xnan[:, j]) + for i in axes(Xnan, 2), j in axes(Xnan, 2) + ] + ) + for k in 1:2 + @test isequal( + f(Xnan[:, k], Ynan), + [f(Xnan[:, k], Ynan[:, j]) for i in 1:1, j in axes(Ynan, 2)] + ) + # TODO: fix corkendall (PR#659) + if f === corspearman + @test isequal( + f(Xnan, Ynan[:, k]), + [f(Xnan[:, i], Ynan[:, k]) for i in axes(Xnan, 2), j in 1:1] + ) + else + @test isequal( + f(Xnan, Ynan[:, k]), + [f(Xnan[:, i], Ynan[:, k]) for i in axes(Xnan, 2)] + ) + end + end end diff --git a/test/ranking.jl b/test/ranking.jl index fea0a48c1..b7f091b93 100644 --- a/test/ranking.jl +++ b/test/ranking.jl @@ -39,4 +39,4 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered @test tiedrank(x, lt = (x, y) -> isless(y, x)) == tiedrank(-x) -@test_throws DimensionMismatch StatsBase._check_randparams([1,2], [1,2], [1]) +@test_throws DimensionMismatch StatsBase._check_randparams([1, 2], [1, 2], [1]) diff --git a/test/reliability.jl b/test/reliability.jl index 916e097c4..e95d90a4b 100644 --- a/test/reliability.jl +++ b/test/reliability.jl @@ -3,10 
+3,12 @@ using LinearAlgebra, Random, Test @testset "Cronbach's Alpha" begin # basic vanilla test - cov_X = [10 6 6 6; - 6 11 6 6; - 6 6 12 6; - 6 6 6 13] + cov_X = [ + 10 6 6 6; + 6 11 6 6; + 6 6 12 6; + 6 6 6 13 + ] cronbach_X = cronbachalpha(cov_X) @test cronbach_X isa CronbachAlpha{Float64} @test cronbach_X.alpha ≈ 0.8135593220338981 @@ -31,20 +33,26 @@ using LinearAlgebra, Random, Test # testing corner cases @test_throws MethodError cronbachalpha([1.0, 2.0]) - cov_k2 = [10 6; - 6 11] + cov_k2 = [ + 10 6; + 6 11 + ] cronbach_k2 = cronbachalpha(cov_k2) @test cronbach_k2.alpha ≈ 0.7272727272727273 @test isempty(cronbach_k2.dropped) # testing when Matrix is not positive-definite - cov_not_pos = [-1 1; - -1 1] + cov_not_pos = [ + -1 1; + -1 1 + ] @test_throws ArgumentError cronbachalpha(cov_not_pos) # testing with a zero - cov_zero = [1 2; - 0 1] + cov_zero = [ + 1 2; + 0 1 + ] @test_throws ArgumentError cronbachalpha(cov_not_pos) # testing with one column @@ -52,8 +60,10 @@ using LinearAlgebra, Random, Test @test_throws ArgumentError cronbachalpha(cov_k1) # testing with Missing - cov_missing = [1 2; - missing 1] + cov_missing = [ + 1 2; + missing 1 + ] @test_throws MethodError cronbachalpha(cov_missing) diff --git a/test/robust.jl b/test/robust.jl index 59721291a..8c919ffdc 100644 --- a/test/robust.jl +++ b/test/robust.jl @@ -3,61 +3,61 @@ using Test, Random ### Trimming outliers -@test collect(trim([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1] -@test collect(trim([8,2,3,4,5,6,7,1], prop=0.2)) == [2,3,4,5,6,7] -@test collect(trim([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,5,6] -@test collect(trim([8,7,6,5,4,3,2,1], count=1)) == [7,6,5,4,3,2] -@test collect(trim([1,2,3,4,5,6,7,8,9], count=3)) == [4,5,6] +@test collect(trim([8, 2, 3, 4, 5, 6, 7, 1], prop = 0.1)) == [8, 2, 3, 4, 5, 6, 7, 1] +@test collect(trim([8, 2, 3, 4, 5, 6, 7, 1], prop = 0.2)) == [2, 3, 4, 5, 6, 7] +@test collect(trim([1, 2, 3, 4, 5, 6, 7, 8, 9], prop = 0.4)) == [4, 5, 6] +@test collect(trim([8, 
7, 6, 5, 4, 3, 2, 1], count = 1)) == [7, 6, 5, 4, 3, 2] +@test collect(trim([1, 2, 3, 4, 5, 6, 7, 8, 9], count = 3)) == [4, 5, 6] @test_throws ArgumentError trim([]) -@test_throws ArgumentError trim([1,2,3,4,5], prop=0.5) +@test_throws ArgumentError trim([1, 2, 3, 4, 5], prop = 0.5) -@test collect(trim!([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1] -@test collect(trim!([8,2,3,4,5,6,7,1], prop=0.2)) == [2,3,4,5,6,7] -@test collect(trim!([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,5,6] -@test collect(trim!([8,7,6,5,4,3,2,1], count=1)) == [7,6,5,4,3,2] -@test collect(trim!([1,2,3,4,5,6,7,8,9], count=3)) == [4,5,6] +@test collect(trim!([8, 2, 3, 4, 5, 6, 7, 1], prop = 0.1)) == [8, 2, 3, 4, 5, 6, 7, 1] +@test collect(trim!([8, 2, 3, 4, 5, 6, 7, 1], prop = 0.2)) == [2, 3, 4, 5, 6, 7] +@test collect(trim!([1, 2, 3, 4, 5, 6, 7, 8, 9], prop = 0.4)) == [4, 5, 6] +@test collect(trim!([8, 7, 6, 5, 4, 3, 2, 1], count = 1)) == [7, 6, 5, 4, 3, 2] +@test collect(trim!([1, 2, 3, 4, 5, 6, 7, 8, 9], count = 3)) == [4, 5, 6] @test_throws ArgumentError trim!([]) -@test_throws ArgumentError trim!([1,2,3,4,5], prop=0.5) +@test_throws ArgumentError trim!([1, 2, 3, 4, 5], prop = 0.5) -@test collect(winsor([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1] -@test collect(winsor([8,2,3,4,5,6,7,1], prop=0.2)) == [7,2,3,4,5,6,7,2] -@test collect(winsor([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,4,4,4,5,6,6,6,6] -@test collect(winsor([1,2,3,4,5,6,7,8], count=1)) == [2,2,3,4,5,6,7,7] -@test collect(winsor([8,7,6,5,4,3,2,1], count=1)) == [7,7,6,5,4,3,2,2] -@test collect(winsor([1,2,3,4,5,6,7,8,9], count=3)) == [4,4,4,4,5,6,6,6,6] +@test collect(winsor([8, 2, 3, 4, 5, 6, 7, 1], prop = 0.1)) == [8, 2, 3, 4, 5, 6, 7, 1] +@test collect(winsor([8, 2, 3, 4, 5, 6, 7, 1], prop = 0.2)) == [7, 2, 3, 4, 5, 6, 7, 2] +@test collect(winsor([1, 2, 3, 4, 5, 6, 7, 8, 9], prop = 0.4)) == [4, 4, 4, 4, 5, 6, 6, 6, 6] +@test collect(winsor([1, 2, 3, 4, 5, 6, 7, 8], count = 1)) == [2, 2, 3, 4, 5, 6, 7, 7] +@test 
collect(winsor([8, 7, 6, 5, 4, 3, 2, 1], count = 1)) == [7, 7, 6, 5, 4, 3, 2, 2] +@test collect(winsor([1, 2, 3, 4, 5, 6, 7, 8, 9], count = 3)) == [4, 4, 4, 4, 5, 6, 6, 6, 6] @test_throws ArgumentError winsor([]) -@test_throws ArgumentError winsor([1,2,3,4,5], prop=0.5) +@test_throws ArgumentError winsor([1, 2, 3, 4, 5], prop = 0.5) -@test collect(winsor!([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1] -@test collect(winsor!([8,2,3,4,5,6,7,1], prop=0.2)) == [7,2,3,4,5,6,7,2] -@test collect(winsor!([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,4,4,4,5,6,6,6,6] -@test collect(winsor!([8,7,6,5,4,3,2,1], count=1)) == [7,7,6,5,4,3,2,2] -@test collect(winsor!([1,2,3,4,5,6,7,8,9], count=3)) == [4,4,4,4,5,6,6,6,6] +@test collect(winsor!([8, 2, 3, 4, 5, 6, 7, 1], prop = 0.1)) == [8, 2, 3, 4, 5, 6, 7, 1] +@test collect(winsor!([8, 2, 3, 4, 5, 6, 7, 1], prop = 0.2)) == [7, 2, 3, 4, 5, 6, 7, 2] +@test collect(winsor!([1, 2, 3, 4, 5, 6, 7, 8, 9], prop = 0.4)) == [4, 4, 4, 4, 5, 6, 6, 6, 6] +@test collect(winsor!([8, 7, 6, 5, 4, 3, 2, 1], count = 1)) == [7, 7, 6, 5, 4, 3, 2, 2] +@test collect(winsor!([1, 2, 3, 4, 5, 6, 7, 8, 9], count = 3)) == [4, 4, 4, 4, 5, 6, 6, 6, 6] @test_throws ArgumentError winsor!([]) -@test_throws ArgumentError winsor!([1,2,3,4,5], prop=0.5) +@test_throws ArgumentError winsor!([1, 2, 3, 4, 5], prop = 0.5) ### Variance -@test trimvar([1,1,1,1,1]) ≈ 0.0 -@test trimvar([2,3,4,5,6,7,8,9], prop=0.25) ≈ 1.0 +@test trimvar([1, 1, 1, 1, 1]) ≈ 0.0 +@test trimvar([2, 3, 4, 5, 6, 7, 8, 9], prop = 0.25) ≈ 1.0 @test_throws ArgumentError trimvar([]) -@test_throws ArgumentError trimvar([1,2,3,4,5], prop=0.5) +@test_throws ArgumentError trimvar([1, 2, 3, 4, 5], prop = 0.5) ### Other -@test mean(trim([-Inf,1,2,3,4], count=1)) == 2 -@test mean(winsor([-Inf,1,2,3,4], count=1)) == 2 +@test mean(trim([-Inf, 1, 2, 3, 4], count = 1)) == 2 +@test mean(winsor([-Inf, 1, 2, 3, 4], count = 1)) == 2 Random.seed!(1234) for n in 2100:2120, c in 0:1000 x = randperm(n) - @test 
sort!(collect(winsor(x, count=c))) == - reverse!(collect(winsor(n:-1:1, count=c))) == - collect(winsor(1:n, count=c)) + @test sort!(collect(winsor(x, count = c))) == + reverse!(collect(winsor(n:-1:1, count = c))) == + collect(winsor(1:n, count = c)) end diff --git a/test/runtests.jl b/test/runtests.jl index da8212d9e..ca3ac6512 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,30 +3,32 @@ using Dates using LinearAlgebra using Random -tests = ["ambiguous", - "weights", - "moments", - "scalarstats", - "deviation", - "cov", - "counts", - "ranking", - "empirical", - "hist", - "rankcorr", - "signalcorr", - "misc", - "pairwise", - "reliability", - "robust", - "sampling", - "wsampling", - "statmodels", - "partialcor", - "transformations", - # Test with JET after all other tests since it has side effects - "jet"] - #"statquiz"] +tests = [ + "ambiguous", + "weights", + "moments", + "scalarstats", + "deviation", + "cov", + "counts", + "ranking", + "empirical", + "hist", + "rankcorr", + "signalcorr", + "misc", + "pairwise", + "reliability", + "robust", + "sampling", + "wsampling", + "statmodels", + "partialcor", + "transformations", + # Test with JET after all other tests since it has side effects + "jet", +] +#"statquiz"] println("Running tests:") diff --git a/test/sampling.jl b/test/sampling.jl index a4f31a012..34e706480 100644 --- a/test/sampling.jl +++ b/test/sampling.jl @@ -17,7 +17,7 @@ function test_rng_use(func, non_rng_args...) # repeatability @test func(MersenneTwister(1), deepcopy(non_rng_args)...) == - func(MersenneTwister(1), deepcopy(non_rng_args)...) + func(MersenneTwister(1), deepcopy(non_rng_args)...) # default RNG is Random.GLOBAL_RNG/Random.default_rng() Random.seed!(47) x = func(deepcopy(non_rng_args)...) @@ -26,31 +26,31 @@ function test_rng_use(func, non_rng_args...) @test x == y Random.seed!(47) y = func(Random.default_rng(), deepcopy(non_rng_args)...) 
- @test x == y + return @test x == y end #### sample with replacement -function check_sample_wrep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=false, rev::Bool=false) +function check_sample_wrep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool = false, rev::Bool = false) vmin, vmax = vrgn (amin, amax) = extrema(a) @test vmin <= amin <= amax <= vmax n = vmax - vmin + 1 - p0 = fill(1/n, n) - if ordered - @test issorted(a; rev=rev) + p0 = fill(1 / n, n) + return if ordered + @test issorted(a; rev = rev) if ptol > 0 - @test isapprox(proportions(a, vmin:vmax), p0, atol=ptol) + @test isapprox(proportions(a, vmin:vmax), p0, atol = ptol) end else - @test !issorted(a; rev=rev) - ncols = size(a,2) + @test !issorted(a; rev = rev) + ncols = size(a, 2) if ncols == 1 - @test isapprox(proportions(a, vmin:vmax), p0, atol=ptol) + @test isapprox(proportions(a, vmin:vmax), p0, atol = ptol) else - for j = 1:ncols + for j in 1:ncols aj = view(a, :, j) - @test isapprox(proportions(aj, vmin:vmax), p0, atol=ptol) + @test isapprox(proportions(aj, vmin:vmax), p0, atol = ptol) end end end @@ -59,27 +59,27 @@ end import StatsBase: direct_sample! a = direct_sample!(1:10, zeros(Int, n, 3)) -check_sample_wrep(a, (1, 10), 5.0e-3; ordered=false) +check_sample_wrep(a, (1, 10), 5.0e-3; ordered = false) a = direct_sample!(3:12, zeros(Int, n, 3)) -check_sample_wrep(a, (3, 12), 5.0e-3; ordered=false) +check_sample_wrep(a, (3, 12), 5.0e-3; ordered = false) a = direct_sample!([11:20;], zeros(Int, n, 3)) -check_sample_wrep(a, (11, 20), 5.0e-3; ordered=false) +check_sample_wrep(a, (11, 20), 5.0e-3; ordered = false) test_rng_use(direct_sample!, 1:10, zeros(Int, 6)) a = sample(3:12, n) -check_sample_wrep(a, (3, 12), 5.0e-3; ordered=false) +check_sample_wrep(a, (3, 12), 5.0e-3; ordered = false) for rev in (true, false), T in (Int, Int16, Float64, Float16, BigInt, ComplexF64, Rational{Int}) r = rev ? reverse(3:12) : (3:12) - r = T===Int ? 
r : T.(r) - aa = Int.(sample(r, n; ordered=true)) - check_sample_wrep(aa, (3, 12), 5.0e-3; ordered=true, rev=rev) + r = T === Int ? r : T.(r) + aa = Int.(sample(r, n; ordered = true)) + check_sample_wrep(aa, (3, 12), 5.0e-3; ordered = true, rev = rev) - aa = Int.(sample(r, 10; ordered=true)) - check_sample_wrep(aa, (3, 12), 0; ordered=true, rev=rev) + aa = Int.(sample(r, 10; ordered = true)) + check_sample_wrep(aa, (3, 12), 0; ordered = true, rev = rev) end @test StatsBase._storeindices(1, 1, BigFloat) == StatsBase._storeindices(1, 1, BigFloat) == false @@ -90,11 +90,11 @@ test_rng_use(sample, 1:10, 10) rng = StableRNG(1) - @test samplepair(rng, 2) === (2, 1) + @test samplepair(rng, 2) === (2, 1) @test samplepair(rng, 10) === (5, 6) @test samplepair(rng, [3, 4, 2, 6, 8]) === (3, 8) - @test samplepair(rng, [1, 2]) === (1, 2) + @test samplepair(rng, [1, 2]) === (1, 2) onetwo = samplepair(rng, UInt128(2)) @test extrema(onetwo) == (1, 2) @@ -105,7 +105,7 @@ test_rng_use(samplepair, 1000) #### sample without replacement -function check_sample_norep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=false, rev::Bool=false) +function check_sample_norep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool = false, rev::Bool = false) # each column of a for one run vmin, vmax = vrgn @@ -113,23 +113,23 @@ function check_sample_norep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=fa @test vmin <= amin <= amax <= vmax n = vmax - vmin + 1 - for j = 1:size(a,2) - aj = view(a,:,j) + for j in 1:size(a, 2) + aj = view(a, :, j) @assert allunique(aj) if ordered - @assert issorted(aj, rev=rev) + @assert issorted(aj, rev = rev) end end - if ptol > 0 - p0 = fill(1/n, n) + return if ptol > 0 + p0 = fill(1 / n, n) if ordered - @test isapprox(proportions(a, vmin:vmax), p0, atol=ptol) + @test isapprox(proportions(a, vmin:vmax), p0, atol = ptol) else b = transpose(a) - for j = 1:size(b,2) - bj = view(b,:,j) - @test isapprox(proportions(bj, vmin:vmax), p0, atol=ptol) + for j in 1:size(b, 2) + 
bj = view(b, :, j) + @test isapprox(proportions(bj, vmin:vmax), p0, atol = ptol) end end end @@ -139,124 +139,126 @@ import StatsBase: knuths_sample!, fisher_yates_sample!, self_avoid_sample! import StatsBase: seqsample_a!, seqsample_c!, seqsample_d! a = zeros(Int, 5, n) -for j = 1:size(a,2) - knuths_sample!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + knuths_sample!(3:12, view(a, :, j)) end -check_sample_norep(a, (3, 12), 5.0e-3; ordered=false) +check_sample_norep(a, (3, 12), 5.0e-3; ordered = false) test_rng_use(knuths_sample!, 1:10, zeros(Int, 6)) a = zeros(Int, 5, n) -for j = 1:size(a,2) - fisher_yates_sample!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + fisher_yates_sample!(3:12, view(a, :, j)) end -check_sample_norep(a, (3, 12), 5.0e-3; ordered=false) +check_sample_norep(a, (3, 12), 5.0e-3; ordered = false) test_rng_use(fisher_yates_sample!, 1:10, zeros(Int, 6)) a = zeros(Int, 5, n) -for j = 1:size(a,2) - self_avoid_sample!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + self_avoid_sample!(3:12, view(a, :, j)) end -check_sample_norep(a, (3, 12), 5.0e-3; ordered=false) +check_sample_norep(a, (3, 12), 5.0e-3; ordered = false) test_rng_use(self_avoid_sample!, 1:10, zeros(Int, 6)) a = zeros(Int, 5, n) -for j = 1:size(a,2) - seqsample_a!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + seqsample_a!(3:12, view(a, :, j)) end -check_sample_norep(a, (3, 12), 5.0e-3; ordered=true) +check_sample_norep(a, (3, 12), 5.0e-3; ordered = true) test_rng_use(seqsample_a!, 1:10, zeros(Int, 6)) a = zeros(Int, 5, n) -for j = 1:size(a,2) - seqsample_c!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + seqsample_c!(3:12, view(a, :, j)) end -check_sample_norep(a, (3, 12), 5.0e-3; ordered=true) +check_sample_norep(a, (3, 12), 5.0e-3; ordered = true) test_rng_use(seqsample_c!, 1:10, zeros(Int, 6)) a = zeros(Int, 5, n) -for j = 1:size(a,2) - seqsample_d!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + seqsample_d!(3:12, view(a, :, j)) end -check_sample_norep(a, (3, 12), 5.0e-3; ordered=true) 
+check_sample_norep(a, (3, 12), 5.0e-3; ordered = true) test_rng_use(seqsample_d!, 1:10, zeros(Int, 6)) -a = sample(3:12, 5; replace=false) -check_sample_norep(a, (3, 12), 0; ordered=false) +a = sample(3:12, 5; replace = false) +check_sample_norep(a, (3, 12), 0; ordered = false) -a = sample(3:12, 5; replace=false, ordered=true) -check_sample_norep(a, (3, 12), 0; ordered=true) +a = sample(3:12, 5; replace = false, ordered = true) +check_sample_norep(a, (3, 12), 0; ordered = true) -a = sample(reverse(3:12), 5; replace=false, ordered=true) -check_sample_norep(a, (3, 12), 0; ordered=true, rev=true) +a = sample(reverse(3:12), 5; replace = false, ordered = true) +check_sample_norep(a, (3, 12), 0; ordered = true, rev = true) # tests of multidimensional sampling -a = sample(3:12, (2, 2); replace=false) -check_sample_norep(a, (3, 12), 0; ordered=false) +a = sample(3:12, (2, 2); replace = false) +check_sample_norep(a, (3, 12), 0; ordered = false) -@test sample(1:1, (2, 2); replace=true) == ones(Int, 2, 2) +@test sample(1:1, (2, 2); replace = true) == ones(Int, 2, 2) # test of weighted sampling without replacement a = [1:10;] wv = Weights([zeros(6); 1:4]) -x = vcat([sample(a, wv, 1, replace=false) for j in 1:100000]...) +x = vcat([sample(a, wv, 1, replace = false) for j in 1:100000]...) @test minimum(x) == 7 @test maximum(x) == 10 -@test maximum(abs, proportions(x) .- (1:4)/10) < 0.01 +@test maximum(abs, proportions(x) .- (1:4) / 10) < 0.01 -x = vcat([sample(a, wv, 2, replace=false) for j in 1:50000]...) +x = vcat([sample(a, wv, 2, replace = false) for j in 1:50000]...) exact2 = [0.117261905, 0.220634921, 0.304166667, 0.357936508] @test minimum(x) == 7 @test maximum(x) == 10 @test maximum(abs, proportions(x) .- exact2) < 0.01 -x = vcat([sample(a, wv, 4, replace=false) for j in 1:10000]...) +x = vcat([sample(a, wv, 4, replace = false) for j in 1:10000]...) 
@test minimum(x) == 7 @test maximum(x) == 10 @test maximum(abs, proportions(x) .- 0.25) == 0 -@test_throws DimensionMismatch sample(a, wv, 5, replace=false) +@test_throws DimensionMismatch sample(a, wv, 5, replace = false) wv = Weights([zeros(5); 1:4; -1]) -@test_throws ErrorException sample(a, wv, 1, replace=false) +@test_throws ErrorException sample(a, wv, 1, replace = false) #### weighted sampling with dimension # weights respected; this works because of the 0-weight -@test sample([1, 2], Weights([0, 1]), (2,2)) == [2 2 ; 2 2] -wm = sample(collect(1:4), Weights(1:4), (2,2), replace=false) +@test sample([1, 2], Weights([0, 1]), (2, 2)) == [2 2 ; 2 2] +wm = sample(collect(1:4), Weights(1:4), (2, 2), replace = false) @test size(wm) == (2, 2) # correct shape @test length(Set(wm)) == 4 # no duplicates in elements #### check that sample and sample! do the same thing -function test_same(;kws...) +function test_same(; kws...) wv = Weights(rand(20)) Random.seed!(1) x1 = sample(1:20, wv, 10; kws...) Random.seed!(1) x2 = zeros(Int, 10) sample!(1:20, wv, x2; kws...) - @test x1 == x2 + return @test x1 == x2 end test_same() -test_same(replace=true) -test_same(replace=false) -test_same(replace=true, ordered=true) -test_same(replace=false, ordered=true) -test_same(replace=true, ordered=false) -test_same(replace=false, ordered=false) +test_same(replace = true) +test_same(replace = false) +test_same(replace = true, ordered = true) +test_same(replace = false, ordered = true) +test_same(replace = true, ordered = false) +test_same(replace = false, ordered = false) @testset "validation of inputs" begin - for f in (sample!, knuths_sample!, fisher_yates_sample!, self_avoid_sample!, - seqsample_a!, seqsample_c!, seqsample_d!) 
+ for f in ( + sample!, knuths_sample!, fisher_yates_sample!, self_avoid_sample!, + seqsample_a!, seqsample_c!, seqsample_d!, + ) x = rand(10) y = rand(10) ox = OffsetArray(x, -4:5) @@ -280,17 +282,17 @@ end T == BigInt && f == unsigned && continue T = f(T) # The type of the second argument should not affect the return type - let samp = sample(T(1):T(10), T(2); replace=false, ordered=false) + let samp = sample(T(1):T(10), T(2); replace = false, ordered = false) @test all(x -> x isa T, samp) @test all(x -> T(1) <= x <= T(10), samp) @test length(samp) == 2 end - let samp = sample(T(1):T(10), 2; replace=false, ordered=false) + let samp = sample(T(1):T(10), 2; replace = false, ordered = false) @test all(x -> x isa T, samp) @test all(x -> T(1) <= x <= T(10), samp) @test length(samp) == 2 end - let samp = sample(1:10, T(2); replace=false, ordered=false) + let samp = sample(1:10, T(2); replace = false, ordered = false) @test all(x -> x isa Int, samp) @test all(x -> 1 <= x <= 10, samp) @test length(samp) == 2 @@ -315,9 +317,9 @@ end @testset "issue #950" begin # Sampling with unit weights behaves the same as sampling without weights Random.seed!(123) - xs = sample(1:100, uweights(100), 10; replace=false) + xs = sample(1:100, uweights(100), 10; replace = false) Random.seed!(123) - @test xs == sample(1:100, 10; replace=false) + @test xs == sample(1:100, 10; replace = false) Random.seed!(123) x = sample(uweights(100)) @@ -330,14 +332,14 @@ end @test xs == direct_sample!(1:100, Vector{Int}(undef, 10)) # Errors - @test_throws DimensionMismatch("Number of samples (100) and sample weights (99) must be equal.") sample(1:100, uweights(99), 10; replace=false) + @test_throws DimensionMismatch("Number of samples (100) and sample weights (99) must be equal.") sample(1:100, uweights(99), 10; replace = false) @test_throws DimensionMismatch("Number of samples (80) and sample weights (53) must be equal.") direct_sample!(1:80, uweights(53), Vector{Int}(undef, 10)) # Custom unit weights 
don't error and behave the same as sampling with `Weights` Random.seed!(123) - xs = sample(1:100, YAUnitWeights(100), 10; replace=false) + xs = sample(1:100, YAUnitWeights(100), 10; replace = false) Random.seed!(123) - @test xs == sample(1:100, weights(ones(Int, 100)), 10; replace=false) + @test xs == sample(1:100, weights(ones(Int, 100)), 10; replace = false) for f in (StatsBase.efraimidis_a_wsample_norep!, StatsBase.efraimidis_ares_wsample_norep!, StatsBase.efraimidis_aexpj_wsample_norep!) Random.seed!(123) xs = f(1:100, YAUnitWeights(100), Vector{Int}(undef, 10)) diff --git a/test/scalarstats.jl b/test/scalarstats.jl index eec64ad74..e472f70c4 100644 --- a/test/scalarstats.jl +++ b/test/scalarstats.jl @@ -7,28 +7,28 @@ using Statistics ## geomean -@test geomean([1, 2, 3]) ≈ cbrt(6.0) -@test geomean(1:3) ≈ cbrt(6.0) -@test geomean([2, 8]) ≈ 4.0 -@test geomean([4, 1, 1/32]) ≈ 0.5 +@test geomean([1, 2, 3]) ≈ cbrt(6.0) +@test geomean(1:3) ≈ cbrt(6.0) +@test geomean([2, 8]) ≈ 4.0 +@test geomean([4, 1, 1 / 32]) ≈ 0.5 @test geomean([1, 0, 2]) == 0.0 ## harmmean -@test harmmean([1, 2, 3]) ≈ 3 / (1 + 1/2 + 1/3) -@test harmmean(1:3) ≈ 3 / (1 + 1/2 + 1/3) +@test harmmean([1, 2, 3]) ≈ 3 / (1 + 1 / 2 + 1 / 3) +@test harmmean(1:3) ≈ 3 / (1 + 1 / 2 + 1 / 3) @test harmmean([1, 2, 4]) ≈ 12 / 7 ## genmean -@test genmean([1,1,2,3], 1) ≈ 7/4 -@test genmean([1,4,2], -1) ≈ 12/7 -@test genmean([1,1,2,3], 0) ≈ (6.0)^(1/4) -@test genmean([1.2,-0.5,0], 2) ≈ sqrt(169/300) -@test genmean([16/9,0.25,1.0], 1.5) ≈ (755/648)^(2/3) +@test genmean([1, 1, 2, 3], 1) ≈ 7 / 4 +@test genmean([1, 4, 2], -1) ≈ 12 / 7 +@test genmean([1, 1, 2, 3], 0) ≈ (6.0)^(1 / 4) +@test genmean([1.2, -0.5, 0], 2) ≈ sqrt(169 / 300) +@test genmean([16 / 9, 0.25, 1.0], 1.5) ≈ (755 / 648)^(2 / 3) # Test numerical stability for `p` close to 0 (genmean should be close to geometric mean). 
-@test isapprox(genmean([1,1,2,3], -1e-8), (6.0)^(1/4), atol=1e-8) +@test isapprox(genmean([1, 1, 2, 3], -1.0e-8), (6.0)^(1 / 4), atol = 1.0e-8) # Test numerical stability for large `p` (genmean should be close to max). -@test isapprox(genmean([0.98,1.02], 1e4), 1.02, atol=1e-4) +@test isapprox(genmean([0.98, 1.02], 1.0e4), 1.02, atol = 1.0e-4) ## mode & modes @@ -51,8 +51,8 @@ wv = weights([0.1:0.1:0.7; 0.1]) @test mode(d2) == 'c' @test mode(d1, wv) == 5 @test mode(d2, wv) == 'e' -@test sort(modes(d1[1:end-1], weights(ones(7)))) == [3, 5] -@test sort(modes(d1, weights([.9, .1, .1, .1, .9, .1, .1, .1]))) == [1, 4] +@test sort(modes(d1[1:(end - 1)], weights(ones(7)))) == [3, 5] +@test sort(modes(d1, weights([0.9, 0.1, 0.1, 0.1, 0.9, 0.1, 0.1, 0.1]))) == [1, 4] @test_throws ArgumentError mode(Int[]) @test_throws ArgumentError modes(Int[]) @@ -68,23 +68,23 @@ wv = weights([0.1:0.1:0.7; 0.1]) @test zscore([-3:3;], 1.5, 0.5) == [-9.0:2.0:3.0;] a = [3 4 5 6; 7 8 1 2; 6 9 3 0] -z1 = [4. 6. 8. 10.; 5. 6. -1. 0.; 1.5 3.0 0.0 -1.5] -z2 = [8. 2. 3. 1.; 24. 10. -1. -1.; 20. 12. 1. -2.] 
+z1 = [4.0 6.0 8.0 10.0; 5.0 6.0 -1.0 0.0; 1.5 3.0 0.0 -1.5] +z2 = [8.0 2.0 3.0 1.0; 24.0 10.0 -1.0 -1.0; 20.0 12.0 1.0 -2.0] -@test zscore(a, [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 +@test zscore(a, [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 @test zscore(a, [1 3 2 4], [0.25 0.5 1.0 2.0]) ≈ z2 @test zscore!(collect(-3.0:3.0), 1.5, 0.5) == [-9.0:2.0:3.0;] -@test zscore!(float(a), [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 +@test zscore!(float(a), [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 @test zscore!(float(a), [1 3 2 4], [0.25 0.5 1.0 2.0]) ≈ z2 @test zscore!(zeros(7), [-3:3;], 1.5, 0.5) == [-9.0:2.0:3.0;] -@test zscore!(zeros(size(a)), a, [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 +@test zscore!(zeros(size(a)), a, [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 @test zscore!(zeros(size(a)), a, [1 3 2 4], [0.25 0.5 1.0 2.0]) ≈ z2 -@test zscore(a) ≈ zscore(a, mean(a), std(a)) -@test zscore(a, 1) ≈ zscore(a, mean(a, dims=1), std(a, dims=1)) -@test zscore(a, 2) ≈ zscore(a, mean(a, dims=2), std(a, dims=2)) +@test zscore(a) ≈ zscore(a, mean(a), std(a)) +@test zscore(a, 1) ≈ zscore(a, mean(a, dims = 1), std(a, dims = 1)) +@test zscore(a, 2) ≈ zscore(a, mean(a, dims = 2), std(a, dims = 2)) ###### quantile & friends @@ -93,101 +93,103 @@ z2 = [8. 2. 3. 1.; 24. 10. -1. -1.; 20. 12. 1. -2.] 
@test nquantile(1:5, 4) ≈ [1:5;] @test nquantile(skipmissing([missing, 2, 5, missing]), 2) ≈ [2.0, 3.5, 5.0] -@test percentile([1:5;], 25) ≈ 2.0 +@test percentile([1:5;], 25) ≈ 2.0 @test percentile([1:5;], [25, 50, 75]) ≈ [2.0, 3.0, 4.0] @test percentile(skipmissing([missing, 2, 5, missing]), 25) ≈ 2.75 @test percentile(skipmissing([missing, 2, 5, missing]), [25, 50, 75]) ≈ [2.75, 3.5, 4.25] @testset "quantilerank and percentilerank" begin - @testset "value as number and array" begin - @testset ":inc and :exc" begin - v1 = [1, 1, 1, 2, 3, 4, 8, 11, 12, 13] - v2 = [1, 2, 3, 6, 6, 6, 7, 8, 9] - v3 = [1, 2, 4, 3, 4] - v4 = [1, 2, 1, 3, 4] - @test quantilerank(v1, 2, method=:inc) == 1/3 - @test quantilerank(v1, 4, method=:inc) == 5/9 - @test quantilerank(v1, 8, method=:inc) == 2/3 - @test quantilerank(v1, 5, method=:inc) == 7/12 - @test quantilerank(v2, 7, method=:exc) == 0.7 - @test quantilerank(v2, 5.43, method=:exc) == 0.381 - @test quantilerank(v3, 4, method=:exc) == 6/9 - @test quantilerank(v3, 4, method=:inc) == 3/4 - @test quantilerank(v4, 1, method=:exc) == 1/6 - @test quantilerank(v4, -100, method=:inc) == 0.0 - @test quantilerank(v4, 100, method=:inc) == 1.0 - @test quantilerank(v4, -100, method=:exc) == 0.0 - @test quantilerank(v4, 100, method=:exc) == 1.0 - @test percentilerank(v1, 2) == 100 * quantilerank(v1, 2) - @test percentilerank(v2, 7, method=:exc) == 100 * quantilerank(v2, 7, method=:exc) - end - @testset ":compete" begin - v = [0, 0, 1, 1, 2, 2, 2, 2, 4, 4] - @test quantilerank(v, 1, method=:compete) == 2/9 - @test quantilerank(v, 2, method=:compete) == 4/9 - @test quantilerank(v, 4, method=:compete) == 8/9 - @test quantilerank(v, -100, method=:compete) == 0.0 - @test quantilerank(v, 100, method=:compete) == 1.0 - end - @testset ":strict, :weak and :tied" begin - v = [7, 8, 2, 1, 3, 4, 5, 4, 6, 9] - for (method, res1, res2) in [(:tied, .4, [.4, .85]), - (:strict, .3, [.3, .8]), - (:weak, .5, [.5, .9])] - @test quantilerank(v, 4, method=method) == 
res1 - end - end - end - @testset "errors" begin - v1 = [1, 2, 3, 5, 6, missing, 8] - v2 = [missing, missing] - v3 = [1, 2, 3, 5, 6, NaN, 8] - v4 = [1, 2, 3, 3, 4] - for method in (:tied, :strict, :weak) - @test_throws ArgumentError quantilerank(v1, 4, method=method) - @test_throws ArgumentError quantilerank(v2, 4, method=method) - @test_throws ArgumentError quantilerank(v3, 4, method=method) - end - @test_throws ArgumentError quantilerank(v4, 3, method=:wrongargument) - @test_throws ArgumentError quantilerank(v4, NaN) - @test_throws ArgumentError quantilerank(v4, missing) - @test_throws ArgumentError quantilerank([], 3) - @test_throws ArgumentError quantilerank([1], 3) - end - end + @testset "value as number and array" begin + @testset ":inc and :exc" begin + v1 = [1, 1, 1, 2, 3, 4, 8, 11, 12, 13] + v2 = [1, 2, 3, 6, 6, 6, 7, 8, 9] + v3 = [1, 2, 4, 3, 4] + v4 = [1, 2, 1, 3, 4] + @test quantilerank(v1, 2, method = :inc) == 1 / 3 + @test quantilerank(v1, 4, method = :inc) == 5 / 9 + @test quantilerank(v1, 8, method = :inc) == 2 / 3 + @test quantilerank(v1, 5, method = :inc) == 7 / 12 + @test quantilerank(v2, 7, method = :exc) == 0.7 + @test quantilerank(v2, 5.43, method = :exc) == 0.381 + @test quantilerank(v3, 4, method = :exc) == 6 / 9 + @test quantilerank(v3, 4, method = :inc) == 3 / 4 + @test quantilerank(v4, 1, method = :exc) == 1 / 6 + @test quantilerank(v4, -100, method = :inc) == 0.0 + @test quantilerank(v4, 100, method = :inc) == 1.0 + @test quantilerank(v4, -100, method = :exc) == 0.0 + @test quantilerank(v4, 100, method = :exc) == 1.0 + @test percentilerank(v1, 2) == 100 * quantilerank(v1, 2) + @test percentilerank(v2, 7, method = :exc) == 100 * quantilerank(v2, 7, method = :exc) + end + @testset ":compete" begin + v = [0, 0, 1, 1, 2, 2, 2, 2, 4, 4] + @test quantilerank(v, 1, method = :compete) == 2 / 9 + @test quantilerank(v, 2, method = :compete) == 4 / 9 + @test quantilerank(v, 4, method = :compete) == 8 / 9 + @test quantilerank(v, -100, method = 
:compete) == 0.0 + @test quantilerank(v, 100, method = :compete) == 1.0 + end + @testset ":strict, :weak and :tied" begin + v = [7, 8, 2, 1, 3, 4, 5, 4, 6, 9] + for (method, res1, res2) in [ + (:tied, 0.4, [0.4, 0.85]), + (:strict, 0.3, [0.3, 0.8]), + (:weak, 0.5, [0.5, 0.9]), + ] + @test quantilerank(v, 4, method = method) == res1 + end + end + end + @testset "errors" begin + v1 = [1, 2, 3, 5, 6, missing, 8] + v2 = [missing, missing] + v3 = [1, 2, 3, 5, 6, NaN, 8] + v4 = [1, 2, 3, 3, 4] + for method in (:tied, :strict, :weak) + @test_throws ArgumentError quantilerank(v1, 4, method = method) + @test_throws ArgumentError quantilerank(v2, 4, method = method) + @test_throws ArgumentError quantilerank(v3, 4, method = method) + end + @test_throws ArgumentError quantilerank(v4, 3, method = :wrongargument) + @test_throws ArgumentError quantilerank(v4, NaN) + @test_throws ArgumentError quantilerank(v4, missing) + @test_throws ArgumentError quantilerank([], 3) + @test_throws ArgumentError quantilerank([1], 3) + end +end ##### Dispersion @test span([3, 4, 5, 6, 2]) == (2:6) @test span(skipmissing([1, missing, 5, missing])) == 1:5 -@test variation([1:5;]) ≈ 0.527046276694730 -@test variation([1:5;]; corrected=false) ≈ 0.471404520791032 -@test variation(skipmissing([missing; 1:5; missing])) ≈ 0.527046276694730 +@test variation([1:5;]) ≈ 0.52704627669473 +@test variation([1:5;]; corrected = false) ≈ 0.471404520791032 +@test variation(skipmissing([missing; 1:5; missing])) ≈ 0.52704627669473 @test isnan(variation(1)) -@test variation(1; corrected=false) == 0 +@test variation(1; corrected = false) == 0 # Possibly deprecated -@test variation([1:5;],4) ≈ 0.4841229182759271 -@test variation([1:5;],4; corrected=false) ≈ 0.4330127018922193 +@test variation([1:5;], 4) ≈ 0.4841229182759271 +@test variation([1:5;], 4; corrected = false) ≈ 0.4330127018922193 @test @inferred(sem([1:5;])) ≈ 0.707106781186548 @test @inferred(sem(skipmissing([missing; 1:5; missing]))) ≈ 0.707106781186548 
-@test @inferred(sem(skipmissing([missing; 1:5; missing]), mean=3.0)) ≈ 0.707106781186548 +@test @inferred(sem(skipmissing([missing; 1:5; missing]), mean = 3.0)) ≈ 0.707106781186548 @test @inferred(sem([1:5;], UnitWeights{Int}(5))) ≈ 0.707106781186548 -@test @inferred(sem([1:5;], UnitWeights{Int}(5); mean=mean(1:5))) ≈ 0.707106781186548 +@test @inferred(sem([1:5;], UnitWeights{Int}(5); mean = mean(1:5))) ≈ 0.707106781186548 @test_throws DimensionMismatch sem(1:5, UnitWeights{Int}(4)) -@test @inferred(sem([1:5;], ProbabilityWeights([1:5;]))) ≈ 0.6166 rtol=.001 +@test @inferred(sem([1:5;], ProbabilityWeights([1:5;]))) ≈ 0.6166 rtol = 0.001 μ = mean(1:5, ProbabilityWeights([1:5;])) -@test @inferred(sem([1:5;], ProbabilityWeights([1:5;]); mean=μ)) ≈ 0.6166 rtol=.001 -@test @inferred(sem([10; 1:5;], ProbabilityWeights([0; 1:5;]); mean=μ)) ≈ 0.6166 rtol=.001 +@test @inferred(sem([1:5;], ProbabilityWeights([1:5;]); mean = μ)) ≈ 0.6166 rtol = 0.001 +@test @inferred(sem([10; 1:5;], ProbabilityWeights([0; 1:5;]); mean = μ)) ≈ 0.6166 rtol = 0.001 x = sort!(vcat([5:-1:i for i in 1:5]...)) μ = mean(x) @test @inferred(sem([1:5;], FrequencyWeights([1:5;]))) ≈ sem(x) -@test @inferred(sem([1:5;], FrequencyWeights([1:5;]); mean=μ)) ≈ sem(x) +@test @inferred(sem([1:5;], FrequencyWeights([1:5;]); mean = μ)) ≈ sem(x) -@inferred sem([1:5f0;]; mean=μ) ≈ sem(x) -@inferred sem([1:5f0;], ProbabilityWeights([1:5;]); mean=μ) -@inferred sem([1:5f0;], FrequencyWeights([1:5;]); mean=μ) +@inferred sem([1:5.0f0;]; mean = μ) ≈ sem(x) +@inferred sem([1:5.0f0;], ProbabilityWeights([1:5;]); mean = μ) +@inferred sem([1:5.0f0;], FrequencyWeights([1:5;]); mean = μ) # Broken: Bug to do with Statistics.jl's implementation of `var` # @inferred sem([1:5f0;], UnitWeights{Int}(5); mean=μ) @@ -195,31 +197,31 @@ x = sort!(vcat([5:-1:i for i in 1:5]...)) @test @inferred(isnan(sem(Int[], FrequencyWeights(Int[])))) @test @inferred(isnan(sem(Int[], ProbabilityWeights(Int[])))) -@test @inferred(isnan(sem(Int[]; 
mean=0f0))) -@test @inferred(isnan(sem(Int[], FrequencyWeights(Int[]); mean=0f0))) -@test @inferred(isnan(sem(Int[], ProbabilityWeights(Int[]); mean=0f0))) +@test @inferred(isnan(sem(Int[]; mean = 0.0f0))) +@test @inferred(isnan(sem(Int[], FrequencyWeights(Int[]); mean = 0.0f0))) +@test @inferred(isnan(sem(Int[], ProbabilityWeights(Int[]); mean = 0.0f0))) -@test @inferred(isnan(sem(skipmissing(Union{Int,Missing}[missing, missing])))) +@test @inferred(isnan(sem(skipmissing(Union{Int, Missing}[missing, missing])))) @test_throws Exception sem(Any[]) @test_throws Exception sem(skipmissing([missing])) -@test mad(1:5; center=3, normalize=true) ≈ 1.4826022185056018 -@test mad(skipmissing([missing; 1:5; missing]); center=3, normalize=true) ≈ 1.4826022185056018 -@test StatsBase.mad!([1:5;]; center=3, normalize=true) ≈ 1.4826022185056018 -@test mad(1:5, normalize=true) ≈ 1.4826022185056018 -@test mad(1:5, normalize=false) ≈ 1.0 -@test mad(skipmissing([missing; 1:5; missing]), normalize=true) ≈ 1.4826022185056018 -@test mad(skipmissing([missing; 1:5; missing]), normalize=false) ≈ 1.0 -@test StatsBase.mad!([1:5;], normalize=false) ≈ 1.0 -@test mad(1:5, center=3, normalize=false) ≈ 1.0 -@test mad(skipmissing([missing; 1:5; missing]), center=3, normalize=false) ≈ 1.0 -@test StatsBase.mad!([1:5;], center=3, normalize=false) ≈ 1.0 -@test mad((x for x in (1, 2.1)), normalize=false) ≈ 0.55 -@test mad(Any[1, 2.1], normalize=false) ≈ 0.55 -@test mad(Union{Int,Missing}[1, 2], normalize=false) ≈ 0.5 +@test mad(1:5; center = 3, normalize = true) ≈ 1.4826022185056018 +@test mad(skipmissing([missing; 1:5; missing]); center = 3, normalize = true) ≈ 1.4826022185056018 +@test StatsBase.mad!([1:5;]; center = 3, normalize = true) ≈ 1.4826022185056018 +@test mad(1:5, normalize = true) ≈ 1.4826022185056018 +@test mad(1:5, normalize = false) ≈ 1.0 +@test mad(skipmissing([missing; 1:5; missing]), normalize = true) ≈ 1.4826022185056018 +@test mad(skipmissing([missing; 1:5; missing]), normalize = 
false) ≈ 1.0 +@test StatsBase.mad!([1:5;], normalize = false) ≈ 1.0 +@test mad(1:5, center = 3, normalize = false) ≈ 1.0 +@test mad(skipmissing([missing; 1:5; missing]), center = 3, normalize = false) ≈ 1.0 +@test StatsBase.mad!([1:5;], center = 3, normalize = false) ≈ 1.0 +@test mad((x for x in (1, 2.1)), normalize = false) ≈ 0.55 +@test mad(Any[1, 2.1], normalize = false) ≈ 0.55 +@test mad(Union{Int, Missing}[1, 2], normalize = false) ≈ 0.5 @test_throws ArgumentError mad(Int[], normalize = true) @test mad(Iterators.repeated(4, 10)) == 0 -@test mad(Integer[1,2,3,4]) === mad(1:4) +@test mad(Integer[1, 2, 3, 4]) === mad(1:4) mad_allocs(itr) = @allocations(mad(itr; normalize = false)) let itr = (i for i in 1:10000) mad_allocs(itr) @@ -227,17 +229,19 @@ let itr = (i for i in 1:10000) end # Issue 197 -@test mad(1:2, normalize=true) ≈ 0.7413011092528009 +@test mad(1:2, normalize = true) ≈ 0.7413011092528009 @test iqr(1:5) ≈ 2.0 -nutrient = readdlm(joinpath(@__DIR__, "data", "nutrient.txt"))[:,2:end] -@test @inferred(genvar(nutrient)) ≈ 2.8310418e19 rtol=1e-6 -@test @inferred(totalvar(nutrient)) ≈ 2.83266877e6 rtol=1e-6 +nutrient = readdlm(joinpath(@__DIR__, "data", "nutrient.txt"))[:, 2:end] +@test @inferred(genvar(nutrient)) ≈ 2.8310418e19 rtol = 1.0e-6 +@test @inferred(totalvar(nutrient)) ≈ 2.83266877e6 rtol = 1.0e-6 -X = [1 2 5 - 4 1 6 - 4 0 4] +X = [ + 1 2 5 + 4 1 6 + 4 0 4 +] @test @inferred(genvar(X)) ≈ 0.0 @test @inferred(totalvar(X)) ≈ 5.0 @@ -250,15 +254,15 @@ it = (xᵢ for xᵢ in x) ##### entropy -@test @inferred(entropy([0.5, 0.5])) ≈ 0.6931471805599453 -@test @inferred(entropy([1//2, 1//2])) ≈ 0.6931471805599453 +@test @inferred(entropy([0.5, 0.5])) ≈ 0.6931471805599453 +@test @inferred(entropy([1 // 2, 1 // 2])) ≈ 0.6931471805599453 @test @inferred(entropy([0.5f0, 0.5f0])) isa Float32 @test @inferred(entropy([0.2, 0.3, 0.5])) ≈ 1.0296530140645737 @test iszero(@inferred(entropy([0, 1]))) @test iszero(@inferred(entropy([0.0, 1.0]))) -@test 
@inferred(entropy([0.5, 0.5], 2)) ≈ 1.0 -@test @inferred(entropy([1//2, 1//2], 2)) ≈ 1.0 +@test @inferred(entropy([0.5, 0.5], 2)) ≈ 1.0 +@test @inferred(entropy([1 // 2, 1 // 2], 2)) ≈ 1.0 @test @inferred(entropy([0.2, 0.3, 0.5], 2)) ≈ 1.4854752972273344 # issue #924 @@ -275,7 +279,7 @@ dist = rand(nindiv) dist /= sum(dist) # Check Shannon entropy against Renyi entropy of order 1 -@test entropy(dist) ≈ renyientropy(dist, 1) +@test entropy(dist) ≈ renyientropy(dist, 1) @test renyientropy(dist, 1) ≈ renyientropy(dist, 1.0) # Check Renyi entropy of order 0 is the natural log of the count of non-zeros @@ -301,18 +305,18 @@ udist = ones(nindiv) / nindiv # And test generalised probability distributions (sum(p) != 1) scale = rand() -@test renyientropy(udist * scale, 0) ≈ renyientropy(udist, 0) - log(scale) -@test renyientropy(udist * scale, 1) ≈ renyientropy(udist, 1) - log(scale) -@test renyientropy(udist * scale, Inf) ≈ renyientropy(udist, Inf) - log(scale) +@test renyientropy(udist * scale, 0) ≈ renyientropy(udist, 0) - log(scale) +@test renyientropy(udist * scale, 1) ≈ renyientropy(udist, 1) - log(scale) +@test renyientropy(udist * scale, Inf) ≈ renyientropy(udist, Inf) - log(scale) @test renyientropy(udist * scale, order) ≈ renyientropy(udist, order) - log(scale) ##### Cross entropy -@test @inferred(crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3])) ≈ 1.1176681825904018 -@test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3, 0.4, 0.3])) ≈ 1.1176681825904018 -@test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0])) isa Float32 -@test @inferred(crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2)) ≈ 1.6124543443825532 -@test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3, 0.4, 0.3], 2)) ≈ 1.6124543443825532 -@test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0], 2f0)) isa Float32 +@test @inferred(crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3])) ≈ 1.1176681825904018 +@test @inferred(crossentropy([1 // 5, 3 // 10, 1 // 2], [0.3, 0.4, 0.3])) 
≈ 1.1176681825904018 +@test @inferred(crossentropy([1 // 5, 3 // 10, 1 // 2], [0.3f0, 0.4f0, 0.3f0])) isa Float32 +@test @inferred(crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2)) ≈ 1.6124543443825532 +@test @inferred(crossentropy([1 // 5, 3 // 10, 1 // 2], [0.3, 0.4, 0.3], 2)) ≈ 1.6124543443825532 +@test @inferred(crossentropy([1 // 5, 3 // 10, 1 // 2], [0.3f0, 0.4f0, 0.3f0], 2.0f0)) isa Float32 # deprecated, should throw an `ArgumentError` at some point logpattern = (:warn, "support for empty collections will be removed since they do not represent proper probability distributions") @@ -320,13 +324,13 @@ logpattern = (:warn, "support for empty collections will be removed since they d @test iszero(@test_logs logpattern @inferred(crossentropy(Int[], Int[]))) ##### KL divergence -@test @inferred(kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3])) ≈ 0.08801516852582819 -@test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3, 0.4, 0.3])) ≈ 0.08801516852582819 -@test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0])) isa Float32 -@test @inferred(kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2)) ≈ 0.12697904715521868 -@test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3, 0.4, 0.3], 2)) ≈ 0.12697904715521868 -@test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0], 2f0)) isa Float32 -@test iszero(@inferred(kldivergence([0, 1], [0f0, 1f0]))) +@test @inferred(kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3])) ≈ 0.08801516852582819 +@test @inferred(kldivergence([1 // 5, 3 // 10, 1 // 2], [0.3, 0.4, 0.3])) ≈ 0.08801516852582819 +@test @inferred(kldivergence([1 // 5, 3 // 10, 1 // 2], [0.3f0, 0.4f0, 0.3f0])) isa Float32 +@test @inferred(kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2)) ≈ 0.12697904715521868 +@test @inferred(kldivergence([1 // 5, 3 // 10, 1 // 2], [0.3, 0.4, 0.3], 2)) ≈ 0.12697904715521868 +@test @inferred(kldivergence([1 // 5, 3 // 10, 1 // 2], [0.3f0, 0.4f0, 0.3f0], 2.0f0)) isa Float32 +@test iszero(@inferred(kldivergence([0, 
1], [0.0f0, 1.0f0]))) # deprecated, should throw an `ArgumentError` at some point logpattern = (:warn, "support for empty collections will be removed since they do not represent proper probability distributions") @@ -339,42 +343,42 @@ s = summarystats(1:5) @test isa(s, StatsBase.SummaryStats) @test s.min == 1.0 @test s.max == 5.0 -@test s.nobs == 5 -@test s.nmiss == 0 -@test s.mean ≈ 3.0 +@test s.nobs == 5 +@test s.nmiss == 0 +@test s.mean ≈ 3.0 @test s.median ≈ 3.0 -@test s.q25 ≈ 2.0 -@test s.q75 ≈ 4.0 -@test s.sd ≈ 1.5811388300841898 +@test s.q25 ≈ 2.0 +@test s.q75 ≈ 4.0 +@test s.sd ≈ 1.5811388300841898 # Issue #631 s = summarystats([-2, -1, 0, 1, 2, missing]) @test isa(s, StatsBase.SummaryStats) @test s.min == -2.0 @test s.max == 2.0 -@test s.nobs == 6 -@test s.nmiss == 1 -@test s.mean ≈ 0.0 +@test s.nobs == 6 +@test s.nmiss == 1 +@test s.mean ≈ 0.0 @test s.median ≈ 0.0 -@test s.q25 ≈ -1.0 -@test s.q75 ≈ +1.0 -@test s.sd ≈ 1.5811388300841898 +@test s.q25 ≈ -1.0 +@test s.q75 ≈ +1.0 +@test s.sd ≈ 1.5811388300841898 # Issue #631 s = summarystats(zeros(10)) @test isa(s, StatsBase.SummaryStats) @test s.min == 0.0 @test s.max == 0.0 -@test s.nobs == 10 -@test s.nmiss == 0 -@test s.mean ≈ 0.0 +@test s.nobs == 10 +@test s.nmiss == 0 +@test s.mean ≈ 0.0 @test s.median ≈ 0.0 -@test s.q25 ≈ 0.0 -@test s.q75 ≈ 0.0 -@test s.sd ≈ 0.0 +@test s.q25 ≈ 0.0 +@test s.q75 ≈ 0.0 +@test s.sd ≈ 0.0 # Issue #631 -s = summarystats(Union{Float64,Missing}[missing, missing]) +s = summarystats(Union{Float64, Missing}[missing, missing]) @test isa(s, StatsBase.SummaryStats) @test s.nobs == 2 @test s.nmiss == 2 diff --git a/test/signalcorr.jl b/test/signalcorr.jl index bfbe90fed..614879005 100644 --- a/test/signalcorr.jl +++ b/test/signalcorr.jl @@ -9,16 +9,18 @@ using Test # random data for testing -x = [-2.133252557240862 -.7445937365828654; - .1775816414485478 -.5834801838041446; - -.6264517920318317 -.68444205333293; - -.8809042583216906 .9071671734302398; - .09251017186697393 
-1.0404476733379926; - -.9271887119115569 -.620728578941385; - 3.355819743178915 -.8325051361909978; - -.2834039258495755 -.22394811874731657; - .5354280026977677 .7481337671592626; - .39182285417742585 .3085762550821047] +x = [ + -2.133252557240862 -0.7445937365828654; + 0.1775816414485478 -0.5834801838041446; + -0.6264517920318317 -0.68444205333293; + -0.8809042583216906 0.9071671734302398; + 0.09251017186697393 -1.0404476733379926; + -0.9271887119115569 -0.620728578941385; + 3.355819743178915 -0.8325051361909978; + -0.2834039258495755 -0.22394811874731657; + 0.5354280026977677 0.7481337671592626; + 0.39182285417742585 0.3085762550821047 +] x1 = view(x, :, 1) x2 = view(x, :, 2) @@ -28,53 +30,59 @@ realx2 = convert(AbstractVector{Real}, x2) # autocov & autocorr -@test autocov([1:5;]) ≈ [2.0, 0.8, -0.2, -0.8, -0.8] +@test autocov([1:5;]) ≈ [2.0, 0.8, -0.2, -0.8, -0.8] @test autocor([1, 2, 3, 4, 5]) ≈ [1.0, 0.4, -0.1, -0.4, -0.4] -racovx1 = [1.839214242630635709475, - -0.406784553146903871124, - 0.421772254824993531042, - 0.035874943792884653182, - -0.255679775928512320604, - 0.231154400105831353551, - -0.787016960267425180753, - 0.039909287349160660341, - -0.110149697877911914579, - -0.088687020167434751916] +racovx1 = [ + 1.839214242630635709475, + -0.406784553146903871124, + 0.421772254824993531042, + 0.035874943792884653182, + -0.255679775928512320604, + 0.231154400105831353551, + -0.787016960267425180753, + 0.039909287349160660341, + -0.110149697877911914579, + -0.088687020167434751916, +] @test autocov(x1) ≈ racovx1 @test autocov(realx1) ≈ racovx1 -@test autocov(x) ≈ [autocov(x1) autocov(x2)] -@test autocov(realx) ≈ [autocov(realx1) autocov(realx2)] - -racorx1 = [0.999999999999999888978, - -0.221173011668873431557, - 0.229321981664153962122, - 0.019505581764945757045, - -0.139015765538446717242, - 0.125681062460244019618, - -0.427909344123907742219, - 0.021699096507690283225, - -0.059889541590524189574, - -0.048220059475281865091] +@test autocov(x) ≈ 
[autocov(x1) autocov(x2)] +@test autocov(realx) ≈ [autocov(realx1) autocov(realx2)] + +racorx1 = [ + 0.999999999999999888978, + -0.221173011668873431557, + 0.229321981664153962122, + 0.019505581764945757045, + -0.139015765538446717242, + 0.125681062460244019618, + -0.427909344123907742219, + 0.021699096507690283225, + -0.059889541590524189574, + -0.048220059475281865091, +] @test autocor(x1) ≈ racorx1 @test autocor(realx1) ≈ racorx1 -@test autocor(x) ≈ [autocor(x1) autocor(x2)] -@test autocor(realx) ≈ [autocor(realx1) autocor(realx2)] +@test autocor(x) ≈ [autocor(x1) autocor(x2)] +@test autocor(realx) ≈ [autocor(realx1) autocor(realx2)] # crosscov & crosscor -rcov0 = [0.320000000000000006661, - -0.319999999999999951150, - 0.080000000000000029421, - -0.479999999999999982236, - 0.000000000000000000000, - 0.479999999999999982236, - -0.080000000000000029421, - 0.319999999999999951150, - -0.320000000000000006661] +rcov0 = [ + 0.320000000000000006661, + -0.31999999999999995115, + 0.080000000000000029421, + -0.479999999999999982236, + 0.0, + 0.479999999999999982236, + -0.080000000000000029421, + 0.31999999999999995115, + -0.320000000000000006661, +] @test crosscov([1, 2, 3, 4, 5], [1, -1, 1, -1, 1]) ≈ rcov0 @test crosscov([1:5;], [1:5;]) ≈ [-0.8, -0.8, -0.2, 0.8, 2.0, 0.8, -0.2, -0.8, -0.8] @@ -85,27 +93,29 @@ c21 = crosscov(x2, x1) c22 = crosscov(x2, x2) @test crosscov(realx1, realx2) ≈ c12 -@test crosscov(x, x1) ≈ [c11 c21] +@test crosscov(x, x1) ≈ [c11 c21] @test crosscov(realx, realx1) ≈ [c11 c21] -@test crosscov(x1, x) ≈ [c11 c12] -@test crosscov(realx1, realx) ≈ [c11 c12] -@test crosscov(x, x) ≈ cat([c11 c21], [c12 c22], dims=3) -@test crosscov(realx, realx) ≈ cat([c11 c21], [c12 c22], dims=3) +@test crosscov(x1, x) ≈ [c11 c12] +@test crosscov(realx1, realx) ≈ [c11 c12] +@test crosscov(x, x) ≈ cat([c11 c21], [c12 c22], dims = 3) +@test crosscov(realx, realx) ≈ cat([c11 c21], [c12 c22], dims = 3) # issue #805: avoid converting one input to the other's eltype @test 
crosscov([34566.5345, 3466.4566], Float16[1, 10]) ≈ crosscov(Float16[1, 10], [34566.5345, 3466.4566]) ≈ crosscov([34566.5345, 3466.4566], Float16[1, 10]) -rcor0 = [0.230940107675850, - -0.230940107675850, - 0.057735026918963, - -0.346410161513775, - 0.000000000000000, - 0.346410161513775, - -0.057735026918963, - 0.230940107675850, - -0.230940107675850] +rcor0 = [ + 0.23094010767585, + -0.23094010767585, + 0.057735026918963, + -0.346410161513775, + 0.0, + 0.346410161513775, + -0.057735026918963, + 0.23094010767585, + -0.23094010767585, +] @test crosscor([1, 2, 3, 4, 5], [1, -1, 1, -1, 1]) ≈ rcor0 @test crosscor([1:5;], [1:5;]) ≈ [-0.4, -0.4, -0.1, 0.4, 1.0, 0.4, -0.1, -0.4, -0.4] @@ -116,12 +126,12 @@ c21 = crosscor(x2, x1) c22 = crosscor(x2, x2) @test crosscor(realx1, realx2) ≈ c12 -@test crosscor(x, x1) ≈ [c11 c21] +@test crosscor(x, x1) ≈ [c11 c21] @test crosscor(realx, realx1) ≈ [c11 c21] -@test crosscor(x1, x) ≈ [c11 c12] -@test crosscor(realx1, realx) ≈ [c11 c12] -@test crosscor(x, x) ≈ cat([c11 c21], [c12 c22], dims=3) -@test crosscor(realx, realx) ≈ cat([c11 c21], [c12 c22], dims=3) +@test crosscor(x1, x) ≈ [c11 c12] +@test crosscor(realx1, realx) ≈ [c11 c12] +@test crosscor(x, x) ≈ cat([c11 c21], [c12 c22], dims = 3) +@test crosscor(realx, realx) ≈ cat([c11 c21], [c12 c22], dims = 3) # issue #805: avoid converting one input to the other's eltype @test crosscor([34566.5345, 3466.4566], Float16[1, 10]) ≈ @@ -131,16 +141,20 @@ c22 = crosscor(x2, x2) ## pacf -rpacfr = [-0.218158122381419, - 0.195015316828711, - 0.144315804606139, - -0.199791229449779] +rpacfr = [ + -0.218158122381419, + 0.195015316828711, + 0.144315804606139, + -0.199791229449779, +] -@test pacf(x[:,1], 1:4) ≈ rpacfr +@test pacf(x[:, 1], 1:4) ≈ rpacfr -rpacfy = [-0.221173011668873, - 0.189683314308021, - 0.111857020733719, - -0.175020669835420] +rpacfy = [ + -0.221173011668873, + 0.189683314308021, + 0.111857020733719, + -0.17502066983542, +] -@test pacf(x[:,1], 1:4, method=:yulewalker) ≈ 
rpacfy +@test pacf(x[:, 1], 1:4, method = :yulewalker) ≈ rpacfy diff --git a/test/statmodels.jl b/test/statmodels.jl index 70d864bec..1b0deed2d 100644 --- a/test/statmodels.jl +++ b/test/statmodels.jl @@ -7,49 +7,55 @@ v2 = ["Good", "Great", "Bad"] v3 = [1, 56, 2] v4 = [-12.56, 0.1326, 2.68e-16] v5 = [0.12, 0.3467, 1.345e-16] -ct = CoefTable(Any[v1, v2, v3, v4, v5], - ["Estimate", "Comments", "df", "t", "p"], - ["x1", "x2", "x3"], 5, 4) -ct_noname = CoefTable(Any[v1, v2, v3, v4, v5], - ["Estimate", "Comments", "df", "t", "p"], - [], 5, 4) +ct = CoefTable( + Any[v1, v2, v3, v4, v5], + ["Estimate", "Comments", "df", "t", "p"], + ["x1", "x2", "x3"], 5, 4 +) +ct_noname = CoefTable( + Any[v1, v2, v3, v4, v5], + ["Estimate", "Comments", "df", "t", "p"], + [], 5, 4 +) @test sprint(show, "text/plain", ct) == """ -─────────────────────────────────────────────── - Estimate Comments df t p -─────────────────────────────────────────────── -x1 1.45666 Good 1 -12.56 0.1200 -x2 -23.14 Great 56 0.13 0.3467 -x3 1.56734e-13 Bad 2 0.00 <1e-15 -───────────────────────────────────────────────""" + ─────────────────────────────────────────────── + Estimate Comments df t p + ─────────────────────────────────────────────── + x1 1.45666 Good 1 -12.56 0.1200 + x2 -23.14 Great 56 0.13 0.3467 + x3 1.56734e-13 Bad 2 0.00 <1e-15 + ───────────────────────────────────────────────""" @test sprint(show, "text/plain", ct_noname) == """ -──────────────────────────────────────────────── - Estimate Comments df t p -──────────────────────────────────────────────── -[1] 1.45666 Good 1 -12.56 0.1200 -[2] -23.14 Great 56 0.13 0.3467 -[3] 1.56734e-13 Bad 2 0.00 <1e-15 -────────────────────────────────────────────────""" + ──────────────────────────────────────────────── + Estimate Comments df t p + ──────────────────────────────────────────────── + [1] 1.45666 Good 1 -12.56 0.1200 + [2] -23.14 Great 56 0.13 0.3467 + [3] 1.56734e-13 Bad 2 0.00 <1e-15 + ────────────────────────────────────────────────""" 
@test sprint(show, MIME"text/markdown"(), ct) == """ -| | Estimate | Comments | df | t | p | -|:---|--------------:|---------:|---:|-------:|:-------| -| x1 | 1.45666 | Good | 1 | -12.56 | 0.1200 | -| x2 | -23.14 | Great | 56 | 0.13 | 0.3467 | -| x3 | 1.56734e-13 | Bad | 2 | 0.00 | <1e-15 |""" + | | Estimate | Comments | df | t | p | + |:---|--------------:|---------:|---:|-------:|:-------| + | x1 | 1.45666 | Good | 1 | -12.56 | 0.1200 | + | x2 | -23.14 | Great | 56 | 0.13 | 0.3467 | + | x3 | 1.56734e-13 | Bad | 2 | 0.00 | <1e-15 |""" @test sprint(show, MIME"text/markdown"(), ct_noname) == """ -| | Estimate | Comments | df | t | p | -|:----|--------------:|---------:|---:|-------:|:-------| -| [1] | 1.45666 | Good | 1 | -12.56 | 0.1200 | -| [2] | -23.14 | Great | 56 | 0.13 | 0.3467 | -| [3] | 1.56734e-13 | Bad | 2 | 0.00 | <1e-15 |""" + | | Estimate | Comments | df | t | p | + |:----|--------------:|---------:|---:|-------:|:-------| + | [1] | 1.45666 | Good | 1 | -12.56 | 0.1200 | + | [2] | -23.14 | Great | 56 | 0.13 | 0.3467 | + | [3] | 1.56734e-13 | Bad | 2 | 0.00 | <1e-15 |""" @test length(ct) === 3 @test eltype(ct) == - NamedTuple{(:Name, :Estimate, :Comments, :df, :t, :p), - Tuple{String,Float64,String,Int,Float64,Float64}} + NamedTuple{ + (:Name, :Estimate, :Comments, :df, :t, :p), + Tuple{String, Float64, String, Int, Float64, Float64}, +} @test collect(ct) == [ (Name = "x1", Estimate = 1.45666, Comments = "Good", df = 1, t = -12.56, p = 0.12) (Name = "x2", Estimate = -23.14, Comments = "Great", df = 56, t = 0.1326, p = 0.3467) @@ -57,48 +63,58 @@ x3 1.56734e-13 Bad 2 0.00 <1e-15 ] -m = [0.11258244478647295 0.05664544616214151 0.38181274408522614 0.8197779704008801 - 0.36831406658084287 0.12078054506961555 0.8151038332483567 0.6699313951612162 - 0.3444540231363058 0.17957407667101322 0.2422083248151139 0.4530583319523316] +m = [ + 0.11258244478647295 0.05664544616214151 0.38181274408522614 0.8197779704008801 + 0.36831406658084287 0.12078054506961555 
0.8151038332483567 0.6699313951612162 + 0.3444540231363058 0.17957407667101322 0.2422083248151139 0.4530583319523316 +] ct = CoefTable(m, ["Estimate", "Stderror", "df", "p"], [], 4) @test sprint(show, "text/plain", ct) == """ -────────────────────────────────────────── - Estimate Stderror df p -────────────────────────────────────────── -[1] 0.112582 0.0566454 0.381813 0.8198 -[2] 0.368314 0.120781 0.815104 0.6699 -[3] 0.344454 0.179574 0.242208 0.4531 -──────────────────────────────────────────""" + ────────────────────────────────────────── + Estimate Stderror df p + ────────────────────────────────────────── + [1] 0.112582 0.0566454 0.381813 0.8198 + [2] 0.368314 0.120781 0.815104 0.6699 + [3] 0.344454 0.179574 0.242208 0.4531 + ──────────────────────────────────────────""" @test length(ct) === 3 @test eltype(ct) == - NamedTuple{(:Estimate, :Stderror, :df, :p), - Tuple{Float64,Float64,Float64,Float64}} + NamedTuple{ + (:Estimate, :Stderror, :df, :p), + Tuple{Float64, Float64, Float64, Float64}, +} @test collect(ct) == [ - (Estimate = 0.11258244478647295, Stderror = 0.05664544616214151, - df = 0.38181274408522614, p = 0.8197779704008801) - (Estimate = 0.36831406658084287, Stderror = 0.12078054506961555, - df = 0.8151038332483567, p = 0.6699313951612162) - (Estimate = 0.3444540231363058, Stderror = 0.17957407667101322, - df = 0.2422083248151139, p = 0.4530583319523316) + ( + Estimate = 0.11258244478647295, Stderror = 0.05664544616214151, + df = 0.38181274408522614, p = 0.8197779704008801, + ) + ( + Estimate = 0.36831406658084287, Stderror = 0.12078054506961555, + df = 0.8151038332483567, p = 0.6699313951612162, + ) + ( + Estimate = 0.3444540231363058, Stderror = 0.17957407667101322, + df = 0.2422083248151139, p = 0.4530583319523316, + ) ] @test sprint(show, PValue(1.0)) == "1.0000" -@test sprint(show, PValue(1e-1)) == "0.1000" -@test sprint(show, PValue(1e-5)) == "<1e-04" +@test sprint(show, PValue(1.0e-1)) == "0.1000" +@test sprint(show, PValue(1.0e-5)) == 
"<1e-04" @test sprint(show, PValue(NaN)) == "NaN" @test_throws ErrorException PValue(-0.1) @test_throws ErrorException PValue(1.1) @test sprint(show, TestStat(NaN)) == "NaN" -@test sprint(show, TestStat(1e-1)) == "0.10" -@test sprint(show, TestStat(1e-5)) == "0.00" +@test sprint(show, TestStat(1.0e-1)) == "0.10" +@test sprint(show, TestStat(1.0e-5)) == "0.00" @test sprint(show, TestStat(π)) == "3.14" @testset "Union{PValue, TestStat} is Real" begin - vals = [0.0, Rational(1,3), NaN] + vals = [0.0, Rational(1, 3), NaN] for T in [PValue, TestStat], - f in (==, <, ≤, >, ≥, isless, isequal), - lhs in vals, rhs in vals + f in (==, <, ≤, >, ≥, isless, isequal), + lhs in vals, rhs in vals # make sure that T behaves like a Real, # regardless of whether it's on the LHS, RHS or both @test f(T(lhs), T(rhs)) == f(lhs, rhs) @@ -108,9 +124,9 @@ ct = CoefTable(m, ["Estimate", "Stderror", "df", "p"], [], 4) # the (approximate) equality operators get a bit more attention for T in [PValue, TestStat] - @test T(Rational(1,3)) ≈ T(1/3) - @test Rational(1,3) ≈ T(1/3) atol=0.01 - @test T(Rational(1,3)) isa Real + @test T(Rational(1, 3)) ≈ T(1 / 3) + @test Rational(1, 3) ≈ T(1 / 3) atol = 0.01 + @test T(Rational(1, 3)) isa Real @test T(T(0.05)) === T(0.05) @test hash(T(0.05)) == hash(0.05) @test hash(T(0.05), UInt(42)) == hash(0.05, UInt(42)) @@ -125,7 +141,7 @@ end @test sprint(showerror, ConvergenceException(10, 0.2, 0.1, "Try changing maxIter.")) == "failure to converge after 10 iterations. Last change (0.2) was greater than tolerance (0.1). Try changing maxIter." -err = @test_throws ArgumentError ConvergenceException(10,.1,.2) +err = @test_throws ArgumentError ConvergenceException(10, 0.1, 0.2) @test err.value.msg == "Change must be greater than tol." 
struct MyStatisticalModel <: StatisticalModel @@ -174,16 +190,18 @@ StatsAPI.modelmatrix(::MyRegressionModel) = [1 2; 3 4] end @testset "StatsAPI model reexports" begin - for f in (fitted, response, responsename, meanresponse, - modelmatrix, crossmodelmatrix, leverage, cooksdistance, residuals, - predict, predict!, dof_residual, coef, coefnames, coeftable, confint, - deviance, islinear, nulldeviance, loglikelihood, nullloglikelihood, - loglikelihood, loglikelihood, score, nobs, dof, mss, rss, - informationmatrix, stderror, vcov, weights, isfitted, fit, fit!, - aic, aicc, bic, r2, r², adjr2, adjr²) + for f in ( + fitted, response, responsename, meanresponse, + modelmatrix, crossmodelmatrix, leverage, cooksdistance, residuals, + predict, predict!, dof_residual, coef, coefnames, coeftable, confint, + deviance, islinear, nulldeviance, loglikelihood, nullloglikelihood, + loglikelihood, loglikelihood, score, nobs, dof, mss, rss, + informationmatrix, stderror, vcov, weights, isfitted, fit, fit!, + aic, aicc, bic, r2, r², adjr2, adjr², + ) @test f isa Function end # Defined but not reexported @test StatsBase.params isa Function @test StatsBase.params! 
isa Function -end \ No newline at end of file +end diff --git a/test/statquiz.jl b/test/statquiz.jl index 6e91453a9..3c43a2b11 100644 --- a/test/statquiz.jl +++ b/test/statquiz.jl @@ -9,40 +9,42 @@ using Printf testeps = sqrt(eps()) -nasty = DataFrame( label = ["One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine"], - x = collect(1.:9), - zero = fill(0.0,9), - miss = fill(NA, 9), - big = 99999990.0 + collect(1:9), - little = (99999990.0 + collect(1:9))/10^8, - huge = collect(1.:9)*1e12, - tiny = collect(1.:9)*1e-12, - round = collect(0.5:8.5)) +nasty = DataFrame( + label = ["One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine"], + x = collect(1.0:9), + zero = fill(0.0, 9), + miss = fill(NA, 9), + big = 99999990.0 + collect(1:9), + little = (99999990.0 + collect(1:9)) / 10^8, + huge = collect(1.0:9) * 1.0e12, + tiny = collect(1.0:9) * 1.0e-12, + round = collect(0.5:8.5) +) println(nasty) println("\nII Real Numbers:\nII A") print("Test rounding: ") -@test [@sprintf("%1.0f", x) for x in nasty[:round]] == ["1","2","3","4","5","6","7","8","9"] +@test [@sprintf("%1.0f", x) for x in nasty[:round]] == ["1", "2", "3", "4", "5", "6", "7", "8", "9"] println("OK") print("Test math: ") -@test round(Int, 2.6*7 - 0.2) == 18 -@test 2 - round(Int, exp(log(sqrt(2)*sqrt(2)))) == 0 -@test round(Int, 3 - exp(log(sqrt(2)*sqrt(2)))) == 1 +@test round(Int, 2.6 * 7 - 0.2) == 18 +@test 2 - round(Int, exp(log(sqrt(2) * sqrt(2)))) == 0 +@test round(Int, 3 - exp(log(sqrt(2) * sqrt(2)))) == 1 println("OK") print("Test means: ") for vars in names(nasty)[2:end] if vars == :miss - @test isna(mean(nasty[vars])) + @test isna(mean(nasty[vars])) else - @test mean(nasty[vars]) ≈ nasty[vars][5] + @test mean(nasty[vars]) ≈ nasty[vars][5] end end println("OK") print("Test standard deviation: ") for vars in names(nasty)[[2;5:9]] -# @test (@sprintf("%.9e", std(nasty[vars])))[1:10] == "2.73861278" + # @test (@sprintf("%.9e", std(nasty[vars])))[1:10] == "2.73861278" @test 
repr(std(nasty[vars]))[1:10] == "2.73861278" end println("OK") @@ -52,8 +54,8 @@ println("\nII D") print("Test correlation: ") cn = names(nasty)[[2;5:9]] for i in 1:5 - for j = i+1:6 - @test cor(nasty[cn[i]], nasty[cn[j]]) ≈ 1 + for j in (i + 1):6 + @test cor(nasty[cn[i]], nasty[cn[j]]) ≈ 1 end end println("OK") @@ -61,8 +63,8 @@ println("OK") print("Test spearman correlation: ") cn = names(nasty)[[2;5:9]] for i in 1:5 - for j = i+1:6 - @test corspearman(nasty[cn[i]], nasty[cn[j]]) ≈ 1 + for j in (i + 1):6 + @test corspearman(nasty[cn[i]], nasty[cn[j]]) ≈ 1 end end println("OK") @@ -75,26 +77,26 @@ ctable = coeftable(lm(big ~ x, nasty)) @test Vector{Float64}(ctable.cols[1]) ≈ [99999990, 1] @test sprint(show, ctable) == """\ - Estimate Std.Error t value Pr(>|t|) -(Intercept) 1.0e8 0.0 Inf <1e-99 -x 1.0 0.0 Inf <1e-99 -""" + Estimate Std.Error t value Pr(>|t|) + (Intercept) 1.0e8 0.0 Inf <1e-99 + x 1.0 0.0 Inf <1e-99 + """ println("OK") println("\nIV Regression:\nIV A") nasty[:x1] = nasty[:x] -nasty[:x2] = nasty[:x].^2 -nasty[:x3] = nasty[:x].^3 -nasty[:x4] = nasty[:x].^4 -nasty[:x5] = nasty[:x].^5 -nasty[:x6] = nasty[:x].^6 -nasty[:x7] = nasty[:x].^7 -nasty[:x8] = nasty[:x].^8 -nasty[:x9] = nasty[:x].^9 +nasty[:x2] = nasty[:x] .^ 2 +nasty[:x3] = nasty[:x] .^ 3 +nasty[:x4] = nasty[:x] .^ 4 +nasty[:x5] = nasty[:x] .^ 5 +nasty[:x6] = nasty[:x] .^ 6 +nasty[:x7] = nasty[:x] .^ 7 +nasty[:x8] = nasty[:x] .^ 8 +nasty[:x9] = nasty[:x] .^ 9 ## Is it intended that the least squares problem be overdetermined in the lm fit? ## n = 9 and p = 10 because of the implicit intercept. 
-lm(x1~x2+x3+x4+x5+x6+x7+x8+x9, nasty) -@test coef(lm(x~x, nasty)) ≈ [0,1] +lm(x1 ~ x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9, nasty) +@test coef(lm(x ~ x, nasty)) ≈ [0, 1] println("OK") diff --git a/test/transformations.jl b/test/transformations.jl index 7d8e2b0a9..357343270 100644 --- a/test/transformations.jl +++ b/test/transformations.jl @@ -8,7 +8,7 @@ using Test X = rand(5, 8) X_ = copy(X) - t = fit(ZScoreTransform, X, dims=1, center=false, scale=false) + t = fit(ZScoreTransform, X, dims = 1, center = false, scale = false) Y = transform(t, X) @test isa(t, AbstractDataTransform) @test isempty(t.mean) @@ -21,11 +21,11 @@ using Test @test Y ≈ X_ X = copy(X_) - t = fit(ZScoreTransform, X, dims=1, center=false) + t = fit(ZScoreTransform, X, dims = 1, center = false) Y = transform(t, X) @test isempty(t.mean) @test length(t.scale) == 8 - @test Y ≈ X ./ std(X, dims=1) + @test Y ≈ X ./ std(X, dims = 1) @test reconstruct(t, Y) ≈ X @test transform!(t, X) === X @test isequal(X, Y) @@ -33,11 +33,11 @@ using Test @test Y ≈ X_ X = copy(X_) - t = fit(ZScoreTransform, X, dims=1, scale=false) + t = fit(ZScoreTransform, X, dims = 1, scale = false) Y = transform(t, X) @test length(t.mean) == 8 @test isempty(t.scale) - @test Y ≈ X .- mean(X, dims=1) + @test Y ≈ X .- mean(X, dims = 1) @test reconstruct(t, Y) ≈ X @test transform!(t, X) === X @test isequal(X, Y) @@ -45,37 +45,37 @@ using Test @test Y ≈ X_ X = copy(X_) - t = fit(ZScoreTransform, X, dims=1) + t = fit(ZScoreTransform, X, dims = 1) Y = transform(t, X) @test length(t.mean) == 8 @test length(t.scale) == 8 - @test Y ≈ (X .- mean(X, dims=1)) ./ std(X, dims=1) + @test Y ≈ (X .- mean(X, dims = 1)) ./ std(X, dims = 1) @test reconstruct(t, Y) ≈ X - @test Y ≈ standardize(ZScoreTransform, X, dims=1) + @test Y ≈ standardize(ZScoreTransform, X, dims = 1) @test transform!(t, X) === X @test isequal(X, Y) @test reconstruct!(t, Y) === Y @test Y ≈ X_ X = copy(X_) - t = fit(ZScoreTransform, X, dims=2) + t = fit(ZScoreTransform, X, dims = 2) 
Y = transform(t, X) @test length(t.mean) == 5 @test length(t.scale) == 5 - @test Y ≈ (X .- mean(X, dims=2)) ./ std(X, dims=2) + @test Y ≈ (X .- mean(X, dims = 2)) ./ std(X, dims = 2) @test reconstruct(t, Y) ≈ X - @test Y ≈ standardize(ZScoreTransform, X, dims=2) + @test Y ≈ standardize(ZScoreTransform, X, dims = 2) @test transform!(t, X) === X @test isequal(X, Y) @test reconstruct!(t, Y) === Y @test Y ≈ X_ X = copy(X_) - t = fit(UnitRangeTransform, X, dims=1, unit=false) + t = fit(UnitRangeTransform, X, dims = 1, unit = false) Y = transform(t, X) @test length(t.min) == 8 @test length(t.scale) == 8 - @test Y ≈ X ./ (maximum(X, dims=1) .- minimum(X, dims=1)) + @test Y ≈ X ./ (maximum(X, dims = 1) .- minimum(X, dims = 1)) @test reconstruct(t, Y) ≈ X @test transform!(t, X) === X @test isequal(X, Y) @@ -83,26 +83,26 @@ using Test @test Y ≈ X_ X = copy(X_) - t = fit(UnitRangeTransform, X, dims=1) + t = fit(UnitRangeTransform, X, dims = 1) Y = transform(t, X) @test isa(t, AbstractDataTransform) @test length(t.min) == 8 @test length(t.scale) == 8 - @test Y ≈ (X .- minimum(X, dims=1)) ./ (maximum(X, dims=1) .- minimum(X, dims=1)) + @test Y ≈ (X .- minimum(X, dims = 1)) ./ (maximum(X, dims = 1) .- minimum(X, dims = 1)) @test reconstruct(t, Y) ≈ X - @test Y ≈ standardize(UnitRangeTransform, X, dims=1) + @test Y ≈ standardize(UnitRangeTransform, X, dims = 1) @test transform!(t, X) === X @test isequal(X, Y) @test reconstruct!(t, Y) === Y @test Y ≈ X_ X = copy(X_) - t = fit(UnitRangeTransform, X, dims=2) + t = fit(UnitRangeTransform, X, dims = 2) Y = transform(t, X) @test isa(t, AbstractDataTransform) @test length(t.min) == 5 @test length(t.scale) == 5 - @test Y ≈ (X .- minimum(X, dims=2)) ./ (maximum(X, dims=2) .- minimum(X, dims=2)) + @test Y ≈ (X .- minimum(X, dims = 2)) ./ (maximum(X, dims = 2) .- minimum(X, dims = 2)) @test reconstruct(t, Y) ≈ X @test transform!(t, X) === X @test isequal(X, Y) @@ -113,7 +113,7 @@ using Test X = rand(10) X_ = copy(X) - t = 
fit(ZScoreTransform, X, dims=1, center=false, scale=false) + t = fit(ZScoreTransform, X, dims = 1, center = false, scale = false) Y = transform(t, X) @test transform(t, X) ≈ Y @test reconstruct(t, Y) ≈ X @@ -123,9 +123,9 @@ using Test @test Y ≈ X_ X = copy(X_) - t = fit(ZScoreTransform, X, dims=1, center=false) + t = fit(ZScoreTransform, X, dims = 1, center = false) Y = transform(t, X) - @test Y ≈ X ./ std(X, dims=1) + @test Y ≈ X ./ std(X, dims = 1) @test transform(t, X) ≈ Y @test reconstruct(t, Y) ≈ X @test transform!(t, X) === X @@ -134,9 +134,9 @@ using Test @test Y ≈ X_ X = copy(X_) - t = fit(ZScoreTransform, X, dims=1, scale=false) + t = fit(ZScoreTransform, X, dims = 1, scale = false) Y = transform(t, X) - @test Y ≈ X .- mean(X, dims=1) + @test Y ≈ X .- mean(X, dims = 1) @test transform(t, X) ≈ Y @test reconstruct(t, Y) ≈ X @test transform!(t, X) === X @@ -145,21 +145,21 @@ using Test @test Y ≈ X_ X = copy(X_) - t = fit(ZScoreTransform, X, dims=1) + t = fit(ZScoreTransform, X, dims = 1) Y = transform(t, X) - @test Y ≈ (X .- mean(X, dims=1)) ./ std(X, dims=1) + @test Y ≈ (X .- mean(X, dims = 1)) ./ std(X, dims = 1) @test transform(t, X) ≈ Y @test reconstruct(t, Y) ≈ X - @test Y ≈ standardize(ZScoreTransform, X, dims=1) + @test Y ≈ standardize(ZScoreTransform, X, dims = 1) @test transform!(t, X) === X @test isequal(X, Y) @test reconstruct!(t, Y) === Y @test Y ≈ X_ X = copy(X_) - t = fit(UnitRangeTransform, X, dims=1) + t = fit(UnitRangeTransform, X, dims = 1) Y = transform(t, X) - @test Y ≈ (X .- minimum(X, dims=1)) ./ (maximum(X, dims=1) .- minimum(X, dims=1)) + @test Y ≈ (X .- minimum(X, dims = 1)) ./ (maximum(X, dims = 1) .- minimum(X, dims = 1)) @test transform(t, X) ≈ Y @test reconstruct(t, Y) ≈ X @test transform!(t, X) === X @@ -168,12 +168,12 @@ using Test @test Y ≈ X_ X = copy(X_) - t = fit(UnitRangeTransform, X, dims=1, unit=false) + t = fit(UnitRangeTransform, X, dims = 1, unit = false) Y = transform(t, X) - @test Y ≈ X ./ (maximum(X, dims=1) .- 
minimum(X, dims=1)) + @test Y ≈ X ./ (maximum(X, dims = 1) .- minimum(X, dims = 1)) @test transform(t, X) ≈ Y @test reconstruct(t, Y) ≈ X - @test Y ≈ standardize(UnitRangeTransform, X, dims=1, unit=false) + @test Y ≈ standardize(UnitRangeTransform, X, dims = 1, unit = false) @test transform!(t, X) === X @test isequal(X, Y) @test reconstruct!(t, Y) === Y diff --git a/test/weights.jl b/test/weights.jl index c99f31c92..742bc4a06 100644 --- a/test/weights.jl +++ b/test/weights.jl @@ -11,640 +11,640 @@ MyWeights(values) = MyWeights(values, sum(values)) @testset "StatsBase.Weights" begin -weight_funcs = (weights, aweights, fweights, pweights) - -## Construction - -@testset "$f" for f in weight_funcs - @test isa(f([1, 2, 3]), AbstractWeights{Int}) - @test isa(f([1., 2., 3.]), AbstractWeights{Float64}) - @test isa(f([1 2 3; 4 5 6]), AbstractWeights{Int}) - - @test isempty(f(Float64[])) - @test size(f([1, 2, 3])) == (3,) - @test axes(f([1, 2, 3])) == (Base.OneTo(3),) - @test IndexStyle(f([1, 2, 3])) == IndexLinear() - - w = [1., 2., 3.] 
- wv = f(w) - @test eltype(wv) === Float64 - @test length(wv) === 3 - @test wv == w - @test sum(wv) === 6.0 - @test !isempty(wv) - @test Base.mightalias(w, wv) - @test !Base.mightalias([1], wv) - - b = trues(3) - bv = f(b) - @test eltype(bv) === Bool - @test length(bv) === 3 - @test convert(Vector, bv) == b - @test sum(bv) === 3 - @test !isempty(bv) - - ba = BitArray([true, false, true]) - sa = sparsevec([1., 0., 2.]) - - @test sum(ba, wv) === 4.0 - @test sum(sa, wv) === 7.0 - - @test_throws ArgumentError f([0.1, Inf]) - @test_throws ArgumentError f([0.1, NaN]) + weight_funcs = (weights, aweights, fweights, pweights) + + ## Construction + + @testset "$f" for f in weight_funcs + @test isa(f([1, 2, 3]), AbstractWeights{Int}) + @test isa(f([1.0, 2.0, 3.0]), AbstractWeights{Float64}) + @test isa(f([1 2 3; 4 5 6]), AbstractWeights{Int}) + + @test isempty(f(Float64[])) + @test size(f([1, 2, 3])) == (3,) + @test axes(f([1, 2, 3])) == (Base.OneTo(3),) + @test IndexStyle(f([1, 2, 3])) == IndexLinear() + + w = [1.0, 2.0, 3.0] + wv = f(w) + @test eltype(wv) === Float64 + @test length(wv) === 3 + @test wv == w + @test sum(wv) === 6.0 + @test !isempty(wv) + @test Base.mightalias(w, wv) + @test !Base.mightalias([1], wv) + + b = trues(3) + bv = f(b) + @test eltype(bv) === Bool + @test length(bv) === 3 + @test convert(Vector, bv) == b + @test sum(bv) === 3 + @test !isempty(bv) + + ba = BitArray([true, false, true]) + sa = sparsevec([1.0, 0.0, 2.0]) + + @test sum(ba, wv) === 4.0 + @test sum(sa, wv) === 7.0 + + @test_throws ArgumentError f([0.1, Inf]) + @test_throws ArgumentError f([0.1, NaN]) -end + end -@testset "$f, setindex!" for f in weight_funcs - w = [1., 2., 3.] - wv = f(w) - - # Check getindex & sum - @test wv[1] === 1. - @test sum(wv) === 6. - @test wv == w - - # Test setindex! success - @test (wv[1] = 4) === 4 # setindex! returns original val - @test wv[1] === 4. # value correctly converted and set - @test sum(wv) === 9. # sum updated - @test wv == [4., 2., 3.] 
# Test state of all values - - # Test mulivalue setindex! - wv[1:2] = [3., 5.] - @test wv[1] === 3. - @test wv[2] === 5. - @test sum(wv) === 11. - @test wv == [3., 5., 3.] # Test state of all values - - @test_throws ArgumentError wv[1] = Inf - @test_throws ArgumentError wv[1] = NaN - - # Test failed setindex! due to conversion error - w = [1, 2, 3] - wv = f(w) - - @test_throws InexactError wv[1] = 1.5 # Returns original value - @test wv[1] === 1 # value not updated - @test sum(wv) === 6 # sum not corrupted - @test wv == [1, 2, 3] # Test state of all values -end + @testset "$f, setindex!" for f in weight_funcs + w = [1.0, 2.0, 3.0] + wv = f(w) + + # Check getindex & sum + @test wv[1] === 1.0 + @test sum(wv) === 6.0 + @test wv == w + + # Test setindex! success + @test (wv[1] = 4) === 4 # setindex! returns original val + @test wv[1] === 4.0 # value correctly converted and set + @test sum(wv) === 9.0 # sum updated + @test wv == [4.0, 2.0, 3.0] # Test state of all values + + # Test mulivalue setindex! + wv[1:2] = [3.0, 5.0] + @test wv[1] === 3.0 + @test wv[2] === 5.0 + @test sum(wv) === 11.0 + @test wv == [3.0, 5.0, 3.0] # Test state of all values + + @test_throws ArgumentError wv[1] = Inf + @test_throws ArgumentError wv[1] = NaN + + # Test failed setindex! 
due to conversion error + w = [1, 2, 3] + wv = f(w) + + @test_throws InexactError wv[1] = 1.5 # Returns original value + @test wv[1] === 1 # value not updated + @test sum(wv) === 6 # sum not corrupted + @test wv == [1, 2, 3] # Test state of all values + end -@testset "$f, isequal and ==" for f in weight_funcs - x = f([1, 2, 3]) + @testset "$f, isequal and ==" for f in weight_funcs + x = f([1, 2, 3]) - y = f([1, 2, 3]) # same values, type and parameters - @test isequal(x, y) - @test x == y + y = f([1, 2, 3]) # same values, type and parameters + @test isequal(x, y) + @test x == y - y = f([1.0, 2.0, 3.0]) # same values and type, different parameters - @test isequal(x, y) - @test x == y + y = f([1.0, 2.0, 3.0]) # same values and type, different parameters + @test isequal(x, y) + @test x == y - if f != fweights # same values and parameters, different types - y = fweights([1, 2, 3]) + if f != fweights # same values and parameters, different types + y = fweights([1, 2, 3]) + @test !isequal(x, y) + @test x != y + end + + x = f([1.0, 2.0, 0.0]) # isequal and == treat ±0.0 differently + y = f([1.0, 2.0, -0.0]) @test !isequal(x, y) - @test x != y + @test x == y end - x = f([1.0, 2.0, 0.0]) # isequal and == treat ±0.0 differently - y = f([1.0, 2.0, -0.0]) - @test !isequal(x, y) - @test x == y -end - -@testset "Unit weights" begin - wv = uweights(Float64, 3) - @test wv[1] === 1. - @test wv[1:3] == fill(1.0, 3) - @test wv[:] == fill(1.0, 3) - @test !isempty(wv) - @test length(wv) === 3 - @test size(wv) === (3,) - @test axes(wv) === (Base.OneTo(3),) - @test sum(wv) === 3. 
- @test wv == fill(1.0, 3) - @test StatsBase.varcorrection(wv) == 1/3 - @test !isequal(wv, fweights(fill(1.0, 3))) - @test isequal(wv, uweights(3)) - @test wv != fweights(fill(1.0, 3)) - @test wv == uweights(3) - @test wv[[true, false, false]] == uweights(Float64, 1) - @test convert(Vector, wv) == ones(3) - @test !Base.mightalias(wv, uweights(Float64, 3)) - @test Base.dataids(wv) == () -end + @testset "Unit weights" begin + wv = uweights(Float64, 3) + @test wv[1] === 1.0 + @test wv[1:3] == fill(1.0, 3) + @test wv[:] == fill(1.0, 3) + @test !isempty(wv) + @test length(wv) === 3 + @test size(wv) === (3,) + @test axes(wv) === (Base.OneTo(3),) + @test sum(wv) === 3.0 + @test wv == fill(1.0, 3) + @test StatsBase.varcorrection(wv) == 1 / 3 + @test !isequal(wv, fweights(fill(1.0, 3))) + @test isequal(wv, uweights(3)) + @test wv != fweights(fill(1.0, 3)) + @test wv == uweights(3) + @test wv[[true, false, false]] == uweights(Float64, 1) + @test convert(Vector, wv) == ones(3) + @test !Base.mightalias(wv, uweights(Float64, 3)) + @test Base.dataids(wv) == () + end -## wsum + ## wsum -@testset "wsum" begin - x = [6., 8., 9.] - w = [2., 3., 4.] - p = [1. 2. ; 3. 4.] - q = [1., 2., 3., 4.] 
+ @testset "wsum" begin + x = [6.0, 8.0, 9.0] + w = [2.0, 3.0, 4.0] + p = [1.0 2.0 ; 3.0 4.0] + q = [1.0, 2.0, 3.0, 4.0] - @test wsum(Float64[], Float64[]) === 0.0 - @test wsum(x, w) === 72.0 - @test wsum(p, q) === 29.0 + @test wsum(Float64[], Float64[]) === 0.0 + @test wsum(x, w) === 72.0 + @test wsum(p, q) === 29.0 - ## wsum along dimension + ## wsum along dimension - @test wsum(x, w, 1) == [72.0] + @test wsum(x, w, 1) == [72.0] - x = rand(6, 8) - w1 = rand(6) - w2 = rand(8) + x = rand(6, 8) + w1 = rand(6) + w2 = rand(8) - @test size(wsum(x, w1, 1)) == (1, 8) - @test size(wsum(x, w2, 2)) == (6, 1) + @test size(wsum(x, w1, 1)) == (1, 8) + @test size(wsum(x, w2, 2)) == (6, 1) - @test wsum(x, w1, 1) ≈ sum(x .* w1, dims=1) - @test wsum(x, w2, 2) ≈ sum(x .* w2', dims=2) + @test wsum(x, w1, 1) ≈ sum(x .* w1, dims = 1) + @test wsum(x, w2, 2) ≈ sum(x .* w2', dims = 2) - x = rand(6, 5, 4) - w1 = rand(6) - w2 = rand(5) - w3 = rand(4) + x = rand(6, 5, 4) + w1 = rand(6) + w2 = rand(5) + w3 = rand(4) - @test size(wsum(x, w1, 1)) == (1, 5, 4) - @test size(wsum(x, w2, 2)) == (6, 1, 4) - @test size(wsum(x, w3, 3)) == (6, 5, 1) + @test size(wsum(x, w1, 1)) == (1, 5, 4) + @test size(wsum(x, w2, 2)) == (6, 1, 4) + @test size(wsum(x, w3, 3)) == (6, 5, 1) - @test wsum(x, w1, 1) ≈ sum(x .* w1, dims=1) - @test wsum(x, w2, 2) ≈ sum(x .* w2', dims=2) - @test wsum(x, w3, 3) ≈ sum(x .* reshape(w3, 1, 1, 4), dims=3) + @test wsum(x, w1, 1) ≈ sum(x .* w1, dims = 1) + @test wsum(x, w2, 2) ≈ sum(x .* w2', dims = 2) + @test wsum(x, w3, 3) ≈ sum(x .* reshape(w3, 1, 1, 4), dims = 3) - v = view(x, 2:4, :, :) + v = view(x, 2:4, :, :) - @test wsum(v, w1[1:3], 1) ≈ sum(v .* w1[1:3], dims=1) - @test wsum(v, w2, 2) ≈ sum(v .* w2', dims=2) - @test wsum(v, w3, 3) ≈ sum(v .* reshape(w3, 1, 1, 4), dims=3) + @test wsum(v, w1[1:3], 1) ≈ sum(v .* w1[1:3], dims = 1) + @test wsum(v, w2, 2) ≈ sum(v .* w2', dims = 2) + @test wsum(v, w3, 3) ≈ sum(v .* reshape(w3, 1, 1, 4), dims = 3) - ## wsum for Arrays with 
non-BlasReal elements + ## wsum for Arrays with non-BlasReal elements - x = rand(1:100, 6, 8) - w1 = rand(6) - w2 = rand(8) + x = rand(1:100, 6, 8) + w1 = rand(6) + w2 = rand(8) - @test wsum(x, w1, 1) ≈ sum(x .* w1, dims=1) - @test wsum(x, w2, 2) ≈ sum(x .* w2', dims=2) + @test wsum(x, w1, 1) ≈ sum(x .* w1, dims = 1) + @test wsum(x, w2, 2) ≈ sum(x .* w2', dims = 2) - ## wsum! + ## wsum! - x = rand(6) - w = rand(6) + x = rand(6) + w = rand(6) - r = ones(1) - @test wsum!(r, x, w, 1; init=true) === r - @test r ≈ [dot(x, w)] + r = ones(1) + @test wsum!(r, x, w, 1; init = true) === r + @test r ≈ [dot(x, w)] - r = ones(1) - @test wsum!(r, x, w, 1; init=false) === r - @test r ≈ [dot(x, w) + 1.0] + r = ones(1) + @test wsum!(r, x, w, 1; init = false) === r + @test r ≈ [dot(x, w) + 1.0] - x = rand(6, 8) - w1 = rand(6) - w2 = rand(8) + x = rand(6, 8) + w1 = rand(6) + w2 = rand(8) - r = ones(1, 8) - @test wsum!(r, x, w1, 1; init=true) === r - @test r ≈ sum(x .* w1, dims=1) + r = ones(1, 8) + @test wsum!(r, x, w1, 1; init = true) === r + @test r ≈ sum(x .* w1, dims = 1) - r = ones(1, 8) - @test wsum!(r, x, w1, 1; init=false) === r - @test r ≈ sum(x .* w1, dims=1) .+ 1.0 + r = ones(1, 8) + @test wsum!(r, x, w1, 1; init = false) === r + @test r ≈ sum(x .* w1, dims = 1) .+ 1.0 - r = ones(6) - @test wsum!(r, x, w2, 2; init=true) === r - @test r ≈ sum(x .* w2', dims=2) + r = ones(6) + @test wsum!(r, x, w2, 2; init = true) === r + @test r ≈ sum(x .* w2', dims = 2) - r = ones(6) - @test wsum!(r, x, w2, 2; init=false) === r - @test r ≈ sum(x .* w2', dims=2) .+ 1.0 + r = ones(6) + @test wsum!(r, x, w2, 2; init = false) === r + @test r ≈ sum(x .* w2', dims = 2) .+ 1.0 - x = rand(8, 6, 5) - w1 = rand(8) - w2 = rand(6) - w3 = rand(5) + x = rand(8, 6, 5) + w1 = rand(8) + w2 = rand(6) + w3 = rand(5) - r = ones(1, 6, 5) - @test wsum!(r, x, w1, 1; init=true) === r - @test r ≈ sum(x .* w1, dims=1) + r = ones(1, 6, 5) + @test wsum!(r, x, w1, 1; init = true) === r + @test r ≈ sum(x .* w1, dims = 
1) - r = ones(1, 6, 5) - @test wsum!(r, x, w1, 1; init=false) === r - @test r ≈ sum(x .* w1, dims=1) .+ 1.0 + r = ones(1, 6, 5) + @test wsum!(r, x, w1, 1; init = false) === r + @test r ≈ sum(x .* w1, dims = 1) .+ 1.0 - r = ones(8, 1, 5) - @test wsum!(r, x, w2, 2; init=true) === r - @test r ≈ sum(x .* w2', dims=2) + r = ones(8, 1, 5) + @test wsum!(r, x, w2, 2; init = true) === r + @test r ≈ sum(x .* w2', dims = 2) - r = ones(8, 1, 5) - @test wsum!(r, x, w2, 2; init=false) === r - @test r ≈ sum(x .* w2', dims=2) .+ 1.0 + r = ones(8, 1, 5) + @test wsum!(r, x, w2, 2; init = false) === r + @test r ≈ sum(x .* w2', dims = 2) .+ 1.0 - r = ones(8, 6) - @test wsum!(r, x, w3, 3; init=true) === r - @test r ≈ sum(x .* reshape(w3, (1, 1, 5)), dims=3) + r = ones(8, 6) + @test wsum!(r, x, w3, 3; init = true) === r + @test r ≈ sum(x .* reshape(w3, (1, 1, 5)), dims = 3) - r = ones(8, 6) - @test wsum!(r, x, w3, 3; init=false) === r - @test r ≈ sum(x .* reshape(w3, (1, 1, 5)), dims=3) .+ 1.0 -end + r = ones(8, 6) + @test wsum!(r, x, w3, 3; init = false) === r + @test r ≈ sum(x .* reshape(w3, (1, 1, 5)), dims = 3) .+ 1.0 + end -## sum, mean and quantile + ## sum, mean and quantile -a = reshape(1.0:27.0, 3, 3, 3) + a = reshape(1.0:27.0, 3, 3, 3) -@testset "Sum $f" for f in weight_funcs - @test sum([1.0, 2.0, 3.0], f([1.0, 0.5, 0.5])) ≈ 3.5 - @test sum(1:3, f([1.0, 1.0, 0.5])) ≈ 4.5 - @test sum([1 + 2im, 2 + 3im], f([1.0, 0.5])) ≈ 2 + 3.5im - @test sum([[1, 2], [3, 4]], f([2, 3])) == [11, 16] + @testset "Sum $f" for f in weight_funcs + @test sum([1.0, 2.0, 3.0], f([1.0, 0.5, 0.5])) ≈ 3.5 + @test sum(1:3, f([1.0, 1.0, 0.5])) ≈ 4.5 + @test sum([1 + 2im, 2 + 3im], f([1.0, 0.5])) ≈ 2 + 3.5im + @test sum([[1, 2], [3, 4]], f([2, 3])) == [11, 16] - for wt in ([1.0, 1.0, 1.0], [1.0, 0.2, 0.0], [0.2, 0.0, 1.0]) - @test sum(a, f(wt), dims=1) ≈ sum(a.*reshape(wt, length(wt), 1, 1), dims=1) - @test sum(a, f(wt), dims=2) ≈ sum(a.*reshape(wt, 1, length(wt), 1), dims=2) - @test sum(a, f(wt), dims=3) ≈ 
sum(a.*reshape(wt, 1, 1, length(wt)), dims=3) + for wt in ([1.0, 1.0, 1.0], [1.0, 0.2, 0.0], [0.2, 0.0, 1.0]) + @test sum(a, f(wt), dims = 1) ≈ sum(a .* reshape(wt, length(wt), 1, 1), dims = 1) + @test sum(a, f(wt), dims = 2) ≈ sum(a .* reshape(wt, 1, length(wt), 1), dims = 2) + @test sum(a, f(wt), dims = 3) ≈ sum(a .* reshape(wt, 1, 1, length(wt)), dims = 3) + end end -end -@testset "Mean $f" for f in weight_funcs - @test mean([1:3;], f([1.0, 1.0, 0.5])) ≈ 1.8 - @test mean(1:3, f([1.0, 1.0, 0.5])) ≈ 1.8 - @test mean([1 + 2im, 4 + 5im], f([1.0, 0.5])) ≈ 2 + 3im + @testset "Mean $f" for f in weight_funcs + @test mean([1:3;], f([1.0, 1.0, 0.5])) ≈ 1.8 + @test mean(1:3, f([1.0, 1.0, 0.5])) ≈ 1.8 + @test mean([1 + 2im, 4 + 5im], f([1.0, 0.5])) ≈ 2 + 3im - for wt in ([1.0, 1.0, 1.0], [1.0, 0.2, 0.0], [0.2, 0.0, 1.0]) - @test mean(a, f(wt), dims=1) ≈ sum(a.*reshape(wt, length(wt), 1, 1), dims=1)/sum(wt) - @test mean(a, f(wt), dims=2) ≈ sum(a.*reshape(wt, 1, length(wt), 1), dims=2)/sum(wt) - @test mean(a, f(wt), dims=3) ≈ sum(a.*reshape(wt, 1, 1, length(wt)), dims=3)/sum(wt) - @test_throws ErrorException mean(a, f(wt), dims=4) + for wt in ([1.0, 1.0, 1.0], [1.0, 0.2, 0.0], [0.2, 0.0, 1.0]) + @test mean(a, f(wt), dims = 1) ≈ sum(a .* reshape(wt, length(wt), 1, 1), dims = 1) / sum(wt) + @test mean(a, f(wt), dims = 2) ≈ sum(a .* reshape(wt, 1, length(wt), 1), dims = 2) / sum(wt) + @test mean(a, f(wt), dims = 3) ≈ sum(a .* reshape(wt, 1, 1, length(wt)), dims = 3) / sum(wt) + @test_throws ErrorException mean(a, f(wt), dims = 4) + end end -end -@testset "Quantile fweights" begin - data = ( - [7, 1, 2, 4, 10], - [7, 1, 2, 4, 10], - [7, 1, 2, 4, 10, 15], - [1, 2, 4, 7, 10, 15], - [0, 10, 20, 30], - [1, 2, 3, 4, 5], - [1, 2, 3, 4, 5], - [30, 40, 50, 60, 35], - [2, 0.6, 1.3, 0.3, 0.3, 1.7, 0.7, 1.7], - [1, 2, 2], - [3.7, 3.3, 3.5, 2.8], - [100, 125, 123, 60, 45, 56, 66], - [2, 2, 2, 2, 2, 2], - [2.3], - [-2, -3, 1, 2, -10], - [1, 2, 3, 4, 5], - [5, 4, 3, 2, 1], - [-2, 2, -1, 3, 6], 
- [-10, 1, 1, -10, -10], - ) - wt = ( - [3, 1, 1, 1, 3], - [1, 1, 1, 1, 1], - [3, 1, 1, 1, 3, 3], - [1, 1, 1, 3, 3, 3], - [30, 191, 9, 0], - [10, 1, 1, 1, 9], - [10, 1, 1, 1, 900], - [1, 3, 5, 4, 2], - [2, 2, 5, 0, 2, 2, 1, 6], - [1, 1, 8], - [5, 5, 4, 1], - [30, 56, 144, 24, 55, 43, 67], - [1, 2, 3, 4, 5, 6], - [12], - [7, 1, 1, 1, 6], - [1, 0, 0, 0, 2], - [1, 2, 3, 4, 5], - [1, 2, 3, 2, 1], - [0, 1, 1, 1, 1], - ) - p = [0.0, 0.25, 0.5, 0.75, 1.0] - function _rep(x::AbstractVector, lengths::AbstractVector{Int}) - res = similar(x, sum(lengths)) - i = 1 - for idx in 1:length(x) - tmp = x[idx] - for kdx in 1:lengths[idx] - res[i] = tmp - i += 1 + @testset "Quantile fweights" begin + data = ( + [7, 1, 2, 4, 10], + [7, 1, 2, 4, 10], + [7, 1, 2, 4, 10, 15], + [1, 2, 4, 7, 10, 15], + [0, 10, 20, 30], + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [30, 40, 50, 60, 35], + [2, 0.6, 1.3, 0.3, 0.3, 1.7, 0.7, 1.7], + [1, 2, 2], + [3.7, 3.3, 3.5, 2.8], + [100, 125, 123, 60, 45, 56, 66], + [2, 2, 2, 2, 2, 2], + [2.3], + [-2, -3, 1, 2, -10], + [1, 2, 3, 4, 5], + [5, 4, 3, 2, 1], + [-2, 2, -1, 3, 6], + [-10, 1, 1, -10, -10], + ) + wt = ( + [3, 1, 1, 1, 3], + [1, 1, 1, 1, 1], + [3, 1, 1, 1, 3, 3], + [1, 1, 1, 3, 3, 3], + [30, 191, 9, 0], + [10, 1, 1, 1, 9], + [10, 1, 1, 1, 900], + [1, 3, 5, 4, 2], + [2, 2, 5, 0, 2, 2, 1, 6], + [1, 1, 8], + [5, 5, 4, 1], + [30, 56, 144, 24, 55, 43, 67], + [1, 2, 3, 4, 5, 6], + [12], + [7, 1, 1, 1, 6], + [1, 0, 0, 0, 2], + [1, 2, 3, 4, 5], + [1, 2, 3, 2, 1], + [0, 1, 1, 1, 1], + ) + p = [0.0, 0.25, 0.5, 0.75, 1.0] + function _rep(x::AbstractVector, lengths::AbstractVector{Int}) + res = similar(x, sum(lengths)) + i = 1 + for idx in 1:length(x) + tmp = x[idx] + for kdx in 1:lengths[idx] + res[i] = tmp + i += 1 + end end + return res + end + # quantile with fweights is the same as repeated vectors + for i in 1:length(data) + @test quantile(data[i], fweights(wt[i]), p) ≈ quantile(_rep(data[i], wt[i]), p) + end + # quantile with fweights = 1 is the same as 
quantile + for i in 1:length(data) + @test quantile(data[i], fweights(fill!(similar(wt[i]), 1)), p) ≈ quantile(data[i], p) end - return res - end - # quantile with fweights is the same as repeated vectors - for i = 1:length(data) - @test quantile(data[i], fweights(wt[i]), p) ≈ quantile(_rep(data[i], wt[i]), p) - end - # quantile with fweights = 1 is the same as quantile - for i = 1:length(data) - @test quantile(data[i], fweights(fill!(similar(wt[i]), 1)), p) ≈ quantile(data[i], p) - end - # Issue #313 - @test quantile([1, 2, 3, 4, 5], fweights([0,1,2,1,0]), p) ≈ quantile([2, 3, 3, 4], p) - @test quantile([1, 2], fweights([1, 1]), 0.25) ≈ 1.25 - @test quantile([1, 2], fweights([2, 2]), 0.25) ≈ 1.0 + # Issue #313 + @test quantile([1, 2, 3, 4, 5], fweights([0, 1, 2, 1, 0]), p) ≈ quantile([2, 3, 3, 4], p) + @test quantile([1, 2], fweights([1, 1]), 0.25) ≈ 1.25 + @test quantile([1, 2], fweights([2, 2]), 0.25) ≈ 1.0 - # test non integer frequency weights - quantile([1, 2], fweights([1.0, 2.0]), 0.25) == quantile([1, 2], fweights([1, 2]), 0.25) - @test_throws ArgumentError quantile([1, 2], fweights([1.5, 2.0]), 0.25) + # test non integer frequency weights + quantile([1, 2], fweights([1.0, 2.0]), 0.25) == quantile([1, 2], fweights([1, 2]), 0.25) + @test_throws ArgumentError quantile([1, 2], fweights([1.5, 2.0]), 0.25) - @test_throws ArgumentError quantile([1, 2], fweights([1, 2]), nextfloat(1.0)) - @test_throws ArgumentError quantile([1, 2], fweights([1, 2]), prevfloat(0.0)) -end + @test_throws ArgumentError quantile([1, 2], fweights([1, 2]), nextfloat(1.0)) + @test_throws ArgumentError quantile([1, 2], fweights([1, 2]), prevfloat(0.0)) + end -@testset "Quantile aweights, pweights and weights" for f in (aweights, pweights, weights) - data = ( - [7, 1, 2, 4, 10], - [7, 1, 2, 4, 10], - [7, 1, 2, 4, 10, 15], - [1, 2, 4, 7, 10, 15], - [0, 10, 20, 30], - [1, 2, 3, 4, 5], - [1, 2, 3, 4, 5], - [30, 40, 50, 60, 35], - [2, 0.6, 1.3, 0.3, 0.3, 1.7, 0.7, 1.7], - [1, 2, 2], - [3.7, 
3.3, 3.5, 2.8], - [100, 125, 123, 60, 45, 56, 66], - [2, 2, 2, 2, 2, 2], - [2.3], - [-2, -3, 1, 2, -10], - [1, 2, 3, 4, 5], - [5, 4, 3, 2, 1], - [-2, 2, -1, 3, 6], - [-10, 1, 1, -10, -10], - ) - wt = ( - [1, 1/3, 1/3, 1/3, 1], - [1, 1, 1, 1, 1], - [1, 1/3, 1/3, 1/3, 1, 1], - [1/3, 1/3, 1/3, 1, 1, 1], - [30, 191, 9, 0], - [10, 1, 1, 1, 9], - [10, 1, 1, 1, 900], - [1, 3, 5, 4, 2], - [2, 2, 5, 1, 2, 2, 1, 6], - [0.1, 0.1, 0.8], - [5, 5, 4, 1], - [30, 56, 144, 24, 55, 43, 67], - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6], - [12], - [7, 1, 1, 1, 6], - [1, 0, 0, 0, 2], - [1, 2, 3, 4, 5], - [0.1, 0.2, 0.3, 0.2, 0.1], - [1, 1, 1, 1, 1], - ) - quantile_answers = ( - [1.0, 4.0, 6.0, 8.0, 10.0], - [1.0, 2.0, 4.0, 7.0, 10.0], - [1.0, 4.75, 7.5, 10.4166667, 15.0], - [1.0, 4.75, 7.5, 10.4166667, 15.0], - [0.0, 2.6178010, 5.2356021, 7.8534031, 20.0], - [1.0, 4.0, 4.3333333, 4.6666667, 5.0], - [1.0, 4.2475, 4.4983333, 4.7491667, 5.0], - [30.0, 37.5, 44.0, 51.25, 60.0], - [0.3, 0.7, 1.3, 1.7, 2.0], - [1.0, 2.0, 2.0, 2.0, 2.0], - [2.8, 3.15, 3.4, 3.56, 3.7], - [45.0, 62.149253, 102.875, 117.4097222, 125.0], - [2.0, 2.0, 2.0, 2.0, 2.0], - [2.3, 2.3, 2.3, 2.3, 2.3], - [-10.0, -2.7857143, -2.4285714, -2.0714286, 2.0], - [1.0, 2.0, 3.0, 4.0, 5.0], - [1.0, 1.625, 2.3333333, 3.25, 5.0], - [-2.0, -1.3333333, 0.5, 2.5, 6.0], - [-10.0, -10.0, -10.0, 1.0, 1.0] - ) - p = [0.0, 0.25, 0.5, 0.75, 1.0] - - Random.seed!(10) - for i = 1:length(data) - @test quantile(data[i], f(wt[i]), p) ≈ quantile_answers[i] atol = 1e-5 - for j = 1:10 - # order of p does not matter - reorder = sortperm(rand(length(p))) - @test quantile(data[i], f(wt[i]), p[reorder]) ≈ quantile_answers[i][reorder] atol = 1e-5 - end - for j = 1:10 - # order of w does not matter - reorder = sortperm(rand(length(data[i]))) - @test quantile(data[i][reorder], f(wt[i][reorder]), p) ≈ quantile_answers[i] atol = 1e-5 + @testset "Quantile aweights, pweights and weights" for f in (aweights, pweights, weights) + data = ( + [7, 1, 2, 4, 10], + [7, 1, 2, 4, 
10], + [7, 1, 2, 4, 10, 15], + [1, 2, 4, 7, 10, 15], + [0, 10, 20, 30], + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [30, 40, 50, 60, 35], + [2, 0.6, 1.3, 0.3, 0.3, 1.7, 0.7, 1.7], + [1, 2, 2], + [3.7, 3.3, 3.5, 2.8], + [100, 125, 123, 60, 45, 56, 66], + [2, 2, 2, 2, 2, 2], + [2.3], + [-2, -3, 1, 2, -10], + [1, 2, 3, 4, 5], + [5, 4, 3, 2, 1], + [-2, 2, -1, 3, 6], + [-10, 1, 1, -10, -10], + ) + wt = ( + [1, 1 / 3, 1 / 3, 1 / 3, 1], + [1, 1, 1, 1, 1], + [1, 1 / 3, 1 / 3, 1 / 3, 1, 1], + [1 / 3, 1 / 3, 1 / 3, 1, 1, 1], + [30, 191, 9, 0], + [10, 1, 1, 1, 9], + [10, 1, 1, 1, 900], + [1, 3, 5, 4, 2], + [2, 2, 5, 1, 2, 2, 1, 6], + [0.1, 0.1, 0.8], + [5, 5, 4, 1], + [30, 56, 144, 24, 55, 43, 67], + [0.1, 0.2, 0.3, 0.4, 0.5, 0.6], + [12], + [7, 1, 1, 1, 6], + [1, 0, 0, 0, 2], + [1, 2, 3, 4, 5], + [0.1, 0.2, 0.3, 0.2, 0.1], + [1, 1, 1, 1, 1], + ) + quantile_answers = ( + [1.0, 4.0, 6.0, 8.0, 10.0], + [1.0, 2.0, 4.0, 7.0, 10.0], + [1.0, 4.75, 7.5, 10.4166667, 15.0], + [1.0, 4.75, 7.5, 10.4166667, 15.0], + [0.0, 2.617801, 5.2356021, 7.8534031, 20.0], + [1.0, 4.0, 4.3333333, 4.6666667, 5.0], + [1.0, 4.2475, 4.4983333, 4.7491667, 5.0], + [30.0, 37.5, 44.0, 51.25, 60.0], + [0.3, 0.7, 1.3, 1.7, 2.0], + [1.0, 2.0, 2.0, 2.0, 2.0], + [2.8, 3.15, 3.4, 3.56, 3.7], + [45.0, 62.149253, 102.875, 117.4097222, 125.0], + [2.0, 2.0, 2.0, 2.0, 2.0], + [2.3, 2.3, 2.3, 2.3, 2.3], + [-10.0, -2.7857143, -2.4285714, -2.0714286, 2.0], + [1.0, 2.0, 3.0, 4.0, 5.0], + [1.0, 1.625, 2.3333333, 3.25, 5.0], + [-2.0, -1.3333333, 0.5, 2.5, 6.0], + [-10.0, -10.0, -10.0, 1.0, 1.0], + ) + p = [0.0, 0.25, 0.5, 0.75, 1.0] + + Random.seed!(10) + for i in 1:length(data) + @test quantile(data[i], f(wt[i]), p) ≈ quantile_answers[i] atol = 1.0e-5 + for j in 1:10 + # order of p does not matter + reorder = sortperm(rand(length(p))) + @test quantile(data[i], f(wt[i]), p[reorder]) ≈ quantile_answers[i][reorder] atol = 1.0e-5 + end + for j in 1:10 + # order of w does not matter + reorder = sortperm(rand(length(data[i]))) + 
@test quantile(data[i][reorder], f(wt[i][reorder]), p) ≈ quantile_answers[i] atol = 1.0e-5 + end end - end - # All equal weights corresponds to base quantile - for v in (1, 2, 345) - for i = 1:length(data) - w = f(fill(v, length(data[i]))) - @test quantile(data[i], w, p) ≈ quantile(data[i], p) atol = 1e-5 - for j = 1:10 - prandom = rand(4) - @test quantile(data[i], w, prandom) ≈ quantile(data[i], prandom) atol = 1e-5 + # All equal weights corresponds to base quantile + for v in (1, 2, 345) + for i in 1:length(data) + w = f(fill(v, length(data[i]))) + @test quantile(data[i], w, p) ≈ quantile(data[i], p) atol = 1.0e-5 + for j in 1:10 + prandom = rand(4) + @test quantile(data[i], w, prandom) ≈ quantile(data[i], prandom) atol = 1.0e-5 + end end end + # test zeros are removed + for i in 1:length(data) + @test quantile(vcat(1.0, data[i]), f(vcat(0.0, wt[i])), p) ≈ quantile_answers[i] atol = 1.0e-5 + end + # Syntax + v = [7, 1, 2, 4, 10] + w = [1, 1 / 3, 1 / 3, 1 / 3, 1] + answer = 6.0 + @test quantile(data[1], f(w), 0.5) ≈ answer atol = 1.0e-5 + + # Test non-Real eltype + @test_throws ArgumentError quantile([missing, 1], f([1, 2]), 0.5) + @test quantile(Union{Float64, Missing}[1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) == + quantile(Any[1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) == + quantile([1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) + @test quantile([Date(2005, 01, 01), Date(2005, 01, 01)], f([1, 1]), 0.5) == + Date(2005, 01, 01) + + @test_throws ArgumentError quantile([missing, 1], f([1, 2]), [0.5, 0.75]) + @test quantile(Union{Float64, Missing}[1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) == + quantile(Any[1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) == + quantile([1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) + @test quantile(fill(Date(2005, 01, 01), 3), f([1, 1, 1]), [0.5, 0.75]) == + fill(Date(2005, 01, 01), 2) end - # test zeros are removed - for i = 1:length(data) - @test quantile(vcat(1.0, data[i]), f(vcat(0.0, wt[i])), p) ≈ quantile_answers[i] atol = 1e-5 - end - # Syntax - v = [7, 1, 2, 
4, 10] - w = [1, 1/3, 1/3, 1/3, 1] - answer = 6.0 - @test quantile(data[1], f(w), 0.5) ≈ answer atol = 1e-5 - - # Test non-Real eltype - @test_throws ArgumentError quantile([missing, 1], f([1, 2]), 0.5) - @test quantile(Union{Float64, Missing}[1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) == - quantile(Any[1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) == - quantile([1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) - @test quantile([Date(2005, 01, 01), Date(2005, 01, 01)], f([1, 1]), 0.5) == - Date(2005, 01, 01) - - @test_throws ArgumentError quantile([missing, 1], f([1, 2]), [0.5, 0.75]) - @test quantile(Union{Float64, Missing}[1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) == - quantile(Any[1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) == - quantile([1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) - @test quantile(fill(Date(2005, 01, 01), 3), f([1, 1, 1]), [0.5, 0.75]) == - fill(Date(2005, 01, 01), 2) -end -@testset "Median $f" for f in weight_funcs - data = [4, 3, 2, 1] - wt = [0, 0, 0, 0] - @test_throws ArgumentError median(data, f(wt)) - @test_throws ArgumentError median(Float64[], f(Float64[])) - wt = [1, 2, 3, 4, 5] - @test_throws ArgumentError median(data, f(wt)) - @test_throws MethodError median([4 3 2 1 0], f(wt)) - @test_throws MethodError median([[1 2] ; [4 5] ; [7 8] ; [10 11] ; [13 14]], f(wt)) - data = [1, 3, 2, NaN, 2] - @test isnan(median(data, f(wt))) - wt = [1, 2, NaN, 4, 5] - @test_throws ArgumentError median(data, f(wt)) - data = [1, 3, 2, 1, 2] - @test_throws ArgumentError median(data, f(wt)) - wt = [-1, -1, -1, -1, -1] - @test_throws ArgumentError median(data, f(wt)) - wt = [-1, -1, -1, 0, 0] - @test_throws ArgumentError median(data, f(wt)) - - data = [4, 3, 2, 1] - wt = [1, 2, 3, 4] - @test median(data, f(wt)) ≈ quantile(data, f(wt), 0.5) atol = 1e-5 - - # Test non-Real eltype - @test_throws ArgumentError median([missing, 1], f([1, 2])) - @test median(Union{Float64, Missing}[1, 2, 3, 4], f([1, 2, 2, 1])) == - median(Any[1, 2, 3, 4], f([1, 2, 2, 1])) == - median([1, 2, 3, 4], f([1, 2, 2, 
1])) - @test median([Date(2005, 01, 01), Date(2005, 01, 01)], f([1, 1])) == - Date(2005, 01, 01) -end + @testset "Median $f" for f in weight_funcs + data = [4, 3, 2, 1] + wt = [0, 0, 0, 0] + @test_throws ArgumentError median(data, f(wt)) + @test_throws ArgumentError median(Float64[], f(Float64[])) + wt = [1, 2, 3, 4, 5] + @test_throws ArgumentError median(data, f(wt)) + @test_throws MethodError median([4 3 2 1 0], f(wt)) + @test_throws MethodError median([[1 2] ; [4 5] ; [7 8] ; [10 11] ; [13 14]], f(wt)) + data = [1, 3, 2, NaN, 2] + @test isnan(median(data, f(wt))) + wt = [1, 2, NaN, 4, 5] + @test_throws ArgumentError median(data, f(wt)) + data = [1, 3, 2, 1, 2] + @test_throws ArgumentError median(data, f(wt)) + wt = [-1, -1, -1, -1, -1] + @test_throws ArgumentError median(data, f(wt)) + wt = [-1, -1, -1, 0, 0] + @test_throws ArgumentError median(data, f(wt)) + + data = [4, 3, 2, 1] + wt = [1, 2, 3, 4] + @test median(data, f(wt)) ≈ quantile(data, f(wt), 0.5) atol = 1.0e-5 + + # Test non-Real eltype + @test_throws ArgumentError median([missing, 1], f([1, 2])) + @test median(Union{Float64, Missing}[1, 2, 3, 4], f([1, 2, 2, 1])) == + median(Any[1, 2, 3, 4], f([1, 2, 2, 1])) == + median([1, 2, 3, 4], f([1, 2, 2, 1])) + @test median([Date(2005, 01, 01), Date(2005, 01, 01)], f([1, 1])) == + Date(2005, 01, 01) + end -@testset "Mismatched eltypes" begin - @test round(mean(Union{Int,Missing}[1,2], weights([1,2])), digits=3) ≈ 1.667 -end + @testset "Mismatched eltypes" begin + @test round(mean(Union{Int, Missing}[1, 2], weights([1, 2])), digits = 3) ≈ 1.667 + end -@testset "Sum, mean, quantiles and variance for unit weights" begin - wt = uweights(Float64, 3) + @testset "Sum, mean, quantiles and variance for unit weights" begin + wt = uweights(Float64, 3) - @test sum([1.0, 2.0, 3.0], wt) ≈ wsum([1.0, 2.0, 3.0], wt) ≈ 6.0 - @test mean([1.0, 2.0, 3.0], wt) ≈ 2.0 + @test sum([1.0, 2.0, 3.0], wt) ≈ wsum([1.0, 2.0, 3.0], wt) ≈ 6.0 + @test mean([1.0, 2.0, 3.0], wt) ≈ 2.0 - @test 
sum(a, wt, dims=1) ≈ sum(a, dims=1) - @test sum(a, wt, dims=2) ≈ sum(a, dims=2) - @test sum(a, wt, dims=3) ≈ sum(a, dims=3) + @test sum(a, wt, dims = 1) ≈ sum(a, dims = 1) + @test sum(a, wt, dims = 2) ≈ sum(a, dims = 2) + @test sum(a, wt, dims = 3) ≈ sum(a, dims = 3) - @test wsum(a, wt, 1) ≈ sum(a, dims=1) - @test wsum(a, wt, 2) ≈ sum(a, dims=2) - @test wsum(a, wt, 3) ≈ sum(a, dims=3) + @test wsum(a, wt, 1) ≈ sum(a, dims = 1) + @test wsum(a, wt, 2) ≈ sum(a, dims = 2) + @test wsum(a, wt, 3) ≈ sum(a, dims = 3) - @test mean(a, wt, dims=1) ≈ mean(a, dims=1) - @test mean(a, wt, dims=2) ≈ mean(a, dims=2) - @test mean(a, wt, dims=3) ≈ mean(a, dims=3) + @test mean(a, wt, dims = 1) ≈ mean(a, dims = 1) + @test mean(a, wt, dims = 2) ≈ mean(a, dims = 2) + @test mean(a, wt, dims = 3) ≈ mean(a, dims = 3) - @test_throws DimensionMismatch sum(a, wt) - @test_throws DimensionMismatch sum(a, wt, dims=4) - @test_throws DimensionMismatch wsum(a, wt, 4) - @test_throws DimensionMismatch mean(a, wt, dims=4) + @test_throws DimensionMismatch sum(a, wt) + @test_throws DimensionMismatch sum(a, wt, dims = 4) + @test_throws DimensionMismatch wsum(a, wt, 4) + @test_throws DimensionMismatch mean(a, wt, dims = 4) - @test quantile([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5), [0.5]) ≈ [6.0] - @test quantile([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5), 0.5) ≈ 6.0 - @test median([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5)) ≈ 6.0 + @test quantile([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5), [0.5]) ≈ [6.0] + @test quantile([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5), 0.5) ≈ 6.0 + @test median([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5)) ≈ 6.0 - @test var(a, uweights(Float64, 27), corrected=false) ≈ var(a, corrected=false) - @test var(a, uweights(Float64, 27), corrected=true) ≈ var(a, corrected= true) -end + @test var(a, uweights(Float64, 27), corrected = false) ≈ var(a, corrected = false) + @test var(a, uweights(Float64, 27), corrected = true) ≈ var(a, corrected = true) + end -@testset "Exponential Weights" begin - λ = 0.2 - 
@testset "Usage" begin - v = [(1 - λ) ^ (4 - i) for i = 1:4] - w = Weights(v) + @testset "Exponential Weights" begin + λ = 0.2 + @testset "Usage" begin + v = [(1 - λ)^(4 - i) for i in 1:4] + w = Weights(v) - @test round.(w, digits=4) == [0.512, 0.64, 0.8, 1.0] + @test round.(w, digits = 4) == [0.512, 0.64, 0.8, 1.0] - @testset "basic" begin - @test eweights(1:4, λ; scale=true) ≈ w - end + @testset "basic" begin + @test eweights(1:4, λ; scale = true) ≈ w + end - @testset "1:n" begin - @test eweights(4, λ; scale=true) ≈ w - end + @testset "1:n" begin + @test eweights(4, λ; scale = true) ≈ w + end - @testset "indexin" begin - v = [(1 - λ) ^ (10 - i) for i = 1:10] + @testset "indexin" begin + v = [(1 - λ)^(10 - i) for i in 1:10] - # Test that we should be able to skip indices easily - @test eweights([1, 3, 5, 7], 1:10, λ; scale=true) ≈ Weights(v[[1, 3, 5, 7]]) + # Test that we should be able to skip indices easily + @test eweights([1, 3, 5, 7], 1:10, λ; scale = true) ≈ Weights(v[[1, 3, 5, 7]]) - # This should also work with actual time types - t1 = DateTime(2019, 1, 1, 1) - tx = t1 + Hour(7) - tn = DateTime(2019, 1, 1, 10) + # This should also work with actual time types + t1 = DateTime(2019, 1, 1, 1) + tx = t1 + Hour(7) + tn = DateTime(2019, 1, 1, 10) - @test eweights(t1:Hour(2):tx, t1:Hour(1):tn, λ; scale=true) ≈ Weights(v[[1, 3, 5, 7]]) + @test eweights(t1:Hour(2):tx, t1:Hour(1):tn, λ; scale = true) ≈ Weights(v[[1, 3, 5, 7]]) + end end - end - @testset "Empty" begin - @test eweights(0, 0.3; scale=true) == Weights(Float64[]) - @test eweights(1:0, 0.3; scale=true) == Weights(Float64[]) - @test eweights(Int[], 1:10, 0.4; scale=true) == Weights(Float64[]) - end + @testset "Empty" begin + @test eweights(0, 0.3; scale = true) == Weights(Float64[]) + @test eweights(1:0, 0.3; scale = true) == Weights(Float64[]) + @test eweights(Int[], 1:10, 0.4; scale = true) == Weights(Float64[]) + end - @testset "Failure Conditions" begin - # λ > 1.0 - @test_throws ArgumentError 
eweights(1, 1.1; scale=true) + @testset "Failure Conditions" begin + # λ > 1.0 + @test_throws ArgumentError eweights(1, 1.1; scale = true) - # time indices are not all positive non-zero integers - @test_throws ArgumentError eweights([0, 1, 2, 3], 0.3; scale=true) + # time indices are not all positive non-zero integers + @test_throws ArgumentError eweights([0, 1, 2, 3], 0.3; scale = true) - # Passing in an array of bools will work because Bool <: Integer, - # but any `false` values will trigger the same argument error as 0.0 - @test_throws ArgumentError eweights([true, false, true, true], 0.3; scale=true) - end + # Passing in an array of bools will work because Bool <: Integer, + # but any `false` values will trigger the same argument error as 0.0 + @test_throws ArgumentError eweights([true, false, true, true], 0.3; scale = true) + end - @testset "scale=false" begin - v = [λ * (1 - λ)^(1 - i) for i = 1:4] - w = Weights(v) + @testset "scale=false" begin + v = [λ * (1 - λ)^(1 - i) for i in 1:4] + w = Weights(v) - @test round.(w, digits=4) == [0.2, 0.25, 0.3125, 0.3906] + @test round.(w, digits = 4) == [0.2, 0.25, 0.3125, 0.3906] - wv = eweights(1:10, λ; scale=false) - @test eweights(1:10, λ; scale=true) ≈ wv / maximum(wv) + wv = eweights(1:10, λ; scale = false) + @test eweights(1:10, λ; scale = true) ≈ wv / maximum(wv) + end end -end -@testset "allequal and allunique" begin - # General weights - for f in (weights, aweights, fweights, pweights) - @test allunique(f(Float64[])) - @test allunique(f([0.4])) - @test allunique(f([0.4, 0.3])) - @test !allunique(f([0.4, 0.4])) - @test allunique(f([0.4, 0.3, 0.5])) - @test !allunique(f([0.4, 0.4, 0.5])) - @test allunique(f([0.4, 0.3, 0.5, 0.35])) - @test !allunique(f([0.4, 0.3, 0.5, 0.4])) + @testset "allequal and allunique" begin + # General weights + for f in (weights, aweights, fweights, pweights) + @test allunique(f(Float64[])) + @test allunique(f([0.4])) + @test allunique(f([0.4, 0.3])) + @test !allunique(f([0.4, 0.4])) + 
@test allunique(f([0.4, 0.3, 0.5])) + @test !allunique(f([0.4, 0.4, 0.5])) + @test allunique(f([0.4, 0.3, 0.5, 0.35])) + @test !allunique(f([0.4, 0.3, 0.5, 0.4])) + + if isdefined(Base, :allequal) + @test allequal(f(Float64[])) + @test allequal(f([0.4])) + @test allequal(f([0.4, 0.4])) + @test !allequal(f([0.4, 0.3])) + @test allequal(f([0.4, 0.4, 0.4, 0.4])) + @test !allunique(f([0.4, 0.4, 0.3, 0.4])) + end + end + + # Uniform weights + @test allunique(uweights(0)) + @test allunique(uweights(1)) + @test !allunique(uweights(2)) + @test !allunique(uweights(5)) if isdefined(Base, :allequal) - @test allequal(f(Float64[])) - @test allequal(f([0.4])) - @test allequal(f([0.4, 0.4])) - @test !allequal(f([0.4, 0.3])) - @test allequal(f([0.4, 0.4, 0.4, 0.4])) - @test !allunique(f([0.4, 0.4, 0.3, 0.4])) + @test allequal(uweights(0)) + @test allequal(uweights(1)) + @test allequal(uweights(2)) + @test allequal(uweights(5)) end end - # Uniform weights - @test allunique(uweights(0)) - @test allunique(uweights(1)) - @test !allunique(uweights(2)) - @test !allunique(uweights(5)) - - if isdefined(Base, :allequal) - @test allequal(uweights(0)) - @test allequal(uweights(1)) - @test allequal(uweights(2)) - @test allequal(uweights(5)) + @testset "custom weight types" begin + @test mean([1, 2, 3], MyWeights([1, 4, 10])) ≈ 2.6 + @test mean([1, 2, 3], MyWeights([NaN, 4, 10])) |> isnan + @test mode([1, 2, 3], MyWeights([1, 4, 10])) == 3 + @test_throws ArgumentError mode([1, 2, 3], MyWeights([NaN, 4, 10])) end -end - -@testset "custom weight types" begin - @test mean([1, 2, 3], MyWeights([1, 4, 10])) ≈ 2.6 - @test mean([1, 2, 3], MyWeights([NaN, 4, 10])) |> isnan - @test mode([1, 2, 3], MyWeights([1, 4, 10])) == 3 - @test_throws ArgumentError mode([1, 2, 3], MyWeights([NaN, 4, 10])) -end end # @testset StatsBase.Weights diff --git a/test/wsampling.jl b/test/wsampling.jl index efe9a608f..ef0228121 100644 --- a/test/wsampling.jl +++ b/test/wsampling.jl @@ -5,28 +5,30 @@ Random.seed!(1234) #### 
weighted sample with replacement -function check_wsample_wrep(a::AbstractArray, vrgn, wv::AbstractWeights, ptol::Real; - ordered::Bool=false, rev::Bool=false) +function check_wsample_wrep( + a::AbstractArray, vrgn, wv::AbstractWeights, ptol::Real; + ordered::Bool = false, rev::Bool = false + ) K = length(wv) (vmin, vmax) = vrgn (amin, amax) = extrema(a) @test vmin <= amin <= amax <= vmax p0 = wv ./ sum(wv) rev && reverse!(p0) - if ordered - @test issorted(a; rev=rev) + return if ordered + @test issorted(a; rev = rev) if ptol > 0 - @test isapprox(proportions(a, vmin:vmax), p0, atol=ptol) + @test isapprox(proportions(a, vmin:vmax), p0, atol = ptol) end else - @test !issorted(a; rev=rev) - ncols = size(a,2) + @test !issorted(a; rev = rev) + ncols = size(a, 2) if ncols == 1 - @test isapprox(proportions(a, vmin:vmax), p0, atol=ptol) + @test isapprox(proportions(a, vmin:vmax), p0, atol = ptol) else - for j = 1:ncols + for j in 1:ncols aj = view(a, :, j) - @test isapprox(proportions(aj, vmin:vmax), p0, atol=ptol) + @test isapprox(proportions(aj, vmin:vmax), p0, atol = ptol) end end end @@ -38,21 +40,21 @@ n = 10^6 wv = weights([0.2, 0.8, 0.4, 0.6]) for wv in ( - weights([0.2, 0.8, 0.4, 0.6]), - weights([2, 8, 4, 6]), - weights(Float32[0.2, 0.8, 0.4, 0.6]), - Weights(Float32[0.2, 0.8, 0.4, 0.6], 2), - Weights([2, 8, 4, 6], 20.0), -) + weights([0.2, 0.8, 0.4, 0.6]), + weights([2, 8, 4, 6]), + weights(Float32[0.2, 0.8, 0.4, 0.6]), + Weights(Float32[0.2, 0.8, 0.4, 0.6], 2), + Weights([2, 8, 4, 6], 20.0), + ) a = direct_sample!(4:7, wv, zeros(Int, n, 3)) - check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered=false) + check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered = false) test_rng_use(direct_sample!, 4:7, wv, zeros(Int, 100)) a = alias_sample!(4:7, wv, zeros(Int, n, 3)) - check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered=false) + check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered = false) - a = sample(4:7, wv, n; ordered=false) - check_wsample_wrep(a, (4, 7), wv, 5.0e-3; 
ordered=false) + a = sample(4:7, wv, n; ordered = false) + check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered = false) end @test_throws ArgumentError alias_sample!(rand(10), weights(fill(0, 10)), rand(10)) @@ -60,17 +62,19 @@ end for rev in (true, false), T in (Int, Int16, Float64, Float16, BigInt, ComplexF64, Rational{Int}) r = rev ? reverse(4:7) : (4:7) - r = T===Int ? r : T.(r) - aa = Int.(sample(r, wv, n; ordered=true)) - check_wsample_wrep(aa, (4, 7), wv, 5.0e-3; ordered=true, rev=rev) - aa = Int.(sample(r, wv, 10; ordered=true)) - check_wsample_wrep(aa, (4, 7), wv, -1; ordered=true, rev=rev) + r = T === Int ? r : T.(r) + aa = Int.(sample(r, wv, n; ordered = true)) + check_wsample_wrep(aa, (4, 7), wv, 5.0e-3; ordered = true, rev = rev) + aa = Int.(sample(r, wv, 10; ordered = true)) + check_wsample_wrep(aa, (4, 7), wv, -1; ordered = true, rev = rev) end #### weighted sampling without replacement -function check_wsample_norep(a::AbstractArray, vrgn, wv::AbstractWeights, ptol::Real; - ordered::Bool=false, rev::Bool=false) +function check_wsample_norep( + a::AbstractArray, vrgn, wv::AbstractWeights, ptol::Real; + ordered::Bool = false, rev::Bool = false + ) # each column of a for one run vmin, vmax = vrgn @@ -78,63 +82,63 @@ function check_wsample_norep(a::AbstractArray, vrgn, wv::AbstractWeights, ptol:: @test vmin <= amin <= amax <= vmax n = vmax - vmin + 1 - for j = 1:size(a,2) - aj = view(a,:,j) + for j in 1:size(a, 2) + aj = view(a, :, j) @assert allunique(aj) if ordered - @assert issorted(aj; rev=rev) + @assert issorted(aj; rev = rev) end end - if ptol > 0 + return if ptol > 0 p0 = wv ./ sum(wv) rev && reverse!(p0) - @test isapprox(proportions(a[1,:], vmin:vmax), p0, atol=ptol) + @test isapprox(proportions(a[1, :], vmin:vmax), p0, atol = ptol) end end import StatsBase: naive_wsample_norep!, efraimidis_a_wsample_norep!, - efraimidis_ares_wsample_norep!, efraimidis_aexpj_wsample_norep! + efraimidis_ares_wsample_norep!, efraimidis_aexpj_wsample_norep! 
n = 10^5 wv = weights([0.2, 0.8, 0.4, 0.6]) a = zeros(Int, 3, n) -for j = 1:n - naive_wsample_norep!(4:7, wv, view(a,:,j)) +for j in 1:n + naive_wsample_norep!(4:7, wv, view(a, :, j)) end -check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered=false) +check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered = false) test_rng_use(naive_wsample_norep!, 4:7, wv, zeros(Int, 2)) a = zeros(Int, 3, n) -for j = 1:n - efraimidis_a_wsample_norep!(4:7, wv, view(a,:,j)) +for j in 1:n + efraimidis_a_wsample_norep!(4:7, wv, view(a, :, j)) end -check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered=false) +check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered = false) test_rng_use(efraimidis_a_wsample_norep!, 4:7, wv, zeros(Int, 2)) a = zeros(Int, 3, n) -for j = 1:n - efraimidis_ares_wsample_norep!(4:7, wv, view(a,:,j)) +for j in 1:n + efraimidis_ares_wsample_norep!(4:7, wv, view(a, :, j)) end -check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered=false) +check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered = false) test_rng_use(efraimidis_ares_wsample_norep!, 4:7, wv, zeros(Int, 2)) a = zeros(Int, 3, n) -for j = 1:n - efraimidis_aexpj_wsample_norep!(4:7, wv, view(a,:,j)) +for j in 1:n + efraimidis_aexpj_wsample_norep!(4:7, wv, view(a, :, j)) end -check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered=false) +check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered = false) test_rng_use(efraimidis_aexpj_wsample_norep!, 4:7, wv, zeros(Int, 2)) -a = sample(4:7, wv, 3; replace=false, ordered=false) -check_wsample_norep(a, (4, 7), wv, -1; ordered=false) +a = sample(4:7, wv, 3; replace = false, ordered = false) +check_wsample_norep(a, (4, 7), wv, -1; ordered = false) for rev in (true, false), T in (Int, Int16, Float64, Float16, BigInt, ComplexF64, Rational{Int}) r = rev ? reverse(4:7) : (4:7) - r = T===Int ? r : T.(r) - aa = Int.(sample(r, wv, 3; replace=false, ordered=true)) - check_wsample_norep(aa, (4, 7), wv, -1; ordered=true, rev=rev) + r = T === Int ? 
r : T.(r) + aa = Int.(sample(r, wv, 3; replace = false, ordered = true)) + check_wsample_norep(aa, (4, 7), wv, -1; ordered = true, rev = rev) end @testset "validation of inputs" begin @@ -147,8 +151,10 @@ end @test_throws ArgumentError sample(weights(ox)) - for f in (sample!, wsample!, naive_wsample_norep!, efraimidis_a_wsample_norep!, - efraimidis_ares_wsample_norep!, efraimidis_aexpj_wsample_norep!) + for f in ( + sample!, wsample!, naive_wsample_norep!, efraimidis_a_wsample_norep!, + efraimidis_ares_wsample_norep!, efraimidis_aexpj_wsample_norep!, + ) # Test that offset arrays throw an error @test_throws ArgumentError f(ox, weights(y), z) @test_throws ArgumentError f(x, weights(oy), z) @@ -165,4 +171,4 @@ end # but it currently fails as Base.mightalias is not smart enough @test_broken f(y, weights(view(x, 5:6)), view(x, 2:4)) end -end \ No newline at end of file +end