JuliaStats · anurag-mds · Dec 27, 2025
diff --git a/src/scalarstats.jl b/src/scalarstats.jl
@@ -46,14 +46,67 @@ end
 
 # compute mode, given the range of integer values
 """
-    mode(a, [r])
-    mode(a::AbstractArray, wv::AbstractWeights)
+    mode(a; method=:frequency)
+    mode(a::AbstractArray, wv::AbstractWeights; method=:frequency)
+    mode(a::AbstractArray{T}, r::UnitRange{T}; method=:frequency) where T<:Integer
+
+Return the mode (most common value) of `a`, optionally
+over a specified range `r` or weighted via a vector
+`wv`.
+
+The `method` keyword argument selects the estimation method:
+
+- `:frequency`: Frequency-based mode. Counts occurrences and returns the most common
+  value. If several modes exist, the first one (in order of appearance) is returned.
+  This is appropriate for discrete data.
+
+- `:halfsample`: Half-sample mode (HSM). A robust estimator of the mode for
+  continuous data. Repeatedly finds the contiguous half-sample with the smallest
+  range until at most 2 points remain, then returns their midpoint. The return value
+  is always a floating-point number and may not be an element of `a`.
+  Throws an `ArgumentError` if `a` contains any non-finite values (`NaN`, `Inf`,
+  or `-Inf`).
+  This method currently does not support `r` and `wv` arguments.
+
+  # References
+- D.R. Bickel, R. Fruehwirth (2006). On a fast, robust estimator of the mode.
+  Computational Statistics & Data Analysis, 50(12), 3500-3530.
+- T. Robertson, J.D. Cryer (1974). An iterative procedure for estimating the mode.
+  Journal of the American Statistical Association, 69(348), 1012-1016.
 
-Return the mode (most common number) of an array, optionally
-over a specified range `r` or weighted via a vector `wv`.
-If several modes exist, the first one (in order of appearance) is returned.
+# Examples
+```julia
+julia> mode([1, 2, 2, 3, 3, 3, 4])
+3
+
+julia> mode([1, 2, 2, 3, 3, 3, 4], method=:frequency)
+3
+
+julia> mode([1.0, 1.1, 1.2, 5.0, 5.1], method=:halfsample)
+1.1
+```
 """
-function mode(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer
+function mode(a; method::Symbol=:frequency)
+    if method === :halfsample
+        return _hsm_mode(a)
+    elseif method === :frequency
+        return _frequency_mode(a)
+    else
+        throw(ArgumentError(LazyString("`method` must be `:frequency` or `:halfsample`, got `:", method, "`")))
+    end
+end
+
+function mode(a::AbstractArray{T}, r::UnitRange{T}; method::Symbol=:frequency) where T<:Integer
+    if method === :halfsample
+        throw(ArgumentError("The `:halfsample` method does not support a range argument. Call `mode(a, method=:halfsample)` without a range."))
+    elseif method === :frequency
+        return _frequency_mode(a, r)
+    else
+        throw(ArgumentError(LazyString("`method` must be `:frequency` or `:halfsample`, got `:", method, "`")))
+    end
+end
+
+function _frequency_mode(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer
     isempty(a) && throw(ArgumentError("mode is not defined for empty collections"))
     len = length(a)
     r0 = r[1]
@@ -107,8 +160,7 @@ function modes(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer
     return ms
 end
 
-# compute mode over arbitrary iterable
-function mode(a)
+function _frequency_mode(a)
     isempty(a) && throw(ArgumentError("mode is not defined for empty collections"))
     cnts = Dict{eltype(a),Int}()
     # first element
@@ -204,6 +256,41 @@ function modes(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real
     return [x for (x, w) in weights if w == mw]
 end
 
+#Internal implementation of the Half-Sample Mode (HSM) estimator.
+function _hsm_mode(a)
+    isempty(a) && throw(ArgumentError("mode is not defined for empty collections"))
+
+    # Filter NaN values and sort
+    if !all(isfinite, a)
+        throw(ArgumentError("mode with `method=:halfsample` is not defined " *
+                            "for collections containing non-finite values"))
+    end
+    filteredv = sort!(collect(a))
+    len = length(filteredv)
+
+    len == 1 && return middle(filteredv[1], filteredv[1])
+    len == 2 && return middle(filteredv[1], filteredv[2])
+
+    # Iteratively find the half-sample with the smallest range
+    filteredv = @view filteredv[1:end]
+    while len > 2
+        half = cld(len, 2)
+        best_i = 1
+        best_width = filteredv[half] - filteredv[1]
+        for i in 2:(len - half + 1)
+            w = filteredv[i + half - 1] - filteredv[i]
+            if w < best_width
+                best_width = w
+                best_i = i
+            end
+        end
+        filteredv = @view filteredv[best_i:(best_i + half - 1)]
+        len = length(filteredv)
+    end
+
+    return middle(filteredv[1], filteredv[len])
+end
+
 #############################
 #
 #   quantile and friends

diff --git a/test/scalarstats.jl b/test/scalarstats.jl
@@ -63,6 +63,53 @@ wv = weights([0.1:0.1:0.7; 0.1])
 @test_throws ArgumentError mode([1, 2, 3], weights([0.1, 0.3]))
 @test_throws ArgumentError modes([1, 2, 3], weights([0.1, 0.3]))
 
+## mode with method=:frequency (explicit, same as default)
+@test mode([1, 1, 1, 2, 3, 4], method=:frequency) == 1
+@test mode((x for x in [1, 1, 1, 2, 3, 4]), method=:frequency) == 1
+
+# Check no type inference regression on the default/frequency path
+@test @inferred(mode([1]))::Int == 1
+my_frequency_mode(x) = mode(x, method=:frequency)
+@test @inferred(my_frequency_mode([1]))::Int == 1
+
+# Check type inference on halfsample path
+my_hsm_mode(x) = mode(x, method=:halfsample)
+@test @inferred(my_hsm_mode([1]))::Float64 == 1.0
+
+## mode with method=:halfsample (half-sample mode)
+@test mode([10], method=:halfsample)::Float64 == 10.0
+@test mode([1, 5], method=:halfsample)::Float64 == 3.0         # midpoint of two elements
+@test mode([1, 2, 2, 3, 4, 4, 4, 5], method=:halfsample)::Float64 == 4.0
+@test mode([1.0, 1.1, 1.2, 5.0, 5.1], method=:halfsample) == 1.15
+@test mode([1.0, 2.0, 3.0, 4.0, 10.0, 11.0, 12.0, 13.0], method=:halfsample) == 1.5
+@test mode([1.0, 2.0, 10.0, 10.1, 10.2, 10.3], method=:halfsample) == 10.05
+
+# Robustness to outliers
+@test mode([1.0, 1.01, 1.011, 100.0, 200.0], method=:halfsample) == 1.0105
+
+# Non-finite values throw ArgumentError
+@test_throws ArgumentError mode([1.0, NaN, 2.0, 2.0, Inf], method=:halfsample)
+@test_throws ArgumentError mode([1.0, NaN, Inf, -Inf], method=:halfsample)
+@test_throws ArgumentError mode([NaN, Inf, -Inf], method=:halfsample)
+@test_throws ArgumentError mode([Inf, -Inf], method=:halfsample)
+@test_throws ArgumentError mode([NaN], method=:halfsample)
+@test_throws ArgumentError mode([NaN, NaN, NaN], method=:halfsample)
+
+# Edge cases
+@test_throws ArgumentError mode(Float64[], method=:halfsample)
+
+
+# Invalid method throws
+@test_throws ArgumentError mode([1, 2, 3], method=:invalid)
+
+## mode with range argument
+@test mode([1, 2, 2, 3, 4, 4, 4, 5], 2:4, method=:frequency) == 4
+@test_throws ArgumentError mode([1, 2, 2, 3, 4, 4, 4, 5], 2:4, method=:halfsample)
+@test_throws ArgumentError mode([1, 2, 2, 3, 4, 4, 4, 5], 2:4, method=:invalid)
+
+
+
+@test mode([1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 5.0, 5.1, 5.2, 5.3], method=:halfsample) == 1.15
 ## zscores
 
 @test zscore([-3:3;], 1.5, 0.5) == [-9.0:2.0:3.0;]