From 3950de00ee7fd8d0f47d8e42551f0dcaea84b77f Mon Sep 17 00:00:00 2001 From: Marcus Kruse Date: Tue, 4 Jul 2023 22:39:11 +0200 Subject: [PATCH 1/6] Outlier detection and outlier exclusion --- lib/statistex.ex | 197 ++++++++++++++++++++++++----- lib/statistex/percentile.ex | 3 +- test/statistex/percentile_test.exs | 34 ++--- test/statistex_test.exs | 98 ++++++++++++++ 4 files changed, 279 insertions(+), 53 deletions(-) diff --git a/lib/statistex.ex b/lib/statistex.ex index 9d6b539..71d7dd0 100644 --- a/lib/statistex.ex +++ b/lib/statistex.ex @@ -27,6 +27,8 @@ defmodule Statistex do :mode, :minimum, :maximum, + :outliers_bounds, + :outliers, sample_size: 0 ] @@ -47,6 +49,8 @@ defmodule Statistex do mode: mode, minimum: number, maximum: number, + outliers_bounds: {number, number}, + outliers: [number], sample_size: non_neg_integer } @@ -81,6 +85,8 @@ defmodule Statistex do @empty_list_error_message "Passed an empty list ([]) to calculate statistics from, please pass a list containing at least on number." + @iqr_factor 1.5 + @doc """ Calculate all statistics Statistex offers for a given list of numbers. @@ -89,7 +95,15 @@ defmodule Statistex do `Argumenterror` is raised if the given list is empty. ## Options - In a `percentiles` options arguments for the calculation of percentiles (see `percentiles/2`) can be given. The 50th percentile is always calculated as it is the median. + + In a `percentiles` options arguments for the calculation of percentiles (see `percentiles/2`) can + be given. The percentiles 25th, 50th (median) and 75th are always calculated. + + The option `exclude_outliers` can be set to `:once`, `:repeatedly` or `nil`, + `nil` is the default. If this option is set to `:once` the outliers are excluded + and the statistics are calculated with the rest of the samples. The value + `:repeatedly` repeats the outlier exclusion until the samples no longer + contain outliers. 
## Examples @@ -100,7 +114,7 @@ defmodule Statistex do standard_deviation: 200.0, standard_deviation_ratio: 0.4, median: 500.0, - percentiles: %{50 => 500.0}, + percentiles: %{25 => 400.0, 50 => 500.0, 75 => 600.0}, frequency_distribution: %{ 200 => 1, 400 => 3, @@ -112,7 +126,9 @@ defmodule Statistex do minimum: 200, maximum: 900, sample_size: 9, - total: 4500 + total: 4500, + outliers: [], + outliers_bounds: {200, 900.0} } iex> Statistex.statistics([]) @@ -125,13 +141,15 @@ defmodule Statistex do standard_deviation: 0.0, standard_deviation_ratio: 0.0, median: 0.0, - percentiles: %{50 => 0.0}, + percentiles: %{25 => 0.0, 50 => 0.0, 75 => 0.0}, frequency_distribution: %{0 => 4}, mode: 0, minimum: 0, maximum: 0, sample_size: 4, - total: 0 + total: 0, + outliers: [], + outliers_bounds: {0.0, 0.0} } """ @@ -143,33 +161,65 @@ defmodule Statistex do end def statistics(samples, configuration) do - total = total(samples) - sample_size = length(samples) - average = average(samples, total: total, sample_size: sample_size) - variance = variance(samples, average: average, sample_size: sample_size) - standard_deviation = standard_deviation(samples, variance: variance) + samples = Enum.sort(samples) - standard_deviation_ratio = - standard_deviation_ratio(samples, standard_deviation: standard_deviation) + minimum = hd(samples) + maximum = List.last(samples) percentiles = calculate_percentiles(samples, configuration) - frequency_distribution = frequency_distribution(samples) - - %__MODULE__{ - total: total, - average: average, - variance: variance, - standard_deviation: standard_deviation, - standard_deviation_ratio: standard_deviation_ratio, - median: median(samples, percentiles: percentiles), - percentiles: percentiles, - frequency_distribution: frequency_distribution, - mode: mode(samples, frequency_distribution: frequency_distribution), - minimum: minimum(samples), - maximum: maximum(samples), - sample_size: sample_size - } + outliers_bounds = + do_outliers_bounds(samples, 
percentiles: percentiles, minimum: minimum, maximum: maximum) + + {outliers, rest} = do_outliers(samples, outliers_bounds: outliers_bounds) + + if exclude_outliers?(configuration) and not Enum.empty?(outliers) do + configuration = + configuration + |> Keyword.put(:outliers_excluded, true) + |> Keyword.update!(:exclude_outliers, fn + :once -> :stop + :repeatedly -> :repeatedly + end) + |> Keyword.update(:acc_outliers, outliers, fn list -> list ++ outliers end) + + statistics(rest, configuration) + else + outliers = outliers ++ Keyword.get(configuration, :acc_outliers, []) + + total = total(samples) + sample_size = length(samples) + average = average(samples, total: total, sample_size: sample_size) + variance = variance(samples, average: average, sample_size: sample_size) + + frequency_distribution = frequency_distribution(samples) + + standard_deviation = standard_deviation(samples, variance: variance) + + standard_deviation_ratio = + standard_deviation_ratio(samples, standard_deviation: standard_deviation) + + %__MODULE__{ + total: total, + average: average, + variance: variance, + standard_deviation: standard_deviation, + standard_deviation_ratio: standard_deviation_ratio, + median: median(samples, percentiles: percentiles), + percentiles: percentiles, + frequency_distribution: frequency_distribution, + mode: mode(samples, frequency_distribution: frequency_distribution), + minimum: minimum, + maximum: maximum, + outliers_bounds: outliers_bounds, + outliers: outliers, + sample_size: sample_size + } + end + end + + defp exclude_outliers?(configuration) do + Keyword.get(configuration, :exclude_outliers) in [:once, :repeatedly] end @doc """ @@ -396,8 +446,10 @@ defmodule Statistex do percentiles_configuration = Keyword.get(configuration, :percentiles, []) # median_percentile is manually added so that it can be used directly by median - percentiles_configuration = Enum.uniq([@median_percentile | percentiles_configuration]) - percentiles(samples, 
percentiles_configuration = + Enum.uniq([25, @median_percentile, 75 | percentiles_configuration]) + + Percentile.percentiles(samples, percentiles_configuration) end @doc """ @@ -447,7 +499,9 @@ defmodule Statistex do """ @spec percentiles(samples, number | [number(), ...]) :: percentiles() - defdelegate(percentiles(samples, percentiles), to: Percentile) + def percentiles(samples, percentiles) do + samples |> Enum.sort() |> Percentile.percentiles(percentiles) + end @doc """ A map showing which sample occurs how often in the samples. @@ -541,10 +595,85 @@ defmodule Statistex do def median(samples, options) do percentiles = - Keyword.get_lazy(options, :percentiles, fn -> percentiles(samples, @median_percentile) end) + Keyword.get_lazy(options, :percentiles, fn -> + Percentile.percentiles(samples, @median_percentile) + end) + + get_percentile(samples, @median_percentile, percentiles) + end + + @doc """ + Calculates the lower and upper bound for outliers. + + Any sample that is `<` the lower bound or `>` the upper bound is an outlier of + the given `samples`. 
+ + ## Examples + + iex> Statistex.outliers_bounds([3, 4, 5]) + {3, 5} + + iex> Statistex.outliers_bounds([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]) + {22.5, 50} + + iex> Statistex.outliers_bounds([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99]) + {50, 80.625} + """ + @spec outliers_bounds(samples, keyword) :: {lower :: number, upper :: number} + def outliers_bounds(samples, options \\ []) + def outliers_bounds([], _), do: raise(ArgumentError, @empty_list_error_message) + def outliers_bounds(samples, options), do: samples |> Enum.sort() |> do_outliers_bounds(options) + + defp do_outliers_bounds(samples, options) do + percentiles = + Keyword.get_lazy(options, :percentiles, fn -> Percentile.percentiles(samples, [25, 75]) end) + + minimum = Keyword.get_lazy(options, :minimum, fn -> hd(samples) end) + maximum = Keyword.get_lazy(options, :maximum, fn -> List.last(samples) end) + + p25 = get_percentile(samples, 25, percentiles) + p75 = get_percentile(samples, 75, percentiles) + iqr = p75 - p25 + + {max(p25 - iqr * @iqr_factor, minimum), min(p75 + iqr * @iqr_factor, maximum)} + end + + @doc """ + Returns all outliers for the given `samples`. 
+ + ## Examples + + iex> Statistex.outliers([3, 4, 5]) + [] + + iex> Statistex.outliers([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]) + [1, 2, 6] + + iex> Statistex.outliers([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99]) + [99, 99, 99] + """ + @spec outliers(samples, keyword) :: samples | [] + def outliers(samples, options \\ []) do + {outliers, _rest} = samples |> Enum.sort() |> do_outliers(options) + + outliers + end + + defp do_outliers(samples, options) do + {lower_bound, upper_bound} = + Keyword.get_lazy(options, :outliers_bounds, fn -> do_outliers_bounds(samples, options) end) + + {min, rest} = Enum.split_while(samples, fn sample -> sample < lower_bound end) + + {max, rest} = + rest |> Enum.reverse() |> Enum.split_while(fn sample -> sample > upper_bound end) + + {min ++ max, rest} + end - Map.get_lazy(percentiles, @median_percentile, fn -> - samples |> percentiles(@median_percentile) |> Map.fetch!(@median_percentile) + defp get_percentile(samples, percentile, percentiles) do + Map.get_lazy(percentiles, percentile, fn -> + samples |> Percentile.percentiles(percentile) |> Map.fetch!(percentile) end) end diff --git a/lib/statistex/percentile.ex b/lib/statistex/percentile.ex index 162ccb7..c475da6 100644 --- a/lib/statistex/percentile.ex +++ b/lib/statistex/percentile.ex @@ -12,12 +12,11 @@ defmodule Statistex.Percentile do def percentiles(samples, percentile_ranks) do number_of_samples = length(samples) - sorted_samples = Enum.sort(samples) percentile_ranks |> List.wrap() |> Enum.reduce(%{}, fn percentile_rank, acc -> - perc = percentile(sorted_samples, number_of_samples, percentile_rank) + perc = percentile(samples, number_of_samples, percentile_rank) Map.put(acc, percentile_rank, perc) end) end diff --git a/test/statistex/percentile_test.exs b/test/statistex/percentile_test.exs index fbc03a5..020f523 100644 --- a/test/statistex/percentile_test.exs +++ b/test/statistex/percentile_test.exs @@ -4,20 +4,20 @@ defmodule Statistex.PercentileTest 
do doctest Statistex.Percentile - @nist_sample_data [ - 95.1772, - 95.1567, - 95.1937, - 95.1959, - 95.1442, - 95.0610, - 95.1591, - 95.1195, - 95.1065, - 95.0925, - 95.1990, - 95.1682 - ] + @nist_sample_data Enum.sort([ + 95.1772, + 95.1567, + 95.1937, + 95.1959, + 95.1442, + 95.0610, + 95.1591, + 95.1195, + 95.1065, + 95.0925, + 95.1990, + 95.1682 + ]) # Test data from: # http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm @@ -49,7 +49,7 @@ defmodule Statistex.PercentileTest do end describe "a list of two elements" do - @samples [300, 200] + @samples [200, 300] test "1st percentile (small sample size simply picks first element)" do %{1 => result} = percentiles(@samples, [1]) assert result == 200.0 @@ -67,7 +67,7 @@ defmodule Statistex.PercentileTest do end describe "seemingly problematic 2 element list [9, 1]" do - @samples [9, 1] + @samples [1, 9] percentiles = %{ 25 => 1, @@ -88,7 +88,7 @@ defmodule Statistex.PercentileTest do end describe "a list of three elements" do - @samples [100, 300, 200] + @samples [100, 200, 300] test "1st percentile (small sample size simply picks first element)" do %{1 => result} = percentiles(@samples, [1]) assert result == 100.0 diff --git a/test/statistex_test.exs b/test/statistex_test.exs index 3c602a2..65ef5fd 100644 --- a/test/statistex_test.exs +++ b/test/statistex_test.exs @@ -12,6 +12,104 @@ defmodule Statistex.StatistexTest do end end + describe ".outliers_bounds/2" do + test "returns outlier bounds for samples without outliers" do + assert Statistex.outliers_bounds([200, 400, 400, 400, 500, 500, 500, 700, 900]) == + {200, 900.0} + end + + test "returns outlier bounds for samples with outliers" do + assert Statistex.outliers_bounds([50, 50, 450, 450, 450, 500, 500, 500, 600, 900]) == + {87.5, 787.5} + end + end + + describe ".statistics/2" do + test "returns Statistex struct without outliers" do + assert Statistex.statistics([200, 400, 400, 400, 500, 500, 500, 700, 900]) == + %Statistex{ + total: 4500, + 
average: 500.0, + variance: 40000.0, + standard_deviation: 200.0, + standard_deviation_ratio: 0.4, + median: 500.0, + percentiles: %{25 => 400.0, 50 => 500.0, 75 => 600.0}, + frequency_distribution: %{200 => 1, 400 => 3, 500 => 3, 700 => 1, 900 => 1}, + mode: [500, 400], + minimum: 200, + maximum: 900, + outliers_bounds: {200, 900.0}, + outliers: [], + sample_size: 9 + } + end + + test "returns Statistex struct with outliers" do + assert Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900]) == + %Statistex{ + total: 4450, + average: 445.0, + variance: 61361.11111111111, + standard_deviation: 247.71175004652304, + standard_deviation_ratio: 0.5566556180820742, + median: 475.0, + percentiles: %{25 => 350.0, 50 => 475.0, 75 => 525.0}, + frequency_distribution: %{50 => 2, 450 => 3, 500 => 3, 600 => 1, 900 => 1}, + mode: [500, 450], + minimum: 50, + maximum: 900, + outliers_bounds: {87.5, 787.5}, + outliers: [50, 50, 900], + sample_size: 10 + } + end + + test "returns Statistex struct with excluded outliers once" do + assert Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900], + exclude_outliers: :once + ) == + %Statistex{ + total: 3450, + average: 492.85714285714283, + variance: 2857.142857142857, + standard_deviation: 53.452248382484875, + standard_deviation_ratio: 0.1084538372977954, + median: 500.0, + percentiles: %{25 => 450.0, 50 => 500.0, 75 => 500.0}, + frequency_distribution: %{450 => 3, 500 => 3, 600 => 1}, + mode: [500, 450], + minimum: 450, + maximum: 600, + outliers_bounds: {450, 575.0}, + outliers: [600, 50, 50, 900], + sample_size: 7 + } + end + + test "returns Statistex struct with excluded outliers repeatedly" do + assert Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900], + exclude_outliers: :repeatedly + ) == + %Statistex{ + total: 2850, + average: 475.0, + variance: 750.0, + standard_deviation: 27.386127875258307, + standard_deviation_ratio: 0.05765500605317538, + median: 475.0, + percentiles: %{25 
=> 450.0, 50 => 475.0, 75 => 500.0}, + frequency_distribution: %{450 => 3, 500 => 3}, + mode: [500, 450], + minimum: 450, + maximum: 500, + outliers_bounds: {450, 500}, + outliers: [50, 50, 900, 600], + sample_size: 6 + } + end + end + describe "property testing as we might get loads of data" do property "doesn't blow up no matter what kind of nonempty list of floats it's given" do check all(samples <- list_of(float(), min_length: 1)) do From d735acd3a82f76a7ad62b506faac44ad3e521a6d Mon Sep 17 00:00:00 2001 From: Marcus Kruse Date: Wed, 15 Jan 2025 21:57:40 +0100 Subject: [PATCH 2/6] Replace not Enum.empty? by Enum.any? --- lib/statistex.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/statistex.ex b/lib/statistex.ex index 71d7dd0..4700c0b 100644 --- a/lib/statistex.ex +++ b/lib/statistex.ex @@ -173,7 +173,7 @@ defmodule Statistex do {outliers, rest} = do_outliers(samples, outliers_bounds: outliers_bounds) - if exclude_outliers?(configuration) and not Enum.empty?(outliers) do + if exclude_outliers?(configuration) and Enum.any?(outliers) do configuration = configuration |> Keyword.put(:outliers_excluded, true) From 4d94e6aae973107ce40e84dee54459846183ae6d Mon Sep 17 00:00:00 2001 From: Marcus Kruse Date: Wed, 15 Jan 2025 22:04:36 +0100 Subject: [PATCH 3/6] Remove outliers_excluded value --- lib/statistex.ex | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/statistex.ex b/lib/statistex.ex index 4700c0b..d0e7567 100644 --- a/lib/statistex.ex +++ b/lib/statistex.ex @@ -176,7 +176,6 @@ defmodule Statistex do if exclude_outliers?(configuration) and Enum.any?(outliers) do configuration = configuration - |> Keyword.put(:outliers_excluded, true) |> Keyword.update!(:exclude_outliers, fn :once -> :stop :repeatedly -> :repeatedly From 01f1b29b065e9e36dd4a8388d527f26cacea93ab Mon Sep 17 00:00:00 2001 From: Marcus Kruse Date: Wed, 15 Jan 2025 22:12:16 +0100 Subject: [PATCH 4/6] Add @first_quartile and @third_quartile --- lib/statistex.ex | 14 
+++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/statistex.ex b/lib/statistex.ex index d0e7567..fdbe963 100644 --- a/lib/statistex.ex +++ b/lib/statistex.ex @@ -85,6 +85,8 @@ defmodule Statistex do @empty_list_error_message "Passed an empty list ([]) to calculate statistics from, please pass a list containing at least on number." + @first_quartile 25 + @third_quartile 75 @iqr_factor 1.5 @doc """ @@ -625,16 +627,18 @@ defmodule Statistex do defp do_outliers_bounds(samples, options) do percentiles = - Keyword.get_lazy(options, :percentiles, fn -> Percentile.percentiles(samples, [25, 75]) end) + Keyword.get_lazy(options, :percentiles, fn -> + Percentile.percentiles(samples, [@first_quartile, @third_quartile]) + end) minimum = Keyword.get_lazy(options, :minimum, fn -> hd(samples) end) maximum = Keyword.get_lazy(options, :maximum, fn -> List.last(samples) end) - p25 = get_percentile(samples, 25, percentiles) - p75 = get_percentile(samples, 75, percentiles) - iqr = p75 - p25 + q1 = get_percentile(samples, @first_quartile, percentiles) + q3 = get_percentile(samples, @third_quartile, percentiles) + iqr = q1 - q3 - {max(p25 - iqr * @iqr_factor, minimum), min(p75 + iqr * @iqr_factor, maximum)} + {max(q1 - iqr * @iqr_factor, minimum), min(q3 + iqr * @iqr_factor, maximum)} end @doc """ From 37d00b79b80e7d3531460f1cb19984bb7abdd0f4 Mon Sep 17 00:00:00 2001 From: Marcus Kruse Date: Wed, 15 Jan 2025 23:32:57 +0100 Subject: [PATCH 5/6] Fix calculation --- lib/statistex.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/statistex.ex b/lib/statistex.ex index fdbe963..d808f9d 100644 --- a/lib/statistex.ex +++ b/lib/statistex.ex @@ -636,7 +636,7 @@ defmodule Statistex do q1 = get_percentile(samples, @first_quartile, percentiles) q3 = get_percentile(samples, @third_quartile, percentiles) - iqr = q1 - q3 + iqr = q3 - q1 {max(q1 - iqr * @iqr_factor, minimum), min(q3 + iqr * @iqr_factor, maximum)} end From 
7b04ae03cbdf66acbc62e5a5affad425c7ca97b7 Mon Sep 17 00:00:00 2001 From: Marcus Kruse Date: Wed, 15 Jan 2025 23:34:40 +0100 Subject: [PATCH 6/6] Fix typo in outlier_bound --- lib/statistex.ex | 34 +++++++++++++++++----------------- test/statistex_test.exs | 14 +++++++------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/lib/statistex.ex b/lib/statistex.ex index d808f9d..088a81a 100644 --- a/lib/statistex.ex +++ b/lib/statistex.ex @@ -27,7 +27,7 @@ defmodule Statistex do :mode, :minimum, :maximum, - :outliers_bounds, + :outlier_bounds, :outliers, sample_size: 0 ] @@ -49,7 +49,7 @@ defmodule Statistex do mode: mode, minimum: number, maximum: number, - outliers_bounds: {number, number}, + outlier_bounds: {number, number}, outliers: [number], sample_size: non_neg_integer } @@ -130,7 +130,7 @@ defmodule Statistex do sample_size: 9, total: 4500, outliers: [], - outliers_bounds: {200, 900.0} + outlier_bounds: {200, 900.0} } iex> Statistex.statistics([]) @@ -151,7 +151,7 @@ defmodule Statistex do sample_size: 4, total: 0, outliers: [], - outliers_bounds: {0.0, 0.0} + outlier_bounds: {0.0, 0.0} } """ @@ -170,10 +170,10 @@ defmodule Statistex do percentiles = calculate_percentiles(samples, configuration) - outliers_bounds = - do_outliers_bounds(samples, percentiles: percentiles, minimum: minimum, maximum: maximum) + outlier_bounds = + do_outlier_bounds(samples, percentiles: percentiles, minimum: minimum, maximum: maximum) - {outliers, rest} = do_outliers(samples, outliers_bounds: outliers_bounds) + {outliers, rest} = do_outliers(samples, outlier_bounds: outlier_bounds) if exclude_outliers?(configuration) and Enum.any?(outliers) do configuration = @@ -212,7 +212,7 @@ defmodule Statistex do mode: mode(samples, frequency_distribution: frequency_distribution), minimum: minimum, maximum: maximum, - outliers_bounds: outliers_bounds, + outlier_bounds: outlier_bounds, outliers: outliers, sample_size: sample_size } @@ -611,21 +611,21 @@ defmodule Statistex do ## 
Examples - iex> Statistex.outliers_bounds([3, 4, 5]) + iex> Statistex.outlier_bounds([3, 4, 5]) {3, 5} - iex> Statistex.outliers_bounds([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]) + iex> Statistex.outlier_bounds([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]) {22.5, 50} - iex> Statistex.outliers_bounds([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99]) + iex> Statistex.outlier_bounds([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99]) {50, 80.625} """ - @spec outliers_bounds(samples, keyword) :: {lower :: number, upper :: number} - def outliers_bounds(samples, options \\ []) - def outliers_bounds([], _), do: raise(ArgumentError, @empty_list_error_message) - def outliers_bounds(samples, options), do: samples |> Enum.sort() |> do_outliers_bounds(options) + @spec outlier_bounds(samples, keyword) :: {lower :: number, upper :: number} + def outlier_bounds(samples, options \\ []) + def outlier_bounds([], _), do: raise(ArgumentError, @empty_list_error_message) + def outlier_bounds(samples, options), do: samples |> Enum.sort() |> do_outlier_bounds(options) - defp do_outliers_bounds(samples, options) do + defp do_outlier_bounds(samples, options) do percentiles = Keyword.get_lazy(options, :percentiles, fn -> Percentile.percentiles(samples, [@first_quartile, @third_quartile]) @@ -664,7 +664,7 @@ defmodule Statistex do defp do_outliers(samples, options) do {lower_bound, upper_bound} = - Keyword.get_lazy(options, :outliers_bounds, fn -> do_outliers_bounds(samples, options) end) + Keyword.get_lazy(options, :outlier_bounds, fn -> do_outlier_bounds(samples, options) end) {min, rest} = Enum.split_while(samples, fn sample -> sample < lower_bound end) diff --git a/test/statistex_test.exs b/test/statistex_test.exs index 65ef5fd..301ce2a 100644 --- a/test/statistex_test.exs +++ b/test/statistex_test.exs @@ -12,14 +12,14 @@ defmodule Statistex.StatistexTest do end end - describe ".outliers_bounds/2" do + describe ".outlier_bounds/2" do test "returns 
outlier bounds for samples without outliers" do - assert Statistex.outliers_bounds([200, 400, 400, 400, 500, 500, 500, 700, 900]) == + assert Statistex.outlier_bounds([200, 400, 400, 400, 500, 500, 500, 700, 900]) == {200, 900.0} end test "returns outlier bounds for samples with outliers" do - assert Statistex.outliers_bounds([50, 50, 450, 450, 450, 500, 500, 500, 600, 900]) == + assert Statistex.outlier_bounds([50, 50, 450, 450, 450, 500, 500, 500, 600, 900]) == {87.5, 787.5} end end @@ -39,7 +39,7 @@ defmodule Statistex.StatistexTest do mode: [500, 400], minimum: 200, maximum: 900, - outliers_bounds: {200, 900.0}, + outlier_bounds: {200, 900.0}, outliers: [], sample_size: 9 } @@ -59,7 +59,7 @@ defmodule Statistex.StatistexTest do mode: [500, 450], minimum: 50, maximum: 900, - outliers_bounds: {87.5, 787.5}, + outlier_bounds: {87.5, 787.5}, outliers: [50, 50, 900], sample_size: 10 } @@ -81,7 +81,7 @@ defmodule Statistex.StatistexTest do mode: [500, 450], minimum: 450, maximum: 600, - outliers_bounds: {450, 575.0}, + outlier_bounds: {450, 575.0}, outliers: [600, 50, 50, 900], sample_size: 7 } @@ -103,7 +103,7 @@ defmodule Statistex.StatistexTest do mode: [500, 450], minimum: 450, maximum: 500, - outliers_bounds: {450, 500}, + outlier_bounds: {450, 500}, outliers: [50, 50, 900, 600], sample_size: 6 }