From c2fda942f133ce1b97707cbb27864411d054ccb3 Mon Sep 17 00:00:00 2001
From: Marcus Kruse <esurk.sucram@gmail.com>
Date: Tue, 4 Jul 2023 22:39:11 +0200
Subject: [PATCH 01/23] Outlier detection and outlier exclusion

---
 lib/statistex.ex                   | 197 ++++++++++++++++++++++++-----
 lib/statistex/percentile.ex        |   3 +-
 test/statistex/percentile_test.exs |  34 ++---
 test/statistex_test.exs            |  98 ++++++++++++++
 4 files changed, 279 insertions(+), 53 deletions(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index 9fcf019..cc8ca7b 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -27,6 +27,8 @@ defmodule Statistex do
     :mode,
     :minimum,
     :maximum,
+    :outliers_bounds,
+    :outliers,
     sample_size: 0
   ]
 
@@ -47,6 +49,8 @@ defmodule Statistex do
           mode: mode,
           minimum: number,
           maximum: number,
+          outliers_bounds: {number, number},
+          outliers: [number],
           sample_size: non_neg_integer
         }
 
@@ -81,6 +85,8 @@ defmodule Statistex do
 
   @empty_list_error_message "Passed an empty list ([]) to calculate statistics from, please pass a list containing at least one number."
 
+  @iqr_factor 1.5
+
   @doc """
   Calculate all statistics Statistex offers for a given list of numbers.
 
@@ -89,7 +95,15 @@ defmodule Statistex do
   `Argumenterror` is raised if the given list is empty.
 
   ## Options
-  In a `percentiles` options arguments for the calculation of percentiles (see `percentiles/2`) can be given. The 50th percentile is always calculated as it is the median.
+
+  In a `percentiles` options arguments for the calculation of percentiles (see `percentiles/2`) can
+  be given. The percentiles 25th, 50th (median) and 75th are always calculated.
+
+  The option `exclude_outliers` can be set to `:once`, `:repeatedly` or `nil`,
+  `nil` is the default. If this option set to `:once` the outliers are excluded
+  and the statistics are calculated with the rest of the samples. The value
+  `:repeatedly` repeats the outlier exclusion until the samples no longer
+  contains outliers.
 
   ## Examples
 
@@ -100,7 +114,7 @@ defmodule Statistex do
         standard_deviation:       200.0,
         standard_deviation_ratio: 0.4,
         median:                   500.0,
-        percentiles:              %{50 => 500.0},
+        percentiles:              %{25 => 400.0, 50 => 500.0, 75 => 600.0},
         frequency_distribution:   %{
           200 => 1,
           400 => 3,
@@ -112,7 +126,9 @@ defmodule Statistex do
         minimum:                  200,
         maximum:                  900,
         sample_size:              9,
-        total:                    4500
+        total:                    4500,
+        outliers: [],
+        outliers_bounds: {200, 900.0}
       }
 
       iex> Statistex.statistics([])
@@ -125,13 +141,15 @@ defmodule Statistex do
         standard_deviation:       0.0,
         standard_deviation_ratio: 0.0,
         median:                   0.0,
-        percentiles:              %{50 => 0.0},
+        percentiles:              %{25 => 0.0, 50 => 0.0, 75 => 0.0},
         frequency_distribution:   %{0 => 4},
         mode:                     0,
         minimum:                  0,
         maximum:                  0,
         sample_size:              4,
-        total:                    0
+        total:                    0,
+        outliers: [],
+        outliers_bounds: {0.0, 0.0}
       }
 
   """
@@ -143,33 +161,65 @@ defmodule Statistex do
   end
 
   def statistics(samples, configuration) do
-    total = total(samples)
-    sample_size = length(samples)
-    average = average(samples, total: total, sample_size: sample_size)
-    variance = variance(samples, average: average, sample_size: sample_size)
-    standard_deviation = standard_deviation(samples, variance: variance)
+    samples = Enum.sort(samples)
 
-    standard_deviation_ratio =
-      standard_deviation_ratio(samples, standard_deviation: standard_deviation)
+    minimum = hd(samples)
+    maximum = List.last(samples)
 
     percentiles = calculate_percentiles(samples, configuration)
 
-    frequency_distribution = frequency_distribution(samples)
-
-    %__MODULE__{
-      total: total,
-      average: average,
-      variance: variance,
-      standard_deviation: standard_deviation,
-      standard_deviation_ratio: standard_deviation_ratio,
-      median: median(samples, percentiles: percentiles),
-      percentiles: percentiles,
-      frequency_distribution: frequency_distribution,
-      mode: mode(samples, frequency_distribution: frequency_distribution),
-      minimum: minimum(samples),
-      maximum: maximum(samples),
-      sample_size: sample_size
-    }
+    outliers_bounds =
+      do_outliers_bounds(samples, percentiles: percentiles, minimum: minimum, maximum: maximum)
+
+    {outliers, rest} = do_outliers(samples, outliers_bounds: outliers_bounds)
+
+    if exclude_outliers?(configuration) and not Enum.empty?(outliers) do
+      configuration =
+        configuration
+        |> Keyword.put(:outliers_excluded, true)
+        |> Keyword.update!(:exclude_outliers, fn
+          :once -> :stop
+          :repeatedly -> :repeatedly
+        end)
+        |> Keyword.update(:acc_outliers, outliers, fn list -> list ++ outliers end)
+
+      statistics(rest, configuration)
+    else
+      outliers = outliers ++ Keyword.get(configuration, :acc_outliers, [])
+
+      total = total(samples)
+      sample_size = length(samples)
+      average = average(samples, total: total, sample_size: sample_size)
+      variance = variance(samples, average: average, sample_size: sample_size)
+
+      frequency_distribution = frequency_distribution(samples)
+
+      standard_deviation = standard_deviation(samples, variance: variance)
+
+      standard_deviation_ratio =
+        standard_deviation_ratio(samples, standard_deviation: standard_deviation)
+
+      %__MODULE__{
+        total: total,
+        average: average,
+        variance: variance,
+        standard_deviation: standard_deviation,
+        standard_deviation_ratio: standard_deviation_ratio,
+        median: median(samples, percentiles: percentiles),
+        percentiles: percentiles,
+        frequency_distribution: frequency_distribution,
+        mode: mode(samples, frequency_distribution: frequency_distribution),
+        minimum: minimum,
+        maximum: maximum,
+        outliers_bounds: outliers_bounds,
+        outliers: outliers,
+        sample_size: sample_size
+      }
+    end
+  end
+
+  defp exclude_outliers?(configuration) do
+    Keyword.get(configuration, :exclude_outliers) in [:once, :repeatedly]
   end
 
   @doc """
@@ -396,8 +446,10 @@ defmodule Statistex do
     percentiles_configuration = Keyword.get(configuration, :percentiles, [])
 
     # median_percentile is manually added so that it can be used directly by median
-    percentiles_configuration = Enum.uniq([@median_percentile | percentiles_configuration])
-    percentiles(samples, percentiles_configuration)
+    percentiles_configuration =
+      Enum.uniq([25, @median_percentile, 75 | percentiles_configuration])
+
+    Percentile.percentiles(samples, percentiles_configuration)
   end
 
   @doc """
@@ -447,7 +499,9 @@ defmodule Statistex do
   """
   @spec percentiles(samples, number | [number(), ...]) ::
           percentiles()
-  defdelegate(percentiles(samples, percentiles), to: Percentile)
+  def percentiles(samples, percentiles) do
+    samples |> Enum.sort() |> Percentile.percentiles(percentiles)
+  end
 
   @doc """
   A map showing which sample occurs how often in the samples.
@@ -541,10 +595,85 @@ defmodule Statistex do
 
   def median(samples, options) do
     percentiles =
-      Keyword.get_lazy(options, :percentiles, fn -> percentiles(samples, @median_percentile) end)
+      Keyword.get_lazy(options, :percentiles, fn ->
+        Percentile.percentiles(samples, @median_percentile)
+      end)
+
+    get_percentile(samples, @median_percentile, percentiles)
+  end
+
+  @doc """
+  Calculates the lower and upper bound for outliers.
+
+  Any sample that is `<` as the lower bound and any sample `>` are outliers of
+  the given `samples`.
+
+  ## Examples
+
+      iex> Statistex.outliers_bounds([3, 4, 5])
+      {3, 5}
+
+      iex> Statistex.outliers_bounds([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50])
+      {22.5, 50}
+
+      iex> Statistex.outliers_bounds([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99])
+      {50, 80.625}
+  """
+  @spec outliers_bounds(samples, keyword) :: {lower :: number, upper :: number}
+  def outliers_bounds(samples, options \\ [])
+  def outliers_bounds([], _), do: raise(ArgumentError, @empty_list_error_message)
+  def outliers_bounds(samples, options), do: samples |> Enum.sort() |> do_outliers_bounds(options)
+
+  defp do_outliers_bounds(samples, options) do
+    percentiles =
+      Keyword.get_lazy(options, :percentiles, fn -> Percentile.percentiles(samples, [25, 75]) end)
+
+    minimum = Keyword.get_lazy(options, :minimum, fn -> hd(samples) end)
+    maximum = Keyword.get_lazy(options, :maximum, fn -> List.last(samples) end)
+
+    p25 = get_percentile(samples, 25, percentiles)
+    p75 = get_percentile(samples, 75, percentiles)
+    iqr = p75 - p25
+
+    {max(p25 - iqr * @iqr_factor, minimum), min(p75 + iqr * @iqr_factor, maximum)}
+  end
+
+  @doc """
+  Returns all outliers for the given `samples`.
+
+  ## Examples
+
+      iex> Statistex.outliers([3, 4, 5])
+      []
+
+      iex> Statistex.outliers([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50])
+      [1, 2, 6]
+
+      iex> Statistex.outliers([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99])
+      [99, 99, 99]
+  """
+  @spec outliers(samples, keyword) :: samples | []
+  def outliers(samples, options \\ []) do
+    {outliers, _rest} = samples |> Enum.sort() |> do_outliers(options)
+
+    outliers
+  end
+
+  defp do_outliers(samples, options) do
+    {lower_bound, upper_bound} =
+      Keyword.get_lazy(options, :outliers_bounds, fn -> do_outliers_bounds(samples, options) end)
+
+    {min, rest} = Enum.split_while(samples, fn sample -> sample < lower_bound end)
+
+    {max, rest} =
+      rest |> Enum.reverse() |> Enum.split_while(fn sample -> sample > upper_bound end)
+
+    {min ++ max, rest}
+  end
 
-    Map.get_lazy(percentiles, @median_percentile, fn ->
-      samples |> percentiles(@median_percentile) |> Map.fetch!(@median_percentile)
+  defp get_percentile(samples, percentile, percentiles) do
+    Map.get_lazy(percentiles, percentile, fn ->
+      samples |> Percentile.percentiles(percentile) |> Map.fetch!(percentile)
     end)
   end
 
diff --git a/lib/statistex/percentile.ex b/lib/statistex/percentile.ex
index dd6bd31..1a32f01 100644
--- a/lib/statistex/percentile.ex
+++ b/lib/statistex/percentile.ex
@@ -12,12 +12,11 @@ defmodule Statistex.Percentile do
 
   def percentiles(samples, percentile_ranks) do
     number_of_samples = length(samples)
-    sorted_samples = Enum.sort(samples)
 
     percentile_ranks
     |> List.wrap()
     |> Enum.reduce(%{}, fn percentile_rank, acc ->
-      perc = percentile(sorted_samples, number_of_samples, percentile_rank)
+      perc = percentile(samples, number_of_samples, percentile_rank)
       Map.put(acc, percentile_rank, perc)
     end)
   end
diff --git a/test/statistex/percentile_test.exs b/test/statistex/percentile_test.exs
index fbc03a5..020f523 100644
--- a/test/statistex/percentile_test.exs
+++ b/test/statistex/percentile_test.exs
@@ -4,20 +4,20 @@ defmodule Statistex.PercentileTest do
 
   doctest Statistex.Percentile
 
-  @nist_sample_data [
-    95.1772,
-    95.1567,
-    95.1937,
-    95.1959,
-    95.1442,
-    95.0610,
-    95.1591,
-    95.1195,
-    95.1065,
-    95.0925,
-    95.1990,
-    95.1682
-  ]
+  @nist_sample_data Enum.sort([
+                      95.1772,
+                      95.1567,
+                      95.1937,
+                      95.1959,
+                      95.1442,
+                      95.0610,
+                      95.1591,
+                      95.1195,
+                      95.1065,
+                      95.0925,
+                      95.1990,
+                      95.1682
+                    ])
 
   # Test data from:
   #   http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
@@ -49,7 +49,7 @@ defmodule Statistex.PercentileTest do
   end
 
   describe "a list of two elements" do
-    @samples [300, 200]
+    @samples [200, 300]
     test "1st percentile (small sample size simply picks first element)" do
       %{1 => result} = percentiles(@samples, [1])
       assert result == 200.0
@@ -67,7 +67,7 @@ defmodule Statistex.PercentileTest do
   end
 
   describe "seemingly problematic 2 element list [9, 1]" do
-    @samples [9, 1]
+    @samples [1, 9]
 
     percentiles = %{
       25 => 1,
@@ -88,7 +88,7 @@ defmodule Statistex.PercentileTest do
   end
 
   describe "a list of three elements" do
-    @samples [100, 300, 200]
+    @samples [100, 200, 300]
     test "1st percentile (small sample size simply picks first element)" do
       %{1 => result} = percentiles(@samples, [1])
       assert result == 100.0
diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index 3c602a2..65ef5fd 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -12,6 +12,104 @@ defmodule Statistex.StatistexTest do
     end
   end
 
+  describe ".outliers_bounds/2" do
+    test "returns outlier bounds for samples without outliers" do
+      assert Statistex.outliers_bounds([200, 400, 400, 400, 500, 500, 500, 700, 900]) ==
+               {200, 900.0}
+    end
+
+    test "returns outlier bounds for samples with outliers" do
+      assert Statistex.outliers_bounds([50, 50, 450, 450, 450, 500, 500, 500, 600, 900]) ==
+               {87.5, 787.5}
+    end
+  end
+
+  describe ".statistics/2" do
+    test "returns Statistex struct without outliers" do
+      assert Statistex.statistics([200, 400, 400, 400, 500, 500, 500, 700, 900]) ==
+               %Statistex{
+                 total: 4500,
+                 average: 500.0,
+                 variance: 40000.0,
+                 standard_deviation: 200.0,
+                 standard_deviation_ratio: 0.4,
+                 median: 500.0,
+                 percentiles: %{25 => 400.0, 50 => 500.0, 75 => 600.0},
+                 frequency_distribution: %{200 => 1, 400 => 3, 500 => 3, 700 => 1, 900 => 1},
+                 mode: [500, 400],
+                 minimum: 200,
+                 maximum: 900,
+                 outliers_bounds: {200, 900.0},
+                 outliers: [],
+                 sample_size: 9
+               }
+    end
+
+    test "returns Statistex struct with outliers" do
+      assert Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900]) ==
+               %Statistex{
+                 total: 4450,
+                 average: 445.0,
+                 variance: 61361.11111111111,
+                 standard_deviation: 247.71175004652304,
+                 standard_deviation_ratio: 0.5566556180820742,
+                 median: 475.0,
+                 percentiles: %{25 => 350.0, 50 => 475.0, 75 => 525.0},
+                 frequency_distribution: %{50 => 2, 450 => 3, 500 => 3, 600 => 1, 900 => 1},
+                 mode: [500, 450],
+                 minimum: 50,
+                 maximum: 900,
+                 outliers_bounds: {87.5, 787.5},
+                 outliers: [50, 50, 900],
+                 sample_size: 10
+               }
+    end
+
+    test "returns Statistex struct with excluded outliers once" do
+      assert Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900],
+               exclude_outliers: :once
+             ) ==
+               %Statistex{
+                 total: 3450,
+                 average: 492.85714285714283,
+                 variance: 2857.142857142857,
+                 standard_deviation: 53.452248382484875,
+                 standard_deviation_ratio: 0.1084538372977954,
+                 median: 500.0,
+                 percentiles: %{25 => 450.0, 50 => 500.0, 75 => 500.0},
+                 frequency_distribution: %{450 => 3, 500 => 3, 600 => 1},
+                 mode: [500, 450],
+                 minimum: 450,
+                 maximum: 600,
+                 outliers_bounds: {450, 575.0},
+                 outliers: [600, 50, 50, 900],
+                 sample_size: 7
+               }
+    end
+
+    test "returns Statistex struct with excluded outliers repeatedly" do
+      assert Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900],
+               exclude_outliers: :repeatedly
+             ) ==
+               %Statistex{
+                 total: 2850,
+                 average: 475.0,
+                 variance: 750.0,
+                 standard_deviation: 27.386127875258307,
+                 standard_deviation_ratio: 0.05765500605317538,
+                 median: 475.0,
+                 percentiles: %{25 => 450.0, 50 => 475.0, 75 => 500.0},
+                 frequency_distribution: %{450 => 3, 500 => 3},
+                 mode: [500, 450],
+                 minimum: 450,
+                 maximum: 500,
+                 outliers_bounds: {450, 500},
+                 outliers: [50, 50, 900, 600],
+                 sample_size: 6
+               }
+    end
+  end
+
   describe "property testing as we might get loads of data" do
     property "doesn't blow up no matter what kind of nonempty list of floats it's given" do
       check all(samples <- list_of(float(), min_length: 1)) do

From 2182dda7184bdf1f14f364c402c93dae1f29fbac Mon Sep 17 00:00:00 2001
From: Marcus Kruse <esurk.sucram@gmail.com>
Date: Wed, 15 Jan 2025 21:57:40 +0100
Subject: [PATCH 02/23] Replace not Enum.empty? by Enum.any?

---
 lib/statistex.ex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index cc8ca7b..27ec576 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -173,7 +173,7 @@ defmodule Statistex do
 
     {outliers, rest} = do_outliers(samples, outliers_bounds: outliers_bounds)
 
-    if exclude_outliers?(configuration) and not Enum.empty?(outliers) do
+    if exclude_outliers?(configuration) and Enum.any?(outliers) do
       configuration =
         configuration
         |> Keyword.put(:outliers_excluded, true)

From 505bd571e17465247b52cd0c6cc94385b847d6df Mon Sep 17 00:00:00 2001
From: Marcus Kruse <esurk.sucram@gmail.com>
Date: Wed, 15 Jan 2025 22:04:36 +0100
Subject: [PATCH 03/23] Remove outliers_excluded value

---
 lib/statistex.ex | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index 27ec576..f2a782f 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -176,7 +176,6 @@ defmodule Statistex do
     if exclude_outliers?(configuration) and Enum.any?(outliers) do
       configuration =
         configuration
-        |> Keyword.put(:outliers_excluded, true)
         |> Keyword.update!(:exclude_outliers, fn
           :once -> :stop
           :repeatedly -> :repeatedly

From 8a8497f043fe9ac7f678a53b595f49a4f8656cff Mon Sep 17 00:00:00 2001
From: Marcus Kruse <esurk.sucram@gmail.com>
Date: Wed, 15 Jan 2025 22:12:16 +0100
Subject: [PATCH 04/23] Add @first_quartile and @third_quartile

---
 lib/statistex.ex | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index f2a782f..8ca99fb 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -85,6 +85,8 @@ defmodule Statistex do
 
   @empty_list_error_message "Passed an empty list ([]) to calculate statistics from, please pass a list containing at least one number."
 
+  @first_quartile 25
+  @third_quartile 75
   @iqr_factor 1.5
 
   @doc """
@@ -625,16 +627,18 @@ defmodule Statistex do
 
   defp do_outliers_bounds(samples, options) do
     percentiles =
-      Keyword.get_lazy(options, :percentiles, fn -> Percentile.percentiles(samples, [25, 75]) end)
+      Keyword.get_lazy(options, :percentiles, fn ->
+        Percentile.percentiles(samples, [@first_quartile, @third_quartile])
+      end)
 
     minimum = Keyword.get_lazy(options, :minimum, fn -> hd(samples) end)
     maximum = Keyword.get_lazy(options, :maximum, fn -> List.last(samples) end)
 
-    p25 = get_percentile(samples, 25, percentiles)
-    p75 = get_percentile(samples, 75, percentiles)
-    iqr = p75 - p25
+    q1 = get_percentile(samples, @first_quartile, percentiles)
+    q3 = get_percentile(samples, @third_quartile, percentiles)
+    iqr = q1 - q3
 
-    {max(p25 - iqr * @iqr_factor, minimum), min(p75 + iqr * @iqr_factor, maximum)}
+    {max(q1 - iqr * @iqr_factor, minimum), min(q3 + iqr * @iqr_factor, maximum)}
   end
 
   @doc """

From 44d062812d3efdf9ac8b3de5dd0066898a64b0ad Mon Sep 17 00:00:00 2001
From: Marcus Kruse <esurk.sucram@gmail.com>
Date: Wed, 15 Jan 2025 23:32:57 +0100
Subject: [PATCH 05/23] Fix calculation

---
 lib/statistex.ex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index 8ca99fb..ccef934 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -636,7 +636,7 @@ defmodule Statistex do
 
     q1 = get_percentile(samples, @first_quartile, percentiles)
     q3 = get_percentile(samples, @third_quartile, percentiles)
-    iqr = q1 - q3
+    iqr = q3 - q1
 
     {max(q1 - iqr * @iqr_factor, minimum), min(q3 + iqr * @iqr_factor, maximum)}
   end

From 947483c79ab7ddc4d01fcd4a449015958be34995 Mon Sep 17 00:00:00 2001
From: Marcus Kruse <esurk.sucram@gmail.com>
Date: Wed, 15 Jan 2025 23:34:40 +0100
Subject: [PATCH 06/23] Fix typo in outlier_bound

---
 lib/statistex.ex        | 34 +++++++++++++++++-----------------
 test/statistex_test.exs | 14 +++++++-------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index ccef934..9c66643 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -27,7 +27,7 @@ defmodule Statistex do
     :mode,
     :minimum,
     :maximum,
-    :outliers_bounds,
+    :outlier_bounds,
     :outliers,
     sample_size: 0
   ]
@@ -49,7 +49,7 @@ defmodule Statistex do
           mode: mode,
           minimum: number,
           maximum: number,
-          outliers_bounds: {number, number},
+          outlier_bounds: {number, number},
           outliers: [number],
           sample_size: non_neg_integer
         }
@@ -130,7 +130,7 @@ defmodule Statistex do
         sample_size:              9,
         total:                    4500,
         outliers: [],
-        outliers_bounds: {200, 900.0}
+        outlier_bounds: {200, 900.0}
       }
 
       iex> Statistex.statistics([])
@@ -151,7 +151,7 @@ defmodule Statistex do
         sample_size:              4,
         total:                    0,
         outliers: [],
-        outliers_bounds: {0.0, 0.0}
+        outlier_bounds: {0.0, 0.0}
       }
 
   """
@@ -170,10 +170,10 @@ defmodule Statistex do
 
     percentiles = calculate_percentiles(samples, configuration)
 
-    outliers_bounds =
-      do_outliers_bounds(samples, percentiles: percentiles, minimum: minimum, maximum: maximum)
+    outlier_bounds =
+      do_outlier_bounds(samples, percentiles: percentiles, minimum: minimum, maximum: maximum)
 
-    {outliers, rest} = do_outliers(samples, outliers_bounds: outliers_bounds)
+    {outliers, rest} = do_outliers(samples, outlier_bounds: outlier_bounds)
 
     if exclude_outliers?(configuration) and Enum.any?(outliers) do
       configuration =
@@ -212,7 +212,7 @@ defmodule Statistex do
         mode: mode(samples, frequency_distribution: frequency_distribution),
         minimum: minimum,
         maximum: maximum,
-        outliers_bounds: outliers_bounds,
+        outlier_bounds: outlier_bounds,
         outliers: outliers,
         sample_size: sample_size
       }
@@ -611,21 +611,21 @@ defmodule Statistex do
 
   ## Examples
 
-      iex> Statistex.outliers_bounds([3, 4, 5])
+      iex> Statistex.outlier_bounds([3, 4, 5])
       {3, 5}
 
-      iex> Statistex.outliers_bounds([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50])
+      iex> Statistex.outlier_bounds([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50])
       {22.5, 50}
 
-      iex> Statistex.outliers_bounds([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99])
+      iex> Statistex.outlier_bounds([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99])
       {50, 80.625}
   """
-  @spec outliers_bounds(samples, keyword) :: {lower :: number, upper :: number}
-  def outliers_bounds(samples, options \\ [])
-  def outliers_bounds([], _), do: raise(ArgumentError, @empty_list_error_message)
-  def outliers_bounds(samples, options), do: samples |> Enum.sort() |> do_outliers_bounds(options)
+  @spec outlier_bounds(samples, keyword) :: {lower :: number, upper :: number}
+  def outlier_bounds(samples, options \\ [])
+  def outlier_bounds([], _), do: raise(ArgumentError, @empty_list_error_message)
+  def outlier_bounds(samples, options), do: samples |> Enum.sort() |> do_outlier_bounds(options)
 
-  defp do_outliers_bounds(samples, options) do
+  defp do_outlier_bounds(samples, options) do
     percentiles =
       Keyword.get_lazy(options, :percentiles, fn ->
         Percentile.percentiles(samples, [@first_quartile, @third_quartile])
@@ -664,7 +664,7 @@ defmodule Statistex do
 
   defp do_outliers(samples, options) do
     {lower_bound, upper_bound} =
-      Keyword.get_lazy(options, :outliers_bounds, fn -> do_outliers_bounds(samples, options) end)
+      Keyword.get_lazy(options, :outlier_bounds, fn -> do_outlier_bounds(samples, options) end)
 
     {min, rest} = Enum.split_while(samples, fn sample -> sample < lower_bound end)
 
diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index 65ef5fd..301ce2a 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -12,14 +12,14 @@ defmodule Statistex.StatistexTest do
     end
   end
 
-  describe ".outliers_bounds/2" do
+  describe ".outlier_bounds/2" do
     test "returns outlier bounds for samples without outliers" do
-      assert Statistex.outliers_bounds([200, 400, 400, 400, 500, 500, 500, 700, 900]) ==
+      assert Statistex.outlier_bounds([200, 400, 400, 400, 500, 500, 500, 700, 900]) ==
                {200, 900.0}
     end
 
     test "returns outlier bounds for samples with outliers" do
-      assert Statistex.outliers_bounds([50, 50, 450, 450, 450, 500, 500, 500, 600, 900]) ==
+      assert Statistex.outlier_bounds([50, 50, 450, 450, 450, 500, 500, 500, 600, 900]) ==
                {87.5, 787.5}
     end
   end
@@ -39,7 +39,7 @@ defmodule Statistex.StatistexTest do
                  mode: [500, 400],
                  minimum: 200,
                  maximum: 900,
-                 outliers_bounds: {200, 900.0},
+                 outlier_bounds: {200, 900.0},
                  outliers: [],
                  sample_size: 9
                }
@@ -59,7 +59,7 @@ defmodule Statistex.StatistexTest do
                  mode: [500, 450],
                  minimum: 50,
                  maximum: 900,
-                 outliers_bounds: {87.5, 787.5},
+                 outlier_bounds: {87.5, 787.5},
                  outliers: [50, 50, 900],
                  sample_size: 10
                }
@@ -81,7 +81,7 @@ defmodule Statistex.StatistexTest do
                  mode: [500, 450],
                  minimum: 450,
                  maximum: 600,
-                 outliers_bounds: {450, 575.0},
+                 outlier_bounds: {450, 575.0},
                  outliers: [600, 50, 50, 900],
                  sample_size: 7
                }
@@ -103,7 +103,7 @@ defmodule Statistex.StatistexTest do
                  mode: [500, 450],
                  minimum: 450,
                  maximum: 500,
-                 outliers_bounds: {450, 500},
+                 outlier_bounds: {450, 500},
                  outliers: [50, 50, 900, 600],
                  sample_size: 6
                }

From 5b5c329090e141727764be2f071d176d012af109 Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Fri, 2 May 2025 13:33:26 +0200
Subject: [PATCH 07/23] Provide sources for the 1.5 iqr rule

---
 lib/statistex.ex | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index 9c66643..44c4d0d 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -87,6 +87,8 @@ defmodule Statistex do
 
   @first_quartile 25
   @third_quartile 75
+  # https://en.wikipedia.org/wiki/Interquartile_range#Outliers
+  # https://builtin.com/articles/1-5-iqr-rule
   @iqr_factor 1.5
 
   @doc """

From ad7174b51d1fe36b54d001d32f1b583515b05587 Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Fri, 2 May 2025 14:00:27 +0200
Subject: [PATCH 08/23] WIP: (known failure) try to remove repeatedly
 identifying outliers

From: https://github.com/bencheeorg/statistex/pull/5#discussion_r1917296624

Need to think through it again and/or check some more samples
and test it against those. Getting different bounds/outliers
right now, although I think they're right.
---
 lib/statistex.ex        | 110 +++++++++++++++++++++-------------------
 test/statistex_test.exs |  26 ++--------
 2 files changed, 62 insertions(+), 74 deletions(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index 44c4d0d..dc449ba 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -96,18 +96,16 @@ defmodule Statistex do
 
   The statistics themselves are described in the individual samples that can be used to calculate individual values.
 
-  `Argumenterror` is raised if the given list is empty.
+  `ArgumentError` is raised if the given list is empty.
 
   ## Options
 
-  In a `percentiles` options arguments for the calculation of percentiles (see `percentiles/2`) can
+  With a `percentiles` options arguments for the calculation of percentiles (see `percentiles/2`) can
   be given. The percentiles 25th, 50th (median) and 75th are always calculated.
 
-  The option `exclude_outliers` can be set to `:once`, `:repeatedly` or `nil`,
-  `nil` is the default. If this option set to `:once` the outliers are excluded
-  and the statistics are calculated with the rest of the samples. The value
-  `:repeatedly` repeats the outlier exclusion until the samples no longer
-  contains outliers.
+  The option `exclude_outliers` can be set to `true`, `false`. Defaults to `false`.
+  If this option is set to `true` the outliers are excluded
+  and the statistics are calculated with the rest of the samples.
 
   ## Examples
 
@@ -167,62 +165,72 @@ defmodule Statistex do
   def statistics(samples, configuration) do
     samples = Enum.sort(samples)
 
-    minimum = hd(samples)
-    maximum = List.last(samples)
-
-    percentiles = calculate_percentiles(samples, configuration)
+    # these statistics are required to do the outlier calculations
+    %{minimum: minimum, maximum: maximum, percentiles: percentiles} =
+      base_statistics(samples, configuration)
 
     outlier_bounds =
       do_outlier_bounds(samples, percentiles: percentiles, minimum: minimum, maximum: maximum)
 
+    # make sure rest remains sorted and so can be used again to ok results
     {outliers, rest} = do_outliers(samples, outlier_bounds: outlier_bounds)
 
     if exclude_outliers?(configuration) and Enum.any?(outliers) do
-      configuration =
-        configuration
-        |> Keyword.update!(:exclude_outliers, fn
-          :once -> :stop
-          :repeatedly -> :repeatedly
-        end)
-        |> Keyword.update(:acc_outliers, outliers, fn list -> list ++ outliers end)
-
-      statistics(rest, configuration)
+      # figure out to avoid double sorting
+      rest = Enum.sort(rest)
+      # need to recalculate with the outliers removed
+      %{minimum: minimum, maximum: maximum, percentiles: percentiles} =
+        base_statistics(rest, configuration)
+
+      create_full_statistics(rest, minimum, maximum, percentiles, outliers, outlier_bounds)
     else
-      outliers = outliers ++ Keyword.get(configuration, :acc_outliers, [])
-
-      total = total(samples)
-      sample_size = length(samples)
-      average = average(samples, total: total, sample_size: sample_size)
-      variance = variance(samples, average: average, sample_size: sample_size)
-
-      frequency_distribution = frequency_distribution(samples)
-
-      standard_deviation = standard_deviation(samples, variance: variance)
-
-      standard_deviation_ratio =
-        standard_deviation_ratio(samples, standard_deviation: standard_deviation)
-
-      %__MODULE__{
-        total: total,
-        average: average,
-        variance: variance,
-        standard_deviation: standard_deviation,
-        standard_deviation_ratio: standard_deviation_ratio,
-        median: median(samples, percentiles: percentiles),
-        percentiles: percentiles,
-        frequency_distribution: frequency_distribution,
-        mode: mode(samples, frequency_distribution: frequency_distribution),
-        minimum: minimum,
-        maximum: maximum,
-        outlier_bounds: outlier_bounds,
-        outliers: outliers,
-        sample_size: sample_size
-      }
+      create_full_statistics(samples, minimum, maximum, percentiles, outliers, outlier_bounds)
     end
   end
 
+  defp base_statistics(samples, configuration) do
+    minimum = hd(samples)
+    maximum = List.last(samples)
+
+    percentiles = calculate_percentiles(samples, configuration)
+
+    %{minimum: minimum, maximum: maximum, percentiles: percentiles}
+  end
+
   defp exclude_outliers?(configuration) do
-    Keyword.get(configuration, :exclude_outliers) in [:once, :repeatedly]
+    Access.get(configuration, :exclude_outliers) == true
+  end
+
+  # maybe make argument a map
+  defp create_full_statistics(samples, minimum, maximum, percentiles, outliers, outlier_bounds) do
+    total = total(samples)
+    sample_size = length(samples)
+    average = average(samples, total: total, sample_size: sample_size)
+    variance = variance(samples, average: average, sample_size: sample_size)
+
+    frequency_distribution = frequency_distribution(samples)
+
+    standard_deviation = standard_deviation(samples, variance: variance)
+
+    standard_deviation_ratio =
+      standard_deviation_ratio(samples, standard_deviation: standard_deviation)
+
+    %__MODULE__{
+      total: total,
+      average: average,
+      variance: variance,
+      standard_deviation: standard_deviation,
+      standard_deviation_ratio: standard_deviation_ratio,
+      median: median(samples, percentiles: percentiles),
+      percentiles: percentiles,
+      frequency_distribution: frequency_distribution,
+      mode: mode(samples, frequency_distribution: frequency_distribution),
+      minimum: minimum,
+      maximum: maximum,
+      outlier_bounds: outlier_bounds,
+      outliers: outliers,
+      sample_size: sample_size
+    }
   end
 
   @doc """
diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index 301ce2a..5c0b742 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -67,7 +67,7 @@ defmodule Statistex.StatistexTest do
 
     test "returns Statistex struct with excluded outliers once" do
       assert Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900],
-               exclude_outliers: :once
+               exclude_outliers: true
              ) ==
                %Statistex{
                  total: 3450,
@@ -81,33 +81,13 @@ defmodule Statistex.StatistexTest do
                  mode: [500, 450],
                  minimum: 450,
                  maximum: 600,
+                 # check with other sources what is right and what isn't, I fear we may have calculated outliers twice before
                  outlier_bounds: {450, 575.0},
+                 # Either sort them or make the test ignorant of order
                  outliers: [600, 50, 50, 900],
                  sample_size: 7
                }
     end
-
-    test "returns Statistex struct with excluded outliers repeatedly" do
-      assert Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900],
-               exclude_outliers: :repeatedly
-             ) ==
-               %Statistex{
-                 total: 2850,
-                 average: 475.0,
-                 variance: 750.0,
-                 standard_deviation: 27.386127875258307,
-                 standard_deviation_ratio: 0.05765500605317538,
-                 median: 475.0,
-                 percentiles: %{25 => 450.0, 50 => 475.0, 75 => 500.0},
-                 frequency_distribution: %{450 => 3, 500 => 3},
-                 mode: [500, 450],
-                 minimum: 450,
-                 maximum: 500,
-                 outlier_bounds: {450, 500},
-                 outliers: [50, 50, 900, 600],
-                 sample_size: 6
-               }
-    end
   end
 
   describe "property testing as we might get loads of data" do

From cdf1490ce1a35677e9623e1d8024557d6f7b3eae Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 13:44:42 +0200
Subject: [PATCH 09/23] Further deep dive into outliers, quantiles

* Remove the limiting of bounds with min/max
* consult and get some more samples
* include R samples as some authoritative examples
---
 lib/statistex.ex            |  13 ++---
 lib/statistex/percentile.ex |  11 +++-
 test/statistex_test.exs     | 102 +++++++++++++++++++++++++++---------
 3 files changed, 91 insertions(+), 35 deletions(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index dc449ba..fd60259 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -130,7 +130,7 @@ defmodule Statistex do
         sample_size:              9,
         total:                    4500,
         outliers: [],
-        outlier_bounds: {200, 900.0}
+        outlier_bounds: {100.0, 900.0}
       }
 
       iex> Statistex.statistics([])
@@ -622,13 +622,13 @@ defmodule Statistex do
   ## Examples
 
       iex> Statistex.outlier_bounds([3, 4, 5])
-      {3, 5}
+      {0.0, 8.0}
 
       iex> Statistex.outlier_bounds([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50])
-      {22.5, 50}
+      {22.5, 66.5}
 
       iex> Statistex.outlier_bounds([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99])
-      {50, 80.625}
+      {31.625, 80.625}
   """
   @spec outlier_bounds(samples, keyword) :: {lower :: number, upper :: number}
   def outlier_bounds(samples, options \\ [])
@@ -641,14 +641,11 @@ defmodule Statistex do
         Percentile.percentiles(samples, [@first_quartile, @third_quartile])
       end)
 
-    minimum = Keyword.get_lazy(options, :minimum, fn -> hd(samples) end)
-    maximum = Keyword.get_lazy(options, :maximum, fn -> List.last(samples) end)
-
     q1 = get_percentile(samples, @first_quartile, percentiles)
     q3 = get_percentile(samples, @third_quartile, percentiles)
     iqr = q3 - q1
 
-    {max(q1 - iqr * @iqr_factor, minimum), min(q3 + iqr * @iqr_factor, maximum)}
+    {q1 - iqr * @iqr_factor, q3 + iqr * @iqr_factor}
   end
 
   @doc """
diff --git a/lib/statistex/percentile.ex b/lib/statistex/percentile.ex
index 1a32f01..53887a9 100644
--- a/lib/statistex/percentile.ex
+++ b/lib/statistex/percentile.ex
@@ -62,11 +62,20 @@ defmodule Statistex.Percentile do
   # particular sample). Of the 9 main strategies, (types 1-9), types 6, 7, and 8
   # are generally acceptable and give similar results.
   #
+  # R uses type 7, but you can change the strategies used in R with arguments.
+  #
+  # > quantile(c(9, 9, 10, 10, 10, 11, 12, 36), probs = c(0.25, 0.5, 0.75), type = 6)
+  #   25%   50%   75%
+  #  9.25 10.00 11.75
+  # > quantile(c(9, 9, 10, 10, 10, 11, 12, 36), probs = c(0.25, 0.5, 0.75), type = 7)
+  #   25%   50%   75%
+  #  9.75 10.00 11.25
+  #
   # For more information on interpolation strategies, see:
   # - https://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html
   # - http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
   defp interpolation_value(lower_bound, upper_bound, rank) do
-    # in our source rank is k, and interpolation_weitgh is d
+    # in our source rank is k, and interpolation_weight is d
     interpolation_weight = rank - trunc(rank)
     interpolation_weight * (upper_bound - lower_bound)
   end
diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index 5c0b742..fa646f2 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -13,9 +13,10 @@ defmodule Statistex.StatistexTest do
   end
 
   describe ".outlier_bounds/2" do
+    # examples doubled up, maybe get rid of them?
     test "returns outlier bounds for samples without outliers" do
       assert Statistex.outlier_bounds([200, 400, 400, 400, 500, 500, 500, 700, 900]) ==
-               {200, 900.0}
+               {100.0, 900.0}
     end
 
     test "returns outlier bounds for samples with outliers" do
@@ -30,7 +31,7 @@ defmodule Statistex.StatistexTest do
                %Statistex{
                  total: 4500,
                  average: 500.0,
-                 variance: 40000.0,
+                 variance: 40_000.0,
                  standard_deviation: 200.0,
                  standard_deviation_ratio: 0.4,
                  median: 500.0,
@@ -39,7 +40,7 @@ defmodule Statistex.StatistexTest do
                  mode: [500, 400],
                  minimum: 200,
                  maximum: 900,
-                 outlier_bounds: {200, 900.0},
+                 outlier_bounds: {100.0, 900.0},
                  outliers: [],
                  sample_size: 9
                }
@@ -50,7 +51,7 @@ defmodule Statistex.StatistexTest do
                %Statistex{
                  total: 4450,
                  average: 445.0,
-                 variance: 61361.11111111111,
+                 variance: 61_361.11111111111,
                  standard_deviation: 247.71175004652304,
                  standard_deviation_ratio: 0.5566556180820742,
                  median: 475.0,
@@ -65,28 +66,77 @@ defmodule Statistex.StatistexTest do
                }
     end
 
-    test "returns Statistex struct with excluded outliers once" do
-      assert Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900],
-               exclude_outliers: true
-             ) ==
-               %Statistex{
-                 total: 3450,
-                 average: 492.85714285714283,
-                 variance: 2857.142857142857,
-                 standard_deviation: 53.452248382484875,
-                 standard_deviation_ratio: 0.1084538372977954,
-                 median: 500.0,
-                 percentiles: %{25 => 450.0, 50 => 500.0, 75 => 500.0},
-                 frequency_distribution: %{450 => 3, 500 => 3, 600 => 1},
-                 mode: [500, 450],
-                 minimum: 450,
-                 maximum: 600,
-                 # check with other sources what is right and what isn't, I fear we may have calculated outliers twice before
-                 outlier_bounds: {450, 575.0},
-                 # Either sort them or make the test ignorant of order
-                 outliers: [600, 50, 50, 900],
-                 sample_size: 7
-               }
+    # https://www.youtube.com/watch?v=rZJbj2I-_Ek
+    test "gets outliers from the sample right" do
+      # One could argue that this is controversial, R comes up with these results (by default):
+      # > summary(c(9, 9, 10, 10, 10, 11, 12, 36))
+      #  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+      #  9.00    9.75   10.00   13.38   11.25   36.00
+      #
+      # R by default uses type 7 interpolation, we implemented type 6 interpolation though. Which
+      # R can also use:
+      # > quantile(c(9, 9, 10, 10, 10, 11, 12, 36), probs = c(0.25, 0.5, 0.75), type = 6)
+      # 25%   50%   75%
+      # 9.25 10.00 11.75
+      # Which is our result.
+
+      assert %Statistex{
+               median: 10.0,
+               percentiles: %{25 => 9.25, 50 => 10.0, 75 => 11.75},
+               minimum: 9,
+               maximum: 36,
+               outlier_bounds: {5.5, 15.5},
+               outliers: [36]
+             } = Statistex.statistics([9, 9, 10, 10, 10, 11, 12, 36], exclude_outliers: false)
+    end
+
+    # https://en.wikipedia.org/wiki/Box_plot#Example_with_outliers
+    test "another example with outliers" do
+      data = [
+        52,
+        57,
+        57,
+        58,
+        63,
+        66,
+        66,
+        67,
+        67,
+        68,
+        69,
+        70,
+        70,
+        70,
+        70,
+        72,
+        73,
+        75,
+        75,
+        76,
+        76,
+        78,
+        79,
+        89
+      ]
+
+      assert %Statistex{
+               median: 70.0,
+               percentiles: %{25 => 66.0, 50 => 70.0, 75 => 75.0},
+               # report interquantile range?
+               outlier_bounds: {52.5, 88.5},
+               outliers: [52, 89]
+             } = Statistex.statistics(data, exclude_outliers: false)
+    end
+
+    # https://en.wikipedia.org/wiki/Interquartile_range#Data_set_in_a_table
+    test "quartile example" do
+      assert %Statistex{
+               median: 87.0,
+               percentiles: %{25 => 31.0, 50 => 87.0, 75 => 119.0}
+             } =
+               Statistex.statistics([7, 7, 31, 31, 47, 75, 87, 115, 116, 119, 119, 155, 177],
+                 exclude_outliers: false
+               )
     end
   end
 

From d769e4e8848b650bde638b1c6e2072f7b4d775dc Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 14:09:26 +0200
Subject: [PATCH 10/23] separate keys for outlier bounds

---
 lib/statistex.ex        | 17 ++++++++++++-----
 test/statistex_test.exs | 12 ++++++++----
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index fd60259..3c812fc 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -27,7 +27,8 @@ defmodule Statistex do
     :mode,
     :minimum,
     :maximum,
-    :outlier_bounds,
+    :lower_outlier_bound,
+    :upper_outlier_bound,
     :outliers,
     sample_size: 0
   ]
@@ -49,7 +50,8 @@ defmodule Statistex do
           mode: mode,
           minimum: number,
           maximum: number,
-          outlier_bounds: {number, number},
+          lower_outlier_bound: number,
+          upper_outlier_bound: number,
           outliers: [number],
           sample_size: non_neg_integer
         }
@@ -130,7 +132,8 @@ defmodule Statistex do
         sample_size:              9,
         total:                    4500,
         outliers: [],
-        outlier_bounds: {100.0, 900.0}
+        lower_outlier_bound: 100.0,
+        upper_outlier_bound: 900.0
       }
 
       iex> Statistex.statistics([])
@@ -151,7 +154,8 @@ defmodule Statistex do
         sample_size:              4,
         total:                    0,
         outliers: [],
-        outlier_bounds: {0.0, 0.0}
+        lower_outlier_bound: 0.0,
+        upper_outlier_bound: 0.0,
       }
 
   """
@@ -215,6 +219,8 @@ defmodule Statistex do
     standard_deviation_ratio =
       standard_deviation_ratio(samples, standard_deviation: standard_deviation)
 
+    {lower_outlier_bound, upper_outlier_bound} = outlier_bounds
+
     %__MODULE__{
       total: total,
       average: average,
@@ -227,7 +233,8 @@ defmodule Statistex do
       mode: mode(samples, frequency_distribution: frequency_distribution),
       minimum: minimum,
       maximum: maximum,
-      outlier_bounds: outlier_bounds,
+      lower_outlier_bound: lower_outlier_bound,
+      upper_outlier_bound: upper_outlier_bound,
       outliers: outliers,
       sample_size: sample_size
     }
diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index fa646f2..59a30d5 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -40,7 +40,8 @@ defmodule Statistex.StatistexTest do
                  mode: [500, 400],
                  minimum: 200,
                  maximum: 900,
-                 outlier_bounds: {100.0, 900.0},
+                 lower_outlier_bound: 100.0,
+                 upper_outlier_bound: 900.0,
                  outliers: [],
                  sample_size: 9
                }
@@ -60,7 +61,8 @@ defmodule Statistex.StatistexTest do
                  mode: [500, 450],
                  minimum: 50,
                  maximum: 900,
-                 outlier_bounds: {87.5, 787.5},
+                 lower_outlier_bound: 87.5,
+                 upper_outlier_bound: 787.5,
                  outliers: [50, 50, 900],
                  sample_size: 10
                }
@@ -85,7 +87,8 @@ defmodule Statistex.StatistexTest do
                percentiles: %{25 => 9.25, 50 => 10.0, 75 => 11.75},
                minimum: 9,
                maximum: 36,
-               outlier_bounds: {5.5, 15.5},
+               lower_outlier_bound: 5.5,
+               upper_outlier_bound: 15.5,
                outliers: [36]
              } = Statistex.statistics([9, 9, 10, 10, 10, 11, 12, 36], exclude_outliers: false)
     end
@@ -123,7 +126,8 @@ defmodule Statistex.StatistexTest do
                median: 70.0,
                percentiles: %{25 => 66.0, 50 => 70.0, 75 => 75.0},
                # report interquantile range?
-               outlier_bounds: {52.5, 88.5},
+               lower_outlier_bound: 52.5,
+               upper_outlier_bound: 88.5,
                outliers: [52, 89]
              } = Statistex.statistics(data, exclude_outliers: false)
     end

From 3b709c01081f4ae38a4d2bf5cb6379385e2125df Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 15:06:29 +0200
Subject: [PATCH 11/23] Simplify the determination of outliers & pass on
 sorted?:

There could be an argument made that if we have few outliers,
reversing the lists twice could be faster than passing through
the entire list once with 2 conditions.

We can probably optimize & benchmark on this later.
---
 lib/statistex.ex            | 79 +++++++++++++++++++++++--------------
 lib/statistex/helper.ex     | 18 +++++++++
 lib/statistex/percentile.ex | 13 ++++--
 3 files changed, 77 insertions(+), 33 deletions(-)
 create mode 100644 lib/statistex/helper.ex

diff --git a/lib/statistex.ex b/lib/statistex.ex
index 3c812fc..d0661b9 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -15,6 +15,8 @@ defmodule Statistex do
   alias Statistex.{Mode, Percentile}
   require Integer
 
+  import Statistex.Helper, only: [maybe_sort: 2]
+
   defstruct [
     :total,
     :average,
@@ -88,6 +90,7 @@ defmodule Statistex do
   @empty_list_error_message "Passed an empty list ([]) to calculate statistics from, please pass a list containing at least one number."
 
   @first_quartile 25
+  @median_percentile 50
   @third_quartile 75
   # https://en.wikipedia.org/wiki/Interquartile_range#Outliers
   # https://builtin.com/articles/1-5-iqr-rule
@@ -167,17 +170,21 @@ defmodule Statistex do
   end
 
   def statistics(samples, configuration) do
-    samples = Enum.sort(samples)
+    sorted_samples = Enum.sort(samples)
 
     # these statistics are required to do the outlier calculations
     %{minimum: minimum, maximum: maximum, percentiles: percentiles} =
-      base_statistics(samples, configuration)
+      base_statistics(sorted_samples, configuration)
 
     outlier_bounds =
-      do_outlier_bounds(samples, percentiles: percentiles, minimum: minimum, maximum: maximum)
+      do_outlier_bounds(sorted_samples,
+        percentiles: percentiles,
+        minimum: minimum,
+        maximum: maximum
+      )
 
     # make sure rest remains sorted and so can be used again to ok results
-    {outliers, rest} = do_outliers(samples, outlier_bounds: outlier_bounds)
+    {outliers, rest} = do_outliers(sorted_samples, outlier_bounds: outlier_bounds)
 
     if exclude_outliers?(configuration) and Enum.any?(outliers) do
       # figure out to avoid double sorting
@@ -188,15 +195,22 @@ defmodule Statistex do
 
       create_full_statistics(rest, minimum, maximum, percentiles, outliers, outlier_bounds)
     else
-      create_full_statistics(samples, minimum, maximum, percentiles, outliers, outlier_bounds)
+      create_full_statistics(
+        sorted_samples,
+        minimum,
+        maximum,
+        percentiles,
+        outliers,
+        outlier_bounds
+      )
     end
   end
 
-  defp base_statistics(samples, configuration) do
-    minimum = hd(samples)
-    maximum = List.last(samples)
+  defp base_statistics(sorted_samples, configuration) do
+    minimum = hd(sorted_samples)
+    maximum = List.last(sorted_samples)
 
-    percentiles = calculate_percentiles(samples, configuration)
+    percentiles = calculate_percentiles(sorted_samples, configuration)
 
     %{minimum: minimum, maximum: maximum, percentiles: percentiles}
   end
@@ -459,15 +473,18 @@ defmodule Statistex do
     end
   end
 
-  @median_percentile 50
-  defp calculate_percentiles(samples, configuration) do
+  defp calculate_percentiles(sorted_samples, configuration) do
     percentiles_configuration = Keyword.get(configuration, :percentiles, [])
 
     # median_percentile is manually added so that it can be used directly by median
     percentiles_configuration =
-      Enum.uniq([25, @median_percentile, 75 | percentiles_configuration])
+      Enum.uniq([
+        @first_quartile,
+        @median_percentile,
+        @third_quartile | percentiles_configuration
+      ])
 
-    Percentile.percentiles(samples, percentiles_configuration)
+    Percentile.percentiles(sorted_samples, percentiles_configuration, sorted: true)
   end
 
   @doc """
@@ -475,7 +492,7 @@ defmodule Statistex do
 
   Think of this as the
   value below which `percentile_rank` percent of the samples lie. For example,
-  if `Statistex.percentile(samples, 99)` == 123.45,
+  if `Statistex.percentile(samples, 99) == 123.45`,
   99% of samples are less than 123.45.
 
   Passing a number for `percentile_rank` calculates a single percentile.
@@ -517,9 +534,8 @@ defmodule Statistex do
   """
   @spec percentiles(samples, number | [number(), ...]) ::
           percentiles()
-  def percentiles(samples, percentiles) do
-    samples |> Enum.sort() |> Percentile.percentiles(percentiles)
-  end
+  defdelegate percentiles(samples, percentiles, options), to: Percentile
+  defdelegate percentiles(samples, percentiles), to: Percentile
 
   @doc """
   A map showing which sample occurs how often in the samples.
@@ -631,6 +647,9 @@ defmodule Statistex do
       iex> Statistex.outlier_bounds([3, 4, 5])
       {0.0, 8.0}
 
+      iex> Statistex.outlier_bounds([4, 5, 3])
+      {0.0, 8.0}
+
       iex> Statistex.outlier_bounds([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50])
       {22.5, 66.5}
 
@@ -640,19 +659,21 @@ defmodule Statistex do
   @spec outlier_bounds(samples, keyword) :: {lower :: number, upper :: number}
   def outlier_bounds(samples, options \\ [])
   def outlier_bounds([], _), do: raise(ArgumentError, @empty_list_error_message)
-  def outlier_bounds(samples, options), do: samples |> Enum.sort() |> do_outlier_bounds(options)
+  def outlier_bounds(samples, options), do: do_outlier_bounds(samples, options)
 
   defp do_outlier_bounds(samples, options) do
+    # double check do we need both get lazies here?
     percentiles =
       Keyword.get_lazy(options, :percentiles, fn ->
-        Percentile.percentiles(samples, [@first_quartile, @third_quartile])
+        Percentile.percentiles(samples, [@first_quartile, @third_quartile], options)
       end)
 
     q1 = get_percentile(samples, @first_quartile, percentiles)
     q3 = get_percentile(samples, @third_quartile, percentiles)
     iqr = q3 - q1
+    outlier_tolerance = iqr * @iqr_factor
 
-    {q1 - iqr * @iqr_factor, q3 + iqr * @iqr_factor}
+    {q1 - outlier_tolerance, q3 + outlier_tolerance}
   end
 
   @doc """
@@ -671,21 +692,21 @@ defmodule Statistex do
   """
   @spec outliers(samples, keyword) :: samples | []
   def outliers(samples, options \\ []) do
-    {outliers, _rest} = samples |> Enum.sort() |> do_outliers(options)
+    sorted_samples = maybe_sort(samples, options)
+
+    # maybe allow folks to get the same
+    {outliers, _rest} = do_outliers(sorted_samples, options)
 
     outliers
   end
 
-  defp do_outliers(samples, options) do
+  defp do_outliers(sorted_samples, options) do
     {lower_bound, upper_bound} =
-      Keyword.get_lazy(options, :outlier_bounds, fn -> do_outlier_bounds(samples, options) end)
-
-    {min, rest} = Enum.split_while(samples, fn sample -> sample < lower_bound end)
-
-    {max, rest} =
-      rest |> Enum.reverse() |> Enum.split_while(fn sample -> sample > upper_bound end)
+      Keyword.get_lazy(options, :outlier_bounds, fn ->
+        do_outlier_bounds(sorted_samples, options)
+      end)
 
-    {min ++ max, rest}
+    Enum.split_with(sorted_samples, fn sample -> sample < lower_bound || sample > upper_bound end)
   end
 
   defp get_percentile(samples, percentile, percentiles) do
diff --git a/lib/statistex/helper.ex b/lib/statistex/helper.ex
new file mode 100644
index 0000000..958d034
--- /dev/null
+++ b/lib/statistex/helper.ex
@@ -0,0 +1,18 @@
+defmodule Statistex.Helper do
+  @moduledoc false
+  # Everyone loves helper modules... ok ok, no. But I needed/wanted this function,
+  # but didn't wanna put it on the main module.
+
+  # With the design goal that we don't want to needlessly do operations, esp. big ones
+  # like sorting we need an optional `sorted?` arguments in a bunch of places.
+  # This unifies the handling of that.
+  def maybe_sort(samples, options) do
+    sorted? = Access.get(options, :sorted?, false)
+
+    if sorted? do
+      samples
+    else
+      Enum.sort(samples)
+    end
+  end
+end
diff --git a/lib/statistex/percentile.ex b/lib/statistex/percentile.ex
index 53887a9..4cab86b 100644
--- a/lib/statistex/percentile.ex
+++ b/lib/statistex/percentile.ex
@@ -1,22 +1,27 @@
 defmodule Statistex.Percentile do
   @moduledoc false
 
-  @spec percentiles(Statistex.samples(), number | [number, ...]) ::
+  import Statistex.Helper, only: [maybe_sort: 2]
+
+  @spec percentiles(Statistex.samples(), number | [number, ...], keyword()) ::
           Statistex.percentiles()
-  def percentiles([], _) do
+  def percentiles(samples, percentiles, options \\ [])
+
+  def percentiles([], _, _) do
     raise(
       ArgumentError,
       "Passed an empty list ([]) to calculate statistics from, please pass a list containing at least one number."
     )
   end
 
-  def percentiles(samples, percentile_ranks) do
+  def percentiles(samples, percentile_ranks, options) do
     number_of_samples = length(samples)
+    sorted_samples = maybe_sort(samples, options)
 
     percentile_ranks
     |> List.wrap()
     |> Enum.reduce(%{}, fn percentile_rank, acc ->
-      perc = percentile(samples, number_of_samples, percentile_rank)
+      perc = percentile(sorted_samples, number_of_samples, percentile_rank)
       Map.put(acc, percentile_rank, perc)
     end)
   end

From 0d6bdbefea13d5a1e1aedbfa82fe9e1095f74b5c Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 15:32:25 +0200
Subject: [PATCH 12/23] Clean up the property based tests a little

---
 test/statistex_test.exs | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index 59a30d5..be590cd 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -162,6 +162,12 @@ defmodule Statistex.StatistexTest do
     defp assert_statistics_properties(samples) do
       stats = statistics(samples)
 
+      assert_basic_statistics(stats)
+      assert_mode_in_samples(stats, samples)
+      frequency_assertions(stats, samples)
+    end
+
+    defp assert_basic_statistics(stats) do
       assert stats.sample_size >= 1
       assert stats.minimum <= stats.maximum
 
@@ -176,8 +182,9 @@ defmodule Statistex.StatistexTest do
       assert stats.variance >= 0
       assert stats.standard_deviation >= 0
       assert stats.standard_deviation_ratio >= 0
+    end
 
-      # mode actually occurs in the samples
+    defp assert_mode_in_samples(stats, samples) do
       case stats.mode do
         [_ | _] ->
           Enum.each(stats.mode, fn mode ->
@@ -191,7 +198,9 @@ defmodule Statistex.StatistexTest do
         mode ->
           assert mode in samples
       end
+    end
 
+    defp frequency_assertions(stats, samples) do
       frequency_distribution = stats.frequency_distribution
       frequency_entry_count = map_size(frequency_distribution)
 
@@ -208,7 +217,7 @@ defmodule Statistex.StatistexTest do
       # all samples are in frequencies
       Enum.each(samples, fn sample -> assert Map.has_key?(frequency_distribution, sample) end)
 
-      # counts some up to sample_size
+      # counts of frequencies sum up to sample_size
       count_sum =
         frequency_distribution
         |> Map.values()

From 20e720d2d70885195f81b03cef273d83766d1253 Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 15:35:04 +0200
Subject: [PATCH 13/23] New property: shuffling the samples doesn't change the
 result

---
 test/statistex_test.exs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index be590cd..cb27589 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -165,6 +165,10 @@ defmodule Statistex.StatistexTest do
       assert_basic_statistics(stats)
       assert_mode_in_samples(stats, samples)
       frequency_assertions(stats, samples)
+
+      # shuffling values around shouldn't change the results
+      shuffled_stats = samples |> Enum.shuffle() |> statistics()
+      assert stats == shuffled_stats
     end
 
     defp assert_basic_statistics(stats) do

From 29a2f30f1491e2bf721157772933ce0650537458 Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 15:42:54 +0200
Subject: [PATCH 14/23] More property based properties

---
 test/statistex_test.exs | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index cb27589..187d27b 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -164,7 +164,8 @@ defmodule Statistex.StatistexTest do
 
       assert_basic_statistics(stats)
       assert_mode_in_samples(stats, samples)
-      frequency_assertions(stats, samples)
+      assert_frequencies(stats, samples)
+      assert_bounds(stats, samples)
 
       # shuffling values around shouldn't change the results
       shuffled_stats = samples |> Enum.shuffle() |> statistics()
@@ -183,6 +184,9 @@ defmodule Statistex.StatistexTest do
 
       assert stats.median == stats.percentiles[50]
 
+      assert stats.median >= stats.percentiles[25]
+      assert stats.percentiles[75] >= stats.median
+
       assert stats.variance >= 0
       assert stats.standard_deviation >= 0
       assert stats.standard_deviation_ratio >= 0
@@ -204,7 +208,7 @@ defmodule Statistex.StatistexTest do
       end
     end
 
-    defp frequency_assertions(stats, samples) do
+    defp assert_frequencies(stats, samples) do
       frequency_distribution = stats.frequency_distribution
       frequency_entry_count = map_size(frequency_distribution)
 
@@ -230,6 +234,16 @@ defmodule Statistex.StatistexTest do
       assert count_sum == stats.sample_size
     end
 
+    defp assert_bounds(stats, samples) do
+      Enum.each(stats.outliers, fn outlier ->
+        assert outlier in samples
+        assert outlier < stats.lower_outlier_bound || outlier > stats.upper_outlier_bound
+      end)
+
+      assert stats.lower_outlier_bound <= stats.percentiles[25]
+      assert stats.upper_outlier_bound >= stats.percentiles[75]
+    end
+
     defp big_list_big_floats do
       sized(fn size ->
         resize(

From fee2065fa8293bc6b55d69dea4fa987a5fb568f3 Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 16:11:43 +0200
Subject: [PATCH 15/23] exclude_outliers shown in docs, cleaned up docs and
 some properties to go along

---
 lib/statistex.ex        | 87 +++++++++++++++++++----------------------
 test/statistex_test.exs | 45 ++++++++++++++++++++-
 2 files changed, 84 insertions(+), 48 deletions(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index d0661b9..b3402d5 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -105,62 +105,57 @@ defmodule Statistex do
 
   ## Options
 
-  With a `percentiles` options arguments for the calculation of percentiles (see `percentiles/2`) can
-  be given. The percentiles 25th, 50th (median) and 75th are always calculated.
+  * `percentiles`: percentiles to calculate (see `percentiles/2`).
+  The percentiles 25th, 50th (median) and 75th are always calculated.
 
-  The option `exclude_outliers` can be set to `true`, `false`. Defaults to `false`.
-  If this option is set to `true` the outliers are excluded
-  and the statistics are calculated with the rest of the samples.
+  * `exclude_outliers` can be set to `true` or `false`. Defaults to `false`.
+  If this option is set to `true` the outliers are excluded from the calculation
+  of the statistics.
 
   ## Examples
 
-      iex> Statistex.statistics([200, 400, 400, 400, 500, 500, 500, 700, 900])
+      iex> Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900])
       %Statistex{
-        average:                  500.0,
-        variance:                 40_000.0,
-        standard_deviation:       200.0,
-        standard_deviation_ratio: 0.4,
-        median:                   500.0,
-        percentiles:              %{25 => 400.0, 50 => 500.0, 75 => 600.0},
-        frequency_distribution:   %{
-          200 => 1,
-          400 => 3,
-          500 => 3,
-          700 => 1,
-          900 => 1
-        },
-        mode:                     [500, 400],
-        minimum:                  200,
-        maximum:                  900,
-        sample_size:              9,
-        total:                    4500,
-        outliers: [],
-        lower_outlier_bound: 100.0,
-        upper_outlier_bound: 900.0
+        total: 4450,
+        average: 445.0,
+        variance: 61_361.11111111111,
+        standard_deviation: 247.71175004652304,
+        standard_deviation_ratio: 0.5566556180820742,
+        median: 475.0,
+        percentiles: %{25 => 350.0, 50 => 475.0, 75 => 525.0},
+        frequency_distribution: %{50 => 2, 450 => 3, 500 => 3, 600 => 1, 900 => 1},
+        mode: [500, 450],
+        minimum: 50,
+        maximum: 900,
+        lower_outlier_bound: 87.5,
+        upper_outlier_bound: 787.5,
+        outliers: [50, 50, 900],
+        sample_size: 10
       }
 
-      iex> Statistex.statistics([])
-      ** (ArgumentError) Passed an empty list ([]) to calculate statistics from, please pass a list containing at least one number.
-
-      iex> Statistex.statistics([0, 0, 0, 0])
+      # excluding outliers changes the results
+      iex> Statistex.statistics([50, 50, 450, 450, 450, 500, 500, 500, 600, 900], exclude_outliers: true)
       %Statistex{
-        average:                  0.0,
-        variance:                 0.0,
-        standard_deviation:       0.0,
-        standard_deviation_ratio: 0.0,
-        median:                   0.0,
-        percentiles:              %{25 => 0.0, 50 => 0.0, 75 => 0.0},
-        frequency_distribution:   %{0 => 4},
-        mode:                     0,
-        minimum:                  0,
-        maximum:                  0,
-        sample_size:              4,
-        total:                    0,
-        outliers: [],
-        lower_outlier_bound: 0.0,
-        upper_outlier_bound: 0.0,
+        total: 3450,
+        average: 492.85714285714283,
+        variance: 2857.142857142857,
+        standard_deviation: 53.452248382484875,
+        standard_deviation_ratio: 0.1084538372977954,
+        median: 500.0,
+        percentiles: %{25 => 450.0, 50 => 500.0, 75 => 500.0},
+        frequency_distribution: %{450 => 3, 500 => 3, 600 => 1},
+        mode: [500, 450],
+        maximum: 600,
+        minimum: 450,
+        lower_outlier_bound: 87.5,
+        upper_outlier_bound: 787.5,
+        outliers: [50, 50, 900],
+        sample_size: 7
       }
 
+      iex> Statistex.statistics([])
+      ** (ArgumentError) Passed an empty list ([]) to calculate statistics from, please pass a list containing at least one number.
+
   """
   @spec statistics(samples, configuration) :: t()
   def statistics(samples, configuration \\ [])
diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index 187d27b..e4a62d6 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -26,6 +26,26 @@ defmodule Statistex.StatistexTest do
   end
 
   describe ".statistics/2" do
+    test "all 0 values do what you think they would" do
+      assert Statistex.statistics([0, 0, 0, 0]) == %Statistex{
+               average: 0.0,
+               variance: 0.0,
+               standard_deviation: 0.0,
+               standard_deviation_ratio: 0.0,
+               median: 0.0,
+               percentiles: %{25 => 0.0, 50 => 0.0, 75 => 0.0},
+               frequency_distribution: %{0 => 4},
+               mode: 0,
+               minimum: 0,
+               maximum: 0,
+               sample_size: 4,
+               total: 0,
+               outliers: [],
+               lower_outlier_bound: 0.0,
+               upper_outlier_bound: 0.0
+             }
+    end
+
     test "returns Statistex struct without outliers" do
       assert Statistex.statistics([200, 400, 400, 400, 500, 500, 500, 700, 900]) ==
                %Statistex{
@@ -165,7 +185,7 @@ defmodule Statistex.StatistexTest do
       assert_basic_statistics(stats)
       assert_mode_in_samples(stats, samples)
       assert_frequencies(stats, samples)
-      assert_bounds(stats, samples)
+      assert_bounds_and_outliers(stats, samples)
 
       # shuffling values around shouldn't change the results
       shuffled_stats = samples |> Enum.shuffle() |> statistics()
@@ -234,7 +254,7 @@ defmodule Statistex.StatistexTest do
       assert count_sum == stats.sample_size
     end
 
-    defp assert_bounds(stats, samples) do
+    defp assert_bounds_and_outliers(stats, samples) do
       Enum.each(stats.outliers, fn outlier ->
         assert outlier in samples
         assert outlier < stats.lower_outlier_bound || outlier > stats.upper_outlier_bound
@@ -242,6 +262,27 @@ defmodule Statistex.StatistexTest do
 
       assert stats.lower_outlier_bound <= stats.percentiles[25]
       assert stats.upper_outlier_bound >= stats.percentiles[75]
+
+      non_outlier_statistics = Statistex.statistics(samples, exclude_outliers: true)
+      # whether outliers are excluded or not, the reported outliers and bounds don't change
+      assert non_outlier_statistics.outliers == stats.outliers
+      assert non_outlier_statistics.lower_outlier_bound == stats.lower_outlier_bound
+      assert non_outlier_statistics.upper_outlier_bound == stats.upper_outlier_bound
+
+      if Enum.empty?(stats.outliers) do
+        # no outliers? Then excluding outliers shouldn't change anything!
+        assert non_outlier_statistics == stats
+      else
+        assert non_outlier_statistics.sample_size < stats.sample_size
+        assert non_outlier_statistics.standard_deviation < stats.standard_deviation
+        # property may not hold for the std_dev ratio seemingly as values may be skewed too much
+
+        frequency_occurrences = Map.keys(non_outlier_statistics.frequency_distribution)
+
+        # excluded outliers must not show up as keys of the frequency distribution
+        assert MapSet.intersection(MapSet.new(stats.outliers), MapSet.new(frequency_occurrences)) ==
+                 MapSet.new([])
+      end
     end
 
     defp big_list_big_floats do

From 690773f6250efd60e1864862c1560bfd64d07d9b Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 16:23:00 +0200
Subject: [PATCH 16/23] Simplify `statistics/2` as min/max aren't needed

Since we changed the rules for outlier bounds we don't need them
any more. Helps simplify the code quite a bit.
---
 lib/statistex.ex | 71 ++++++++++++++++--------------------------------
 1 file changed, 24 insertions(+), 47 deletions(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index b3402d5..6c1e83a 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -105,12 +105,13 @@ defmodule Statistex do
 
   ## Options
 
-  * `percentiles`: percentiles to calculate (see `percentiles/2`).
+  * `:percentiles`: percentiles to calculate (see `percentiles/2`).
   The percentiles 25th, 50th (median) and 75th are always calculated.
-
-  * `exclude_outliers` can be set to `true` or `false`. Defaults to `false`.
+  * `:exclude_outliers` can be set to `true` or `false`. Defaults to `false`.
   If this option is set to `true` the outliers are excluded from the calculation
   of the statistics.
+  * `:sorted?`: indicating the samples you're passing in are already sorted. Only set this,
+  if they are truly sorted - otherwise your results will be wrong.
 
   ## Examples
 
@@ -165,68 +166,44 @@ defmodule Statistex do
   end
 
   def statistics(samples, configuration) do
-    sorted_samples = Enum.sort(samples)
-
-    # these statistics are required to do the outlier calculations
-    %{minimum: minimum, maximum: maximum, percentiles: percentiles} =
-      base_statistics(sorted_samples, configuration)
+    sorted_samples = maybe_sort(samples, configuration)
 
-    outlier_bounds =
-      do_outlier_bounds(sorted_samples,
-        percentiles: percentiles,
-        minimum: minimum,
-        maximum: maximum
-      )
+    percentiles = calculate_percentiles(sorted_samples, configuration)
+    outlier_bounds = do_outlier_bounds(sorted_samples, percentiles: percentiles)
 
-    # make sure rest remains sorted and so can be used again to ok results
+    # rest remains sorted here/it's an important property
     {outliers, rest} = do_outliers(sorted_samples, outlier_bounds: outlier_bounds)
 
     if exclude_outliers?(configuration) and Enum.any?(outliers) do
-      # figure out to avoid double sorting
-      rest = Enum.sort(rest)
       # need to recalculate with the outliers removed
-      %{minimum: minimum, maximum: maximum, percentiles: percentiles} =
-        base_statistics(rest, configuration)
+      percentiles = calculate_percentiles(rest, configuration)
 
-      create_full_statistics(rest, minimum, maximum, percentiles, outliers, outlier_bounds)
+      create_full_statistics(rest, percentiles, outliers, outlier_bounds)
     else
-      create_full_statistics(
-        sorted_samples,
-        minimum,
-        maximum,
-        percentiles,
-        outliers,
-        outlier_bounds
-      )
+      create_full_statistics(sorted_samples, percentiles, outliers, outlier_bounds)
     end
   end
 
-  defp base_statistics(sorted_samples, configuration) do
-    minimum = hd(sorted_samples)
-    maximum = List.last(sorted_samples)
-
-    percentiles = calculate_percentiles(sorted_samples, configuration)
-
-    %{minimum: minimum, maximum: maximum, percentiles: percentiles}
-  end
-
   defp exclude_outliers?(configuration) do
     Access.get(configuration, :exclude_outliers) == true
   end
 
   # maybe make argument a map
-  defp create_full_statistics(samples, minimum, maximum, percentiles, outliers, outlier_bounds) do
-    total = total(samples)
-    sample_size = length(samples)
-    average = average(samples, total: total, sample_size: sample_size)
-    variance = variance(samples, average: average, sample_size: sample_size)
+  defp create_full_statistics(sorted_samples, percentiles, outliers, outlier_bounds) do
+    total = total(sorted_samples)
+    sample_size = length(sorted_samples)
+    minimum = hd(sorted_samples)
+    maximum = List.last(sorted_samples)
+
+    average = average(sorted_samples, total: total, sample_size: sample_size)
+    variance = variance(sorted_samples, average: average, sample_size: sample_size)
 
-    frequency_distribution = frequency_distribution(samples)
+    frequency_distribution = frequency_distribution(sorted_samples)
 
-    standard_deviation = standard_deviation(samples, variance: variance)
+    standard_deviation = standard_deviation(sorted_samples, variance: variance)
 
     standard_deviation_ratio =
-      standard_deviation_ratio(samples, standard_deviation: standard_deviation)
+      standard_deviation_ratio(sorted_samples, standard_deviation: standard_deviation)
 
     {lower_outlier_bound, upper_outlier_bound} = outlier_bounds
 
@@ -236,10 +213,10 @@ defmodule Statistex do
       variance: variance,
       standard_deviation: standard_deviation,
       standard_deviation_ratio: standard_deviation_ratio,
-      median: median(samples, percentiles: percentiles),
+      median: median(sorted_samples, percentiles: percentiles),
       percentiles: percentiles,
       frequency_distribution: frequency_distribution,
-      mode: mode(samples, frequency_distribution: frequency_distribution),
+      mode: mode(sorted_samples, frequency_distribution: frequency_distribution),
       minimum: minimum,
       maximum: maximum,
       lower_outlier_bound: lower_outlier_bound,

From 1da5611b2f5286a6da5c5b535fa2763274961ca0 Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 17:02:52 +0200
Subject: [PATCH 17/23] Redo docs to highlight optional arguments

Also cracked down on some of the too flexible code surrounding
`get_percentile` - there were layers of get lazies here because
the API we promised is almost too forgiving.

Hence, made it easier - if the percentiles we need are there
take them, if not calculate them without some layers to it
that I also found too hard to follow now :)
---
 lib/statistex.ex        | 94 ++++++++++++++++++++++++++++++-----------
 test/statistex_test.exs |  5 +++
 2 files changed, 74 insertions(+), 25 deletions(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index 6c1e83a..f5890af 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -110,7 +110,7 @@ defmodule Statistex do
   * `:exclude_outliers` can be set to `true` or `false`. Defaults to `false`.
   If this option is set to `true` the outliers are excluded from the calculation
   of the statistics.
-  * `:sorted?`: indicating the samples you're passing in are already sorted. Only set this,
+  * `:sorted?`: indicating the samples you're passing in are already sorted. Defaults to `false`. Only set this,
   if they are truly sorted - otherwise your results will be wrong.
 
   ## Examples
@@ -188,7 +188,6 @@ defmodule Statistex do
     Access.get(configuration, :exclude_outliers) == true
   end
 
-  # maybe make argument a map
   defp create_full_statistics(sorted_samples, percentiles, outliers, outlier_bounds) do
     total = total(sorted_samples)
     sample_size = length(sorted_samples)
@@ -376,7 +375,7 @@ defmodule Statistex do
       iex> Statistex.standard_deviation([4, 9, 11, 12, 17, 5, 8, 12, 12])
       4.0
 
-      iex> Statistex.standard_deviation([4, 9, 11, 12, 17, 5, 8, 12, 12], variance: 16.0)
+      iex> Statistex.standard_deviation(:dontcare, variance: 16.0)
       4.0
 
       iex> Statistex.standard_deviation([42])
@@ -462,9 +461,8 @@ defmodule Statistex do
   @doc """
   Calculates the value at the `percentile_rank`-th percentile.
 
-  Think of this as the
-  value below which `percentile_rank` percent of the samples lie. For example,
-  if `Statistex.percentile(samples, 99) == 123.45`,
+  Think of this as the value below which `percentile_rank` percent of the samples lie.
+  For example, if `Statistex.percentile(samples, 99) == 123.45`,
   99% of samples are less than 123.45.
 
   Passing a number for `percentile_rank` calculates a single percentile.
@@ -478,11 +476,19 @@ defmodule Statistex do
 
   `Argumenterror` is raised if the given list is empty.
 
+  ## Options
+
+  * `:sorted?`: indicating the samples you're passing in are already sorted. Defaults to `false`. Only set this,
+  if they are truly sorted - otherwise your results will be wrong.
+
   ## Examples
 
       iex> Statistex.percentiles([5, 3, 4, 5, 1, 3, 1, 3], 12.5)
       %{12.5 => 1.0}
 
+      iex> Statistex.percentiles([1, 1, 3, 3, 3, 4, 5, 5], 12.5, sorted?: true)
+      %{12.5 => 1.0}
+
       iex> Statistex.percentiles([5, 3, 4, 5, 1, 3, 1, 3], [50])
       %{50 => 3.0}
 
@@ -581,11 +587,26 @@ defmodule Statistex do
 
   `Argumenterror` is raised if the given list is empty.
 
+  ## Options
+  * `:percentiles` - you can pass it a map of calculated percentiles to fetch the median from (it is the 50th percentile).
+  If it doesn't include the median/50th percentile - it will still be computed.
+  * `:sorted?`: indicating the samples you're passing in are already sorted. Defaults to `false`. Only set this,
+  if they are truly sorted - otherwise your results will be wrong. Sorting only occurs when percentiles aren't provided.
+
   ## Examples
 
       iex> Statistex.median([1, 3, 4, 6, 7, 8, 9])
       6.0
 
+      iex> Statistex.median([1, 3, 4, 6, 7, 8, 9], percentiles: %{50 => 6.0})
+      6.0
+
+      iex> Statistex.median([1, 3, 4, 6, 7, 8, 9], percentiles: %{25 => 3.0})
+      6.0
+
+      iex> Statistex.median([1, 3, 4, 6, 7, 8, 9], sorted?: true)
+      6.0
+
       iex> Statistex.median([1, 2, 3, 4, 5, 6, 8, 9])
       4.5
 
@@ -600,12 +621,19 @@ defmodule Statistex do
   def median([], _), do: raise(ArgumentError, @empty_list_error_message)
 
   def median(samples, options) do
+    percentiles = Access.get(options, :percentiles, %{})
+
     percentiles =
-      Keyword.get_lazy(options, :percentiles, fn ->
-        Percentile.percentiles(samples, @median_percentile)
-      end)
+      case percentiles do
+        %{@median_percentile => _} ->
+          percentiles
 
-    get_percentile(samples, @median_percentile, percentiles)
+        # missing necessary keys
+        %{} ->
+          Percentile.percentiles(samples, @median_percentile, options)
+      end
+
+    Map.fetch!(percentiles, @median_percentile)
   end
 
   @doc """
@@ -614,6 +642,12 @@ defmodule Statistex do
   Any sample that is `<` as the lower bound and any sample `>` are outliers of
   the given `samples`.
 
+  ## Options
+  * `:percentiles` - you can pass it a map of calculated percentiles (25th and 75th are needed).
+  If it doesn't include them - it will still be computed.
+  * `:sorted?`: indicating the samples you're passing in are already sorted. Defaults to `false`. Only set this,
+  if they are truly sorted - otherwise your results will be wrong. Sorting only occurs when percentiles aren't provided.
+
   ## Examples
 
       iex> Statistex.outlier_bounds([3, 4, 5])
@@ -622,6 +656,12 @@ defmodule Statistex do
       iex> Statistex.outlier_bounds([4, 5, 3])
       {0.0, 8.0}
 
+      iex> Statistex.outlier_bounds([3, 4, 5], sorted?: true)
+      {0.0, 8.0}
+
+      iex> Statistex.outlier_bounds([3, 4, 5], percentiles: %{25 => 3.0, 75 => 5.0})
+      {0.0, 8.0}
+
       iex> Statistex.outlier_bounds([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50])
       {22.5, 66.5}
 
@@ -634,14 +674,20 @@ defmodule Statistex do
   def outlier_bounds(samples, options), do: do_outlier_bounds(samples, options)
 
   defp do_outlier_bounds(samples, options) do
-    # double check do we need both get lazies here?
+    percentiles = Access.get(options, :percentiles, %{})
+
     percentiles =
-      Keyword.get_lazy(options, :percentiles, fn ->
-        Percentile.percentiles(samples, [@first_quartile, @third_quartile], options)
-      end)
+      case percentiles do
+        %{@first_quartile => _, @third_quartile => _} ->
+          percentiles
+
+        # missing necessary keys
+        %{} ->
+          Percentile.percentiles(samples, [@first_quartile, @third_quartile], options)
+      end
 
-    q1 = get_percentile(samples, @first_quartile, percentiles)
-    q3 = get_percentile(samples, @third_quartile, percentiles)
+    q1 = Map.fetch!(percentiles, @first_quartile)
+    q3 = Map.fetch!(percentiles, @third_quartile)
     iqr = q3 - q1
     outlier_tolerance = iqr * @iqr_factor
 
@@ -651,6 +697,12 @@ defmodule Statistex do
   @doc """
   Returns all outliers for the given `samples`.
 
+  ## Options
+  * `:percentiles` - you can pass it a map of calculated percentiles (25th and 75th are needed).
+  If it doesn't include them - it will still be computed.
+  * `:sorted?`: indicating the samples you're passing in are already sorted. Defaults to `false`. Only set this,
+  if they are truly sorted - otherwise your results will be wrong. Sorting only occurs when percentiles aren't provided.
+
   ## Examples
 
       iex> Statistex.outliers([3, 4, 5])
@@ -664,10 +716,8 @@ defmodule Statistex do
   """
   @spec outliers(samples, keyword) :: samples | []
   def outliers(samples, options \\ []) do
-    sorted_samples = maybe_sort(samples, options)
-
     # maybe allow folks to get the same
-    {outliers, _rest} = do_outliers(sorted_samples, options)
+    {outliers, _rest} = do_outliers(samples, options)
 
     outliers
   end
@@ -681,12 +731,6 @@ defmodule Statistex do
     Enum.split_with(sorted_samples, fn sample -> sample < lower_bound || sample > upper_bound end)
   end
 
-  defp get_percentile(samples, percentile, percentiles) do
-    Map.get_lazy(percentiles, percentile, fn ->
-      samples |> Percentile.percentiles(percentile) |> Map.fetch!(percentile)
-    end)
-  end
-
   @doc """
   The biggest sample.
 
diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index e4a62d6..43d77f2 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -10,6 +10,11 @@ defmodule Statistex.StatistexTest do
     test "if handed percentiles missing the median percentile still calculates it" do
       assert Statistex.median([1, 2, 3, 4, 5, 6, 8, 9], percentiles: %{}) == 4.5
     end
+
+    # what an odd test to write, huh? Well that way we can see we trust the `sorted?` value not resorting.
+    test "if told that the list is sorted while it isn't the result will be wrong" do
+      assert Statistex.median([1, 6, 4, 3, 5, 9, 2, 8], sorted?: true) != 4.5
+    end
   end
 
   describe ".outlier_bounds/2" do

From 2b45471e672e0a5ef760262f74c0eb6369e9cb69 Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 17:15:43 +0200
Subject: [PATCH 18/23] Make outliers return both the outliers and remaining
 values just as we use it

---
 lib/statistex.ex        | 34 ++++++++++++++++------------------
 test/statistex_test.exs |  3 ++-
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index f5890af..91b985c 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -169,10 +169,10 @@ defmodule Statistex do
     sorted_samples = maybe_sort(samples, configuration)
 
     percentiles = calculate_percentiles(sorted_samples, configuration)
-    outlier_bounds = do_outlier_bounds(sorted_samples, percentiles: percentiles)
+    outlier_bounds = outlier_bounds(sorted_samples, percentiles: percentiles)
 
     # rest remains sorted here/it's an important property
-    {outliers, rest} = do_outliers(sorted_samples, outlier_bounds: outlier_bounds)
+    {outliers, rest} = outliers(sorted_samples, outlier_bounds: outlier_bounds)
 
     if exclude_outliers?(configuration) and Enum.any?(outliers) do
       # need to recalculate with the outliers removed
@@ -671,9 +671,8 @@ defmodule Statistex do
   @spec outlier_bounds(samples, keyword) :: {lower :: number, upper :: number}
   def outlier_bounds(samples, options \\ [])
   def outlier_bounds([], _), do: raise(ArgumentError, @empty_list_error_message)
-  def outlier_bounds(samples, options), do: do_outlier_bounds(samples, options)
 
-  defp do_outlier_bounds(samples, options) do
+  def outlier_bounds(samples, options) do
     percentiles = Access.get(options, :percentiles, %{})
 
     percentiles =
@@ -695,9 +694,12 @@ defmodule Statistex do
   end
 
   @doc """
-  Returns all outliers for the given `samples`.
+  Returns all outliers for the given `samples`, along with the remaining values.
+
+  Returns: `{outliers, remaining_samples}` where `remaining_samples` has the outliers removed.
 
   ## Options
+  * `:outlier_bounds` - if you already have calculated the outlier bounds.
   * `:percentiles` - you can pass it a map of calculated percentiles (25th and 75th are needed).
   If it doesn't include them - it will still be computed.
   * `:sorted?`: indicating the samples you're passing in are already sorted. Defaults to `false`. Only set this,
@@ -706,29 +708,25 @@ defmodule Statistex do
   ## Examples
 
       iex> Statistex.outliers([3, 4, 5])
-      []
+      {[], [3, 4, 5]}
 
       iex> Statistex.outliers([1, 2, 6, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50])
-      [1, 2, 6]
+      {[1, 2, 6], [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}
+
+      iex> Statistex.outliers([50, 50, 1, 50, 50, 50, 50, 50, 2, 50, 50, 50, 50, 6])
+      {[1, 2, 6], [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}
 
       iex> Statistex.outliers([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99])
-      [99, 99, 99]
+      {[99, 99, 99], [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]}
   """
-  @spec outliers(samples, keyword) :: samples | []
+  @spec outliers(samples, keyword) :: {samples | [], samples}
   def outliers(samples, options \\ []) do
-    # maybe allow folks to get the same
-    {outliers, _rest} = do_outliers(samples, options)
-
-    outliers
-  end
-
-  defp do_outliers(sorted_samples, options) do
     {lower_bound, upper_bound} =
       Keyword.get_lazy(options, :outlier_bounds, fn ->
-        do_outlier_bounds(sorted_samples, options)
+        outlier_bounds(samples, options)
       end)
 
-    Enum.split_with(sorted_samples, fn sample -> sample < lower_bound || sample > upper_bound end)
+    Enum.split_with(samples, fn sample -> sample < lower_bound || sample > upper_bound end)
   end
 
   @doc """
diff --git a/test/statistex_test.exs b/test/statistex_test.exs
index 43d77f2..5292270 100644
--- a/test/statistex_test.exs
+++ b/test/statistex_test.exs
@@ -11,7 +11,8 @@ defmodule Statistex.StatistexTest do
       assert Statistex.median([1, 2, 3, 4, 5, 6, 8, 9], percentiles: %{}) == 4.5
     end
 
-    # what an odd test to write, huh? Well that way we can see we trust the `sorted?` value not resorting.
+    # what an odd test to write, huh? Well that way we can see we trust the `sorted?`
+    # value not resorting.
     test "if told that the list is sorted while it isn't the result will be wrong" do
       assert Statistex.median([1, 6, 4, 3, 5, 9, 2, 8], sorted?: true) != 4.5
     end

From c222eeadd9f02a430c5c37685839d4e922f4b96d Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 17:16:08 +0200
Subject: [PATCH 19/23] ignore dialyzer stuff

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index dbc9658..33b2fa2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,6 @@ docs
 
 # Don't feel like tracking that gives me what I want any more :)
 .tool-versions
+
+# dialyzer
+/tools

From b9a38f42c324cbf46e6458951c5c35798971ef58 Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 17:18:19 +0200
Subject: [PATCH 20/23] Map.new >>>> Enum.reduce in this case

---
 lib/statistex/percentile.ex | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/statistex/percentile.ex b/lib/statistex/percentile.ex
index 4cab86b..91688b6 100644
--- a/lib/statistex/percentile.ex
+++ b/lib/statistex/percentile.ex
@@ -20,9 +20,9 @@ defmodule Statistex.Percentile do
 
     percentile_ranks
     |> List.wrap()
-    |> Enum.reduce(%{}, fn percentile_rank, acc ->
+    |> Map.new(fn percentile_rank ->
       perc = percentile(sorted_samples, number_of_samples, percentile_rank)
-      Map.put(acc, percentile_rank, perc)
+      {percentile_rank, perc}
     end)
   end
 

From e5edbb62c095a0f7bebad83767a1c594f895dee5 Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 17:20:57 +0200
Subject: [PATCH 21/23] Get test coverage back to 100% cos we can

---
 lib/statistex.ex | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/statistex.ex b/lib/statistex.ex
index 91b985c..8b67d9c 100644
--- a/lib/statistex.ex
+++ b/lib/statistex.ex
@@ -642,6 +642,8 @@ defmodule Statistex do
   Any sample that is `<` as the lower bound and any sample `>` are outliers of
   the given `samples`.
 
+  List passed needs to be non empty, otherwise an `ArgumentError` is raised.
+
   ## Options
   * `:percentiles` - you can pass it a map of calculated percentiles (25th and 75th are needed).
   If it doesn't include them - it will still be computed.
@@ -667,6 +669,9 @@ defmodule Statistex do
 
       iex> Statistex.outlier_bounds([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 99, 99, 99])
       {31.625, 80.625}
+
+      iex> Statistex.outlier_bounds([])
+      ** (ArgumentError) Passed an empty list ([]) to calculate statistics from, please pass a list containing at least one number.
   """
   @spec outlier_bounds(samples, keyword) :: {lower :: number, upper :: number}
   def outlier_bounds(samples, options \\ [])

From 93620e7ce239e932987edcd2c7301c01f12a080f Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 17:24:31 +0200
Subject: [PATCH 22/23] Undo changes having the tests expect the input list is
 ordered

I think that's a dangerous assumption and our code should be
resilient to it :)
---
 test/statistex/percentile_test.exs | 34 +++++++++++++++---------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/test/statistex/percentile_test.exs b/test/statistex/percentile_test.exs
index 020f523..fbc03a5 100644
--- a/test/statistex/percentile_test.exs
+++ b/test/statistex/percentile_test.exs
@@ -4,20 +4,20 @@ defmodule Statistex.PercentileTest do
 
   doctest Statistex.Percentile
 
-  @nist_sample_data Enum.sort([
-                      95.1772,
-                      95.1567,
-                      95.1937,
-                      95.1959,
-                      95.1442,
-                      95.0610,
-                      95.1591,
-                      95.1195,
-                      95.1065,
-                      95.0925,
-                      95.1990,
-                      95.1682
-                    ])
+  @nist_sample_data [
+    95.1772,
+    95.1567,
+    95.1937,
+    95.1959,
+    95.1442,
+    95.0610,
+    95.1591,
+    95.1195,
+    95.1065,
+    95.0925,
+    95.1990,
+    95.1682
+  ]
 
   # Test data from:
   #   http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
@@ -49,7 +49,7 @@ defmodule Statistex.PercentileTest do
   end
 
   describe "a list of two elements" do
-    @samples [200, 300]
+    @samples [300, 200]
     test "1st percentile (small sample size simply picks first element)" do
       %{1 => result} = percentiles(@samples, [1])
       assert result == 200.0
@@ -67,7 +67,7 @@ defmodule Statistex.PercentileTest do
   end
 
   describe "seemingly problematic 2 element list [9, 1]" do
-    @samples [1, 9]
+    @samples [9, 1]
 
     percentiles = %{
       25 => 1,
@@ -88,7 +88,7 @@ defmodule Statistex.PercentileTest do
   end
 
   describe "a list of three elements" do
-    @samples [100, 200, 300]
+    @samples [100, 300, 200]
     test "1st percentile (small sample size simply picks first element)" do
       %{1 => result} = percentiles(@samples, [1])
       assert result == 100.0

From 5b7349835f93550f6f8f1f3e35e32495095f5c9c Mon Sep 17 00:00:00 2001
From: Tobias Pfeiffer <pragtob@gmail.com>
Date: Sat, 3 May 2025 17:32:29 +0200
Subject: [PATCH 23/23] Preliminary changelog

---
 CHANGELOG.md | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 34d69b2..2f43b51 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
+## 1.1 (Unreleased)
+
+This release adds functionality around identifying outliers.
+
+* the Statistex struct comes with more keys: `:lower_outlier_bound`, `:upper_outlier_bound` & `:outliers`,
+along with the new public functions `outliers/2` and `outlier_bounds/2`.
+* `statistics/2` now also accepts `exclude_outliers: true` to exclude the outliers from the calculation
+of statistics.
+* some functions have also been updated to accept more optional arguments such as `:sorted?` to avoid unnecessary extra work.
+
+Huge thanks for these changes go to [@NickNeck](https://github.com/NickNeck)!
+
 ## 1.0 2019-07-05
 
 Import of the initial functionality from [benchee](github.com/bencheeorg/benchee).
 
-Dubbed 1.0 because many people had already been running this code indirectly through benchee.
\ No newline at end of file
+Dubbed 1.0 because many people had already been running this code indirectly through benchee.