diff --git a/quantile_estimator/__init__.py b/quantile_estimator/__init__.py index c4beb84..3c41560 100644 --- a/quantile_estimator/__init__.py +++ b/quantile_estimator/__init__.py @@ -48,10 +48,10 @@ def __init__(self, *invariants): self._invariants = _DEFAULT_INVARIANTS else: self._invariants = [_Quantile(q, e) for (q, e) in invariants] + self._buffer = [] self._head = None self._observations = 0 - self._items = 0 def observe(self, value): """Samples an observation's value. @@ -90,7 +90,7 @@ def query(self, rank): return current._value mid_rank = math.floor(rank * self._observations) - max_rank = mid_rank + math.floor(self._invariant(mid_rank, self._observations) / 2) + max_rank = mid_rank + math.ceil(self._invariant(mid_rank, self._observations) / 2) rank = 0.0 while current._successor: @@ -115,7 +115,8 @@ def _replace_batch(self): return if not self._head: - self._head, self._buffer = self._record(self._buffer[0], 1, 0, None), self._buffer[1:] + self._head = self._record(self._buffer[0], 1, 0, None) + self._buffer = self._buffer[1:] rank = 0.0 current = self._head @@ -136,7 +137,6 @@ def _replace_batch(self): def _record(self, value, rank, delta, successor): """Catalogs a sample.""" self._observations += 1 - self._items += 1 return _Sample(value, rank, delta, successor) @@ -187,10 +187,10 @@ def __init__(self, quantile, inaccuracy): """Computes the delta for the observation.""" def _delta(self, rank, n): - if rank <= math.floor((self._quantile * n)): + if rank <= math.floor(self._quantile * n): return self._coefficient_i * (n - rank) - - return self._coefficient_ii * rank + else: + return self._coefficient_ii * rank _DEFAULT_INVARIANTS = [_Quantile(0.50, 0.01), _Quantile(0.99, 0.001)] diff --git a/tests/test_estimator.py b/tests/test_estimator.py index 71dd8c5..a6e36d7 100644 --- a/tests/test_estimator.py +++ b/tests/test_estimator.py @@ -1,3 +1,4 @@ +import math import random import pytest @@ -7,19 +8,26 @@ @pytest.mark.parametrize("num_observations", [1, 10, 100, 1000, 10000, 100000]) def test_random_observations(num_observations): - estimator = Estimator() - for _ in range(num_observations): - estimator.observe(random.randint(1, 1000) / 100) + invariants = (0.5, 0.01), (0.9, 0.01), (0.99, 0.01) + estimator = Estimator(*invariants) - assert 0 <= estimator.query(0.5) <= estimator.query(0.9) <= estimator.query(0.99) <= 10 + values = [random.uniform(0, 100) for _ in range(num_observations)] + for value in values: + estimator.observe(value) + + values.sort() + for quantile, inaccuracy in invariants: + min_rank = math.floor(quantile * num_observations - inaccuracy * num_observations) + max_rank = min(math.ceil(quantile * num_observations + inaccuracy * num_observations), num_observations - 1) + assert 0 <= values[min_rank] <= estimator.query(quantile) <= values[max_rank] <= 100 def test_border_invariants(): estimator = Estimator((0.0, 0.0), (1.0, 0.0)) - values = [random.randint(1, 1000) for _ in range(1000)] - for x in values: - estimator.observe(x) + values = [random.uniform(0, 100) for _ in range(500)] + for value in values: + estimator.observe(value) - assert estimator.query(0) == min(values) - assert estimator.query(1) == max(values) + assert estimator.query(0.0) == min(values) + assert estimator.query(1.0) == max(values)