Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions quantile_estimator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ def __init__(self, *invariants):
self._invariants = _DEFAULT_INVARIANTS
else:
self._invariants = [_Quantile(q, e) for (q, e) in invariants]

self._buffer = []
self._head = None
self._observations = 0
self._items = 0

def observe(self, value):
"""Samples an observation's value.
Expand Down Expand Up @@ -90,7 +90,7 @@ def query(self, rank):
return current._value

mid_rank = math.floor(rank * self._observations)
max_rank = mid_rank + math.floor(self._invariant(mid_rank, self._observations) / 2)
max_rank = mid_rank + math.ceil(self._invariant(mid_rank, self._observations) / 2)

rank = 0.0
while current._successor:
Expand All @@ -115,7 +115,8 @@ def _replace_batch(self):
return

if not self._head:
self._head, self._buffer = self._record(self._buffer[0], 1, 0, None), self._buffer[1:]
self._head = self._record(self._buffer[0], 1, 0, None)
self._buffer = self._buffer[1:]

rank = 0.0
current = self._head
Expand All @@ -136,7 +137,6 @@ def _replace_batch(self):
def _record(self, value, rank, delta, successor):
"""Catalogs a sample."""
self._observations += 1
self._items += 1

return _Sample(value, rank, delta, successor)

Expand Down Expand Up @@ -187,10 +187,10 @@ def __init__(self, quantile, inaccuracy):

"""Computes the delta for the observation."""
def _delta(self, rank, n):
if rank <= math.floor((self._quantile * n)):
if rank <= math.floor(self._quantile * n):
return self._coefficient_i * (n - rank)

return self._coefficient_ii * rank
else:
return self._coefficient_ii * rank


_DEFAULT_INVARIANTS = [_Quantile(0.50, 0.01), _Quantile(0.99, 0.001)]
Expand Down
26 changes: 17 additions & 9 deletions tests/test_estimator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
import random

import pytest
Expand All @@ -7,19 +8,26 @@

@pytest.mark.parametrize("num_observations", [1, 10, 100, 1000, 10000, 100000])
def test_random_observations(num_observations):
estimator = Estimator()
for _ in range(num_observations):
estimator.observe(random.randint(1, 1000) / 100)
invariants = (0.5, 0.01), (0.9, 0.01), (0.99, 0.01)
estimator = Estimator(*invariants)

assert 0 <= estimator.query(0.5) <= estimator.query(0.9) <= estimator.query(0.99) <= 10
values = [random.uniform(0, 100) for _ in range(num_observations)]
for value in values:
estimator.observe(value)

values.sort()
for quantile, inaccuracy in invariants:
min_rank = math.floor(quantile * num_observations - inaccuracy * num_observations)
max_rank = min(math.ceil(quantile * num_observations + inaccuracy * num_observations), num_observations - 1)
assert 0 <= values[min_rank] <= estimator.query(quantile) <= values[max_rank] <= 100


def test_border_invariants():
estimator = Estimator((0.0, 0.0), (1.0, 0.0))

values = [random.randint(1, 1000) for _ in range(1000)]
for x in values:
estimator.observe(x)
values = [random.uniform(0, 100) for _ in range(500)]
for value in values:
estimator.observe(value)

assert estimator.query(0) == min(values)
assert estimator.query(1) == max(values)
assert estimator.query(0.0) == min(values)
assert estimator.query(1.0) == max(values)