From aab2ee8e3b2668d7d6e8680bc84d1d72382eb7e8 Mon Sep 17 00:00:00 2001 From: developer-agent Date: Thu, 18 Jun 2026 10:46:14 -0400 Subject: [PATCH 1/2] fix: emit zero-value hits counter for active miss labels (OQP-2) When cache_hits is empty after a container restart, Prometheus has no time series for oqp_embedding_cache_hits_total and the Grafana cache hit rate panel shows "No data". For each label in cache_misses that is absent from cache_hits, emit a 0 sample so the time series always exists once the cache is active. agent-id: developer --- src/ollama_queue_proxy/routes/status.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/ollama_queue_proxy/routes/status.py b/src/ollama_queue_proxy/routes/status.py index 9c158fc..c62c963 100644 --- a/src/ollama_queue_proxy/routes/status.py +++ b/src/ollama_queue_proxy/routes/status.py @@ -202,6 +202,14 @@ async def metrics(request: Request): f'oqp_embedding_cache_hits_total{{client="{_pm_label(client_id)}",model="{_pm_label(model)}",' f'endpoint="{_pm_label(endpoint)}"}} {count}' ) + # Emit zero for miss labels absent from hits so Prometheus always has the time series + for label in cache_misses: + if label not in cache_hits: + client_id, model, endpoint = label.split(",", 2) + lines.append( + f'oqp_embedding_cache_hits_total{{client="{_pm_label(client_id)}",model="{_pm_label(model)}",' + f'endpoint="{_pm_label(endpoint)}"}} 0' + ) lines += [ "# HELP oqp_embedding_cache_misses_total Embedding cache misses", From c605e29d03ef9fe4ad20c8e0577907422fb4df48 Mon Sep 17 00:00:00 2001 From: developer-agent Date: Thu, 18 Jun 2026 10:49:43 -0400 Subject: [PATCH 2/2] chore: bump version to 0.3.2 agent-id: developer --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2a3a9c7..962f7e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ollama-queue-proxy" -version = "0.3.1" +version = "0.3.2" description = "Drop-in HTTP proxy for Ollama with priority queuing, per-client auth, and model-aware failover" readme = "README.md" license = { text = "MIT" }