diff --git a/pyproject.toml b/pyproject.toml index 2a3a9c7..962f7e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ollama-queue-proxy" -version = "0.3.1" +version = "0.3.2" description = "Drop-in HTTP proxy for Ollama with priority queuing, per-client auth, and model-aware failover" readme = "README.md" license = { text = "MIT" } diff --git a/src/ollama_queue_proxy/routes/status.py b/src/ollama_queue_proxy/routes/status.py index 9c158fc..c62c963 100644 --- a/src/ollama_queue_proxy/routes/status.py +++ b/src/ollama_queue_proxy/routes/status.py @@ -202,6 +202,14 @@ async def metrics(request: Request): f'oqp_embedding_cache_hits_total{{client="{_pm_label(client_id)}",model="{_pm_label(model)}",' f'endpoint="{_pm_label(endpoint)}"}} {count}' ) + # Emit zero for miss labels absent from hits so Prometheus always has the time series + for label in cache_misses: + if label not in cache_hits: + client_id, model, endpoint = label.split(",", 2) + lines.append( + f'oqp_embedding_cache_hits_total{{client="{_pm_label(client_id)}",model="{_pm_label(model)}",' + f'endpoint="{_pm_label(endpoint)}"}} 0' + ) lines += [ "# HELP oqp_embedding_cache_misses_total Embedding cache misses",