Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ where

let router = router
.nest("/health", routes::health::serve())
.nest("/analytics", routes::analytics::serve())
.layer(middleware)
.with_state(global_app_state.clone());

Expand Down
1 change: 1 addition & 0 deletions src/routes.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod analytics;
pub mod hybrid_routing;
// pub mod data;
pub mod decide_gateway;
Expand Down
336 changes: 336 additions & 0 deletions src/routes/analytics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,336 @@
use std::collections::HashMap;
use std::sync::Arc;

use axum::{extract::Query, Json};
use serde::{Deserialize, Serialize};

use crate::tenant::GlobalAppState;

// ---------------------------------------------------------------------------
// Shared types
// ---------------------------------------------------------------------------

#[derive(Debug, Deserialize)]
pub struct TimeRangeParams {
/// Time range: 15m, 1h, 6h, 24h, 7d
pub range: Option<String>,
/// Bucket granularity: 10s, 1m, 5m, 1h
pub granularity: Option<String>,
}

#[derive(Debug, Deserialize)]
pub struct GatewayScoreParams {
pub merchant: Option<String>,
pub pmt: Option<String>,
pub gateway: Option<String>,
Comment on lines +23 to +25
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

merchant and pmt query params are accepted but never used for filtering, and gateway filtering is implemented as a substring match against the endpoint label. Consider either (a) implementing merchant/pmt filtering, and renaming the filter to endpoint (or making it an exact match) to avoid implying this filters by PSP/gateway name.

Suggested change
pub merchant: Option<String>,
pub pmt: Option<String>,
pub gateway: Option<String>,
/// Filters by endpoint label.
/// Accept `gateway` as a backward-compatible alias for older clients.
#[serde(alias = "gateway")]
pub endpoint: Option<String>,

Copilot uses AI. Check for mistakes.
#[serde(flatten)]
pub time: TimeRangeParams,
}

#[derive(Debug, Deserialize)]
pub struct DecisionParams {
pub group_by: Option<String>,
#[serde(flatten)]
pub time: TimeRangeParams,
Comment on lines +14 to +34
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

range and granularity are defined as query params but are not used by any handler in this module. This makes the API contract misleading (clients may assume time-windowed/bucketed results). Either implement time-range bucketing/filtering, or remove these params until supported.

Suggested change
pub struct TimeRangeParams {
/// Time range: 15m, 1h, 6h, 24h, 7d
pub range: Option<String>,
/// Bucket granularity: 10s, 1m, 5m, 1h
pub granularity: Option<String>,
}
#[derive(Debug, Deserialize)]
pub struct GatewayScoreParams {
pub merchant: Option<String>,
pub pmt: Option<String>,
pub gateway: Option<String>,
#[serde(flatten)]
pub time: TimeRangeParams,
}
#[derive(Debug, Deserialize)]
pub struct DecisionParams {
pub group_by: Option<String>,
#[serde(flatten)]
pub time: TimeRangeParams,
pub struct GatewayScoreParams {
pub merchant: Option<String>,
pub pmt: Option<String>,
pub gateway: Option<String>,
}
#[derive(Debug, Deserialize)]
pub struct DecisionParams {
pub group_by: Option<String>,

Copilot uses AI. Check for mistakes.
}

#[derive(Debug, Deserialize)]
pub struct RoutingStatsParams {
pub range: Option<String>,
}

// ---------------------------------------------------------------------------
// Response types
// ---------------------------------------------------------------------------

#[derive(Debug, Serialize)]
pub struct GatewayScoreEntry {
pub endpoint: String,
pub total_requests: u64,
pub success_count: u64,
pub failure_count: u64,
pub success_rate: f64,
}

#[derive(Debug, Serialize)]
pub struct GatewayScoresResponse {
pub current: Vec<GatewayScoreEntry>,
}

#[derive(Debug, Serialize)]
pub struct DecisionBucket {
pub endpoint: String,
pub total_count: u64,
pub success_count: u64,
pub failure_count: u64,
}

#[derive(Debug, Serialize)]
pub struct DecisionsResponse {
pub buckets: Vec<DecisionBucket>,
}

#[derive(Debug, Serialize)]
pub struct FeedbackEntry {
pub endpoint: String,
pub total_count: u64,
pub success_count: u64,
pub failure_count: u64,
}

#[derive(Debug, Serialize)]
pub struct FeedbacksResponse {
pub entries: Vec<FeedbackEntry>,
}

#[derive(Debug, Serialize)]
pub struct RoutingStatEntry {
pub endpoint: String,
pub total_requests: u64,
pub success_count: u64,
pub failure_count: u64,
pub error_rate: f64,
}

#[derive(Debug, Serialize)]
pub struct RoutingStatsResponse {
pub stats: Vec<RoutingStatEntry>,
}

// ---------------------------------------------------------------------------
// Router
// ---------------------------------------------------------------------------

pub fn serve() -> axum::Router<Arc<GlobalAppState>> {
axum::Router::new()
.route("/gateway-scores", axum::routing::get(gateway_scores))
.route("/decisions", axum::routing::get(decisions))
.route("/feedbacks", axum::routing::get(feedbacks))
.route("/routing-stats", axum::routing::get(routing_stats))
}

// ---------------------------------------------------------------------------
// Helpers – read current Prometheus counters
// ---------------------------------------------------------------------------

/// Collect per-endpoint totals from `API_REQUEST_TOTAL_COUNTER`.
fn collect_total_counts() -> HashMap<String, u64> {
let mut totals: HashMap<String, u64> = HashMap::new();
let metric_families = prometheus::gather();
for mf in &metric_families {
Comment on lines +116 to +120
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collect_total_counts calls prometheus::gather(). Since handlers also call collect_status_counts (which gathers again), a single request typically gathers all metrics twice. Consider gathering once per request and passing the gathered MetricFamily list into both parsing functions (or a single function that extracts both totals and status counts).

Suggested change
/// Collect per-endpoint totals from `API_REQUEST_TOTAL_COUNTER`.
fn collect_total_counts() -> HashMap<String, u64> {
let mut totals: HashMap<String, u64> = HashMap::new();
let metric_families = prometheus::gather();
for mf in &metric_families {
/// Collect per-endpoint totals from an already-gathered Prometheus snapshot.
fn collect_total_counts(
metric_families: &[prometheus::proto::MetricFamily],
) -> HashMap<String, u64> {
let mut totals: HashMap<String, u64> = HashMap::new();
for mf in metric_families {

Copilot uses AI. Check for mistakes.
if mf.get_name() == "api_requests_total" {
for m in mf.get_metric() {
let mut endpoint = String::new();
for lp in m.get_label() {
if lp.get_name() == "endpoint" {
endpoint = lp.get_value().to_string();
}
}
if !endpoint.is_empty() {
let val = m.get_counter().get_value() as u64;
*totals.entry(endpoint).or_default() += val;
}
}
}
}
totals
}

/// Collect per-endpoint per-status counts from `API_REQUEST_COUNTER`.
fn collect_status_counts() -> HashMap<String, HashMap<String, u64>> {
let mut counts: HashMap<String, HashMap<String, u64>> = HashMap::new();
let metric_families = prometheus::gather();
for mf in &metric_families {
if mf.get_name() == "api_requests_by_status" {
for m in mf.get_metric() {
Comment on lines +140 to +145
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as collect_total_counts: this function calls prometheus::gather() again, so most analytics handlers gather the entire Prometheus registry twice per request. Refactor to share a single gather result across both totals + status extraction to reduce overhead on the hot path.

Copilot uses AI. Check for mistakes.
let mut endpoint = String::new();
let mut status = String::new();
for lp in m.get_label() {
match lp.get_name() {
"endpoint" => endpoint = lp.get_value().to_string(),
"status" => status = lp.get_value().to_string(),
_ => {}
}
}
if !endpoint.is_empty() {
let val = m.get_counter().get_value() as u64;
*counts
.entry(endpoint)
.or_default()
.entry(status)
.or_default() += val;
}
}
}
}
counts
}

// ---------------------------------------------------------------------------
// Handlers
// ---------------------------------------------------------------------------

/// `GET /analytics/gateway-scores`
///
/// Returns the current scoring snapshot derived from Prometheus counters.
/// Filters optionally by `gateway` query param (matches endpoint name substring).
pub async fn gateway_scores(
Query(params): Query<GatewayScoreParams>,
) -> Json<GatewayScoresResponse> {
let totals = collect_total_counts();
let status_counts = collect_status_counts();

let gateway_filter = params.gateway.as_deref().unwrap_or("");

let mut current: Vec<GatewayScoreEntry> = Vec::new();

for (endpoint, total) in &totals {
if !gateway_filter.is_empty() && !endpoint.contains(gateway_filter) {
continue;
}

let statuses = status_counts.get(endpoint);
let success = statuses
.and_then(|s| s.get("success"))
.copied()
.unwrap_or(0);
let failure = statuses
.and_then(|s| s.get("failure"))
.copied()
.unwrap_or(0);

let sr = if *total > 0 {
(success as f64 / *total as f64) * 100.0
} else {
0.0
};

current.push(GatewayScoreEntry {
endpoint: endpoint.clone(),
total_requests: *total,
success_count: success,
failure_count: failure,
success_rate: (sr * 100.0).round() / 100.0,
});
}

current.sort_by(|a, b| b.total_requests.cmp(&a.total_requests));

Json(GatewayScoresResponse { current })
}

/// `GET /analytics/decisions`
///
/// Returns decision counts from Prometheus, optionally grouped by endpoint.
pub async fn decisions(Query(params): Query<DecisionParams>) -> Json<DecisionsResponse> {
let totals = collect_total_counts();
let status_counts = collect_status_counts();

let decision_endpoints: Vec<&str> = match params.group_by.as_deref() {
Some("gateway") => vec!["decide_gateway", "decision_gateway"],
Some("approach") => vec!["decide_gateway"],
_ => totals.keys().map(|k| k.as_str()).collect(),
};
Comment on lines +229 to +233
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

group_by is described as a grouping control, but here it only switches between hard-coded endpoint lists and the response is still per-endpoint (no grouping by gateway/approach is actually performed). This also makes group_by=approach misleading since no "approach" label is available from these metrics. Consider rejecting unsupported group_by values with a 400, or implementing real grouping semantics that match the documented API.

Copilot uses AI. Check for mistakes.

let mut buckets: Vec<DecisionBucket> = Vec::new();

for endpoint in decision_endpoints {
let total = totals.get(endpoint).copied().unwrap_or(0);
let statuses = status_counts.get(endpoint);
let success = statuses
.and_then(|s| s.get("success"))
.copied()
.unwrap_or(0);
let failure = statuses
.and_then(|s| s.get("failure"))
.copied()
.unwrap_or(0);

buckets.push(DecisionBucket {
endpoint: endpoint.to_string(),
total_count: total,
success_count: success,
failure_count: failure,
});
}

buckets.sort_by(|a, b| b.total_count.cmp(&a.total_count));

Json(DecisionsResponse { buckets })
}

/// `GET /analytics/feedbacks`
///
/// Returns feedback ingestion stats from Prometheus counters.
pub async fn feedbacks(Query(_params): Query<TimeRangeParams>) -> Json<FeedbacksResponse> {
let totals = collect_total_counts();
let status_counts = collect_status_counts();

let feedback_endpoints = ["update_score", "update_gateway_score"];

let mut entries: Vec<FeedbackEntry> = Vec::new();

for endpoint in &feedback_endpoints {
let total = totals.get(*endpoint).copied().unwrap_or(0);
let statuses = status_counts.get(*endpoint);
let success = statuses
.and_then(|s| s.get("success"))
.copied()
.unwrap_or(0);
let failure = statuses
.and_then(|s| s.get("failure"))
.copied()
.unwrap_or(0);

entries.push(FeedbackEntry {
endpoint: endpoint.to_string(),
total_count: total,
success_count: success,
failure_count: failure,
});
}

Json(FeedbacksResponse { entries })
}

/// `GET /analytics/routing-stats`
///
/// Returns per-endpoint routing statistics including error rate.
pub async fn routing_stats(
Query(_params): Query<RoutingStatsParams>,
) -> Json<RoutingStatsResponse> {
Comment on lines +299 to +301
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RoutingStatsParams includes a range query param but the handler ignores it entirely. Either apply the range to the returned stats (e.g., windowed rates) or remove the param to avoid silently ignoring client input.

Suggested change
pub async fn routing_stats(
Query(_params): Query<RoutingStatsParams>,
) -> Json<RoutingStatsResponse> {
pub async fn routing_stats() -> Json<RoutingStatsResponse> {

Copilot uses AI. Check for mistakes.
let totals = collect_total_counts();
let status_counts = collect_status_counts();

let mut stats: Vec<RoutingStatEntry> = Vec::new();

for (endpoint, total) in &totals {
let statuses = status_counts.get(endpoint);
let success = statuses
.and_then(|s| s.get("success"))
.copied()
.unwrap_or(0);
let failure = statuses
.and_then(|s| s.get("failure"))
.copied()
.unwrap_or(0);

let error_rate = if *total > 0 {
(failure as f64 / *total as f64) * 100.0
} else {
0.0
};

stats.push(RoutingStatEntry {
endpoint: endpoint.clone(),
total_requests: *total,
success_count: success,
failure_count: failure,
error_rate: (error_rate * 100.0).round() / 100.0,
});
}

stats.sort_by(|a, b| b.total_requests.cmp(&a.total_requests));

Json(RoutingStatsResponse { stats })
}
Loading