From 6aec035da21650e4ddea754352508443f0767321 Mon Sep 17 00:00:00 2001 From: kalyangupta12 Date: Sat, 21 Mar 2026 14:58:57 +0530 Subject: [PATCH 1/2] feat: add average metrics per question to model detail page --- src/routes/model/$modelId.tsx | 137 ++++++++++++++++++++++++++-------- 1 file changed, 106 insertions(+), 31 deletions(-) diff --git a/src/routes/model/$modelId.tsx b/src/routes/model/$modelId.tsx index d2b58c4..cfd4b58 100644 --- a/src/routes/model/$modelId.tsx +++ b/src/routes/model/$modelId.tsx @@ -130,15 +130,26 @@ interface StatCard { } function ModelStats({ model }: { model: ModelResult }) { - const items: StatCard[] = []; - + const totalItems: StatCard[] = []; + const avgItems: StatCard[] = []; + + // Compute average tool calls + const totalToolCalls = model.questionDetails?.reduce( + (sum, q) => sum + (q.toolCallCount ?? 0), + 0, + ) ?? 0; + const avgToolCalls = model.totalQuestions > 0 + ? totalToolCalls / model.totalQuestions + : 0; + + // Total stats if (model.totalTokens > 0) { const parts: string[] = []; if (model.totalPromptTokens > 0) parts.push(`Input: ${model.totalPromptTokens.toLocaleString()}`); if (model.totalCompletionTokens > 0) parts.push(`Output: ${model.totalCompletionTokens.toLocaleString()}`); - items.push({ + totalItems.push({ icon: Layers, label: "Total tokens", value: model.totalTokens.toLocaleString(), @@ -146,53 +157,117 @@ function ModelStats({ model }: { model: ModelResult }) { }); } if (model.totalDurationMs > 0) { - items.push({ + totalItems.push({ icon: Clock, label: "Total duration", value: formatDuration(model.totalDurationMs), }); } + if (model.totalCost > 0) { + totalItems.push({ + icon: Coins, + label: "Total cost", + value: `$${model.totalCost.toFixed(4)}`, + }); + } + if (totalToolCalls > 0) { + totalItems.push({ + icon: Layers, + label: "Total tool calls", + value: totalToolCalls.toLocaleString(), + }); + } + + // Average stats per question + if (model.totalTokens > 0 && model.totalQuestions > 0) { + const avgTokens = Math.round(model.totalTokens / model.totalQuestions); + const avgPrompt = Math.round(model.totalPromptTokens / model.totalQuestions); + const avgCompletion = Math.round(model.totalCompletionTokens / model.totalQuestions); + avgItems.push({ + icon: Layers, + label: "Avg tokens/question", + value: avgTokens.toLocaleString(), + tooltip: `Input: ${avgPrompt.toLocaleString()}\nOutput: ${avgCompletion.toLocaleString()}`, + }); + } + if (model.totalDurationMs > 0 && model.totalQuestions > 0) { + const avgDurationMs = Math.round(model.totalDurationMs / model.totalQuestions); + avgItems.push({ + icon: Clock, + label: "Avg duration/question", + value: formatDuration(avgDurationMs), + }); + } if (model.averageTokensPerSecond > 0) { - items.push({ + avgItems.push({ icon: Gauge, label: "Avg speed", value: `${model.averageTokensPerSecond.toFixed(1)} tok/s`, }); } - if (model.totalCost > 0) { - items.push({ + if (model.totalCost > 0 && model.totalQuestions > 0) { + const avgCost = model.totalCost / model.totalQuestions; + avgItems.push({ icon: Coins, - label: "Total cost", - value: `$ ${model.totalCost.toFixed(4)}`, + label: "Avg cost/question", + value: avgCost < 0.0001 ? "<$0.0001" : `$${avgCost.toFixed(4)}`, + }); + } + if (avgToolCalls > 0) { + avgItems.push({ + icon: Layers, + label: "Avg tool calls/question", + value: avgToolCalls.toFixed(1), }); } - if (items.length === 0) return null; + if (totalItems.length === 0 && avgItems.length === 0) return null; + + const renderCard = (item: StatCard) => ( +
+
+ + {item.label} +
+ + {item.value} + {item.tooltip && ( + + + + {item.tooltip} + + + )} + +
+ ); return ( -
- {items.map((item) => ( -
-
- - {item.label} +
+ {totalItems.length > 0 && ( +
+

+ Totals +

+
+ {totalItems.map(renderCard)} +
+
+ )} + {avgItems.length > 0 && ( +
+

+ Averages +

+
+ {avgItems.map(renderCard)}
- - {item.value} - {item.tooltip && ( - - - - {item.tooltip} - - - )} -
- ))} + )}
); } From 564eeffeb0f1b7f165a2f9937dc5f7f0d8652e62 Mon Sep 17 00:00:00 2001 From: kalyangupta12 Date: Thu, 26 Mar 2026 23:57:39 +0530 Subject: [PATCH 2/2] fix: address PR review feedback - Use Wrench icon for tool calls to distinguish from tokens (Layers) - Make tooltip keyboard accessible with button element and focus states - Add aria-label for screen readers --- src/routes/model/$modelId.tsx | 43 ++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/src/routes/model/$modelId.tsx b/src/routes/model/$modelId.tsx index cfd4b58..02cc4f6 100644 --- a/src/routes/model/$modelId.tsx +++ b/src/routes/model/$modelId.tsx @@ -7,6 +7,7 @@ import { Gauge, Info, Layers, + Wrench, } from "lucide-react"; import { useCallback, useEffect, useMemo } from "react"; import CategoryTabs from "#/components/CategoryTabs"; @@ -134,13 +135,13 @@ function ModelStats({ model }: { model: ModelResult }) { const avgItems: StatCard[] = []; // Compute average tool calls - const totalToolCalls = model.questionDetails?.reduce( - (sum, q) => sum + (q.toolCallCount ?? 0), - 0, - ) ?? 0; - const avgToolCalls = model.totalQuestions > 0 - ? totalToolCalls / model.totalQuestions - : 0; + const totalToolCalls = + model.questionDetails?.reduce( + (sum, q) => sum + (q.toolCallCount ?? 0), + 0, + ) ?? 0; + const avgToolCalls = + model.totalQuestions > 0 ? totalToolCalls / model.totalQuestions : 0; // Total stats if (model.totalTokens > 0) { @@ -172,7 +173,7 @@ function ModelStats({ model }: { model: ModelResult }) { } if (totalToolCalls > 0) { totalItems.push({ - icon: Layers, + icon: Wrench, label: "Total tool calls", value: totalToolCalls.toLocaleString(), }); @@ -181,8 +182,12 @@ function ModelStats({ model }: { model: ModelResult }) { // Average stats per question if (model.totalTokens > 0 && model.totalQuestions > 0) { const avgTokens = Math.round(model.totalTokens / model.totalQuestions); - const avgPrompt = Math.round(model.totalPromptTokens / model.totalQuestions); - const avgCompletion = Math.round(model.totalCompletionTokens / model.totalQuestions); + const avgPrompt = Math.round( + model.totalPromptTokens / model.totalQuestions, + ); + const avgCompletion = Math.round( + model.totalCompletionTokens / model.totalQuestions, + ); avgItems.push({ icon: Layers, label: "Avg tokens/question", @@ -191,7 +196,9 @@ function ModelStats({ model }: { model: ModelResult }) { }); } if (model.totalDurationMs > 0 && model.totalQuestions > 0) { - const avgDurationMs = Math.round(model.totalDurationMs / model.totalQuestions); + const avgDurationMs = Math.round( + model.totalDurationMs / model.totalQuestions, + ); avgItems.push({ icon: Clock, label: "Avg duration/question", @@ -215,7 +222,7 @@ function ModelStats({ model }: { model: ModelResult }) { } if (avgToolCalls > 0) { avgItems.push({ - icon: Layers, + icon: Wrench, label: "Avg tool calls/question", value: avgToolCalls.toFixed(1), }); @@ -232,15 +239,19 @@ function ModelStats({ model }: { model: ModelResult }) { {item.value} {item.tooltip && ( - + )}