From e2131869b5729ab79ba03a711475ee551dd65987 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Sat, 14 Mar 2026 20:51:22 -0400 Subject: [PATCH 1/3] Add analytics for QA agent --- bookstack_agent/api/main.py | 69 ++- .../analytics/components/query-metrics.tsx | 107 ++++ .../components/query-velocity-chart.tsx | 115 +++++ .../components/recent-queries-table.tsx | 276 +++++++++++ .../analytics/components/tool-usage-chart.tsx | 120 +++++ bookstack_agent/ui/app/analytics/page.tsx | 113 +++++ .../ui/app/api/bookstack-trace/route.ts | 37 ++ bookstack_agent/ui/app/layout.tsx | 2 +- .../ui/lib/bookstack-data-fetcher.ts | 156 ++++++ bookstack_agent/ui/lib/bookstack-types.ts | 67 +++ bookstack_agent/ui/package-lock.json | 342 ++++++++++++- bookstack_agent/ui/package.json | 1 + pyproject.toml | 5 + src/aieng_bot/bookstack/activity_logger.py | 278 +++++++++++ tests/bookstack/test_activity_logger.py | 455 ++++++++++++++++++ uv.lock | 160 ++++++ 16 files changed, 2296 insertions(+), 7 deletions(-) create mode 100644 bookstack_agent/ui/app/analytics/components/query-metrics.tsx create mode 100644 bookstack_agent/ui/app/analytics/components/query-velocity-chart.tsx create mode 100644 bookstack_agent/ui/app/analytics/components/recent-queries-table.tsx create mode 100644 bookstack_agent/ui/app/analytics/components/tool-usage-chart.tsx create mode 100644 bookstack_agent/ui/app/analytics/page.tsx create mode 100644 bookstack_agent/ui/app/api/bookstack-trace/route.ts create mode 100644 bookstack_agent/ui/lib/bookstack-data-fetcher.ts create mode 100644 bookstack_agent/ui/lib/bookstack-types.ts create mode 100644 src/aieng_bot/bookstack/activity_logger.py create mode 100644 tests/bookstack/test_activity_logger.py diff --git a/bookstack_agent/api/main.py b/bookstack_agent/api/main.py index 19a0d2c..565749a 100644 --- a/bookstack_agent/api/main.py +++ b/bookstack_agent/api/main.py @@ -27,7 +27,9 @@ import asyncio import json +import logging import os +import time import uuid from collections import OrderedDict from collections.abc import AsyncGenerator @@ -41,8 +43,11 @@ from pydantic import BaseModel, Field from aieng_bot.bookstack import BookstackQAAgent +from aieng_bot.bookstack.activity_logger import BookstackActivityLogger from aieng_bot.bookstack.agent import MessageHistory +api_logger = logging.getLogger(__name__) + load_dotenv() MAX_SESSIONS = 500 # prune oldest sessions beyond this limit @@ -78,6 +83,9 @@ async def lifespan(application: FastAPI) -> AsyncGenerator[None, None]: application.state.sessions = OrderedDict() application.state.session_locks = {} + # Analytics logger — initialised lazily; failures are non-fatal + application.state.activity_logger = BookstackActivityLogger() + yield @@ -158,6 +166,35 @@ def _get_session_lock(session_id: str) -> asyncio.Lock: return locks[session_id] +# --------------------------------------------------------------------------- +# Analytics helpers +# --------------------------------------------------------------------------- + + +async def _log_query_bg( + activity_logger: BookstackActivityLogger, + session_id: str, + question: str, + tool_calls: list[dict[str, Any]], + answer: str, + duration_seconds: float, + status: str, +) -> None: + """Run analytics logging in a thread pool (non-blocking).""" + try: + await asyncio.to_thread( + activity_logger.log_query, + session_id, + question, + tool_calls, + answer, + duration_seconds, + status, + ) + except Exception as exc: # noqa: BLE001 + api_logger.warning("Analytics logging failed (non-fatal): %s", exc) + + # --------------------------------------------------------------------------- # Models # --------------------------------------------------------------------------- @@ -207,6 +244,12 @@ async def event_stream() -> AsyncGenerator[str, None]: sid, history = _get_or_create_session(request.session_id) lock = _get_session_lock(sid) + # Analytics accumulators + start_time = time.monotonic() + query_tool_calls: list[dict[str, Any]] = [] + final_answer = "" + final_status = "error" + # Emit the session ID immediately so the client can store it yield f"data: {json.dumps({'type': 'session', 'session_id': sid})}\n\n" @@ -216,11 +259,20 @@ async def event_stream() -> AsyncGenerator[str, None]: async for event in agent.ask_stream(request.question, history=history): event_type = event.get("type") - if event_type == "answer": + if event_type == "tool_use": + query_tool_calls.append( + {"tool": event.get("tool", ""), "input": event.get("input", {})} + ) + yield f"data: {json.dumps(event)}\n\n" + + elif event_type == "answer": updated_history = event.pop("history", history) + final_answer = event.get("text", "") + final_status = "success" yield f"data: {json.dumps(event)}\n\n" elif event_type == "error": + final_status = "error" yield f"data: {json.dumps(event)}\n\n" else: @@ -228,6 +280,21 @@ async def event_stream() -> AsyncGenerator[str, None]: _save_session(sid, updated_history) + # Fire analytics logging asynchronously — does not block the stream + duration = time.monotonic() - start_time + activity_logger: BookstackActivityLogger = app.state.activity_logger + asyncio.create_task( + _log_query_bg( + activity_logger=activity_logger, + session_id=sid, + question=request.question, + tool_calls=query_tool_calls, + answer=final_answer, + duration_seconds=duration, + status=final_status, + ) + ) + yield "data: [DONE]\n\n" return StreamingResponse( diff --git a/bookstack_agent/ui/app/analytics/components/query-metrics.tsx b/bookstack_agent/ui/app/analytics/components/query-metrics.tsx new file mode 100644 index 0000000..e2ee742 --- /dev/null +++ b/bookstack_agent/ui/app/analytics/components/query-metrics.tsx @@ -0,0 +1,107 @@ +import { MessageSquare, Users, Clock, Wrench, TrendingUp, CheckCircle } from 'lucide-react' +import type { BookstackMetrics } from '@/lib/bookstack-types' + +interface MetricCardProps { + label: string + value: string | number + sub?: string + icon: React.ReactNode + accent?: string +} + +function MetricCard({ label, value, sub, icon, accent = 'from-vector-magenta to-vector-violet' }: MetricCardProps) { + return ( +
+
+ {label} +
+ {icon} +
+
+
+

{value}

+ {sub &&

{sub}

} +
+
+ ) +} + +interface QueryMetricsProps { + metrics: BookstackMetrics +} + +export default function QueryMetrics({ metrics }: QueryMetricsProps) { + const successPct = Math.round(metrics.success_rate * 100) + + return ( +
+
+

Overview

+

All-time query statistics

+
+ +
+ } + accent="from-vector-magenta to-vector-violet" + /> + } + accent="from-vector-violet to-vector-cobalt" + /> + } + accent="from-vector-cobalt to-vector-violet" + /> + } + accent="from-emerald-500 to-teal-600" + /> + } + accent="from-amber-500 to-orange-600" + /> + } + accent="from-purple-500 to-pink-600" + /> +
+ + {/* Success / error bar */} + {metrics.total_queries > 0 && ( +
+
+ Answer Rate + + {metrics.successful_queries} answered · {metrics.error_queries} errored + +
+
+
+
+
+ )} +
+ ) +} diff --git a/bookstack_agent/ui/app/analytics/components/query-velocity-chart.tsx b/bookstack_agent/ui/app/analytics/components/query-velocity-chart.tsx new file mode 100644 index 0000000..00d4e90 --- /dev/null +++ b/bookstack_agent/ui/app/analytics/components/query-velocity-chart.tsx @@ -0,0 +1,115 @@ +'use client' + +import { + AreaChart, + Area, + XAxis, + YAxis, + CartesianGrid, + Tooltip, + ResponsiveContainer, + Legend, +} from 'recharts' + +type ChartPoint = { + date: string + success: number + error: number + total: number +} + +const CHART_CONFIG = { + grid: { strokeDasharray: '3 3', stroke: '#334155', opacity: 0.4 }, + axis: { stroke: '#64748b', style: { fontSize: '11px' }, tickLine: false }, + tooltip: { + contentStyle: { + backgroundColor: '#1e293b', + border: 'none', + borderRadius: '8px', + color: '#fff', + padding: '8px 12px', + }, + labelStyle: { color: '#94a3b8', marginBottom: '4px' }, + }, +} + +function shouldShowLabel(index: number, total: number): boolean { + if (total <= 7) return true + if (total <= 14) return index % 2 === 0 + if (total <= 30) return index % 3 === 0 + if (total <= 45) return index % 5 === 0 + return index % 7 === 0 +} + +export default function QueryVelocityChart({ data }: { data: ChartPoint[] }) { + const maxVal = data.length > 0 ? Math.max(...data.map(d => d.total)) : 0 + const yMax = maxVal <= 10 ? 10 : maxVal <= 20 ? 20 : Math.ceil(maxVal * 1.2 / 5) * 5 + + return ( +
+
+

Query Velocity

+

Queries answered per day (last 90 days)

+
+ + {data.length === 0 ? ( +
+ No data available yet +
+ ) : ( +
+ + + + + + + + + + + + + + { + const { x, y, payload, index } = props as { x: number; y: number; payload: { value: string }; index: number } + if (index === 0 || index === data.length - 1 || shouldShowLabel(index, data.length)) { + return ( + + {payload.value} + + ) + } + return + }} + /> + + + + + + + +
+ )} +
+ ) +} diff --git a/bookstack_agent/ui/app/analytics/components/recent-queries-table.tsx b/bookstack_agent/ui/app/analytics/components/recent-queries-table.tsx new file mode 100644 index 0000000..65bfcf3 --- /dev/null +++ b/bookstack_agent/ui/app/analytics/components/recent-queries-table.tsx @@ -0,0 +1,276 @@ +'use client' + +import { useState } from 'react' +import { Search, ChevronDown, ChevronUp, Clock, CheckCircle, XCircle, Wrench } from 'lucide-react' +import type { BookstackActivity, BookstackTrace } from '@/lib/bookstack-types' + +const TOOL_COLORS: Record = { + search_bookstack: 'bg-violet-900/60 text-violet-300 border-violet-700/50', + get_page: 'bg-blue-900/60 text-blue-300 border-blue-700/50', + list_books: 'bg-pink-900/60 text-pink-300 border-pink-700/50', +} + +const TOOL_SHORT: Record = { + search_bookstack: 'Search', + get_page: 'Read', + list_books: 'List', +} + +function ToolBadge({ tool }: { tool: string }) { + return ( + + {TOOL_SHORT[tool] ?? tool} + + ) +} + +function TraceModal({ + activity, + trace, + onClose, +}: { + activity: BookstackActivity + trace: BookstackTrace | null + onClose: () => void +}) { + return ( +
+
+
+
+

+ {new Date(activity.timestamp).toLocaleString()} · {activity.duration_seconds.toFixed(1)}s +

+

{activity.question}

+
+ +
+ +
+ {/* Tool calls */} + {trace && trace.tool_calls.length > 0 && ( +
+

+ Agent Tool Calls +

+
+ {trace.tool_calls.map((tc) => ( +
+
+ #{tc.seq} + +
+
+                      {JSON.stringify(tc.input, null, 2)}
+                    
+
+ ))} +
+
+ )} + + {/* Answer */} + {trace && trace.answer && ( +
+

+ Answer Preview +

+
+

+ {trace.answer} +

+
+
+ )} + + {!trace && ( +

+ Trace data not available for this query. +

+ )} +
+
+
+ ) +} + +interface RecentQueriesTableProps { + activities: BookstackActivity[] +} + +export default function RecentQueriesTable({ activities }: RecentQueriesTableProps) { + const [search, setSearch] = useState('') + const [statusFilter, setStatusFilter] = useState<'all' | 'success' | 'error'>('all') + const [page, setPage] = useState(0) + const [expandedTrace, setExpandedTrace] = useState<{ + activity: BookstackActivity + trace: BookstackTrace | null + loading: boolean + } | null>(null) + + const PAGE_SIZE = 10 + + const filtered = activities + .filter(a => statusFilter === 'all' || a.status === statusFilter) + .filter(a => !search || a.question.toLowerCase().includes(search.toLowerCase())) + .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime()) + + const totalPages = Math.ceil(filtered.length / PAGE_SIZE) + const paged = filtered.slice(page * PAGE_SIZE, (page + 1) * PAGE_SIZE) + + const openTrace = async (activity: BookstackActivity) => { + setExpandedTrace({ activity, trace: null, loading: true }) + try { + const res = await fetch( + `/aieng-bot/api/bookstack-trace?path=${encodeURIComponent(activity.trace_path)}`, + ) + const trace: BookstackTrace | null = res.ok ? await res.json() : null + setExpandedTrace({ activity, trace, loading: false }) + } catch { + setExpandedTrace({ activity, trace: null, loading: false }) + } + } + + return ( +
+
+

Recent Queries

+

Last 30 days — click a row to view the agent trace

+
+ + {/* Filters */} +
+
+ + { setSearch(e.target.value); setPage(0) }} + className="w-full pl-9 pr-3 py-2 text-sm bg-slate-700/60 border border-white/10 rounded-lg text-slate-200 placeholder-slate-500 focus:outline-none focus:ring-1 focus:ring-vector-violet" + /> +
+ +
+ + {/* Table */} + {paged.length === 0 ? ( +
+ No queries match your filters. +
+ ) : ( + <> +
+ + + + + + + + + + + + {paged.map((activity, i) => ( + openTrace(activity)} + > + + + + + + + ))} + +
QuestionToolsDurationStatusTimestamp
+

+ {activity.question} +

+
+
+ {activity.tools_used.map(t => ( + + ))} + {activity.num_tool_calls > 0 && ( + + + {activity.num_tool_calls} + + )} +
+
+ + + {activity.duration_seconds.toFixed(1)}s + + + {activity.status === 'success' ? ( + + ) : ( + + )} + + {new Date(activity.timestamp).toLocaleDateString('en-US', { + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit', + })} +
+
+ + {/* Pagination */} + {totalPages > 1 && ( +
+ + {filtered.length} queries · page {page + 1} of {totalPages} + +
+ + +
+
+ )} + + )} + + {/* Trace modal */} + {expandedTrace && ( + setExpandedTrace(null)} + /> + )} +
+ ) +} diff --git a/bookstack_agent/ui/app/analytics/components/tool-usage-chart.tsx b/bookstack_agent/ui/app/analytics/components/tool-usage-chart.tsx new file mode 100644 index 0000000..2094a8e --- /dev/null +++ b/bookstack_agent/ui/app/analytics/components/tool-usage-chart.tsx @@ -0,0 +1,120 @@ +'use client' + +import { + BarChart, + Bar, + XAxis, + YAxis, + CartesianGrid, + Tooltip, + ResponsiveContainer, + Cell, +} from 'recharts' +import type { BookstackMetrics } from '@/lib/bookstack-types' + +const TOOL_LABELS: Record = { + search_bookstack: 'Search', + get_page: 'Read Page', + list_books: 'List Books', +} + +const TOOL_COLORS: Record = { + search_bookstack: '#8A25C9', + get_page: '#313CFF', + list_books: '#EB088A', +} + +const TOOL_DESC: Record = { + search_bookstack: 'Full-text search across all books & pages', + get_page: 'Fetch full page markdown by ID', + list_books: 'List all available books', +} + +export default function ToolUsageChart({ metrics }: { metrics: BookstackMetrics }) { + const data = Object.entries(metrics.tool_usage).map(([tool, count]) => ({ + tool, + label: TOOL_LABELS[tool] ?? tool, + count, + color: TOOL_COLORS[tool] ?? '#8A25C9', + })) + + const total = data.reduce((s, d) => s + d.count, 0) + + return ( +
+
+

Tool Usage

+

+ {total.toLocaleString()} total calls across all queries +

+
+ +
+ {/* Bar chart */} +
+ + + + + + + + {data.map((entry) => ( + + ))} + + + +
+ + {/* Tool breakdown list */} +
+ {data.map((entry) => { + const pct = total > 0 ? Math.round((entry.count / total) * 100) : 0 + return ( +
+
+
+ + {entry.label} +
+ + {entry.count.toLocaleString()} ({pct}%) + +
+
+
+
+

{TOOL_DESC[entry.tool]}

+
+ ) + })} +
+
+
+ ) +} diff --git a/bookstack_agent/ui/app/analytics/page.tsx b/bookstack_agent/ui/app/analytics/page.tsx new file mode 100644 index 0000000..207fcc8 --- /dev/null +++ b/bookstack_agent/ui/app/analytics/page.tsx @@ -0,0 +1,113 @@ +import { redirect } from 'next/navigation' +import { isAuthenticated, getCurrentUser } from '@/lib/session' +import { + fetchBookstackActivityLog, + computeBookstackMetrics, + filterRecentActivities, + aggregateByDate, +} from '@/lib/bookstack-data-fetcher' +import QueryMetrics from './components/query-metrics' +import QueryVelocityChart from './components/query-velocity-chart' +import ToolUsageChart from './components/tool-usage-chart' +import RecentQueriesTable from './components/recent-queries-table' +import { BookOpen, Activity } from 'lucide-react' + +export const dynamic = 'force-dynamic' +export const revalidate = 0 + +export default async function BookstackAnalyticsPage() { + const authenticated = await isAuthenticated() + if (!authenticated) { + redirect('/login') + } + + const user = await getCurrentUser() + + const log = await fetchBookstackActivityLog() + + const Header = () => ( +
+
+
+
+
+
+ +
+

+ BookStack Analytics +

+
+

+ Usage insights for the Vector Institute knowledge-base assistant +

+
+
+ {user && ( +
+

Signed in as

+

{user.email}

+
+ )} + + Logout + +
+
+
+
+ ) + + if (!log || log.activities.length === 0) { + return ( +
+
+
+
+
+
+ +
+

No Data Yet

+

+ Analytics will appear here once users start asking questions. +

+
+
+
+ ) + } + + const metrics = computeBookstackMetrics(log) + const recentActivities = filterRecentActivities(log.activities, 30) + .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime()) + const velocityData = aggregateByDate(filterRecentActivities(log.activities, 90)) + + return ( +
+
+
+ +
+ {/* Metrics row */} + + + {/* Velocity + Tool usage side by side on large screens */} +
+
+ +
+
+ +
+
+ + {/* Recent queries */} + +
+
+ ) +} diff --git a/bookstack_agent/ui/app/api/bookstack-trace/route.ts b/bookstack_agent/ui/app/api/bookstack-trace/route.ts new file mode 100644 index 0000000..68a38f8 --- /dev/null +++ b/bookstack_agent/ui/app/api/bookstack-trace/route.ts @@ -0,0 +1,37 @@ +import { NextRequest, NextResponse } from 'next/server' +import { isAuthenticated } from '@/lib/session' + +const GCS_BUCKET_URL = 'https://storage.googleapis.com/bot-dashboard-vectorinstitute' + +/** + * Proxy authenticated requests for per-query trace files from GCS. + * + * GET /api/bookstack-trace?path=data/bookstack/traces/... + */ +export async function GET(req: NextRequest): Promise { + const authenticated = await isAuthenticated() + if (!authenticated) { + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) + } + + const tracePath = req.nextUrl.searchParams.get('path') + if (!tracePath) { + return NextResponse.json({ error: 'Missing path parameter' }, { status: 400 }) + } + + // Restrict to expected prefix to prevent arbitrary GCS reads + if (!tracePath.startsWith('data/bookstack/traces/')) { + return NextResponse.json({ error: 'Invalid trace path' }, { status: 400 }) + } + + try { + const res = await fetch(`${GCS_BUCKET_URL}/${tracePath}`, { cache: 'no-store' }) + if (!res.ok) { + return NextResponse.json({ error: 'Trace not found' }, { status: res.status }) + } + const data = await res.json() + return NextResponse.json(data) + } catch { + return NextResponse.json({ error: 'Failed to fetch trace' }, { status: 500 }) + } +} diff --git a/bookstack_agent/ui/app/layout.tsx b/bookstack_agent/ui/app/layout.tsx index 0adf444..1e7ed87 100644 --- a/bookstack_agent/ui/app/layout.tsx +++ b/bookstack_agent/ui/app/layout.tsx @@ -2,7 +2,7 @@ import type { Metadata } from 'next' import './globals.css' export const metadata: Metadata = { - title: 'BookStack QA — Vector Institute', + title: 'aieng-bot — Vector Institute', description: 'Ask questions about Vector Institute internal documentation', } diff --git a/bookstack_agent/ui/lib/bookstack-data-fetcher.ts b/bookstack_agent/ui/lib/bookstack-data-fetcher.ts new file mode 100644 index 0000000..586db55 --- /dev/null +++ b/bookstack_agent/ui/lib/bookstack-data-fetcher.ts @@ -0,0 +1,156 @@ +/** + * Data-fetching utilities for BookStack QA analytics. + * All reads are from the public GCS bucket — no auth required on the read path. + */ + +import type { + BookstackActivity, + BookstackActivityLog, + BookstackMetrics, + BookstackTrace, +} from './bookstack-types' + +const GCS_BUCKET_URL = 'https://storage.googleapis.com/bot-dashboard-vectorinstitute' +const ACTIVITY_LOG_PATH = 'data/bookstack_activity_log.json' + +/** Fetch the unified BookStack activity log from GCS. */ +export async function fetchBookstackActivityLog(): Promise { + try { + const cacheBuster = Date.now() + const res = await fetch( + `${GCS_BUCKET_URL}/${ACTIVITY_LOG_PATH}?t=${cacheBuster}`, + { cache: 'no-store' }, + ) + if (!res.ok) { + if (res.status !== 404) { + console.error('Failed to fetch bookstack activity log:', res.statusText) + } + return null + } + return await res.json() as BookstackActivityLog + } catch (err) { + console.error('Error fetching bookstack activity log:', err) + return null + } +} + +/** Fetch an individual per-query trace file from GCS. */ +export async function fetchBookstackTrace(tracePath: string): Promise { + try { + const cacheBuster = Date.now() + const res = await fetch( + `${GCS_BUCKET_URL}/${tracePath}?t=${cacheBuster}`, + { cache: 'no-store' }, + ) + if (!res.ok) { + console.error('Failed to fetch bookstack trace:', res.statusText) + return null + } + return await res.json() as BookstackTrace + } catch (err) { + console.error('Error fetching bookstack trace:', err) + return null + } +} + +/** Compute aggregate metrics from the activity log. */ +export function computeBookstackMetrics(log: BookstackActivityLog): BookstackMetrics { + const activities = log.activities + + const total = activities.length + const successful = activities.filter(a => a.status === 'success').length + const errors = activities.filter(a => a.status === 'error').length + + const sessions = new Set(activities.map(a => a.session_id)) + + const durations = activities.map(a => a.duration_seconds).filter(d => d > 0) + const avgDuration = durations.length > 0 + ? durations.reduce((s, d) => s + d, 0) / durations.length + : 0 + + const totalToolCalls = activities.reduce((s, a) => s + a.num_tool_calls, 0) + const avgToolCalls = total > 0 ? totalToolCalls / total : 0 + + const toolUsage = { search_bookstack: 0, get_page: 0, list_books: 0 } + activities.forEach(a => { + if (a.tool_call_counts) { + // Use precise per-tool counts when available (new format) + toolUsage.search_bookstack += a.tool_call_counts.search_bookstack ?? 0 + toolUsage.get_page += a.tool_call_counts.get_page ?? 0 + toolUsage.list_books += a.tool_call_counts.list_books ?? 0 + } else { + // Fallback for legacy entries: count queries that used each tool + a.tools_used.forEach(tool => { + if (tool === 'search_bookstack') toolUsage.search_bookstack++ + else if (tool === 'get_page') toolUsage.get_page++ + else if (tool === 'list_books') toolUsage.list_books++ + }) + } + }) + + const now = new Date() + const todayStart = new Date(now) + todayStart.setHours(0, 0, 0, 0) + const weekStart = new Date(now) + weekStart.setDate(weekStart.getDate() - 7) + + const queriesToday = activities.filter(a => new Date(a.timestamp) >= todayStart).length + const queriesThisWeek = activities.filter(a => new Date(a.timestamp) >= weekStart).length + + return { + total_queries: total, + successful_queries: successful, + error_queries: errors, + success_rate: total > 0 ? successful / total : 0, + unique_sessions: sessions.size, + avg_duration_seconds: avgDuration, + avg_tool_calls_per_query: avgToolCalls, + total_tool_calls: totalToolCalls, + queries_today: queriesToday, + queries_this_week: queriesThisWeek, + tool_usage: toolUsage, + } +} + +/** Slice the activity log to entries within the last N days. */ +export function filterRecentActivities( + activities: BookstackActivity[], + days: number, +): BookstackActivity[] { + const cutoff = new Date() + cutoff.setDate(cutoff.getDate() - days) + return activities.filter(a => new Date(a.timestamp) >= cutoff) +} + +/** Aggregate activities by calendar date for the velocity chart. */ +export function aggregateByDate( + activities: BookstackActivity[], +): Array<{ date: string; success: number; error: number; total: number }> { + const byDate = new Map< + string, + { success: number; error: number; year: number; month: number; day: number } + >() + + activities.forEach(a => { + const d = new Date(a.timestamp) + const key = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}` + if (!byDate.has(key)) { + byDate.set(key, { success: 0, error: 0, year: d.getFullYear(), month: d.getMonth() + 1, day: d.getDate() }) + } + const entry = byDate.get(key)! + if (a.status === 'success') entry.success++ + else entry.error++ + }) + + return Array.from(byDate.entries()) + .sort((a, b) => a[0].localeCompare(b[0])) + .map(([, v]) => ({ + date: new Date(v.year, v.month - 1, v.day).toLocaleDateString('en-US', { + month: 'short', + day: 'numeric', + }), + success: v.success, + error: v.error, + total: v.success + v.error, + })) +} diff --git a/bookstack_agent/ui/lib/bookstack-types.ts b/bookstack_agent/ui/lib/bookstack-types.ts new file mode 100644 index 0000000..6c46dd9 --- /dev/null +++ b/bookstack_agent/ui/lib/bookstack-types.ts @@ -0,0 +1,67 @@ +/** + * Type definitions for BookStack QA Analytics + */ + +/** One entry in the unified activity log — compact form for list/chart views. */ +export interface BookstackActivity { + session_id: string + timestamp: string + /** Question text, truncated to 300 chars in the log. */ + question: string + /** Unique tool names used during this query (e.g. ["search_bookstack", "get_page"]). */ + tools_used: string[] + /** Total number of individual tool invocations. */ + num_tool_calls: number + /** Per-tool invocation counts e.g. {"search_bookstack": 2, "get_page": 3}. */ + tool_call_counts?: Record + /** Byte length of the final answer. */ + answer_length: number + duration_seconds: number + status: 'success' | 'error' + /** GCS path to the detailed trace file for this query. */ + trace_path: string +} + +export interface BookstackActivityLog { + activities: BookstackActivity[] + last_updated: string | null +} + +/** One tool call recorded inside a per-query trace file. */ +export interface BookstackToolCall { + seq: number + tool: 'search_bookstack' | 'get_page' | 'list_books' + input: Record +} + +/** Full per-query trace file — stored at trace_path in GCS. */ +export interface BookstackTrace { + session_id: string + timestamp: string + question: string + tool_calls: BookstackToolCall[] + answer: string + duration_seconds: number + status: 'success' | 'error' +} + +/** Aggregated metrics computed from the activity log. */ +export interface BookstackMetrics { + total_queries: number + successful_queries: number + error_queries: number + success_rate: number + unique_sessions: number + avg_duration_seconds: number + avg_tool_calls_per_query: number + total_tool_calls: number + /** How many queries landed in the last 24 hours. */ + queries_today: number + /** How many queries landed in the last 7 days. */ + queries_this_week: number + tool_usage: { + search_bookstack: number + get_page: number + list_books: number + } +} diff --git a/bookstack_agent/ui/package-lock.json b/bookstack_agent/ui/package-lock.json index 9404012..24d81fb 100644 --- a/bookstack_agent/ui/package-lock.json +++ b/bookstack_agent/ui/package-lock.json @@ -16,6 +16,7 @@ "react-dom": "^19.1.0", "react-markdown": "^9.0.1", "react-syntax-highlighter": "^16.1.1", + "recharts": "^2.15.0", "remark-gfm": "^4.0.1" }, "devDependencies": { @@ -1028,6 +1029,69 @@ "tslib": "^2.4.0" } }, + "node_modules/@types/d3-array": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/@types/d3-array/-/d3-array-3.2.2.tgz", + "integrity": "sha512-hOLWVbm7uRza0BYXpIIW5pxfrKe0W+D5lrFiAEYR+pb6w3N2SwSMaJbXdUfSEv+dT4MfHBLtn5js0LAWaO6otw==", + "license": "MIT" + }, + "node_modules/@types/d3-color": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/@types/d3-color/-/d3-color-3.1.3.tgz", + "integrity": "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A==", + "license": "MIT" + }, + "node_modules/@types/d3-ease": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@types/d3-ease/-/d3-ease-3.0.2.tgz", + "integrity": "sha512-NcV1JjO5oDzoK26oMzbILE6HW7uVXOHLQvHshBUW4UMdZGfiY6v5BeQwh9a9tCzv+CeefZQHJt5SRgK154RtiA==", + "license": "MIT" + }, + "node_modules/@types/d3-interpolate": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/d3-interpolate/-/d3-interpolate-3.0.4.tgz", + "integrity": "sha512-mgLPETlrpVV1YRJIglr4Ez47g7Yxjl1lj7YKsiMCb27VJH9W8NVM6Bb9d8kkpG/uAQS5AmbA48q2IAolKKo1MA==", + "license": "MIT", + "dependencies": { + "@types/d3-color": "*" + } + }, + "node_modules/@types/d3-path": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/@types/d3-path/-/d3-path-3.1.1.tgz", + "integrity": "sha512-VMZBYyQvbGmWyWVea0EHs/BwLgxc+MKi1zLDCONksozI4YJMcTt8ZEuIR4Sb1MMTE8MMW49v0IwI5+b7RmfWlg==", + "license": "MIT" + }, + "node_modules/@types/d3-scale": { + "version": "4.0.9", + "resolved": "https://registry.npmjs.org/@types/d3-scale/-/d3-scale-4.0.9.tgz", + "integrity": "sha512-dLmtwB8zkAeO/juAMfnV+sItKjlsw2lKdZVVy6LRr0cBmegxSABiLEpGVmSJJ8O08i4+sGR6qQtb6WtuwJdvVw==", + "license": "MIT", + "dependencies": { + "@types/d3-time": "*" + } + }, + "node_modules/@types/d3-shape": { + "version": "3.1.8", + "resolved": "https://registry.npmjs.org/@types/d3-shape/-/d3-shape-3.1.8.tgz", + "integrity": "sha512-lae0iWfcDeR7qt7rA88BNiqdvPS5pFVPpo5OfjElwNaT2yyekbM0C9vK+yqBqEmHr6lDkRnYNoTBYlAgJa7a4w==", + "license": "MIT", + "dependencies": { + "@types/d3-path": "*" + } + }, + "node_modules/@types/d3-time": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/d3-time/-/d3-time-3.0.4.tgz", + "integrity": "sha512-yuzZug1nkAAaBlBBikKZTgzCeA+k1uy4ZFwWANOfKw5z5LRhV0gNA7gNkKm7HoK+HRN0wX3EkxGk0fpbWhmB7g==", + "license": "MIT" + }, + "node_modules/@types/d3-timer": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@types/d3-timer/-/d3-timer-3.0.2.tgz", + "integrity": "sha512-Ps3T8E8dZDam6fUyNiMkekK3XUsaUEik+idO9/YjPtfj2qruF8tFBXS7XhtE4iIXBLxhmLjP3SXpLhVf21I9Lw==", + "license": "MIT" + }, "node_modules/@types/debug": { "version": "4.1.12", "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz", @@ -2350,6 +2414,15 @@ "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==", "license": "MIT" }, + "node_modules/clsx": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", + "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", @@ -2440,6 +2513,127 @@ "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==", "license": "MIT" }, + "node_modules/d3-array": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz", + "integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==", + "license": "ISC", + "dependencies": { + "internmap": "1 - 2" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-color": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz", + "integrity": "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-ease": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-ease/-/d3-ease-3.0.1.tgz", + "integrity": "sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-format": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz", + "integrity": "sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-interpolate": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz", + "integrity": "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==", + "license": "ISC", + "dependencies": { + "d3-color": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-path": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz", + "integrity": "sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-scale": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz", + "integrity": "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==", + "license": "ISC", + "dependencies": { + "d3-array": "2.10.0 - 3", + "d3-format": "1 - 3", + "d3-interpolate": "1.2.0 - 3", + "d3-time": "2.1.1 - 3", + "d3-time-format": "2 - 4" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-shape": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/d3-shape/-/d3-shape-3.2.0.tgz", + "integrity": "sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA==", + "license": "ISC", + "dependencies": { + "d3-path": "^3.1.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-time": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz", + "integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==", + "license": "ISC", + "dependencies": { + "d3-array": "2 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-time-format": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz", + "integrity": "sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==", + "license": "ISC", + "dependencies": { + "d3-time": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-timer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz", + "integrity": "sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, "node_modules/damerau-levenshtein": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.8.tgz", @@ -2518,6 +2712,12 @@ } } }, + "node_modules/decimal.js-light": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/decimal.js-light/-/decimal.js-light-2.5.1.tgz", + "integrity": "sha512-qIMFpTMZmny+MMIitAB6D7iVPEorVw6YQRWkvarTkT4tBeSLLiHzcwj6q0MmYSFCiVpiqPJTJEYIrpcPzVEIvg==", + "license": "MIT" + }, "node_modules/decode-named-character-reference": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.3.0.tgz", @@ -2633,6 +2833,16 @@ "node": ">=0.10.0" } }, + "node_modules/dom-helpers": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/dom-helpers/-/dom-helpers-5.2.1.tgz", + "integrity": "sha512-nRCa7CK3VTrM2NmGkIy4cbK7IZlgBE/PYMn55rrXefr5xXDP0LdtfPnblFDoVdcAfslJ7or6iqAUnx0CCGIWQA==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.8.7", + "csstype": "^3.0.2" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -3304,6 +3514,12 @@ "node": ">=0.10.0" } }, + "node_modules/eventemitter3": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", + "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", + "license": "MIT" + }, "node_modules/extend": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", @@ -3317,6 +3533,15 @@ "dev": true, "license": "MIT" }, + "node_modules/fast-equals": { + "version": "5.4.0", + "resolved": "https://registry.npmjs.org/fast-equals/-/fast-equals-5.4.0.tgz", + "integrity": "sha512-jt2DW/aNFNwke7AUd+Z+e6pz39KO5rzdbbFCg2sGafS4mk13MI7Z8O5z9cADNn5lhGODIgLwug6TZO2ctf7kcw==", + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, "node_modules/fast-glob": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.1.tgz", @@ -3925,6 +4150,15 @@ "node": ">= 0.4" } }, + "node_modules/internmap": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/internmap/-/internmap-2.0.3.tgz", + "integrity": "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, "node_modules/iron-session": { "version": "8.0.4", "resolved": "https://registry.npmjs.org/iron-session/-/iron-session-8.0.4.tgz", @@ -4466,7 +4700,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", - "dev": true, "license": "MIT" }, "node_modules/js-yaml": { @@ -4612,6 +4845,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/lodash": { + "version": "4.17.23", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz", + "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", + "license": "MIT" + }, "node_modules/lodash.merge": { "version": "4.6.2", "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", @@ -4633,7 +4872,6 @@ "version": "1.4.0", "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "dev": true, "license": "MIT", "dependencies": { "js-tokens": "^3.0.0 || ^4.0.0" @@ -5766,7 +6004,6 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", - "dev": true, "license": "MIT", "engines": { "node": ">=0.10.0" @@ -6263,7 +6500,6 @@ "version": "15.8.1", "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==", - "dev": true, "license": "MIT", "dependencies": { "loose-envify": "^1.4.0", @@ -6337,7 +6573,6 @@ "version": "16.13.1", "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==", - "dev": true, "license": "MIT" }, "node_modules/react-markdown": { @@ -6367,6 +6602,21 @@ "react": ">=18" } }, + "node_modules/react-smooth": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/react-smooth/-/react-smooth-4.0.4.tgz", + "integrity": "sha512-gnGKTpYwqL0Iii09gHobNolvX4Kiq4PKx6eWBCYYix+8cdw+cGo3do906l1NBPKkSWx1DghC1dlWG9L2uGd61Q==", + "license": "MIT", + "dependencies": { + "fast-equals": "^5.0.1", + "prop-types": "^15.8.1", + "react-transition-group": "^4.4.5" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/react-syntax-highlighter": { "version": "16.1.1", "resolved": "https://registry.npmjs.org/react-syntax-highlighter/-/react-syntax-highlighter-16.1.1.tgz", @@ -6387,6 +6637,22 @@ "react": ">= 0.14.0" } }, + "node_modules/react-transition-group": { + "version": "4.4.5", + "resolved": "https://registry.npmjs.org/react-transition-group/-/react-transition-group-4.4.5.tgz", + "integrity": "sha512-pZcd1MCJoiKiBR2NRxeCRg13uCXbydPnmB4EOeRrY7480qNWO8IIgQG6zlDkm6uRMsURXPuKq0GWtiM59a5Q6g==", + "license": "BSD-3-Clause", + "dependencies": { + "@babel/runtime": "^7.5.5", + "dom-helpers": "^5.0.1", + "loose-envify": "^1.4.0", + "prop-types": "^15.6.2" + }, + "peerDependencies": { + "react": ">=16.6.0", + "react-dom": ">=16.6.0" + } + }, "node_modules/read-cache": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", @@ -6410,6 +6676,44 @@ "node": ">=8.10.0" } }, + "node_modules/recharts": { + "version": "2.15.4", + "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.15.4.tgz", + "integrity": "sha512-UT/q6fwS3c1dHbXv2uFgYJ9BMFHu3fwnd7AYZaEQhXuYQ4hgsxLvsUXzGdKeZrW5xopzDCvuA2N41WJ88I7zIw==", + "license": "MIT", + "dependencies": { + "clsx": "^2.0.0", + "eventemitter3": "^4.0.1", + "lodash": "^4.17.21", + "react-is": "^18.3.1", + "react-smooth": "^4.0.4", + "recharts-scale": "^0.4.4", + "tiny-invariant": "^1.3.1", + "victory-vendor": "^36.6.8" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "react": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/recharts-scale": { + "version": "0.4.5", + "resolved": "https://registry.npmjs.org/recharts-scale/-/recharts-scale-0.4.5.tgz", + "integrity": "sha512-kivNFO+0OcUNu7jQquLXAxz1FIwZj8nrj+YkOKc5694NbjCvcT6aSZiIzNzd2Kul4o4rTto8QVR9lMNtxD4G1w==", + "license": "MIT", + "dependencies": { + "decimal.js-light": "^2.4.1" + } + }, + "node_modules/recharts/node_modules/react-is": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", + "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", + "license": "MIT" + }, "node_modules/reflect.getprototypeof": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/reflect.getprototypeof/-/reflect.getprototypeof-1.0.10.tgz", @@ -7250,6 +7554,12 @@ "node": ">=0.8" } }, + "node_modules/tiny-invariant": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", + "integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==", + "license": "MIT" + }, "node_modules/tinyglobby": { "version": "0.2.15", "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", @@ -7705,6 +8015,28 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/victory-vendor": { + "version": "36.9.2", + "resolved": "https://registry.npmjs.org/victory-vendor/-/victory-vendor-36.9.2.tgz", + "integrity": "sha512-PnpQQMuxlwYdocC8fIJqVXvkeViHYzotI+NJrCuav0ZYFoq912ZHBk3mCeuj+5/VpodOjPe1z0Fk2ihgzlXqjQ==", + "license": "MIT AND ISC", + "dependencies": { + "@types/d3-array": "^3.0.3", + "@types/d3-ease": "^3.0.0", + "@types/d3-interpolate": "^3.0.1", + "@types/d3-scale": "^4.0.2", + "@types/d3-shape": "^3.1.0", + "@types/d3-time": "^3.0.0", + "@types/d3-timer": "^3.0.0", + "d3-array": "^3.1.6", + "d3-ease": "^3.0.1", + "d3-interpolate": "^3.0.1", + "d3-scale": "^4.0.2", + "d3-shape": "^3.1.0", + "d3-time": "^3.0.0", + "d3-timer": "^3.0.1" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/bookstack_agent/ui/package.json b/bookstack_agent/ui/package.json index e928add..906acdf 100644 --- a/bookstack_agent/ui/package.json +++ b/bookstack_agent/ui/package.json @@ -18,6 +18,7 @@ "react-dom": "^19.1.0", "react-markdown": "^9.0.1", "react-syntax-highlighter": "^16.1.1", + "recharts": "^2.15.0", "remark-gfm": "^4.0.1" }, "devDependencies": { diff --git a/pyproject.toml b/pyproject.toml index 26392be..5dc3505 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ docs = [ bookstack-api = [ "fastapi>=0.115.0", + "google-cloud-storage>=2.0.0", "httptools>=0.6.0", "uvicorn[standard]>=0.32.0", "uvloop>=0.21.0", @@ -110,6 +111,10 @@ mypy_path = "src:slack_bot/stubs" module = "bookstack_agent.api.main" allow_untyped_decorators = true +[[tool.mypy.overrides]] +module = "aieng_bot.bookstack.activity_logger" +ignore_missing_imports = true + [tool.ruff] include = ["*.py", "pyproject.toml", "*.ipynb"] line-length = 88 diff --git a/src/aieng_bot/bookstack/activity_logger.py b/src/aieng_bot/bookstack/activity_logger.py new file mode 100644 index 0000000..5eb222f --- /dev/null +++ b/src/aieng_bot/bookstack/activity_logger.py @@ -0,0 +1,278 @@ +"""Activity logger for BookStack QA analytics — writes query records to GCS.""" + +from __future__ import annotations + +import json +import logging +from datetime import datetime, timezone +from typing import Any + +try: + from google.cloud import storage as _gcs_storage + + _GCS_AVAILABLE = True +except ImportError: + _gcs_storage = None + _GCS_AVAILABLE = False + +logger = logging.getLogger(__name__) + +BUCKET = "bot-dashboard-vectorinstitute" +ACTIVITY_LOG_PATH = "data/bookstack_activity_log.json" +TRACES_PREFIX = "data/bookstack/traces" + + +class BookstackActivityLogger: + """Log BookStack QA query analytics to Google Cloud Storage. + + Each query is recorded in a unified activity log (for list/chart views) + and as an individual trace file (for detailed per-query inspection). + + Uses the ``google-cloud-storage`` Python library with Application Default + Credentials (ADC). In GKE this is satisfied automatically via Workload + Identity; locally, run ``gcloud auth application-default login``. + + Parameters + ---------- + bucket : str + GCS bucket name (default ``bot-dashboard-vectorinstitute``). + log_path : str + Path to the activity log JSON inside the bucket. + + """ + + def __init__( + self, + bucket: str = BUCKET, + log_path: str = ACTIVITY_LOG_PATH, + ) -> None: + """Initialise the logger.""" + self.bucket = bucket + self.log_path = log_path + self._client: Any = None + + # ------------------------------------------------------------------ + # GCS helpers (lazy client init so imports happen at runtime) + # ------------------------------------------------------------------ + + def _get_client(self) -> Any: + """Return (and lazily create) the GCS client.""" + if self._client is None: + if not _GCS_AVAILABLE or _gcs_storage is None: + raise RuntimeError( + "google-cloud-storage is required for analytics logging. " + "Install it with: pip install google-cloud-storage" + ) + self._client = _gcs_storage.Client() + return self._client + + def _load_activity_log(self) -> dict[str, Any] | None: + """Download the current activity log from GCS. + + Returns + ------- + dict + Parsed log with ``activities`` list and ``last_updated`` key. + Returns an empty structure if the log does not yet exist. + Returns ``None`` on any read error (caller must abort write). + + """ + try: + client = self._get_client() + bucket = client.bucket(self.bucket) + blob = bucket.blob(self.log_path) + if not blob.exists(): + return {"activities": [], "last_updated": None} + data = blob.download_as_text() + return json.loads(data) + except json.JSONDecodeError as exc: + logger.error("Failed to parse bookstack activity log: %s", exc) + return None + except Exception as exc: + logger.error( + "Failed to load bookstack activity log from GCS " + "(aborting write to protect existing data): %s", + exc, + ) + return None + + def _save_activity_log(self, log_data: dict[str, Any]) -> bool: + """Upload the activity log to GCS. + + Parameters + ---------- + log_data : dict + Updated activity log to persist. + + Returns + ------- + bool + ``True`` on success, ``False`` on failure. + + """ + try: + client = self._get_client() + bucket = client.bucket(self.bucket) + blob = bucket.blob(self.log_path) + blob.upload_from_string( + json.dumps(log_data, indent=2), + content_type="application/json", + ) + return True + except Exception as exc: + logger.error("Failed to upload bookstack activity log to GCS: %s", exc) + return False + + def _save_trace(self, trace: dict[str, Any], trace_path: str) -> bool: + """Upload an individual query trace to GCS. + + Parameters + ---------- + trace : dict + Full trace data for one query. + trace_path : str + Destination path inside the bucket. + + Returns + ------- + bool + ``True`` on success, ``False`` on failure. + + """ + try: + client = self._get_client() + bucket = client.bucket(self.bucket) + blob = bucket.blob(trace_path) + blob.upload_from_string( + json.dumps(trace, indent=2), + content_type="application/json", + ) + return True + except Exception as exc: + logger.error( + "Failed to upload bookstack trace to GCS (%s): %s", trace_path, exc + ) + return False + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + def log_query( + self, + session_id: str, + question: str, + tool_calls: list[dict[str, Any]], + answer: str, + duration_seconds: float, + status: str, + ) -> bool: + """Record a completed BookStack QA query to GCS. + + Saves two objects: + + 1. An entry appended to the unified activity log (for aggregate views). + 2. A per-query trace file (for detailed inspection / recent queries). + + This method is synchronous and is intended to be called via + ``asyncio.to_thread`` from async contexts so it does not block the + event loop. + + Parameters + ---------- + session_id : str + Opaque session identifier from the API session store. + question : str + The user's raw question text. + tool_calls : list[dict] + Ordered list of ``{"tool": , "input": {...}}`` dicts + collected during the agent's tool-use loop. + answer : str + The agent's final answer (markdown). + duration_seconds : float + Wall-clock time from question receipt to answer emission. + status : str + ``"success"`` or ``"error"``. + + Returns + ------- + bool + ``True`` if both the trace and the activity log were saved + successfully, ``False`` otherwise. + + """ + now = datetime.now(timezone.utc) + timestamp = now.isoformat() + + # ------------------------------------------------------------------ + # 1. Build and save the per-query trace + # ------------------------------------------------------------------ + trace: dict[str, Any] = { + "session_id": session_id, + "timestamp": timestamp, + "question": question, + "tool_calls": [ + {"seq": i + 1, "tool": tc["tool"], "input": tc.get("input", {})} + for i, tc in enumerate(tool_calls) + ], + "answer": answer, + "duration_seconds": round(duration_seconds, 3), + "status": status, + } + + date_str = now.strftime("%Y-%m-%d") + time_str = now.strftime("%H%M%S") + safe_sid = session_id[:8] + trace_path = f"{TRACES_PREFIX}/{date_str}/{safe_sid}-{time_str}.json" + + trace_saved = self._save_trace(trace, trace_path) + + # ------------------------------------------------------------------ + # 2. Append to the unified activity log + # ------------------------------------------------------------------ + tools_used = list( + dict.fromkeys(tc["tool"] for tc in tool_calls) + ) # preserve order, deduplicate + + # Per-tool call counts for accurate analytics + tool_call_counts: dict[str, int] = {} + for tc in tool_calls: + tool_name = tc["tool"] + tool_call_counts[tool_name] = tool_call_counts.get(tool_name, 0) + 1 + + activity: dict[str, Any] = { + "session_id": session_id, + "timestamp": timestamp, + "question": question[:300], # keep activity log compact + "tools_used": tools_used, + "tool_call_counts": tool_call_counts, + "num_tool_calls": len(tool_calls), + "answer_length": len(answer), + "duration_seconds": round(duration_seconds, 3), + "status": status, + "trace_path": trace_path, + } + + log_data = self._load_activity_log() + if log_data is None: + logger.error( + "Aborting bookstack activity log write for session %s " + "to prevent overwriting existing data after a GCS read failure", + session_id[:8], + ) + return False + + log_data["activities"].append(activity) + log_data["last_updated"] = timestamp + + log_saved = self._save_activity_log(log_data) + + if log_saved: + logger.info( + "Bookstack query logged (status=%s, tools=%s, session=%s)", + status, + tools_used, + session_id[:8], + ) + + return trace_saved and log_saved diff --git a/tests/bookstack/test_activity_logger.py b/tests/bookstack/test_activity_logger.py new file mode 100644 index 0000000..cfc9371 --- /dev/null +++ b/tests/bookstack/test_activity_logger.py @@ -0,0 +1,455 @@ +"""Unit tests for BookstackActivityLogger.""" + +import json +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +from aieng_bot.bookstack.activity_logger import BookstackActivityLogger + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def logger() -> BookstackActivityLogger: + """Return a BookstackActivityLogger configured with a test bucket.""" + return BookstackActivityLogger( + bucket="test-bucket", + log_path="data/test_bookstack_activity_log.json", + ) + + +@pytest.fixture +def mock_gcs_client() -> MagicMock: + """Return a mock google.cloud.storage.Client.""" + return MagicMock() + + +@pytest.fixture +def sample_log() -> dict[str, Any]: + """Return an existing activity log with one entry.""" + return { + "activities": [ + { + "session_id": "existing01", + "timestamp": "2026-03-01T10:00:00Z", + "question": "How do I access the VPN?", + "tools_used": ["search_bookstack"], + "tool_call_counts": {"search_bookstack": 1}, + "num_tool_calls": 1, + "answer_length": 400, + "duration_seconds": 3.5, + "status": "success", + "trace_path": "data/bookstack/traces/2026-03-01/existing01-100000.json", + } + ], + "last_updated": "2026-03-01T10:00:00Z", + } + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _attach_mock_client( + logger: BookstackActivityLogger, mock_client: MagicMock +) -> None: + """Inject a mock GCS client into the logger.""" + logger._client = mock_client + + +# --------------------------------------------------------------------------- +# Initialisation +# --------------------------------------------------------------------------- + + +class TestInit: + """BookstackActivityLogger initialisation.""" + + def test_defaults(self) -> None: + """Verify default bucket and log path values.""" + lg = BookstackActivityLogger() + assert lg.bucket == "bot-dashboard-vectorinstitute" + assert lg.log_path == "data/bookstack_activity_log.json" + + def test_custom_values(self) -> None: + """Verify custom bucket and log path are stored correctly.""" + lg = BookstackActivityLogger(bucket="my-bucket", log_path="custom/path.json") + assert lg.bucket == "my-bucket" + assert lg.log_path == "custom/path.json" + + def test_client_starts_as_none(self, logger: BookstackActivityLogger) -> None: + """Verify GCS client is lazily initialised.""" + assert logger._client is None + + +# --------------------------------------------------------------------------- +# _load_activity_log +# --------------------------------------------------------------------------- + + +class TestLoadActivityLog: + """_load_activity_log GCS read behaviour.""" + + def test_load_existing_log( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + sample_log: dict[str, Any], + ) -> None: + """Return parsed log when blob exists.""" + blob = MagicMock() + blob.exists.return_value = True + blob.download_as_text.return_value = json.dumps(sample_log) + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + result = logger._load_activity_log() + + assert result == sample_log + assert len(result["activities"]) == 1 # type: ignore[index] + + def test_load_returns_empty_when_blob_not_found( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + ) -> None: + """Return empty structure when the log file does not yet exist.""" + blob = MagicMock() + blob.exists.return_value = False + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + result = logger._load_activity_log() + + assert result == {"activities": [], "last_updated": None} + + def test_load_returns_none_on_gcs_error( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + ) -> None: + """Return None when GCS raises (caller must abort write).""" + mock_gcs_client.bucket.side_effect = Exception("permission denied") + _attach_mock_client(logger, mock_gcs_client) + + result = logger._load_activity_log() + + assert result is None + + def test_load_returns_none_on_invalid_json( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + ) -> None: + """Return None when the blob contains malformed JSON.""" + blob = MagicMock() + blob.exists.return_value = True + blob.download_as_text.return_value = "not valid json {" + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + result = logger._load_activity_log() + + assert result is None + + +# --------------------------------------------------------------------------- +# _save_activity_log +# --------------------------------------------------------------------------- + + +class TestSaveActivityLog: + """_save_activity_log GCS write behaviour.""" + + def test_save_success( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + sample_log: dict[str, Any], + ) -> None: + """Return True and upload JSON with correct content-type on success.""" + blob = MagicMock() + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + result = logger._save_activity_log(sample_log) + + assert result is True + blob.upload_from_string.assert_called_once() + call_kwargs = blob.upload_from_string.call_args + assert call_kwargs.kwargs["content_type"] == "application/json" + saved = json.loads(call_kwargs.args[0]) + assert saved == sample_log + + def test_save_returns_false_on_gcs_error( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + sample_log: dict[str, Any], + ) -> None: + """Return False when the GCS upload raises.""" + blob = MagicMock() + blob.upload_from_string.side_effect = Exception("upload failed") + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + result = logger._save_activity_log(sample_log) + + assert result is False + + +# --------------------------------------------------------------------------- +# _save_trace +# --------------------------------------------------------------------------- + + +class TestSaveTrace: + """_save_trace uploads individual trace files.""" + + def test_save_trace_success( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + ) -> None: + """Return True when trace upload succeeds.""" + blob = MagicMock() + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + trace = {"session_id": "abc", "question": "Q", "tool_calls": [], "answer": "A"} + result = logger._save_trace( + trace, "data/bookstack/traces/2026-03-14/abc-120000.json" + ) + + assert result is True + blob.upload_from_string.assert_called_once() + + def test_save_trace_returns_false_on_error( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + ) -> None: + """Return False when the GCS upload raises.""" + blob = MagicMock() + blob.upload_from_string.side_effect = Exception("network error") + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + result = logger._save_trace( + {}, "data/bookstack/traces/2026-03-14/abc-120000.json" + ) + + assert result is False + + +# --------------------------------------------------------------------------- +# log_query +# --------------------------------------------------------------------------- + + +class TestLogQuery: + """log_query end-to-end behaviour.""" + + def test_log_query_success( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + ) -> None: + """Return True and perform two GCS uploads (trace + activity log).""" + blob = MagicMock() + blob.exists.return_value = False # new log + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + result = logger.log_query( + session_id="sess1234", + question="What is the offboarding process?", + tool_calls=[ + {"tool": "search_bookstack", "input": {"query": "offboarding"}}, + {"tool": "get_page", "input": {"page_id": 10}}, + ], + answer="The offboarding process involves...", + duration_seconds=5.2, + status="success", + ) + + assert result is True + # upload_from_string called twice: once for trace, once for activity log + assert blob.upload_from_string.call_count == 2 + + def test_log_query_records_correct_activity_fields( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + ) -> None: + """Verify all activity fields are populated with correct values.""" + blob = MagicMock() + blob.exists.return_value = False + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + with patch.object( + logger, "_save_activity_log", wraps=logger._save_activity_log + ) as spy: + logger.log_query( + session_id="sess5678", + question="How do I request compute?", + tool_calls=[ + {"tool": "search_bookstack", "input": {"query": "compute"}}, + {"tool": "search_bookstack", "input": {"query": "GPU request"}}, + {"tool": "get_page", "input": {"page_id": 7}}, + ], + answer="Submit a compute request via...", + duration_seconds=9.1, + status="success", + ) + + saved = spy.call_args[0][0] + activity = saved["activities"][0] + + assert activity["session_id"] == "sess5678" + assert activity["question"] == "How do I request compute?" + assert activity["num_tool_calls"] == 3 + assert activity["tool_call_counts"] == {"search_bookstack": 2, "get_page": 1} + assert set(activity["tools_used"]) == {"search_bookstack", "get_page"} + assert activity["status"] == "success" + assert activity["duration_seconds"] == 9.1 + assert activity["answer_length"] == len("Submit a compute request via...") + + def test_log_query_truncates_question_in_activity( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + ) -> None: + """Truncate questions longer than 300 chars in the activity log.""" + blob = MagicMock() + blob.exists.return_value = False + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + long_question = "A" * 500 + + with patch.object( + logger, "_save_activity_log", wraps=logger._save_activity_log + ) as spy: + logger.log_query( + session_id="sess9999", + question=long_question, + tool_calls=[], + answer="Answer", + duration_seconds=1.0, + status="success", + ) + saved = spy.call_args[0][0] + + assert len(saved["activities"][0]["question"]) == 300 + + def test_log_query_appends_to_existing_log( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + sample_log: dict[str, Any], + ) -> None: + """Append new entry to an existing log without overwriting old entries.""" + blob = MagicMock() + blob.exists.return_value = True + blob.download_as_text.return_value = json.dumps(sample_log) + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + with patch.object( + logger, "_save_activity_log", wraps=logger._save_activity_log + ) as spy: + logger.log_query( + session_id="newentry", + question="New question", + tool_calls=[{"tool": "list_books", "input": {}}], + answer="New answer", + duration_seconds=2.0, + status="success", + ) + saved = spy.call_args[0][0] + + assert len(saved["activities"]) == 2 + assert saved["activities"][0]["session_id"] == "existing01" + assert saved["activities"][1]["session_id"] == "newentry" + + def test_log_query_returns_false_when_load_fails( + self, + logger: BookstackActivityLogger, + ) -> None: + """Return False and abort write when the GCS load returns None.""" + with patch.object(logger, "_load_activity_log", return_value=None): + result = logger.log_query( + session_id="sess0000", + question="Q", + tool_calls=[], + answer="A", + duration_seconds=1.0, + status="success", + ) + + assert result is False + + def test_log_query_error_status( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + ) -> None: + """Record error status correctly in the activity log.""" + blob = MagicMock() + blob.exists.return_value = False + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + with patch.object( + logger, "_save_activity_log", wraps=logger._save_activity_log + ) as spy: + logger.log_query( + session_id="errorsess", + question="A failing query", + tool_calls=[], + answer="", + duration_seconds=0.5, + status="error", + ) + saved = spy.call_args[0][0] + + assert saved["activities"][0]["status"] == "error" + + def test_log_query_deduplicates_tools_used( + self, + logger: BookstackActivityLogger, + mock_gcs_client: MagicMock, + ) -> None: + """List each tool once in tools_used while counting all calls in tool_call_counts.""" + blob = MagicMock() + blob.exists.return_value = False + mock_gcs_client.bucket.return_value.blob.return_value = blob + _attach_mock_client(logger, mock_gcs_client) + + with patch.object( + logger, "_save_activity_log", wraps=logger._save_activity_log + ) as spy: + logger.log_query( + session_id="dedupsess", + question="Q", + tool_calls=[ + {"tool": "search_bookstack", "input": {"query": "a"}}, + {"tool": "search_bookstack", "input": {"query": "b"}}, + {"tool": "search_bookstack", "input": {"query": "c"}}, + ], + answer="A", + duration_seconds=3.0, + status="success", + ) + saved = spy.call_args[0][0] + + # tools_used should list each tool only once + assert saved["activities"][0]["tools_used"] == ["search_bookstack"] + assert saved["activities"][0]["num_tool_calls"] == 3 diff --git a/uv.lock b/uv.lock index e8f335d..b7183fd 100644 --- a/uv.lock +++ b/uv.lock @@ -1,6 +1,10 @@ version = 1 revision = 1 requires-python = ">=3.12" +resolution-markers = [ + "python_full_version >= '3.13'", + "python_full_version < '3.13'", +] [[package]] name = "aieng-bot" @@ -20,6 +24,7 @@ dependencies = [ [package.dev-dependencies] bookstack-api = [ { name = "fastapi" }, + { name = "google-cloud-storage" }, { name = "httptools" }, { name = "uvicorn", extra = ["standard"] }, { name = "uvloop" }, @@ -64,6 +69,7 @@ requires-dist = [ [package.metadata.requires-dev] bookstack-api = [ { name = "fastapi", specifier = ">=0.115.0" }, + { name = "google-cloud-storage", specifier = ">=2.0.0" }, { name = "httptools", specifier = ">=0.6.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.32.0" }, { name = "uvloop", specifier = ">=0.21.0" }, @@ -688,6 +694,112 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f7/ec/67fbef5d497f86283db54c22eec6f6140243aae73265799baaaa19cd17fb/ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619", size = 11034 }, ] +[[package]] +name = "google-api-core" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "googleapis-common-protos" }, + { name = "proto-plus" }, + { name = "protobuf" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/98/586ec94553b569080caef635f98a3723db36a38eac0e3d7eb3ea9d2e4b9a/google_api_core-2.30.0.tar.gz", hash = "sha256:02edfa9fab31e17fc0befb5f161b3bf93c9096d99aed584625f38065c511ad9b", size = 176959 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/27/09c33d67f7e0dcf06d7ac17d196594e66989299374bfb0d4331d1038e76b/google_api_core-2.30.0-py3-none-any.whl", hash = "sha256:80be49ee937ff9aba0fd79a6eddfde35fe658b9953ab9b79c57dd7061afa8df5", size = 173288 }, +] + +[[package]] +name = "google-auth" +version = "2.49.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "pyasn1-modules" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ea/80/6a696a07d3d3b0a92488933532f03dbefa4a24ab80fb231395b9a2a1be77/google_auth-2.49.1.tar.gz", hash = "sha256:16d40da1c3c5a0533f57d268fe72e0ebb0ae1cc3b567024122651c045d879b64", size = 333825 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/eb/c6c2478d8a8d633460be40e2a8a6f8f429171997a35a96f81d3b680dec83/google_auth-2.49.1-py3-none-any.whl", hash = "sha256:195ebe3dca18eddd1b3db5edc5189b76c13e96f29e73043b923ebcf3f1a860f7", size = 240737 }, +] + +[[package]] +name = "google-cloud-core" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/03/ef0bc99d0e0faf4fdbe67ac445e18cdaa74824fd93cd069e7bb6548cb52d/google_cloud_core-2.5.0.tar.gz", hash = "sha256:7c1b7ef5c92311717bd05301aa1a91ffbc565673d3b0b4163a52d8413a186963", size = 36027 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/20/bfa472e327c8edee00f04beecc80baeddd2ab33ee0e86fd7654da49d45e9/google_cloud_core-2.5.0-py3-none-any.whl", hash = "sha256:67d977b41ae6c7211ee830c7912e41003ea8194bff15ae7d72fd6f51e57acabc", size = 29469 }, +] + +[[package]] +name = "google-cloud-storage" +version = "3.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-cloud-core" }, + { name = "google-crc32c" }, + { name = "google-resumable-media" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/b1/4f0798e88285b50dfc60ed3a7de071def538b358db2da468c2e0deecbb40/google_cloud_storage-3.9.0.tar.gz", hash = "sha256:f2d8ca7db2f652be757e92573b2196e10fbc09649b5c016f8b422ad593c641cc", size = 17298544 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/0b/816a6ae3c9fd096937d2e5f9670558908811d57d59ddf69dd4b83b326fd1/google_cloud_storage-3.9.0-py3-none-any.whl", hash = "sha256:2dce75a9e8b3387078cbbdad44757d410ecdb916101f8ba308abf202b6968066", size = 321324 }, +] + +[[package]] +name = "google-crc32c" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300 }, + { url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867 }, + { url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364 }, + { url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740 }, + { url = "https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437 }, + { url = "https://files.pythonhosted.org/packages/d1/db/000f15b41724589b0e7bc24bc7a8967898d8d3bc8caf64c513d91ef1f6c0/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b", size = 31297 }, + { url = "https://files.pythonhosted.org/packages/d7/0d/8ebed0c39c53a7e838e2a486da8abb0e52de135f1b376ae2f0b160eb4c1a/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27", size = 30867 }, + { url = "https://files.pythonhosted.org/packages/ce/42/b468aec74a0354b34c8cbf748db20d6e350a68a2b0912e128cabee49806c/google_crc32c-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa", size = 33344 }, + { url = "https://files.pythonhosted.org/packages/1c/e8/b33784d6fc77fb5062a8a7854e43e1e618b87d5ddf610a88025e4de6226e/google_crc32c-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8", size = 33694 }, + { url = "https://files.pythonhosted.org/packages/92/b1/d3cbd4d988afb3d8e4db94ca953df429ed6db7282ed0e700d25e6c7bfc8d/google_crc32c-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f", size = 34435 }, + { url = "https://files.pythonhosted.org/packages/21/88/8ecf3c2b864a490b9e7010c84fd203ec8cf3b280651106a3a74dd1b0ca72/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697", size = 31301 }, + { url = "https://files.pythonhosted.org/packages/36/c6/f7ff6c11f5ca215d9f43d3629163727a272eabc356e5c9b2853df2bfe965/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651", size = 30868 }, + { url = "https://files.pythonhosted.org/packages/56/15/c25671c7aad70f8179d858c55a6ae8404902abe0cdcf32a29d581792b491/google_crc32c-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2", size = 33381 }, + { url = "https://files.pythonhosted.org/packages/42/fa/f50f51260d7b0ef5d4898af122d8a7ec5a84e2984f676f746445f783705f/google_crc32c-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21", size = 33734 }, + { url = "https://files.pythonhosted.org/packages/08/a5/7b059810934a09fb3ccb657e0843813c1fee1183d3bc2c8041800374aa2c/google_crc32c-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2", size = 34878 }, +] + +[[package]] +name = "google-resumable-media" +version = "2.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-crc32c" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/64/d7/520b62a35b23038ff005e334dba3ffc75fcf583bee26723f1fd8fd4b6919/google_resumable_media-2.8.0.tar.gz", hash = "sha256:f1157ed8b46994d60a1bc432544db62352043113684d4e030ee02e77ebe9a1ae", size = 2163265 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/0b/93afde9cfe012260e9fe1522f35c9b72d6ee222f316586b1f23ecf44d518/google_resumable_media-2.8.0-py3-none-any.whl", hash = "sha256:dd14a116af303845a8d932ddae161a26e86cc229645bc98b39f026f9b1717582", size = 81340 }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.73.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/96/a0205167fa0154f4a542fd6925bdc63d039d88dab3588b875078107e6f06/googleapis_common_protos-1.73.0.tar.gz", hash = "sha256:778d07cd4fbeff84c6f7c72102f0daf98fa2bfd3fa8bea426edc545588da0b5a", size = 147323 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/28/23eea8acd65972bbfe295ce3666b28ac510dfcb115fac089d3edb0feb00a/googleapis_common_protos-1.73.0-py3-none-any.whl", hash = "sha256:dfdaaa2e860f242046be561e6d6cb5c5f1541ae02cfbcb034371aadb2942b4e8", size = 297578 }, +] + [[package]] name = "griffe" version = "2.0.0" @@ -1629,6 +1741,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431 }, ] +[[package]] +name = "proto-plus" +version = "1.27.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/02/8832cde80e7380c600fbf55090b6ab7b62bd6825dbedde6d6657c15a1f8e/proto_plus-1.27.1.tar.gz", hash = "sha256:912a7460446625b792f6448bade9e55cd4e41e6ac10e27009ef71a7f317fa147", size = 56929 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/79/ac273cbbf744691821a9cca88957257f41afe271637794975ca090b9588b/proto_plus-1.27.1-py3-none-any.whl", hash = "sha256:e4643061f3a4d0de092d62aa4ad09fa4756b2cbb89d4627f3985018216f9fefc", size = 50480 }, +] + +[[package]] +name = "protobuf" +version = "6.33.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/25/7c72c307aafc96fa87062aa6291d9f7c94836e43214d43722e86037aac02/protobuf-6.33.5.tar.gz", hash = "sha256:6ddcac2a081f8b7b9642c09406bc6a4290128fce5f471cddd165960bb9119e5c", size = 444465 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/79/af92d0a8369732b027e6d6084251dd8e782c685c72da161bd4a2e00fbabb/protobuf-6.33.5-cp310-abi3-win32.whl", hash = "sha256:d71b040839446bac0f4d162e758bea99c8251161dae9d0983a3b88dee345153b", size = 425769 }, + { url = "https://files.pythonhosted.org/packages/55/75/bb9bc917d10e9ee13dee8607eb9ab963b7cf8be607c46e7862c748aa2af7/protobuf-6.33.5-cp310-abi3-win_amd64.whl", hash = "sha256:3093804752167bcab3998bec9f1048baae6e29505adaf1afd14a37bddede533c", size = 437118 }, + { url = "https://files.pythonhosted.org/packages/a2/6b/e48dfc1191bc5b52950246275bf4089773e91cb5ba3592621723cdddca62/protobuf-6.33.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:a5cb85982d95d906df1e2210e58f8e4f1e3cdc088e52c921a041f9c9a0386de5", size = 427766 }, + { url = "https://files.pythonhosted.org/packages/4e/b1/c79468184310de09d75095ed1314b839eb2f72df71097db9d1404a1b2717/protobuf-6.33.5-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:9b71e0281f36f179d00cbcb119cb19dec4d14a81393e5ea220f64b286173e190", size = 324638 }, + { url = "https://files.pythonhosted.org/packages/c5/f5/65d838092fd01c44d16037953fd4c2cc851e783de9b8f02b27ec4ffd906f/protobuf-6.33.5-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8afa18e1d6d20af15b417e728e9f60f3aa108ee76f23c3b2c07a2c3b546d3afd", size = 339411 }, + { url = "https://files.pythonhosted.org/packages/9b/53/a9443aa3ca9ba8724fdfa02dd1887c1bcd8e89556b715cfbacca6b63dbec/protobuf-6.33.5-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:cbf16ba3350fb7b889fca858fb215967792dc125b35c7976ca4818bee3521cf0", size = 323465 }, + { url = "https://files.pythonhosted.org/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl", hash = "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", size = 170687 }, +] + [[package]] name = "psutil" version = "7.2.2" @@ -1687,6 +1826,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9b/bf/7595e817906a29453ba4d99394e781b6fabe55d21f3c15d240f85dd06bb1/py_serializable-2.1.0-py3-none-any.whl", hash = "sha256:b56d5d686b5a03ba4f4db5e769dc32336e142fc3bd4d68a8c25579ebb0a67304", size = 23045 }, ] +[[package]] +name = "pyasn1" +version = "0.6.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/b6/6e630dff89739fcd427e3f72b3d905ce0acb85a45d4ec3e2678718a3487f/pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b", size = 146586 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/b5/a96872e5184f354da9c84ae119971a0a4c221fe9b27a4d94bd43f2596727/pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf", size = 83371 }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259 }, +] + [[package]] name = "pycodestyle" version = "2.14.0" From 62822642b583e75b6af1893c45bc39e9a6e47d23 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Sat, 14 Mar 2026 20:56:02 -0400 Subject: [PATCH 2/3] Fix url log path, for safety --- .../ui/app/api/bookstack-trace/route.ts | 52 +++++++++++++++++-- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/bookstack_agent/ui/app/api/bookstack-trace/route.ts b/bookstack_agent/ui/app/api/bookstack-trace/route.ts index 68a38f8..71ee9b8 100644 --- a/bookstack_agent/ui/app/api/bookstack-trace/route.ts +++ b/bookstack_agent/ui/app/api/bookstack-trace/route.ts @@ -3,6 +3,48 @@ import { isAuthenticated } from '@/lib/session' const GCS_BUCKET_URL = 'https://storage.googleapis.com/bot-dashboard-vectorinstitute' +/** + * Sanitize a user-supplied trace path. + * + * Accepts only relative paths (no leading slash, no absolute URLs), rejects + * traversal segments (. and ..), and enforces the required prefix so only + * objects under data/bookstack/traces/ can ever be fetched. + * + * Returns the normalized path, or null if the input is invalid. + */ +function sanitizeTracePath(rawPath: string | null): string | null { + if (!rawPath) return null + + const trimmed = rawPath.trim() + const lower = trimmed.toLowerCase() + + // Reject absolute URLs or absolute-style paths + if ( + trimmed.startsWith('/') || + trimmed.startsWith('\\') || + lower.startsWith('http://') || + lower.startsWith('https://') + ) { + return null + } + + // Split, drop empty segments, reject traversal and backslash-containing segments + const segments = trimmed.split('/').filter((s) => s.length > 0) + for (const seg of segments) { + if (seg === '.' || seg === '..' || seg.includes('\\')) { + return null + } + } + + const normalized = segments.join('/') + + if (!normalized.startsWith('data/bookstack/traces/')) { + return null + } + + return normalized +} + /** * Proxy authenticated requests for per-query trace files from GCS. * @@ -14,18 +56,18 @@ export async function GET(req: NextRequest): Promise { return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) } - const tracePath = req.nextUrl.searchParams.get('path') - if (!tracePath) { + const rawPath = req.nextUrl.searchParams.get('path') + if (!rawPath) { return NextResponse.json({ error: 'Missing path parameter' }, { status: 400 }) } - // Restrict to expected prefix to prevent arbitrary GCS reads - if (!tracePath.startsWith('data/bookstack/traces/')) { + const safePath = sanitizeTracePath(rawPath) + if (!safePath) { return NextResponse.json({ error: 'Invalid trace path' }, { status: 400 }) } try { - const res = await fetch(`${GCS_BUCKET_URL}/${tracePath}`, { cache: 'no-store' }) + const res = await fetch(`${GCS_BUCKET_URL}/${safePath}`, { cache: 'no-store' }) if (!res.ok) { return NextResponse.json({ error: 'Trace not found' }, { status: res.status }) } From 210b1869fac7f7b80f6513219cfbe22cc8b4e65e Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Sat, 14 Mar 2026 21:02:19 -0400 Subject: [PATCH 3/3] Fix CodeQL SSRF: rebuild trace URL from strict regex groups Replace the custom sanitizeTracePath helper with a strict regex that captures only the date and filename segments, then reconstruct the GCS URL from those encodeURIComponent-encoded groups. This ensures no raw user input is ever interpolated into the fetch URL, satisfying CodeQL's taint-flow analysis. Co-Authored-By: Claude Sonnet 4.6 --- .../ui/app/api/bookstack-trace/route.ts | 62 ++++++------------- 1 file changed, 19 insertions(+), 43 deletions(-) diff --git a/bookstack_agent/ui/app/api/bookstack-trace/route.ts b/bookstack_agent/ui/app/api/bookstack-trace/route.ts index 71ee9b8..3345bf1 100644 --- a/bookstack_agent/ui/app/api/bookstack-trace/route.ts +++ b/bookstack_agent/ui/app/api/bookstack-trace/route.ts @@ -1,54 +1,24 @@ import { NextRequest, NextResponse } from 'next/server' import { isAuthenticated } from '@/lib/session' -const GCS_BUCKET_URL = 'https://storage.googleapis.com/bot-dashboard-vectorinstitute' - /** - * Sanitize a user-supplied trace path. - * - * Accepts only relative paths (no leading slash, no absolute URLs), rejects - * traversal segments (. and ..), and enforces the required prefix so only - * objects under data/bookstack/traces/ can ever be fetched. + * Expected trace path format: + * data/bookstack/traces/YYYY-MM-DD/XXXXXXXX-HHMMSS.json * - * Returns the normalized path, or null if the input is invalid. + * Groups: (1) date segment, (2) filename segment. + * Matching strictly here means we never interpolate raw user input into the URL — + * only regex-captured, URL-encoded segments are used. */ -function sanitizeTracePath(rawPath: string | null): string | null { - if (!rawPath) return null +const TRACE_PATH_RE = + /^data\/bookstack\/traces\/(\d{4}-\d{2}-\d{2})\/([a-zA-Z0-9]{1,8}-\d{6}\.json)$/ - const trimmed = rawPath.trim() - const lower = trimmed.toLowerCase() - - // Reject absolute URLs or absolute-style paths - if ( - trimmed.startsWith('/') || - trimmed.startsWith('\\') || - lower.startsWith('http://') || - lower.startsWith('https://') - ) { - return null - } - - // Split, drop empty segments, reject traversal and backslash-containing segments - const segments = trimmed.split('/').filter((s) => s.length > 0) - for (const seg of segments) { - if (seg === '.' || seg === '..' || seg.includes('\\')) { - return null - } - } - - const normalized = segments.join('/') - - if (!normalized.startsWith('data/bookstack/traces/')) { - return null - } - - return normalized -} +const GCS_BASE = + 'https://storage.googleapis.com/bot-dashboard-vectorinstitute/data/bookstack/traces' /** * Proxy authenticated requests for per-query trace files from GCS. * - * GET /api/bookstack-trace?path=data/bookstack/traces/... + * GET /api/bookstack-trace?path=data/bookstack/traces/YYYY-MM-DD/SESSION-HHMMSS.json */ export async function GET(req: NextRequest): Promise { const authenticated = await isAuthenticated() @@ -61,13 +31,19 @@ export async function GET(req: NextRequest): Promise { return NextResponse.json({ error: 'Missing path parameter' }, { status: 400 }) } - const safePath = sanitizeTracePath(rawPath) - if (!safePath) { + // Parse the path with a strict regex — only the captured groups are used below. + // encodeURIComponent on each segment prevents any residual injection. + const match = rawPath.trim().match(TRACE_PATH_RE) + if (!match) { return NextResponse.json({ error: 'Invalid trace path' }, { status: 400 }) } + const date = encodeURIComponent(match[1]) + const filename = encodeURIComponent(match[2]) + const url = `${GCS_BASE}/${date}/${filename}` + try { - const res = await fetch(`${GCS_BUCKET_URL}/${safePath}`, { cache: 'no-store' }) + const res = await fetch(url, { cache: 'no-store' }) if (!res.ok) { return NextResponse.json({ error: 'Trace not found' }, { status: res.status }) }