diff --git a/docs/screenshots/playground-idle.png b/docs/screenshots/playground-idle.png new file mode 100644 index 0000000..bc45017 Binary files /dev/null and b/docs/screenshots/playground-idle.png differ diff --git a/docs/screenshots/playground-results.png b/docs/screenshots/playground-results.png new file mode 100644 index 0000000..997f7b5 Binary files /dev/null and b/docs/screenshots/playground-results.png differ diff --git a/docs/screenshots/playground-running.png b/docs/screenshots/playground-running.png new file mode 100644 index 0000000..c8878b3 Binary files /dev/null and b/docs/screenshots/playground-running.png differ diff --git a/packages/web/e2e/playground.screenshots.spec.ts b/packages/web/e2e/playground.screenshots.spec.ts new file mode 100644 index 0000000..5226151 --- /dev/null +++ b/packages/web/e2e/playground.screenshots.spec.ts @@ -0,0 +1,131 @@ +/** + * One-off screenshot script for the dialectic playground. + * Run with: pnpm exec playwright test packages/web/e2e/playground.screenshots.spec.ts + * Outputs are written to docs/screenshots/. + */ + +import { mkdirSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +import { test } from "@playwright/test"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const OUT_DIR = resolve(__dirname, "../../../docs/screenshots"); + +const STORE_KEY = "openconcho:instances"; +const STORE_VALUE = JSON.stringify({ + instances: [ + { + id: "demo-inst", + name: "Demo Honcho", + baseUrl: "http://localhost:8001", + token: "", + }, + ], + activeId: "demo-inst", +}); + +const WORKSPACE = "demo-workspace"; +const PEER = "alice@example.com"; + +// Per-level mocked latency (ms) and answer. 
+const FIXTURES: Record = { + minimal: { + delayMs: 140, + content: + "Quick gist: Alice prefers async standups, dislikes meetings on Mondays, and tracks priorities in Linear.", + }, + low: { + delayMs: 410, + content: + "Alice runs the platform team. She prefers async standups, batches code review in the afternoons, and pushes back on meetings before 10am. Linear is her source of truth for priorities.", + }, + medium: { + delayMs: 1180, + content: + "Alice leads the platform team and operates on async-by-default. Three recurring patterns:\n\n• Async over sync — she explicitly skips standups in favor of written status posts on Wednesdays.\n• Deep-work mornings — meetings before 10am are pushed back; she protects 9–11am for coding.\n• Single-source-of-truth in Linear — anything not tracked there is treated as not happening.", + }, + high: { + delayMs: 2410, + content: + "Alice's working model has stayed remarkably stable over the last three months. She leads platform, treats async writing as the default communication mode, and resists synchronous coordination unless a decision is actively blocked. Three concrete patterns recur:\n\n1. Async-first standups — Wednesday written status, no daily sync.\n2. Morning deep work — calendar protected 9–11am, meetings pushed past 10.\n3. Linear as system-of-record — verbal commitments she hasn't written into Linear are treated as not real.\n\nShe also pushes back hard on cross-team meetings without a clear decision owner.", + }, + max: { + delayMs: 3920, + content: + "Across her recent sessions Alice consistently surfaces three reinforcing patterns and one tension worth flagging.\n\nPatterns:\n1. Async-first communication — explicit preference for written status (Wednesday Linear updates) over standups; she's said \"if it's not in Linear it isn't real\" in three separate threads.\n2. Protected morning deep-work — calendar is blocked 9–11am every weekday; she'll move meetings rather than break the block.\n3. 
Decision-owner gating — she refuses cross-team meetings without a named decision owner; this has come up six times since March.\n\nTension to flag: Alice's async-default occasionally collides with newer hires who prefer synchronous onboarding. She's aware of this — last month she experimented with a weekly 30-min office hour — but the data is too thin to call it resolved.", + }, +}; + +// Default baseURL comes from playwright.config.ts (localhost:5173); override +// with PLAYWRIGHT_BASE_URL=http://localhost:5184 if regenerating screenshots +// against a worktree dev server on a different port. +const BASE_URL = process.env.PLAYWRIGHT_BASE_URL; + +test.use({ + viewport: { width: 1600, height: 1000 }, + ...(BASE_URL ? { baseURL: BASE_URL } : {}), +}); + +test("playground screenshots", async ({ page }) => { + mkdirSync(OUT_DIR, { recursive: true }); + + await page.addInitScript( + ([key, value]) => { + window.localStorage.setItem(key, value); + }, + [STORE_KEY, STORE_VALUE], + ); + + // Mock the Honcho health probe so the SPA doesn't show a disconnected banner. + await page.route("**/v3/health*", (route) => + route.fulfill({ + status: 200, + contentType: "application/json", + body: JSON.stringify({ status: "ok" }), + }), + ); + + // Mock the chat POST with per-level fixtures. + await page.route("**/v3/workspaces/*/peers/*/chat", async (route) => { + const body = JSON.parse(route.request().postData() ?? "{}") as { + reasoning_level?: keyof typeof FIXTURES; + }; + const level = body.reasoning_level ?? "low"; + const fx = FIXTURES[level]; + await new Promise((r) => setTimeout(r, fx.delayMs)); + await route.fulfill({ + status: 200, + contentType: "application/json", + body: JSON.stringify({ content: fx.content }), + }); + }); + + // 1. Idle: empty playground. 
+ await page.goto(`/workspaces/${WORKSPACE}/peers/${encodeURIComponent(PEER)}/playground`); + await page.waitForSelector('[data-testid="column-minimal"]'); + await page.screenshot({ + path: `${OUT_DIR}/playground-idle.png`, + fullPage: false, + }); + + // 2. Mid-flight: type a query, fire, capture while columns are still pending. + await page.getByLabel("Query").fill("What patterns does Alice show across her recent sessions?"); + await page.getByLabel("Run selected levels").click(); + await page.waitForSelector('[data-testid="column-minimal"][data-status="success"]'); + // minimal returns at ~140ms; capture now so medium/high/max are still pending. + await page.screenshot({ + path: `${OUT_DIR}/playground-running.png`, + fullPage: false, + }); + + // 3. Settled: wait for max to finish. + await page.waitForSelector('[data-testid="column-max"][data-status="success"]', { + timeout: 10_000, + }); + await page.screenshot({ + path: `${OUT_DIR}/playground-results.png`, + fullPage: false, + }); +}); diff --git a/packages/web/src/api/queries.ts b/packages/web/src/api/queries.ts index d46185b..060094f 100644 --- a/packages/web/src/api/queries.ts +++ b/packages/web/src/api/queries.ts @@ -263,7 +263,21 @@ export function useSearchPeer(workspaceId: string, peerId: string) { }); } -export function useChat(workspaceId: string, peerId: string) { +export type ReasoningLevel = "minimal" | "low" | "medium" | "high" | "max"; + +export const REASONING_LEVELS: readonly ReasoningLevel[] = [ + "minimal", + "low", + "medium", + "high", + "max", +] as const; + +export function useChat( + workspaceId: string, + peerId: string, + reasoningLevel: ReasoningLevel = "low", +) { const qc = useQueryClient(); return useMutation({ mutationFn: async (message: string) => { @@ -271,7 +285,7 @@ export function useChat(workspaceId: string, peerId: string) { "/v3/workspaces/{workspace_id}/peers/{peer_id}/chat", { params: { path: { workspace_id: workspaceId, peer_id: peerId } }, - body: { query: message, 
stream: false, reasoning_level: "low" }, + body: { query: message, stream: false, reasoning_level: reasoningLevel }, }, ); return data ?? err(error); diff --git a/packages/web/src/components/peers/PeerDetail.tsx b/packages/web/src/components/peers/PeerDetail.tsx index 455e655..8b3044b 100644 --- a/packages/web/src/components/peers/PeerDetail.tsx +++ b/packages/web/src/components/peers/PeerDetail.tsx @@ -1,6 +1,16 @@ import { useNavigate, useParams } from "@tanstack/react-router"; import { AnimatePresence, motion } from "framer-motion"; -import { Eye, EyeOff, MessageCircle, Save, Search, User, Users, X } from "lucide-react"; +import { + Eye, + EyeOff, + FlaskConical, + MessageCircle, + Save, + Search, + User, + Users, + X, +} from "lucide-react"; import { useState } from "react"; import { usePeer, @@ -98,19 +108,35 @@ export function PeerDetail() { Peer identity & memory - +
+ + +
diff --git a/packages/web/src/components/playground/DialecticPlayground.tsx b/packages/web/src/components/playground/DialecticPlayground.tsx new file mode 100644 index 0000000..24b059f --- /dev/null +++ b/packages/web/src/components/playground/DialecticPlayground.tsx @@ -0,0 +1,376 @@ +import { useQueryClient } from "@tanstack/react-query"; +import { Link, useParams } from "@tanstack/react-router"; +import { motion } from "framer-motion"; +import { FlaskConical, Play } from "lucide-react"; +import { useCallback, useMemo, useState } from "react"; +import { client } from "@/api/client"; +import { REASONING_LEVELS, type ReasoningLevel } from "@/api/queries"; +import { LoadingSpinner } from "@/components/shared/LoadingSpinner"; +import { Button } from "@/components/ui/button"; +import { Textarea } from "@/components/ui/input"; +import { SectionHeading } from "@/components/ui/typography"; +import { useDemo } from "@/hooks/useDemo"; + +// ─── Types ──────────────────────────────────────────────────────────────────── + +interface ColumnState { + status: "idle" | "pending" | "success" | "error"; + content: string | null; + error: string | null; + startedAt: number | null; + endedAt: number | null; +} + +type RunResult = { ok: true; content: string | null } | { ok: false; error: string }; + +const IDLE: ColumnState = { + status: "idle", + content: null, + error: null, + startedAt: null, + endedAt: null, +}; + +// ─── Pure helpers (exported for testing) ───────────────────────────────────── + +export function buildInitialColumns(): Record { + return Object.fromEntries(REASONING_LEVELS.map((l) => [l, { ...IDLE }])) as Record< + ReasoningLevel, + ColumnState + >; +} + +export function latencyMs(col: ColumnState): number | null { + if (col.startedAt == null || col.endedAt == null) return null; + return col.endedAt - col.startedAt; +} + +// ─── Run-fanout (exported for testing) ─────────────────────────────────────── + +export interface FanoutDeps { + now: () => number; + 
runOne: (level: ReasoningLevel, query: string) => Promise; + onStart: (level: ReasoningLevel, startedAt: number) => void; + onEnd: (level: ReasoningLevel, endedAt: number, result: RunResult) => void; +} + +/** + * Fires the same query at every selected level concurrently. Returns when all + * settle. Per-column timing is captured via onStart/onEnd so React state updates + * stay reflective of the network race, not the await order. + */ +export async function fanoutQuery( + levels: readonly ReasoningLevel[], + query: string, + deps: FanoutDeps, +): Promise { + await Promise.all( + levels.map(async (level) => { + const startedAt = deps.now(); + deps.onStart(level, startedAt); + try { + const result = await deps.runOne(level, query); + deps.onEnd(level, deps.now(), result); + } catch (e) { + deps.onEnd(level, deps.now(), { + ok: false, + error: e instanceof Error ? e.message : String(e), + }); + } + }), + ); +} + +// ─── Component ─────────────────────────────────────────────────────────────── + +export function DialecticPlayground() { + const { mask } = useDemo(); + const { workspaceId, peerId } = useParams({ strict: false }) as { + workspaceId: string; + peerId: string; + }; + const qc = useQueryClient(); + + const [query, setQuery] = useState(""); + const [selected, setSelected] = useState>( + () => + Object.fromEntries(REASONING_LEVELS.map((l) => [l, true])) as Record, + ); + const [columns, setColumns] = useState>(buildInitialColumns); + + const anyPending = useMemo( + () => REASONING_LEVELS.some((l) => columns[l].status === "pending"), + [columns], + ); + const selectedLevels = useMemo(() => REASONING_LEVELS.filter((l) => selected[l]), [selected]); + + const runOne = useCallback( + async (level: ReasoningLevel, q: string): Promise => { + const { data, error } = await client.current.POST( + "/v3/workspaces/{workspace_id}/peers/{peer_id}/chat", + { + params: { path: { workspace_id: workspaceId, peer_id: peerId } }, + body: { query: q, stream: false, 
reasoning_level: level }, + }, + ); + if (error) { + return { + ok: false, + error: typeof error === "object" ? JSON.stringify(error) : String(error), + }; + } + const content = (data as { content?: string | null } | undefined)?.content ?? null; + return { ok: true, content }; + }, + [workspaceId, peerId], + ); + + const handleRun = useCallback(async () => { + const trimmed = query.trim(); + if (!trimmed || anyPending || selectedLevels.length === 0) return; + + setColumns((prev) => { + const next = { ...prev }; + for (const l of selectedLevels) next[l] = { ...IDLE, status: "pending" }; + return next; + }); + + await fanoutQuery(selectedLevels, trimmed, { + now: () => performance.now(), + runOne, + onStart: (level, startedAt) => { + setColumns((prev) => ({ + ...prev, + [level]: { ...prev[level], status: "pending", startedAt, endedAt: null }, + })); + }, + onEnd: (level, endedAt, result) => { + setColumns((prev) => ({ + ...prev, + [level]: { + ...prev[level], + endedAt, + status: result.ok ? "success" : "error", + content: result.ok ? result.content : null, + error: result.ok ? null : result.error, + }, + })); + }, + }); + + qc.invalidateQueries({ queryKey: ["peer-context", workspaceId, peerId] }); + }, [anyPending, peerId, qc, query, runOne, selectedLevels, workspaceId]); + + function toggleLevel(level: ReasoningLevel) { + setSelected((prev) => ({ ...prev, [level]: !prev[level] })); + } + + function handleKeyDown(e: React.KeyboardEvent) { + if ((e.key === "Enter" && (e.metaKey || e.ctrlKey)) || (e.key === "Enter" && !e.shiftKey)) { + e.preventDefault(); + handleRun(); + } + } + + return ( +
+ {/* Header */} +
+
+ + {mask(peerId)} + + / + Playground +
+
+ + + Dialectic reasoning playground + +
+

+ Fire the same query at every reasoning level in parallel — compare answers and latency. +

+
+ + {/* Query input */} +
+
+