From 965fde8feeb1db611df2676e07f079544932e8c4 Mon Sep 17 00:00:00 2001 From: mohammed naji Date: Mon, 11 May 2026 22:55:38 +0400 Subject: [PATCH 1/2] Add slice-v1 retrieval and benchmark tooling Add opt-in slice-v1 retrieval, richer sketch semantics, real-workspace benchmark helpers, and calibration/diagnostic coverage for the post-v0.21 payoff layer. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 11 + README.md | 3 +- .../2026-05-11-spi-vs-legacy/README.md | 24 ++ .../REAL_WORKSPACE_REPORT_TEMPLATE.md | 32 ++ .../2026-05-11-spi-vs-legacy/graph-stats.mjs | 18 + .../2026-05-11-spi-vs-legacy/probe.mjs | 75 +++- .../prompts.real-workspace.example.json | 55 +++ .../run-real-workspace.sh | 35 ++ .../2026-05-11-spi-vs-legacy/run.sh | 18 +- .../summarize-real-workspaces.mjs | 52 +++ src/cli/parser.ts | 27 +- src/contracts/context-pack.ts | 26 ++ src/infrastructure/context-pack-command.ts | 4 +- src/runtime/context-pack-resolution.ts | 73 +++- src/runtime/retrieve.ts | 82 +++++ src/runtime/retrieve/slicing.ts | 332 ++++++++++++++++++ src/runtime/stdio/definitions.ts | 8 +- src/runtime/stdio/tools.ts | 26 ++ tests/unit/benchmark-graph-stats.test.ts | 44 +++ tests/unit/benchmark-real-workspace.test.ts | 121 +++++++ .../context-pack-resolution-sketch.test.ts | 99 ++++++ tests/unit/retrieve-slice-surface.test.ts | 77 ++++ tests/unit/retrieve-slice-v1.test.ts | 125 +++++++ tests/unit/stdio-slice-surface.test.ts | 127 +++++++ 24 files changed, 1477 insertions(+), 17 deletions(-) create mode 100644 docs/benchmarks/2026-05-11-spi-vs-legacy/REAL_WORKSPACE_REPORT_TEMPLATE.md create mode 100644 docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs create mode 100644 docs/benchmarks/2026-05-11-spi-vs-legacy/prompts.real-workspace.example.json create mode 100644 docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh create mode 100644 docs/benchmarks/2026-05-11-spi-vs-legacy/summarize-real-workspaces.mjs create mode 100644 
src/runtime/retrieve/slicing.ts create mode 100644 tests/unit/benchmark-graph-stats.test.ts create mode 100644 tests/unit/benchmark-real-workspace.test.ts create mode 100644 tests/unit/retrieve-slice-surface.test.ts create mode 100644 tests/unit/retrieve-slice-v1.test.ts create mode 100644 tests/unit/stdio-slice-surface.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index dfc0ffd..acc9714 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,17 @@ All notable changes to the TypeScript package will be documented in this file. ## [Unreleased] +### Added + +- **Opt-in task-conditioned slicing v1**: retrieval can now run with `retrievalStrategy: 'slice-v1'` to anchor on explicit symbols/paths, take bounded explain/debug/impact/review-oriented slices, suppress barrel-like nodes, and emit `slice` metadata (`mode`, `anchors`, `directions`, `selected_paths`) alongside the selected pack. +- **Real-workspace benchmark flow**: `docs/benchmarks/2026-05-11-spi-vs-legacy/` now ships `run-real-workspace.sh`, `summarize-real-workspaces.mjs`, `prompts.real-workspace.example.json`, and `REAL_WORKSPACE_REPORT_TEMPLATE.md` so backend-only and monorepo workspaces can be benchmarked locally without committing private paths or artifacts. + +### Changed + +- **Sketch semantics are richer but still deterministic**: `resolution: 'sketch'` now surfaces `reads env`, config reads, and compact side-effect hints such as `external_http`, `llm_call`, and `db_write` when graph evidence exists, while preserving dependency-record output for lighter nodes. +- **Slice-v1 is exposed safely in CLI/MCP**: CLI `pack`, MCP `retrieve`, and MCP `context_pack` now accept `retrieval_strategy: 'default' | 'slice-v1'`, validate unsupported values clearly, and keep compact output unchanged unless the caller opts in. 
+- **Benchmark analysis is broader and more honest**: the SPI probe now records resolution comparisons (`detail` / `signature` / `sketch`), slice-v1 runs, retrieval-gate metadata, top files, and a value-per-token calibration summary instead of implying a token win. + ## [0.21.0] - 2026-05-11 ### Changed diff --git a/README.md b/README.md index 95e9ba3..750920e 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ NestJS + Next.js SaaS, 1,268 files, ~860K words. Same question, same Claude Opus PR-review proof on a real diff: prompt tokens 63,024 → **8,690** (**7.25× fewer**). Receipts: [`docs/benchmarks/2026-05-02-govalidate-pr-review/`](docs/benchmarks/2026-05-02-govalidate-pr-review/). -`--spi` benchmark (bundled fixture, 7 prompts): pack tokens **−26%**, graph.json size **−32%**, cache-hit rebuild **−27%** vs legacy. Receipts: [`docs/benchmarks/2026-05-11-spi-vs-legacy/`](docs/benchmarks/2026-05-11-spi-vs-legacy/). +`--spi` benchmark (bundled fixture, 7 prompts): **better framework-shaped correctness**, **operational retrieval-level expansion**, **graph.json size −32%**, **cache-hit rebuild −27% vs legacy**, but **no measured explain-pack token win on that fixture**. Receipts: [`docs/benchmarks/2026-05-11-spi-vs-legacy/`](docs/benchmarks/2026-05-11-spi-vs-legacy/). [Reproduce them](docs/benchmarks/2026-04-30-govalidate/verify.sh) with one shell script against the committed evidence files. @@ -142,6 +142,7 @@ graphify-ts generate . # build the graph graphify-ts generate . --spi # opt-in SPI pipeline (framework metadata + disk cache) graphify-ts watch . # rebuild on file change graphify-ts pack "how does auth work?" --task explain # compact CLI context payload +graphify-ts pack "why does auth fail?" --task explain --retrieval-strategy slice-v1 graphify-ts prompt "how does auth work?" --provider claude # provider-ready compiled prompt graphify-ts review-compare graphify-out/graph.json --exec '...' 
--yes # PR review benchmark graphify-ts compare "How does auth work?" --exec '...' --yes # general benchmark diff --git a/docs/benchmarks/2026-05-11-spi-vs-legacy/README.md b/docs/benchmarks/2026-05-11-spi-vs-legacy/README.md index 7936f2e..0d89689 100644 --- a/docs/benchmarks/2026-05-11-spi-vs-legacy/README.md +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/README.md @@ -77,6 +77,7 @@ The runner now produces: 1. `legacy.json`, `spi-cold.json`, `spi-warm.json` 2. `spi-cold.analysis.json` — strategy comparison + retrieval-level sweep 3. `summary.json` — top-level aggregate report +4. `edge_count` in each variant JSON ### Optional: point the runner at another local repo @@ -101,6 +102,29 @@ node docs/benchmarks/2026-05-11-spi-vs-legacy/probe.mjs \ If GoValidate is available locally, use the template above for both the backend-only checkout and the monorepo checkout. This repo does **not** commit any private-path defaults or fake results for those runs. +### Real-workspace matrix runner + +You can benchmark two local workspaces side by side without committing private paths or artifacts: + +```bash +GRAPHIFY_BENCH_BACKEND=/absolute/path/to/backend \ +GRAPHIFY_BENCH_MONOREPO=/absolute/path/to/monorepo \ +bash docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh +``` + +Defaults: + +- prompts file: `docs/benchmarks/2026-05-11-spi-vs-legacy/prompts.real-workspace.example.json` +- output bundle: `docs/benchmarks/2026-05-11-spi-vs-legacy/results/real-workspaces/<timestamp>/` + +Artifacts: + +1. one normal benchmark run per workspace (`backend/summary.json`, `monorepo/summary.json`) +2. `real-workspaces.summary.json` — side-by-side aggregate summary +3. `REAL_WORKSPACE_REPORT_TEMPLATE.md` — sharing template with privacy disclaimer + +The aggregate summary keeps objective metrics separate from qualitative notes and does not claim any private-repo numbers unless you run the benchmark locally. 
+ ## Caveats / limitations - **Fixture is synthetic.** It is still small enough that the new `value-per-token` scorer does not beat evidence-order on final pack size. diff --git a/docs/benchmarks/2026-05-11-spi-vs-legacy/REAL_WORKSPACE_REPORT_TEMPLATE.md b/docs/benchmarks/2026-05-11-spi-vs-legacy/REAL_WORKSPACE_REPORT_TEMPLATE.md new file mode 100644 index 0000000..e593d85 --- /dev/null +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/REAL_WORKSPACE_REPORT_TEMPLATE.md @@ -0,0 +1,32 @@ +# Real workspace benchmark report template + +This benchmark can be run on private repos locally. +No private paths or artifacts are committed. +If GoValidate is unavailable, no GoValidate-specific numbers are claimed. + +## Workspace matrix + +| Workspace | Variant | Build time (ms) | Graph size (bytes) | Nodes | Edges | +|---|---|---:|---:|---:|---:| + +## Strategy / resolution comparisons + +| Workspace | Prompt | Strategy | Resolution | Tokens | Nodes | Quality | Notes | +|---|---|---|---|---:|---:|---:|---| + +## Retrieval-level comparisons + +| Workspace | Prompt | Retrieval level | Tokens | Nodes | Gate reason | +|---|---|---:|---:|---:|---| + +## Value-per-token calibration + +- Where value-per-token helps: +- Where it does not change output: +- Where it hurts or increases tokens: +- Suggested scoring adjustments: + +## Qualitative notes + +- Objective metrics are listed separately from qualitative notes. +- Private workspace paths must be redacted before sharing any report excerpt. 
diff --git a/docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs b/docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs new file mode 100644 index 0000000..9515f7d --- /dev/null +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs @@ -0,0 +1,18 @@ +#!/usr/bin/env node + +import { readFileSync } from 'node:fs' + +const graphPath = process.argv[2] +if (!graphPath) { + console.error('usage: graph-stats.mjs ') + process.exit(2) +} + +const graph = JSON.parse(readFileSync(graphPath, 'utf8')) +const nodeCount = Array.isArray(graph.nodes) ? graph.nodes.length : 0 +const edgeCount = Array.isArray(graph.edges) ? graph.edges.length : 0 + +console.log(JSON.stringify({ + node_count: nodeCount, + edge_count: edgeCount, +})) diff --git a/docs/benchmarks/2026-05-11-spi-vs-legacy/probe.mjs b/docs/benchmarks/2026-05-11-spi-vs-legacy/probe.mjs index 50cc9ac..c881212 100644 --- a/docs/benchmarks/2026-05-11-spi-vs-legacy/probe.mjs +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/probe.mjs @@ -4,6 +4,8 @@ import { readFileSync } from 'node:fs' import { basename, relative, resolve } from 'node:path' import { computeContextPackDiagnostics } from '../../../dist/src/runtime/context-pack-diagnostics.js' +import { estimateContextPackEntryTokens } from '../../../dist/src/runtime/context-pack.js' +import { applyContextPackResolution } from '../../../dist/src/runtime/context-pack-resolution.js' import { contextPackFromRetrieveResult, retrieveContext } from '../../../dist/src/runtime/retrieve.js' import { loadGraph } from '../../../dist/src/runtime/serve.js' @@ -31,16 +33,50 @@ function summarizeRun(result) { result.matched_nodes .map((node) => node.framework_role) .filter((value) => typeof value === 'string' && value.length > 0), - ), + ), ).sort() + const topFiles = Array.from( + new Set( + result.matched_nodes + .map((node) => node.source_file) + .filter((value) => typeof value === 'string' && value.length > 0), + ), + ).slice(0, 5) + const resolvedSummaries = 
Object.fromEntries( + ['detail', 'signature', 'sketch'].map((resolution) => { + const resolved = resolution === 'detail' + ? { + nodes: pack.nodes, + bytes_saved: 0, + } + : applyContextPackResolution(pack.nodes, { + resolution, + relationships: pack.relationships, + }) + const tokenCount = resolved.nodes.reduce( + (total, node) => total + estimateContextPackEntryTokens(node.label, node.source_file, node.line_number, node.snippet), + 0, + ) + return [resolution, { + token_count: tokenCount, + bytes_saved: resolved.bytes_saved, + representation_types: Array.from(new Set(resolved.nodes.map((node) => node.representation_type ?? 'detail'))).sort(), + }] + }), + ) return { token_count: result.token_count, node_count: result.matched_nodes.length, labels: result.matched_nodes.map((node) => node.label), + top_files: topFiles, framework_roles: frameworkRoles, quality_score: diagnostics.quality_score, warnings: diagnostics.warnings.map((warning) => warning.kind), + retrieval_gate: result.retrieval_gate ?? null, + retrieval_strategy: result.retrieval_strategy ?? 'default', + slice: result.slice ?? null, + resolutions: resolvedSummaries, selection_strategy: result.selection_diagnostics?.selection_strategy, used_tokens: result.selection_diagnostics?.used_tokens ?? result.token_count, required_overflow: result.selection_diagnostics?.required_overflow ?? 
false, @@ -70,6 +106,12 @@ const promptAnalyses = prompts.map((prompt) => { budget, selectionStrategy: 'value-per-token', }) + const sliceV1 = retrieveContext(graph, { + question: prompt.text, + budget, + selectionStrategy: 'value-per-token', + retrievalStrategy: 'slice-v1', + }) return { id: prompt.id, @@ -78,10 +120,13 @@ const promptAnalyses = prompts.map((prompt) => { strategies: { evidence_order: summarizeRun(evidenceOrder), value_per_token: summarizeRun(valuePerToken), + slice_v1: summarizeRun(sliceV1), }, deltas: { token_count: valuePerToken.token_count - evidenceOrder.token_count, node_count: valuePerToken.matched_nodes.length - evidenceOrder.matched_nodes.length, + slice_token_count: sliceV1.token_count - valuePerToken.token_count, + slice_node_count: sliceV1.matched_nodes.length - valuePerToken.matched_nodes.length, }, retrieval_levels: retrievalLevels.map((level) => ({ level, @@ -95,8 +140,36 @@ const promptAnalyses = prompts.map((prompt) => { } }) +const calibration = promptAnalyses.reduce((summary, prompt) => { + const evidenceOrder = prompt.strategies.evidence_order + const valuePerToken = prompt.strategies.value_per_token + const tokenDelta = valuePerToken.token_count - evidenceOrder.token_count + const qualityDelta = valuePerToken.quality_score - evidenceOrder.quality_score + const labelDelta = valuePerToken.labels.filter((label) => !evidenceOrder.labels.includes(label)) + const note = { + prompt: prompt.id, + token_delta: tokenDelta, + quality_delta: qualityDelta, + added_labels: labelDelta, + } + + if (tokenDelta < 0 && qualityDelta >= 0) { + summary.helps.push(note) + } else if (tokenDelta > 0 && qualityDelta <= 0) { + summary.hurts_or_expands.push(note) + } else { + summary.no_material_change.push(note) + } + return summary +}, { + helps: [], + no_material_change: [], + hurts_or_expands: [], +}) + console.log(JSON.stringify({ graph_path: graphPathForOutput, budget, prompts: promptAnalyses, + calibration, }, null, 2)) diff --git 
a/docs/benchmarks/2026-05-11-spi-vs-legacy/prompts.real-workspace.example.json b/docs/benchmarks/2026-05-11-spi-vs-legacy/prompts.real-workspace.example.json new file mode 100644 index 0000000..2e52bb0 --- /dev/null +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/prompts.real-workspace.example.json @@ -0,0 +1,55 @@ +{ + "schema_version": 1, + "prompts": [ + { + "id": "auth-flow", + "intent": "explain", + "text": "Explain auth flow end to end." + }, + { + "id": "report-generation", + "intent": "explain", + "text": "Explain validation report generation end to end." + }, + { + "id": "report-generation-slow", + "intent": "debug", + "text": "Why is validation report generation slow?" + }, + { + "id": "research-agent-impact", + "intent": "impact", + "text": "What can break if the research agent changes?" + }, + { + "id": "report-generation-tests", + "intent": "explain", + "text": "Which tests are relevant for report generation?" + }, + { + "id": "controller-to-persistence", + "intent": "explain", + "text": "Find the call path from controller to final report persistence." + }, + { + "id": "config-runtime-effect", + "intent": "debug", + "text": "Where does this env/config variable affect runtime behavior?" + }, + { + "id": "auth-config-impact", + "intent": "impact", + "text": "What can break if session/cookie/auth config changes?" + }, + { + "id": "review-current-diff", + "intent": "review", + "text": "Review current backend diff for risky changes." + }, + { + "id": "onboarding-routes", + "intent": "explain", + "text": "Which routes/controllers/services are involved in onboarding?" 
+ } + ] +} diff --git a/docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh b/docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh new file mode 100644 index 0000000..14d0d1f --- /dev/null +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +set -euo pipefail + +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TS="$(date -u +%Y-%m-%dT%H%M%SZ)" +BUNDLE_DIR="${GRAPHIFY_BENCH_REAL_RESULTS_DIR:-$HERE/results/real-workspaces/$TS}" +PROMPTS_FILE="${GRAPHIFY_BENCH_REAL_PROMPTS:-$HERE/prompts.real-workspace.example.json}" + +run_workspace() { + local workspace_name="$1" + local workspace_path="$2" + if [[ -z "$workspace_path" ]]; then + return + fi + + mkdir -p "$BUNDLE_DIR/$workspace_name" + echo "[real-workspace] $workspace_name -> $workspace_path" + GRAPHIFY_BENCH_FIXTURE="$workspace_path" \ + GRAPHIFY_BENCH_PROMPTS="$PROMPTS_FILE" \ + GRAPHIFY_BENCH_RESULTS_DIR="$BUNDLE_DIR/$workspace_name" \ + bash "$HERE/run.sh" +} + +if [[ -z "${GRAPHIFY_BENCH_BACKEND:-}" && -z "${GRAPHIFY_BENCH_MONOREPO:-}" ]]; then + echo "Set GRAPHIFY_BENCH_BACKEND and/or GRAPHIFY_BENCH_MONOREPO before running." >&2 + exit 2 +fi + +mkdir -p "$BUNDLE_DIR" +run_workspace "backend" "${GRAPHIFY_BENCH_BACKEND:-}" +run_workspace "monorepo" "${GRAPHIFY_BENCH_MONOREPO:-}" + +node "$HERE/summarize-real-workspaces.mjs" "$BUNDLE_DIR" > "$BUNDLE_DIR/real-workspaces.summary.json" +cat "$BUNDLE_DIR/real-workspaces.summary.json" diff --git a/docs/benchmarks/2026-05-11-spi-vs-legacy/run.sh b/docs/benchmarks/2026-05-11-spi-vs-legacy/run.sh index 3d1eab7..ace3d4c 100755 --- a/docs/benchmarks/2026-05-11-spi-vs-legacy/run.sh +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/run.sh @@ -25,7 +25,7 @@ PROMPTS_FILE="${GRAPHIFY_BENCH_PROMPTS:-$HERE/prompts.json}" # Create a clean copy of the fixture for each variant so cache state and # graphify-out are independent. 
TS="$(date -u +%Y-%m-%dT%H%M%SZ)" -RESULTS_DIR="$HERE/results/$TS" +RESULTS_DIR="${GRAPHIFY_BENCH_RESULTS_DIR:-$HERE/results/$TS}" mkdir -p "$RESULTS_DIR" GRAPHIFY="$ROOT/dist/src/cli/bin.js" @@ -56,10 +56,12 @@ run_variant() { local graph_path="$fixture_copy/graphify-out/graph.json" local graph_size graph_size=$(wc -c < "$graph_path" | tr -d ' ') - local node_count - node_count=$(node -e "const g=require('$graph_path'); console.log(g.nodes.length)") + local graph_stats node_count edge_count + graph_stats=$(node "$HERE/graph-stats.mjs" "$graph_path") + node_count=$(GRAPH_STATS="$graph_stats" node -e "const s=JSON.parse(process.env.GRAPH_STATS); console.log(s.node_count)") + edge_count=$(GRAPH_STATS="$graph_stats" node -e "const s=JSON.parse(process.env.GRAPH_STATS); console.log(s.edge_count)") - echo " time=${elapsed}ms graph_size=${graph_size} nodes=${node_count}" + echo " time=${elapsed}ms graph_size=${graph_size} nodes=${node_count} edges=${edge_count}" # Per-prompt pack runs. local prompt_results="[" @@ -99,6 +101,7 @@ run_variant() { "build_time_ms": $elapsed, "graph_size_bytes": $graph_size, "node_count": $node_count, + "edge_count": $edge_count, "prompts": $prompt_results } EOF @@ -123,8 +126,10 @@ SPI_WARM_ELAPSED=$((t1 - t0)) # build_time_ms so spi-warm has schema parity with legacy / spi-cold. 
SPI_WARM_GRAPH_PATH="$SPI_WARM_FIXTURE/graphify-out/graph.json" SPI_WARM_GRAPH_SIZE=$(wc -c < "$SPI_WARM_GRAPH_PATH" | tr -d ' ') -SPI_WARM_NODE_COUNT=$(node -e "const g=require('$SPI_WARM_GRAPH_PATH'); console.log(g.nodes.length)") -echo " time=${SPI_WARM_ELAPSED}ms graph_size=${SPI_WARM_GRAPH_SIZE} nodes=${SPI_WARM_NODE_COUNT}" +SPI_WARM_GRAPH_STATS=$(node "$HERE/graph-stats.mjs" "$SPI_WARM_GRAPH_PATH") +SPI_WARM_NODE_COUNT=$(GRAPH_STATS="$SPI_WARM_GRAPH_STATS" node -e "const s=JSON.parse(process.env.GRAPH_STATS); console.log(s.node_count)") +SPI_WARM_EDGE_COUNT=$(GRAPH_STATS="$SPI_WARM_GRAPH_STATS" node -e "const s=JSON.parse(process.env.GRAPH_STATS); console.log(s.edge_count)") +echo " time=${SPI_WARM_ELAPSED}ms graph_size=${SPI_WARM_GRAPH_SIZE} nodes=${SPI_WARM_NODE_COUNT} edges=${SPI_WARM_EDGE_COUNT}" cat > "$RESULTS_DIR/spi-warm.json" < "$RESULTS_DIR/spi-warm.json" <') + process.exit(2) +} + +const preferredOrder = ['backend', 'monorepo'] +const workspaceNames = readdirSync(bundleDir, { withFileTypes: true }) + .filter((entry) => entry.isDirectory() && existsSync(join(bundleDir, entry.name, 'summary.json'))) + .map((entry) => entry.name) + .sort((left, right) => { + const leftIndex = preferredOrder.indexOf(left) + const rightIndex = preferredOrder.indexOf(right) + if (leftIndex !== -1 || rightIndex !== -1) { + return (leftIndex === -1 ? Number.POSITIVE_INFINITY : leftIndex) - (rightIndex === -1 ? Number.POSITIVE_INFINITY : rightIndex) + } + return left.localeCompare(right) + }) + +const workspaces = Object.fromEntries( + workspaceNames.map((name) => [name, JSON.parse(readFileSync(join(bundleDir, name, 'summary.json'), 'utf8'))]), +) + +const objectiveMetrics = workspaceNames.flatMap((workspace) => { + const summary = workspaces[workspace] + return Object.entries(summary.variants ?? {}).flatMap(([variant, metrics]) => ([ + { workspace, variant, metric: 'build_time_ms', value: metrics.build_time_ms ?? 
null }, + { workspace, variant, metric: 'graph_size_bytes', value: metrics.graph_size_bytes ?? null }, + { workspace, variant, metric: 'node_count', value: metrics.node_count ?? null }, + { workspace, variant, metric: 'edge_count', value: metrics.edge_count ?? null }, + ])) +}) + +const qualitativeNotes = [ + 'This benchmark can be run on private repos locally.', + 'No private paths or artifacts are committed.', + 'If GoValidate is unavailable, no GoValidate-specific numbers are claimed.', +] + +console.log(JSON.stringify({ + workspace_order: workspaceNames, + workspaces, + comparison: { + objective_metrics: objectiveMetrics, + qualitative_notes: qualitativeNotes, + }, +}, null, 2)) diff --git a/src/cli/parser.ts b/src/cli/parser.ts index d0c10f3..38ce6ec 100644 --- a/src/cli/parser.ts +++ b/src/cli/parser.ts @@ -1,6 +1,6 @@ import { isAbsolute, resolve } from 'node:path' -import type { ContextPackTaskKind } from '../contracts/context-pack.js' +import type { ContextPackRetrievalStrategy, ContextPackTaskKind } from '../contracts/context-pack.js' import { validateGraphOutputPath, validateGraphPath } from '../shared/security.js' import { type InstallPlatform, isInstallPlatform, type McpToolProfile, isMcpToolProfile } from '../infrastructure/install.js' @@ -32,6 +32,7 @@ export interface PackCliOptions { * emits a decision with reason 'manual override' at the supplied level * instead of running its heuristic classifier on the prompt. 
*/ retrievalLevel?: 0 | 1 | 2 | 3 | 4 | 5 + retrievalStrategy?: ContextPackRetrievalStrategy } export type PromptCliProvider = 'claude' | 'gemini' @@ -403,7 +404,7 @@ export function parseQueryArgs(args: string[]): QueryCliOptions { } export function parsePackArgs(args: string[]): PackCliOptions { - const usage = 'Usage: graphify-ts pack "" [--budget N] [--task KIND] [--graph path] [--retrieval-level 0-5]' + const usage = 'Usage: graphify-ts pack "" [--budget N] [--task KIND] [--graph path] [--retrieval-level 0-5] [--retrieval-strategy default|slice-v1]' const prompt = args[0]?.trim() if (!prompt) { throw new UsageError(usage) @@ -413,6 +414,7 @@ export function parsePackArgs(args: string[]): PackCliOptions { let task: ContextPackTaskKind = 'explain' let graphPath = 'graphify-out/graph.json' let retrievalLevel: PackCliOptions['retrievalLevel'] | undefined + let retrievalStrategy: PackCliOptions['retrievalStrategy'] | undefined const normalizedPrompt = validateCliQuestionText('prompt', prompt) @@ -474,6 +476,18 @@ export function parsePackArgs(args: string[]): PackCliOptions { continue } + if (argument === '--retrieval-strategy') { + retrievalStrategy = parseRetrievalStrategy(requireOptionValue('--retrieval-strategy', args[index + 1])) + index += 1 + continue + } + + if (argument.startsWith('--retrieval-strategy=')) { + const [, value] = argument.split('=', 2) + retrievalStrategy = parseRetrievalStrategy(requireOptionValue('--retrieval-strategy', value)) + continue + } + throw new UsageError(`error: unknown option for pack: ${argument}`) } @@ -483,6 +497,7 @@ export function parsePackArgs(args: string[]): PackCliOptions { task, graphPath, ...(retrievalLevel !== undefined ? { retrievalLevel } : {}), + ...(retrievalStrategy !== undefined ? 
{ retrievalStrategy } : {}), } } @@ -494,6 +509,14 @@ function parseRetrievalLevel(value: string): PackCliOptions['retrievalLevel'] { return parsed as PackCliOptions['retrievalLevel'] } +function parseRetrievalStrategy(value: string): PackCliOptions['retrievalStrategy'] { + const normalized = value.trim().toLowerCase() + if (normalized === 'default' || normalized === 'slice-v1') { + return normalized + } + throw new UsageError(`error: --retrieval-strategy must be one of default, slice-v1 (got ${JSON.stringify(value)})`) +} + export function parsePromptArgs(args: string[]): PromptCliOptions { const usage = 'Usage: graphify-ts prompt "" --provider NAME [--graph path]' const prompt = args[0]?.trim() diff --git a/src/contracts/context-pack.ts b/src/contracts/context-pack.ts index 1f9e3e8..f8d3a2e 100644 --- a/src/contracts/context-pack.ts +++ b/src/contracts/context-pack.ts @@ -34,6 +34,30 @@ export interface ContextPackSelectionDiagnostics { ranking: ContextPackSelectionRankingEntry[] } +export type ContextPackRetrievalStrategy = 'default' | 'slice-v1' + +export interface ContextPackSliceAnchor { + node_id?: string + label: string + reason: string +} + +export interface ContextPackSlicePath { + from_id?: string + from: string + to_id?: string + to: string + relation: string + direction: 'forward' | 'backward' +} + +export interface ContextPackSliceMetadata { + mode: 'explain' | 'debug' | 'impact' | 'review' + anchors: ContextPackSliceAnchor[] + directions: Array<'forward' | 'backward'> + selected_paths: ContextPackSlicePath[] +} + export type ContextRepresentationType = | 'detail' | 'summary' @@ -179,6 +203,8 @@ export interface CompiledContextPack< graph_signals?: ContextPackGraphSignals shared_file_type?: string selection_diagnostics?: ContextPackSelectionDiagnostics + retrieval_strategy?: ContextPackRetrievalStrategy + slice?: ContextPackSliceMetadata /** * Retrieval-gate decision (#75) attached when the caller invoked the * gate before building the pack. 
Carries `level`, `reason`, `intent`, diff --git a/src/infrastructure/context-pack-command.ts b/src/infrastructure/context-pack-command.ts index f344128..5109999 100644 --- a/src/infrastructure/context-pack-command.ts +++ b/src/infrastructure/context-pack-command.ts @@ -17,7 +17,7 @@ const DEFAULT_IMPACT_DEPTH = 3 export interface ContextPackCommandDependencies { loadGraph: (graphPath: string) => KnowledgeGraph - retrieveContext: (graph: KnowledgeGraph, options: Pick) => RetrieveResult + retrieveContext: (graph: KnowledgeGraph, options: Pick) => RetrieveResult compactRetrieveResult: typeof compactRetrieveResult analyzePrImpact: (graph: KnowledgeGraph, projectDir?: string, options?: { baseBranch?: string; depth?: number; budget?: number; taskIntent?: TaskContextPlan['evidence']['recipe_id'] }) => PrImpactResult compactPrImpactResult: typeof compactPrImpactResult @@ -218,6 +218,7 @@ export async function runContextPackCommand( budget: plannerBudget, taskIntent: initialPlan.evidence.recipe_id, ...(options.retrievalLevel !== undefined ? { retrievalLevel: options.retrievalLevel } : {}), + ...(options.retrievalStrategy !== undefined ? { retrievalStrategy: options.retrievalStrategy } : {}), }) const impactTarget = pickImpactTarget(retrieval) const communityLabels = buildCommunityLabels(graph, communitiesFromGraph(graph)) @@ -240,6 +241,7 @@ export async function runContextPackCommand( budget: plannerBudget, taskIntent: initialPlan.evidence.recipe_id, ...(options.retrievalLevel !== undefined ? { retrievalLevel: options.retrievalLevel } : {}), + ...(options.retrievalStrategy !== undefined ? 
{ retrievalStrategy: options.retrievalStrategy } : {}), }) const explainPack = dependencies.compactRetrieveResult(retrieval) diff --git a/src/runtime/context-pack-resolution.ts b/src/runtime/context-pack-resolution.ts index 0db05e5..ed651c4 100644 --- a/src/runtime/context-pack-resolution.ts +++ b/src/runtime/context-pack-resolution.ts @@ -325,17 +325,72 @@ function relationLabels( return labels } +function sideEffectHints(labels: readonly string[]): { + sideEffects: string[] + latencySensitive: string[] +} { + const sideEffects = new Set() + for (const label of labels) { + const normalized = label.toLowerCase() + if ( + normalized === 'fetch' + || normalized.startsWith('axios') + || normalized.startsWith('got') + || normalized.startsWith('request') + || normalized.startsWith('http.') + || normalized.startsWith('https.') + ) { + sideEffects.add('external_http') + } + if ( + normalized.includes('anthropic') + || normalized.includes('openai') + || normalized.includes('chatcompletion') + || normalized.includes('messages.create') + || normalized.includes('responses.create') + ) { + sideEffects.add('llm_call') + } + if ( + normalized.includes('prisma.') + && (normalized.endsWith('.create') || normalized.endsWith('.update') || normalized.endsWith('.delete') || normalized.endsWith('.upsert')) + ) { + sideEffects.add('db_write') + } + if (normalized.includes('redis') || normalized.includes('cache.')) { + sideEffects.add('cache_io') + } + if (normalized.includes('queue') || normalized.includes('publish') || normalized.includes('emit')) { + sideEffects.add('queue_event') + } + } + + const orderedEffects = ['external_http', 'llm_call', 'db_write', 'cache_io', 'queue_event'] + return { + sideEffects: orderedEffects.filter((effect) => sideEffects.has(effect)), + latencySensitive: ['external_http', 'llm_call'].filter((effect) => sideEffects.has(effect)), + } +} + function renderSketchRepresentation( node: ContextPackNode, relationIndex: RelationIndex, ): { type: 
'behavior_sketch' | 'dependency_record'; reason: string; snippet: string } | null { const behaviorEdges = relationLabels(node, relationIndex, 'outgoing', ['calls', 'route_handler', 'controller_route', 'method', 'contains']) const tests = relationLabels(node, relationIndex, 'outgoing', ['covered_by']) - const config = relationLabels(node, relationIndex, 'outgoing', ['uses_config', 'reads_env']) + const config = relationLabels(node, relationIndex, 'outgoing', ['uses_config']) + const readsEnv = relationLabels(node, relationIndex, 'outgoing', ['reads_env']) const outgoingDeps = relationLabels(node, relationIndex, 'outgoing', ['calls', 'injects', 'depends_on']) const incomingDeps = relationLabels(node, relationIndex, 'incoming', ['calls', 'injects', 'depends_on']) - - if (tests.length > 0 || config.length > 0 || behaviorEdges.length > 1 || node.framework_role) { + const { sideEffects, latencySensitive } = sideEffectHints(behaviorEdges) + + if ( + tests.length > 0 + || config.length > 0 + || readsEnv.length > 0 + || behaviorEdges.length > 1 + || node.framework_role + ) { const lines = [node.label] for (const label of behaviorEdges.slice(0, 5)) { lines.push(`-> ${label}`) @@ -343,9 +398,18 @@ function renderSketchRepresentation( if (tests.length > 0) { lines.push(`tests: ${tests.slice(0, 3).join(', ')}`) } + if (readsEnv.length > 0) { + lines.push(`reads env: ${readsEnv.slice(0, 3).join(', ')}`) + } if (config.length > 0) { lines.push(`config: ${config.slice(0, 3).join(', ')}`) } + if (sideEffects.length > 0) { + lines.push(`side effects: ${sideEffects.join(', ')}`) + } + if (latencySensitive.length > 0) { + lines.push(`latency-sensitive: ${latencySensitive.join(', ')}`) + } if (node.framework_role) { lines.push(`framework: ${node.framework_role}`) } @@ -367,6 +431,9 @@ function renderSketchRepresentation( if (node.framework_role) { lines.push(`framework: ${node.framework_role}`) } + if (sideEffects.length > 0) { + lines.push(`side effects: ${sideEffects.join(', ')}`) + 
} return { type: 'dependency_record', reason: 'graph-derived dependency record', diff --git a/src/runtime/retrieve.ts b/src/runtime/retrieve.ts index 1493f91..c05779a 100644 --- a/src/runtime/retrieve.ts +++ b/src/runtime/retrieve.ts @@ -9,7 +9,9 @@ import type { ContextPackExpandableLineRange, ContextPackExpandableRef, ContextPackNode, + ContextPackRetrievalStrategy, ContextPackSelectionDiagnostics, + ContextPackSliceMetadata, ContextPackTaskContract, } from '../contracts/context-pack.js' import type { TaskIntentKind } from '../contracts/task-intent.js' @@ -35,6 +37,7 @@ import { relationAllowedForPolicy, relationIsPrimaryForPolicy, } from './retrieve/expansion.js' +import { sliceCandidatesForRetrieve } from './retrieve/slicing.js' import { communitiesFromGraph, estimateQueryTokens } from './serve.js' const SNIPPET_HALF_WINDOW = 7 @@ -75,6 +78,7 @@ export interface RetrieveOptions { retrievalLevel?: RetrievalLevel /** Internal additive override for benchmarks/tests. */ selectionStrategy?: ContextPackSelectionStrategy + retrievalStrategy?: ContextPackRetrievalStrategy } export interface RetrieveMatchedNode { @@ -125,6 +129,8 @@ export interface RetrieveResult { coverage?: ContextPackCoverage selection_diagnostics?: ContextPackSelectionDiagnostics retrieval_gate?: RetrievalGateDecision + retrieval_strategy?: ContextPackRetrievalStrategy + slice?: ContextPackSliceMetadata } export interface CompactRetrieveMatchedNode extends Omit { @@ -569,6 +575,34 @@ interface ScoredNode { relevanceBand: 'direct' | 'related' | 'peripheral' } +function scoredNodeFromGraph(graph: KnowledgeGraph, nodeId: string, score: number): ScoredNode { + const attributes = graph.nodeAttributes(nodeId) + const resolvedLine = resolvedLineNumber(attributes) + return { + id: nodeId, + label: String(attributes.label ?? ''), + sourceFile: String(attributes.source_file ?? ''), + sourceLocation: typeof attributes.source_location === 'string' && attributes.source_location.length > 0 + ? 
attributes.source_location + : null, + lineNumber: resolvedLine.lineNumber, + lineNumberDerived: resolvedLine.derived, + storedSnippet: storedSnippetFromAttributes(attributes), + nodeKind: String(attributes.node_kind ?? ''), + framework: typeof attributes.framework === 'string' ? attributes.framework : undefined, + frameworkRole: typeof attributes.framework_role === 'string' ? attributes.framework_role : undefined, + fileType: String(attributes.file_type ?? '').trim().toLowerCase(), + fileNodeLike: isFileNodeLike(String(attributes.label ?? ''), String(attributes.source_file ?? '')), + community: parseCommunityId(attributes.community), + frameworkBoost: 0, + exactLabelMatch: false, + sourcePathMatch: false, + evidenceTier: 0, + score, + relevanceBand: 'related', + } +} + interface FrameworkQuestionProfile { frameworkShaped: boolean express: boolean @@ -1214,6 +1248,9 @@ export function contextPackFromRetrieveResult( expandable: result.expandable ?? [], coverage: result.coverage ?? fallbackRetrieveCoverage(result), ...(result.selection_diagnostics ? { selection_diagnostics: result.selection_diagnostics } : {}), + ...(result.retrieval_strategy ? { retrieval_strategy: result.retrieval_strategy } : {}), + ...(result.slice ? { slice: result.slice } : {}), + ...(result.retrieval_gate ? { retrieval_gate: result.retrieval_gate } : {}), } } @@ -1226,6 +1263,7 @@ function buildRetrieveResultFromOrderedCandidates( retrieveGraphSignals: RetrieveGraphSignals, retrievalGate: RetrievalGateDecision, rootPath?: string, + sliceMetadata?: ContextPackSliceMetadata, ): RetrieveResult { const snippetFileCache = new Map() const taskContract = classifyTaskContract('explain', { @@ -1349,6 +1387,8 @@ function buildRetrieveResultFromOrderedCandidates( coverage: pack.coverage, ...(pack.selection_diagnostics ? { selection_diagnostics: pack.selection_diagnostics } : {}), ...(pack.retrieval_gate ? { retrieval_gate: pack.retrieval_gate } : {}), + retrieval_strategy: options.retrievalStrategy ?? 
'default', + ...(sliceMetadata ? { slice: sliceMetadata } : {}), } } @@ -1392,6 +1432,7 @@ export function retrieveContext(graph: KnowledgeGraph, options: RetrieveOptions) expandable: emptyPack.expandable, coverage: emptyPack.coverage, ...(emptyPack.retrieval_gate ? { retrieval_gate: emptyPack.retrieval_gate } : {}), + retrieval_strategy: options.retrievalStrategy ?? 'default', } } @@ -1421,6 +1462,7 @@ export function retrieveContext(graph: KnowledgeGraph, options: RetrieveOptions) expandable: emptyPack.expandable, coverage: emptyPack.coverage, ...(emptyPack.retrieval_gate ? { retrieval_gate: emptyPack.retrieval_gate } : {}), + retrieval_strategy: options.retrievalStrategy ?? 'default', } } @@ -1790,6 +1832,40 @@ export function retrieveContext(graph: KnowledgeGraph, options: RetrieveOptions) ? frameworkOrderedCandidates : frameworkOrderedCandidates.filter((node) => node.relevanceBand !== 'peripheral') + if (options.retrievalStrategy === 'slice-v1') { + const sliced = sliceCandidatesForRetrieve( + graph, + scored.map((node) => ({ + id: node.id, + label: node.label, + sourceFile: node.sourceFile, + exactLabelMatch: node.exactLabelMatch, + sourcePathMatch: node.sourcePathMatch, + score: node.score, + })), + retrievalGate.intent, + ) + + if (sliced) { + const scoredById = new Map(scored.map((node) => [node.id, node])) + const sliceCandidates = sliced.ordered_ids.map((nodeId, index) => ( + scoredById.get(nodeId) ?? scoredNodeFromGraph(graph, nodeId, Math.max(0.25, 2 - (index * 0.1))) + )) + + return buildRetrieveResultFromOrderedCandidates( + graph, + options, + sliceCandidates, + communities, + communityLabels, + retrieveGraphSignals, + retrievalGate, + rootPath, + sliced.metadata, + ) + } + } + return buildRetrieveResultFromOrderedCandidates( graph, options, @@ -1942,6 +2018,9 @@ export function compactRetrieveResult(result: RetrieveResult): CompactRetrieveRe community_context: compactPack.community_context, graph_signals: compactPack.graph_signals ?? 
{ god_nodes: [], bridge_nodes: [] }, ...(compactPack.shared_file_type ? { shared_file_type: compactPack.shared_file_type } : {}), + retrieval_strategy: result.retrieval_strategy ?? 'default', + ...(result.slice ? { slice: result.slice } : {}), + ...(result.retrieval_gate ? { retrieval_gate: result.retrieval_gate } : {}), } } @@ -1986,5 +2065,8 @@ export function compactRetrieveResultForStdio(result: RetrieveResult): RetrieveR ...(result.expandable ? { expandable: result.expandable } : {}), ...(result.coverage ? { coverage: result.coverage } : {}), ...(result.selection_diagnostics ? { selection_diagnostics: result.selection_diagnostics } : {}), + retrieval_strategy: result.retrieval_strategy ?? 'default', + ...(result.slice ? { slice: result.slice } : {}), + ...(result.retrieval_gate ? { retrieval_gate: result.retrieval_gate } : {}), } } diff --git a/src/runtime/retrieve/slicing.ts b/src/runtime/retrieve/slicing.ts new file mode 100644 index 0000000..aa88165 --- /dev/null +++ b/src/runtime/retrieve/slicing.ts @@ -0,0 +1,332 @@ +import type { + ContextPackSliceAnchor, + ContextPackSliceMetadata, + ContextPackSlicePath, +} from '../../contracts/context-pack.js' +import type { KnowledgeGraph } from '../../contracts/graph.js' +import type { RetrievalIntent } from '../../contracts/retrieval-gate.js' + +export interface SliceScoredNode { + id: string + label: string + sourceFile: string + exactLabelMatch: boolean + sourcePathMatch: boolean + score: number +} + +type SliceMode = ContextPackSliceMetadata['mode'] + +interface SlicePolicy { + mode: SliceMode + directions: Array<'forward' | 'backward'> + backward_relations: ReadonlySet + forward_relations: ReadonlySet + backward_depth: number + forward_depth: number + helper_relations: ReadonlySet +} + +const EXPLAIN_BACKWARD = new Set(['calls', 'controller_route', 'route_handler']) +const EXPLAIN_FORWARD = new Set(['calls', 'contains', 'method', 'route_handler', 'controller_route']) +const DEBUG_BACKWARD = new Set(['calls', 
'controller_route', 'route_handler']) +const DEBUG_FORWARD = new Set(['calls', 'contains', 'method', 'route_handler', 'controller_route']) +const IMPACT_BACKWARD = new Set(['calls', 'controller_route', 'route_handler']) +const IMPACT_FORWARD = new Set(['calls', 'contains', 'method', 'route_handler', 'controller_route']) +const DEBUG_HELPERS = new Set(['uses_guard', 'guarded_by', 'reads_env', 'uses_config', 'depends_on', 'covered_by', 'injects']) +const EXPLAIN_HELPERS = new Set(['covered_by', 'reads_env', 'uses_config']) +const IMPACT_HELPERS = new Set(['covered_by', 'reads_env', 'uses_config', 'depends_on', 'exports']) + +function policyForIntent(intent: RetrievalIntent): SlicePolicy { + switch (intent) { + case 'debug': + return { + mode: 'debug', + directions: ['backward', 'forward'], + backward_relations: DEBUG_BACKWARD, + forward_relations: DEBUG_FORWARD, + backward_depth: 1, + forward_depth: 1, + helper_relations: DEBUG_HELPERS, + } + case 'impact': + return { + mode: 'impact', + directions: ['backward', 'forward'], + backward_relations: IMPACT_BACKWARD, + forward_relations: IMPACT_FORWARD, + backward_depth: 2, + forward_depth: 1, + helper_relations: IMPACT_HELPERS, + } + case 'review': + return { + mode: 'review', + directions: ['backward', 'forward'], + backward_relations: IMPACT_BACKWARD, + forward_relations: IMPACT_FORWARD, + backward_depth: 1, + forward_depth: 1, + helper_relations: IMPACT_HELPERS, + } + case 'explain': + default: + return { + mode: 'explain', + directions: ['backward', 'forward'], + backward_relations: EXPLAIN_BACKWARD, + forward_relations: EXPLAIN_FORWARD, + backward_depth: 1, + forward_depth: 1, + helper_relations: EXPLAIN_HELPERS, + } + } +} + +function isBarrelLike(label: string, sourceFile: string): boolean { + return label.trim().toLowerCase() === 'index.ts' || /(?:^|\/)index\.ts$/i.test(sourceFile) +} + +function shouldSuppressNode( + graph: KnowledgeGraph, + node: SliceScoredNode, + anchoredIds: ReadonlySet, +): boolean { + if 
(anchoredIds.has(node.id)) { + return false + } + + if (isBarrelLike(node.label, node.sourceFile)) { + return true + } + + return graph.degree(node.id) >= 40 +} + +function buildAnchors(scored: readonly SliceScoredNode[]): ContextPackSliceAnchor[] { + const anchors: ContextPackSliceAnchor[] = [] + const seen = new Set() + const matchedAnchors = scored.filter((node) => node.exactLabelMatch || node.sourcePathMatch) + const nonBarrelMatchedAnchors = matchedAnchors.filter((node) => !isBarrelLike(node.label, node.sourceFile)) + const anchorPool = matchedAnchors.length > 0 + ? (nonBarrelMatchedAnchors.length > 0 ? nonBarrelMatchedAnchors : matchedAnchors) + : scored.filter((node) => !isBarrelLike(node.label, node.sourceFile)).slice(0, 1) + + for (const node of anchorPool) { + const reason = node.exactLabelMatch + ? 'symbol mention' + : node.sourcePathMatch + ? 'path mention' + : 'top lexical match' + if (!reason || seen.has(node.id)) { + continue + } + anchors.push({ + node_id: node.id, + label: node.label, + reason, + }) + seen.add(node.id) + if (anchors.length >= 2) { + break + } + } + + return anchors +} + +function recordPath( + paths: ContextPackSlicePath[], + seen: Set, + path: ContextPackSlicePath, +): void { + const key = `${path.direction}:${path.from_id ?? path.from}:${path.relation}:${path.to_id ?? path.to}` + if (seen.has(key)) { + return + } + seen.add(key) + paths.push(path) +} + +function traverseDirection( + graph: KnowledgeGraph, + scoredById: ReadonlyMap, + anchorIds: readonly string[], + selectedIds: Set, + orderedIds: string[], + pathSeen: Set, + selectedPaths: ContextPackSlicePath[], + anchoredIds: ReadonlySet, + direction: 'forward' | 'backward', + relations: ReadonlySet, + maxDepth: number, +): void { + const queue = anchorIds.map((id) => ({ id, depth: 0 })) + const seen = new Set(anchorIds) + + while (queue.length > 0) { + const current = queue.shift()! 
+ if (current.depth >= maxDepth) { + continue + } + + const neighbors = direction === 'forward' ? graph.successors(current.id) : graph.predecessors(current.id) + for (const neighborId of neighbors) { + const sourceId = direction === 'forward' ? current.id : neighborId + const targetId = direction === 'forward' ? neighborId : current.id + const relation = String(graph.edgeAttributes(sourceId, targetId).relation ?? 'related_to') + if (!relations.has(relation)) { + continue + } + + const neighbor = scoredById.get(neighborId) + if (!neighbor) { + continue + } + if (shouldSuppressNode(graph, neighbor, anchoredIds)) { + continue + } + + if (!selectedIds.has(neighborId)) { + selectedIds.add(neighborId) + orderedIds.push(neighborId) + } + + const currentNode = scoredById.get(current.id) + recordPath(selectedPaths, pathSeen, { + from_id: sourceId, + from: direction === 'forward' ? currentNode?.label ?? sourceId : neighbor.label, + to_id: targetId, + to: direction === 'forward' ? neighbor.label : currentNode?.label ?? targetId, + relation, + direction, + }) + + if (!seen.has(neighborId)) { + seen.add(neighborId) + queue.push({ id: neighborId, depth: current.depth + 1 }) + } + } + } +} + +function addHelperNeighbors( + graph: KnowledgeGraph, + scoredById: ReadonlyMap, + helperRelations: ReadonlySet, + selectedIds: Set, + orderedIds: string[], + pathSeen: Set, + selectedPaths: ContextPackSlicePath[], + anchoredIds: ReadonlySet, +): void { + for (const currentId of [...orderedIds]) { + const currentNode = scoredById.get(currentId) + if (!currentNode) { + continue + } + + for (const neighborId of graph.successors(currentId)) { + const relation = String(graph.edgeAttributes(currentId, neighborId).relation ?? 
'related_to') + if (!helperRelations.has(relation)) { + continue + } + + const neighbor = scoredById.get(neighborId) + if (!neighbor || shouldSuppressNode(graph, neighbor, anchoredIds)) { + continue + } + + if (!selectedIds.has(neighborId)) { + selectedIds.add(neighborId) + orderedIds.push(neighborId) + } + + recordPath(selectedPaths, pathSeen, { + from_id: currentId, + from: currentNode.label, + to_id: neighborId, + to: neighbor.label, + relation, + direction: 'forward', + }) + } + } +} + +export function sliceCandidatesForRetrieve( + graph: KnowledgeGraph, + scoredCandidates: readonly SliceScoredNode[], + intent: RetrievalIntent, +): { ordered_ids: string[]; metadata: ContextPackSliceMetadata } | null { + if (scoredCandidates.length === 0) { + return null + } + + const anchors = buildAnchors(scoredCandidates) + if (anchors.length === 0) { + return null + } + + const policy = policyForIntent(intent) + const anchorIds = anchors.map((anchor) => anchor.node_id).filter((id): id is string => typeof id === 'string') + const orderedIds = [...anchorIds] + const selectedIds = new Set(anchorIds) + const anchoredIds = new Set(anchorIds) + const scoredById = new Map(scoredCandidates.map((candidate) => [candidate.id, candidate])) + const selectedPaths: ContextPackSlicePath[] = [] + const pathSeen = new Set() + + if (policy.directions.includes('backward')) { + traverseDirection( + graph, + scoredById, + anchorIds, + selectedIds, + orderedIds, + pathSeen, + selectedPaths, + anchoredIds, + 'backward', + policy.backward_relations, + policy.backward_depth, + ) + } + + if (policy.directions.includes('forward')) { + traverseDirection( + graph, + scoredById, + anchorIds, + selectedIds, + orderedIds, + pathSeen, + selectedPaths, + anchoredIds, + 'forward', + policy.forward_relations, + policy.forward_depth, + ) + } + + addHelperNeighbors( + graph, + scoredById, + policy.helper_relations, + selectedIds, + orderedIds, + pathSeen, + selectedPaths, + anchoredIds, + ) + + return { + 
ordered_ids: orderedIds, + metadata: { + mode: policy.mode, + anchors, + directions: policy.directions, + selected_paths: selectedPaths, + }, + } +} diff --git a/src/runtime/stdio/definitions.ts b/src/runtime/stdio/definitions.ts index 99c788d..9f44115 100644 --- a/src/runtime/stdio/definitions.ts +++ b/src/runtime/stdio/definitions.ts @@ -208,7 +208,7 @@ export const MCP_TOOLS: McpToolDefinition[] = [ }, { name: 'retrieve', - description: 'Graph-relevant context for a natural-language question: matched nodes + snippets, relationships, community context, structural signals.', + description: 'Retrieve matched nodes, snippets, relationships, and community context for a question.', inputSchema: { type: 'object', required: ['question', 'budget'], @@ -223,13 +223,14 @@ export const MCP_TOOLS: McpToolDefinition[] = [ rerank_model: { type: 'string', description: 'Override reranker model or local path' }, verbose: { type: 'boolean', description: 'Return verbose payload (default: compact)' }, retrieval_level: { type: 'number', description: 'Override retrieval-gate level 0-5 (#75)' }, + retrieval_strategy: { type: 'string', enum: ['default', 'slice-v1'], description: 'Experimental retrieval strategy.' }, }, }, }, { name: 'context_pack', description: - 'Build a compact explain/review/impact context pack for a downstream agent. Use when you need expandable refs plus coverage and missing-context signals instead of the full graph payload. Pass delta_session_id to ship only new nodes.', + 'Build a compact explain/review/impact context pack with expandable refs and coverage signals.', inputSchema: { type: 'object', required: ['prompt'], @@ -239,10 +240,11 @@ export const MCP_TOOLS: McpToolDefinition[] = [ budget: { type: 'number', description: 'Optional: maximum token budget for the pack (default 3000)' }, delta_session_id: { type: 'string', description: 'Optional (#81): delta-pack session key for per-session dedup.' 
}, verbose: { type: 'boolean', description: 'Optional: include extended selection diagnostics.' }, + retrieval_strategy: { type: 'string', enum: ['default', 'slice-v1'], description: 'Experimental retrieval strategy.' }, resolution: { type: 'string', enum: ['detail', 'summary', 'mixed', 'signature', 'sketch'], - description: 'Optional (#76/#135): node resolution.', + description: 'Node resolution.', }, }, }, diff --git a/src/runtime/stdio/tools.ts b/src/runtime/stdio/tools.ts index 70fd747..7ae75ff 100644 --- a/src/runtime/stdio/tools.ts +++ b/src/runtime/stdio/tools.ts @@ -10,6 +10,7 @@ import type { ContextPackExpandableFollowUp, ContextPackExpandableRef, ContextPackNode, + ContextPackRetrievalStrategy, ContextPackRelationship, ContextRepresentationType, } from '../../contracts/context-pack.js' @@ -133,6 +134,20 @@ function isStoredContextPackHandle(value: unknown): value is StoredContextPackHa && Array.isArray(followUp.focus_ranges) } +function parseRetrievalStrategyParam( + helpers: Pick, + toolArguments: Record, +): ContextPackRetrievalStrategy | null | 'invalid' { + const raw = helpers.stringParamAlias(toolArguments, ['retrieval_strategy', 'retrievalStrategy']) + if (raw === null) { + return null + } + if (raw === 'default' || raw === 'slice-v1') { + return raw + } + return 'invalid' +} + function emptyCoverage(): ContextPackCoverage { return { required_evidence: [], @@ -770,6 +785,10 @@ export function handleToolCall(id: string | number | null, graphPath: string, pa if ((Object.hasOwn(toolArguments, 'retrieval_level') || Object.hasOwn(toolArguments, 'retrievalLevel')) && retrieveLevelOverride === null) { return helpers.failure(id, helpers.jsonrpcInvalidParams, 'retrieval_level must be an integer between 0 and 5') } + const retrieveStrategy = parseRetrievalStrategyParam(helpers, toolArguments) + if (retrieveStrategy === 'invalid') { + return helpers.failure(id, helpers.jsonrpcInvalidParams, 'retrieval_strategy must be one of default, slice-v1') + } const 
retrieveLevelTyped = retrieveLevelOverride === null ? null : (retrieveLevelOverride as 0 | 1 | 2 | 3 | 4 | 5) const retrieval = retrieveSemantic || retrieveRerank ? retrieveContextAsync(graph, { question, @@ -781,12 +800,14 @@ export function handleToolCall(id: string | number | null, graphPath: string, pa ...(retrieveRerank ? { rerank: true } : {}), ...(retrieveRerankModel ? { rerankerModel: retrieveRerankModel } : {}), ...(retrieveLevelTyped !== null ? { retrievalLevel: retrieveLevelTyped } : {}), + ...(retrieveStrategy ? { retrievalStrategy: retrieveStrategy } : {}), }) : Promise.resolve(retrieveContext(graph, { question, budget: retrieveBudget, ...(retrieveCommunity !== null ? { community: retrieveCommunity } : {}), ...(retrieveFileType ? { fileType: retrieveFileType } : {}), ...(retrieveLevelTyped !== null ? { retrievalLevel: retrieveLevelTyped } : {}), + ...(retrieveStrategy ? { retrievalStrategy: retrieveStrategy } : {}), })) const useVerboseRetrieve = toolArguments.verbose === true || toolArguments.compact === false return retrieval.then((result) => helpers.ok(id, helpers.textToolResult(JSON.stringify( @@ -864,12 +885,17 @@ export function handleToolCall(id: string | number | null, graphPath: string, pa if ((Object.hasOwn(toolArguments, 'retrieval_level') || Object.hasOwn(toolArguments, 'retrievalLevel')) && contextPackLevelOverride === null) { return helpers.failure(id, helpers.jsonrpcInvalidParams, 'retrieval_level must be an integer between 0 and 5') } + const contextPackStrategy = parseRetrievalStrategyParam(helpers, toolArguments) + if (contextPackStrategy === 'invalid') { + return helpers.failure(id, helpers.jsonrpcInvalidParams, 'retrieval_strategy must be one of default, slice-v1') + } const contextPackLevelTyped = contextPackLevelOverride === null ? 
null : (contextPackLevelOverride as 0 | 1 | 2 | 3 | 4 | 5) const retrieval = retrieveContext(graph, { question: prompt, budget: resolvedBudget, taskIntent: initialPlan.evidence.recipe_id, ...(contextPackLevelTyped !== null ? { retrievalLevel: contextPackLevelTyped } : {}), + ...(contextPackStrategy ? { retrievalStrategy: contextPackStrategy } : {}), }) if (task === 'impact') { diff --git a/tests/unit/benchmark-graph-stats.test.ts b/tests/unit/benchmark-graph-stats.test.ts new file mode 100644 index 0000000..455d76d --- /dev/null +++ b/tests/unit/benchmark-graph-stats.test.ts @@ -0,0 +1,44 @@ +import { execFileSync } from 'node:child_process' +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { join } from 'node:path' +import { tmpdir } from 'node:os' + +import { describe, expect, it } from 'vitest' + +function withTempGraph(graph: unknown, run: (graphPath: string) => void): void { + const dir = mkdtempSync(join(tmpdir(), 'graphify-bench-stats-')) + const graphPath = join(dir, 'graph.json') + try { + writeFileSync(graphPath, JSON.stringify(graph, null, 2)) + run(graphPath) + } finally { + rmSync(dir, { recursive: true, force: true }) + } +} + +describe('benchmark graph stats helper', () => { + it('counts edges when graph.json has an edges array', () => { + withTempGraph({ + nodes: [{ id: 'a' }, { id: 'b' }], + edges: [{ source: 'a', target: 'b' }], + }, (graphPath) => { + const output = execFileSync('node', [ + 'docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs', + graphPath, + ], { cwd: process.cwd(), encoding: 'utf8' }) + expect(JSON.parse(output)).toEqual({ node_count: 2, edge_count: 1 }) + }) + }) + + it('falls back to zero edges when graph.json omits the edges array', () => { + withTempGraph({ + nodes: [{ id: 'a' }, { id: 'b' }], + }, (graphPath) => { + const output = execFileSync('node', [ + 'docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs', + graphPath, + ], { cwd: process.cwd(), encoding: 'utf8' }) + 
expect(JSON.parse(output)).toEqual({ node_count: 2, edge_count: 0 }) + }) + }) +}) diff --git a/tests/unit/benchmark-real-workspace.test.ts b/tests/unit/benchmark-real-workspace.test.ts new file mode 100644 index 0000000..6b327ba --- /dev/null +++ b/tests/unit/benchmark-real-workspace.test.ts @@ -0,0 +1,121 @@ +import { execFileSync } from 'node:child_process' +import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs' +import { join } from 'node:path' +import { tmpdir } from 'node:os' + +import { describe, expect, it } from 'vitest' + +function withTempDir(run: (dir: string) => void): void { + const dir = mkdtempSync(join(tmpdir(), 'graphify-real-bench-')) + try { + run(dir) + } finally { + rmSync(dir, { recursive: true, force: true }) + } +} + +describe('real-workspace benchmark support', () => { + it('summarizes backend-only and monorepo benchmark runs side by side', () => { + withTempDir((dir) => { + const backendDir = join(dir, 'backend') + const monorepoDir = join(dir, 'monorepo') + mkdirSync(backendDir, { recursive: true }) + mkdirSync(monorepoDir, { recursive: true }) + + const sampleSummary = { + variants: { + legacy: { build_time_ms: 500, graph_size_bytes: 1000, node_count: 10, edge_count: 12 }, + 'spi-cold': { build_time_ms: 650, graph_size_bytes: 800, node_count: 11, edge_count: 14 }, + 'spi-warm': { build_time_ms: 320, graph_size_bytes: 800, node_count: 11, edge_count: 14 }, + }, + analysis: { + 'spi-cold': { + prompts: [ + { + id: 'auth-flow', + strategies: { + evidence_order: { + token_count: 210, + node_count: 6, + framework_roles: ['nest_controller'], + representation_types: ['detail'], + quality_score: 0.91, + warnings: [], + }, + value_per_token: { + token_count: 180, + node_count: 5, + framework_roles: ['nest_controller'], + representation_types: ['sketch'], + quality_score: 0.94, + warnings: [], + }, + }, + retrieval_levels: [ + { level: 1, token_count: 70, node_count: 2 }, + { level: 4, token_count: 220, node_count: 
7 }, + ], + }, + ], + }, + }, + comparison: { + build_time_delta_ms: 150, + graph_size_delta_bytes: -200, + }, + } + + writeFileSync(join(backendDir, 'summary.json'), JSON.stringify(sampleSummary, null, 2)) + writeFileSync(join(monorepoDir, 'summary.json'), JSON.stringify({ + ...sampleSummary, + variants: { + ...sampleSummary.variants, + legacy: { build_time_ms: 1000, graph_size_bytes: 5000, node_count: 50, edge_count: 70 }, + }, + }, null, 2)) + + const output = execFileSync('node', [ + 'docs/benchmarks/2026-05-11-spi-vs-legacy/summarize-real-workspaces.mjs', + dir, + ], { + cwd: process.cwd(), + encoding: 'utf8', + }) + + const summary = JSON.parse(output) + expect(summary.workspace_order).toEqual(['backend', 'monorepo']) + expect(summary.workspaces.backend.variants['spi-cold'].build_time_ms).toBe(650) + expect(summary.workspaces.monorepo.variants.legacy.graph_size_bytes).toBe(5000) + expect(summary.comparison.objective_metrics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ workspace: 'backend', metric: 'build_time_ms' }), + expect.objectContaining({ workspace: 'monorepo', metric: 'graph_size_bytes' }), + ]), + ) + expect(summary.comparison.qualitative_notes).toEqual( + expect.arrayContaining([ + expect.stringContaining('No private paths or artifacts are committed'), + ]), + ) + }) + }) + + it('ships a real-workspace prompt example and report template with the privacy disclaimer', () => { + const prompts = JSON.parse(readFileSync( + join(process.cwd(), 'docs', 'benchmarks', '2026-05-11-spi-vs-legacy', 'prompts.real-workspace.example.json'), + 'utf8', + )) + const template = readFileSync( + join(process.cwd(), 'docs', 'benchmarks', '2026-05-11-spi-vs-legacy', 'REAL_WORKSPACE_REPORT_TEMPLATE.md'), + 'utf8', + ) + + expect(prompts.schema_version).toBe(1) + expect(prompts.prompts.map((prompt: { id: string }) => prompt.id)).toEqual( + expect.arrayContaining(['auth-flow', 'report-generation', 'review-current-diff']), + ) + 
expect(template).toContain('This benchmark can be run on private repos locally.') + expect(template).toContain('No private paths or artifacts are committed.') + expect(template).toContain('If GoValidate is unavailable, no GoValidate-specific numbers are claimed.') + }) +}) diff --git a/tests/unit/context-pack-resolution-sketch.test.ts b/tests/unit/context-pack-resolution-sketch.test.ts index 26b074b..8f873f1 100644 --- a/tests/unit/context-pack-resolution-sketch.test.ts +++ b/tests/unit/context-pack-resolution-sketch.test.ts @@ -152,4 +152,103 @@ describe('applyContextPackResolution sketch mode', () => { expect(sessionService?.representation_type).toBe('dependency_record') expect(sessionService?.snippet).toContain('calls: TokenService.sign') }) + + it('surfaces env/config reads in sketch mode when deterministic evidence exists', () => { + const result = applyContextPackResolution( + [ + node({ + node_id: 'auth_controller', + label: 'AuthController.callback', + framework_role: 'nest_controller', + snippet: [ + 'export class AuthController {', + ' callback() {', + ' return this.sessionService.create(process.env.AUTH_COOKIE_DOMAIN)', + ' }', + '}', + ].join('\n'), + }), + node({ node_id: 'session_service', label: 'SessionService.create', snippet: 'export function create() {}' }), + node({ node_id: 'auth_env', label: 'AUTH_COOKIE_DOMAIN', source_file: '/src/config/auth.ts', snippet: 'export const AUTH_COOKIE_DOMAIN = process.env.AUTH_COOKIE_DOMAIN' }), + ], + { + resolution: 'sketch', + relationships: [ + relationship('auth_controller', 'session_service', 'calls'), + relationship('auth_controller', 'auth_env', 'reads_env'), + relationship('auth_controller', 'auth_env', 'uses_config'), + ], + }, + ) + + const authController = result.nodes.find((entry) => entry.node_id === 'auth_controller') + + expect(authController?.representation_type).toBe('behavior_sketch') + expect(authController?.snippet).toContain('reads env: AUTH_COOKIE_DOMAIN') + 
expect(authController?.snippet).toContain('config: AUTH_COOKIE_DOMAIN') + }) + + it('surfaces deterministic side-effect hints for http, llm, and db writes', () => { + const result = applyContextPackResolution( + [ + node({ + node_id: 'report_service', + label: 'ReportGenerationService.generate', + framework_role: 'nest_provider', + snippet: [ + 'export class ReportGenerationService {', + ' async generate() {', + ' await fetch("https://example.com")', + ' await this.anthropic.messages.create({})', + ' return prisma.report.create({ data: {} })', + ' }', + '}', + ].join('\n'), + }), + node({ node_id: 'http_client', label: 'fetch', snippet: 'export async function fetch() {}' }), + node({ node_id: 'llm_client', label: 'Anthropic.messages.create', snippet: 'export async function create() {}' }), + node({ node_id: 'report_repo', label: 'prisma.report.create', snippet: 'export async function create() {}' }), + ], + { + resolution: 'sketch', + relationships: [ + relationship('report_service', 'http_client', 'calls'), + relationship('report_service', 'llm_client', 'calls'), + relationship('report_service', 'report_repo', 'calls'), + ], + }, + ) + + const reportService = result.nodes.find((entry) => entry.node_id === 'report_service') + + expect(reportService?.representation_type).toBe('behavior_sketch') + expect(reportService?.snippet).toContain('side effects: external_http, llm_call, db_write') + expect(reportService?.snippet).toContain('latency-sensitive: external_http, llm_call') + }) + + it('keeps framework route/procedure context in sketches', () => { + const result = applyContextPackResolution( + [ + node({ + node_id: 'cancel_order', + label: 'appRouter.cancelOrder()', + framework_role: 'trpc_procedure_mutation', + snippet: 'export const cancelOrder = protectedProcedure.mutation(() => prisma.order.update({}))', + }), + node({ node_id: 'order_repo', label: 'prisma.order.update', snippet: 'export async function update() {}' }), + ], + { + resolution: 'sketch', + 
relationships: [ + relationship('cancel_order', 'order_repo', 'calls'), + ], + }, + ) + + const cancelOrder = result.nodes.find((entry) => entry.node_id === 'cancel_order') + + expect(cancelOrder?.representation_type).toBe('behavior_sketch') + expect(cancelOrder?.snippet).toContain('framework: trpc_procedure_mutation') + expect(cancelOrder?.snippet).toContain('side effects: db_write') + }) }) diff --git a/tests/unit/retrieve-slice-surface.test.ts b/tests/unit/retrieve-slice-surface.test.ts new file mode 100644 index 0000000..e384c21 --- /dev/null +++ b/tests/unit/retrieve-slice-surface.test.ts @@ -0,0 +1,77 @@ +import { describe, expect, it, vi } from 'vitest' + +import { parsePackArgs } from '../../src/cli/parser.js' +import { KnowledgeGraph } from '../../src/contracts/graph.js' +import { runContextPackCommand, type ContextPackCommandDependencies } from '../../src/infrastructure/context-pack-command.js' + +describe('slice-v1 CLI surface', () => { + it('accepts --retrieval-strategy slice-v1 for pack', () => { + const options = parsePackArgs(['"Explain auth"', '--retrieval-strategy', 'slice-v1']) + expect((options as { retrievalStrategy?: string }).retrievalStrategy).toBe('slice-v1') + }) + + it('rejects unsupported retrieval strategies for pack', () => { + expect(() => parsePackArgs(['"Explain auth"', '--retrieval-strategy', 'invented'])).toThrow(/slice-v1/) + }) +}) + +describe('slice-v1 context-pack command surface', () => { + it('forwards retrievalStrategy to retrieveContext for explain packs', async () => { + const graph = new KnowledgeGraph() + const dependencies: ContextPackCommandDependencies = { + loadGraph: vi.fn().mockReturnValue(graph), + retrieveContext: vi.fn().mockReturnValue({ + question: 'Explain auth', + token_count: 10, + matched_nodes: [], + relationships: [], + community_context: [], + graph_signals: { god_nodes: [], bridge_nodes: [] }, + retrieval_strategy: 'slice-v1', + slice: { + mode: 'explain', + anchors: [{ label: 'AuthService', reason: 
'symbol mention' }], + directions: ['backward', 'forward'], + selected_paths: [], + }, + }), + compactRetrieveResult: vi.fn().mockReturnValue({ + question: 'Explain auth', + token_count: 10, + matched_nodes: [], + relationships: [], + community_context: [], + graph_signals: { god_nodes: [], bridge_nodes: [] }, + retrieval_strategy: 'slice-v1', + slice: { + mode: 'explain', + anchors: [{ label: 'AuthService', reason: 'symbol mention' }], + directions: ['backward', 'forward'], + selected_paths: [], + }, + }), + analyzePrImpact: vi.fn(), + compactPrImpactResult: vi.fn(), + analyzeImpact: vi.fn(), + compactImpactResult: vi.fn(), + } + + const output = await runContextPackCommand({ + prompt: 'Explain auth', + budget: 1000, + task: 'explain', + graphPath: 'graphify-out/graph.json', + retrievalStrategy: 'slice-v1', + } as never, dependencies) + + expect(dependencies.retrieveContext).toHaveBeenCalledWith(graph, { + question: 'Explain auth', + budget: 1000, + taskIntent: 'explain', + retrievalStrategy: 'slice-v1', + }) + + const payload = JSON.parse(output) as { pack: { retrieval_strategy?: string } } + expect(payload.pack.retrieval_strategy).toBe('slice-v1') + }) +}) diff --git a/tests/unit/retrieve-slice-v1.test.ts b/tests/unit/retrieve-slice-v1.test.ts new file mode 100644 index 0000000..dde5db6 --- /dev/null +++ b/tests/unit/retrieve-slice-v1.test.ts @@ -0,0 +1,125 @@ +import { describe, expect, it } from 'vitest' + +import { build } from '../../src/pipeline/build.js' +import { retrieveContext } from '../../src/runtime/retrieve.js' + +function buildSliceGraph() { + return build( + [ + { + schema_version: 1, + nodes: [ + { id: 'auth_route', label: 'POST /login', file_type: 'code', source_file: '/src/auth/routes.ts', source_location: 'L10', node_kind: 'route', framework: 'express', framework_role: 'express_route', community: 0 }, + { id: 'auth_controller', label: 'AuthController.login', file_type: 'code', source_file: '/src/auth/controller.ts', source_location: 'L20', 
node_kind: 'method', framework: 'nestjs', framework_role: 'nest_controller', community: 0 }, + { id: 'auth_guard', label: 'AuthGuard', file_type: 'code', source_file: '/src/auth/guard.ts', source_location: 'L30', node_kind: 'class', community: 0 }, + { id: 'auth_service', label: 'AuthService.login', file_type: 'code', source_file: '/src/auth/service.ts', source_location: 'L40', node_kind: 'method', community: 0 }, + { id: 'login_validator', label: 'LoginValidator.validate', file_type: 'code', source_file: '/src/auth/login-validator.ts', source_location: 'L50', node_kind: 'method', community: 0 }, + { id: 'session_store', label: 'SessionStore.createSession', file_type: 'code', source_file: '/src/session/store.ts', source_location: 'L60', node_kind: 'method', community: 1 }, + { id: 'auth_env', label: 'AUTH_COOKIE_DOMAIN', file_type: 'code', source_file: '/src/config/auth.ts', source_location: 'L70', community: 2 }, + { id: 'auth_contract', label: 'LoginInput', file_type: 'code', source_file: '/src/contracts/auth.ts', source_location: 'L80', community: 0 }, + { id: 'auth_test', label: 'AuthService.login.spec', file_type: 'code', source_file: '/tests/auth.service.spec.ts', source_location: 'L90', node_kind: 'function', community: 3 }, + { id: 'billing_exporter', label: 'BillingExporter.syncSessions', file_type: 'code', source_file: '/src/billing/exporter.ts', source_location: 'L100', node_kind: 'method', community: 4 }, + { id: 'billing_metrics', label: 'BillingMetrics.flush', file_type: 'code', source_file: '/src/billing/metrics.ts', source_location: 'L105', node_kind: 'method', community: 4 }, + { id: 'api_client', label: 'ApiClient.syncBilling', file_type: 'code', source_file: '/src/api/client.ts', source_location: 'L110', node_kind: 'method', community: 4 }, + { id: 'shared_index', label: 'index.ts', file_type: 'code', source_file: '/src/shared/index.ts', source_location: 'L120', community: 5 }, + { id: 'shared_cookie', label: 'CookieService', file_type: 'code', 
source_file: '/src/shared/cookie.ts', source_location: 'L130', node_kind: 'class', community: 5 }, + ], + edges: [ + { source: 'auth_route', target: 'auth_controller', relation: 'controller_route', confidence: 'EXTRACTED', source_file: '/src/auth/routes.ts' }, + { source: 'auth_controller', target: 'auth_guard', relation: 'uses_guard', confidence: 'EXTRACTED', source_file: '/src/auth/controller.ts' }, + { source: 'auth_controller', target: 'auth_service', relation: 'calls', confidence: 'EXTRACTED', source_file: '/src/auth/controller.ts' }, + { source: 'auth_service', target: 'login_validator', relation: 'calls', confidence: 'EXTRACTED', source_file: '/src/auth/service.ts' }, + { source: 'auth_service', target: 'session_store', relation: 'calls', confidence: 'EXTRACTED', source_file: '/src/auth/service.ts' }, + { source: 'auth_service', target: 'auth_env', relation: 'reads_env', confidence: 'EXTRACTED', source_file: '/src/auth/service.ts' }, + { source: 'auth_service', target: 'auth_contract', relation: 'depends_on', confidence: 'EXTRACTED', source_file: '/src/auth/service.ts' }, + { source: 'auth_service', target: 'auth_test', relation: 'covered_by', confidence: 'EXTRACTED', source_file: '/src/auth/service.ts' }, + { source: 'billing_exporter', target: 'auth_service', relation: 'calls', confidence: 'EXTRACTED', source_file: '/src/billing/exporter.ts' }, + { source: 'billing_exporter', target: 'billing_metrics', relation: 'calls', confidence: 'EXTRACTED', source_file: '/src/billing/exporter.ts' }, + { source: 'api_client', target: 'billing_exporter', relation: 'calls', confidence: 'EXTRACTED', source_file: '/src/api/client.ts' }, + { source: 'auth_service', target: 'shared_index', relation: 'imports_from', confidence: 'EXTRACTED', source_file: '/src/auth/service.ts' }, + { source: 'shared_index', target: 'shared_cookie', relation: 'exports', confidence: 'EXTRACTED', source_file: '/src/shared/index.ts' }, + ], + }, + ], + { directed: true }, + ) +} + +function 
labelsFor(prompt: string, overrides: Record = {}): string[] { + return retrieveContext(buildSliceGraph(), { + question: prompt, + budget: 3000, + retrievalLevel: 4, + ...overrides, + } as never).matched_nodes.map((node) => node.label) +} + +describe('retrieveContext retrievalStrategy=slice-v1', () => { + it('keeps explain slices bounded around the anchored symbol instead of broad impact expansion', () => { + const defaultLabels = labelsFor('Explain `AuthService.login`') + const sliced = retrieveContext(buildSliceGraph(), { + question: 'Explain `AuthService.login`', + budget: 3000, + retrievalLevel: 4, + retrievalStrategy: 'slice-v1', + } as never) + const slicedLabels = sliced.matched_nodes.map((node) => node.label) + + expect(defaultLabels).toContain('ApiClient.syncBilling') + expect(slicedLabels).toContain('AuthService.login') + expect(slicedLabels).toContain('AuthController.login') + expect(slicedLabels).toContain('LoginValidator.validate') + expect(slicedLabels).toContain('AuthService.login.spec') + expect(slicedLabels).not.toContain('ApiClient.syncBilling') + expect(slicedLabels).not.toContain('index.ts') + expect((sliced as any).retrieval_strategy).toBe('slice-v1') + expect((sliced as any).slice.mode).toBe('explain') + expect((sliced as any).slice.anchors).toEqual( + expect.arrayContaining([ + expect.objectContaining({ label: 'AuthService.login', reason: 'symbol mention' }), + ]), + ) + }) + + it('captures backward and forward debug evidence without exploding through barrels', () => { + const sliced = retrieveContext(buildSliceGraph(), { + question: [ + 'Why does `AuthService.login` fail in production?', + ' at AuthService.login (/src/auth/service.ts:40:7)', + ].join('\n'), + budget: 3000, + retrievalStrategy: 'slice-v1', + } as never) + + const labels = sliced.matched_nodes.map((node) => node.label) + + expect(labels).toContain('AuthController.login') + expect(labels).toContain('AuthGuard') + expect(labels).toContain('AUTH_COOKIE_DOMAIN') + 
expect(labels).toContain('SessionStore.createSession') + expect(labels).toContain('LoginInput') + expect(labels).toContain('AuthService.login.spec') + expect(labels).not.toContain('BillingMetrics.flush') + expect(labels).not.toContain('index.ts') + expect((sliced as any).slice.mode).toBe('debug') + expect((sliced as any).slice.directions).toEqual(['backward', 'forward']) + }) + + it('uses an impact-oriented forward slice for breakage questions', () => { + const sliced = retrieveContext(buildSliceGraph(), { + question: 'What breaks if `AuthService.login` changes?', + budget: 3000, + retrievalStrategy: 'slice-v1', + } as never) + + const labels = sliced.matched_nodes.map((node) => node.label) + + expect(labels).toContain('AuthController.login') + expect(labels).toContain('POST /login') + expect(labels).toContain('BillingExporter.syncSessions') + expect(labels).toContain('ApiClient.syncBilling') + expect(labels).toContain('AuthService.login.spec') + expect(labels).not.toContain('index.ts') + expect((sliced as any).slice.mode).toBe('impact') + }) +}) diff --git a/tests/unit/stdio-slice-surface.test.ts b/tests/unit/stdio-slice-surface.test.ts new file mode 100644 index 0000000..e9fb195 --- /dev/null +++ b/tests/unit/stdio-slice-surface.test.ts @@ -0,0 +1,127 @@ +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { join } from 'node:path' +import { tmpdir } from 'node:os' + +import { afterEach, beforeEach, describe, expect, it } from 'vitest' + +import { handleStdioRequest } from '../../src/runtime/stdio-server.js' + +const tempRoots: string[] = [] +let previousToolProfile: string | undefined + +function createGraphPath(): string { + const root = mkdtempSync(join(tmpdir(), 'graphify-stdio-slice-')) + tempRoots.push(root) + const graphifyOut = join(root, 'graphify-out') + const graphPath = join(graphifyOut, 'graph.json') + mkdirSync(graphifyOut, { recursive: true }) + writeFileSync(join(root, 'auth.ts'), 'export function login() {}\n', 'utf8') 
+ writeFileSync(join(root, 'auth.spec.ts'), 'test("login", () => {})\n', 'utf8') + writeFileSync(join(graphifyOut, 'GRAPH_REPORT.md'), '# Graph report\n', 'utf8') + writeFileSync(graphPath, JSON.stringify({ + root_path: root, + nodes: [ + { id: 'auth_service', label: 'AuthService.login', source_file: join(root, 'auth.ts'), source_location: 'L1', file_type: 'code', community: 0 }, + { id: 'auth_test', label: 'AuthService.login.spec', source_file: join(root, 'auth.spec.ts'), source_location: 'L2', file_type: 'code', community: 1 }, + ], + edges: [ + { source: 'auth_service', target: 'auth_test', relation: 'covered_by', confidence: 'EXTRACTED', source_file: join(root, 'auth.ts') }, + ], + hyperedges: [], + }), 'utf8') + return graphPath +} + +afterEach(() => { + while (tempRoots.length > 0) { + rmSync(tempRoots.pop()!, { recursive: true, force: true }) + } +}) + +beforeEach(() => { + previousToolProfile = process.env.GRAPHIFY_TOOL_PROFILE + process.env.GRAPHIFY_TOOL_PROFILE = 'full' +}) + +afterEach(() => { + if (previousToolProfile === undefined) { + delete process.env.GRAPHIFY_TOOL_PROFILE + } else { + process.env.GRAPHIFY_TOOL_PROFILE = previousToolProfile + } +}) + +describe('stdio slice-v1 surface', () => { + it('accepts retrieval_strategy=slice-v1 for retrieve and context_pack', async () => { + const graphPath = createGraphPath() + + const retrieveResponse = await Promise.resolve(handleStdioRequest(graphPath, { + id: 1, + method: 'tools/call', + params: { + name: 'retrieve', + arguments: { + question: 'Explain `AuthService.login`', + budget: 1000, + retrieval_strategy: 'slice-v1', + verbose: true, + }, + }, + })) + + const contextPackResponse = await Promise.resolve(handleStdioRequest(graphPath, { + id: 2, + method: 'tools/call', + params: { + name: 'context_pack', + arguments: { + prompt: 'Explain `AuthService.login`', + budget: 1000, + task: 'explain', + retrieval_strategy: 'slice-v1', + verbose: true, + }, + }, + })) + + const retrieveText = 
((retrieveResponse as { result?: { content?: Array<{ text: string }> } }).result?.content ?? [])[0]?.text ?? '' + const contextPackText = ((contextPackResponse as { result?: { content?: Array<{ text: string }> } }).result?.content ?? [])[0]?.text ?? '' + + expect(retrieveText).toContain('"retrieval_strategy":"slice-v1"') + expect(contextPackText).toContain('"retrieval_strategy":"slice-v1"') + }) + + it('rejects unsupported retrieval_strategy values', async () => { + const graphPath = createGraphPath() + + const retrieveResponse = await Promise.resolve(handleStdioRequest(graphPath, { + id: 1, + method: 'tools/call', + params: { + name: 'retrieve', + arguments: { + question: 'Explain auth', + budget: 1000, + retrieval_strategy: 'invented', + }, + }, + })) + + const contextPackResponse = await Promise.resolve(handleStdioRequest(graphPath, { + id: 2, + method: 'tools/call', + params: { + name: 'context_pack', + arguments: { + prompt: 'Explain auth', + budget: 1000, + task: 'explain', + retrieval_strategy: 'invented', + }, + }, + })) + + expect(JSON.stringify(retrieveResponse)).toContain('retrieval_strategy must be one of default, slice-v1') + expect(JSON.stringify(contextPackResponse)).toContain('retrieval_strategy must be one of default, slice-v1') + }) +}) From debf9ba602fa04766528e5f184aca578442ee8d4 Mon Sep 17 00:00:00 2001 From: mohammed naji Date: Mon, 11 May 2026 23:13:46 +0400 Subject: [PATCH 2/2] Address PR review fixes Tighten benchmark script failures, keep slice-v1 constrained through async retrieval, reject unsupported review retrieval_strategy usage, and limit sketch side-effect inference to execution edges. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../2026-05-11-spi-vs-legacy/graph-stats.mjs | 10 ++- .../2026-05-11-spi-vs-legacy/probe.mjs | 17 +++-- .../run-real-workspace.sh | 14 +++- .../summarize-real-workspaces.mjs | 34 +++++++--- src/infrastructure/context-pack-command.ts | 3 + src/runtime/benchmark/probe-calibration.ts | 12 ++++ src/runtime/context-pack-resolution.ts | 3 +- src/runtime/retrieve.ts | 11 ++++ src/runtime/retrieve/slicing.ts | 27 +++++--- src/runtime/stdio/tools.ts | 12 ++-- tests/unit/benchmark-graph-stats.test.ts | 18 +++++ .../unit/benchmark-probe-calibration.test.ts | 19 ++++++ tests/unit/benchmark-real-workspace.test.ts | 66 +++++++++++++++++++ .../context-pack-resolution-sketch.test.ts | 30 +++++++++ tests/unit/retrieve-semantic.test.ts | 52 +++++++++++++++ tests/unit/retrieve-slice-surface.test.ts | 21 ++++++ tests/unit/retrieve-slice-v1.test.ts | 15 +++++ tests/unit/stdio-slice-surface.test.ts | 20 ++++++ 18 files changed, 353 insertions(+), 31 deletions(-) create mode 100644 src/runtime/benchmark/probe-calibration.ts create mode 100644 tests/unit/benchmark-probe-calibration.test.ts diff --git a/docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs b/docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs index 9515f7d..c977ae7 100644 --- a/docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs @@ -8,7 +8,15 @@ if (!graphPath) { process.exit(2) } -const graph = JSON.parse(readFileSync(graphPath, 'utf8')) +const graphJson = readFileSync(graphPath, 'utf8') +let graph +try { + graph = JSON.parse(graphJson) +} catch (error) { + const message = error instanceof Error ? error.message : String(error) + console.error(`failed to parse graph JSON at ${graphPath}: ${message}`) + process.exit(1) +} const nodeCount = Array.isArray(graph.nodes) ? graph.nodes.length : 0 const edgeCount = Array.isArray(graph.edges) ? 
graph.edges.length : 0 diff --git a/docs/benchmarks/2026-05-11-spi-vs-legacy/probe.mjs b/docs/benchmarks/2026-05-11-spi-vs-legacy/probe.mjs index c881212..319effb 100644 --- a/docs/benchmarks/2026-05-11-spi-vs-legacy/probe.mjs +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/probe.mjs @@ -6,6 +6,7 @@ import { basename, relative, resolve } from 'node:path' import { computeContextPackDiagnostics } from '../../../dist/src/runtime/context-pack-diagnostics.js' import { estimateContextPackEntryTokens } from '../../../dist/src/runtime/context-pack.js' import { applyContextPackResolution } from '../../../dist/src/runtime/context-pack-resolution.js' +import { classifyCalibrationBucket } from '../../../dist/src/runtime/benchmark/probe-calibration.js' import { contextPackFromRetrieveResult, retrieveContext } from '../../../dist/src/runtime/retrieve.js' import { loadGraph } from '../../../dist/src/runtime/serve.js' @@ -153,12 +154,16 @@ const calibration = promptAnalyses.reduce((summary, prompt) => { added_labels: labelDelta, } - if (tokenDelta < 0 && qualityDelta >= 0) { - summary.helps.push(note) - } else if (tokenDelta > 0 && qualityDelta <= 0) { - summary.hurts_or_expands.push(note) - } else { - summary.no_material_change.push(note) + switch (classifyCalibrationBucket({ tokenDelta, qualityDelta })) { + case 'helps': + summary.helps.push(note) + break + case 'hurts_or_expands': + summary.hurts_or_expands.push(note) + break + default: + summary.no_material_change.push(note) + break } return summary }, { diff --git a/docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh b/docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh index 14d0d1f..9b6c711 100644 --- a/docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh @@ -7,12 +7,22 @@ TS="$(date -u +%Y-%m-%dT%H%M%SZ)" BUNDLE_DIR="${GRAPHIFY_BENCH_REAL_RESULTS_DIR:-$HERE/results/real-workspaces/$TS}" 
PROMPTS_FILE="${GRAPHIFY_BENCH_REAL_PROMPTS:-$HERE/prompts.real-workspace.example.json}" +if [[ ! -f "$PROMPTS_FILE" ]]; then + echo "GRAPHIFY_BENCH_REAL_PROMPTS must point to an existing prompts JSON file: $PROMPTS_FILE" >&2 + exit 2 +fi + run_workspace() { local workspace_name="$1" local workspace_path="$2" + local workspace_var_name="$3" if [[ -z "$workspace_path" ]]; then return fi + if [[ ! -d "$workspace_path" ]]; then + echo "$workspace_var_name must point to an existing workspace directory: $workspace_path" >&2 + exit 2 + fi mkdir -p "$BUNDLE_DIR/$workspace_name" echo "[real-workspace] $workspace_name -> $workspace_path" @@ -28,8 +38,8 @@ if [[ -z "${GRAPHIFY_BENCH_BACKEND:-}" && -z "${GRAPHIFY_BENCH_MONOREPO:-}" ]]; fi mkdir -p "$BUNDLE_DIR" -run_workspace "backend" "${GRAPHIFY_BENCH_BACKEND:-}" -run_workspace "monorepo" "${GRAPHIFY_BENCH_MONOREPO:-}" +run_workspace "backend" "${GRAPHIFY_BENCH_BACKEND:-}" "GRAPHIFY_BENCH_BACKEND" +run_workspace "monorepo" "${GRAPHIFY_BENCH_MONOREPO:-}" "GRAPHIFY_BENCH_MONOREPO" node "$HERE/summarize-real-workspaces.mjs" "$BUNDLE_DIR" > "$BUNDLE_DIR/real-workspaces.summary.json" cat "$BUNDLE_DIR/real-workspaces.summary.json" diff --git a/docs/benchmarks/2026-05-11-spi-vs-legacy/summarize-real-workspaces.mjs b/docs/benchmarks/2026-05-11-spi-vs-legacy/summarize-real-workspaces.mjs index e76c6ce..819e97f 100644 --- a/docs/benchmarks/2026-05-11-spi-vs-legacy/summarize-real-workspaces.mjs +++ b/docs/benchmarks/2026-05-11-spi-vs-legacy/summarize-real-workspaces.mjs @@ -22,8 +22,18 @@ const workspaceNames = readdirSync(bundleDir, { withFileTypes: true }) return left.localeCompare(right) }) +function readWorkspaceSummary(name) { + const summaryPath = join(bundleDir, name, 'summary.json') + try { + return JSON.parse(readFileSync(summaryPath, 'utf8')) + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error) + throw new Error(`failed to read ${name} summary.json at ${summaryPath}: ${message}`) + } +} + const workspaces = Object.fromEntries( - workspaceNames.map((name) => [name, JSON.parse(readFileSync(join(bundleDir, name, 'summary.json'), 'utf8'))]), + workspaceNames.map((name) => [name, readWorkspaceSummary(name)]), ) const objectiveMetrics = workspaceNames.flatMap((workspace) => { @@ -42,11 +52,17 @@ const qualitativeNotes = [ 'If GoValidate is unavailable, no GoValidate-specific numbers are claimed.', ] -console.log(JSON.stringify({ - workspace_order: workspaceNames, - workspaces, - comparison: { - objective_metrics: objectiveMetrics, - qualitative_notes: qualitativeNotes, - }, -}, null, 2)) +try { + console.log(JSON.stringify({ + workspace_order: workspaceNames, + workspaces, + comparison: { + objective_metrics: objectiveMetrics, + qualitative_notes: qualitativeNotes, + }, + }, null, 2)) +} catch (error) { + const message = error instanceof Error ? error.message : String(error) + console.error(message) + process.exit(1) +} diff --git a/src/infrastructure/context-pack-command.ts b/src/infrastructure/context-pack-command.ts index 5109999..895a142 100644 --- a/src/infrastructure/context-pack-command.ts +++ b/src/infrastructure/context-pack-command.ts @@ -188,6 +188,9 @@ export async function runContextPackCommand( }) if (options.task === 'review') { + if (options.retrievalStrategy !== undefined) { + throw new Error('retrievalStrategy is not supported for task=review') + } const reviewResult = dependencies.analyzePrImpact(graph, '.', { budget: plannerBudget, taskIntent: initialPlan.evidence.recipe_id, diff --git a/src/runtime/benchmark/probe-calibration.ts b/src/runtime/benchmark/probe-calibration.ts new file mode 100644 index 0000000..879fac8 --- /dev/null +++ b/src/runtime/benchmark/probe-calibration.ts @@ -0,0 +1,12 @@ +export function classifyCalibrationBucket(input: { + tokenDelta: number + qualityDelta: number +}): 'helps' | 
'hurts_or_expands' | 'no_material_change' { + if (input.tokenDelta < 0 && input.qualityDelta >= 0) { + return 'helps' + } + if (input.tokenDelta > 0 || input.qualityDelta < 0) { + return 'hurts_or_expands' + } + return 'no_material_change' +} diff --git a/src/runtime/context-pack-resolution.ts b/src/runtime/context-pack-resolution.ts index ed651c4..779d7a2 100644 --- a/src/runtime/context-pack-resolution.ts +++ b/src/runtime/context-pack-resolution.ts @@ -377,12 +377,13 @@ function renderSketchRepresentation( relationIndex: RelationIndex, ): { type: 'behavior_sketch' | 'dependency_record'; reason: string; snippet: string } | null { const behaviorEdges = relationLabels(node, relationIndex, 'outgoing', ['calls', 'route_handler', 'controller_route', 'method', 'contains']) + const executionEdges = relationLabels(node, relationIndex, 'outgoing', ['calls']) const tests = relationLabels(node, relationIndex, 'outgoing', ['covered_by']) const config = relationLabels(node, relationIndex, 'outgoing', ['uses_config']) const readsEnv = relationLabels(node, relationIndex, 'outgoing', ['reads_env']) const outgoingDeps = relationLabels(node, relationIndex, 'outgoing', ['calls', 'injects', 'depends_on']) const incomingDeps = relationLabels(node, relationIndex, 'incoming', ['calls', 'injects', 'depends_on']) - const { sideEffects, latencySensitive } = sideEffectHints(behaviorEdges) + const { sideEffects, latencySensitive } = sideEffectHints(executionEdges) if ( tests.length > 0 diff --git a/src/runtime/retrieve.ts b/src/runtime/retrieve.ts index c05779a..2e07d88 100644 --- a/src/runtime/retrieve.ts +++ b/src/runtime/retrieve.ts @@ -1911,6 +1911,13 @@ export async function retrieveContextAsync(graph: KnowledgeGraph, options: Retri return nodeId ? [[nodeId, node.relevance_band] as const] : [] }), ) + const lexicalSliceIds = options.retrievalStrategy === 'slice-v1' + ? 
new Set( + lexicalResult.matched_nodes + .map((node) => matchedNodeId(node)) + .filter((nodeId): nodeId is string => nodeId !== null), + ) + : null const questionLower = options.question.toLowerCase() const candidatesById = new Map( @@ -1937,6 +1944,9 @@ export async function retrieveContextAsync(graph: KnowledgeGraph, options: Retri for (const [candidateId] of [...semanticScores.entries()] .sort((left, right) => right[1] - left[1]) .slice(0, 8)) { + if (lexicalSliceIds !== null && !lexicalSliceIds.has(candidateId)) { + continue + } candidateIds.add(candidateId) } } @@ -1995,6 +2005,7 @@ export async function retrieveContextAsync(graph: KnowledgeGraph, options: Retri ...(options.retrievalLevel !== undefined ? { manualOverride: options.retrievalLevel } : {}), }), rootPath, + lexicalResult.slice, ) } diff --git a/src/runtime/retrieve/slicing.ts b/src/runtime/retrieve/slicing.ts index aa88165..f3ac753 100644 --- a/src/runtime/retrieve/slicing.ts +++ b/src/runtime/retrieve/slicing.ts @@ -15,6 +15,18 @@ export interface SliceScoredNode { score: number } +function sliceNodeFromGraph(graph: KnowledgeGraph, nodeId: string): SliceScoredNode { + const attributes = graph.nodeAttributes(nodeId) + return { + id: nodeId, + label: String(attributes.label ?? nodeId), + sourceFile: String(attributes.source_file ?? ''), + exactLabelMatch: false, + sourcePathMatch: false, + score: 0.25, + } +} + type SliceMode = ContextPackSliceMetadata['mode'] interface SlicePolicy { @@ -150,7 +162,7 @@ function recordPath( function traverseDirection( graph: KnowledgeGraph, - scoredById: ReadonlyMap, + scoredById: Map, anchorIds: readonly string[], selectedIds: Set, orderedIds: string[], @@ -179,10 +191,8 @@ function traverseDirection( continue } - const neighbor = scoredById.get(neighborId) - if (!neighbor) { - continue - } + const neighbor = scoredById.get(neighborId) ?? 
sliceNodeFromGraph(graph, neighborId) + scoredById.set(neighborId, neighbor) if (shouldSuppressNode(graph, neighbor, anchoredIds)) { continue } @@ -212,7 +222,7 @@ function traverseDirection( function addHelperNeighbors( graph: KnowledgeGraph, - scoredById: ReadonlyMap, + scoredById: Map, helperRelations: ReadonlySet, selectedIds: Set, orderedIds: string[], @@ -232,8 +242,9 @@ function addHelperNeighbors( continue } - const neighbor = scoredById.get(neighborId) - if (!neighbor || shouldSuppressNode(graph, neighbor, anchoredIds)) { + const neighbor = scoredById.get(neighborId) ?? sliceNodeFromGraph(graph, neighborId) + scoredById.set(neighborId, neighbor) + if (shouldSuppressNode(graph, neighbor, anchoredIds)) { continue } diff --git a/src/runtime/stdio/tools.ts b/src/runtime/stdio/tools.ts index 7ae75ff..661d671 100644 --- a/src/runtime/stdio/tools.ts +++ b/src/runtime/stdio/tools.ts @@ -856,6 +856,14 @@ export function handleToolCall(id: string | number | null, graphPath: string, pa ) } + const contextPackStrategy = parseRetrievalStrategyParam(helpers, toolArguments) + if (contextPackStrategy === 'invalid') { + return helpers.failure(id, helpers.jsonrpcInvalidParams, 'retrieval_strategy must be one of default, slice-v1') + } + if (task === 'review' && contextPackStrategy) { + return helpers.failure(id, helpers.jsonrpcInvalidParams, 'retrieval_strategy is not supported for task=review') + } + if (task === 'review') { const graphDir = dirname(validateGraphPath(graphPath)) const projectRoot = dirname(graphDir) @@ -885,10 +893,6 @@ export function handleToolCall(id: string | number | null, graphPath: string, pa if ((Object.hasOwn(toolArguments, 'retrieval_level') || Object.hasOwn(toolArguments, 'retrievalLevel')) && contextPackLevelOverride === null) { return helpers.failure(id, helpers.jsonrpcInvalidParams, 'retrieval_level must be an integer between 0 and 5') } - const contextPackStrategy = parseRetrievalStrategyParam(helpers, toolArguments) - if 
(contextPackStrategy === 'invalid') { - return helpers.failure(id, helpers.jsonrpcInvalidParams, 'retrieval_strategy must be one of default, slice-v1') - } const contextPackLevelTyped = contextPackLevelOverride === null ? null : (contextPackLevelOverride as 0 | 1 | 2 | 3 | 4 | 5) const retrieval = retrieveContext(graph, { question: prompt, diff --git a/tests/unit/benchmark-graph-stats.test.ts b/tests/unit/benchmark-graph-stats.test.ts index 455d76d..ffc7223 100644 --- a/tests/unit/benchmark-graph-stats.test.ts +++ b/tests/unit/benchmark-graph-stats.test.ts @@ -41,4 +41,22 @@ describe('benchmark graph stats helper', () => { expect(JSON.parse(output)).toEqual({ node_count: 2, edge_count: 0 }) }) }) + + it('prints a clear error when graph.json is malformed', () => { + const dir = mkdtempSync(join(tmpdir(), 'graphify-bench-stats-bad-')) + const graphPath = join(dir, 'graph.json') + writeFileSync(graphPath, '{"nodes":[', 'utf8') + try { + expect(() => execFileSync('node', [ + 'docs/benchmarks/2026-05-11-spi-vs-legacy/graph-stats.mjs', + graphPath, + ], { cwd: process.cwd(), encoding: 'utf8', stdio: 'pipe' })).toThrowError( + expect.objectContaining({ + stderr: expect.stringContaining(graphPath), + }), + ) + } finally { + rmSync(dir, { recursive: true, force: true }) + } + }) }) diff --git a/tests/unit/benchmark-probe-calibration.test.ts b/tests/unit/benchmark-probe-calibration.test.ts new file mode 100644 index 0000000..9de060a --- /dev/null +++ b/tests/unit/benchmark-probe-calibration.test.ts @@ -0,0 +1,19 @@ +import { describe, expect, it } from 'vitest' + +import { classifyCalibrationBucket } from '../../src/runtime/benchmark/probe-calibration.js' + +describe('benchmark probe calibration buckets', () => { + it('treats token expansion as hurts_or_expands even when quality improves', () => { + expect(classifyCalibrationBucket({ + tokenDelta: 25, + qualityDelta: 0.2, + })).toBe('hurts_or_expands') + }) + + it('treats quality regressions as hurts_or_expands even when 
tokens drop', () => { + expect(classifyCalibrationBucket({ + tokenDelta: -15, + qualityDelta: -0.1, + })).toBe('hurts_or_expands') + }) +}) diff --git a/tests/unit/benchmark-real-workspace.test.ts b/tests/unit/benchmark-real-workspace.test.ts index 6b327ba..c2d7d78 100644 --- a/tests/unit/benchmark-real-workspace.test.ts +++ b/tests/unit/benchmark-real-workspace.test.ts @@ -118,4 +118,70 @@ describe('real-workspace benchmark support', () => { expect(template).toContain('No private paths or artifacts are committed.') expect(template).toContain('If GoValidate is unavailable, no GoValidate-specific numbers are claimed.') }) + + it('fails fast when the real-workspace prompts file is missing', () => { + withTempDir((dir) => { + expect(() => execFileSync('bash', [ + 'docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh', + ], { + cwd: process.cwd(), + encoding: 'utf8', + stdio: 'pipe', + env: { + ...process.env, + GRAPHIFY_BENCH_BACKEND: process.cwd(), + GRAPHIFY_BENCH_REAL_PROMPTS: join(dir, 'missing-prompts.json'), + }, + })).toThrowError(expect.objectContaining({ + stderr: expect.stringContaining('GRAPHIFY_BENCH_REAL_PROMPTS'), + })) + }) + }) + + it('fails fast when a configured workspace path is missing', () => { + withTempDir((dir) => { + const promptsPath = join(dir, 'prompts.json') + writeFileSync(promptsPath, JSON.stringify({ + schema_version: 1, + prompts: [], + }, null, 2)) + + expect(() => execFileSync('bash', [ + 'docs/benchmarks/2026-05-11-spi-vs-legacy/run-real-workspace.sh', + ], { + cwd: process.cwd(), + encoding: 'utf8', + stdio: 'pipe', + env: { + ...process.env, + GRAPHIFY_BENCH_BACKEND: join(dir, 'missing-backend'), + GRAPHIFY_BENCH_REAL_PROMPTS: promptsPath, + }, + })).toThrowError(expect.objectContaining({ + stderr: expect.stringContaining('GRAPHIFY_BENCH_BACKEND'), + })) + }) + }) + + it('prints which workspace summary failed to parse', () => { + withTempDir((dir) => { + const backendDir = join(dir, 'backend') + const monorepoDir = 
join(dir, 'monorepo') + mkdirSync(backendDir, { recursive: true }) + mkdirSync(monorepoDir, { recursive: true }) + writeFileSync(join(backendDir, 'summary.json'), JSON.stringify({ variants: {} }, null, 2)) + writeFileSync(join(monorepoDir, 'summary.json'), '{"variants":', 'utf8') + + expect(() => execFileSync('node', [ + 'docs/benchmarks/2026-05-11-spi-vs-legacy/summarize-real-workspaces.mjs', + dir, + ], { + cwd: process.cwd(), + encoding: 'utf8', + stdio: 'pipe', + })).toThrowError(expect.objectContaining({ + stderr: expect.stringContaining('monorepo'), + })) + }) + }) }) diff --git a/tests/unit/context-pack-resolution-sketch.test.ts b/tests/unit/context-pack-resolution-sketch.test.ts index 8f873f1..2d56fb6 100644 --- a/tests/unit/context-pack-resolution-sketch.test.ts +++ b/tests/unit/context-pack-resolution-sketch.test.ts @@ -251,4 +251,34 @@ describe('applyContextPackResolution sketch mode', () => { expect(cancelOrder?.snippet).toContain('framework: trpc_procedure_mutation') expect(cancelOrder?.snippet).toContain('side effects: db_write') }) + + it('does not infer side effects from structural contains edges alone', () => { + const result = applyContextPackResolution( + [ + node({ + node_id: 'worker_module', + label: 'WorkerModule', + framework_role: 'nest_module', + snippet: 'export class WorkerModule {}', + }), + node({ + node_id: 'queue_publisher', + label: 'QueueClient.publish', + snippet: 'export async function publish() {}', + }), + ], + { + resolution: 'sketch', + relationships: [ + relationship('worker_module', 'queue_publisher', 'contains'), + ], + }, + ) + + const workerModule = result.nodes.find((entry) => entry.node_id === 'worker_module') + + expect(workerModule?.representation_type).toBe('behavior_sketch') + expect(workerModule?.snippet).not.toContain('side effects:') + expect(workerModule?.snippet).toContain('framework: nest_module') + }) }) diff --git a/tests/unit/retrieve-semantic.test.ts b/tests/unit/retrieve-semantic.test.ts index 
7b84e8f..52e7f54 100644 --- a/tests/unit/retrieve-semantic.test.ts +++ b/tests/unit/retrieve-semantic.test.ts @@ -79,4 +79,56 @@ describe('retrieve semantic path', () => { expect(result.matched_nodes[0]?.label).toBe('ArchiveStore') }) + + it('keeps semantic slice-v1 retrieval inside the lexical slice and preserves slice metadata', async () => { + vi.resetModules() + vi.doMock('../../src/runtime/semantic.js', () => ({ + rankCandidatesBySemanticSimilarity: vi.fn(async () => new Map([ + ['unrelated_worker', 0.99], + ['session_store', 0.55], + ])), + rerankCandidatesWithCrossEncoder: vi.fn(async () => new Map()), + DEFAULT_SEMANTIC_MODEL: 'mock-semantic-model', + DEFAULT_RERANK_MODEL: 'mock-rerank-model', + })) + + const { retrieveContextAsync } = await import('../../src/runtime/retrieve.js') + const graph = new KnowledgeGraph() + graph.addNode('auth_service', { + label: 'AuthService.login', + file_type: 'code', + source_file: '/src/auth-service.ts', + source_location: 'L2-L6', + snippet: 'export class AuthService { async login() { return this.sessionStore.create() } }', + }) + graph.addNode('session_store', { + label: 'SessionStore.create', + file_type: 'code', + source_file: '/src/session-store.ts', + source_location: 'L1-L3', + snippet: 'export function create() {}', + }) + graph.addNode('unrelated_worker', { + label: 'UnrelatedWorker.rebuild', + file_type: 'code', + source_file: '/src/unrelated-worker.ts', + source_location: 'L1-L3', + snippet: 'export function rebuild() {}', + }) + graph.addEdge('auth_service', 'session_store', { relation: 'calls' }) + + const result = await retrieveContextAsync(graph, { + question: 'Explain `AuthService.login`', + budget: 3000, + semantic: true, + retrievalStrategy: 'slice-v1', + }) + + const labels = result.matched_nodes.map((node) => node.label) + expect(labels).toContain('AuthService.login') + expect(labels).toContain('SessionStore.create') + expect(labels).not.toContain('UnrelatedWorker.rebuild') + expect((result as { 
retrieval_strategy?: string }).retrieval_strategy).toBe('slice-v1') + expect((result as { slice?: { mode?: string } }).slice?.mode).toBe('explain') + }) }) diff --git a/tests/unit/retrieve-slice-surface.test.ts b/tests/unit/retrieve-slice-surface.test.ts index e384c21..5ac662c 100644 --- a/tests/unit/retrieve-slice-surface.test.ts +++ b/tests/unit/retrieve-slice-surface.test.ts @@ -74,4 +74,25 @@ describe('slice-v1 context-pack command surface', () => { const payload = JSON.parse(output) as { pack: { retrieval_strategy?: string } } expect(payload.pack.retrieval_strategy).toBe('slice-v1') }) + + it('rejects retrievalStrategy for review packs instead of silently ignoring it', async () => { + const graph = new KnowledgeGraph() + const dependencies: ContextPackCommandDependencies = { + loadGraph: vi.fn().mockReturnValue(graph), + retrieveContext: vi.fn(), + compactRetrieveResult: vi.fn(), + analyzePrImpact: vi.fn(), + compactPrImpactResult: vi.fn(), + analyzeImpact: vi.fn(), + compactImpactResult: vi.fn(), + } + + await expect(runContextPackCommand({ + prompt: 'Review current diff', + budget: 1000, + task: 'review', + graphPath: 'graphify-out/graph.json', + retrievalStrategy: 'slice-v1', + } as never, dependencies)).rejects.toThrow(/retrievalStrategy/i) + }) }) diff --git a/tests/unit/retrieve-slice-v1.test.ts b/tests/unit/retrieve-slice-v1.test.ts index dde5db6..f1ee73d 100644 --- a/tests/unit/retrieve-slice-v1.test.ts +++ b/tests/unit/retrieve-slice-v1.test.ts @@ -105,6 +105,21 @@ describe('retrieveContext retrievalStrategy=slice-v1', () => { expect((sliced as any).slice.directions).toEqual(['backward', 'forward']) }) + it('can pull direct graph neighbors into a level-1 slice even when they do not lexically match', () => { + const sliced = retrieveContext(buildSliceGraph(), { + question: 'Explain `AuthService.login`', + budget: 3000, + retrievalLevel: 1, + retrievalStrategy: 'slice-v1', + } as never) + + const labels = sliced.matched_nodes.map((node) => node.label) + 
+ expect(labels).toContain('AuthService.login') + expect(labels).toContain('SessionStore.createSession') + expect(labels).not.toContain('index.ts') + }) + it('uses an impact-oriented forward slice for breakage questions', () => { const sliced = retrieveContext(buildSliceGraph(), { question: 'What breaks if `AuthService.login` changes?', diff --git a/tests/unit/stdio-slice-surface.test.ts b/tests/unit/stdio-slice-surface.test.ts index e9fb195..2f6dbd5 100644 --- a/tests/unit/stdio-slice-surface.test.ts +++ b/tests/unit/stdio-slice-surface.test.ts @@ -124,4 +124,24 @@ describe('stdio slice-v1 surface', () => { expect(JSON.stringify(retrieveResponse)).toContain('retrieval_strategy must be one of default, slice-v1') expect(JSON.stringify(contextPackResponse)).toContain('retrieval_strategy must be one of default, slice-v1') }) + + it('rejects retrieval_strategy for review context packs instead of ignoring it', async () => { + const graphPath = createGraphPath() + + const contextPackResponse = await Promise.resolve(handleStdioRequest(graphPath, { + id: 3, + method: 'tools/call', + params: { + name: 'context_pack', + arguments: { + prompt: 'Review current diff', + budget: 1000, + task: 'review', + retrieval_strategy: 'slice-v1', + }, + }, + })) + + expect(JSON.stringify(contextPackResponse)).toContain('retrieval_strategy is not supported for task=review') + }) })