diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 208e6629..b3cc1527 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,7 +47,7 @@ jobs: cache: true - name: Install scip-go - run: go install github.com/sourcegraph/scip-go/cmd/scip-go@latest + run: go install github.com/scip-code/scip-go/cmd/scip-go@latest - name: Run tests run: go test -v -race ./... diff --git a/.github/workflows/ckb.yml b/.github/workflows/ckb.yml index 1cbc78e8..8e60de36 100644 --- a/.github/workflows/ckb.yml +++ b/.github/workflows/ckb.yml @@ -88,7 +88,7 @@ jobs: run: go build -ldflags="-s -w" -o ckb ./cmd/ckb - name: Install SCIP indexer - run: go install github.com/sourcegraph/scip-go/cmd/scip-go@latest + run: go install github.com/scip-code/scip-go/cmd/scip-go@latest # ─────────────────────────────────────────────────────────────────────── # Cache & Index @@ -114,7 +114,7 @@ jobs: echo "╔═══════════════════════════════════════════════════════════════════════════════╗" echo "║ INDEXER NOT FOUND ║" echo "╠═══════════════════════════════════════════════════════════════════════════════╣" - echo "║ Go: go install github.com/sourcegraph/scip-go/cmd/scip-go@latest ║" + echo "║ Go: go install github.com/scip-code/scip-go/cmd/scip-go@latest ║" echo "║ TypeScript: npm i -g @sourcegraph/scip-typescript ║" echo "║ Python: pip install scip-python ║" echo "║ Rust: cargo install scip ║" @@ -989,7 +989,7 @@ jobs: run: go build -ldflags="-s -w" -o ckb ./cmd/ckb - name: Install SCIP indexer - run: go install github.com/sourcegraph/scip-go/cmd/scip-go@latest + run: go install github.com/scip-code/scip-go/cmd/scip-go@latest - name: Cache uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5 diff --git a/.github/workflows/cov.yml b/.github/workflows/cov.yml index 17f80b4a..4a818800 100644 --- a/.github/workflows/cov.yml +++ b/.github/workflows/cov.yml @@ -31,7 +31,7 @@ jobs: cache: true - name: Install scip-go - run: go install 
github.com/sourcegraph/scip-go/cmd/scip-go@latest + run: go install github.com/scip-code/scip-go/cmd/scip-go@latest - name: Run tests with coverage run: | diff --git a/.gitignore b/.gitignore index 3aaefdee..a575087a 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,6 @@ testdata/**/pubspec.lock # Vendored Cartographer Rust build artifacts third_party/cartographer/mapper-core/cartographer/target/ + +# MCP runtime caches (pinned to commit hash, regenerated on startup) +internal/mcp/.cartographer_cache.json diff --git a/CHANGELOG.md b/CHANGELOG.md index cd66ac24..ae4f9b8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,132 @@ All notable changes to CKB will be documented in this file. ## [Unreleased] +## [9.2.0] - 2026-04-25 + +### Added + +- **`analyzeOutgoingImpact` — forward call graph** (MCP + CLI) — mirror + of `analyzeImpact` answering *"what does this symbol call?"* instead of + *"who calls it?"*. New `Engine.AnalyzeOutgoingImpact` drives off LIP + v2.3.5's `query_outgoing_impact` RPC, folds the result through the same + `ImpactItem` pipeline as the incoming side (with `direct-callee` / + `transitive-callee` kinds), and surfaces semantically coupled callees + alongside the static graph. Degrades cleanly when LIP isn't running: + the response is empty with a provenance warning, never an error. + Surfaces include `ckb impact outgoing ` (with `--min-score` + for the semantic threshold), the `analyzeOutgoingImpact` MCP tool, and + a new `ProvenanceCLI.Warnings` field so LIP-degradation messages reach + JSON consumers. +- **`symbolExists` MCP tool** — exact-match boolean oracle that returns + `{exists, kind, location?}` for a fully-qualified symbol ID. Built for + LLMs to ground references *before* they cite them in code, without + spending tokens on a 20-result `searchSymbols` payload. Cheaper than + `getSymbol` for the "does this thing actually exist" check. 
+- **LIP enrichment folds into `analyzeImpact`** — tier-1 tree-sitter + callers that LIP discovers (when `scip-go` emits no `Call` roles, e.g. + Go method dispatch) are now folded into the same `directImpact` / + `transitiveImpact` lists as SCIP's own results, deduplicated by + `(file, name)`. Driven by a new `BlastRadiusEnricher` interface so the + fold path is the single source of truth for both incoming and outgoing + impact analysis. Items LIP marks `edges_source=empty` are skipped (LIP + signalling no static evidence); `tier1`, `scip_with_tier1_edges`, and + `scip_only` all fold the same way. Risk score now picks up + semantic-coupling signals via the same enricher pipeline. +- **`register_project_root` on LIP handshake** — Engine startup now + registers the repo root with the daemon so LIP canonicalises file URIs + against a known anchor, matching the v2.3.1 contract. Eliminates the + URI-shape drift that previously caused tier-1 callers to dedup + incorrectly against SCIP results. + +### Changed + +- **`analyzeImpact` risk score now weighted by bridge centrality** — + `calculateAggregatedRisk` multiplies the weighted-mean score by + `1 + max(BridgeScore)/1000` (capped at 2.0) over the changed files, so a + change landing on a critical architectural path (high betweenness) is + reported as riskier than the same-shape change in a leaf module. Implements + the behaviour that `CARTOGRAPHER_STRATEGY.md` had already documented but + the code was not actually doing. Bridge lookups match by both `Path` and + `ModuleID`; if no changed file matches the graph, the multiplier is 1.0 + and no informational factor is appended. Only runs when the binary was + built with `-tags cartographer` (graph is a no-op otherwise). A new + `bridge_centrality` informational factor surfaces in `RiskScore.Factors` + when the multiplier fires; its `Weight` is 0 because it applies + multiplicatively, not as a weighted-mean input. 
+ +### Cartographer + +- **Vendored Cartographer fully synced to upstream 3.0.0** — the + vendored tree under `third_party/cartographer/mapper-core/cartographer/` + was 391 lines behind on `diagram.rs` alone, and 10 `.rs` files plus + `Cargo.toml` had drifted. Full sync brings in doc-node graph support + (`cartographer_doc_index`, `cartographer_doc_context`, `cartographer_query_docs` + FFI entry points — Go bindings can be added as a follow-up), + LIP-style `Range` / `at_range` on `GraphEdge`, PascalCase bare-identifier + resolution for doc backtick refs, and the overlays feature on diagrams. + New `scripts/sync-cartographer.sh` is now the supported path for future + syncs — rsync-based, explicit path list, emits next-step commands. No + local patches needed against upstream. +- **Diagram overlays in `renderArchitecture` / `ckb diagram`** — the + vendored `diagram.rs` was synced from upstream Cartographer, so the + Mermaid/DOT output now decorates the base import graph with + architectural signals: cycle members get a thick red border (pivots + dashed), cycle-internal edges a heavy red arrow, layer violations pick + up per-type dashed/dotted edge styling, and hot nodes + (`hotspot_score ≥ 70`) get an orange border plus DOT size scaling. + Mermaid is border-only for hot nodes (no sizing primitive). Cycle red + takes precedence over hot orange on the same node — architectural + signal wins over performance signal. +- **`renderArchitecture` MCP tool** — returns the project's module-level + import graph as Mermaid or Graphviz (DOT), ready to paste into IDEs + that render Mermaid inline (Cursor, Claude Desktop, VS Code markdown + preview, GitHub). With `focus` set, returns an undirected BFS + neighborhood around the anchor module to `depth` (default 2); without, + returns the top-N most-connected nodes (default cap 40). Response + includes `truncated: true` when the node cap kicked in. 
Backed by the + new `cartographer_render_architecture` FFI export; CLI and MCP outputs + are produced by the same shared renderer. +- Go binding `cartographer.RenderArchitecture()` in `internal/cartographer/bridge.go` (+ no-op stub for the no-tag build). + +### Fixed + +- **Vendored Cartographer `rebuild_graph` deadlock** — upstream + `ApiState::rebuild_graph` held the `mapped_files` Mutex across its + loop and then called `resolve_import_target`, which re-acquired the + same non-reentrant `std::sync::Mutex`. Any project with a resolvable + import deadlocked — the `cartographer diagram` / `cartographer health` + CLIs hung, and the Go bridge's `cartographer.MapProject` would block + any time CKB fed it a repo with imports. Fixed in the vendored tree + (and contributed back upstream) by splitting the resolver: a public + method that locks, and a private helper that takes the already-held + map; `rebuild_graph` now calls the helper. Discovered during + end-to-end smoke testing against CKB itself (1093 files). Regression + test added upstream. +- **`localize-tree-sitter-symbols.sh` dropped grammar C parsers** — the + script extracted archive members via `ar x`, which silently clobbers + files when multiple members share a name. Cargo emits a `parser.o` + and `scanner.o` per grammar crate (tree-sitter-c, -cpp, -rust, -go, + etc.), so `ar x` left only the *last* grammar's C parser on disk, + producing a localized archive missing `_tree_sitter_c` / `_tree_sitter_cpp`. + The script now feeds the archive directly to `ld -r` with + `-force_load` (Mach-O) / `--whole-archive` (ELF), which pulls every + member in without touching the filesystem. The `rust_tree_sitter` C + ABI refs to `_tree_sitter_c` and `_tree_sitter_cpp` now resolve + inside the combined object as expected. 
+- **Tree-sitter symbol collisions at link time** — `libcartographer.a` + previously exported its bundled tree-sitter runtime and grammar + symbols, which collided with `go-tree-sitter` when building CKB with + `-tags cartographer` (`ld: 246 duplicate symbols`). `make build-cartographer` + now post-processes the archive via + `scripts/localize-tree-sitter-symbols.sh` (vendored under + `third_party/cartographer/mapper-core/cartographer/scripts/`), which + partial-links archive members into one combined object and localizes + `ts_*` / `tree_sitter_*`. `cartographer_*` FFI exports stay global. + Beyond the duplicate-symbol error, this also rules out a silent + memory-corruption class of bug where Cartographer's Rust code could + have bound to the consumer's tree-sitter copy at global resolution + time if the two versions' struct layouts ever drifted. + ## [9.1.0] - 2026-04-16 ### Added diff --git a/CLAUDE.md b/CLAUDE.md index 7ac11773..1e0c8d7c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -78,6 +78,22 @@ golangci-lint run ./ckb setup --tool=vscode ``` +## Release Process + +Releases are fully automated via `.github/workflows/release.yml`, triggered by pushing a `v*` tag. + +**Steps to release:** +1. Bump version in `internal/version/version.go`, `npm/package.json`, `testdata/review/sarif.json` +2. Update `CHANGELOG.md` +3. Merge to main, tag `vX.Y.Z`, push the tag +4. The pipeline handles everything else: + - Runs `go test -race ./...` + - GoReleaser builds cross-platform binaries and uploads to GitHub Releases + - Updates Homebrew tap (`SimplyLiz/homebrew-ckb`) + - Publishes `@tastehub/ckb` + 5 platform packages to npm + +**Do not manually `npm publish`** — the pipeline does it with checksummed binaries from GoReleaser. 
+ ## npm Distribution (v7.0) CKB is also available via npm: diff --git a/Makefile b/Makefile index b6144073..84f7fbe2 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,8 @@ build: build-cartographer build-cartographer: @echo "Building Cartographer static library..." @cd $(CARTOGRAPHER_DIR) && cargo build --release + @echo "Localizing tree-sitter symbols (prevents link-time collisions with go-tree-sitter)..." + @cd $(CARTOGRAPHER_DIR) && scripts/localize-tree-sitter-symbols.sh target/release/libcartographer.a @echo "Library: $(CARTOGRAPHER_LIB)" ## Build without Cartographer (no Rust toolchain required — for CI and contributors) diff --git a/cmd/ckb/impact.go b/cmd/ckb/impact.go index fd4fb2f5..dcfb8425 100644 --- a/cmd/ckb/impact.go +++ b/cmd/ckb/impact.go @@ -39,6 +39,9 @@ var ( // prepareChange subcommand flags prepareChangeFormat string prepareChangeChangeType string + // outgoing subcommand flags + impactOutgoingMinScore float32 + impactOutgoingFormat string ) var impactCmd = &cobra.Command{ @@ -75,6 +78,25 @@ Examples: Run: runPrepareChange, } +var impactOutgoingCmd = &cobra.Command{ + Use: "outgoing ", + Short: "Analyze what a symbol calls (forward call graph)", + Long: `Analyze the forward call graph of a symbol — what it calls directly +and transitively. Mirrors 'ckb impact ' but in the opposite +direction. + +Requires a LIP daemon advertising query_outgoing_impact (LIP v2.3.5+). +When LIP is unavailable the response carries the symbol metadata with +empty callee lists and a provenance warning. 
+ +Examples: + ckb impact outgoing DoWork + ckb impact outgoing DoWork --min-score=0.6 + ckb impact outgoing DoWork --format=json`, + Args: cobra.ExactArgs(1), + Run: runImpactOutgoing, +} + var impactDiffCmd = &cobra.Command{ Use: "diff", Short: "Analyze impact of code changes", @@ -112,8 +134,13 @@ func init() { prepareChangeCmd.Flags().StringVar(&prepareChangeFormat, "format", "full", "Output format (full, compact)") prepareChangeCmd.Flags().StringVar(&prepareChangeChangeType, "change-type", "modify", "Change type (modify, rename, delete, extract, move)") + // outgoing subcommand flags + impactOutgoingCmd.Flags().Float32Var(&impactOutgoingMinScore, "min-score", 0.6, "Minimum cosine similarity for semantic callees (0 disables semantic enrichment)") + impactOutgoingCmd.Flags().StringVar(&impactOutgoingFormat, "format", "human", "Output format (human, json)") + impactCmd.AddCommand(impactDiffCmd) impactCmd.AddCommand(prepareChangeCmd) + impactCmd.AddCommand(impactOutgoingCmd) rootCmd.AddCommand(impactCmd) } @@ -301,6 +328,7 @@ type ImpactResponseCLI struct { SymbolID string `json:"symbolId"` Symbol *SymbolInfoCLI `json:"symbol,omitempty"` RiskScore *RiskScoreCLI `json:"riskScore,omitempty"` + BlastRadius *BlastRadiusCLI `json:"blastRadius,omitempty"` DirectImpact []ImpactItemCLI `json:"directImpact"` TransitiveImpact []ImpactItemCLI `json:"transitiveImpact,omitempty"` ModulesAffected []ModuleImpactCLI `json:"modulesAffected"` @@ -434,6 +462,15 @@ func convertImpactResponse(symbolID string, resp *query.AnalyzeImpactResponse) * } } + if resp.BlastRadius != nil { + result.BlastRadius = &BlastRadiusCLI{ + ModuleCount: resp.BlastRadius.ModuleCount, + FileCount: resp.BlastRadius.FileCount, + UniqueCallerCount: resp.BlastRadius.UniqueCallerCount, + RiskLevel: resp.BlastRadius.RiskLevel, + } + } + if resp.Provenance != nil { result.Provenance = &ProvenanceCLI{ RepoStateId: resp.Provenance.RepoStateId, @@ -795,3 +832,135 @@ func formatImpactMarkdown(resp 
*ChangeSetResponseCLI) string { return b.String() } + +// OutgoingImpactResponseCLI is the CLI-facing view of +// query.AnalyzeOutgoingImpactResponse. +type OutgoingImpactResponseCLI struct { + SymbolID string `json:"symbolId"` + Symbol *SymbolInfoCLI `json:"symbol,omitempty"` + DirectCallees []ImpactItemCLI `json:"directCallees"` + TransitiveCallees []ImpactItemCLI `json:"transitiveCallees,omitempty"` + SemanticCallees []SemanticCalleeInfoCLI `json:"semanticCallees,omitempty"` + EdgesSource string `json:"edgesSource,omitempty"` + Truncated bool `json:"truncated,omitempty"` + Provenance *ProvenanceCLI `json:"provenance,omitempty"` +} + +// SemanticCalleeInfoCLI represents an embedding-similar coupled callee. +type SemanticCalleeInfoCLI struct { + SymbolURI string `json:"symbolUri,omitempty"` + FileURI string `json:"fileUri"` + Similarity float32 `json:"similarity"` + Source string `json:"source"` +} + +func runImpactOutgoing(cmd *cobra.Command, args []string) { + start := time.Now() + logger := newLogger(impactOutgoingFormat) + symbolID := args[0] + + repoRoot := mustGetRepoRoot() + engine := mustGetEngine(repoRoot, logger) + ctx := newContext() + + resp, err := engine.AnalyzeOutgoingImpact(ctx, query.AnalyzeOutgoingImpactOptions{ + SymbolId: symbolID, + MinScore: impactOutgoingMinScore, + }) + if err != nil { + if strings.Contains(err.Error(), "not found") { + fmt.Fprint(os.Stderr, formatSymbolNotFoundError(symbolID)) + os.Exit(1) + } + fmt.Fprintf(os.Stderr, "Error analyzing outgoing impact: %v\n", err) + os.Exit(1) + } + + cliResp := convertOutgoingImpactResponse(symbolID, resp) + output, err := FormatResponse(cliResp, OutputFormat(impactOutgoingFormat)) + if err != nil { + fmt.Fprintf(os.Stderr, "Error formatting output: %v\n", err) + os.Exit(1) + } + fmt.Println(output) + + logger.Debug("Outgoing impact analysis completed", + "symbolId", symbolID, + "direct", len(resp.DirectCallees), + "transitive", len(resp.TransitiveCallees), + "duration", 
time.Since(start).Milliseconds(), + ) +} + +func convertOutgoingImpactResponse(symbolID string, resp *query.AnalyzeOutgoingImpactResponse) *OutgoingImpactResponseCLI { + direct := make([]ImpactItemCLI, 0, len(resp.DirectCallees)) + for _, item := range resp.DirectCallees { + direct = append(direct, impactItemToCLI(item)) + } + transitive := make([]ImpactItemCLI, 0, len(resp.TransitiveCallees)) + for _, item := range resp.TransitiveCallees { + transitive = append(transitive, impactItemToCLI(item)) + } + semantic := make([]SemanticCalleeInfoCLI, 0, len(resp.SemanticCallees)) + for _, s := range resp.SemanticCallees { + semantic = append(semantic, SemanticCalleeInfoCLI{ + SymbolURI: s.SymbolURI, + FileURI: s.FileURI, + Similarity: s.Similarity, + Source: s.Source, + }) + } + + out := &OutgoingImpactResponseCLI{ + SymbolID: symbolID, + DirectCallees: direct, + TransitiveCallees: transitive, + SemanticCallees: semantic, + EdgesSource: resp.EdgesSource, + Truncated: resp.Truncated, + } + if resp.Symbol != nil { + visibility := "unknown" + confidence := 0.0 + if resp.Symbol.Visibility != nil { + visibility = resp.Symbol.Visibility.Visibility + confidence = resp.Symbol.Visibility.Confidence + } + out.Symbol = &SymbolInfoCLI{ + StableID: resp.Symbol.StableId, + Name: resp.Symbol.Name, + Kind: resp.Symbol.Kind, + Visibility: visibility, + VisibilityConfidence: confidence, + } + } + if resp.Provenance != nil { + out.Provenance = &ProvenanceCLI{ + RepoStateId: resp.Provenance.RepoStateId, + RepoStateDirty: resp.Provenance.RepoStateDirty, + QueryDurationMs: resp.Provenance.QueryDurationMs, + Warnings: resp.Provenance.Warnings, + } + } + return out +} + +func impactItemToCLI(item query.ImpactItem) ImpactItemCLI { + cli := ImpactItemCLI{ + StableID: item.StableId, + Name: item.Name, + Kind: item.Kind, + Distance: item.Distance, + ModuleID: item.ModuleId, + Confidence: item.Confidence, + } + if item.Location != nil { + cli.Location = &LocationCLI{ + FileID: item.Location.FileId, + 
Path: item.Location.FileId, + StartLine: item.Location.StartLine, + StartColumn: item.Location.StartColumn, + } + } + return cli +} diff --git a/cmd/ckb/review.go b/cmd/ckb/review.go index 20318e51..395cdac8 100644 --- a/cmd/ckb/review.go +++ b/cmd/ckb/review.go @@ -222,11 +222,11 @@ func runReview(cmd *cobra.Command, args []string) { } opts := query.ReviewPROptions{ - BaseBranch: reviewBaseBranch, - HeadBranch: reviewHeadBranch, - Policy: policy, - Checks: reviewChecks, - SkipChecks: reviewSkipChecks, + BaseBranch: reviewBaseBranch, + HeadBranch: reviewHeadBranch, + Policy: policy, + Checks: reviewChecks, + SkipChecks: reviewSkipChecks, Staged: reviewStaged, Scope: scope, LLM: reviewLLM, diff --git a/cmd/ckb/symbol.go b/cmd/ckb/symbol.go index 9225ddcf..232a3c62 100644 --- a/cmd/ckb/symbol.go +++ b/cmd/ckb/symbol.go @@ -114,9 +114,10 @@ type ModuleInfoCLI struct { // ProvenanceCLI contains response metadata type ProvenanceCLI struct { - RepoStateId string `json:"repoStateId"` - RepoStateDirty bool `json:"repoStateDirty"` - QueryDurationMs int64 `json:"queryDurationMs"` + RepoStateId string `json:"repoStateId"` + RepoStateDirty bool `json:"repoStateDirty"` + QueryDurationMs int64 `json:"queryDurationMs"` + Warnings []string `json:"warnings,omitempty"` } func convertSymbolResponse(resp *query.GetSymbolResponse) *SymbolResponseCLI { diff --git a/internal/cartographer/bridge.go b/internal/cartographer/bridge.go index 3ae6964b..df99288b 100644 --- a/internal/cartographer/bridge.go +++ b/internal/cartographer/bridge.go @@ -647,3 +647,37 @@ func ContextHealth(content string, opts *ContextHealthOpts) (*ContextHealthRepor } return &result, nil } + +// RenderArchitecture renders the project's import graph as a Mermaid or +// Graphviz (DOT) diagram. focus is an optional module_id or path suffix — +// when set, the diagram is a BFS neighborhood of that module up to `depth`; +// when empty, it's the top-N nodes by degree. depth=0 → 2, maxNodes=0 → 40. 
+func RenderArchitecture(path, format, focus string, depth, maxNodes uint32) (*RenderArchitectureResult, error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + var cFormat *C.char + if format != "" { + cFormat = C.CString(format) + defer C.free(unsafe.Pointer(cFormat)) + } + + var cFocus *C.char + if focus != "" { + cFocus = C.CString(focus) + defer C.free(unsafe.Pointer(cFocus)) + } + + resp, err := callFFI(func() *C.char { + return C.cartographer_render_architecture(cPath, cFormat, cFocus, C.uint(depth), C.uint(maxNodes)) + }) + if err != nil { + return nil, err + } + + var result RenderArchitectureResult + if err := json.Unmarshal(resp.Data, &result); err != nil { + return nil, &CartographerError{err.Error()} + } + return &result, nil +} diff --git a/internal/cartographer/bridge_stub.go b/internal/cartographer/bridge_stub.go index d77617d3..c44c89b7 100644 --- a/internal/cartographer/bridge_stub.go +++ b/internal/cartographer/bridge_stub.go @@ -5,11 +5,6 @@ // All functions return ErrUnavailable; callers should check Available() first. package cartographer -import "errors" - -// ErrUnavailable is returned by all functions when Cartographer is not compiled in. -var ErrUnavailable = errors.New("cartographer: not compiled in this build (use -tags cartographer)") - // Available reports whether the Cartographer library is linked into this binary. 
func Available() bool { return false } @@ -52,3 +47,6 @@ func QueryContext(_, _ string, _ *QueryContextOpts) (*QueryContextResult, error) func ShotgunSurgery(_ string, _, _ uint32) ([]ShotgunSurgeryEntry, error) { return nil, ErrUnavailable } func Evolution(_ string, _ uint32) (*EvolutionResult, error) { return nil, ErrUnavailable } func BlastRadius(_, _ string, _ uint32) (*BlastRadiusResult, error) { return nil, ErrUnavailable } +func RenderArchitecture(_, _, _ string, _, _ uint32) (*RenderArchitectureResult, error) { + return nil, ErrUnavailable +} diff --git a/internal/cartographer/types.go b/internal/cartographer/types.go index 6b67dd03..9dd56ffa 100644 --- a/internal/cartographer/types.go +++ b/internal/cartographer/types.go @@ -1,6 +1,13 @@ // Package cartographer provides CGo bindings to the Rust Cartographer library. package cartographer +import "errors" + +// ErrUnavailable is returned by stub builds when Cartographer is not compiled +// in. Callers check Available() first; under `-tags cartographer` this value +// is never returned but still needs to be referenceable by tool impls. +var ErrUnavailable = errors.New("cartographer: not compiled in this build (use -tags cartographer)") + // --------------------------------------------------------------------------- // Public types (shared between real bridge and stub builds) // --------------------------------------------------------------------------- @@ -503,3 +510,17 @@ type BlastRadiusRelated struct { } // --------------------------------------------------------------------------- +// Render architecture types +// --------------------------------------------------------------------------- + +// RenderArchitectureResult is returned by RenderArchitecture. +// Truncated is true when the node cap kicked in — callers should tighten +// focus/depth or raise maxNodes to see more. 
+type RenderArchitectureResult struct { + Diagram string `json:"diagram"` + Truncated bool `json:"truncated"` + Format string `json:"format"` // "mermaid" | "dot" + NodeCount int `json:"nodeCount"` +} + +// --------------------------------------------------------------------------- diff --git a/internal/impact/analyzer.go b/internal/impact/analyzer.go index c20ca257..40406e9f 100644 --- a/internal/impact/analyzer.go +++ b/internal/impact/analyzer.go @@ -321,6 +321,21 @@ func (a *ImpactAnalyzer) processTransitiveCallers(symbol *Symbol, vis *Visibilit // computeBlastRadius calculates blast radius metrics from all impact items func (a *ImpactAnalyzer) computeBlastRadius(allImpact []ImpactItem) *BlastRadius { + return RecomputeBlastRadius(allImpact) +} + +// RecomputeBlastRadius derives a BlastRadius summary from a flat list of +// ImpactItems. Exposed so that callers who mutate the item list after +// analysis (e.g. folding in external enricher data via +// FoldExternalStaticItems) can refresh the summary counts and risk level +// without re-running the full analyzer. +// +// ModuleId-less items contribute to FileCount and the caller count but +// not to ModuleCount — LIP tier-1 items arrive without module resolution, +// so ModuleCount remains a conservative lower bound. RiskLevel is +// re-derived via ClassifyBlastRadius(moduleCount, callerCount), matching +// the analyzer's original classification. 
+func RecomputeBlastRadius(allImpact []ImpactItem) *BlastRadius { moduleSet := make(map[string]bool) fileSet := make(map[string]bool) callerCount := 0 diff --git a/internal/impact/classification.go b/internal/impact/classification.go index 5310fe1d..e57f4515 100644 --- a/internal/impact/classification.go +++ b/internal/impact/classification.go @@ -6,6 +6,8 @@ type ImpactKind string const ( DirectCaller ImpactKind = "direct-caller" TransitiveCaller ImpactKind = "transitive-caller" + DirectCallee ImpactKind = "direct-callee" // forward direction: what this symbol calls + TransitiveCallee ImpactKind = "transitive-callee" // forward direction, distance >= 2 TypeDependency ImpactKind = "type-dependency" TestDependency ImpactKind = "test-dependency" ImplementsInterface ImpactKind = "implements-interface" diff --git a/internal/impact/enricher.go b/internal/impact/enricher.go new file mode 100644 index 00000000..b49961ab --- /dev/null +++ b/internal/impact/enricher.go @@ -0,0 +1,324 @@ +package impact + +import ( + "context" + "path/filepath" + "strings" +) + +// BlastRadiusEnricher supplements SCIP-derived blast radius with external data +// (e.g., LIP embedding-based semantic coupling). Implementations must be safe +// for concurrent use and degrade gracefully — returning nil signals "unavailable". +type BlastRadiusEnricher interface { + // EnrichBatch takes changed file URIs and returns per-symbol blast radius + // from the external source. The map key is the symbol URI (e.g., + // "lip://local/src/auth.rs#validate_token"). Returns nil when the source + // is unavailable. + EnrichBatch(ctx context.Context, changedFileURIs []string) (map[string]*ExternalBlastRadius, error) +} + +// ExternalBlastRadius is what an enricher returns per symbol. +type ExternalBlastRadius struct { + // DirectItems are callers the external source found via static analysis. + // These overlap with SCIP's results and are used to confirm edges. 
+ DirectItems []ExternalItem + // TransitiveItems are transitive callers from the external source. + TransitiveItems []ExternalItem + // SemanticItems are callers found via embedding similarity that may not + // appear in any static call graph (dynamic dispatch, macros, etc.). + SemanticItems []ExternalSemanticItem + // RiskLevel is the external source's own risk assessment. + RiskLevel string + // EdgesSource is the provenance for DirectItems/TransitiveItems. Values + // mirror LIP v2.3.1: "tier1" (tree-sitter AST), "scip_with_tier1_edges" + // (SCIP symbols, Tier-1 edges back-filled), "scip_only" (SCIP call edges + // as-is), "empty" (no static edges available). An unset value means the + // source didn't report provenance — treat as fold-eligible. + EdgesSource string +} + +// Edge-source values for ExternalBlastRadius.EdgesSource. +const ( + EdgesSourceTier1 = "tier1" + EdgesSourceScipWithTier1Edges = "scip_with_tier1_edges" + EdgesSourceScipOnly = "scip_only" + EdgesSourceEmpty = "empty" +) + +// ExternalItem is a static caller from an external blast radius source. +type ExternalItem struct { + FileURI string + SymbolURI string + Distance int + Confidence float64 +} + +// ExternalSemanticItem is a semantically coupled symbol from an enricher. +type ExternalSemanticItem struct { + FileURI string + SymbolURI string + Similarity float32 // cosine similarity + Source string // "semantic" or "both" +} + +// MergeBlastRadius blends SCIP-derived blast radius with enricher data. +// +// Design invariant: UniqueCallerCount stays SCIP-only so that reviewPR +// thresholds (callerCount >= 3, callerCount > maxFanOut) are never inflated +// by embedding noise. Semantic callers are additive in SemanticCallerCount +// and SemanticCallers — they inform humans, not thresholds. +// +// Items with source=="both" confirm that a SCIP static edge also has embedding +// evidence. These bump ConfirmedCount but don't change UniqueCallerCount. 
+func MergeBlastRadius(static *BlastRadius, external *ExternalBlastRadius) *BlastRadius { + if static == nil { + return nil + } + if external == nil { + return static + } + + merged := *static // shallow copy + merged.StaticCallerCount = static.UniqueCallerCount + + // Build a set of files SCIP already knows about (from static callers). + // We use file URIs because SCIP symbol IDs and LIP symbol URIs use different + // schemes — file URI is the stable join key. + staticFiles := make(map[string]struct{}) + for _, item := range external.DirectItems { + staticFiles[item.FileURI] = struct{}{} + } + for _, item := range external.TransitiveItems { + staticFiles[item.FileURI] = struct{}{} + } + + var semanticCallers []EnrichedCaller + confirmed := 0 + seen := make(map[string]struct{}) // dedup by file URI + + for _, si := range external.SemanticItems { + if _, dup := seen[si.FileURI]; dup { + continue + } + seen[si.FileURI] = struct{}{} + + switch si.Source { + case "both": + // Confirms a SCIP edge — record but don't inflate counts + confirmed++ + semanticCallers = append(semanticCallers, EnrichedCaller{ + SymbolURI: si.SymbolURI, + FileURI: si.FileURI, + Tier: CouplingBoth, + Confidence: 0.95, + Similarity: si.Similarity, + }) + case "semantic": + // New coupling not in SCIP — advisory + semanticCallers = append(semanticCallers, EnrichedCaller{ + SymbolURI: si.SymbolURI, + FileURI: si.FileURI, + Tier: CouplingSemantic, + Confidence: float64(si.Similarity), // cosine similarity as confidence proxy + Similarity: si.Similarity, + }) + } + } + + // Count only pure semantic (not "both") as additional callers + pureSemanticCount := 0 + for _, c := range semanticCallers { + if c.Tier == CouplingSemantic { + pureSemanticCount++ + } + } + + merged.SemanticCallerCount = pureSemanticCount + merged.ConfirmedCount = confirmed + merged.SemanticCallers = semanticCallers + // RiskLevel stays SCIP-derived. Semantic coupling informs the human, not the threshold. 
+ return &merged +} + +// FoldExternalStaticItems folds an enricher's DirectItems / TransitiveItems +// into SCIP-derived ImpactItem lists so LIP's tier-1 tree-sitter callers +// (which SCIP misses when scip-go emits no Call roles) become first-class +// impact items rather than sitting in a parallel summary field. +// +// Behaviour: +// - external == nil OR EdgesSource == "empty" → returns (direct, transitive) +// unchanged. "empty" means LIP had no static call-edge evidence to +// contribute; folding nothing is correct. +// - Items with SymbolURI == "" are skipped (Phase-4 file-only fallback +// from LIP — legitimate file-level evidence but no symbol identity to +// dedup against). +// - Remaining items are deduped against the existing SCIP items by +// (absolute file path, symbol name). LIP's tier-1 URIs carry absolute +// paths (lip://local//#); SCIP ImpactItem.Location.FileId +// is joined onto repoRoot when relative. Items already present on the +// SCIP side are dropped — we never inflate caller counts with evidence +// SCIP already recorded. +// +// The function does not reclassify EdgesSource values — callers decide +// whether the provenance is trustworthy before calling. All non-Empty +// sources (tier1, scip_with_tier1_edges, scip_only) fold the same way. +func FoldExternalStaticItems( + direct, transitive []ImpactItem, + external *ExternalBlastRadius, + repoRoot string, +) (foldedDirect, foldedTransitive []ImpactItem) { + return foldExternalItemsWithKinds(direct, transitive, external, repoRoot, DirectCaller, TransitiveCaller) +} + +// FoldExternalCalleeItems is the forward-direction twin of +// FoldExternalStaticItems, tagging folded items with DirectCallee / +// TransitiveCallee. Used by Engine.AnalyzeOutgoingImpact to fold LIP's +// query_outgoing_impact result into the shared ImpactItem pipeline. 
+// +// Unlike the incoming path there is typically no SCIP-derived list to merge +// against — callers usually pass nil for direct/transitive and get back a +// pure LIP-derived set. Dedup semantics are identical to the incoming fold, +// so passing non-nil inputs is also supported for future SCIP forward BFS. +func FoldExternalCalleeItems( + direct, transitive []ImpactItem, + external *ExternalBlastRadius, + repoRoot string, +) (foldedDirect, foldedTransitive []ImpactItem) { + return foldExternalItemsWithKinds(direct, transitive, external, repoRoot, DirectCallee, TransitiveCallee) +} + +func foldExternalItemsWithKinds( + direct, transitive []ImpactItem, + external *ExternalBlastRadius, + repoRoot string, + directKind, transitiveKind ImpactKind, +) (foldedDirect, foldedTransitive []ImpactItem) { + if external == nil || external.EdgesSource == EdgesSourceEmpty { + return direct, transitive + } + + seen := make(map[string]struct{}, len(direct)+len(transitive)) + for _, it := range direct { + seen[impactItemDedupKey(it, repoRoot)] = struct{}{} + } + for _, it := range transitive { + seen[impactItemDedupKey(it, repoRoot)] = struct{}{} + } + + foldedDirect = direct + foldedTransitive = transitive + + for _, ei := range external.DirectItems { + item, key, ok := externalItemToImpactItem(ei, directKind) + if !ok { + continue + } + if _, dup := seen[key]; dup { + continue + } + seen[key] = struct{}{} + foldedDirect = append(foldedDirect, item) + } + for _, ei := range external.TransitiveItems { + item, key, ok := externalItemToImpactItem(ei, transitiveKind) + if !ok { + continue + } + if _, dup := seen[key]; dup { + continue + } + seen[key] = struct{}{} + foldedTransitive = append(foldedTransitive, item) + } + return foldedDirect, foldedTransitive +} + +// externalItemToImpactItem parses a LIP ExternalItem into an ImpactItem +// and its dedup key. Returns ok=false for items with empty SymbolURI or a +// URI that doesn't carry a `#` fragment. 
+func externalItemToImpactItem(ei ExternalItem, kind ImpactKind) (ImpactItem, string, bool) { + if ei.SymbolURI == "" { + return ImpactItem{}, "", false + } + absPath, name, ok := splitLIPSymbolURI(ei.SymbolURI, ei.FileURI) + if !ok { + return ImpactItem{}, "", false + } + distance := ei.Distance + if distance == 0 { + if kind == DirectCaller || kind == DirectCallee { + distance = 1 + } else { + distance = 2 + } + } + item := ImpactItem{ + StableId: ei.SymbolURI, + Name: name, + Kind: kind, + Distance: distance, + Confidence: ei.Confidence, + Location: &Location{FileId: absPath}, + } + return item, dedupKey(absPath, name), true +} + +// splitLIPSymbolURI parses a lip://local//# URI into (abs, name). +// Falls back to the companion file_uri when the symbol URI has no fragment, +// which happens for Phase-4 file-only items LIP emits — but those should +// already be filtered by the caller via the empty-SymbolURI check, so a +// fragment-less URI here is treated as unparseable. +func splitLIPSymbolURI(symURI, fileURI string) (absPath, name string, ok bool) { + hash := strings.LastIndex(symURI, "#") + if hash < 0 { + return "", "", false + } + filePart := symURI[:hash] + name = symURI[hash+1:] + if name == "" { + return "", "", false + } + absPath = stripLIPLocalPrefix(filePart) + if absPath == "" && fileURI != "" { + absPath = stripLIPLocalPrefix(fileURI) + } + if absPath == "" { + return "", "", false + } + return absPath, name, true +} + +// stripLIPLocalPrefix converts lip://local// or lip://local/ +// back to a filesystem path. Non-lip://local URIs (e.g. scip-go) are +// returned unchanged — they won't match any SCIP FileId but the dedup +// key will still be unique, so LIP items are safely additive. +func stripLIPLocalPrefix(uri string) string { + const p = "lip://local/" + if !strings.HasPrefix(uri, p) { + return uri + } + rest := uri[len(p):] + // LIP writes lip://local// (double slash) when the path is + // absolute. 
After stripping the single-slash prefix, a leading + // slash survives and marks an absolute path. Relative paths come + // through as plain "foo/bar.go". + return rest +} + +// impactItemDedupKey produces the (absolute path, name) key used for +// cross-source dedup. Location.FileId may be repo-relative (the common +// SCIP case) or absolute — filepath.IsAbs + filepath.Join handles both. +func impactItemDedupKey(it ImpactItem, repoRoot string) string { + path := "" + if it.Location != nil { + path = it.Location.FileId + } + if path != "" && !filepath.IsAbs(path) && repoRoot != "" { + path = filepath.Join(repoRoot, path) + } + return dedupKey(path, it.Name) +} + +func dedupKey(absPath, name string) string { + return absPath + "#" + name +} diff --git a/internal/impact/enricher_test.go b/internal/impact/enricher_test.go new file mode 100644 index 00000000..93df063c --- /dev/null +++ b/internal/impact/enricher_test.go @@ -0,0 +1,318 @@ +package impact + +import "testing" + +func TestMergeBlastRadius_NilExternal(t *testing.T) { + static := &BlastRadius{ + ModuleCount: 3, + FileCount: 5, + UniqueCallerCount: 8, + RiskLevel: "medium", + } + got := MergeBlastRadius(static, nil) + if got != static { + t.Fatal("nil external should return static unchanged") + } +} + +func TestMergeBlastRadius_NilStatic(t *testing.T) { + got := MergeBlastRadius(nil, &ExternalBlastRadius{}) + if got != nil { + t.Fatal("nil static should return nil") + } +} + +func TestMergeBlastRadius_SemanticOnly(t *testing.T) { + static := &BlastRadius{ + ModuleCount: 2, + FileCount: 3, + UniqueCallerCount: 4, + RiskLevel: "low", + } + external := &ExternalBlastRadius{ + SemanticItems: []ExternalSemanticItem{ + {FileURI: "file:///src/a.rs", SymbolURI: "sym:a", Similarity: 0.85, Source: "semantic"}, + {FileURI: "file:///src/b.rs", SymbolURI: "sym:b", Similarity: 0.72, Source: "semantic"}, + }, + } + + got := MergeBlastRadius(static, external) + + // UniqueCallerCount must stay SCIP-only + if 
got.UniqueCallerCount != 4 { + t.Errorf("UniqueCallerCount = %d, want 4 (SCIP-only)", got.UniqueCallerCount) + } + if got.StaticCallerCount != 4 { + t.Errorf("StaticCallerCount = %d, want 4", got.StaticCallerCount) + } + if got.SemanticCallerCount != 2 { + t.Errorf("SemanticCallerCount = %d, want 2", got.SemanticCallerCount) + } + if got.ConfirmedCount != 0 { + t.Errorf("ConfirmedCount = %d, want 0", got.ConfirmedCount) + } + if len(got.SemanticCallers) != 2 { + t.Fatalf("SemanticCallers len = %d, want 2", len(got.SemanticCallers)) + } + for _, sc := range got.SemanticCallers { + if sc.Tier != CouplingSemantic { + t.Errorf("caller %s tier = %s, want semantic", sc.FileURI, sc.Tier) + } + } + // RiskLevel stays SCIP-derived + if got.RiskLevel != "low" { + t.Errorf("RiskLevel = %s, want low", got.RiskLevel) + } +} + +func TestMergeBlastRadius_BothSource(t *testing.T) { + static := &BlastRadius{ + ModuleCount: 1, + FileCount: 2, + UniqueCallerCount: 3, + RiskLevel: "low", + } + external := &ExternalBlastRadius{ + SemanticItems: []ExternalSemanticItem{ + {FileURI: "file:///src/confirmed.rs", Similarity: 0.91, Source: "both"}, + {FileURI: "file:///src/new.rs", Similarity: 0.78, Source: "semantic"}, + }, + } + + got := MergeBlastRadius(static, external) + + // "both" confirms a SCIP edge — doesn't inflate semantic count + if got.SemanticCallerCount != 1 { + t.Errorf("SemanticCallerCount = %d, want 1 (only pure semantic)", got.SemanticCallerCount) + } + if got.ConfirmedCount != 1 { + t.Errorf("ConfirmedCount = %d, want 1", got.ConfirmedCount) + } + if len(got.SemanticCallers) != 2 { + t.Fatalf("SemanticCallers len = %d, want 2 (both + semantic)", len(got.SemanticCallers)) + } + + // Check tiers + tiers := map[CouplingTier]int{} + for _, sc := range got.SemanticCallers { + tiers[sc.Tier]++ + } + if tiers[CouplingBoth] != 1 || tiers[CouplingSemantic] != 1 { + t.Errorf("tier counts = %v, want both:1, semantic:1", tiers) + } +} + +func TestFoldExternalStaticItems_NilExternal(t 
*testing.T) { + direct := []ImpactItem{{StableId: "a", Name: "A", Kind: DirectCaller}} + gotD, gotT := FoldExternalStaticItems(direct, nil, nil, "/repo") + if len(gotD) != 1 || gotD[0].StableId != "a" { + t.Errorf("nil external should pass direct through unchanged, got %+v", gotD) + } + if gotT != nil { + t.Errorf("nil external should pass transitive through unchanged, got %+v", gotT) + } +} + +func TestFoldExternalStaticItems_EmptyEdgesSource(t *testing.T) { + direct := []ImpactItem{{StableId: "a", Name: "A", Kind: DirectCaller}} + external := &ExternalBlastRadius{ + EdgesSource: EdgesSourceEmpty, + DirectItems: []ExternalItem{ + {FileURI: "lip://local//repo/b.go", SymbolURI: "lip://local//repo/b.go#B", Distance: 1, Confidence: 0.9}, + }, + } + gotD, _ := FoldExternalStaticItems(direct, nil, external, "/repo") + if len(gotD) != 1 { + t.Errorf("EdgesSource=empty should skip fold; got %d direct items, want 1", len(gotD)) + } +} + +func TestFoldExternalStaticItems_SkipsEmptySymbolURI(t *testing.T) { + external := &ExternalBlastRadius{ + EdgesSource: EdgesSourceScipOnly, + DirectItems: []ExternalItem{ + {FileURI: "lip://local//repo/phase4.go", SymbolURI: "", Distance: 1, Confidence: 0.95}, + {FileURI: "lip://local//repo/ok.go", SymbolURI: "lip://local//repo/ok.go#Ok", Distance: 1, Confidence: 0.95}, + }, + } + gotD, _ := FoldExternalStaticItems(nil, nil, external, "/repo") + if len(gotD) != 1 { + t.Fatalf("expected 1 direct item after skipping empty-SymbolURI, got %d", len(gotD)) + } + if gotD[0].Name != "Ok" { + t.Errorf("kept item name = %q, want Ok", gotD[0].Name) + } + if gotD[0].Location == nil || gotD[0].Location.FileId != "/repo/ok.go" { + t.Errorf("kept item FileId = %v, want /repo/ok.go", gotD[0].Location) + } +} + +func TestFoldExternalStaticItems_DedupAgainstSCIP(t *testing.T) { + // SCIP already knows about callgraph.go:RenderTree. LIP rediscovers it. + // After fold, we should NOT get a duplicate. 
+ direct := []ImpactItem{ + {StableId: "scip-sym", Name: "RenderTree", Kind: DirectCaller, Distance: 1, + Location: &Location{FileId: "cmd/ckb/callgraph.go"}}, + } + external := &ExternalBlastRadius{ + EdgesSource: EdgesSourceScipOnly, + DirectItems: []ExternalItem{ + // Same file + name as SCIP → dedup + {FileURI: "lip://local//repo/cmd/ckb/callgraph.go", + SymbolURI: "lip://local//repo/cmd/ckb/callgraph.go#RenderTree", + Distance: 1, Confidence: 0.95}, + // Novel caller — keep + {FileURI: "lip://local//repo/cmd/ckb/impact.go", + SymbolURI: "lip://local//repo/cmd/ckb/impact.go#doImpact", + Distance: 1, Confidence: 0.95}, + }, + } + gotD, _ := FoldExternalStaticItems(direct, nil, external, "/repo") + if len(gotD) != 2 { + t.Fatalf("want 2 items (1 SCIP + 1 novel LIP), got %d: %+v", len(gotD), gotD) + } + if gotD[0].Name != "RenderTree" || gotD[1].Name != "doImpact" { + t.Errorf("wrong items: %q, %q", gotD[0].Name, gotD[1].Name) + } +} + +func TestFoldExternalStaticItems_DedupBetweenLIPDirectAndTransitive(t *testing.T) { + // LIP emits the same caller in both lists (shouldn't happen, but guard). + external := &ExternalBlastRadius{ + EdgesSource: EdgesSourceScipWithTier1Edges, + DirectItems: []ExternalItem{ + {SymbolURI: "lip://local//repo/a.go#A", Distance: 1, Confidence: 0.95}, + }, + TransitiveItems: []ExternalItem{ + {SymbolURI: "lip://local//repo/a.go#A", Distance: 2, Confidence: 0.85}, + }, + } + gotD, gotT := FoldExternalStaticItems(nil, nil, external, "/repo") + if len(gotD) != 1 || len(gotT) != 0 { + t.Errorf("want direct=1 trans=0 after cross-list dedup, got direct=%d trans=%d", len(gotD), len(gotT)) + } +} + +func TestFoldExternalStaticItems_AbsoluteFileIdPassthrough(t *testing.T) { + // When SCIP already stores an absolute FileId, dedup should still match + // LIP's absolute URI — filepath.Join shouldn't double-prefix. 
+ direct := []ImpactItem{ + {Name: "X", Kind: DirectCaller, Distance: 1, + Location: &Location{FileId: "/repo/x.go"}}, + } + external := &ExternalBlastRadius{ + EdgesSource: EdgesSourceTier1, + DirectItems: []ExternalItem{ + {SymbolURI: "lip://local//repo/x.go#X", Distance: 1, Confidence: 0.95}, + }, + } + gotD, _ := FoldExternalStaticItems(direct, nil, external, "/repo") + if len(gotD) != 1 { + t.Errorf("abs FileId dedup failed: got %d items, want 1", len(gotD)) + } +} + +func TestFoldExternalStaticItems_DistanceDefault(t *testing.T) { + // LIP sometimes omits Distance=0. Direct items should default to 1, + // transitive to 2. + external := &ExternalBlastRadius{ + EdgesSource: EdgesSourceScipOnly, + DirectItems: []ExternalItem{ + {SymbolURI: "lip://local//repo/a.go#A", Confidence: 0.95}, + }, + TransitiveItems: []ExternalItem{ + {SymbolURI: "lip://local//repo/b.go#B", Confidence: 0.85}, + }, + } + gotD, gotT := FoldExternalStaticItems(nil, nil, external, "/repo") + if len(gotD) != 1 || gotD[0].Distance != 1 { + t.Errorf("direct distance default = %d, want 1", gotD[0].Distance) + } + if len(gotT) != 1 || gotT[0].Distance != 2 { + t.Errorf("transitive distance default = %d, want 2", gotT[0].Distance) + } +} + +func TestFoldExternalCalleeItems_TagsCalleeKinds(t *testing.T) { + // Mirrors the Caller test pattern, but asserts items are tagged with + // DirectCallee / TransitiveCallee rather than the caller kinds. Guards + // against accidental cross-wiring of foldExternalItemsWithKinds. 
+ external := &ExternalBlastRadius{ + EdgesSource: EdgesSourceScipWithTier1Edges, + DirectItems: []ExternalItem{ + {SymbolURI: "lip://local//repo/a.go#A", Distance: 1, Confidence: 0.95}, + }, + TransitiveItems: []ExternalItem{ + {SymbolURI: "lip://local//repo/b.go#B", Distance: 2, Confidence: 0.85}, + }, + } + gotD, gotT := FoldExternalCalleeItems(nil, nil, external, "/repo") + if len(gotD) != 1 || gotD[0].Kind != DirectCallee { + t.Errorf("direct kind = %q, want %q", gotD[0].Kind, DirectCallee) + } + if len(gotT) != 1 || gotT[0].Kind != TransitiveCallee { + t.Errorf("transitive kind = %q, want %q", gotT[0].Kind, TransitiveCallee) + } +} + +func TestFoldExternalCalleeItems_NilAndEmpty(t *testing.T) { + // Short-circuits mirror the caller twin — nil external and "empty" + // EdgesSource both must leave the input lists unchanged. + seed := []ImpactItem{{Name: "seed", Kind: DirectCallee, Distance: 1}} + + gotD, gotT := FoldExternalCalleeItems(seed, nil, nil, "/repo") + if len(gotD) != 1 || len(gotT) != 0 { + t.Errorf("nil external: direct=%d trans=%d, want 1/0", len(gotD), len(gotT)) + } + + gotD, _ = FoldExternalCalleeItems(seed, nil, &ExternalBlastRadius{ + EdgesSource: EdgesSourceEmpty, + DirectItems: []ExternalItem{ + {SymbolURI: "lip://local//repo/a.go#A", Distance: 1}, + }, + }, "/repo") + if len(gotD) != 1 { + t.Errorf("empty EdgesSource: direct=%d, want 1 (unchanged)", len(gotD)) + } +} + +func TestFoldExternalCalleeItems_DistanceDefault(t *testing.T) { + // Distance=0 from LIP should default to 1 for direct, 2 for transitive — + // same semantics as the caller path. 
+ external := &ExternalBlastRadius{ + EdgesSource: EdgesSourceScipOnly, + DirectItems: []ExternalItem{ + {SymbolURI: "lip://local//repo/a.go#A", Confidence: 0.95}, + }, + TransitiveItems: []ExternalItem{ + {SymbolURI: "lip://local//repo/b.go#B", Confidence: 0.85}, + }, + } + gotD, gotT := FoldExternalCalleeItems(nil, nil, external, "/repo") + if gotD[0].Distance != 1 { + t.Errorf("direct default distance = %d, want 1", gotD[0].Distance) + } + if gotT[0].Distance != 2 { + t.Errorf("transitive default distance = %d, want 2", gotT[0].Distance) + } +} + +func TestMergeBlastRadius_DedupByFile(t *testing.T) { + static := &BlastRadius{ + UniqueCallerCount: 2, + RiskLevel: "low", + } + external := &ExternalBlastRadius{ + SemanticItems: []ExternalSemanticItem{ + {FileURI: "file:///src/dup.rs", Similarity: 0.80, Source: "semantic"}, + {FileURI: "file:///src/dup.rs", Similarity: 0.75, Source: "semantic"}, // same file + {FileURI: "file:///src/other.rs", Similarity: 0.70, Source: "semantic"}, + }, + } + + got := MergeBlastRadius(static, external) + + if got.SemanticCallerCount != 2 { + t.Errorf("SemanticCallerCount = %d, want 2 (deduped)", got.SemanticCallerCount) + } +} diff --git a/internal/impact/types.go b/internal/impact/types.go index e486c2ec..71b6d781 100644 --- a/internal/impact/types.go +++ b/internal/impact/types.go @@ -58,12 +58,35 @@ type Reference struct { IsTest bool // Whether this reference is from a test } +// CouplingTier distinguishes how a caller relationship was discovered. +type CouplingTier string + +const ( + CouplingSemantic CouplingTier = "semantic" // LIP embedding similarity — lower certainty + CouplingBoth CouplingTier = "both" // confirmed by both SCIP and LIP +) + +// EnrichedCaller is a caller discovered by either static analysis or semantic similarity. 
+type EnrichedCaller struct { + SymbolURI string `json:"symbolUri,omitempty"` + FileURI string `json:"fileUri"` + Tier CouplingTier `json:"tier"` + Confidence float64 `json:"confidence"` // 0.0–1.0 + Similarity float32 `json:"similarity,omitempty"` // raw cosine similarity (semantic/both only) +} + // BlastRadius summarizes the spread of impact across the codebase type BlastRadius struct { ModuleCount int `json:"moduleCount"` // Number of affected modules FileCount int `json:"fileCount"` // Number of affected files - UniqueCallerCount int `json:"uniqueCallerCount"` // Number of unique callers + UniqueCallerCount int `json:"uniqueCallerCount"` // Number of unique callers (SCIP static only) RiskLevel string `json:"riskLevel"` // "low", "medium", "high" + + // Semantic enrichment from LIP (populated when LIP blast radius is available) + StaticCallerCount int `json:"staticCallerCount,omitempty"` + SemanticCallerCount int `json:"semanticCallerCount,omitempty"` + ConfirmedCount int `json:"confirmedCount,omitempty"` // callers found by both SCIP and LIP + SemanticCallers []EnrichedCaller `json:"semanticCallers,omitempty"` } // Blast radius classification thresholds diff --git a/internal/lip/blast_radius.go b/internal/lip/blast_radius.go new file mode 100644 index 00000000..24461dba --- /dev/null +++ b/internal/lip/blast_radius.go @@ -0,0 +1,146 @@ +package lip + +import ( + "context" + "strings" + + "github.com/SimplyLiz/CodeMCP/internal/impact" +) + +// BlastRadiusEnricher adapts LIP's QueryBlastRadiusBatch into the +// impact.BlastRadiusEnricher interface. Safe for concurrent use (stateless +// adapter over the LIP socket RPC). +type BlastRadiusEnricher struct { + // MinScore is the cosine similarity threshold for semantic hits. + // Zero means static-only (no semantic items). Typical: 0.6. + MinScore float32 +} + +// EnrichBatch implements impact.BlastRadiusEnricher. 
+func (e *BlastRadiusEnricher) EnrichBatch(ctx context.Context, changedFileURIs []string) (map[string]*impact.ExternalBlastRadius, error) { + if len(changedFileURIs) == 0 { + return nil, nil + } + + result, err := QueryBlastRadiusBatch(changedFileURIs, e.MinScore) + if result == nil { + return nil, err + } + + out := make(map[string]*impact.ExternalBlastRadius, len(result.Entries)) + for symbolURI, entry := range result.Entries { + out[symbolURI] = EntryToExternal(&entry) + } + return out, nil +} + +// LookupSymbol finds the blast radius entry for a symbol within a pre-fetched +// result map. LIP keys entries by "lip://local/#" — tries exact +// match first, then falls back to scanning entries whose URI shares the file +// prefix and contains the symbol name. The fallback handles C++ mangled names +// and template specialisations where LIP's symbol URI diverges from CKB's +// stable ID. +func LookupSymbol(entries map[string]*impact.ExternalBlastRadius, file, name string) (*impact.ExternalBlastRadius, bool) { + key := "lip://local/" + file + "#" + name + if ebr, ok := entries[key]; ok { + return ebr, true + } + prefix := "lip://local/" + file + "#" + for uri, ebr := range entries { + if strings.HasPrefix(uri, prefix) && strings.Contains(uri[len(prefix):], name) { + return ebr, true + } + } + return nil, false +} + +// SCIPSymbolToURI translates a SCIP symbol string (space-separated +// ` `) into the LIP URI +// form LIP's daemon uses for SCIP-imported symbols: +// `lip:////@/`. +// +// Returns the input unchanged when it already looks like a LIP URI (starts +// with `lip://`) or doesn't parse as a 5-field SCIP symbol. Mirrors +// `scip_symbol_to_lip_uri` in LIP's import.rs. 
+func SCIPSymbolToURI(sym string) string { + if sym == "" { + return "" + } + if strings.HasPrefix(sym, "lip://") { + return sym + } + parts := strings.SplitN(sym, " ", 5) + if len(parts) != 5 { + return sym + } + scheme, manager, pkg, version, descriptor := parts[0], parts[1], parts[2], parts[3], parts[4] + descPath := strings.ReplaceAll(descriptor, " ", "/") + return "lip://" + scheme + "/" + manager + "/" + pkg + "@" + version + "/" + descPath +} + +// EntryToExternal converts a BlastRadiusEntry to impact.ExternalBlastRadius. +func EntryToExternal(entry *BlastRadiusEntry) *impact.ExternalBlastRadius { + ebr := &impact.ExternalBlastRadius{ + RiskLevel: entry.RiskLevel, + EdgesSource: entry.EdgesSource, + } + for _, di := range entry.DirectItems { + ebr.DirectItems = append(ebr.DirectItems, impact.ExternalItem{ + FileURI: di.FileURI, SymbolURI: di.SymbolURI, + Distance: di.Distance, Confidence: di.Confidence, + }) + } + for _, ti := range entry.TransitiveItems { + ebr.TransitiveItems = append(ebr.TransitiveItems, impact.ExternalItem{ + FileURI: ti.FileURI, SymbolURI: ti.SymbolURI, + Distance: ti.Distance, Confidence: ti.Confidence, + }) + } + for _, si := range entry.SemanticItems { + ebr.SemanticItems = append(ebr.SemanticItems, impact.ExternalSemanticItem{ + FileURI: si.FileURI, SymbolURI: si.SymbolURI, + Similarity: si.Similarity, Source: si.Source, + }) + } + return ebr +} + +// OutgoingEntryToExternal converts an OutgoingImpactEntry to +// impact.ExternalBlastRadius so callers can reuse the same fold and merge +// machinery built for incoming blast radius. +// +// The shared Go type does not imply shared semantics: direct_items here are +// callees (symbols the target invokes), not callers. Consumers must classify +// folded items with DirectCallee / TransitiveCallee kinds rather than +// DirectCaller / TransitiveCaller. 
+// +// RiskLevel is intentionally left empty — outgoing impact doesn't carry its +// own risk classification; CKB derives one from the unioned callee set on +// receipt. +func OutgoingEntryToExternal(entry *OutgoingImpactEntry) *impact.ExternalBlastRadius { + if entry == nil { + return nil + } + ebr := &impact.ExternalBlastRadius{ + EdgesSource: entry.EdgesSource, + } + for _, di := range entry.DirectItems { + ebr.DirectItems = append(ebr.DirectItems, impact.ExternalItem{ + FileURI: di.FileURI, SymbolURI: di.SymbolURI, + Distance: di.Distance, Confidence: di.Confidence, + }) + } + for _, ti := range entry.TransitiveItems { + ebr.TransitiveItems = append(ebr.TransitiveItems, impact.ExternalItem{ + FileURI: ti.FileURI, SymbolURI: ti.SymbolURI, + Distance: ti.Distance, Confidence: ti.Confidence, + }) + } + for _, si := range entry.SemanticItems { + ebr.SemanticItems = append(ebr.SemanticItems, impact.ExternalSemanticItem{ + FileURI: si.FileURI, SymbolURI: si.SymbolURI, + Similarity: si.Similarity, Source: si.Source, + }) + } + return ebr +} diff --git a/internal/lip/client.go b/internal/lip/client.go index f23d094e..efcb215c 100644 --- a/internal/lip/client.go +++ b/internal/lip/client.go @@ -739,6 +739,180 @@ func PruneDeleted() (int, int, error) { return result.Checked, len(result.Removed), nil } +// ============================================================================= +// Blast radius +// ============================================================================= + +// BlastRadiusItem is a static caller from LIP's blast radius response. +type BlastRadiusItem struct { + FileURI string `json:"file_uri"` + SymbolURI string `json:"symbol_uri"` + Distance int `json:"distance"` + Confidence float64 `json:"confidence"` +} + +// BlastRadiusSemanticItem is a semantically coupled symbol from LIP. 
+type BlastRadiusSemanticItem struct { + FileURI string `json:"file_uri"` + SymbolURI string `json:"symbol_uri"` + Similarity float32 `json:"similarity"` + Source string `json:"source"` // "semantic" or "both" +} + +// BlastRadiusEntry is a single symbol's blast radius from LIP. +type BlastRadiusEntry struct { + SymbolURI string `json:"symbol_uri"` + FileURI string `json:"file_uri"` // input file this entry belongs to + DirectDependents int `json:"direct_dependents"` + TransitiveDependents int `json:"transitive_dependents"` + AffectedFiles []string `json:"affected_files"` + DirectItems []BlastRadiusItem `json:"direct_items"` + TransitiveItems []BlastRadiusItem `json:"transitive_items"` + RiskLevel string `json:"risk_level"` + Truncated bool `json:"truncated"` + SemanticItems []BlastRadiusSemanticItem `json:"semantic_items"` + // EdgesSource is LIP v2.3.1+ provenance for the static call edges: + // "tier1", "scip_with_tier1_edges", "scip_only", or "empty". Omitted + // by older daemons — clients treat missing as "fold-eligible". + EdgesSource string `json:"edges_source,omitempty"` +} + +// BlastRadiusBatchResult is the full response from QueryBlastRadiusBatch. +// NotIndexedURIs lists input URIs that were absent from the LIP index — +// callers can distinguish "not indexed" from "indexed but zero callers". +type BlastRadiusBatchResult struct { + Entries map[string]BlastRadiusEntry // keyed by symbol_uri + NotIndexedURIs []string // input file URIs not in index (omitted when empty) +} + +type blastRadiusBatchResp struct { + Results []BlastRadiusEntry `json:"results"` + NotIndexedURIs []string `json:"not_indexed_uris,omitempty"` +} + +// QueryBlastRadiusBatch asks LIP for blast radius of all symbols in the given +// changed files. One round-trip. Returns a map keyed by symbol_uri. +// Returns nil when LIP is unavailable. +// +// min_score is the cosine similarity threshold for semantic hits. Pass 0 to +// get static-only results (no semantic items). 
Typical values: 0.6–0.8. +func QueryBlastRadiusBatch(changedFileURIs []string, minScore float32) (*BlastRadiusBatchResult, error) { + if len(changedFileURIs) == 0 { + return nil, nil + } + req := map[string]any{ + "type": "query_blast_radius_batch", + "changed_file_uris": changedFileURIs, + } + if minScore > 0 { + req["min_score"] = minScore + } + // Budget: generous timeout — LIP needs to resolve symbols + compute embeddings + timeout := max(time.Duration(len(changedFileURIs)+1)*200*time.Millisecond, 3*time.Second) + raw, _ := lipRPC(req, timeout, 8<<20, + func(r blastRadiusBatchResp) *blastRadiusBatchResp { return &r }) + if raw == nil { + return nil, nil + } + // Index by symbol_uri for O(1) lookup in the merge path + entries := make(map[string]BlastRadiusEntry, len(raw.Results)) + for _, entry := range raw.Results { + entries[entry.SymbolURI] = entry + } + return &BlastRadiusBatchResult{ + Entries: entries, + NotIndexedURIs: raw.NotIndexedURIs, + }, nil +} + +type blastRadiusSymbolResp struct { + Result *BlastRadiusEntry `json:"result,omitempty"` +} + +// QueryBlastRadiusSymbol asks LIP for blast radius of a single symbol (v2.3+). +// Returns (nil, nil) when the symbol's file isn't indexed or LIP is +// unavailable — callers should treat both identically (fall back to the +// static SCIP blast radius unchanged). +// +// Prefer this over QueryBlastRadiusBatch when you already have a symbol URI: +// it skips the file-level fetch-and-filter workaround and lets LIP dispatch +// directly. 
+func QueryBlastRadiusSymbol(symbolURI string, minScore float32) (*BlastRadiusEntry, error) { + if symbolURI == "" { + return nil, nil + } + req := map[string]any{ + "type": "query_blast_radius_symbol", + "symbol_uri": symbolURI, + } + if minScore > 0 { + req["min_score"] = minScore + } + raw, _ := lipRPC(req, 2*time.Second, 2<<20, + func(r blastRadiusSymbolResp) *blastRadiusSymbolResp { return &r }) + if raw == nil { + return nil, nil + } + return raw.Result, nil +} + +// ============================================================================= +// Outgoing impact (v2.3.3) +// ============================================================================= + +// OutgoingImpactEntry is the result of a QueryOutgoingImpact call. Shape +// mirrors BlastRadiusEntry but traces the forward call graph — direct_items +// are callees at distance=1, transitive_items at distance>=2. +// +// target_uri echoes the request's symbol_uri (post-canonicalisation). +// edges_source and the semantic items reuse the same provenance and coupling +// vocabulary as blast radius, so callers can treat both results through the +// shared ExternalBlastRadius pipeline via OutgoingEntryToExternal. +type OutgoingImpactEntry struct { + TargetURI string `json:"target_uri"` + DirectItems []BlastRadiusItem `json:"direct_items"` + TransitiveItems []BlastRadiusItem `json:"transitive_items"` + SemanticItems []BlastRadiusSemanticItem `json:"semantic_items"` + // EdgesSource mirrors BlastRadiusEntry.EdgesSource: "tier1", + // "scip_with_tier1_edges", "scip_only", or "empty". Omitted by + // daemons that don't yet report provenance — treat as fold-eligible. + EdgesSource string `json:"edges_source,omitempty"` + Truncated bool `json:"truncated"` +} + +type outgoingImpactResp struct { + Result *OutgoingImpactEntry `json:"result,omitempty"` +} + +// QueryOutgoingImpact asks LIP for the forward call graph of a symbol +// (v2.3.3+). 
Returns (nil, nil) when the symbol isn't indexed, LIP is +// unavailable, or the daemon doesn't support the RPC — callers should +// degrade to SCIP-only outgoing traversal. +// +// Depth is capped at 8 server-side (matching query_outgoing_calls). Semantic +// items are seeded from the target's own embedding, same as blast radius. +// +// Gate on Handshake.SupportedMessages containing "query_outgoing_impact" +// before calling — older daemons reject with UnknownMessage. +func QueryOutgoingImpact(symbolURI string, minScore float32) (*OutgoingImpactEntry, error) { + if symbolURI == "" { + return nil, nil + } + req := map[string]any{ + "type": "query_outgoing_impact", + "symbol_uri": symbolURI, + } + if minScore > 0 { + req["min_score"] = minScore + } + raw, _ := lipRPC(req, 2*time.Second, 2<<20, + func(r outgoingImpactResp) *outgoingImpactResp { return &r }) + if raw == nil { + return nil, nil + } + return raw.Result, nil +} + // ============================================================================= // Annotations // ============================================================================= @@ -786,6 +960,39 @@ func BatchAnnotationGet(uris []string, key string) (map[string]string, error) { // Protocol // ============================================================================= +type deltaAckResp struct { + Accepted bool `json:"accepted"` + Error *string `json:"error"` +} + +// RegisterProjectRoot tells the LIP daemon the canonical filesystem root for +// this workspace (shipped in LIP v2.3.1). Once registered, LIP canonicalises +// inbound lip://local/ URIs via longest-prefix match against the root +// set, so callers can send either relative or absolute forms on subsequent +// queries. +// +// Best-effort and idempotent: the daemon tracks roots as an unordered set. +// Safe to call on every connect. 
Returns (false, nil) when LIP is unavailable +// or rejects the root; callers treat both as "proceed without registration" +// since CKB's query paths already tolerate unregistered roots via the server's +// auto-detect heuristics. +// +// Gate on Handshake.SupportedMessages containing "register_project_root" +// before calling — older daemons will reject with UnknownMessage. +func RegisterProjectRoot(root string) (bool, error) { + if root == "" { + return false, nil + } + result, _ := lipRPC( + map[string]any{"type": "register_project_root", "root": root}, + 500*time.Millisecond, 4<<10, + func(r deltaAckResp) *deltaAckResp { return &r }) + if result == nil { + return false, nil + } + return result.Accepted, nil +} + // Handshake performs the version handshake. Clients can call this on connect to // detect protocol drift before sending real queries. func Handshake(clientVersion string) (*HandshakeInfo, error) { diff --git a/internal/lip/client_test.go b/internal/lip/client_test.go index 3c9107fc..41d7e621 100644 --- a/internal/lip/client_test.go +++ b/internal/lip/client_test.go @@ -695,6 +695,187 @@ func TestWireProtocol_BatchAnnotationGet(t *testing.T) { assertField(t, req, "key", "stability") } +func TestWireProtocol_RegisterProjectRoot_Accepted(t *testing.T) { + d := newTestDaemon(t, deltaAckResp{Accepted: true}) + + ok, err := RegisterProjectRoot("/abs/path/to/repo") + d.waitHandled(t) + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !ok { + t.Errorf("accepted = false, want true") + } + req := d.req() + assertField(t, req, "type", "register_project_root") + assertField(t, req, "root", "/abs/path/to/repo") +} + +// When the daemon rejects the root (e.g. invalid path), the RPC should return +// (false, nil) — non-fatal, callers fall back to auto-detection. 
+func TestWireProtocol_RegisterProjectRoot_Rejected(t *testing.T) { + reason := "invalid root" + d := newTestDaemon(t, deltaAckResp{Accepted: false, Error: &reason}) + + ok, err := RegisterProjectRoot("/bogus") + d.waitHandled(t) + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ok { + t.Errorf("accepted = true, want false") + } +} + +// Empty root must short-circuit — no socket call. +func TestWireProtocol_RegisterProjectRoot_EmptyRoot(t *testing.T) { + prev := os.Getenv("LIP_SOCKET") + os.Setenv("LIP_SOCKET", "/tmp/lip-nonexistent-ckb-test.sock") + defer os.Setenv("LIP_SOCKET", prev) + + ok, err := RegisterProjectRoot("") + if ok || err != nil { + t.Errorf("empty root: want (false, nil), got (%v, %v)", ok, err) + } +} + +// ============================================================================= +// Outgoing impact +// ============================================================================= + +func TestWireProtocol_QueryOutgoingImpact_WithResult(t *testing.T) { + d := newTestDaemon(t, outgoingImpactResp{Result: &OutgoingImpactEntry{ + TargetURI: "lip://local//repo/foo.go#Caller", + DirectItems: []BlastRadiusItem{ + {FileURI: "lip://local//repo/bar.go", SymbolURI: "lip://local//repo/bar.go#Callee", + Distance: 1, Confidence: 0.95}, + }, + TransitiveItems: []BlastRadiusItem{ + {FileURI: "lip://local//repo/baz.go", SymbolURI: "lip://local//repo/baz.go#Deep", + Distance: 2, Confidence: 0.85}, + }, + SemanticItems: []BlastRadiusSemanticItem{ + {FileURI: "lip://local//repo/similar.go", SymbolURI: "...#Similar", + Similarity: 0.82, Source: "semantic"}, + }, + EdgesSource: "scip_with_tier1_edges", + Truncated: false, + }}) + + got, err := QueryOutgoingImpact("lip://local//repo/foo.go#Caller", 0.6) + d.waitHandled(t) + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got == nil { + t.Fatal("got nil result") + } + if got.TargetURI != "lip://local//repo/foo.go#Caller" { + t.Errorf("TargetURI = %q", got.TargetURI) + } + if 
len(got.DirectItems) != 1 || got.DirectItems[0].Distance != 1 { + t.Errorf("DirectItems = %+v", got.DirectItems) + } + if len(got.TransitiveItems) != 1 || got.TransitiveItems[0].Distance != 2 { + t.Errorf("TransitiveItems = %+v", got.TransitiveItems) + } + if len(got.SemanticItems) != 1 || got.SemanticItems[0].Source != "semantic" { + t.Errorf("SemanticItems = %+v", got.SemanticItems) + } + if got.EdgesSource != "scip_with_tier1_edges" { + t.Errorf("EdgesSource = %q", got.EdgesSource) + } + + req := d.req() + assertField(t, req, "type", "query_outgoing_impact") + assertField(t, req, "symbol_uri", "lip://local//repo/foo.go#Caller") + assertField(t, req, "min_score", 0.6) +} + +// Null result (target not indexed) must come back as (nil, nil). +func TestWireProtocol_QueryOutgoingImpact_NullResult(t *testing.T) { + d := newTestDaemon(t, outgoingImpactResp{Result: nil}) + + got, err := QueryOutgoingImpact("lip://local//repo/unknown.go#X", 0.6) + d.waitHandled(t) + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != nil { + t.Errorf("want nil result for unindexed target, got %+v", got) + } +} + +// min_score=0 must be omitted so the daemon applies its default. +func TestWireProtocol_QueryOutgoingImpact_OmitMinScore(t *testing.T) { + d := newTestDaemon(t, outgoingImpactResp{}) + + _, _ = QueryOutgoingImpact("lip://x#X", 0) + d.waitHandled(t) + + req := d.req() + assertField(t, req, "type", "query_outgoing_impact") + assertField(t, req, "symbol_uri", "lip://x#X") + assertNoField(t, req, "min_score") +} + +// Empty symbol URI must short-circuit — no socket call. 
+func TestWireProtocol_QueryOutgoingImpact_EmptySymbol(t *testing.T) { + prev := os.Getenv("LIP_SOCKET") + os.Setenv("LIP_SOCKET", "/tmp/lip-nonexistent-ckb-test.sock") + defer os.Setenv("LIP_SOCKET", prev) + + got, err := QueryOutgoingImpact("", 0.6) + if got != nil || err != nil { + t.Errorf("empty symbol: want (nil, nil), got (%v, %v)", got, err) + } +} + +func TestOutgoingEntryToExternal(t *testing.T) { + entry := &OutgoingImpactEntry{ + TargetURI: "lip://x#X", + DirectItems: []BlastRadiusItem{ + {FileURI: "f1", SymbolURI: "f1#A", Distance: 1, Confidence: 0.9}, + }, + TransitiveItems: []BlastRadiusItem{ + {FileURI: "f2", SymbolURI: "f2#B", Distance: 2, Confidence: 0.8}, + }, + SemanticItems: []BlastRadiusSemanticItem{ + {FileURI: "f3", SymbolURI: "f3#C", Similarity: 0.75, Source: "both"}, + }, + EdgesSource: "tier1", + } + ext := OutgoingEntryToExternal(entry) + if ext == nil { + t.Fatal("got nil") + } + if len(ext.DirectItems) != 1 || ext.DirectItems[0].SymbolURI != "f1#A" { + t.Errorf("DirectItems: %+v", ext.DirectItems) + } + if len(ext.TransitiveItems) != 1 || ext.TransitiveItems[0].Distance != 2 { + t.Errorf("TransitiveItems: %+v", ext.TransitiveItems) + } + if len(ext.SemanticItems) != 1 || ext.SemanticItems[0].Source != "both" { + t.Errorf("SemanticItems: %+v", ext.SemanticItems) + } + if ext.EdgesSource != "tier1" { + t.Errorf("EdgesSource = %q", ext.EdgesSource) + } + if ext.RiskLevel != "" { + t.Errorf("RiskLevel should be empty for outgoing, got %q", ext.RiskLevel) + } +} + +func TestOutgoingEntryToExternal_Nil(t *testing.T) { + if got := OutgoingEntryToExternal(nil); got != nil { + t.Errorf("nil entry: want nil, got %+v", got) + } +} + func TestWireProtocol_Handshake(t *testing.T) { d := newTestDaemon(t, handshakeResp{DaemonVersion: "2.0.0", ProtocolVersion: 2}) diff --git a/internal/mcp/presets.go b/internal/mcp/presets.go index 06592e68..c7eacc60 100644 --- a/internal/mcp/presets.go +++ b/internal/mcp/presets.go @@ -39,6 +39,7 @@ var Presets = 
map[string][]string{ // Discovery & Search (granular fallback) "searchSymbols", + "symbolExists", "getSymbol", // Navigation & Understanding (granular fallback) @@ -77,7 +78,7 @@ var Presets = map[string][]string{ PresetReview: { // Core tools "explore", "understand", "prepareChange", "batchGet", "batchSearch", - "searchSymbols", "getSymbol", "explainSymbol", "explainFile", "explainPath", + "searchSymbols", "symbolExists", "getSymbol", "explainSymbol", "explainFile", "explainPath", "findReferences", "getCallGraph", "traceUsage", "getArchitecture", "getModuleOverview", "getModuleResponsibilities", "listKeyConcepts", "analyzeImpact", "getHotspots", "exportForLLM", @@ -106,7 +107,7 @@ var Presets = map[string][]string{ PresetRefactor: { // Core tools "explore", "understand", "prepareChange", "batchGet", "batchSearch", - "searchSymbols", "getSymbol", "explainSymbol", "explainFile", "explainPath", + "searchSymbols", "symbolExists", "getSymbol", "explainSymbol", "explainFile", "explainPath", "findReferences", "getCallGraph", "traceUsage", "getArchitecture", "getModuleOverview", "getModuleResponsibilities", "listKeyConcepts", "analyzeImpact", "getHotspots", "exportForLLM", @@ -136,7 +137,7 @@ var Presets = map[string][]string{ PresetFederation: { // Core tools "explore", "understand", "prepareChange", "batchGet", "batchSearch", - "searchSymbols", "getSymbol", "explainSymbol", "explainFile", "explainPath", + "searchSymbols", "symbolExists", "getSymbol", "explainSymbol", "explainFile", "explainPath", "findReferences", "getCallGraph", "traceUsage", "getArchitecture", "getModuleOverview", "getModuleResponsibilities", "listKeyConcepts", "analyzeImpact", "getHotspots", "exportForLLM", @@ -170,7 +171,7 @@ var Presets = map[string][]string{ PresetDocs: { // Core tools "explore", "understand", "prepareChange", "batchGet", "batchSearch", - "searchSymbols", "getSymbol", "explainSymbol", "explainFile", "explainPath", + "searchSymbols", "symbolExists", "getSymbol", "explainSymbol", 
"explainFile", "explainPath", "findReferences", "getCallGraph", "traceUsage", "getArchitecture", "getModuleOverview", "getModuleResponsibilities", "listKeyConcepts", "analyzeImpact", "getHotspots", "exportForLLM", @@ -192,7 +193,7 @@ var Presets = map[string][]string{ PresetOps: { // Core tools "explore", "understand", "prepareChange", "batchGet", "batchSearch", - "searchSymbols", "getSymbol", "explainSymbol", "explainFile", "explainPath", + "searchSymbols", "symbolExists", "getSymbol", "explainSymbol", "explainFile", "explainPath", "findReferences", "getCallGraph", "traceUsage", "getArchitecture", "getModuleOverview", "getModuleResponsibilities", "listKeyConcepts", "analyzeImpact", "getHotspots", "exportForLLM", @@ -263,6 +264,7 @@ var coreToolOrder = []string{ "batchSearch", // Granular tools (fallback) "searchSymbols", + "symbolExists", "getSymbol", "explainSymbol", "explainFile", diff --git a/internal/mcp/presets_test.go b/internal/mcp/presets_test.go index a287aae8..8f87c131 100644 --- a/internal/mcp/presets_test.go +++ b/internal/mcp/presets_test.go @@ -14,14 +14,14 @@ func TestPresetFiltering(t *testing.T) { // Test core preset (default) // v8.3: Core now includes explainPath, getModuleResponsibilities, exportForLLM coreTools := server.GetFilteredTools() - if len(coreTools) != 24 { - t.Errorf("expected 24 core tools, got %d", len(coreTools)) + if len(coreTools) != 25 { + t.Errorf("expected 25 core tools, got %d", len(coreTools)) } // Verify compound tools come first (preferred for AI workflows) expectedFirst := []string{ "explore", "understand", "prepareChange", "batchGet", "batchSearch", - "searchSymbols", "getSymbol", "explainSymbol", "explainFile", "explainPath", + "searchSymbols", "symbolExists", "getSymbol", "explainSymbol", "explainFile", "explainPath", "findReferences", "getCallGraph", "traceUsage", "getArchitecture", "getModuleOverview", "getModuleResponsibilities", "listKeyConcepts", "analyzeImpact", "getHotspots", "exportForLLM", @@ -42,9 +42,9 @@ 
func TestPresetFiltering(t *testing.T) { t.Fatalf("failed to set full preset: %v", err) } fullTools := server.GetFilteredTools() - // v8.5: +3 Cartographer (shotgunSurgery, evolution, blastRadius) +3 LIP annotation tools = 107 - if len(fullTools) != 107 { - t.Errorf("expected 107 full tools, got %d", len(fullTools)) + // v8.5: +3 Cartographer (shotgunSurgery, evolution, blastRadius) +3 LIP annotation tools = 107; +1 symbolExists = 108; +1 renderArchitecture = 109; +1 analyzeOutgoingImpact = 110 + if len(fullTools) != 110 { + t.Errorf("expected 110 full tools, got %d", len(fullTools)) } // Full preset should still have core tools first diff --git a/internal/mcp/token_budget_test.go b/internal/mcp/token_budget_test.go index eaecfdbd..34bb6299 100644 --- a/internal/mcp/token_budget_test.go +++ b/internal/mcp/token_budget_test.go @@ -33,9 +33,9 @@ func TestToolsListTokenBudget(t *testing.T) { minTools int // Ensure we don't accidentally drop tools maxTools int }{ - {PresetCore, maxCorePresetBytes, 20, 24}, // v8.3: 24 tools (+explainPath, responsibilities, exportForLLM) - {PresetReview, maxReviewPresetBytes, 30, 41}, // v8.4: 41 tools (+findUnwiredModules) - {PresetFull, maxFullPresetBytes, 80, 107}, // v8.5: 107 tools (+3 Cartographer, +3 LIP annotation) + {PresetCore, maxCorePresetBytes, 20, 25}, // v8.3: 24 tools (+explainPath, responsibilities, exportForLLM); +1 symbolExists = 25 + {PresetReview, maxReviewPresetBytes, 30, 42}, // v8.4: 41 tools (+findUnwiredModules); +1 symbolExists = 42 + {PresetFull, maxFullPresetBytes, 80, 110}, // v8.5: 107 tools (+3 Cartographer, +3 LIP annotation); +1 symbolExists = 108; +1 renderArchitecture = 109; +1 analyzeOutgoingImpact = 110 } for _, tt := range tests { diff --git a/internal/mcp/tool_impls.go b/internal/mcp/tool_impls.go index 64bb9512..87a19789 100644 --- a/internal/mcp/tool_impls.go +++ b/internal/mcp/tool_impls.go @@ -593,6 +593,56 @@ func (s *MCPServer) toolSearchSymbols(params map[string]interface{}) (*envelope.
Build(), nil } +// toolSymbolExists implements the symbolExists tool. +func (s *MCPServer) toolSymbolExists(params map[string]interface{}) (*envelope.Response, error) { + name, ok := params["name"].(string) + if !ok || name == "" { + return nil, errors.NewInvalidParameterError("name", "") + } + + var kinds []string + if kindsVal, ok := params["kinds"].([]interface{}); ok { + for _, k := range kindsVal { + if kStr, ok := k.(string); ok { + kinds = append(kinds, kStr) + } + } + } + + scope, _ := params["scope"].(string) + includeExternal, _ := params["includeExternal"].(bool) + + ctx := context.Background() + opts := query.SymbolExistsOptions{ + Name: name, + Kinds: kinds, + Scope: scope, + IncludeExternal: includeExternal, + } + + result, err := s.engine().SymbolExists(ctx, opts) + if err != nil { + return nil, errors.NewOperationError("symbol exists", err) + } + + data := map[string]interface{}{ + "exists": result.Exists, + "matches": result.Matches, + "kinds": result.Kinds, + } + if len(result.Receivers) > 0 { + data["receivers"] = result.Receivers + } + if result.StaleIndex { + data["staleIndex"] = result.StaleIndex + } + + return NewToolResponse(). + Data(data). + WithProvenance(result.Provenance). + Build(), nil +} + // toolFindReferences implements the findReferences tool func (s *MCPServer) toolFindReferences(params map[string]interface{}) (*envelope.Response, error) { timer := NewWideResultTimer() @@ -1130,6 +1180,130 @@ func (s *MCPServer) toolAnalyzeImpact(params map[string]interface{}) (*envelope. return resp.Build(), nil } +// toolAnalyzeOutgoingImpact implements the analyzeOutgoingImpact tool — +// the forward-direction twin of analyzeImpact. Answers "what does X call?" +// by asking LIP for the outgoing call graph. When LIP is unavailable, the +// response is empty with a provenance warning rather than an error. 
+func (s *MCPServer) toolAnalyzeOutgoingImpact(params map[string]interface{}) (*envelope.Response, error) { + timer := NewWideResultTimer() + + symbolId, ok := params["symbolId"].(string) + if !ok { + return nil, errors.NewInvalidParameterError("symbolId", "") + } + + minScore := float32(0.6) + if v, ok := params["minScore"].(float64); ok { + minScore = float32(v) + } + + s.logger.Debug("Executing analyzeOutgoingImpact", + "symbolId", symbolId, + "minScore", minScore, + ) + + ctx := context.Background() + outResp, err := s.engine().AnalyzeOutgoingImpact(ctx, query.AnalyzeOutgoingImpactOptions{ + SymbolId: symbolId, + MinScore: minScore, + }) + if err != nil { + return nil, errors.NewOperationError("outgoing impact analysis", err) + } + + directCallees := make([]map[string]interface{}, 0, len(outResp.DirectCallees)) + for _, item := range outResp.DirectCallees { + info := map[string]interface{}{ + "stableId": item.StableId, + "name": item.Name, + "kind": item.Kind, + "distance": item.Distance, + "moduleId": item.ModuleId, + "confidence": item.Confidence, + } + if item.Location != nil { + info["location"] = map[string]interface{}{ + "fileId": item.Location.FileId, + "startLine": item.Location.StartLine, + } + } + directCallees = append(directCallees, info) + } + + transitiveCallees := make([]map[string]interface{}, 0, len(outResp.TransitiveCallees)) + for _, item := range outResp.TransitiveCallees { + info := map[string]interface{}{ + "stableId": item.StableId, + "name": item.Name, + "kind": item.Kind, + "distance": item.Distance, + "moduleId": item.ModuleId, + "confidence": item.Confidence, + } + if item.Location != nil { + info["location"] = map[string]interface{}{ + "fileId": item.Location.FileId, + "startLine": item.Location.StartLine, + } + } + transitiveCallees = append(transitiveCallees, info) + } + + semanticCallees := make([]map[string]interface{}, 0, len(outResp.SemanticCallees)) + for _, s := range outResp.SemanticCallees { + semanticCallees = 
append(semanticCallees, map[string]interface{}{ + "symbolUri": s.SymbolURI, + "fileUri": s.FileURI, + "similarity": s.Similarity, + "source": s.Source, + }) + } + + data := map[string]interface{}{ + "directCallees": directCallees, + "transitiveCallees": transitiveCallees, + } + if len(semanticCallees) > 0 { + data["semanticCallees"] = semanticCallees + } + if outResp.EdgesSource != "" { + data["edgesSource"] = outResp.EdgesSource + } + if outResp.Truncated { + data["truncated"] = true + } + if outResp.Symbol != nil { + sym := map[string]interface{}{ + "stableId": outResp.Symbol.StableId, + "name": outResp.Symbol.Name, + "kind": outResp.Symbol.Kind, + } + if outResp.Symbol.Visibility != nil { + sym["visibility"] = outResp.Symbol.Visibility.Visibility + } + data["symbol"] = sym + } + + totalResults := len(outResp.DirectCallees) + len(outResp.TransitiveCallees) + responseBytes := MeasureJSONSize(data) + RecordWideResult(WideResultMetrics{ + ToolName: "analyzeOutgoingImpact", + TotalResults: totalResults, + ReturnedResults: totalResults, + TruncatedCount: 0, + ResponseBytes: responseBytes, + EstimatedTokens: EstimateTokens(responseBytes), + ExecutionMs: timer.ElapsedMs(), + }) + + activeBackend := s.engine().ActiveBackendName() + return NewToolResponse(). + Data(data). + WithProvenance(outResp.Provenance). + WithBackend(activeBackend, s.logger). 
+ Build(), nil +} + // toolAnalyzeChange implements the analyzeChange tool func (s *MCPServer) toolAnalyzeChange(params map[string]interface{}) (*envelope.Response, error) { timer := NewWideResultTimer() diff --git a/internal/mcp/tool_impls_v86.go b/internal/mcp/tool_impls_v86.go index 35c42213..eab8968e 100644 --- a/internal/mcp/tool_impls_v86.go +++ b/internal/mcp/tool_impls_v86.go @@ -112,6 +112,45 @@ func (s *MCPServer) toolGetArchitecturalEvolution(params map[string]interface{}) return NewToolResponse().Data(result).Build(), nil } +// toolRenderArchitecture renders the project's import graph as a Mermaid or +// Graphviz (DOT) diagram, suitable for inline rendering in IDEs that support +// Mermaid (Cursor, Claude Desktop, VS Code markdown preview, GitHub). +// +// With `focus` set, returns a BFS neighborhood of the given module. Without, +// returns the top-N most-connected nodes. `truncated=true` in the response +// signals that the node cap kicked in. +func (s *MCPServer) toolRenderArchitecture(params map[string]interface{}) (*envelope.Response, error) { + if !cartographer.Available() { + return nil, errors.NewOperationError("render architecture", cartographer.ErrUnavailable) + } + + format, _ := params["format"].(string) + if format == "" { + format = "mermaid" + } + if format != "mermaid" && format != "dot" { + return nil, errors.NewInvalidParameterError("format", "must be \"mermaid\" or \"dot\"") + } + + focus, _ := params["focus"].(string) + + var depth, maxNodes uint32 + if v, ok := params["depth"].(float64); ok && v > 0 { + depth = uint32(v) + } + if v, ok := params["max_nodes"].(float64); ok && v > 0 { + maxNodes = uint32(v) + } + + repoRoot := s.engine().GetRepoRoot() + result, err := cartographer.RenderArchitecture(repoRoot, format, focus, depth, maxNodes) + if err != nil { + return nil, errors.NewOperationError("render architecture", err) + } + + return NewToolResponse().Data(result).Build(), nil +} + // toolGetBlastRadius returns the 
graph-theoretic blast radius for a module/file. func (s *MCPServer) toolGetBlastRadius(params map[string]interface{}) (*envelope.Response, error) { if !cartographer.Available() { diff --git a/internal/mcp/tools.go b/internal/mcp/tools.go index 01793150..80e9c83d 100644 --- a/internal/mcp/tools.go +++ b/internal/mcp/tools.go @@ -107,7 +107,7 @@ func (s *MCPServer) GetToolDefinitions() []Tool { }, { Name: "searchSymbols", - Description: "Semantic code search returning symbol types, locations, and relationships—more accurate than text-based grep/find.", + Description: "Semantic code search returning symbol types, locations, and relationships—more accurate than text-based grep/find. Note: may not match class methods or record properties by bare name — use symbolExists for authoritative boolean lookups.", InputSchema: map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ @@ -148,6 +148,34 @@ func (s *MCPServer) GetToolDefinitions() []Tool { "required": []string{"query"}, }, }, + { + Name: "symbolExists", + Description: "Boolean oracle for LLM grounding: answers whether a bare symbol name has any declaration in the index. Uses exact-match (not FTS ranking) so class methods and object-property declarations are found reliably. Returns exists, matches count, distinct kinds, and receiver names for methods/properties. Cheaper than searchSymbols — no locations, no ranking.", + InputSchema: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "name": map[string]interface{}{ + "type": "string", + "description": "Bare symbol name to look up (e.g. \"saveReport\", \"ENV_PATH\")", + }, + "kinds": map[string]interface{}{ + "type": "array", + "items": map[string]interface{}{"type": "string"}, + "description": "Optional kind filter (e.g. [\"method\", \"function\", \"class\", \"property\"])", + }, + "scope": map[string]interface{}{ + "type": "string", + "description": "Optional file-path prefix to restrict search (e.g. 
\"packages/server/src/\")", + }, + "includeExternal": map[string]interface{}{ + "type": "boolean", + "default": false, + "description": "Include symbols from node_modules (default false)", + }, + }, + "required": []string{"name"}, + }, + }, { Name: "listSymbols", Description: "Bulk list symbols in a scope without search query. Returns functions, types, and classes with body ranges and complexity metrics (lines, endLine, cyclomatic, cognitive). Use for complete symbol inventory — no search query needed.", @@ -316,6 +344,25 @@ func (s *MCPServer) GetToolDefinitions() []Tool { "required": []string{"symbolId"}, }, }, + { + Name: "analyzeOutgoingImpact", + Description: "Use this to check 'what does X call?' — returns direct and transitive callees, plus embedding-similar semantically coupled symbols. Mirror of analyzeImpact in the forward direction. Requires a LIP daemon advertising query_outgoing_impact (v2.3.5+); when LIP is unavailable the response is empty with a provenance warning.", + InputSchema: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "symbolId": map[string]interface{}{ + "type": "string", + "description": "The stable symbol ID to analyze", + }, + "minScore": map[string]interface{}{ + "type": "number", + "default": 0.6, + "description": "Minimum cosine similarity for semantic callees. 0 disables semantic enrichment.", + }, + }, + "required": []string{"symbolId"}, + }, + }, { Name: "analyzeChange", Description: "Use this AFTER changes are made to analyze a git diff — answers: what might break? which tests should run? who needs to review? For pre-change planning (before writing code), use prepareChange instead. 
For full PR review with quality gates, use reviewPR.", @@ -1963,6 +2010,35 @@ func (s *MCPServer) GetToolDefinitions() []Tool { "required": []string{"target"}, }, }, + { + Name: "renderArchitecture", + Description: "Render the project's module-level import graph as a Mermaid or Graphviz (DOT) diagram, ready to paste into IDEs that render Mermaid inline (Cursor, Claude Desktop, VS Code markdown preview, GitHub). With `focus` set, returns a BFS neighborhood (both imports and imported-by) around the given module to depth `depth`; without `focus`, returns the top-N most-connected nodes as an at-a-glance shape of the codebase. Response includes `truncated: true` when the node cap kicked in — tighten focus or lower depth to get a complete view.", + InputSchema: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "format": map[string]interface{}{ + "type": "string", + "description": "Output diagram format (default mermaid)", + "enum": []string{"mermaid", "dot"}, + "default": "mermaid", + }, + "focus": map[string]interface{}{ + "type": "string", + "description": "Optional anchor: module ID, repo-relative file path, or path suffix (e.g. 'server.rs'). When set, the diagram is a BFS neighborhood around this node; when absent, returns the top-N most-connected nodes.", + }, + "depth": map[string]interface{}{ + "type": "integer", + "description": "BFS depth from `focus` over undirected import edges (default 2). Ignored when `focus` is absent.", + "default": 2, + }, + "max_nodes": map[string]interface{}{ + "type": "integer", + "description": "Cap on nodes rendered; response sets `truncated: true` if the cap was hit (default 40).", + "default": 40, + }, + }, + }, + }, { Name: "queryContext", Description: "Retrieve the most relevant code context for a task or question. Runs Cartographer's PKG retrieval pipeline: BM25 content search → personalized PageRank skeleton → context health scoring. 
Returns a ready-to-use context bundle with token count and A–F quality grade. Use this before starting any non-trivial coding task.", @@ -2702,11 +2778,13 @@ func (s *MCPServer) RegisterTools() { s.tools["expandToolset"] = s.toolExpandToolset s.tools["getSymbol"] = s.toolGetSymbol s.tools["searchSymbols"] = s.toolSearchSymbols + s.tools["symbolExists"] = s.toolSymbolExists s.tools["listSymbols"] = s.toolListSymbols s.tools["getSymbolGraph"] = s.toolGetSymbolGraph s.tools["findReferences"] = s.toolFindReferences s.tools["getArchitecture"] = s.toolGetArchitecture s.tools["analyzeImpact"] = s.toolAnalyzeImpact + s.tools["analyzeOutgoingImpact"] = s.toolAnalyzeOutgoingImpact s.tools["analyzeChange"] = s.toolAnalyzeChange s.tools["explainSymbol"] = s.toolExplainSymbol s.tools["justifySymbol"] = s.toolJustifySymbol @@ -2826,6 +2904,7 @@ func (s *MCPServer) RegisterTools() { s.tools["detectShotgunSurgery"] = s.toolDetectShotgunSurgery s.tools["getArchitecturalEvolution"] = s.toolGetArchitecturalEvolution s.tools["getBlastRadius"] = s.toolGetBlastRadius + s.tools["renderArchitecture"] = s.toolRenderArchitecture // v9.0 LIP symbol annotations s.tools["annotationSet"] = s.toolAnnotationSet s.tools["annotationGet"] = s.toolAnnotationGet diff --git a/internal/query/impact.go b/internal/query/impact.go index c306d21c..c64a9a30 100644 --- a/internal/query/impact.go +++ b/internal/query/impact.go @@ -3,6 +3,7 @@ package query import ( "context" "fmt" + "math" "os" "os/exec" "path/filepath" @@ -113,8 +114,23 @@ type ModuleImpact struct { type BlastRadiusSummary struct { ModuleCount int `json:"moduleCount"` FileCount int `json:"fileCount"` - UniqueCallerCount int `json:"uniqueCallerCount"` - RiskLevel string `json:"riskLevel"` // "low", "medium", "high" + UniqueCallerCount int `json:"uniqueCallerCount"` // SCIP static callers only (drives thresholds) + RiskLevel string `json:"riskLevel"` // "low", "medium", "high" + + // LIP semantic enrichment (omitted when LIP is unavailable) + 
StaticCallerCount int `json:"staticCallerCount,omitempty"` + SemanticCallerCount int `json:"semanticCallerCount,omitempty"` + ConfirmedCount int `json:"confirmedCount,omitempty"` + SemanticCallers []SemanticCallerInfo `json:"semanticCallers,omitempty"` +} + +// SemanticCallerInfo is an embedding-discovered caller surfaced in the blast radius. +type SemanticCallerInfo struct { + SymbolURI string `json:"symbolUri,omitempty"` + FileURI string `json:"fileUri"` + Tier string `json:"tier"` // "semantic" or "both" + Confidence float64 `json:"confidence"` + Similarity float32 `json:"similarity,omitempty"` } // scipCallerProvider adapts SCIPAdapter to the TransitiveCallerProvider interface @@ -317,6 +333,25 @@ func (e *Engine) AnalyzeImpact(ctx context.Context, opts AnalyzeImpactOptions) ( return nil, e.wrapError(err, errors.InternalError) } + // Fetch LIP enrichment up-front so tier-1 static callers can be folded + // into result.DirectImpact/TransitiveImpact before budget truncation and + // item sorting. `ext` is also used later to populate semantic callers on + // the BlastRadius summary via MergeBlastRadius. + ext := e.fetchLIPEnrichment(symbolInfo) + if ext != nil { + result.DirectImpact, result.TransitiveImpact = impact.FoldExternalStaticItems( + result.DirectImpact, result.TransitiveImpact, ext, e.repoRoot) + // Recompute the blast-radius summary from the unioned item set so + // UniqueCallerCount / FileCount / RiskLevel reflect LIP's static + // contribution. Skipped when EdgesSource=="empty" because the fold + // no-ops in that case and the original summary is already correct. + if ext.EdgesSource != impact.EdgesSourceEmpty && result.BlastRadius != nil { + all := append([]impact.ImpactItem{}, result.DirectImpact...) + all = append(all, result.TransitiveImpact...) 
+ result.BlastRadius = impact.RecomputeBlastRadius(all) + } + } + // Convert results directImpact := convertImpactItems(result.DirectImpact) transitiveImpact := convertImpactItems(result.TransitiveImpact) @@ -437,14 +472,34 @@ func (e *Engine) AnalyzeImpact(ctx context.Context, opts AnalyzeImpactOptions) ( docsToUpdate = e.getDocsToUpdate(symbolInfo.StableId, 5) } - // Convert blast radius + // Convert blast radius, then enrich with LIP semantic coupling when available. + // The static-item fold already happened up-front; MergeBlastRadius here + // only blends in semantic (embedding-discovered) callers and preserves + // the post-fold summary counts. var blastRadius *BlastRadiusSummary if result.BlastRadius != nil { + enriched := result.BlastRadius + if ext != nil { + enriched = impact.MergeBlastRadius(result.BlastRadius, ext) + } + blastRadius = &BlastRadiusSummary{ - ModuleCount: result.BlastRadius.ModuleCount, - FileCount: result.BlastRadius.FileCount, - UniqueCallerCount: result.BlastRadius.UniqueCallerCount, - RiskLevel: result.BlastRadius.RiskLevel, + ModuleCount: enriched.ModuleCount, + FileCount: enriched.FileCount, + UniqueCallerCount: enriched.UniqueCallerCount, + RiskLevel: enriched.RiskLevel, + StaticCallerCount: enriched.StaticCallerCount, + SemanticCallerCount: enriched.SemanticCallerCount, + ConfirmedCount: enriched.ConfirmedCount, + } + for _, sc := range enriched.SemanticCallers { + blastRadius.SemanticCallers = append(blastRadius.SemanticCallers, SemanticCallerInfo{ + SymbolURI: sc.SymbolURI, + FileURI: sc.FileURI, + Tier: string(sc.Tier), + Confidence: sc.Confidence, + Similarity: sc.Similarity, + }) } } @@ -1223,6 +1278,69 @@ func (e *Engine) getGitDiff(staged bool, baseBranch string) (string, error) { return string(out), nil } +// bridgeMultiplierFromGraph computes a risk multiplier from Cartographer's +// betweenness-centrality scores for the changed files. 
Files on critical +// architectural paths (high BridgeScore) yield a larger multiplier so that +// the same textual change is reported as riskier when it lands in a bridge. +// +// The multiplier is 1.0 + max(BridgeScore)/1000, capped at 2.0. BridgeScore +// is betweenness_centrality * 1000 (range 0-1000) per api.rs, so this maps +// a "perfect bridge" to a 2x risk amplification and a non-bridge to 1x. +// +// Matches files by both Path (exact) and ModuleID, to cover the cases where +// ChangedSymbol.File is either a repo-relative path or a module identifier. +// Returns (1.0, nil) when no nodes match — callers should treat a nil factor +// as "no adjustment, skip the factor append". +func bridgeMultiplierFromGraph(nodes []cartographer.GraphNode, files []string) (float64, *RiskFactor) { + if len(nodes) == 0 || len(files) == 0 { + return 1.0, nil + } + byPath := make(map[string]float64, len(nodes)) + byModule := make(map[string]float64, len(nodes)) + for _, n := range nodes { + if n.BridgeScore == nil { + continue + } + if n.Path != "" { + byPath[n.Path] = *n.BridgeScore + } + if n.ModuleID != "" { + byModule[n.ModuleID] = *n.BridgeScore + } + } + if len(byPath) == 0 && len(byModule) == 0 { + return 1.0, nil + } + + var maxScore float64 + matched := false + for _, f := range files { + if s, ok := byPath[f]; ok { + if s > maxScore { + maxScore = s + } + matched = true + continue + } + if s, ok := byModule[f]; ok { + if s > maxScore { + maxScore = s + } + matched = true + } + } + if !matched { + return 1.0, nil + } + + multiplier := math.Min(1.0+maxScore/1000.0, 2.0) + return multiplier, &RiskFactor{ + Name: "bridge_centrality", + Value: maxScore / 1000.0, // 0.0-1.0 informational + Weight: 0, // applied as multiplier, not weighted mean + } +} + // calculateAggregatedRisk computes an aggregated risk score for the change set. 
func (e *Engine) calculateAggregatedRisk( changedSymbols []impact.ChangedSymbol, @@ -1312,6 +1430,33 @@ func (e *Engine) calculateAggregatedRisk( score = weightedSum / totalWeight } + // Bridge-centrality adjustment: if any changed file sits on a critical + // architectural path, amplify the score. See bridgeMultiplierFromGraph + // for the multiplier shape. The graph is only fetched when the + // cartographer build tag is on; under the stub build this is a no-op. + bridgeAmplified := false + if cartographer.Available() && e.repoRoot != "" { + if graph, err := cartographer.MapProject(e.repoRoot); err == nil && graph != nil { + files := make([]string, 0, len(changedSymbols)) + seen := make(map[string]struct{}, len(changedSymbols)) + for _, s := range changedSymbols { + if s.File == "" { + continue + } + if _, dup := seen[s.File]; dup { + continue + } + seen[s.File] = struct{}{} + files = append(files, s.File) + } + if mul, factor := bridgeMultiplierFromGraph(graph.Nodes, files); factor != nil { + score = math.Min(score*mul, 1.0) + factors = append(factors, *factor) + bridgeAmplified = true + } + } + } + // Determine level var level string switch { @@ -1328,6 +1473,9 @@ func (e *Engine) calculateAggregatedRisk( // Build explanation explanation := fmt.Sprintf("Change affects %d symbols across %d modules with %d direct and %d transitive impacts.", len(changedSymbols), len(modules), len(directImpact), len(transitiveImpact)) + if bridgeAmplified { + explanation += " Risk amplified by bridge centrality (change lands on a critical architectural path)." + } return &RiskScore{ Level: level, @@ -1804,3 +1952,45 @@ func (e *Engine) generateRecommendations( return recs } + +// fetchLIPEnrichment pulls blast-radius enrichment for a symbol from LIP via +// the best available RPC. Preference order: +// 1. query_blast_radius_symbol (v2.3+) — direct symbol-URI call, no filtering. +// 2. 
query_blast_radius_batch (v2.2) — fetch all symbols in the file, then +// LookupSymbol by name as a fallback. +// +// Returns nil when the symbol lacks a location, LIP is unavailable, or the +// daemon doesn't support either RPC — callers should treat nil as "skip +// enrichment" and fall back to SCIP-only results unchanged. +func (e *Engine) fetchLIPEnrichment(symbolInfo *SymbolInfo) *impact.ExternalBlastRadius { + if symbolInfo == nil || symbolInfo.Location == nil { + return nil + } + switch { + case e.lipSupports("query_blast_radius_symbol"): + // Prefer the SCIP-form URI for symbols imported from SCIP (the common + // case — CKB's StableID is the raw scip-go symbol). Fall back to the + // Tier-1 `lip://local/#` form for symbols that don't + // round-trip through SCIP. + symURI := lip.SCIPSymbolToURI(symbolInfo.StableId) + if symURI == "" || !strings.HasPrefix(symURI, "lip://") { + symURI = "lip://local/" + symbolInfo.Location.FileId + "#" + symbolInfo.Name + } + if entry, _ := lip.QueryBlastRadiusSymbol(symURI, 0.6); entry != nil { + return lip.EntryToExternal(entry) + } + case e.lipSupports("query_blast_radius_batch"): + fileURI := "lip://local/" + symbolInfo.Location.FileId + if lipEntries, _ := lip.QueryBlastRadiusBatch([]string{fileURI}, 0.6); lipEntries != nil { + converted := make(map[string]*impact.ExternalBlastRadius, len(lipEntries.Entries)) + for k, v := range lipEntries.Entries { + vCopy := v + converted[k] = lip.EntryToExternal(&vCopy) + } + if ext, ok := lip.LookupSymbol(converted, symbolInfo.Location.FileId, symbolInfo.Name); ok { + return ext + } + } + } + return nil +} diff --git a/internal/query/impact_outgoing.go b/internal/query/impact_outgoing.go new file mode 100644 index 00000000..420f5ea5 --- /dev/null +++ b/internal/query/impact_outgoing.go @@ -0,0 +1,229 @@ +package query + +import ( + "context" + "fmt" + "sort" + "strings" + "time" + + "github.com/SimplyLiz/CodeMCP/internal/errors" + "github.com/SimplyLiz/CodeMCP/internal/impact" + 
"github.com/SimplyLiz/CodeMCP/internal/lip" +) + +// AnalyzeOutgoingImpactOptions configures the forward-direction call-graph +// query — "what does this symbol call?" — the mirror of AnalyzeImpact. +type AnalyzeOutgoingImpactOptions struct { + SymbolId string + // MinScore is the cosine-similarity threshold for semantic callees. + // 0 disables semantic enrichment entirely; typical non-zero value 0.6. + MinScore float32 +} + +// AnalyzeOutgoingImpactResponse carries the forward call graph: callees +// reached at distance 1 (direct) and 2..N (transitive), optionally enriched +// with embedding-similar coupled symbols. +type AnalyzeOutgoingImpactResponse struct { + Symbol *SymbolInfo `json:"symbol"` + Visibility *VisibilityInfo `json:"visibility,omitempty"` + DirectCallees []ImpactItem `json:"directCallees"` + TransitiveCallees []ImpactItem `json:"transitiveCallees,omitempty"` + SemanticCallees []SemanticCalleeInfo `json:"semanticCallees,omitempty"` + // EdgesSource is LIP's provenance tag: "tier1", "scip_with_tier1_edges", + // "scip_only", "empty". Helps consumers judge completeness. + EdgesSource string `json:"edgesSource,omitempty"` + Truncated bool `json:"truncated,omitempty"` + Provenance *Provenance `json:"provenance"` +} + +// SemanticCalleeInfo is a coupled symbol discovered by embedding similarity +// rather than the static call graph. `Source` is "semantic" (similarity-only) +// or "both" (also confirmed by static edges). +type SemanticCalleeInfo struct { + SymbolURI string `json:"symbolUri,omitempty"` + FileURI string `json:"fileUri"` + Similarity float32 `json:"similarity"` + Source string `json:"source"` +} + +// AnalyzeOutgoingImpact asks LIP for the forward call graph of a symbol. +// Requires a daemon advertising `query_outgoing_impact` (LIP v2.3.5+ for +// Go method symbols — earlier versions hit the name-bridge asymmetry and +// return empty). 
When LIP is unavailable or the RPC isn't supported the +// response carries the symbol metadata with empty callee lists and a +// provenance warning; no error is returned. +func (e *Engine) AnalyzeOutgoingImpact(ctx context.Context, opts AnalyzeOutgoingImpactOptions) (*AnalyzeOutgoingImpactResponse, error) { + startTime := time.Now() + + repoState, err := e.GetRepoState(ctx, "full") + if err != nil { + return nil, e.wrapError(err, errors.InternalError) + } + + symbolInfo, completeness, backendContribs := resolveSymbolForImpactHook(e, ctx, opts.SymbolId) + if symbolInfo == nil { + return nil, errors.NewCkbError( + errors.SymbolNotFound, + fmt.Sprintf("Symbol not found: %s", opts.SymbolId), + nil, nil, nil, + ) + } + + resp := &AnalyzeOutgoingImpactResponse{ + Symbol: symbolInfo, + Visibility: symbolInfo.Visibility, + } + + provenance := e.buildProvenance(repoState, "full", startTime, backendContribs, completeness) + + if !e.lipSupports("query_outgoing_impact") { + provenance.Warnings = append(provenance.Warnings, + "LIP daemon unavailable or does not advertise query_outgoing_impact — outgoing call graph skipped") + resp.Provenance = provenance + return resp, nil + } + + symURI := buildLIPSymbolURI(symbolInfo) + if symURI == "" { + provenance.Warnings = append(provenance.Warnings, + "symbol has no resolvable LIP URI — outgoing call graph skipped") + resp.Provenance = provenance + return resp, nil + } + + entry, err := lip.QueryOutgoingImpact(symURI, opts.MinScore) + if err != nil { + provenance.Warnings = append(provenance.Warnings, + fmt.Sprintf("LIP query_outgoing_impact failed: %v", err)) + resp.Provenance = provenance + return resp, nil + } + if entry == nil { + // LIP reached the daemon but returned no result for this symbol. + // Typically means the symbol isn't indexed or has no outgoing edges. 
+ resp.Provenance = provenance + return resp, nil + } + + external := lip.OutgoingEntryToExternal(entry) + direct, transitive := impact.FoldExternalCalleeItems(nil, nil, external, e.repoRoot) + + resp.DirectCallees = convertImpactItems(direct) + resp.TransitiveCallees = convertImpactItems(transitive) + resp.EdgesSource = entry.EdgesSource + resp.Truncated = entry.Truncated + + if external != nil { + for _, si := range external.SemanticItems { + resp.SemanticCallees = append(resp.SemanticCallees, SemanticCalleeInfo{ + SymbolURI: si.SymbolURI, + FileURI: si.FileURI, + Similarity: si.Similarity, + Source: si.Source, + }) + } + } + + sortImpactItems(resp.DirectCallees) + sortImpactItems(resp.TransitiveCallees) + sort.Slice(resp.SemanticCallees, func(i, j int) bool { + return resp.SemanticCallees[i].Similarity > resp.SemanticCallees[j].Similarity + }) + + if resp.Truncated { + provenance.Warnings = append(provenance.Warnings, + "LIP hit its 200-node BFS cap — callee set is incomplete") + } + resp.Provenance = provenance + return resp, nil +} + +// resolveSymbolForImpactHook is the indirection tests hook to bypass the +// real SCIP/resolver path. Production callers never reassign it. +var resolveSymbolForImpactHook = func(e *Engine, ctx context.Context, symbolId string) (*SymbolInfo, CompletenessInfo, []BackendContribution) { + return e.resolveSymbolForImpact(ctx, symbolId) +} + +// resolveSymbolForImpact replicates the first half of AnalyzeImpact's symbol +// lookup: SCIP backend first, then identity-resolver fallback. Kept private +// to this package — callers outside query/ should use the engine methods. 
+func (e *Engine) resolveSymbolForImpact(ctx context.Context, symbolId string) (*SymbolInfo, CompletenessInfo, []BackendContribution) { + var completeness CompletenessInfo + var contribs []BackendContribution + + resolved, _ := e.resolver.ResolveSymbolId(symbolId) + lookupId := symbolId + if resolved != nil && resolved.Symbol != nil { + lookupId = resolved.Symbol.StableId + } + + if e.scipAdapter != nil && e.scipAdapter.IsAvailable() { + result, err := e.scipAdapter.GetSymbol(ctx, lookupId) + if err == nil && result != nil { + info := &SymbolInfo{ + StableId: result.StableID, + Name: result.Name, + Kind: result.Kind, + ContainerName: result.ContainerName, + ModuleId: result.ModuleID, + Visibility: &VisibilityInfo{ + Visibility: result.Visibility, + Confidence: result.VisibilityConfidence, + Source: "scip", + }, + Location: &LocationInfo{ + FileId: result.Location.Path, + StartLine: result.Location.Line, + StartColumn: result.Location.Column, + }, + } + contribs = append(contribs, BackendContribution{ + BackendId: "scip", + Available: true, + Used: true, + Completeness: result.Completeness.Score, + }) + completeness = CompletenessInfo{ + Score: result.Completeness.Score, + Reason: string(result.Completeness.Reason), + } + return info, completeness, contribs + } + } + + if resolved != nil && resolved.Symbol != nil && resolved.Symbol.Fingerprint != nil { + info := &SymbolInfo{ + StableId: resolved.Symbol.StableId, + Name: resolved.Symbol.Fingerprint.Name, + Kind: string(resolved.Symbol.Fingerprint.Kind), + ContainerName: resolved.Symbol.Fingerprint.QualifiedContainer, + Visibility: &VisibilityInfo{ + Visibility: "unknown", + Confidence: 0.3, + Source: "default", + }, + } + completeness = CompletenessInfo{Score: 0.5, Reason: "identity-only"} + return info, completeness, contribs + } + + return nil, completeness, contribs +} + +// buildLIPSymbolURI translates a CKB symbol into the URI LIP expects on its +// query_* RPCs. 
Prefers the SCIP-form URI when the stable ID is a SCIP +// symbol (the common case post-index), falls back to the tier-1 local form +// keyed by file path + name. +func buildLIPSymbolURI(symbolInfo *SymbolInfo) string { + if symbolInfo == nil { + return "" + } + if sym := lip.SCIPSymbolToURI(symbolInfo.StableId); sym != "" && strings.HasPrefix(sym, "lip://") { + return sym + } + if symbolInfo.Location != nil && symbolInfo.Location.FileId != "" && symbolInfo.Name != "" { + return "lip://local/" + symbolInfo.Location.FileId + "#" + symbolInfo.Name + } + return "" +} diff --git a/internal/query/impact_outgoing_test.go b/internal/query/impact_outgoing_test.go new file mode 100644 index 00000000..0936aff3 --- /dev/null +++ b/internal/query/impact_outgoing_test.go @@ -0,0 +1,335 @@ +package query + +import ( + "context" + "encoding/binary" + "encoding/json" + "io" + "net" + "os" + "path/filepath" + "strings" + "sync" + "testing" + "time" + + "github.com/SimplyLiz/CodeMCP/internal/impact" +) + +// startOutgoingImpactDaemon spawns a Unix-socket fake that replies to every +// query_outgoing_impact request with `payload`. Other request types get an +// empty object, which is enough for the handshake-gated paths that never +// reach those code paths in this test. Returns a snapshot closure so tests +// can assert on recorded requests. 
+func startOutgoingImpactDaemon(t *testing.T, payload map[string]any) func() []map[string]any { + t.Helper() + dir, err := os.MkdirTemp("/tmp", "lip-outgoing") + if err != nil { + t.Fatalf("mkdirtemp: %v", err) + } + sockPath := filepath.Join(dir, "s.sock") + ln, err := net.Listen("unix", sockPath) + if err != nil { + os.RemoveAll(dir) + t.Fatalf("listen: %v", err) + } + prev := os.Getenv("LIP_SOCKET") + os.Setenv("LIP_SOCKET", sockPath) + + var ( + reqsMu sync.Mutex + reqs []map[string]any + ) + reqC := make(chan map[string]any, 16) + done := make(chan struct{}) + go func() { + defer close(done) + for r := range reqC { + reqsMu.Lock() + reqs = append(reqs, r) + reqsMu.Unlock() + } + }() + + go func() { + for { + conn, err := ln.Accept() + if err != nil { + return + } + go func(c net.Conn) { + defer c.Close() + for { + var lenBuf [4]byte + if _, err := io.ReadFull(c, lenBuf[:]); err != nil { + return + } + buf := make([]byte, binary.BigEndian.Uint32(lenBuf[:])) + if _, err := io.ReadFull(c, buf); err != nil { + return + } + var req map[string]any + _ = json.Unmarshal(buf, &req) + reqC <- req + + var resp any = map[string]any{} + if req["type"] == "query_outgoing_impact" { + resp = payload + } + out, _ := json.Marshal(resp) + var lb [4]byte + binary.BigEndian.PutUint32(lb[:], uint32(len(out))) + _, _ = c.Write(lb[:]) + _, _ = c.Write(out) + } + }(conn) + } + }() + + t.Cleanup(func() { + ln.Close() + close(reqC) + <-done + os.RemoveAll(dir) + os.Setenv("LIP_SOCKET", prev) + }) + return func() []map[string]any { + reqsMu.Lock() + defer reqsMu.Unlock() + out := make([]map[string]any, len(reqs)) + copy(out, reqs) + return out + } +} + +// newOutgoingTestEngine builds a minimal Engine that skips SCIP/resolver +// lookups: the caller-provided symbolInfo is returned directly, simulating +// a successful resolve. `supported` is the set of LIP message types the +// engine should pretend its daemon advertised. 
+func newOutgoingTestEngine(t *testing.T, supported []string) *Engine { + t.Helper() + e := &Engine{repoRoot: t.TempDir()} + e.lipSupported = make(map[string]struct{}, len(supported)) + for _, s := range supported { + e.lipSupported[s] = struct{}{} + } + return e +} + +// stubOutgoingSymbol is the SymbolInfo we feed the engine via a test-only +// override. Kept at package level so the stub resolveSymbolForImpact can +// find it. +var stubOutgoingSymbol *SymbolInfo + +func TestAnalyzeOutgoingImpact_HappyPath(t *testing.T) { + payload := map[string]any{ + "type": "outgoing_impact_result", + "result": map[string]any{ + "target_uri": "lip://local/internal/foo/bar.go#DoWork", + "direct_items": []map[string]any{ + { + "file_uri": "lip://local//repo/internal/foo/bar.go", + "symbol_uri": "lip://local//repo/internal/foo/bar.go#helper", + "distance": 1, + "confidence": 0.95, + }, + { + "file_uri": "lip://local//repo/internal/baz/qux.go", + "symbol_uri": "lip://local//repo/internal/baz/qux.go#validate", + "distance": 1, + "confidence": 0.9, + }, + }, + "transitive_items": []map[string]any{ + { + "file_uri": "lip://local//repo/internal/deep/x.go", + "symbol_uri": "lip://local//repo/internal/deep/x.go#log", + "distance": 2, + "confidence": 0.8, + }, + }, + "edges_source": "scip_with_tier1_edges", + "truncated": false, + "semantic_items": []map[string]any{ + { + "file_uri": "lip://local//repo/internal/sibling/y.go", + "symbol_uri": "lip://local//repo/internal/sibling/y.go#related", + "similarity": 0.82, + "source": "semantic", + }, + }, + }, + } + snap := startOutgoingImpactDaemon(t, payload) + + e := newOutgoingTestEngine(t, []string{"query_outgoing_impact"}) + // Inject a pre-resolved symbol by bypassing resolveSymbolForImpact via + // the swap hook below; we wrap the method in a way that avoids touching + // engine_test.go infrastructure. 
+ withStubResolver(t, &SymbolInfo{ + StableId: "lip://scip-go/gomod/pkg@1/internal/foo.go/DoWork().", + Name: "DoWork", + Kind: "function", + Location: &LocationInfo{FileId: "internal/foo/bar.go"}, + }) + + resp, err := e.AnalyzeOutgoingImpact(context.Background(), AnalyzeOutgoingImpactOptions{ + SymbolId: "DoWork", + MinScore: 0.6, + }) + if err != nil { + t.Fatalf("AnalyzeOutgoingImpact: %v", err) + } + if len(resp.DirectCallees) != 2 { + t.Errorf("DirectCallees = %d, want 2", len(resp.DirectCallees)) + } + for _, c := range resp.DirectCallees { + if c.Kind != string(impact.DirectCallee) { + t.Errorf("direct callee kind = %q, want direct-callee", c.Kind) + } + } + if len(resp.TransitiveCallees) != 1 { + t.Errorf("TransitiveCallees = %d, want 1", len(resp.TransitiveCallees)) + } + if resp.TransitiveCallees[0].Kind != string(impact.TransitiveCallee) { + t.Errorf("transitive callee kind = %q, want transitive-callee", resp.TransitiveCallees[0].Kind) + } + if resp.EdgesSource != "scip_with_tier1_edges" { + t.Errorf("EdgesSource = %q, want scip_with_tier1_edges", resp.EdgesSource) + } + if len(resp.SemanticCallees) != 1 { + t.Errorf("SemanticCallees = %d, want 1", len(resp.SemanticCallees)) + } + if resp.SemanticCallees[0].Source != "semantic" { + t.Errorf("semantic source = %q, want semantic", resp.SemanticCallees[0].Source) + } + if resp.Truncated { + t.Error("Truncated = true unexpectedly") + } + + reqs := snap() + var sawOutgoing bool + for _, r := range reqs { + if r["type"] == "query_outgoing_impact" { + sawOutgoing = true + if r["symbol_uri"] == nil || r["symbol_uri"] == "" { + t.Errorf("request symbol_uri empty: %+v", r) + } + } + } + if !sawOutgoing { + t.Errorf("no query_outgoing_impact request observed; requests=%+v", reqs) + } +} + +func TestAnalyzeOutgoingImpact_LipUnsupported(t *testing.T) { + // No daemon started — lipSupported is empty. 
+ e := newOutgoingTestEngine(t, nil) + withStubResolver(t, &SymbolInfo{ + StableId: "some-id", + Name: "DoWork", + Location: &LocationInfo{FileId: "internal/foo/bar.go"}, + }) + + resp, err := e.AnalyzeOutgoingImpact(context.Background(), AnalyzeOutgoingImpactOptions{ + SymbolId: "DoWork", + }) + if err != nil { + t.Fatalf("AnalyzeOutgoingImpact: %v", err) + } + if len(resp.DirectCallees) != 0 || len(resp.TransitiveCallees) != 0 { + t.Errorf("expected empty callees when LIP unsupported, got direct=%d transitive=%d", + len(resp.DirectCallees), len(resp.TransitiveCallees)) + } + if resp.Provenance == nil || len(resp.Provenance.Warnings) == 0 { + t.Fatal("expected provenance warning when LIP unsupported") + } + if !warningsContain(resp.Provenance.Warnings, "query_outgoing_impact") { + t.Errorf("warning missing 'query_outgoing_impact': %v", resp.Provenance.Warnings) + } +} + +func TestAnalyzeOutgoingImpact_TruncatedPropagates(t *testing.T) { + payload := map[string]any{ + "type": "outgoing_impact_result", + "result": map[string]any{ + "target_uri": "lip://local/foo#bar", + "direct_items": []map[string]any{}, + "transitive_items": []map[string]any{}, + "edges_source": "scip_with_tier1_edges", + "truncated": true, + "semantic_items": []map[string]any{}, + }, + } + startOutgoingImpactDaemon(t, payload) + + e := newOutgoingTestEngine(t, []string{"query_outgoing_impact"}) + withStubResolver(t, &SymbolInfo{ + StableId: "x", + Name: "bar", + Location: &LocationInfo{FileId: "foo.go"}, + }) + + resp, err := e.AnalyzeOutgoingImpact(context.Background(), AnalyzeOutgoingImpactOptions{ + SymbolId: "bar", + }) + if err != nil { + t.Fatalf("AnalyzeOutgoingImpact: %v", err) + } + if !resp.Truncated { + t.Fatal("Truncated not propagated") + } + if !warningsContain(resp.Provenance.Warnings, "200-node") { + t.Errorf("missing truncation warning: %v", resp.Provenance.Warnings) + } +} + +func TestAnalyzeOutgoingImpact_LipReturnsNilResult(t *testing.T) { + // daemon returns 
outgoing_impact_result with no result field + payload := map[string]any{"type": "outgoing_impact_result"} + startOutgoingImpactDaemon(t, payload) + + e := newOutgoingTestEngine(t, []string{"query_outgoing_impact"}) + withStubResolver(t, &SymbolInfo{ + StableId: "x", + Name: "bar", + Location: &LocationInfo{FileId: "foo.go"}, + }) + + resp, err := e.AnalyzeOutgoingImpact(context.Background(), AnalyzeOutgoingImpactOptions{ + SymbolId: "bar", + }) + if err != nil { + t.Fatalf("AnalyzeOutgoingImpact: %v", err) + } + if len(resp.DirectCallees) != 0 || len(resp.TransitiveCallees) != 0 { + t.Errorf("expected empty callees on nil result; got direct=%d transitive=%d", + len(resp.DirectCallees), len(resp.TransitiveCallees)) + } +} + +// withStubResolver installs a test hook that short-circuits +// resolveSymbolForImpact to the provided info. Cleanup restores the +// production implementation. +func withStubResolver(t *testing.T, info *SymbolInfo) { + t.Helper() + prev := resolveSymbolForImpactHook + stubOutgoingSymbol = info + resolveSymbolForImpactHook = func(_ *Engine, _ context.Context, _ string) (*SymbolInfo, CompletenessInfo, []BackendContribution) { + return stubOutgoingSymbol, CompletenessInfo{Score: 1.0, Reason: "stub"}, nil + } + t.Cleanup(func() { resolveSymbolForImpactHook = prev }) +} + +func warningsContain(haystack []string, needle string) bool { + for _, s := range haystack { + if strings.Contains(s, needle) { + return true + } + } + return false +} + +// Ensure time import used for build green when test file compiles without +// reaching code paths that take a deadline. 
(keeps go vet quiet) +var _ = time.Second diff --git a/internal/query/impact_test.go b/internal/query/impact_test.go index 387b4341..5bbdd979 100644 --- a/internal/query/impact_test.go +++ b/internal/query/impact_test.go @@ -1,13 +1,139 @@ package query import ( + "math" "testing" "time" + "github.com/SimplyLiz/CodeMCP/internal/cartographer" "github.com/SimplyLiz/CodeMCP/internal/impact" "github.com/SimplyLiz/CodeMCP/internal/telemetry" ) +func ptrF64(v float64) *float64 { return &v } + +func TestBridgeMultiplierFromGraph(t *testing.T) { + tests := []struct { + name string + nodes []cartographer.GraphNode + files []string + wantMul float64 + wantFactor bool + wantValue float64 // only checked when wantFactor=true + }{ + { + name: "no nodes", + nodes: nil, + files: []string{"a.go"}, + wantMul: 1.0, + wantFactor: false, + }, + { + name: "no files", + nodes: []cartographer.GraphNode{{Path: "a.go", BridgeScore: ptrF64(500)}}, + files: nil, + wantMul: 1.0, + wantFactor: false, + }, + { + name: "no BridgeScore populated", + nodes: []cartographer.GraphNode{ + {Path: "a.go", BridgeScore: nil}, + }, + files: []string{"a.go"}, + wantMul: 1.0, + wantFactor: false, + }, + { + name: "file does not match any node", + nodes: []cartographer.GraphNode{ + {Path: "b.go", BridgeScore: ptrF64(500)}, + }, + files: []string{"a.go"}, + wantMul: 1.0, + wantFactor: false, + }, + { + name: "match by path yields 1 + score/1000", + nodes: []cartographer.GraphNode{ + {Path: "a.go", BridgeScore: ptrF64(250)}, + }, + files: []string{"a.go"}, + wantMul: 1.25, + wantFactor: true, + wantValue: 0.25, + }, + { + name: "match by module id", + nodes: []cartographer.GraphNode{ + {ModuleID: "pkg/foo", Path: "", BridgeScore: ptrF64(600)}, + }, + files: []string{"pkg/foo"}, + wantMul: 1.6, + wantFactor: true, + wantValue: 0.6, + }, + { + name: "multiple files takes max score", + nodes: []cartographer.GraphNode{ + {Path: "low.go", BridgeScore: ptrF64(100)}, + {Path: "high.go", BridgeScore: ptrF64(800)}, + 
{Path: "mid.go", BridgeScore: ptrF64(400)}, + }, + files: []string{"low.go", "high.go", "mid.go"}, + wantMul: 1.8, + wantFactor: true, + wantValue: 0.8, + }, + { + name: "cap at 2.0 for over-1000 scores", + nodes: []cartographer.GraphNode{ + {Path: "a.go", BridgeScore: ptrF64(1500)}, + }, + files: []string{"a.go"}, + wantMul: 2.0, + wantFactor: true, + wantValue: 1.5, + }, + { + name: "path match wins over module match for same file", + nodes: []cartographer.GraphNode{ + {Path: "a.go", ModuleID: "a.go", BridgeScore: ptrF64(300)}, + {Path: "", ModuleID: "a.go", BridgeScore: ptrF64(900)}, + }, + files: []string{"a.go"}, + wantMul: 1.3, + wantFactor: true, + wantValue: 0.3, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mul, factor := bridgeMultiplierFromGraph(tt.nodes, tt.files) + if math.Abs(mul-tt.wantMul) > 1e-9 { + t.Errorf("multiplier = %v, want %v", mul, tt.wantMul) + } + if tt.wantFactor { + if factor == nil { + t.Fatalf("expected factor, got nil") + } + if factor.Name != "bridge_centrality" { + t.Errorf("factor.Name = %q, want %q", factor.Name, "bridge_centrality") + } + if factor.Weight != 0 { + t.Errorf("factor.Weight = %v, want 0 (informational)", factor.Weight) + } + if math.Abs(factor.Value-tt.wantValue) > 1e-9 { + t.Errorf("factor.Value = %v, want %v", factor.Value, tt.wantValue) + } + } else if factor != nil { + t.Errorf("expected nil factor, got %+v", factor) + } + }) + } +} + func TestFilterTestReferences(t *testing.T) { tests := []struct { name string diff --git a/internal/query/lip_health.go b/internal/query/lip_health.go index e83c3700..188c311e 100644 --- a/internal/query/lip_health.go +++ b/internal/query/lip_health.go @@ -97,6 +97,17 @@ func (e *Engine) probeHandshake() { supported[m] = struct{}{} } + // Register our workspace root so LIP canonicalises relative lip://local/ + // URIs against the correct absolute path. 
Gated on capability — older + // daemons (pre-v2.3.1) reject with UnknownMessage; they auto-detect roots + // from file-level indexing instead, so skipping is safe. + // + // Idempotent on the daemon side: repeated calls (reconnects, multi-engine + // test setups) are no-ops rather than errors. + if _, ok := supported["register_project_root"]; ok && e.repoRoot != "" { + _, _ = lip.RegisterProjectRoot(e.repoRoot) + } + // Follow up with a cheap IndexStatus probe so callers can distinguish // "daemon down" from "daemon up but has no content for this workspace". // Failures here are non-fatal — we just leave lipIndexProbed=false and diff --git a/internal/query/lip_health_test.go b/internal/query/lip_health_test.go index dbb43b65..82971a7a 100644 --- a/internal/query/lip_health_test.go +++ b/internal/query/lip_health_test.go @@ -7,6 +7,7 @@ import ( "net" "os" "path/filepath" + "sync" "sync/atomic" "testing" "time" @@ -189,6 +190,154 @@ func TestGetDegradationWarnings_LipMixedModels(t *testing.T) { } } +// startLipProbeDaemon handles the three RPCs probeHandshake issues — +// handshake, register_project_root, query_index_status — and records every +// decoded request. supported_messages is set from the supported param so +// tests can toggle the register_project_root feature gate. Returns a +// snapshot closure that safely reads the recorded requests under the +// daemon's own mutex. 
+func startLipProbeDaemon(t *testing.T, supported []string) func() []map[string]any { + t.Helper() + dir, err := os.MkdirTemp("/tmp", "lip") + if err != nil { + t.Fatalf("mkdirtemp: %v", err) + } + sockPath := filepath.Join(dir, "s.sock") + ln, err := net.Listen("unix", sockPath) + if err != nil { + os.RemoveAll(dir) + t.Fatalf("listen: %v", err) + } + + prev := os.Getenv("LIP_SOCKET") + os.Setenv("LIP_SOCKET", sockPath) + + var mu sync.Mutex + reqs := []map[string]any{} + + go func() { + for { + conn, err := ln.Accept() + if err != nil { + return + } + go func(c net.Conn) { + defer c.Close() + var lenBuf [4]byte + if _, err := io.ReadFull(c, lenBuf[:]); err != nil { + return + } + reqLen := binary.BigEndian.Uint32(lenBuf[:]) + buf := make([]byte, reqLen) + if _, err := io.ReadFull(c, buf); err != nil { + return + } + var req map[string]any + _ = json.Unmarshal(buf, &req) + + mu.Lock() + reqs = append(reqs, req) + mu.Unlock() + + var resp any + switch req["type"] { + case "handshake": + resp = map[string]any{ + "daemon_version": "2.3.1", + "protocol_version": 3, + "supported_messages": supported, + } + case "register_project_root": + resp = map[string]any{"accepted": true} + case "query_index_status": + resp = map[string]any{ + "indexed_files": 10, + "pending_embedding_files": 0, + "mixed_models": false, + "models_in_index": []string{"model-a"}, + } + default: + resp = map[string]any{} + } + payload, _ := json.Marshal(resp) + var out [4]byte + binary.BigEndian.PutUint32(out[:], uint32(len(payload))) + _, _ = c.Write(out[:]) + _, _ = c.Write(payload) + }(conn) + } + }() + + t.Cleanup(func() { + ln.Close() + os.RemoveAll(dir) + os.Setenv("LIP_SOCKET", prev) + }) + + return func() []map[string]any { + mu.Lock() + defer mu.Unlock() + out := make([]map[string]any, len(reqs)) + copy(out, reqs) + return out + } +} + +func TestProbeHandshake_RegistersProjectRoot(t *testing.T) { + snapshot := startLipProbeDaemon(t, []string{"register_project_root", "query_index_status"}) + 
+ e := &Engine{repoRoot: "/my/repo/root"} + e.probeHandshake() + + got := snapshot() + var seenRegister bool + for _, r := range got { + if r["type"] == "register_project_root" { + seenRegister = true + if r["root"] != "/my/repo/root" { + t.Errorf("root = %v, want /my/repo/root", r["root"]) + } + } + } + if !seenRegister { + types := make([]string, len(got)) + for i, r := range got { + types[i], _ = r["type"].(string) + } + t.Errorf("register_project_root not sent; request types: %v", types) + } +} + +// When the daemon doesn't advertise register_project_root, probeHandshake +// must not send it (older daemons reject as UnknownMessage). +func TestProbeHandshake_SkipsRegisterWhenUnsupported(t *testing.T) { + snapshot := startLipProbeDaemon(t, []string{"query_index_status"}) + + e := &Engine{repoRoot: "/my/repo/root"} + e.probeHandshake() + + for _, r := range snapshot() { + if r["type"] == "register_project_root" { + t.Errorf("register_project_root sent despite not being advertised") + } + } +} + +// Engines without a repoRoot (unusual but possible in test fixtures) must +// still handshake cleanly without trying to register an empty root. +func TestProbeHandshake_SkipsRegisterWhenNoRepoRoot(t *testing.T) { + snapshot := startLipProbeDaemon(t, []string{"register_project_root", "query_index_status"}) + + e := &Engine{repoRoot: ""} + e.probeHandshake() + + for _, r := range snapshot() { + if r["type"] == "register_project_root" { + t.Errorf("register_project_root sent with empty repoRoot") + } + } +} + func TestGetDegradationWarnings_NoWarningBeforeFirstProbe(t *testing.T) { // No daemon running — subscriber can't prime the cache, so no warning // should surface regardless of what MixedModels *would* be. 
diff --git a/internal/query/outgoing_fold_integration_test.go b/internal/query/outgoing_fold_integration_test.go new file mode 100644 index 00000000..92703125 --- /dev/null +++ b/internal/query/outgoing_fold_integration_test.go @@ -0,0 +1,114 @@ +package query + +import ( + "testing" + + "github.com/SimplyLiz/CodeMCP/internal/impact" + "github.com/SimplyLiz/CodeMCP/internal/lip" +) + +// TestOutgoingFoldIntegration exercises the seam between the lip and +// impact packages the way AnalyzeOutgoingImpact does: LIP wire-shape entry +// → OutgoingEntryToExternal → FoldExternalCalleeItems. This catches glue +// regressions that unit tests in either package alone would miss — e.g. +// the Kind tagging contract between the two sides, or a field rename +// breaking the conversion silently. +func TestOutgoingFoldIntegration(t *testing.T) { + entry := &lip.OutgoingImpactEntry{ + TargetURI: "lip://local//repo/internal/foo/bar.go#DoWork", + DirectItems: []lip.BlastRadiusItem{ + { + FileURI: "lip://local//repo/internal/foo/bar.go", + SymbolURI: "lip://local//repo/internal/foo/bar.go#helper", + Distance: 1, + Confidence: 0.95, + }, + { + FileURI: "lip://local//repo/internal/baz/qux.go", + SymbolURI: "lip://local//repo/internal/baz/qux.go#validate", + Distance: 1, + Confidence: 0.9, + }, + }, + TransitiveItems: []lip.BlastRadiusItem{ + { + FileURI: "lip://local//repo/internal/deep/x.go", + SymbolURI: "lip://local//repo/internal/deep/x.go#log", + Distance: 2, + Confidence: 0.8, + }, + }, + SemanticItems: []lip.BlastRadiusSemanticItem{ + { + FileURI: "lip://local//repo/internal/sibling/y.go", + SymbolURI: "lip://local//repo/internal/sibling/y.go#related", + Similarity: 0.82, + Source: "semantic", + }, + }, + EdgesSource: impact.EdgesSourceScipWithTier1Edges, + Truncated: false, + } + + external := lip.OutgoingEntryToExternal(entry) + if external == nil { + t.Fatal("OutgoingEntryToExternal returned nil for non-nil entry") + } + if external.EdgesSource != 
impact.EdgesSourceScipWithTier1Edges { + t.Errorf("EdgesSource round-trip = %q, want %q", + external.EdgesSource, impact.EdgesSourceScipWithTier1Edges) + } + + direct, transitive := impact.FoldExternalCalleeItems(nil, nil, external, "/repo") + + if len(direct) != 2 { + t.Fatalf("direct callees = %d, want 2", len(direct)) + } + for _, d := range direct { + if d.Kind != impact.DirectCallee { + t.Errorf("direct kind = %q, want %q", d.Kind, impact.DirectCallee) + } + if d.Distance != 1 { + t.Errorf("direct distance = %d, want 1", d.Distance) + } + } + + if len(transitive) != 1 { + t.Fatalf("transitive callees = %d, want 1", len(transitive)) + } + if transitive[0].Kind != impact.TransitiveCallee { + t.Errorf("transitive kind = %q, want %q", + transitive[0].Kind, impact.TransitiveCallee) + } + if transitive[0].Distance != 2 { + t.Errorf("transitive distance = %d, want 2", transitive[0].Distance) + } + + // Semantic items survive the conversion but live on the external struct — + // the fold doesn't promote them to ImpactItems (AnalyzeOutgoingImpact + // maps them into SemanticCalleeInfo separately). + if len(external.SemanticItems) != 1 { + t.Errorf("semantic items = %d, want 1", len(external.SemanticItems)) + } + if external.SemanticItems[0].Source != "semantic" { + t.Errorf("semantic source = %q, want semantic", + external.SemanticItems[0].Source) + } +} + +// TestOutgoingFoldIntegration_EmptyEdgesSource verifies that an "empty" +// provenance flag bypasses the fold even when items are present. This is +// LIP's explicit signal that it has no static edge evidence to contribute. 
+func TestOutgoingFoldIntegration_EmptyEdgesSource(t *testing.T) { + entry := &lip.OutgoingImpactEntry{ + DirectItems: []lip.BlastRadiusItem{ + {SymbolURI: "lip://local//repo/a.go#A", Distance: 1, Confidence: 0.9}, + }, + EdgesSource: impact.EdgesSourceEmpty, + } + external := lip.OutgoingEntryToExternal(entry) + direct, _ := impact.FoldExternalCalleeItems(nil, nil, external, "/repo") + if len(direct) != 0 { + t.Errorf("empty EdgesSource should bypass fold, got %d items", len(direct)) + } +} diff --git a/internal/query/review_blastradius.go b/internal/query/review_blastradius.go index 0e9a601e..4ca6b5e6 100644 --- a/internal/query/review_blastradius.go +++ b/internal/query/review_blastradius.go @@ -7,6 +7,8 @@ import ( "time" "github.com/SimplyLiz/CodeMCP/internal/cartographer" + "github.com/SimplyLiz/CodeMCP/internal/impact" + "github.com/SimplyLiz/CodeMCP/internal/lip" ) // checkBlastRadius checks if changed symbols have high fan-out (many callers). @@ -27,6 +29,24 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op } } + // Prefetch LIP blast radius for all changed files in a single round-trip. + // Returns nil when LIP is unavailable or doesn't support the message — the + // rest of the function degrades to SCIP-only blast radius unchanged. + var lipBR map[string]*impact.ExternalBlastRadius + if e.lipSupports("query_blast_radius_batch") { + lipURIs := make([]string, len(changedFiles)) + for i, f := range changedFiles { + lipURIs[i] = "lip://local/" + f + } + if raw, _ := lip.QueryBlastRadiusBatch(lipURIs, 0.6); raw != nil { + lipBR = make(map[string]*impact.ExternalBlastRadius, len(raw.Entries)) + for k, v := range raw.Entries { + vCopy := v + lipBR[k] = lip.EntryToExternal(&vCopy) + } + } + } + // Collect symbols from changed files, cap at 30 total. // Only include functions and methods — variable references are typically // framework wiring (cobra commands, Spring beans, Qt signals) not real callers. 
@@ -83,13 +103,58 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op continue } + // Merge LIP semantic enrichment into the SCIP-derived blast radius. + // Keyed by symbol's stable ID which maps to LIP's symbol_uri via the + // "lip://local/#" convention. + semanticCount := 0 + if lipBR != nil { + if enriched, ok := lip.LookupSymbol(lipBR, sym.file, sym.name); ok { + // Convert BlastRadiusSummary → impact.BlastRadius for merge + staticBR := &impact.BlastRadius{ + ModuleCount: impactResp.BlastRadius.ModuleCount, + FileCount: impactResp.BlastRadius.FileCount, + UniqueCallerCount: impactResp.BlastRadius.UniqueCallerCount, + RiskLevel: impactResp.BlastRadius.RiskLevel, + } + merged := impact.MergeBlastRadius(staticBR, enriched) + if merged != nil { + impactResp.BlastRadius = &BlastRadiusSummary{ + ModuleCount: merged.ModuleCount, + FileCount: merged.FileCount, + UniqueCallerCount: merged.UniqueCallerCount, + RiskLevel: merged.RiskLevel, + StaticCallerCount: merged.StaticCallerCount, + SemanticCallerCount: merged.SemanticCallerCount, + ConfirmedCount: merged.ConfirmedCount, + } + for _, sc := range merged.SemanticCallers { + impactResp.BlastRadius.SemanticCallers = append( + impactResp.BlastRadius.SemanticCallers, + SemanticCallerInfo{ + SymbolURI: sc.SymbolURI, + FileURI: sc.FileURI, + Tier: string(sc.Tier), + Confidence: sc.Confidence, + Similarity: sc.Similarity, + }, + ) + } + semanticCount = merged.SemanticCallerCount + } + } + } + callerCount := impactResp.BlastRadius.UniqueCallerCount if informationalMode { // In informational mode, only surface symbols with meaningful fan-out. // Symbols with 1-2 callers are normal coupling; 3+ suggests a change // that could ripple further than expected. 
- if callerCount >= 3 { + if callerCount >= 3 || semanticCount > 0 { + msg := fmt.Sprintf("Fan-out: %s has %d callers", sym.name, callerCount) + if semanticCount > 0 { + msg += fmt.Sprintf(" (+%d semantically coupled)", semanticCount) + } hint := "" if sym.name != "" { hint = fmt.Sprintf("→ ckb explain %s", sym.name) @@ -104,13 +169,17 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op Check: "blast-radius", Severity: severity, File: sym.file, - Message: fmt.Sprintf("Fan-out: %s has %d callers", sym.name, callerCount), + Message: msg, Category: "risk", RuleID: "ckb/blast-radius/high-fanout", Hint: hint, }) } } else if callerCount > maxFanOut { + msg := fmt.Sprintf("High fan-out: %s has %d callers (threshold: %d)", sym.name, callerCount, maxFanOut) + if semanticCount > 0 { + msg += fmt.Sprintf(" (+%d semantically coupled)", semanticCount) + } hint := "" if sym.name != "" { hint = fmt.Sprintf("→ ckb explain %s", sym.name) @@ -119,7 +188,7 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op Check: "blast-radius", Severity: "warning", File: sym.file, - Message: fmt.Sprintf("High fan-out: %s has %d callers (threshold: %d)", sym.name, callerCount, maxFanOut), + Message: msg, Category: "risk", RuleID: "ckb/blast-radius/high-fanout", Hint: hint, diff --git a/internal/query/symbol_exists.go b/internal/query/symbol_exists.go new file mode 100644 index 00000000..dd4f8145 --- /dev/null +++ b/internal/query/symbol_exists.go @@ -0,0 +1,138 @@ +package query + +import ( + "context" + "sort" + "strings" + "time" + + "github.com/SimplyLiz/CodeMCP/internal/errors" +) + +// SymbolExistsOptions is the input for SymbolExists. +type SymbolExistsOptions struct { + Name string + Kinds []string + Scope string + IncludeExternal bool +} + +// SymbolExistsResult is the response for SymbolExists. 
+type SymbolExistsResult struct { + Exists bool `json:"exists"` + Matches int `json:"matches"` + Kinds []string `json:"kinds"` + Receivers []string `json:"receivers,omitempty"` + StaleIndex bool `json:"staleIndex,omitempty"` + Provenance *Provenance `json:"provenance"` +} + +// SymbolExists answers whether a bare symbol name has any declaration in the index. +// Unlike SearchSymbols it queries symbols_fts_content directly with an exact WHERE +// clause, bypassing FTS5 tokenisation — so class methods and object-property +// declarations whose bare leaf name never surfaces through FTS ranking are found +// reliably. +func (e *Engine) SymbolExists(ctx context.Context, opts SymbolExistsOptions) (*SymbolExistsResult, error) { + startTime := time.Now() + + if opts.Name == "" { + return nil, errors.NewInvalidParameterError("name", "name is required") + } + + repoState, err := e.GetRepoState(ctx, "head") + if err != nil { + return nil, e.wrapError(err, errors.InternalError) + } + + notFound := func(reason string) *SymbolExistsResult { + return &SymbolExistsResult{ + Exists: false, + Matches: 0, + Kinds: []string{}, + Provenance: e.buildProvenance(repoState, "head", startTime, nil, + CompletenessInfo{Score: 0.5, Reason: reason}), + } + } + + if e.db == nil { + return notFound("db-unavailable"), nil + } + + sqlStr := `SELECT name, kind, COALESCE(signature, '') FROM symbols_fts_content WHERE name = ?` + args := []interface{}{opts.Name} + + if opts.Scope != "" { + sqlStr += ` AND file_path LIKE ?` + args = append(args, opts.Scope+"%") + } + + if !opts.IncludeExternal { + sqlStr += ` AND file_path NOT LIKE '%node_modules%'` + } + + if len(opts.Kinds) > 0 { + placeholders := strings.Repeat("?,", len(opts.Kinds)) + placeholders = placeholders[:len(placeholders)-1] + sqlStr += ` AND kind IN (` + placeholders + `)` + for _, k := range opts.Kinds { + args = append(args, k) + } + } + + rows, err := e.db.Query(sqlStr, args...) 
+ if err != nil { + // Content table may not exist yet (index not yet populated); treat as not found. + return notFound("fts-unavailable"), nil + } + defer rows.Close() //nolint:errcheck + + kindsSet := map[string]bool{} + receiversSet := map[string]bool{} + matchCount := 0 + + for rows.Next() { + var name, kind, signature string + if scanErr := rows.Scan(&name, &kind, &signature); scanErr != nil { + continue + } + matchCount++ + if kind != "" { + kindsSet[kind] = true + } + // signature is "ReceiverName.leafName" for methods and properties. + if strings.HasSuffix(signature, "."+name) { + receiver := signature[:len(signature)-len("."+name)] + if receiver != "" { + receiversSet[receiver] = true + } + } + } + if rowsErr := rows.Err(); rowsErr != nil { + return nil, e.wrapError(rowsErr, errors.InternalError) + } + + kinds := make([]string, 0, len(kindsSet)) + for k := range kindsSet { + kinds = append(kinds, k) + } + sort.Strings(kinds) + + var receivers []string + if len(receiversSet) > 0 { + receivers = make([]string, 0, len(receiversSet)) + for r := range receiversSet { + receivers = append(receivers, r) + } + sort.Strings(receivers) + } + + return &SymbolExistsResult{ + Exists: matchCount > 0, + Matches: matchCount, + Kinds: kinds, + Receivers: receivers, + StaleIndex: repoState.Dirty, + Provenance: e.buildProvenance(repoState, "head", startTime, nil, + CompletenessInfo{Score: 1.0, Reason: "exact-match"}), + }, nil +} diff --git a/internal/query/symbol_exists_test.go b/internal/query/symbol_exists_test.go new file mode 100644 index 00000000..4a6f6d51 --- /dev/null +++ b/internal/query/symbol_exists_test.go @@ -0,0 +1,168 @@ +package query + +import ( + "context" + "testing" + + "github.com/SimplyLiz/CodeMCP/internal/storage" +) + +func seedSymbolExistsFixture(t *testing.T, e *Engine) { + t.Helper() + ctx := context.Background() + ftsManager := storage.NewFTSManager(e.db.Conn(), storage.DefaultFTSConfig()) + if err := ftsManager.InitSchema(); err != nil { + 
t.Fatalf("InitSchema: %v", err) + } + records := []storage.SymbolFTSRecord{ + {ID: "s1", Name: "ENV_PATH", Kind: "const", Signature: "ENV_PATH", FilePath: "src/fixtures.ts", Language: "typescript"}, + {ID: "s2", Name: "ReportPersistenceService", Kind: "class", Signature: "ReportPersistenceService", FilePath: "src/fixtures.ts", Language: "typescript"}, + {ID: "s3", Name: "saveReport", Kind: "method", Signature: "ReportPersistenceService.saveReport", FilePath: "src/fixtures.ts", Language: "typescript"}, + {ID: "s4", Name: "trackUsage", Kind: "method", Signature: "ReportPersistenceService.trackUsage", FilePath: "src/fixtures.ts", Language: "typescript"}, + {ID: "s5", Name: "setApiKey", Kind: "property", Signature: "settingsRouter.setApiKey", FilePath: "src/fixtures.ts", Language: "typescript"}, + } + if err := ftsManager.BulkInsert(ctx, records); err != nil { + t.Fatalf("BulkInsert: %v", err) + } +} + +func TestSymbolExists(t *testing.T) { + engine, cleanup := testEngine(t) + defer cleanup() + seedSymbolExistsFixture(t, engine) + + ctx := context.Background() + + tests := []struct { + name string + opts SymbolExistsOptions + wantExists bool + wantMatches int + wantKinds []string + wantReceiver string // expected receiver, or "" if none + }{ + { + name: "top-level const", + opts: SymbolExistsOptions{Name: "ENV_PATH"}, + wantExists: true, + wantMatches: 1, + wantKinds: []string{"const"}, + }, + { + name: "class", + opts: SymbolExistsOptions{Name: "ReportPersistenceService"}, + wantExists: true, + wantMatches: 1, + wantKinds: []string{"class"}, + }, + { + name: "class method", + opts: SymbolExistsOptions{Name: "saveReport"}, + wantExists: true, + wantMatches: 1, + wantKinds: []string{"method"}, + wantReceiver: "ReportPersistenceService", + }, + { + name: "private class method", + opts: SymbolExistsOptions{Name: "trackUsage"}, + wantExists: true, + wantMatches: 1, + wantKinds: []string{"method"}, + wantReceiver: "ReportPersistenceService", + }, + { + name: "object 
property", + opts: SymbolExistsOptions{Name: "setApiKey"}, + wantExists: true, + wantMatches: 1, + wantKinds: []string{"property"}, + wantReceiver: "settingsRouter", + }, + { + name: "nonexistent symbol", + opts: SymbolExistsOptions{Name: "fakeNameNobodyWrote"}, + wantExists: false, + wantMatches: 0, + wantKinds: []string{}, + }, + { + name: "kind filter — matches", + opts: SymbolExistsOptions{Name: "saveReport", Kinds: []string{"method"}}, + wantExists: true, + wantMatches: 1, + wantKinds: []string{"method"}, + }, + { + name: "kind filter — excludes", + opts: SymbolExistsOptions{Name: "saveReport", Kinds: []string{"class"}}, + wantExists: false, + wantMatches: 0, + wantKinds: []string{}, + }, + { + name: "scope filter — matches", + opts: SymbolExistsOptions{Name: "ENV_PATH", Scope: "src/"}, + wantExists: true, + wantMatches: 1, + }, + { + name: "scope filter — excludes", + opts: SymbolExistsOptions{Name: "ENV_PATH", Scope: "other/"}, + wantExists: false, + wantMatches: 0, + wantKinds: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := engine.SymbolExists(ctx, tt.opts) + if err != nil { + t.Fatalf("SymbolExists error: %v", err) + } + if result.Exists != tt.wantExists { + t.Errorf("Exists = %v, want %v", result.Exists, tt.wantExists) + } + if result.Matches != tt.wantMatches { + t.Errorf("Matches = %d, want %d", result.Matches, tt.wantMatches) + } + if tt.wantKinds != nil { + if len(result.Kinds) != len(tt.wantKinds) { + t.Errorf("Kinds = %v, want %v", result.Kinds, tt.wantKinds) + } else { + for i, k := range result.Kinds { + if k != tt.wantKinds[i] { + t.Errorf("Kinds[%d] = %q, want %q", i, k, tt.wantKinds[i]) + } + } + } + } + if tt.wantReceiver != "" { + found := false + for _, r := range result.Receivers { + if r == tt.wantReceiver { + found = true + break + } + } + if !found { + t.Errorf("Receivers = %v, want to contain %q", result.Receivers, tt.wantReceiver) + } + } + if result.Provenance == nil { + 
t.Error("Provenance should not be nil") + } + }) + } +} + +func TestSymbolExistsEmptyName(t *testing.T) { + engine, cleanup := testEngine(t) + defer cleanup() + + _, err := engine.SymbolExists(context.Background(), SymbolExistsOptions{Name: ""}) + if err == nil { + t.Error("expected error for empty name") + } +} diff --git a/internal/version/version.go b/internal/version/version.go index 1caffe0a..032004fe 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -6,7 +6,7 @@ package version // go build -ldflags "-X github.com/SimplyLiz/CodeMCP/internal/version.Version=1.0.0 -X github.com/SimplyLiz/CodeMCP/internal/version.Commit=abc123" var ( // Version is the semantic version of CKB - Version = "9.1.0" + Version = "9.2.0" // Commit is the git commit hash (set at build time) Commit = "unknown" diff --git a/npm/package.json b/npm/package.json index d793c0d7..c96d94a8 100644 --- a/npm/package.json +++ b/npm/package.json @@ -1,7 +1,7 @@ { "name": "@tastehub/ckb", "mcpName": "io.github.SimplyLiz/ckb", - "version": "9.1.0", + "version": "9.2.0", "description": "Code intelligence for AI assistants (MCP), CLI, and HTTP API - symbol navigation, impact analysis, architecture", "keywords": [ "mcp", diff --git a/scripts/sync-cartographer.sh b/scripts/sync-cartographer.sh new file mode 100755 index 00000000..24865778 --- /dev/null +++ b/scripts/sync-cartographer.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# +# Sync the vendored Cartographer source tree at +# third_party/cartographer/mapper-core/cartographer/ from an upstream checkout. +# +# Why vendor at all: CKB links libcartographer.a at build time. Shipping the +# Rust source alongside Go keeps reproducible builds (no network fetch, no +# version-skew between Go code and FFI surface) and lets CI build the archive +# from a pinned snapshot. 
+# +# Why a script: manual file-by-file copies drift silently — the vendored tree +# was 391 lines behind upstream diagram.rs when the overlays feature landed, +# and nobody noticed until rebuild. This script is the only supported path +# and should be rerun whenever upstream cuts a release worth pulling. +# +# Usage: +# scripts/sync-cartographer.sh +# +# Example: +# scripts/sync-cartographer.sh ../../../Cartographer +# +# After running: inspect `git diff`, then rebuild: +# make build-cartographer +# go test -tags cartographer ./internal/query/... + +set -euo pipefail + +UPSTREAM="${1:?usage: $0 }" +UPSTREAM_CART="$UPSTREAM/mapper-core/cartographer" + +if [[ ! -d "$UPSTREAM_CART" ]]; then + echo "error: $UPSTREAM_CART not found — pass the Cartographer repo root" >&2 + exit 1 +fi + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +CKB_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +VENDOR="$CKB_ROOT/third_party/cartographer/mapper-core/cartographer" + +if [[ ! -d "$VENDOR" ]]; then + echo "error: vendor tree not found: $VENDOR" >&2 + exit 1 +fi + +echo "syncing $UPSTREAM_CART → $VENDOR" + +# Rsync the Rust source + build config + FFI header. We explicitly list the +# paths to sync rather than mirroring the whole tree so we never pull in +# build artifacts (target/), IDE files, or editor scratch files. If upstream +# adds new top-level items (e.g. a new subcrate), add them here deliberately. +rsync -a --delete "$UPSTREAM_CART/src/" "$VENDOR/src/" +rsync -a --delete "$UPSTREAM_CART/include/" "$VENDOR/include/" +rsync -a --delete "$UPSTREAM_CART/scripts/" "$VENDOR/scripts/" +cp "$UPSTREAM_CART/Cargo.toml" "$VENDOR/Cargo.toml" +cp "$UPSTREAM_CART/Cargo.lock" "$VENDOR/Cargo.lock" +cp "$UPSTREAM_CART/build.rs" "$VENDOR/build.rs" +cp "$UPSTREAM_CART/cbindgen.toml" "$VENDOR/cbindgen.toml" + +# No local patches known at this time. If a local patch ever becomes +# necessary (e.g. 
upstream depends on a private crate we can't vendor), +# reapply it here AFTER the rsync and document WHY inline. + +echo "done. next steps:" +echo " 1. review: git -C $CKB_ROOT diff third_party/cartographer/" +echo " 2. build: cd $CKB_ROOT && make build-cartographer" +echo " 3. test: cd $CKB_ROOT && go test -tags cartographer ./internal/query/..." diff --git a/testdata/review/sarif.json b/testdata/review/sarif.json index f36f8cb7..efed6636 100644 --- a/testdata/review/sarif.json +++ b/testdata/review/sarif.json @@ -268,8 +268,8 @@ } } ], - "semanticVersion": "9.1.0", - "version": "9.1.0" + "semanticVersion": "9.2.0", + "version": "9.2.0" } } } diff --git a/third_party/cartographer/mapper-core/cartographer/Cargo.toml b/third_party/cartographer/mapper-core/cartographer/Cargo.toml index 3a004f87..92d8dede 100644 --- a/third_party/cartographer/mapper-core/cartographer/Cargo.toml +++ b/third_party/cartographer/mapper-core/cartographer/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cartographer" -version = "2.4.0" +version = "3.0.0" edition = "2021" description = "Code Cartographer for Architectural Intelligence" authors = ["SimplyLiz"] diff --git a/third_party/cartographer/mapper-core/cartographer/include/cartographer.h b/third_party/cartographer/mapper-core/cartographer/include/cartographer.h index 8c65594c..6ddc8df3 100644 --- a/third_party/cartographer/mapper-core/cartographer/include/cartographer.h +++ b/third_party/cartographer/mapper-core/cartographer/include/cartographer.h @@ -378,7 +378,7 @@ char *cartographer_search_content(const char *path, const char *pattern, const c * * Parameters: * - `path` – absolute path to repo root (UTF-8 C string) - * - `pattern` – glob pattern, e.g. `"*.rs"` or `"src/**/*.go"` (C string) + * - `pattern` – glob pattern, e.g. 
`"*.rs"` or `"src/subdir/*.go"` (C string) * - `limit` – max files to return; 0 = unlimited * - `opts_json` – optional JSON `FindOptions` or null for defaults: * `{ modifiedSinceSecs, newerThan, minSizeBytes, maxSizeBytes, maxDepth, noIgnore }` @@ -641,4 +641,104 @@ char *cartographer_query_context(const char *path, const char *query, const char */ char *cartographer_shotgun_surgery(const char *path, uint32_t limit, uint32_t min_partners); +/** + * Return all document-type nodes from the project graph. + * + * Input: `path` — project root (C string) + * + * Response shape: + * ```json + * { + * "ok": true, + * "data": [ + * { + * "path": "docs/architecture.md", + * "module_id": "docs/architecture.md", + * "signatures": ["# Architecture", "## Overview"], + * "imports": ["src/api.rs"], + * "edge_count": 3 + * } + * ] + * } + * ``` + */ +char *cartographer_doc_index(const char *path); + +/** + * Return a single document's structure plus skeletons of referenced code files. + * + * Inputs: + * `path` — project root (C string) + * `doc_path` — relative path to the document (C string) + * `budget` — max tokens for referenced code (0 → 4000) + * + * Response shape: + * ```json + * { + * "ok": true, + * "data": { + * "doc": { "path": "...", "moduleId": "...", "signatures": [...], "imports": [...] }, + * "referencedFiles": [{ "path": "...", "rank": 0.05, "signatures": [...] }], + * "totalTokens": 2100 + * } + * } + * ``` + */ +char *cartographer_doc_context(const char *path, const char *doc_path, uint32_t budget); + +/** + * Doc-biased context retrieval: search docs first, follow cross-refs into code. 
+ * + * Inputs: + * `path` — project root (C string) + * `query` — natural language query (C string) + * `opts_json` — optional JSON: `{ "budget": 8000, "model": "claude" }` + * + * Response shape: + * ```json + * { + * "ok": true, + * "data": { + * "context": "## Doc Context for: ...\n\n...", + * "docFiles": [...], + * "codeFiles": [...], + * "focusDocs": ["docs/setup.md"], + * "totalTokens": 5200, + * "health": { "score": 81.0, "grade": "B", ... } + * } + * } + * ``` + */ +char *cartographer_query_docs(const char *path, const char *query, const char *opts_json); + +/** + * Render the project's import graph as a Mermaid or Graphviz (DOT) diagram. + * + * Inputs: + * `path` — project root (C string) + * `format` — "mermaid" or "dot" (C string; may be null → "mermaid") + * `focus` — optional module_id or path to anchor BFS on (C string, may + * be null → top-N by degree) + * `depth` — BFS depth when `focus` is set (0 → 2; ignored without focus) + * `max_nodes` — cap on nodes in the output (0 → 40) + * + * Response shape: + * ```json + * { + * "ok": true, + * "data": { + * "diagram": "graph TD\n N0[...] --> N1[...]\n...", + * "truncated": false, + * "format": "mermaid", + * "nodeCount": 23 + * } + * } + * ``` + */ +char *cartographer_render_architecture(const char *path, + const char *format, + const char *focus, + uint32_t depth, + uint32_t max_nodes); + #endif /* CARTOGRAPHER_H */ diff --git a/third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh b/third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh new file mode 100755 index 00000000..b06759be --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# +# Post-process libcartographer.a so its bundled tree-sitter runtime and +# grammar symbols are internal, not externally visible. Consumers that also +# link tree-sitter (e.g. 
Go projects using github.com/smacker/go-tree-sitter) +# would otherwise trip "duplicate symbol" errors at link time and — worse — +# risk Cartographer's Rust code binding to the consumer's tree-sitter copy +# if the linker resolved `ts_*` cross-archive. If the two tree-sitter +# versions drift, that route produces silent memory corruption. +# +# Approach: partial-link every .o inside the archive into one combined +# relocatable object so Cartographer's internal ts_*/tree_sitter_* refs +# resolve within the archive, then mark those symbols local so they no +# longer participate in global symbol resolution. Only the cartographer_* +# FFI entry points stay exported. +# +# Requires a C compiler whose linker supports `-r`, an `ar`, and an +# objcopy-style tool. `rust-objcopy` from rustup's llvm-tools-preview +# component works on both Linux (ELF) and macOS (Mach-O). +# +# Usage: localize-tree-sitter-symbols.sh + +set -euo pipefail + +ARCHIVE="${1:?usage: $0 }" +case "$ARCHIVE" in + /*) ARCHIVE_ABS="$ARCHIVE" ;; + *) ARCHIVE_ABS="$PWD/$ARCHIVE" ;; +esac + +if [[ ! -f "$ARCHIVE_ABS" ]]; then + echo "error: archive not found: $ARCHIVE_ABS" >&2 + exit 1 +fi + +pick() { + for c in "$@"; do + if command -v "$c" >/dev/null 2>&1; then echo "$c"; return 0; fi + done + return 1 +} + +# `rust-objcopy` ships in the target-specific rustlib bin dir and is not on +# PATH by default; probe it via rustc before falling through to system tools. 
+OBJCOPY="" +if command -v rustc >/dev/null 2>&1; then + RUST_BINDIR="$(rustc --print target-libdir 2>/dev/null)/../bin" + if [[ -x "$RUST_BINDIR/rust-objcopy" ]]; then + OBJCOPY="$RUST_BINDIR/rust-objcopy" + fi +fi +if [[ -z "$OBJCOPY" ]]; then + OBJCOPY="$(pick rust-objcopy llvm-objcopy objcopy)" || { + echo "error: no objcopy tool found (tried rust-objcopy, llvm-objcopy, objcopy)" >&2 + echo "hint: rustup component add llvm-tools-preview" >&2 + exit 1 + } +fi +CC="$(pick cc clang gcc)" || { echo "error: no C compiler found" >&2; exit 1; } +AR="$(pick llvm-ar ar)" || { echo "error: no ar found" >&2; exit 1; } + +# Mach-O symbol names carry a leading underscore; ELF does not. +case "$(uname -s)" in + Darwin) UPREFIX="_" ;; + *) UPREFIX="" ;; +esac + +WORK="$(mktemp -d)" +trap 'rm -rf "$WORK"' EXIT + +cp "$ARCHIVE_ABS" "$WORK/input.a" +( + cd "$WORK" + + # Partial link (`ld -r`) merges every archive member into a single + # relocatable object so Cartographer's internal ts_*/tree_sitter_* refs + # resolve within the combined object. We feed the archive directly to + # the linker with a force-load flag rather than `ar x`-extracting first, + # because Cargo emits multiple `.o` members with identical names (each + # tree-sitter grammar crate's build.rs produces its own `parser.o` / + # `scanner.o`) — `ar x` clobbers duplicates on disk, dropping the C + # parser objects for all but the last grammar. `-force_load` (Mach-O) + # and `--whole-archive` (ELF) both pull in every member unconditionally, + # preserving every instance. + # + # `-nostdlib` prevents clang/gcc from pulling in CRT or libSystem. + case "$(uname -s)" in + Darwin) + "$CC" -nostdlib -Wl,-r -o combined.o -Wl,-force_load,input.a + ;; + *) + "$CC" -nostdlib -Wl,-r -o combined.o \ + -Wl,--whole-archive input.a -Wl,--no-whole-archive + ;; + esac + + # Localize tree-sitter runtime (`ts_*`) and grammar init symbols + # (`tree_sitter_`, plus internal `tree_sitter__external_scanner_*` + # helpers). 
Safe now that the combined object resolved internal refs. + "$OBJCOPY" \ + --wildcard \ + --localize-symbol="${UPREFIX}ts_*" \ + --localize-symbol="${UPREFIX}tree_sitter_*" \ + combined.o + + # Replace the archive with just the combined, localized object. + rm -f "$ARCHIVE_ABS" + "$AR" rcs "$ARCHIVE_ABS" combined.o +) + +echo "localized tree-sitter symbols in: $ARCHIVE_ABS" diff --git a/third_party/cartographer/mapper-core/cartographer/scripts/tests/test-localize-symbols.sh b/third_party/cartographer/mapper-core/cartographer/scripts/tests/test-localize-symbols.sh new file mode 100755 index 00000000..56de9e62 --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/scripts/tests/test-localize-symbols.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +# +# Smoke test for scripts/localize-tree-sitter-symbols.sh. +# +# Builds a small static archive that mirrors libcartographer.a's symbol +# shape — a tree-sitter runtime object, a grammar object, and a wrapper +# object that references them and exposes cartographer_* entry points — +# runs the script, and asserts ts_*/tree_sitter_* are no longer global +# while cartographer_* still is. 
+ +set -euo pipefail + +HERE="$(cd "$(dirname "$0")" && pwd)" +SCRIPT="$HERE/../localize-tree-sitter-symbols.sh" + +CC="${CC:-cc}" +AR="${AR:-ar}" +NM="${NM:-nm}" + +WORK="$(mktemp -d)" +trap 'rm -rf "$WORK"' EXIT +cd "$WORK" + +cat > runtime.c <<'EOF' +int ts_parser_new(void) { return 42; } +int ts_tree_root_node(int x) { return x + 1; } +EOF + +cat > grammar.c <<'EOF' +int tree_sitter_rust(void) { return 7; } +EOF + +cat > wrapper.c <<'EOF' +extern int ts_parser_new(void); +extern int tree_sitter_rust(void); +int cartographer_version(void) { return ts_parser_new() + tree_sitter_rust(); } +int cartographer_render_architecture(void) { return 0; } +EOF + +"$CC" -c -fPIC runtime.c -o runtime.o +"$CC" -c -fPIC grammar.c -o grammar.o +"$CC" -c -fPIC wrapper.c -o wrapper.o +"$AR" rcs libfixture.a runtime.o grammar.o wrapper.o + +# Mach-O prepends an underscore to C symbol names; ELF does not. +case "$(uname -s)" in + Darwin) U=_ ;; + *) U= ;; +esac + +fail() { echo "FAIL: $*" >&2; exit 1; } + +# Pre-condition: baseline archive exposes ts_* and tree_sitter_* as globals. +"$NM" -g runtime.o | grep -qE " T ${U}ts_parser_new\$" \ + || fail "baseline: ${U}ts_parser_new should be global in runtime.o" +"$NM" -g grammar.o | grep -qE " T ${U}tree_sitter_rust\$" \ + || fail "baseline: ${U}tree_sitter_rust should be global in grammar.o" + +"$SCRIPT" libfixture.a >/dev/null + +# After localization: archive should contain exactly combined.o. 
+rm -f runtime.o grammar.o wrapper.o +"$AR" x libfixture.a +[[ -f combined.o ]] || fail "expected combined.o inside archive after localization" + +GLOBAL_TS="$("$NM" -g combined.o | grep -cE " T ${U}ts_" || true)" +GLOBAL_TSL="$("$NM" -g combined.o | grep -cE " T ${U}tree_sitter_" || true)" +GLOBAL_CARTO="$("$NM" -g combined.o | grep -cE " T ${U}cartographer_" || true)" + +[[ "$GLOBAL_TS" -eq 0 ]] || fail "ts_* still global ($GLOBAL_TS)" +[[ "$GLOBAL_TSL" -eq 0 ]] || fail "tree_sitter_* still global ($GLOBAL_TSL)" +[[ "$GLOBAL_CARTO" -ge 2 ]] || fail "cartographer_* lost exports (got $GLOBAL_CARTO, want >= 2)" + +# And the localized symbols should still be present as local (t), i.e. the +# definitions weren't stripped — just made invisible to the global resolver. +LOCAL_TS="$("$NM" combined.o | grep -cE " t ${U}ts_" || true)" +LOCAL_TSL="$("$NM" combined.o | grep -cE " t ${U}tree_sitter_" || true)" +[[ "$LOCAL_TS" -ge 1 ]] || fail "ts_* definitions missing post-localization" +[[ "$LOCAL_TSL" -ge 1 ]] || fail "tree_sitter_* definitions missing post-localization" + +echo "PASS: ts_* and tree_sitter_* localized; cartographer_* still exported ($GLOBAL_CARTO symbols)" diff --git a/third_party/cartographer/mapper-core/cartographer/src/api.rs b/third_party/cartographer/mapper-core/cartographer/src/api.rs index 9e2c77e2..c963bcf8 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/api.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/api.rs @@ -118,6 +118,18 @@ pub struct GraphNode { pub cochange_partners: Option, /// Shannon entropy of co-change distribution (higher = more scattered changes). pub cochange_entropy: Option, + /// Dominant git author by commit count (bot/format commits excluded). + /// Populated by `enrich_with_git`. Powers the `--color-by=owner` diagram mode. + pub owner: Option, +} + +/// A source position range using LIP semantics: line is 0-based, char is UTF-8 byte offset from line start. 
+#[derive(Debug, Clone, Serialize)] +pub struct Range { + pub start_line: usize, + pub start_char: usize, + pub end_line: usize, + pub end_char: usize, } #[derive(Debug, Clone, Serialize)] @@ -125,6 +137,7 @@ pub struct GraphEdge { pub source: String, pub target: String, pub edge_type: String, + pub at_range: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -448,14 +461,19 @@ impl ApiState { fan_out: None, cochange_partners: None, cochange_entropy: None, + owner: None, }); for import in &file.imports { - if let Some(target) = self.resolve_import_target(import, module_id) { + // `rebuild_graph` already holds the `mapped_files` lock; call the + // map-taking helper directly. Calling `resolve_import_target` here + // would re-enter the non-reentrant Mutex and deadlock. + if let Some(target) = Self::resolve_import_target_in(&files, import, module_id) { edges.push(GraphEdge { source: module_id.clone(), target, edge_type: "import".to_string(), + at_range: None, }); } } @@ -895,6 +913,13 @@ fn parse_import_parts(import: &str) -> (String, Option) { // Fallback: last token let last = raw.split_whitespace().last().unwrap_or(raw); let last = last.trim_matches('"').trim_matches('\'').trim_end_matches(';'); + // Bare PascalCase identifier (e.g. from doc backtick refs) → set as symbol hint + // so resolve_import_target can match it against symbol definitions. + if !last.contains('/') && !last.contains('.') && last.len() >= 4 + && last.chars().next().map(|c| c.is_uppercase()).unwrap_or(false) + { + return (last.to_string(), Some(last.to_string())); + } (last.to_string(), None) } @@ -915,15 +940,18 @@ fn extract_js_import_symbol(lhs: &str) -> Option { /// Return the last meaningful path component to use as a file-stem candidate. fn derive_module_stem(module_path: &str) -> String { - module_path + let last = module_path .split('/') .filter(|s| !s.is_empty() && *s != "." 
&& *s != "..") .last() .unwrap_or(module_path) - .trim_start_matches('@') // strip npm scope prefix - .split('-') // treat kebab-case first word as stem - .next() - .unwrap_or("") + .trim_start_matches('@'); // strip npm scope prefix + let kebab_first = last.split('-').next().unwrap_or(last); // treat kebab-case first word as stem + // Strip file extension so doc-style imports ("scanner.rs", "api/search.md") resolve correctly + Path::new(kebab_first) + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or(kebab_first) .to_string() } @@ -961,6 +989,67 @@ fn is_test_path(path: &str) -> bool { || lower.ends_with("_test.go") } +// --------------------------------------------------------------------------- +// Document helpers +// --------------------------------------------------------------------------- + +/// File extensions treated as "documents" (non-code) for doc-oriented tools. +pub const DOC_EXTENSIONS: &[&str] = &["md", "markdown", "yaml", "yml", "toml", "json"]; + +pub fn is_doc_path(path: &str) -> bool { + path.rsplit('.') + .next() + .map(|ext| DOC_EXTENSIONS.contains(&ext)) + .unwrap_or(false) +} + +/// Summary of a document node in the project graph. +#[derive(Debug, Clone, Serialize)] +pub struct DocNode { + pub path: String, + pub module_id: String, + pub signatures: Vec, + pub imports: Vec, + pub edge_count: usize, +} + +impl ApiState { + /// Return all document-type nodes from the project graph. 
+ pub fn doc_nodes(&self) -> Result, String> { + let graph = self.rebuild_graph()?; + let files = self.mapped_files.lock().map_err(|e| e.to_string())?; + + let mut docs = Vec::new(); + for node in &graph.nodes { + if !is_doc_path(&node.path) { + continue; + } + let edge_count = graph.edges.iter() + .filter(|e| e.source == node.module_id || e.target == node.module_id) + .count(); + + let (sigs, imports) = files.get(&node.module_id) + .map(|mf| ( + mf.signatures.iter().map(|s| s.raw.clone()).collect(), + mf.imports.clone(), + )) + .unwrap_or_default(); + + docs.push(DocNode { + path: node.path.clone(), + module_id: node.module_id.clone(), + signatures: sigs, + imports, + edge_count, + }); + } + + // Sort: most connected docs first + docs.sort_by(|a, b| b.edge_count.cmp(&a.edge_count)); + Ok(docs) + } +} + struct BridgeAnalysis { is_bridge: bool, bridge_score: f64, @@ -1119,7 +1208,17 @@ impl ApiState { fn resolve_import_target(&self, import: &str, source: &str) -> Option { let files = self.mapped_files.lock().ok()?; + Self::resolve_import_target_in(&files, import, source) + } + // Same lookup as `resolve_import_target` but takes the already-locked map. + // Used by `rebuild_graph` (which holds the lock for the whole rebuild) to + // avoid re-entering the non-reentrant Mutex and deadlocking. + fn resolve_import_target_in( + files: &HashMap, + import: &str, + source: &str, + ) -> Option { let (module_path, symbol_hint) = parse_import_parts(import); let stem = derive_module_stem(&module_path); @@ -1145,6 +1244,15 @@ impl ApiState { return Some(module_id.clone()); } + // 1b. Path-suffix match for relative doc links ("api/search.md" → "docs/api/search.md"). + // Checked before the loose segment match to return an unambiguous result. + if norm_path.contains('/') || norm_path.contains('.') { + let suffix = format!("/{}", norm_path.trim_start_matches('/')); + if file.path.ends_with(&suffix) { + return Some(module_id.clone()); + } + } + // 2. 
Path segment: file path contains the module stem as a component if segment_match.is_none() && stem.len() >= 3 { let file_lower = file.path.to_lowercase(); @@ -1432,7 +1540,6 @@ impl ApiState { pub fn get_evolution(&self, days: Option) -> Result { let current_graph = self.rebuild_graph()?; - let current_health = current_graph.metadata.health_score.unwrap_or(100.0); let days = days.unwrap_or(30); @@ -1441,7 +1548,7 @@ impl ApiState { .unwrap_or_default() .as_secs(); - let mut snapshots = vec![ArchitectureSnapshot { + let current_snapshot = ArchitectureSnapshot { timestamp: now, health_score: current_health, total_files: current_graph.metadata.total_files, @@ -1456,16 +1563,49 @@ impl ApiState { .iter() .max_by_key(|(_, v)| *v) .map(|(k, _)| k.clone()), - }]; - - // Trend requires multiple snapshots; this reflects current state only. - // Historical tracking is not yet implemented, so `days` has no effect. - let health_trend = if current_health >= 80.0 { - "Healthy".to_string() - } else if current_health >= 60.0 { - "Moderate".to_string() + }; + + // ── Persist snapshot to history file ────────────────────────────────── + let history_path = self.root_path.join(".cartographer_history.json"); + let mut all_snapshots: Vec = + std::fs::read_to_string(&history_path) + .ok() + .and_then(|s| serde_json::from_str(&s).ok()) + .unwrap_or_default(); + all_snapshots.push(current_snapshot); + // Cap history to last 365 snapshots to prevent unbounded growth + if all_snapshots.len() > 365 { + let drain_count = all_snapshots.len() - 365; + all_snapshots.drain(0..drain_count); + } + if let Ok(json) = serde_json::to_string(&all_snapshots) { + let _ = std::fs::write(&history_path, json); + } + + // ── Filter to requested window ──────────────────────────────────────── + let since_epoch = now.saturating_sub(days as u64 * 86_400); + let snapshots: Vec = all_snapshots + .into_iter() + .filter(|s| s.timestamp >= since_epoch) + .collect(); + + // ── Compute trend from first vs last snapshot 
───────────────────────── + let health_trend = if snapshots.len() >= 2 { + let first = snapshots.first().unwrap().health_score; + let last = snapshots.last().unwrap().health_score; + let delta = last - first; + if delta > 5.0 { + "Improving".to_string() + } else if delta < -5.0 { + "Degrading".to_string() + } else { + "Stable".to_string() + } } else { - "At Risk".to_string() + // Single snapshot — classify by absolute score + if current_health >= 80.0 { "Healthy".to_string() } + else if current_health >= 60.0 { "Moderate".to_string() } + else { "At Risk".to_string() } }; let mut debt_indicators = Vec::new(); @@ -1493,12 +1633,10 @@ impl ApiState { recommendations.push("Priority: Break circular dependencies".to_string()); } if current_graph.metadata.god_module_count.unwrap_or(0) > 2 { - recommendations - .push("Consider splitting large modules to improve cohesion".to_string()); + recommendations.push("Consider splitting large modules to improve cohesion".to_string()); } if recommendations.is_empty() { - recommendations - .push("Architecture is healthy - maintain current practices".to_string()); + recommendations.push("Architecture is healthy - maintain current practices".to_string()); } Ok(ArchitectureEvolution { @@ -1535,4 +1673,44 @@ mod tests { let level = CompressionLevel::default(); assert_eq!(level, CompressionLevel::Standard); } + + #[test] + fn derive_module_stem_strips_extension() { + assert_eq!(derive_module_stem("scanner.rs"), "scanner"); + assert_eq!(derive_module_stem("api/search.md"), "search"); + assert_eq!(derive_module_stem("config.yaml"), "config"); + // Normal code imports (no extension) unchanged + assert_eq!(derive_module_stem("scanner"), "scanner"); + assert_eq!(derive_module_stem("react-dom"), "react"); + assert_eq!(derive_module_stem("src/api/handler"), "handler"); + } + + // Regression test: before the fix, rebuild_graph held the mapped_files + // Mutex across its inner loop and then called resolve_import_target, + // which re-acquired the 
same non-reentrant Mutex → deadlock on any + // project with at least one resolvable import. Any resolved edge is + // enough to prove the hang is gone; correctness of the graph content + // is covered elsewhere. + #[test] + fn rebuild_graph_does_not_deadlock_on_imports() { + let state = ApiState::new(std::path::PathBuf::from("/test")); + { + let mut files = state.mapped_files.lock().unwrap(); + files.insert( + "a".to_string(), + MappedFile::from_minimal("a.rs".to_string(), vec!["b".to_string()]), + ); + files.insert( + "b".to_string(), + MappedFile::from_minimal("b.rs".to_string(), vec![]), + ); + } + let graph = state.rebuild_graph().expect("rebuild_graph must return"); + assert_eq!(graph.nodes.len(), 2); + assert!( + graph.edges.iter().any(|e| e.source == "a" && e.target == "b"), + "expected resolved a->b edge, got edges: {:?}", + graph.edges + ); + } } diff --git a/third_party/cartographer/mapper-core/cartographer/src/call_graph.rs b/third_party/cartographer/mapper-core/cartographer/src/call_graph.rs new file mode 100644 index 00000000..0a9ae8d6 --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/src/call_graph.rs @@ -0,0 +1,728 @@ +//! File-local call-graph extraction for Rust and Python. +//! +//! Given one source file, produce (caller, callee) edges between functions +//! defined *in that file*. Calls into other files or the stdlib are dropped +//! and reported as `unresolved_count` — the goal is "what does this file do +//! internally", not project-wide call tracing (that's a much bigger job and +//! would need cross-file resolution). +//! +//! Output is shaped as a `ProjectGraphResponse` so the existing diagram +//! renderers (Mermaid/DOT/ASCII + focus/depth/max_nodes) work unchanged. +//! +//! Gated on `lang-rust` / `lang-python` Cargo features, matching the rest of +//! the tree-sitter surface in `extractor.rs`. 
+ +use std::path::Path; + +use crate::api::{GraphEdge, GraphMetadata, GraphNode, ProjectGraphResponse}; + +#[cfg(any(feature = "lang-rust", feature = "lang-python"))] +use tree_sitter::{Node, Parser}; + +/// Aggregated call graph for a single source file. +#[derive(Debug, Clone)] +pub struct FileCallGraph { + /// Every function/method defined in the file, in source order. + pub functions: Vec, + /// Caller → callee edges, both as qualified names from `functions`. + pub calls: Vec<(String, String)>, + /// Number of call sites where the callee could not be matched to a + /// function defined in this file (external, stdlib, or unresolved). + pub unresolved_count: usize, + /// Language tag ("rust" / "python") for the project graph we emit. + pub language: &'static str, +} + +/// One function / method definition in the file being analysed. +#[derive(Debug, Clone)] +pub struct FunctionInfo { + /// Qualified name: `Type::method` in Rust, `Class.method` in Python, plain + /// function name at file scope. + pub qualified: String, + /// Bare method name — used for simple-name resolution when a callee only + /// names the method (e.g. `self.foo()`). + pub simple: String, + /// 1-based line number of the definition. + pub line: u32, + /// "fn" for free functions, "method" for impl/class members. + pub kind: &'static str, +} + +/// Build a call graph for the given file. Returns `Ok(None)` when the file +/// extension isn't one we extract call graphs for (currently `.rs`/`.py`). +/// Returns `Err` on unreadable files or parser init failures. 
+pub fn build_file_call_graph(path: &Path, source: &str) -> Result, String> { + let ext = path + .extension() + .and_then(|e| e.to_str()) + .map(|s| s.to_lowercase()) + .unwrap_or_default(); + + match ext.as_str() { + #[cfg(feature = "lang-rust")] + "rs" => Ok(Some(extract_rust(source)?)), + #[cfg(feature = "lang-python")] + "py" => Ok(Some(extract_python(source)?)), + _ => Ok(None), + } +} + +/// Wrap a `FileCallGraph` in a `ProjectGraphResponse` so `diagram::render()` +/// can consume it directly. Each function becomes a node whose `module_id` is +/// the qualified name; each call becomes an edge. +pub fn to_project_graph(cg: &FileCallGraph, path: &Path) -> ProjectGraphResponse { + let path_str = path.to_string_lossy().into_owned(); + let nodes: Vec = cg + .functions + .iter() + .map(|f| GraphNode { + module_id: f.qualified.clone(), + // Render path shows "file.rs:Type::method" so the diagram carries + // enough info for a reader to find the function without inspecting + // module_id separately. + path: format!("{}:{}", path_str, f.qualified), + language: cg.language.to_string(), + // 1 "signature" = 1 function; used for hotspot sizing in DOT. 
+ signature_count: 1, + complexity: None, + is_bridge: None, + bridge_score: None, + degree: None, + risk_level: None, + churn: None, + hotspot_score: None, + role: Some(f.kind.to_string()), + is_dead: None, + unreferenced_exports: None, + fan_in: None, + fan_out: None, + cochange_partners: None, + cochange_entropy: None, + owner: None, + }) + .collect(); + + let edges: Vec = cg + .calls + .iter() + .map(|(src, tgt)| GraphEdge { + source: src.clone(), + target: tgt.clone(), + edge_type: "call".into(), + at_range: None, + }) + .collect(); + + let mut languages = std::collections::HashMap::new(); + languages.insert(cg.language.to_string(), nodes.len()); + + let total_edges = edges.len(); + ProjectGraphResponse { + nodes, + edges, + cycles: vec![], + god_modules: vec![], + layer_violations: vec![], + metadata: GraphMetadata { + total_files: 1, + total_edges, + languages, + generated_at: String::new(), + bridge_count: None, + cycle_count: None, + god_module_count: None, + health_score: None, + layer_violation_count: None, + architectural_drift: None, + hotspot_count: None, + dead_code_count: None, + unreferenced_exports_count: None, + }, + cochange_pairs: vec![], + } +} + +// --------------------------------------------------------------------------- +// Rust +// --------------------------------------------------------------------------- + +#[cfg(feature = "lang-rust")] +fn extract_rust(source: &str) -> Result { + let mut parser = Parser::new(); + let lang = tree_sitter_rust::language(); + parser + .set_language(&lang) + .map_err(|e| format!("tree-sitter rust init failed: {e}"))?; + let tree = parser + .parse(source.as_bytes(), None) + .ok_or_else(|| "tree-sitter parse returned None".to_string())?; + let src = source.as_bytes(); + + // Pass 1 — enumerate function definitions with scope stack. 
+ let mut functions: Vec = Vec::new(); + let mut scope: Vec = Vec::new(); + collect_rust_functions(&tree.root_node(), src, &mut scope, &mut functions); + + // Pass 2 — for each function, walk its body and resolve call sites. + let resolver = Resolver::new(&functions); + let mut calls: Vec<(String, String)> = Vec::new(); + let mut unresolved_count: usize = 0; + let mut scope: Vec = Vec::new(); + collect_rust_calls( + &tree.root_node(), + src, + &mut scope, + &resolver, + &mut calls, + &mut unresolved_count, + ); + + Ok(FileCallGraph { + functions, + calls, + unresolved_count, + language: "rust", + }) +} + +#[cfg(feature = "lang-rust")] +fn collect_rust_functions( + node: &Node, + src: &[u8], + scope: &mut Vec, + out: &mut Vec, +) { + match node.kind() { + "impl_item" => { + let type_name = node + .child_by_field_name("type") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + // Drop generics for scope key so `impl Foo` and `impl Foo` match. + let base = type_name.split('<').next().unwrap_or(&type_name).trim().to_string(); + scope.push(base); + if let Some(body) = node.child_by_field_name("body") { + let mut cur = body.walk(); + for child in body.children(&mut cur) { + collect_rust_functions(&child, src, scope, out); + } + } + scope.pop(); + } + "function_item" => { + let name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + if name.is_empty() { + return; + } + let kind = if scope.is_empty() { "fn" } else { "method" }; + let qualified = qualify(scope, &name, "::"); + out.push(FunctionInfo { + qualified, + simple: name, + line: (node.start_position().row as u32) + 1, + kind, + }); + } + _ => { + let mut cur = node.walk(); + for child in node.children(&mut cur) { + collect_rust_functions(&child, src, scope, out); + } + } + } +} + +#[cfg(feature = "lang-rust")] +fn collect_rust_calls( + node: &Node, + src: &[u8], + scope: &mut Vec, + resolver: &Resolver, + out: &mut Vec<(String, String)>, + 
unresolved: &mut usize, +) { + match node.kind() { + "impl_item" => { + let type_name = node + .child_by_field_name("type") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + let base = type_name.split('<').next().unwrap_or(&type_name).trim().to_string(); + scope.push(base); + if let Some(body) = node.child_by_field_name("body") { + let mut cur = body.walk(); + for child in body.children(&mut cur) { + collect_rust_calls(&child, src, scope, resolver, out, unresolved); + } + } + scope.pop(); + } + "function_item" => { + let name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + if name.is_empty() { + return; + } + let caller_qual = qualify(scope, &name, "::"); + if let Some(body) = node.child_by_field_name("body") { + walk_rust_body(&body, src, &caller_qual, resolver, out, unresolved); + } + } + _ => { + let mut cur = node.walk(); + for child in node.children(&mut cur) { + collect_rust_calls(&child, src, scope, resolver, out, unresolved); + } + } + } +} + +#[cfg(feature = "lang-rust")] +fn walk_rust_body( + node: &Node, + src: &[u8], + caller: &str, + resolver: &Resolver, + out: &mut Vec<(String, String)>, + unresolved: &mut usize, +) { + if node.kind() == "call_expression" { + let callee_raw = node + .child_by_field_name("function") + .map(|n| rust_callee_name(&n, src)) + .unwrap_or_default(); + if !callee_raw.is_empty() { + match resolver.resolve(&callee_raw) { + Some(target) => { + if target != caller { + out.push((caller.to_string(), target)); + } + } + None => *unresolved += 1, + } + } + } + + // Recurse into children regardless — nested call expressions, closures, and + // blocks all legitimately hold more calls. + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk_rust_body(&child, src, caller, resolver, out, unresolved); + } +} + +/// Best-effort callee name extraction from a `call_expression`'s `function` +/// node. 
Returns the shortest form that a human reader would recognize: +/// foo() → "foo" +/// mod::foo() → "foo" +/// x.method() → "method" +/// Type::assoc() → "assoc" +/// Macros aren't call_expressions in tree-sitter-rust so we don't see them. +#[cfg(feature = "lang-rust")] +fn rust_callee_name(node: &Node, src: &[u8]) -> String { + match node.kind() { + "identifier" => node_text(node, src).to_string(), + "field_expression" => node + .child_by_field_name("field") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(), + "scoped_identifier" => node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(), + "generic_function" => node + .child_by_field_name("function") + .map(|n| rust_callee_name(&n, src)) + .unwrap_or_default(), + _ => String::new(), + } +} + +// --------------------------------------------------------------------------- +// Python +// --------------------------------------------------------------------------- + +#[cfg(feature = "lang-python")] +fn extract_python(source: &str) -> Result { + let mut parser = Parser::new(); + let lang = tree_sitter_python::language(); + parser + .set_language(&lang) + .map_err(|e| format!("tree-sitter python init failed: {e}"))?; + let tree = parser + .parse(source.as_bytes(), None) + .ok_or_else(|| "tree-sitter parse returned None".to_string())?; + let src = source.as_bytes(); + + let mut functions: Vec = Vec::new(); + let mut scope: Vec = Vec::new(); + collect_python_functions(&tree.root_node(), src, &mut scope, &mut functions); + + let resolver = Resolver::new(&functions); + let mut calls: Vec<(String, String)> = Vec::new(); + let mut unresolved_count: usize = 0; + let mut scope: Vec = Vec::new(); + collect_python_calls( + &tree.root_node(), + src, + &mut scope, + &resolver, + &mut calls, + &mut unresolved_count, + ); + + Ok(FileCallGraph { + functions, + calls, + unresolved_count, + language: "python", + }) +} + +#[cfg(feature = "lang-python")] +fn 
collect_python_functions( + node: &Node, + src: &[u8], + scope: &mut Vec, + out: &mut Vec, +) { + match node.kind() { + "class_definition" => { + let class_name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + scope.push(class_name); + if let Some(body) = node.child_by_field_name("body") { + let mut cur = body.walk(); + for child in body.children(&mut cur) { + collect_python_functions(&child, src, scope, out); + } + } + scope.pop(); + } + "function_definition" => { + let name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + if name.is_empty() { + return; + } + let kind = if scope.is_empty() { "fn" } else { "method" }; + let qualified = qualify(scope, &name, "."); + out.push(FunctionInfo { + qualified, + simple: name, + line: (node.start_position().row as u32) + 1, + kind, + }); + } + "decorated_definition" => { + // Decorated functions wrap the real definition in the last child. 
+ let mut cur = node.walk(); + let children: Vec = node.children(&mut cur).collect(); + if let Some(def) = children.last() { + collect_python_functions(def, src, scope, out); + } + } + _ => { + let mut cur = node.walk(); + for child in node.children(&mut cur) { + collect_python_functions(&child, src, scope, out); + } + } + } +} + +#[cfg(feature = "lang-python")] +fn collect_python_calls( + node: &Node, + src: &[u8], + scope: &mut Vec, + resolver: &Resolver, + out: &mut Vec<(String, String)>, + unresolved: &mut usize, +) { + match node.kind() { + "class_definition" => { + let class_name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + scope.push(class_name); + if let Some(body) = node.child_by_field_name("body") { + let mut cur = body.walk(); + for child in body.children(&mut cur) { + collect_python_calls(&child, src, scope, resolver, out, unresolved); + } + } + scope.pop(); + } + "function_definition" => { + let name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + if name.is_empty() { + return; + } + let caller_qual = qualify(scope, &name, "."); + if let Some(body) = node.child_by_field_name("body") { + walk_python_body(&body, src, &caller_qual, resolver, out, unresolved); + } + } + "decorated_definition" => { + let mut cur = node.walk(); + let children: Vec = node.children(&mut cur).collect(); + if let Some(def) = children.last() { + collect_python_calls(def, src, scope, resolver, out, unresolved); + } + } + _ => { + let mut cur = node.walk(); + for child in node.children(&mut cur) { + collect_python_calls(&child, src, scope, resolver, out, unresolved); + } + } + } +} + +#[cfg(feature = "lang-python")] +fn walk_python_body( + node: &Node, + src: &[u8], + caller: &str, + resolver: &Resolver, + out: &mut Vec<(String, String)>, + unresolved: &mut usize, +) { + if node.kind() == "call" { + let callee_raw = node + .child_by_field_name("function") + 
.map(|n| python_callee_name(&n, src)) + .unwrap_or_default(); + if !callee_raw.is_empty() { + match resolver.resolve(&callee_raw) { + Some(target) => { + if target != caller { + out.push((caller.to_string(), target)); + } + } + None => *unresolved += 1, + } + } + } + + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk_python_body(&child, src, caller, resolver, out, unresolved); + } +} + +#[cfg(feature = "lang-python")] +fn python_callee_name(node: &Node, src: &[u8]) -> String { + match node.kind() { + "identifier" => node_text(node, src).to_string(), + "attribute" => node + .child_by_field_name("attribute") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(), + _ => String::new(), + } +} + +// --------------------------------------------------------------------------- +// Shared helpers +// --------------------------------------------------------------------------- + +#[cfg(any(feature = "lang-rust", feature = "lang-python"))] +fn node_text<'a>(node: &Node, src: &'a [u8]) -> &'a str { + std::str::from_utf8(&src[node.start_byte()..node.end_byte()]).unwrap_or("") +} + +fn qualify(scope: &[String], name: &str, sep: &str) -> String { + if scope.is_empty() { + name.to_string() + } else { + format!("{}{}{}", scope.join(sep), sep, name) + } +} + +/// Resolves a raw callee token ("foo", "method", "thing") against a known set +/// of locally-defined functions. Rules: +/// - Exact qualified match wins (e.g. "Foo::bar" → "Foo::bar"). +/// - Unique simple-name match wins when the raw token is just a bare name. +/// - Otherwise: unresolved. +/// +/// We intentionally avoid any fancier disambiguation (type inference, receiver +/// tracking) — that would need the full type system. Unique-simple is enough +/// for the "here's how functions in this file relate" use case. 
+struct Resolver<'a> { + by_qualified: std::collections::HashMap<&'a str, &'a str>, + by_simple: std::collections::HashMap<&'a str, Vec<&'a str>>, +} + +impl<'a> Resolver<'a> { + fn new(functions: &'a [FunctionInfo]) -> Self { + let mut by_qualified: std::collections::HashMap<&str, &str> = + std::collections::HashMap::new(); + let mut by_simple: std::collections::HashMap<&str, Vec<&str>> = + std::collections::HashMap::new(); + for f in functions { + by_qualified.insert(f.qualified.as_str(), f.qualified.as_str()); + by_simple + .entry(f.simple.as_str()) + .or_default() + .push(f.qualified.as_str()); + } + Resolver { by_qualified, by_simple } + } + + fn resolve(&self, raw: &str) -> Option { + if let Some(q) = self.by_qualified.get(raw) { + return Some((*q).to_string()); + } + if let Some(list) = self.by_simple.get(raw) { + if list.len() == 1 { + return Some(list[0].to_string()); + } + } + None + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[cfg(feature = "lang-rust")] + #[test] + fn rust_resolves_free_function_calls() { + let src = r#" +fn a() { b(); c(); } +fn b() { c(); } +fn c() {} +"#; + let cg = build_file_call_graph(&PathBuf::from("x.rs"), src).unwrap().unwrap(); + assert_eq!(cg.functions.len(), 3); + assert_eq!(cg.language, "rust"); + assert!(cg.calls.contains(&("a".into(), "b".into()))); + assert!(cg.calls.contains(&("a".into(), "c".into()))); + assert!(cg.calls.contains(&("b".into(), "c".into()))); + assert_eq!(cg.unresolved_count, 0); + } + + #[cfg(feature = "lang-rust")] + #[test] + fn rust_method_calls_resolve_via_simple_name() { + let src = r#" +struct S; +impl S { + fn foo(&self) { self.bar(); } + fn bar(&self) {} +} +"#; + let cg = build_file_call_graph(&PathBuf::from("x.rs"), src).unwrap().unwrap(); + assert!(cg.functions.iter().any(|f| 
f.qualified == "S::foo")); + assert!(cg.functions.iter().any(|f| f.qualified == "S::bar")); + assert!(cg.calls.contains(&("S::foo".into(), "S::bar".into())), + "missing method call edge in {:?}", cg.calls); + } + + #[cfg(feature = "lang-rust")] + #[test] + fn rust_external_calls_increment_unresolved() { + let src = r#" +fn a() { println!(); std::mem::swap(&mut 1, &mut 2); unknown(); } +"#; + let cg = build_file_call_graph(&PathBuf::from("x.rs"), src).unwrap().unwrap(); + // println! is a macro, not a call_expression, so it doesn't count. + // std::mem::swap and unknown are call_expressions with no local match. + assert!(cg.unresolved_count >= 2, "expected 2+ unresolved, got {}", cg.unresolved_count); + assert!(cg.calls.is_empty()); + } + + #[cfg(feature = "lang-rust")] + #[test] + fn rust_self_recursion_is_dropped() { + let src = r#" +fn loop_forever() { loop_forever(); } +"#; + let cg = build_file_call_graph(&PathBuf::from("x.rs"), src).unwrap().unwrap(); + // A self-edge wouldn't break rendering, but it's never interesting — + // we drop it so the diagram doesn't loop on a single node. 
+ assert!(cg.calls.is_empty(), "self-recursion should not emit edges: {:?}", cg.calls); + } + + #[cfg(feature = "lang-python")] + #[test] + fn python_resolves_free_function_calls() { + let src = "\ +def a(): + b() + c() +def b(): + c() +def c(): + pass +"; + let cg = build_file_call_graph(&PathBuf::from("x.py"), src).unwrap().unwrap(); + assert_eq!(cg.language, "python"); + assert_eq!(cg.functions.len(), 3); + assert!(cg.calls.contains(&("a".into(), "b".into()))); + assert!(cg.calls.contains(&("a".into(), "c".into()))); + assert!(cg.calls.contains(&("b".into(), "c".into()))); + } + + #[cfg(feature = "lang-python")] + #[test] + fn python_method_calls_via_attribute() { + let src = "\ +class S: + def foo(self): + self.bar() + def bar(self): + pass +"; + let cg = build_file_call_graph(&PathBuf::from("x.py"), src).unwrap().unwrap(); + assert!(cg.functions.iter().any(|f| f.qualified == "S.foo")); + assert!(cg.functions.iter().any(|f| f.qualified == "S.bar")); + assert!( + cg.calls.contains(&("S.foo".into(), "S.bar".into())), + "missing method edge: {:?}", cg.calls + ); + } + + #[test] + fn unknown_extension_returns_none() { + let cg = build_file_call_graph(&PathBuf::from("x.xyz"), "whatever").unwrap(); + assert!(cg.is_none()); + } + + #[cfg(feature = "lang-rust")] + #[test] + fn to_project_graph_emits_node_per_function() { + let src = r#" +fn a() { b(); } +fn b() {} +"#; + let cg = build_file_call_graph(&PathBuf::from("x.rs"), src).unwrap().unwrap(); + let pg = to_project_graph(&cg, &PathBuf::from("x.rs")); + assert_eq!(pg.nodes.len(), 2); + assert_eq!(pg.edges.len(), 1); + assert!(pg.nodes.iter().any(|n| n.module_id == "a")); + assert!(pg.nodes.iter().any(|n| n.module_id == "b")); + assert_eq!(pg.edges[0].edge_type, "call"); + } +} diff --git a/third_party/cartographer/mapper-core/cartographer/src/diagram.rs b/third_party/cartographer/mapper-core/cartographer/src/diagram.rs new file mode 100644 index 00000000..bef694f9 --- /dev/null +++ 
b/third_party/cartographer/mapper-core/cartographer/src/diagram.rs @@ -0,0 +1,2404 @@ +//! Shared diagram renderer for the import graph. +//! +//! Produces Mermaid or Graphviz (DOT) from a `ProjectGraphResponse`, with two +//! node-selection modes: +//! - No focus → top-N nodes by degree (fallback; "shape of the codebase"). +//! - With focus → BFS from anchor module over import edges up to `depth` +//! ("shape of the neighborhood I'm editing"). +//! +//! Used by both the CLI (`diagram_mode`) and the FFI +//! (`cartographer_render_architecture`) so CLI output and MCP output stay in +//! lock-step. + +use std::collections::{HashMap, HashSet, VecDeque}; + +use crate::api::{is_doc_path, ProjectGraphResponse}; +use crate::layers::LayerViolationType; + +/// Nodes with `hotspot_score` at or above this threshold get the `hot` overlay +/// (thick orange stroke in Mermaid, thicker orange border + larger size in DOT). +/// Picked to match the "top decile" of hotspots on real codebases. +const HOTSPOT_THRESHOLD: f64 = 70.0; + +/// Output format requested by the caller. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DiagramFormat { + Mermaid, + Dot, + /// Terminal-friendly indented tree with box-drawing characters. + /// Always rooted at a single node: `focus` if set, else the blast-radius + /// epicenter, else the most-connected node in the graph. + Ascii, +} + +impl DiagramFormat { + pub fn parse(s: &str) -> Result { + match s.to_lowercase().as_str() { + "mermaid" | "" => Ok(DiagramFormat::Mermaid), + "dot" | "graphviz" => Ok(DiagramFormat::Dot), + "ascii" | "tree" | "text" => Ok(DiagramFormat::Ascii), + other => Err(format!("unknown diagram format: {other}")), + } + } +} + +/// Rendering options. `focus` is a module_id (or suffix match on a path/module_id). +/// +/// Selection precedence: `blast_radius` > `focus` > top-by-degree. 
+#[derive(Debug, Clone)]
+pub struct RenderOptions<'a> {
+    pub format: DiagramFormat,
+    pub focus: Option<&'a str>,
+    pub depth: usize,
+    pub max_nodes: usize,
+    /// When `Some(threshold)`, overlay dotted purple edges for every co-change
+    /// pair whose `coupling_score >= threshold` and whose both endpoints are in
+    /// the included node set. `None` disables the overlay (default).
+    pub show_cochange: Option<f64>,
+    /// When `Some(target)`, override selection: included = {target} ∪ direct
+    /// dependencies ∪ direct dependents. The target module renders as an
+    /// epicenter (bold red fill). `None` uses the focus/top-by-degree path.
+    pub blast_radius: Option<&'a str>,
+    /// When `true`, filter the selection to the doc subgraph: all document
+    /// nodes (markdown/YAML/TOML/JSON) plus every code file they directly
+    /// reference. Docs render with a distinct shape regardless of this flag.
+    pub docs_only: bool,
+    /// When `Some(n)`, collapse the graph to folder granularity at path depth
+    /// `n` before rendering. All files whose path shares the same first `n`
+    /// directory components become a single folder node; edges are aggregated
+    /// (self-loops dropped, counts summed). Combines with focus/blast-radius —
+    /// selection happens after collapsing, so anchors must match folder ids.
+    pub group_by_folder_depth: Option<usize>,
+    /// When `true`, replace role-based node fills with owner-derived colors
+    /// (dominant git author mapped to a stable palette). Nodes without an
+    /// `owner` value fall through to the default (white/grey). Overlay borders
+    /// (cycle/pivot/hot/epicenter) still take precedence.
+    pub color_by_owner: bool,
+}
+
+impl<'a> RenderOptions<'a> {
+    /// Convenience constructor that fills every new overlay option with `None`.
+    /// Intended for call sites that only care about the base top-by-degree /
+    /// focused rendering and don't want to list every overlay field.
+    pub fn basic(format: DiagramFormat, max_nodes: usize) -> Self {
+        Self {
+            format,
+            focus: None,
+            depth: 2,
+            max_nodes,
+            show_cochange: None,
+            blast_radius: None,
+            docs_only: false,
+            group_by_folder_depth: None,
+            color_by_owner: false,
+        }
+    }
+}
+
+/// Hash an author name into the shared palette. Palette picked for reasonable
+/// contrast on white and for staying distinguishable when several owners
+/// appear in the same diagram. Stable across runs — the same owner always
+/// lands on the same color.
+fn owner_color(owner: &str) -> &'static str {
+    // 10-color palette. Order matters — first entries are the most visually
+    // distinct from each other; later entries fall back to neighbors.
+    const PALETTE: &[&str] = &[
+        "#a6cee3", "#b2df8a", "#fb9a99", "#fdbf6f",
+        "#cab2d6", "#ffff99", "#1f78b4", "#33a02c",
+        "#e31a1c", "#ff7f00",
+    ];
+    // FNV-1a 32-bit hash; good-enough distribution for a handful of owners.
+    let mut h: u32 = 0x811c9dc5;
+    for b in owner.bytes() {
+        h ^= b as u32;
+        h = h.wrapping_mul(0x01000193);
+    }
+    PALETTE[(h as usize) % PALETTE.len()]
+}
+
+/// Rendered diagram plus a truncation flag so callers can tell the model to
+/// tighten `focus` or lower `depth` when the cap kicked in.
+#[derive(Debug, Clone)]
+pub struct RenderedDiagram {
+    pub diagram: String,
+    pub truncated: bool,
+    pub node_count: usize,
+    /// Module ids that made it into the render. Exposed so downstream
+    /// exporters (e.g. the interactive HTML builder) can reuse the selection
+    /// without re-running focus/blast-radius logic.
+    pub included: Vec<String>,
+}
+
+/// Precomputed overlays that decorate the base import graph with architectural
+/// signals: cycles (from `graph.cycles`), layer violations (from
+/// `graph.layer_violations`), co-change pairs (from `graph.cochange_pairs`),
+/// hotspot nodes (from `GraphNode.hotspot_score`), and an epicenter marker for
+/// blast-radius renderings.
+///
+/// We precompute once per `render()` so both Mermaid and DOT rendering paths
+/// consult the same sets and stay visually consistent.
+struct Overlays<'a> {
+    cycle_nodes: HashSet<&'a str>,
+    pivot_nodes: HashSet<&'a str>,
+    cycle_edges: HashSet<(&'a str, &'a str)>,
+    violations: HashMap<(&'a str, &'a str), &'a LayerViolationType>,
+    /// Co-change pairs above threshold, keyed by (file_a, file_b). We don't
+    /// key symmetrically here — the renderer iterates this map and filters by
+    /// `included_set`, treating each pair as a single undirected coupling edge.
+    cochange: HashMap<(&'a str, &'a str), f64>,
+    /// The target of a blast-radius selection, if any. Rendered as an
+    /// epicenter (bold red fill) so the "you are here" is unambiguous.
+    epicenter: Option<&'a str>,
+}
+
+fn compute_overlays<'a>(
+    graph: &'a ProjectGraphResponse,
+    show_cochange: Option<f64>,
+    epicenter: Option<&'a str>,
+) -> Overlays<'a> {
+    let mut cycle_nodes: HashSet<&str> = HashSet::new();
+    let mut pivot_nodes: HashSet<&str> = HashSet::new();
+    let mut cycle_edges: HashSet<(&str, &str)> = HashSet::new();
+
+    for cycle in &graph.cycles {
+        let members: HashSet<&str> = cycle.nodes.iter().map(|s| s.as_str()).collect();
+        for n in &cycle.nodes {
+            cycle_nodes.insert(n.as_str());
+        }
+        if let Some(pivot) = &cycle.pivot_node {
+            pivot_nodes.insert(pivot.as_str());
+        }
+        // An edge participates in this cycle iff both endpoints are cycle members.
+        for edge in &graph.edges {
+            if members.contains(edge.source.as_str()) && members.contains(edge.target.as_str()) {
+                cycle_edges.insert((edge.source.as_str(), edge.target.as_str()));
+            }
+        }
+    }
+
+    let mut violations: HashMap<(&str, &str), &LayerViolationType> = HashMap::new();
+    for v in &graph.layer_violations {
+        // LayerViolation.source_path/target_path are actually module_ids
+        // (they come from edge_tuples in api.rs, which clone edge.source/target).
+        violations.insert(
+            (v.source_path.as_str(), v.target_path.as_str()),
+            &v.violation_type,
+        );
+    }
+
+    let mut cochange: HashMap<(&str, &str), f64> = HashMap::new();
+    if let Some(threshold) = show_cochange {
+        for p in &graph.cochange_pairs {
+            if p.coupling_score >= threshold {
+                cochange.insert((p.file_a.as_str(), p.file_b.as_str()), p.coupling_score);
+            }
+        }
+    }
+
+    Overlays { cycle_nodes, pivot_nodes, cycle_edges, violations, cochange, epicenter }
+}
+
+/// Doc-map selection. Included = all doc nodes ∪ every code file they connect
+/// to (either as source or target of an edge). Docs are identified via
+/// `api::is_doc_path`. Ordered by edge count descending so the most-connected
+/// docs survive `max_nodes` truncation first.
+fn docs_only_selection(
+    graph: &ProjectGraphResponse,
+    max_nodes: usize,
+) -> (Vec<String>, bool) {
+    let doc_ids: HashSet<&str> = graph
+        .nodes
+        .iter()
+        .filter(|n| is_doc_path(&n.path))
+        .map(|n| n.module_id.as_str())
+        .collect();
+
+    let mut neighbors: HashSet<&str> = HashSet::new();
+    for edge in &graph.edges {
+        if doc_ids.contains(edge.source.as_str()) {
+            neighbors.insert(edge.target.as_str());
+        }
+        if doc_ids.contains(edge.target.as_str()) {
+            neighbors.insert(edge.source.as_str());
+        }
+    }
+
+    // Rank each candidate by its edge count in the full graph so truncation
+    // keeps the most-connected nodes. Doc nodes are listed before code
+    // neighbors so a heavy truncation still shows the docs themselves.
+    let mut degree: HashMap<&str, usize> = HashMap::new();
+    for edge in &graph.edges {
+        *degree.entry(edge.source.as_str()).or_insert(0) += 1;
+        *degree.entry(edge.target.as_str()).or_insert(0) += 1;
+    }
+
+    let mut docs: Vec<&str> = doc_ids.iter().copied().collect();
+    docs.sort_by(|a, b| {
+        degree.get(b).copied().unwrap_or(0)
+            .cmp(&degree.get(a).copied().unwrap_or(0))
+            .then_with(|| a.cmp(b))
+    });
+
+    let mut code: Vec<&str> = neighbors.difference(&doc_ids).copied().collect();
+    code.sort_by(|a, b| {
+        degree.get(b).copied().unwrap_or(0)
+            .cmp(&degree.get(a).copied().unwrap_or(0))
+            .then_with(|| a.cmp(b))
+    });
+
+    let mut ordered: Vec<String> = docs.iter().map(|s| s.to_string()).collect();
+    ordered.extend(code.iter().map(|s| s.to_string()));
+
+    let truncated = ordered.len() > max_nodes;
+    ordered.truncate(max_nodes);
+    (ordered, truncated)
+}
+
+/// Blast-radius selection. Included = {target} ∪ direct deps ∪ direct dependents,
+/// capped at `max_nodes`. Computed purely from the graph — no `ApiState` needed.
+///
+/// The target is resolved with the same rules as `bfs_from_anchor`: exact
+/// module_id, exact path, then path/module_id suffix match.
+fn blast_radius_selection(
+    graph: &ProjectGraphResponse,
+    target: &str,
+    max_nodes: usize,
+) -> Result<(Vec<String>, bool), String> {
+    let resolved = graph
+        .nodes
+        .iter()
+        .find(|n| n.module_id == target)
+        .or_else(|| graph.nodes.iter().find(|n| n.path == target))
+        .or_else(|| {
+            graph
+                .nodes
+                .iter()
+                .find(|n| n.module_id.ends_with(target) || n.path.ends_with(target))
+        })
+        .ok_or_else(|| format!("blast-radius target not found in graph: {target}"))?;
+
+    let epicenter_id = resolved.module_id.clone();
+    let mut included: Vec<String> = Vec::new();
+    let mut seen: HashSet<String> = HashSet::new();
+
+    // Epicenter first so it stays in the output even under truncation.
+    included.push(epicenter_id.clone());
+    seen.insert(epicenter_id.clone());
+
+    // Direct dependencies: where epicenter is the source.
+    for edge in &graph.edges {
+        if edge.source == epicenter_id && seen.insert(edge.target.clone()) {
+            included.push(edge.target.clone());
+        }
+    }
+    // Direct dependents: where epicenter is the target.
+    for edge in &graph.edges {
+        if edge.target == epicenter_id && seen.insert(edge.source.clone()) {
+            included.push(edge.source.clone());
+        }
+    }
+
+    let truncated = included.len() > max_nodes;
+    included.truncate(max_nodes);
+    Ok((included, truncated))
+}
+
+/// Collapse a project graph to folder granularity. All files whose path shares
+/// the same first `depth` directory components are merged into a single folder
+/// node; edges are aggregated, with intra-folder self-loops dropped. Signature
+/// counts sum; hotspot score is the max across member files. Language is set
+/// to `"folder"` so renderers can give folder nodes a distinct shape.
+///
+/// `depth` of 0 collapses everything to a single root — not useful, so we
+/// treat 0 as "don't collapse". `depth` beyond any file's directory depth just
+/// keeps that file as its own node (folder = its full parent path).
+fn collapse_by_folder(graph: &ProjectGraphResponse, depth: usize) -> ProjectGraphResponse {
+    use crate::api::{GraphMetadata, GraphNode, GraphEdge};
+
+    fn folder_key(path: &str, depth: usize) -> String {
+        let parts: Vec<&str> = path.split('/').collect();
+        // File sits at parts[parts.len()-1]; directories are parts[0..len-1].
+        let dir_parts = &parts[..parts.len().saturating_sub(1)];
+        let take = depth.min(dir_parts.len());
+        if take == 0 {
+            // File sits at the root — group under "(root)" so it's one folder.
+            "(root)".to_string()
+        } else {
+            dir_parts[..take].join("/")
+        }
+    }
+
+    // Map each module_id to its folder id.
+    let mut member_folder: HashMap<String, String> = HashMap::new();
+    // Aggregate per-folder state.
+    let mut folder_files: HashMap<String, Vec<&GraphNode>> = HashMap::new();
+
+    for node in &graph.nodes {
+        let fid = folder_key(&node.path, depth);
+        member_folder.insert(node.module_id.clone(), fid.clone());
+        folder_files.entry(fid).or_default().push(node);
+    }
+
+    // Build folder nodes. We stash the file count in `signature_count`'s sibling
+    // field `fan_in` so the renderer can show "N files" — but simpler: encode it
+    // directly in the label via a dedicated render branch keyed off language.
+    let mut nodes: Vec<GraphNode> = Vec::with_capacity(folder_files.len());
+    for (fid, files) in &folder_files {
+        let signature_count: usize = files.iter().map(|n| n.signature_count).sum();
+        let hotspot_score = files
+            .iter()
+            .filter_map(|n| n.hotspot_score)
+            .fold(None::<f64>, |acc, v| Some(acc.map_or(v, |a| a.max(v))));
+        // fan_in repurposed to carry member file count for the renderer label.
+        let member_count: usize = files.len();
+
+        nodes.push(GraphNode {
+            module_id: fid.clone(),
+            path: fid.clone(),
+            language: "folder".into(),
+            signature_count,
+            complexity: None,
+            is_bridge: None,
+            bridge_score: None,
+            degree: None,
+            risk_level: None,
+            churn: None,
+            hotspot_score,
+            role: None,
+            is_dead: None,
+            unreferenced_exports: None,
+            fan_in: Some(member_count),
+            fan_out: None,
+            cochange_partners: None,
+            cochange_entropy: None,
+            owner: None,
+        });
+    }
+
+    // Aggregate edges. (src_folder, tgt_folder) → count. Drop self-loops.
+    let mut edge_counts: HashMap<(String, String), u32> = HashMap::new();
+    for e in &graph.edges {
+        let Some(sf) = member_folder.get(&e.source) else { continue };
+        let Some(tf) = member_folder.get(&e.target) else { continue };
+        if sf == tf {
+            continue;
+        }
+        *edge_counts.entry((sf.clone(), tf.clone())).or_insert(0) += 1;
+    }
+
+    let edges: Vec<GraphEdge> = edge_counts
+        .into_iter()
+        .map(|((src, tgt), _)| GraphEdge {
+            source: src,
+            target: tgt,
+            edge_type: "import".into(),
+            at_range: None,
+        })
+        .collect();
+
+    // Cycles/violations/cochange don't survive collapse — they describe the
+    // file-level graph and would be ambiguous at folder granularity. Callers
+    // who want those overlays should render the file-level view.
+    ProjectGraphResponse {
+        nodes,
+        edges,
+        cycles: vec![],
+        god_modules: vec![],
+        layer_violations: vec![],
+        metadata: GraphMetadata {
+            total_files: graph.metadata.total_files,
+            total_edges: graph.metadata.total_edges,
+            languages: HashMap::new(),
+            generated_at: graph.metadata.generated_at.clone(),
+            bridge_count: None,
+            cycle_count: None,
+            god_module_count: None,
+            health_score: None,
+            layer_violation_count: None,
+            architectural_drift: None,
+            hotspot_count: None,
+            dead_code_count: None,
+            unreferenced_exports_count: None,
+        },
+        cochange_pairs: vec![],
+    }
+}
+
+/// Render an import-graph diagram. Pure over `graph` — no I/O.
+pub fn render(graph: &ProjectGraphResponse, opts: &RenderOptions) -> Result<RenderedDiagram, String> {
+    let max_nodes = opts.max_nodes.max(1);
+
+    // Folder collapse happens before anything else so focus/blast_radius and
+    // overlays all see the collapsed view. Overlays derived from the file-level
+    // graph (cycles, violations, cochange) are intentionally dropped.
+    let collapsed: Option<ProjectGraphResponse> = opts
+        .group_by_folder_depth
+        .filter(|&d| d > 0)
+        .map(|d| collapse_by_folder(graph, d));
+    let graph: &ProjectGraphResponse = collapsed.as_ref().unwrap_or(graph);
+
+    // Selection precedence: blast_radius > focus > docs_only > top-by-degree.
+ // Blast radius resolves the target and overrides everything else so + // callers don't have to null neighboring options. + let (included, truncated, epicenter) = match (opts.blast_radius, opts.focus, opts.docs_only) { + (Some(target), _, _) => { + let (inc, trunc) = blast_radius_selection(graph, target, max_nodes)?; + // `inc[0]` is the epicenter module_id (pushed first in selection). + let epi = inc.first().cloned(); + (inc, trunc, epi) + } + (None, Some(anchor), _) => { + let (inc, trunc) = bfs_from_anchor(graph, anchor, opts.depth, max_nodes)?; + (inc, trunc, None) + } + (None, None, true) => { + let (inc, trunc) = docs_only_selection(graph, max_nodes); + (inc, trunc, None) + } + (None, None, false) => { + let (inc, trunc) = top_by_degree(graph, max_nodes); + (inc, trunc, None) + } + }; + + let included_set: HashSet<&str> = included.iter().map(|s| s.as_str()).collect(); + + // Map module_id -> node for stable lookup during rendering. + let node_by_id: HashMap<&str, &crate::api::GraphNode> = graph + .nodes + .iter() + .map(|n| (n.module_id.as_str(), n)) + .collect(); + + // We need a stable &str borrow of the epicenter id for the Overlays lifetime. + // Reuse the node_by_id key to get a borrow that lives as long as `graph`. 
+ let epicenter_ref: Option<&str> = epicenter + .as_deref() + .and_then(|id| node_by_id.get_key_value(id).map(|(k, _)| *k)); + + let overlays = compute_overlays(graph, opts.show_cochange, epicenter_ref); + + let content = match opts.format { + DiagramFormat::Dot => render_dot(&included, &included_set, &node_by_id, graph, &overlays, opts.color_by_owner), + DiagramFormat::Mermaid => render_mermaid(&included, &included_set, &node_by_id, graph, &overlays, opts.color_by_owner), + DiagramFormat::Ascii => render_ascii( + &included, &included_set, &node_by_id, graph, &overlays, + opts.focus, opts.blast_radius, opts.depth, + ), + }; + + let node_count = included.len(); + Ok(RenderedDiagram { diagram: content, truncated, node_count, included }) +} + +/// Build the node selection without rendering. Exposed so the HTML exporter +/// (and other future non-text renderers) can reuse the same selection rules +/// as Mermaid/DOT — focus, blast-radius, docs-only, folder-collapse. +/// +/// Returns `(graph_to_render, included_module_ids, truncated)`. When folder +/// collapsing is active, `graph_to_render` is an owned collapsed +/// `ProjectGraphResponse`; otherwise it's `None` and the caller uses the +/// original graph. 
+pub fn select_for_render(
+    graph: &ProjectGraphResponse,
+    opts: &RenderOptions,
+) -> Result<(Option<ProjectGraphResponse>, Vec<String>, bool), String> {
+    let max_nodes = opts.max_nodes.max(1);
+    let collapsed: Option<ProjectGraphResponse> = opts
+        .group_by_folder_depth
+        .filter(|&d| d > 0)
+        .map(|d| collapse_by_folder(graph, d));
+    let g: &ProjectGraphResponse = collapsed.as_ref().unwrap_or(graph);
+
+    let (included, truncated, _epi) = match (opts.blast_radius, opts.focus, opts.docs_only) {
+        (Some(target), _, _) => {
+            let (inc, trunc) = blast_radius_selection(g, target, max_nodes)?;
+            let epi = inc.first().cloned();
+            (inc, trunc, epi)
+        }
+        (None, Some(anchor), _) => {
+            let (inc, trunc) = bfs_from_anchor(g, anchor, opts.depth, max_nodes)?;
+            (inc, trunc, None)
+        }
+        (None, None, true) => {
+            let (inc, trunc) = docs_only_selection(g, max_nodes);
+            (inc, trunc, None)
+        }
+        (None, None, false) => {
+            let (inc, trunc) = top_by_degree(g, max_nodes);
+            (inc, trunc, None)
+        }
+    };
+    Ok((collapsed, included, truncated))
+}
+
+fn top_by_degree(graph: &ProjectGraphResponse, max_nodes: usize) -> (Vec<String>, bool) {
+    let mut degree: HashMap<&str, usize> = HashMap::new();
+    for edge in &graph.edges {
+        *degree.entry(edge.source.as_str()).or_insert(0) += 1;
+        *degree.entry(edge.target.as_str()).or_insert(0) += 1;
+    }
+
+    let mut ranked: Vec<&crate::api::GraphNode> = graph
+        .nodes
+        .iter()
+        .filter(|n| degree.get(n.module_id.as_str()).copied().unwrap_or(0) > 0)
+        .collect();
+    ranked.sort_by(|a, b| {
+        let da = degree.get(a.module_id.as_str()).copied().unwrap_or(0);
+        let db = degree.get(b.module_id.as_str()).copied().unwrap_or(0);
+        db.cmp(&da)
+            .then_with(|| a.module_id.cmp(&b.module_id))
+    });
+
+    let truncated = ranked.len() > max_nodes;
+    ranked.truncate(max_nodes);
+
+    (ranked.into_iter().map(|n| n.module_id.clone()).collect(), truncated)
+}
+
+/// Undirected BFS from `anchor` over import edges.
We treat imports as
+/// bidirectional here because "the area I'm editing" includes both what I
+/// import *and* what imports me — callers usually want the full neighborhood.
+fn bfs_from_anchor(
+    graph: &ProjectGraphResponse,
+    anchor: &str,
+    depth: usize,
+    max_nodes: usize,
+) -> Result<(Vec<String>, bool), String> {
+    // Resolve anchor: accept exact module_id match, then path suffix match.
+    let resolved = graph
+        .nodes
+        .iter()
+        .find(|n| n.module_id == anchor)
+        .or_else(|| graph.nodes.iter().find(|n| n.path == anchor))
+        .or_else(|| graph.nodes.iter().find(|n| n.module_id.ends_with(anchor) || n.path.ends_with(anchor)))
+        .ok_or_else(|| format!("focus not found in graph: {anchor}"))?;
+
+    let start = resolved.module_id.clone();
+
+    // Build an adjacency map (undirected) once.
+    let mut adj: HashMap<&str, Vec<&str>> = HashMap::new();
+    for edge in &graph.edges {
+        adj.entry(edge.source.as_str()).or_default().push(edge.target.as_str());
+        adj.entry(edge.target.as_str()).or_default().push(edge.source.as_str());
+    }
+
+    let mut visited: HashSet<String> = HashSet::new();
+    let mut order: Vec<String> = Vec::new();
+    let mut queue: VecDeque<(String, usize)> = VecDeque::new();
+
+    visited.insert(start.clone());
+    order.push(start.clone());
+    queue.push_back((start, 0));
+
+    let mut truncated = false;
+
+    while let Some((module, d)) = queue.pop_front() {
+        if d >= depth {
+            continue;
+        }
+        if let Some(neighbors) = adj.get(module.as_str()) {
+            for &n in neighbors {
+                if visited.insert(n.to_string()) {
+                    if order.len() >= max_nodes {
+                        truncated = true;
+                        // Drain the queue so we stop adding further frontier nodes.
+ queue.clear(); + break; + } + order.push(n.to_string()); + queue.push_back((n.to_string(), d + 1)); + } + } + } + } + + Ok((order, truncated)) +} + +fn render_dot( + included: &[String], + included_set: &HashSet<&str>, + node_by_id: &HashMap<&str, &crate::api::GraphNode>, + graph: &ProjectGraphResponse, + overlays: &Overlays, + color_by_owner: bool, +) -> String { + let mut out = String::from("digraph cartographer {\n rankdir=LR;\n"); + for module_id in included { + let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; + let label = node.path.rsplit('/').next().unwrap_or(&node.path); + let fill = if color_by_owner { + node.owner.as_deref().map(owner_color).unwrap_or("#fff") + } else { + role_color_dot(node.role.as_deref()) + }; + + let mid = module_id.as_str(); + let is_epicenter = overlays.epicenter == Some(mid); + let is_pivot = overlays.pivot_nodes.contains(mid); + let in_cycle = overlays.cycle_nodes.contains(mid); + let score = node.hotspot_score.unwrap_or(0.0).clamp(0.0, 100.0); + let hot = score >= HOTSPOT_THRESHOLD; + + // Epicenter overrides everything — this is the "you are here" marker + // for blast-radius renderings. Otherwise: pivot > cycle > hot > default. + // Pivot is dashed to distinguish it inside a red-bordered cycle. + let (fill_override, border_color, pen_width, extra_style) = if is_epicenter { + (Some("#ff3333"), "#660000", 4.0, "") + } else if is_pivot { + (None, "#cc0000", 3.0, ",dashed") + } else if in_cycle { + (None, "#cc0000", 3.0, "") + } else if hot { + (None, "#ff6600", 3.0, "") + } else { + (None, "#333333", 1.0, "") + }; + let actual_fill = fill_override.unwrap_or(fill); + + // Hotspot-driven sizing. score ∈ [0,100] → width ∈ [0.75, 1.80], + // height ∈ [0.50, 0.90], fontsize ∈ [10, 16]. Nodes without a score + // render at the default size. 
+ let width = 0.75 + (score / 100.0) * 1.05; + let height = 0.50 + (score / 100.0) * 0.40; + let fontsize = 10 + ((score / 100.0) * 6.0) as u32; + + // Doc nodes render as `shape=note` with a light yellow fill so readers + // can distinguish documentation from code at a glance. Folder-collapsed + // nodes use `shape=folder` with a light blue fill and a "(N files)" + // count inline in the label. Epicenter fill still wins when set. + let is_doc = is_doc_path(&node.path); + let is_folder = node.language == "folder"; + let shape = if is_folder { + "folder" + } else if is_doc { + "note" + } else { + "box" + }; + let final_fill = if fill_override.is_some() { + actual_fill + } else if is_folder { + "#d6e9ff" + } else if is_doc { + "#fff4c0" + } else { + actual_fill + }; + let unit_label = if is_doc { "sec" } else { "fn" }; + + if is_folder { + let files = node.fan_in.unwrap_or(0); + let folder_label = if node.module_id == "(root)" { "(root)" } else { label }; + out.push_str(&format!( + " \"{}\" [label=\"{}/\\n{} files, {} fn\" shape={} style=\"filled{}\" fillcolor=\"{}\" color=\"{}\" penwidth={:.1} width={:.2} height={:.2} fontsize={}];\n", + node.module_id, folder_label, files, node.signature_count, + shape, extra_style, final_fill, border_color, pen_width, width, height, fontsize + )); + } else { + out.push_str(&format!( + " \"{}\" [label=\"{}\\n{} {}\" shape={} style=\"filled{}\" fillcolor=\"{}\" color=\"{}\" penwidth={:.1} width={:.2} height={:.2} fontsize={}];\n", + node.module_id, label, node.signature_count, unit_label, + shape, extra_style, final_fill, border_color, pen_width, width, height, fontsize + )); + } + } + for edge in &graph.edges { + if !(included_set.contains(edge.source.as_str()) + && included_set.contains(edge.target.as_str())) + { + continue; + } + let key = (edge.source.as_str(), edge.target.as_str()); + let viol = overlays.violations.get(&key).copied(); + let in_cycle = overlays.cycle_edges.contains(&key); + + let (color, style, pen) = match viol 
{ + Some(LayerViolationType::BackCall) + | Some(LayerViolationType::CircularCrossLayer) => ("#cc0000", "dashed", 2.5), + Some(LayerViolationType::SkipCall) => ("#ff9900", "dotted", 2.0), + Some(LayerViolationType::DirectForeignImport) => ("#cccc00", "dotted", 1.5), + None if in_cycle => ("#cc0000", "solid", 2.5), + None => ("#666666", "solid", 1.0), + }; + + out.push_str(&format!( + " \"{}\" -> \"{}\" [color=\"{}\" style={} penwidth={:.1}];\n", + edge.source, edge.target, color, style, pen + )); + } + + // Co-change overlay edges. `constraint=false` keeps DOT's layout engine + // from treating these as part of the import DAG — they'd otherwise pull + // unrelated nodes together and blow up the layout. Rendered bidirectionally + // as `arrowhead=none` to signal these are coupling, not dependency. + for ((a, b), score) in &overlays.cochange { + if !(included_set.contains(a) && included_set.contains(b)) { + continue; + } + out.push_str(&format!( + " \"{}\" -> \"{}\" [color=\"#8844cc\" style=dotted penwidth={:.1} arrowhead=none constraint=false label=\"{:.2}\" fontsize=9 fontcolor=\"#8844cc\"];\n", + a, b, 1.0 + score * 2.0, score + )); + } + + out.push('}'); + out +} + +fn render_mermaid( + included: &[String], + included_set: &HashSet<&str>, + node_by_id: &HashMap<&str, &crate::api::GraphNode>, + graph: &ProjectGraphResponse, + overlays: &Overlays, + color_by_owner: bool, +) -> String { + let mut out = String::from("graph TD\n"); + out.push_str(" classDef bridge fill:#f96,stroke:#333\n"); + out.push_str(" classDef core fill:#9cf,stroke:#333\n"); + out.push_str(" classDef dead fill:#ccc,stroke:#333\n"); + out.push_str(" classDef entry fill:#9f9,stroke:#333\n"); + out.push_str(" classDef cycle stroke:#c00,stroke-width:3px\n"); + out.push_str(" classDef pivot stroke:#c00,stroke-width:3px,stroke-dasharray:5 5\n"); + out.push_str(" classDef hot stroke:#f60,stroke-width:3px\n"); + out.push_str(" classDef epicenter fill:#f33,stroke:#600,stroke-width:4px,color:#fff\n"); + 
out.push_str(" classDef doc fill:#fff4c0,stroke:#aa8,stroke-dasharray:3 2\n"); + out.push_str(" classDef folder fill:#d6e9ff,stroke:#468,stroke-width:2px\n"); + + let id_map: HashMap<&str, usize> = included + .iter() + .enumerate() + .map(|(i, m)| (m.as_str(), i)) + .collect(); + + // Node declarations carry the inline role class (:::core / :::bridge / etc). + // Overlay classes (cycle/pivot/hot) are applied via separate `class` statements + // below so a node can wear multiple classes without relying on inline chaining. + // Doc nodes use Mermaid stadium shape `([...])` + "sec" label; folder nodes + // use subroutine shape `[[...]]` + "N files, M fn" label. + for module_id in included { + let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; + let i = id_map[module_id.as_str()]; + let label = node.path.rsplit('/').next().unwrap_or(&node.path); + let is_doc = is_doc_path(&node.path); + let is_folder = node.language == "folder"; + // In owner-color mode we drop the role class suffix so the per-node + // `style` directive we emit below wins without fighting the classDef. + let class_suffix = if color_by_owner { "" } else { role_class_suffix(node.role.as_deref()) }; + if is_folder { + let files = node.fan_in.unwrap_or(0); + let folder_label = if node.module_id == "(root)" { "(root)" } else { label }; + out.push_str(&format!( + " N{}[[\"{}/\\n{} files, {} fn\"]]{}\n", + i, folder_label, files, node.signature_count, class_suffix + )); + } else { + let unit_label = if is_doc { "sec" } else { "fn" }; + let (open, close) = if is_doc { ("([\"", "\"])") } else { ("[\"", "\"]") }; + out.push_str(&format!( + " N{}{}{}\\n{} {}{}{}\n", + i, open, label, node.signature_count, unit_label, close, class_suffix + )); + } + } + + // Owner coloring emits per-node style directives. Overlay borders + // (cycle/pivot/hot/epicenter) are applied via stroke-only classes, so + // they don't collide with the fill we set here. 
+ if color_by_owner { + for module_id in included { + let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; + let i = id_map[module_id.as_str()]; + if let Some(owner) = node.owner.as_deref() { + out.push_str(&format!( + " style N{} fill:{},stroke:#333\n", + i, owner_color(owner) + )); + } + } + } + + // Overlay class assignments. Epicenter wins outright so blast-radius + // renderings have an unambiguous "you are here" marker. Otherwise pivot + // takes precedence over cycle so pivots are visually distinguishable + // inside a cycle. + for module_id in included { + let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; + let i = id_map[module_id.as_str()]; + let mid = module_id.as_str(); + let mut extras: Vec<&str> = Vec::new(); + if overlays.epicenter == Some(mid) { + extras.push("epicenter"); + } else if overlays.pivot_nodes.contains(mid) { + extras.push("pivot"); + } else if overlays.cycle_nodes.contains(mid) { + extras.push("cycle"); + } + if node.hotspot_score.unwrap_or(0.0) >= HOTSPOT_THRESHOLD + && overlays.epicenter != Some(mid) + { + extras.push("hot"); + } + // Doc nodes get the `doc` overlay class on top of whatever else they + // wear. Epicenter still wins visually because `class` statements apply + // in order and later declarations override earlier ones in Mermaid. + if is_doc_path(&node.path) && overlays.epicenter != Some(mid) { + extras.push("doc"); + } + // Folder nodes get the `folder` overlay class (blue fill + thick stroke). + if node.language == "folder" && overlays.epicenter != Some(mid) { + extras.push("folder"); + } + if !extras.is_empty() { + out.push_str(&format!(" class N{} {};\n", i, extras.join(","))); + } + } + + // Edges. We emit them in source order and remember each edge's index so we + // can append `linkStyle` directives for cycle/violation edges at the end. 
+ let mut edge_index: usize = 0; + let mut link_styles: Vec<(usize, &'static str)> = Vec::new(); + for edge in &graph.edges { + if !(included_set.contains(edge.source.as_str()) + && included_set.contains(edge.target.as_str())) + { + continue; + } + let (Some(&si), Some(&ti)) = ( + id_map.get(edge.source.as_str()), + id_map.get(edge.target.as_str()), + ) else { + continue; + }; + let key = (edge.source.as_str(), edge.target.as_str()); + let viol = overlays.violations.get(&key).copied(); + let in_cycle = overlays.cycle_edges.contains(&key); + + // Arrow: `==>` for plain cycles, `-.->` for any violation (dotted + // Mermaid arrow covers both back-calls and skip-calls visually; + // linkStyle below distinguishes them by colour/dash). + let arrow = match (viol, in_cycle) { + (Some(_), _) => "-.->", + (None, true) => "==>", + (None, false) => "-->", + }; + out.push_str(&format!(" N{} {} N{}\n", si, arrow, ti)); + + let style: Option<&'static str> = match viol { + Some(LayerViolationType::BackCall) + | Some(LayerViolationType::CircularCrossLayer) => { + Some("stroke:#c00,stroke-width:2.5px,stroke-dasharray:6 3") + } + Some(LayerViolationType::SkipCall) => { + Some("stroke:#f90,stroke-width:2px,stroke-dasharray:3 3") + } + Some(LayerViolationType::DirectForeignImport) => { + Some("stroke:#cc0,stroke-width:1.5px,stroke-dasharray:2 2") + } + None if in_cycle => Some("stroke:#c00,stroke-width:2.5px"), + None => None, + }; + if let Some(s) = style { + link_styles.push((edge_index, s)); + } + edge_index += 1; + } + + // Co-change overlay edges. Mermaid lacks a directionless arrow; we use + // `---` (plain line) so readers don't mistake these for imports. Each gets + // a linkStyle directive that dashes them purple. 
+ for ((a, b), score) in &overlays.cochange { + if !(included_set.contains(a) && included_set.contains(b)) { + continue; + } + let (Some(&ai), Some(&bi)) = (id_map.get(a), id_map.get(b)) else { + continue; + }; + // Mermaid uses `---|label|` for edge labels on undirected-style lines. + out.push_str(&format!(" N{} ---|{:.2}| N{}\n", ai, score, bi)); + link_styles.push(( + edge_index, + "stroke:#84c,stroke-width:2px,stroke-dasharray:2 4", + )); + edge_index += 1; + } + + for (idx, style) in link_styles { + out.push_str(&format!(" linkStyle {} {}\n", idx, style)); + } + out +} + +/// Render a terminal-friendly indented tree. Always single-rooted — the idea +/// is "what does this one module reach, and where does it fit" which falls +/// apart if we emit a forest. Cycles are broken with a `↑ seen` marker so the +/// output stays bounded and readable. +/// +/// Root selection: explicit `focus` → blast_radius epicenter → first node in +/// `included` (which is top-by-degree #1 under the default selection). +#[allow(clippy::too_many_arguments)] +fn render_ascii( + included: &[String], + included_set: &HashSet<&str>, + node_by_id: &HashMap<&str, &crate::api::GraphNode>, + graph: &ProjectGraphResponse, + overlays: &Overlays, + focus: Option<&str>, + blast_radius: Option<&str>, + depth: usize, +) -> String { + // Directed adjacency over included edges only. We walk imports in their + // natural direction (source → target) so the tree reads "X depends on Y". + let mut adj: HashMap<&str, Vec<&str>> = HashMap::new(); + for edge in &graph.edges { + let s = edge.source.as_str(); + let t = edge.target.as_str(); + if included_set.contains(s) && included_set.contains(t) { + adj.entry(s).or_default().push(t); + } + } + for targets in adj.values_mut() { + targets.sort(); + targets.dedup(); + } + + // Pick the root. Fall back through explicit > epicenter > best by out-degree. 
+ // + // "First included" as the fallback is what top_by_degree gives us (#1 by + // total degree), but for a tree that's the wrong signal: a node with high + // in-degree and zero out-degree would render as a lone root with an + // empty subtree. We want the node that *reaches* the most, so we rank + // included nodes by out-degree within included_set before falling back. + let root: &str = match (focus, blast_radius, overlays.epicenter, included.first()) { + (Some(anchor), _, _, _) => { + // Re-resolve the same way bfs_from_anchor did so we land on the + // actual module_id (the anchor may have been a path suffix). + included + .iter() + .find(|m| m.as_str() == anchor) + .map(|s| s.as_str()) + .or_else(|| { + included + .iter() + .find(|m| { + node_by_id + .get(m.as_str()) + .map(|n| n.path.ends_with(anchor) || n.module_id.ends_with(anchor)) + .unwrap_or(false) + }) + .map(|s| s.as_str()) + }) + .or_else(|| included.first().map(|s| s.as_str())) + .unwrap_or("") + } + (None, Some(_), Some(epi), _) => epi, + (None, None, _, Some(_)) => { + let best = included + .iter() + .map(|m| m.as_str()) + .max_by_key(|m| adj.get(m).map(|v| v.len()).unwrap_or(0)); + best.unwrap_or("") + } + _ => "", + }; + + if root.is_empty() { + return String::from("(empty graph)\n"); + } + + // DFS with visited tracking. `depth` from RenderOptions is the traversal + // cap; 0 means "just the root". We default the practical cap to 32 if the + // caller passed 0, so top-by-degree invocations still produce useful output. + let effective_depth = if depth == 0 && focus.is_none() { 32 } else { depth }; + + let mut out = String::new(); + // Header line: the root itself, un-prefixed. 
+ out.push_str(&ascii_label(root, node_by_id, overlays)); + out.push('\n'); + + let mut visited: HashSet<&str> = HashSet::new(); + visited.insert(root); + + let children: Vec<&str> = adj.get(root).cloned().unwrap_or_default(); + for (i, child) in children.iter().enumerate() { + let is_last = i + 1 == children.len(); + ascii_walk( + child, + &adj, + node_by_id, + overlays, + &mut visited, + &mut out, + "", + is_last, + 1, + effective_depth, + ); + } + + // Orphans: other included nodes not reachable from the root. Report as a + // flat tail so the user sees them without losing the tree structure. + let mut orphans: Vec<&str> = included + .iter() + .map(|s| s.as_str()) + .filter(|m| !visited.contains(m)) + .collect(); + if !orphans.is_empty() { + orphans.sort(); + out.push_str("\n(disconnected)\n"); + for (i, m) in orphans.iter().enumerate() { + let is_last = i + 1 == orphans.len(); + let branch = if is_last { "└── " } else { "├── " }; + out.push_str(branch); + out.push_str(&ascii_label(m, node_by_id, overlays)); + out.push('\n'); + } + } + + out +} + +#[allow(clippy::too_many_arguments)] +fn ascii_walk<'a>( + node: &'a str, + adj: &HashMap<&'a str, Vec<&'a str>>, + node_by_id: &HashMap<&'a str, &crate::api::GraphNode>, + overlays: &Overlays, + visited: &mut HashSet<&'a str>, + out: &mut String, + prefix: &str, + is_last: bool, + current_depth: usize, + max_depth: usize, +) { + let branch = if is_last { "└── " } else { "├── " }; + out.push_str(prefix); + out.push_str(branch); + + if visited.contains(node) { + // Cycle or re-entry — emit a terminator so the output stays bounded. 
+ out.push_str(&ascii_label(node, node_by_id, overlays)); + out.push_str(" ↑ seen\n"); + return; + } + visited.insert(node); + + out.push_str(&ascii_label(node, node_by_id, overlays)); + out.push('\n'); + + if current_depth >= max_depth { + return; + } + + let children: Vec<&str> = adj.get(node).cloned().unwrap_or_default(); + if children.is_empty() { + return; + } + + let child_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " }); + for (i, child) in children.iter().enumerate() { + let last = i + 1 == children.len(); + ascii_walk( + child, + adj, + node_by_id, + overlays, + visited, + out, + &child_prefix, + last, + current_depth + 1, + max_depth, + ); + } +} + +fn ascii_label( + module_id: &str, + node_by_id: &HashMap<&str, &crate::api::GraphNode>, + overlays: &Overlays, +) -> String { + let Some(node) = node_by_id.get(module_id) else { + return module_id.to_string(); + }; + let name = node.path.rsplit('/').next().unwrap_or(&node.path); + let unit = if is_doc_path(&node.path) { "sec" } else { "fn" }; + + // Overlay markers — mirror what Mermaid/DOT apply, but flattened into + // ASCII-safe suffixes. + let mut tags: Vec<&str> = Vec::new(); + if overlays.epicenter == Some(module_id) { + tags.push("★ epicenter"); + } + if overlays.cycle_nodes.contains(module_id) { + tags.push("◉ cycle"); + } + if overlays.pivot_nodes.contains(module_id) { + tags.push("✦ pivot"); + } + let is_hot = node.hotspot_score.unwrap_or(0.0) >= HOTSPOT_THRESHOLD; + if is_hot { + tags.push("♨ hot"); + } + if let Some(role) = node.role.as_deref() { + // Role is a short word (core/bridge/dead/entry); inline it plainly. 
+ tags.push(role); + } + + let tag_suffix = if tags.is_empty() { + String::new() + } else { + format!(" [{}]", tags.join(", ")) + }; + + format!("{} ({} {}){}", name, node.signature_count, unit, tag_suffix) +} + +fn role_color_dot(role: Option<&str>) -> &'static str { + match role { + Some("core") => "#9cf", + Some("bridge") => "#f96", + Some("dead") => "#ccc", + Some("entry") => "#9f9", + _ => "#fff", + } +} + +fn role_class_suffix(role: Option<&str>) -> &'static str { + match role { + Some("core") => ":::core", + Some("bridge") => ":::bridge", + Some("dead") => ":::dead", + Some("entry") => ":::entry", + _ => "", + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::api::{CycleInfo, GraphEdge, GraphMetadata, GraphNode, ProjectGraphResponse}; + use crate::layers::{LayerViolation, LayerViolationType}; + use std::collections::HashMap; + + fn node(id: &str, role: Option<&str>) -> GraphNode { + GraphNode { + module_id: id.into(), + path: format!("src/{}.rs", id), + language: "rust".into(), + signature_count: 3, + complexity: None, + is_bridge: None, + bridge_score: None, + degree: None, + risk_level: None, + churn: None, + hotspot_score: None, + role: role.map(String::from), + is_dead: None, + unreferenced_exports: None, + fan_in: None, + fan_out: None, + cochange_partners: None, + cochange_entropy: None, + owner: None, + } + } + + fn edge(src: &str, tgt: &str) -> GraphEdge { + GraphEdge { + source: src.into(), + target: tgt.into(), + edge_type: "import".into(), + at_range: None, + } + } + + fn fixture() -> ProjectGraphResponse { + ProjectGraphResponse { + nodes: vec![ + node("a", Some("core")), + node("b", None), + node("c", Some("bridge")), + node("d", None), + node("isolated", None), + ], + edges: vec![edge("a", "b"), edge("b", "c"), edge("c", "d")], + cycles: vec![], + god_modules: vec![], + layer_violations: vec![], + metadata: GraphMetadata { + total_files: 5, + total_edges: 3, + languages: HashMap::new(), + generated_at: "".into(), + bridge_count: 
None, + cycle_count: None, + god_module_count: None, + health_score: None, + layer_violation_count: None, + architectural_drift: None, + hotspot_count: None, + dead_code_count: None, + unreferenced_exports_count: None, + }, + cochange_pairs: vec![], + } + } + + #[test] + fn top_n_skips_isolated_nodes() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(!r.diagram.contains("isolated")); + assert_eq!(r.node_count, 4); + assert!(!r.truncated); + } + + #[test] + fn top_n_truncates_and_reports() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 2, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(r.truncated); + assert_eq!(r.node_count, 2); + } + + #[test] + fn focus_bfs_expands_neighborhood() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a"), + depth: 1, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // depth=1 from a → reaches b but not c + assert_eq!(r.node_count, 2); + } + + #[test] + fn focus_bfs_depth_two_reaches_further() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a"), + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert_eq!(r.node_count, 3); // a, b, c + } + + #[test] + fn focus_accepts_path_suffix() { + let g = fixture(); + // path is "src/a.rs" — match by suffix + let r = render(&g, &RenderOptions { 
+ format: DiagramFormat::Mermaid, + focus: Some("a.rs"), + depth: 1, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert_eq!(r.node_count, 2); + } + + #[test] + fn focus_not_found_returns_error() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("does_not_exist"), + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }); + assert!(r.is_err()); + } + + #[test] + fn dot_output_has_role_colors() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(r.diagram.starts_with("digraph cartographer {")); + assert!(r.diagram.contains("#9cf")); // core color present for node a + } + + #[test] + fn format_parse_accepts_aliases_and_rejects_unknown() { + assert_eq!(DiagramFormat::parse("mermaid").unwrap(), DiagramFormat::Mermaid); + assert_eq!(DiagramFormat::parse("MERMAID").unwrap(), DiagramFormat::Mermaid); + assert_eq!(DiagramFormat::parse("").unwrap(), DiagramFormat::Mermaid); + assert_eq!(DiagramFormat::parse("dot").unwrap(), DiagramFormat::Dot); + assert_eq!(DiagramFormat::parse("graphviz").unwrap(), DiagramFormat::Dot); + assert!(DiagramFormat::parse("svg").is_err()); + } + + #[test] + fn focus_bfs_is_undirected() { + // "The area I'm editing" includes both what I import and what imports + // me. Verify BFS from a leaf picks up its importers, not just its + // imports. Here `d` is imported by `c` (edge c→d) but imports nothing. 
+ let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("d"), + depth: 1, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert_eq!(r.node_count, 2); // d + its importer c + assert!(r.diagram.contains("c.rs")); + } + + #[test] + fn focus_respects_node_cap() { + // depth=2 from a would reach {a,b,c}; cap at 2 should truncate. + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a"), + depth: 2, + max_nodes: 2, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert_eq!(r.node_count, 2); + assert!(r.truncated); + } + + #[test] + fn focus_bfs_handles_cycles_without_looping() { + // Add a cycle a→b→c→a and BFS should still terminate and not + // duplicate nodes in the output. + let mut g = fixture(); + g.edges.push(edge("c", "a")); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a"), + depth: 5, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // a, b, c, d reachable undirected; no duplicates. + assert_eq!(r.node_count, 4); + } + + #[test] + fn mermaid_output_declares_classes_and_direction() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(r.diagram.starts_with("graph TD\n")); + assert!(r.diagram.contains("classDef core")); + assert!(r.diagram.contains("classDef bridge")); + // Role-tagged nodes carry their class suffix. 
+ assert!(r.diagram.contains(":::core")); + assert!(r.diagram.contains(":::bridge")); + // Overlay classes are always declared so later `class` statements resolve. + assert!(r.diagram.contains("classDef cycle")); + assert!(r.diagram.contains("classDef pivot")); + assert!(r.diagram.contains("classDef hot")); + } + + fn cycle(nodes: &[&str], pivot: Option<&str>) -> CycleInfo { + CycleInfo { + nodes: nodes.iter().map(|s| s.to_string()).collect(), + pivot_node: pivot.map(String::from), + severity: "high".into(), + } + } + + fn violation(src: &str, tgt: &str, vt: LayerViolationType) -> LayerViolation { + LayerViolation { + source_path: src.into(), + target_path: tgt.into(), + source_layer: "x".into(), + target_layer: "y".into(), + violation_type: vt, + severity: "CRITICAL".into(), + } + } + + #[test] + fn mermaid_marks_cycle_nodes_edges_and_pivot() { + let mut g = fixture(); + g.edges.push(edge("c", "a")); // closes a → b → c → a + g.cycles.push(cycle(&["a", "b", "c"], Some("b"))); + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + + // Cycle edges use thick arrow and pick up a linkStyle. + assert!(r.diagram.contains("==>"), "expected cycle edges to use ==>:\n{}", r.diagram); + assert!(r.diagram.contains("linkStyle"), "expected linkStyle for cycle edges"); + + // Pivot takes precedence over cycle — node b gets the pivot class. + assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("pivot")), + "expected a class statement assigning pivot:\n{}", r.diagram + ); + // Non-pivot cycle members still get the cycle class. 
+ assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("cycle")), + "expected a class statement assigning cycle:\n{}", r.diagram + ); + } + + #[test] + fn dot_marks_cycle_edges_red() { + let mut g = fixture(); + g.edges.push(edge("c", "a")); + g.cycles.push(cycle(&["a", "b", "c"], None)); + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + + // At least one cycle edge must carry the red colour and solid style. + let cycle_edge_line = r + .diagram + .lines() + .find(|l| l.contains("\"a\" -> \"b\"") || l.contains("\"b\" -> \"c\"") || l.contains("\"c\" -> \"a\"")) + .expect("cycle edge should be rendered"); + assert!(cycle_edge_line.contains("#cc0000"), "cycle edge missing red colour: {}", cycle_edge_line); + + // Non-cycle edges stay grey. + assert!(r.diagram.contains("#666666") || !g.edges.iter().any(|e| { + let members = ["a", "b", "c"]; + !members.contains(&e.source.as_str()) || !members.contains(&e.target.as_str()) + })); + } + + #[test] + fn mermaid_marks_layer_violations() { + let mut g = fixture(); + g.layer_violations.push(violation("a", "b", LayerViolationType::BackCall)); + g.layer_violations.push(violation("b", "c", LayerViolationType::SkipCall)); + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + + // Both violations use the dotted-violation arrow. + assert!(r.diagram.contains("-.->"), "expected dotted arrow for violations:\n{}", r.diagram); + // linkStyle distinguishes the two by colour. 
+ assert!(r.diagram.contains("stroke:#c00"), "expected red stroke for BackCall"); + assert!(r.diagram.contains("stroke:#f90"), "expected orange stroke for SkipCall"); + } + + #[test] + fn dot_marks_layer_violations_with_style_and_colour() { + let mut g = fixture(); + g.layer_violations.push(violation("a", "b", LayerViolationType::BackCall)); + g.layer_violations.push(violation("b", "c", LayerViolationType::SkipCall)); + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + + let back = r.diagram.lines().find(|l| l.contains("\"a\" -> \"b\"")).unwrap(); + assert!(back.contains("#cc0000"), "BackCall edge missing red: {}", back); + assert!(back.contains("style=dashed"), "BackCall edge missing dashed: {}", back); + + let skip = r.diagram.lines().find(|l| l.contains("\"b\" -> \"c\"")).unwrap(); + assert!(skip.contains("#ff9900"), "SkipCall edge missing orange: {}", skip); + assert!(skip.contains("style=dotted"), "SkipCall edge missing dotted: {}", skip); + } + + #[test] + fn dot_sizes_hot_nodes_and_applies_orange_border() { + let mut g = fixture(); + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "a") { + n.hotspot_score = Some(90.0); + } + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + + let hot_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); + // width at score=90 ≈ 0.75 + 0.9 * 1.05 = 1.695 → formatted as 1.70 + assert!(hot_line.contains("width=1.70"), "hot node width wrong: {}", hot_line); + assert!(hot_line.contains("#ff6600"), "hot node missing orange border: {}", hot_line); + + // A cold node stays at default width. 
+ let cold_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"b\" [")).unwrap(); + assert!(cold_line.contains("width=0.75"), "cold node width wrong: {}", cold_line); + } + + #[test] + fn mermaid_marks_hot_nodes_with_class() { + let mut g = fixture(); + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "a") { + n.hotspot_score = Some(90.0); + } + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + + // `a` should get a class statement including `hot`. + assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("hot")), + "expected class statement assigning hot:\n{}", r.diagram + ); + } + + #[test] + fn cycle_border_takes_precedence_over_hot_border_in_dot() { + // A node that's both hot and in a cycle wears the cycle red border, + // not the hot orange border — architectural signal wins. + let mut g = fixture(); + g.edges.push(edge("c", "a")); + g.cycles.push(cycle(&["a", "b", "c"], None)); + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "a") { + n.hotspot_score = Some(95.0); + } + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + + let a_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); + // Expect the cycle red colour, not the hot orange. 
+ assert!(a_line.contains("color=\"#cc0000\""), "expected cycle red border: {}", a_line); + assert!(!a_line.contains("color=\"#ff6600\""), "hot border should not win over cycle: {}", a_line); + } + + fn cochange_pair(a: &str, b: &str, score: f64) -> crate::api::CoChangePair { + crate::api::CoChangePair { + file_a: a.into(), + file_b: b.into(), + count: 3, + coupling_score: score, + } + } + + #[test] + fn cochange_overlay_off_by_default() { + let mut g = fixture(); + g.cochange_pairs.push(cochange_pair("a", "c", 0.9)); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // No undirected-style edge, no purple link styling. + assert!(!r.diagram.contains("---|")); + assert!(!r.diagram.contains("stroke:#84c")); + } + + #[test] + fn cochange_overlay_renders_above_threshold_mermaid() { + let mut g = fixture(); + g.cochange_pairs.push(cochange_pair("a", "c", 0.9)); + g.cochange_pairs.push(cochange_pair("a", "d", 0.2)); // below threshold + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: Some(0.5), + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // 0.9 pair shows up with the score label; 0.2 pair filtered out. + assert!(r.diagram.contains("---|0.90|"), "missing cochange line:\n{}", r.diagram); + assert!(!r.diagram.contains("---|0.20|")); + // linkStyle appends the purple dash style. 
+ assert!(r.diagram.contains("stroke:#84c")); + } + + #[test] + fn cochange_overlay_renders_above_threshold_dot() { + let mut g = fixture(); + g.cochange_pairs.push(cochange_pair("a", "c", 0.9)); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: Some(0.5), + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // Purple edge with arrowhead=none and constraint=false so it doesn't + // warp the DAG layout. + let line = r.diagram.lines().find(|l| l.contains("\"a\" -> \"c\"") && l.contains("#8844cc")).unwrap(); + assert!(line.contains("arrowhead=none"), "cochange edge must be undirected: {}", line); + assert!(line.contains("constraint=false"), "cochange edge must not constrain layout: {}", line); + } + + #[test] + fn cochange_overlay_skips_pairs_with_excluded_endpoint() { + // `isolated` is dropped by the selection stage; any cochange pair + // involving it must not appear as an edge referencing a missing node. + let mut g = fixture(); + g.cochange_pairs.push(cochange_pair("a", "isolated", 0.9)); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: Some(0.5), + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(!r.diagram.contains("---|"), "cochange to excluded node must not render"); + } + + #[test] + fn blast_radius_selects_epicenter_deps_and_dependents() { + // fixture: a -> b -> c -> d, plus isolated. Blast radius of `b`: + // {b} ∪ {c} (dependency) ∪ {a} (dependent) = {a, b, c}. 
+ let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: Some("b"), + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert_eq!(r.node_count, 3); + assert!(r.diagram.contains("a.rs")); + assert!(r.diagram.contains("b.rs")); + assert!(r.diagram.contains("c.rs")); + assert!(!r.diagram.contains("d.rs")); + // Epicenter class applied to b. + assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("epicenter")), + "expected epicenter class assignment:\n{}", r.diagram + ); + } + + #[test] + fn blast_radius_marks_epicenter_in_dot() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: Some("b"), + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // Epicenter node `b` gets the bold red fill; other nodes don't. + let b_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"b\" [")).unwrap(); + assert!(b_line.contains("fillcolor=\"#ff3333\""), "epicenter missing red fill: {}", b_line); + let a_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); + assert!(!a_line.contains("fillcolor=\"#ff3333\""), "non-epicenter got epicenter fill: {}", a_line); + } + + #[test] + fn blast_radius_overrides_focus() { + // When both are set, blast_radius wins. Fixture: a -> b -> c -> d. + // With blast_radius=d: {d} ∪ {} ∪ {c} = {d, c}. Focus=a would give + // {a, b} at depth=1 — verify we get the blast set, not the BFS set. 
+ let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a"), + depth: 1, + max_nodes: 10, + show_cochange: None, + blast_radius: Some("d"), + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert_eq!(r.node_count, 2); + assert!(r.diagram.contains("d.rs")); + assert!(r.diagram.contains("c.rs")); + assert!(!r.diagram.contains("a.rs")); + assert!(!r.diagram.contains("b.rs")); + } + + #[test] + fn blast_radius_accepts_path_suffix() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: Some("b.rs"), + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert_eq!(r.node_count, 3); + } + + #[test] + fn blast_radius_unknown_target_errors() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: Some("does_not_exist"), + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }); + assert!(r.is_err()); + let err = r.unwrap_err(); + assert!(err.contains("blast-radius target not found"), "wrong error: {}", err); + } + + #[test] + fn basic_constructor_matches_manual_defaults() { + let opts = RenderOptions::basic(DiagramFormat::Mermaid, 42); + assert_eq!(opts.format, DiagramFormat::Mermaid); + assert!(opts.focus.is_none()); + assert_eq!(opts.depth, 2); + assert_eq!(opts.max_nodes, 42); + assert!(opts.show_cochange.is_none()); + assert!(opts.blast_radius.is_none()); + assert!(!opts.docs_only); + assert!(opts.group_by_folder_depth.is_none()); + assert!(!opts.color_by_owner); + } + + #[test] + fn overlays_respect_max_nodes_truncation() { + // Cycle spans a,b,c but max_nodes=2 cuts the graph — the renderer must + // not reference excluded nodes in linkStyle / class statements. 
+ let mut g = fixture(); + g.edges.push(edge("c", "a")); + g.cycles.push(cycle(&["a", "b", "c"], Some("c"))); + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 2, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(r.truncated); + assert_eq!(r.node_count, 2); + + // No linkStyle index should exceed the count of emitted edges. + let edge_count = r.diagram.lines().filter(|l| { + l.contains(" --> ") || l.contains(" ==> ") || l.contains(" -.-> ") + }).count(); + for line in r.diagram.lines().filter(|l| l.trim_start().starts_with("linkStyle")) { + let idx: usize = line.split_whitespace().nth(1).unwrap().parse().unwrap(); + assert!(idx < edge_count, "linkStyle {} refers to an edge that wasn't emitted", idx); + } + } + + // ---------- doc-map (Phase 3.2) ---------------------------------------- + + fn doc_node(id: &str, ext: &str) -> GraphNode { + let mut n = node(id, None); + n.path = format!("docs/{}.{}", id, ext); + n.language = "markdown".into(); + n + } + + fn fixture_with_docs() -> ProjectGraphResponse { + // a.rs <- README.md (doc → code), config.yaml isolated from code edges, + // plus b/c/d/isolated from the base fixture. README references a.rs. + let mut g = fixture(); + g.nodes.push(doc_node("README", "md")); + g.nodes.push(doc_node("config", "yaml")); + g.edges.push(edge("README", "a")); // README references code file a + g + } + + #[test] + fn mermaid_doc_node_uses_stadium_shape_and_sec_label() { + let g = fixture_with_docs(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // Stadium shape `([...])` for docs, `sec` unit label, and `doc` classDef. 
+ assert!(r.diagram.contains("classDef doc"), "missing doc classDef:\n{}", r.diagram); + assert!( + r.diagram.lines().any(|l| l.contains("([\"README.md")), + "doc node missing stadium shape:\n{}", r.diagram + ); + assert!(r.diagram.contains("sec\"])"), "doc node missing sec unit:\n{}", r.diagram); + // Non-doc node still uses square bracket + fn. + assert!( + r.diagram.lines().any(|l| l.contains("[\"a.rs") && l.contains("fn\"]")), + "code node shape/unit regressed:\n{}", r.diagram + ); + // Doc class is applied via a class statement. + assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("doc")), + "doc class not assigned:\n{}", r.diagram + ); + } + + #[test] + fn dot_doc_node_uses_note_shape_and_yellow_fill() { + let g = fixture_with_docs(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + let doc_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"README\" [")).unwrap(); + assert!(doc_line.contains("shape=note"), "doc not shape=note: {}", doc_line); + assert!(doc_line.contains("#fff4c0"), "doc not yellow fill: {}", doc_line); + assert!(doc_line.contains("sec\""), "doc missing sec unit: {}", doc_line); + + let code_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); + assert!(code_line.contains("shape=box"), "code shape regressed: {}", code_line); + assert!(code_line.contains("fn\""), "code unit regressed: {}", code_line); + } + + #[test] + fn docs_only_selects_docs_and_their_neighbors() { + // README references a.rs. docs_only should yield {README, config, a}: + // both docs plus the one code neighbor. b/c/d/isolated are excluded. 
+ let g = fixture_with_docs(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: true, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(r.diagram.contains("README.md"), "missing README:\n{}", r.diagram); + assert!(r.diagram.contains("config.yaml"), "missing config:\n{}", r.diagram); + assert!(r.diagram.contains("a.rs"), "missing referenced code file a.rs:\n{}", r.diagram); + assert!(!r.diagram.contains("b.rs"), "b.rs should not render in docs_only:\n{}", r.diagram); + assert!(!r.diagram.contains("c.rs"), "c.rs should not render in docs_only:\n{}", r.diagram); + assert!(!r.diagram.contains("d.rs"), "d.rs should not render in docs_only:\n{}", r.diagram); + assert_eq!(r.node_count, 3); + } + + #[test] + fn docs_only_blast_radius_wins_over_docs_only() { + // Selection precedence: blast_radius > focus > docs_only > top. When + // both blast_radius and docs_only are set, blast_radius selection + // applies — docs_only is ignored. + let g = fixture_with_docs(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: Some("b"), + docs_only: true, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // Blast radius of b in the base edges: {a, b, c}. 
+ assert!(r.diagram.contains("a.rs")); + assert!(r.diagram.contains("b.rs")); + assert!(r.diagram.contains("c.rs")); + assert!(!r.diagram.contains("README.md"), "docs_only should be overridden:\n{}", r.diagram); + } + + // ---------- folder-collapsed view (Phase 3.3) -------------------------- + + fn node_at(id: &str, path: &str) -> GraphNode { + let mut n = node(id, None); + n.path = path.into(); + n + } + + fn fixture_with_folders() -> ProjectGraphResponse { + // Layout: + // src/api/users.rs (api_users) → src/db/sql.rs (db_sql) + // src/api/posts.rs (api_posts) → src/db/sql.rs + // src/api/users.rs → src/api/posts.rs (intra-folder, must be dropped) + // tests/foo.rs (tests_foo) → src/api/users.rs + // Depth 1 groups: {src, tests}; edges src↔src dropped, src↔tests kept, + // tests→src kept. + let mut g = ProjectGraphResponse { + nodes: vec![ + node_at("api_users", "src/api/users.rs"), + node_at("api_posts", "src/api/posts.rs"), + node_at("db_sql", "src/db/sql.rs"), + node_at("tests_foo", "tests/foo.rs"), + ], + edges: vec![ + edge("api_users", "db_sql"), + edge("api_posts", "db_sql"), + edge("api_users", "api_posts"), + edge("tests_foo", "api_users"), + ], + cycles: vec![], + god_modules: vec![], + layer_violations: vec![], + metadata: GraphMetadata { + total_files: 4, + total_edges: 4, + languages: HashMap::new(), + generated_at: "".into(), + bridge_count: None, + cycle_count: None, + god_module_count: None, + health_score: None, + layer_violation_count: None, + architectural_drift: None, + hotspot_count: None, + dead_code_count: None, + unreferenced_exports_count: None, + }, + cochange_pairs: vec![], + }; + // Give api_users a hotspot score so we can assert max-folding. 
+ if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "api_users") { + n.hotspot_score = Some(85.0); + } + g + } + + #[test] + fn folder_collapse_depth_one_groups_top_level_dirs() { + let g = fixture_with_folders(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: Some(1), + color_by_owner: false, + }).unwrap(); + // Two folder nodes: `src` and `tests`. The individual files must not + // appear by filename (only the folder labels do). + assert_eq!(r.node_count, 2); + assert!(r.diagram.contains("src/"), "folder label missing:\n{}", r.diagram); + assert!(r.diagram.contains("tests/"), "folder label missing:\n{}", r.diagram); + assert!(!r.diagram.contains("users.rs")); + assert!(!r.diagram.contains("posts.rs")); + assert!(!r.diagram.contains("sql.rs")); + // Subroutine shape + folder class applied. + assert!(r.diagram.contains("classDef folder")); + assert!(r.diagram.contains("[["), "missing subroutine shape:\n{}", r.diagram); + assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("folder")), + "folder class not assigned:\n{}", r.diagram + ); + } + + #[test] + fn folder_collapse_drops_intra_folder_edges_and_aggregates() { + let g = fixture_with_folders(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: Some(1), + color_by_owner: false, + }).unwrap(); + // Expected edges after collapse: tests → src (1). The src→src edges + // from the file graph must be dropped. The src→src count is non-zero + // in the file graph, but at folder granularity it's a self-loop. 
+ let edge_count = r.diagram.lines().filter(|l| l.contains(" -> ")).count(); + assert_eq!(edge_count, 1, "expected exactly 1 folder edge:\n{}", r.diagram); + assert!(r.diagram.contains("\"tests\" -> \"src\""), "expected tests→src:\n{}", r.diagram); + // Folder shape + fill. + let src_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"src\" [")).unwrap(); + assert!(src_line.contains("shape=folder"), "folder shape missing: {}", src_line); + assert!(src_line.contains("#d6e9ff"), "folder fill missing: {}", src_line); + // `src` contains 3 files with 3+3+3 = 9 fn. + assert!(src_line.contains("3 files"), "file count missing: {}", src_line); + assert!(src_line.contains("9 fn"), "fn sum missing: {}", src_line); + } + + #[test] + fn folder_collapse_depth_two_separates_api_from_db() { + let g = fixture_with_folders(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: Some(2), + color_by_owner: false, + }).unwrap(); + // Groups: src/api, src/db, tests. api→db edges collapse into one. + // Mermaid labels use the folder *tail*, not the full path, to keep the + // labels readable — the full folder id remains in the node id. + assert_eq!(r.node_count, 3); + assert!(r.diagram.contains("api/"), "api/ label missing:\n{}", r.diagram); + assert!(r.diagram.contains("db/"), "db/ label missing:\n{}", r.diagram); + assert!(r.diagram.contains("tests/"), "tests/ label missing:\n{}", r.diagram); + // api (6 fn from api_users+api_posts) and db (3 fn from db_sql) are separate. 
+ assert!(r.diagram.contains("2 files, 6 fn"), "api aggregation wrong:\n{}", r.diagram); + assert!(r.diagram.contains("1 files, 3 fn"), "db aggregation wrong:\n{}", r.diagram); + } + + // ---------- ownership coloring (Phase 1.6) ----------------------------- + + #[test] + fn owner_color_is_stable_and_within_palette() { + // Two calls for the same owner must yield the same color. + let c1 = owner_color("alice"); + let c2 = owner_color("alice"); + assert_eq!(c1, c2); + // All colors must start with `#` and be 7 chars (hex triple). + assert_eq!(c1.len(), 7); + assert!(c1.starts_with('#')); + // Different owners hash to (likely) different palette entries — at + // minimum: both are valid palette entries even if they collide. + let c_bob = owner_color("bob"); + assert_eq!(c_bob.len(), 7); + assert!(c_bob.starts_with('#')); + } + + #[test] + fn mermaid_color_by_owner_emits_style_and_drops_role_class() { + let mut g = fixture(); + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "a") { + n.owner = Some("alice".into()); + } + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "b") { + n.owner = Some("bob".into()); + } + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: true, + }).unwrap(); + // Role class suffixes must be dropped so the per-node `style` wins. + assert!(!r.diagram.contains(":::core"), "role class leaked:\n{}", r.diagram); + assert!(!r.diagram.contains(":::bridge"), "role class leaked:\n{}", r.diagram); + // Owner colors are applied via explicit `style` lines. 
+ let alice = owner_color("alice"); + let bob = owner_color("bob"); + assert!( + r.diagram.contains(&format!("fill:{}", alice)), + "alice color missing:\n{}", r.diagram + ); + assert!( + r.diagram.contains(&format!("fill:{}", bob)), + "bob color missing:\n{}", r.diagram + ); + } + + #[test] + fn dot_color_by_owner_paints_fillcolor() { + let mut g = fixture(); + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "a") { + n.owner = Some("alice".into()); + } + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: true, + }).unwrap(); + let alice = owner_color("alice"); + let a_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); + assert!( + a_line.contains(&format!("fillcolor=\"{}\"", alice)), + "alice fill missing: {}", a_line + ); + // Nodes without an owner fall back to the default white. + let b_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"b\" [")).unwrap(); + assert!(b_line.contains("fillcolor=\"#fff\""), "default fill missing: {}", b_line); + } + + #[test] + fn folder_collapse_depth_zero_is_noop() { + let g = fixture_with_folders(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: Some(0), + color_by_owner: false, + }).unwrap(); + // Depth=0 falls back to the uncollapsed graph — every file node renders. 
+ assert_eq!(r.node_count, 4); + assert!(r.diagram.contains("users.rs")); + assert!(r.diagram.contains("sql.rs")); + } + + #[test] + fn ascii_format_parses() { + assert_eq!(DiagramFormat::parse("ascii").unwrap(), DiagramFormat::Ascii); + assert_eq!(DiagramFormat::parse("tree").unwrap(), DiagramFormat::Ascii); + assert_eq!(DiagramFormat::parse("TEXT").unwrap(), DiagramFormat::Ascii); + } + + #[test] + fn ascii_renders_top_by_degree_as_tree() { + // Default selection: top-by-degree picks a highly connected root. + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Ascii, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // Tree characters must show up somewhere below the root line. + assert!(r.diagram.contains("├── ") || r.diagram.contains("└── "), + "expected tree glyphs in:\n{}", r.diagram); + // Every included node should appear at least once. + assert!(r.diagram.contains("a.rs")); + assert!(r.diagram.contains("b.rs")); + } + + #[test] + fn ascii_rooted_on_focus() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Ascii, + focus: Some("a"), + depth: 3, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // First line is the root, un-prefixed. + let first = r.diagram.lines().next().unwrap(); + assert!(first.starts_with("a.rs"), "root line wrong: {first:?}"); + assert!(!first.starts_with("├") && !first.starts_with("└")); + } + + #[test] + fn ascii_breaks_cycles_with_seen_marker() { + // Build a->b->a cycle so the walker must stop re-entering `a`. 
+ let mut g = fixture(); + g.edges.push(edge("b", "a")); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Ascii, + focus: Some("a"), + depth: 5, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(r.diagram.contains("↑ seen"), + "expected cycle marker in ascii output:\n{}", r.diagram); + } + + #[test] + fn ascii_respects_depth_cap() { + // a->b->c->d chain; depth=1 from a should reach b but not c. + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Ascii, + focus: Some("a"), + depth: 1, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(r.diagram.contains("a.rs")); + assert!(r.diagram.contains("b.rs")); + // depth=1 selection via bfs_from_anchor only *includes* a and b, so + // c.rs must not appear in the ascii tree either. + assert!(!r.diagram.contains("c.rs"), "depth cap not respected:\n{}", r.diagram); + } +} diff --git a/third_party/cartographer/mapper-core/cartographer/src/diagram_export.rs b/third_party/cartographer/mapper-core/cartographer/src/diagram_export.rs new file mode 100644 index 00000000..61da410e --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/src/diagram_export.rs @@ -0,0 +1,170 @@ +//! Render a diagram to SVG/PNG by shelling out to an external converter. +//! +//! We pick the converter based on the *source* format (Mermaid vs DOT) — not +//! on a user-visible flag — so callers just say "write to foo.svg" and we do +//! the right thing: +//! +//! Mermaid + .svg/.png → `mmdc` (Mermaid CLI, npm-installed) +//! DOT + .svg/.png → `dot` (Graphviz binary) +//! +//! If the target extension isn't `.svg`/`.png`, we treat the write as a +//! passthrough — the caller's diagram text lands at `target` unchanged. 
+ +use std::io::Write; +use std::path::Path; +use std::process::{Command, Stdio}; + +use crate::diagram::DiagramFormat; + +/// What `export_diagram` did, so the CLI can print a matching status line. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ExportKind { + /// Wrote the diagram source straight to disk (no converter invoked). + Source, + /// Rendered via `mmdc` (Mermaid → SVG or PNG). + MermaidSvg, + MermaidPng, + /// Rendered via Graphviz `dot`. + DotSvg, + DotPng, +} + +/// Write `content` (diagram source in `source_format`) to `target`, converting +/// to SVG/PNG on the way if the target extension calls for it. +/// +/// Errors if a converter is needed but the binary is missing from `$PATH` — +/// returning a message that tells the user how to install it. +pub fn export_diagram( + content: &str, + source_format: DiagramFormat, + target: &Path, +) -> Result { + let ext = target + .extension() + .and_then(|e| e.to_str()) + .map(|s| s.to_lowercase()) + .unwrap_or_default(); + + match (source_format, ext.as_str()) { + (_, "svg") | (_, "png") => convert(content, source_format, target, &ext), + _ => { + std::fs::write(target, content).map_err(|e| e.to_string())?; + Ok(ExportKind::Source) + } + } +} + +fn convert( + content: &str, + source_format: DiagramFormat, + target: &Path, + ext: &str, +) -> Result { + match source_format { + DiagramFormat::Mermaid => export_mermaid(content, target, ext), + DiagramFormat::Dot => export_dot(content, target, ext), + // ASCII trees are text-only by design — there's no sensible converter + // that turns them into a raster/vector. Tell the user to pick a + // different format explicitly instead of silently writing the text. 
+ DiagramFormat::Ascii => Err( + "ASCII diagrams can't be rendered to .svg/.png — use `--format mermaid` or `--format dot` for image output, or write to a text extension.".to_string(), + ), + } +} + +fn export_mermaid(content: &str, target: &Path, ext: &str) -> Result { + // mmdc reads from a file and writes to a path; it can't read stdin. + let tmp = tempfile(".mmd")?; + std::fs::write(&tmp, content).map_err(|e| e.to_string())?; + + let status = Command::new("mmdc") + .args([ + "-i", + tmp.to_str().ok_or("tempfile path not UTF-8")?, + "-o", + target.to_str().ok_or("target path not UTF-8")?, + ]) + .status() + .map_err(|_| { + "`mmdc` not found on PATH. Install via `npm install -g @mermaid-js/mermaid-cli`." + .to_string() + })?; + + // Remove the tmp regardless of outcome — leaving `.mmd` files around on + // failure mostly confuses users; the error message below is enough. + let _ = std::fs::remove_file(&tmp); + + if !status.success() { + return Err(format!("mmdc exited with status {}", status)); + } + Ok(if ext == "png" { ExportKind::MermaidPng } else { ExportKind::MermaidSvg }) +} + +fn export_dot(content: &str, target: &Path, ext: &str) -> Result { + // `dot` accepts stdin — no tempfile needed. + let mut child = Command::new("dot") + .args(["-T", ext, "-o", target.to_str().ok_or("target path not UTF-8")?]) + .stdin(Stdio::piped()) + .spawn() + .map_err(|_| { + "`dot` not found on PATH. Install Graphviz (e.g. `brew install graphviz`).".to_string() + })?; + + { + let stdin = child + .stdin + .as_mut() + .ok_or_else(|| "could not open dot stdin".to_string())?; + stdin + .write_all(content.as_bytes()) + .map_err(|e| e.to_string())?; + } + + let status = child.wait().map_err(|e| e.to_string())?; + if !status.success() { + return Err(format!("dot exited with status {}", status)); + } + Ok(if ext == "png" { ExportKind::DotPng } else { ExportKind::DotSvg }) +} + +/// Allocate a unique path in the system temp dir with the given extension. 
+/// We avoid pulling in the `tempfile` crate for one call — the path is used +/// immediately and removed in the happy path. +fn tempfile(ext: &str) -> Result { + let mut path = std::env::temp_dir(); + let pid = std::process::id(); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + path.push(format!("cartographer-{}-{}{}", pid, nanos, ext)); + Ok(path) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn passthrough_for_non_image_extension() { + let dir = std::env::temp_dir(); + let target = dir.join(format!("cartographer-test-{}.mmd", std::process::id())); + let kind = export_diagram("graph TD\n A --> B", DiagramFormat::Mermaid, &target).unwrap(); + assert_eq!(kind, ExportKind::Source); + let written = std::fs::read_to_string(&target).unwrap(); + assert!(written.contains("graph TD")); + let _ = std::fs::remove_file(&target); + } + + #[test] + fn passthrough_for_dot_source_with_dot_extension() { + let dir = std::env::temp_dir(); + let target = dir.join(format!("cartographer-test-{}.dot", std::process::id())); + let kind = export_diagram("digraph G { A -> B }", DiagramFormat::Dot, &target).unwrap(); + assert_eq!(kind, ExportKind::Source); + let _ = std::fs::remove_file(&target); + } + + // mmdc / dot may not be installed in CI, so we don't drive actual + // conversion in unit tests. The shell-out paths are exercised manually. 
+} diff --git a/third_party/cartographer/mapper-core/cartographer/src/extractor.rs b/third_party/cartographer/mapper-core/cartographer/src/extractor.rs index 8ea126e0..171ce327 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/extractor.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/extractor.rs @@ -206,16 +206,21 @@ fn lip_uri(path: &Path, qualified: &str) -> String { feature = "lang-c", feature = "lang-cpp", ))] fn make_sig( - raw: String, kind: SymbolKind, line: usize, path: &Path, + raw: String, kind: SymbolKind, node: &Node, path: &Path, name: &str, qualified: &str, doc: Option, ) -> Signature { + let sp = node.start_position(); + let ep = node.end_position(); Signature { raw, ckb_id: Some(lip_uri(path, qualified)), symbol_name: Some(name.to_string()), qualified_name: Some(qualified.to_string()), kind, - line_start: line, + line_start: sp.row, + col_start: sp.column, + line_end: ep.row, + col_end: ep.column, confidence: CONFIDENCE_TS, doc_comment: doc, } @@ -291,7 +296,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc .unwrap_or_default(); let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Interface, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Interface, node, path, &name, &name, doc)); scope.push(name); if let Some(body) = node.child_by_field_name("body") { let mut cur = body.walk(); @@ -318,7 +323,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc let kind = if scope.is_empty() { SymbolKind::Function } else { SymbolKind::Method }; let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, kind, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, kind, node, path, &name, &qualified, doc)); } "struct_item" => { let name = node.child_by_field_name("name") @@ -328,7 +333,7 @@ fn 
walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); let qualified = scope_qualify(scope, &name); - sigs.push(make_sig(raw, SymbolKind::Struct, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Struct, node, path, &name, &qualified, doc)); } "enum_item" => { let name = node.child_by_field_name("name") @@ -338,7 +343,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); let qualified = scope_qualify(scope, &name); - sigs.push(make_sig(raw, SymbolKind::Enum, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Enum, node, path, &name, &qualified, doc)); } "type_item" => { let name = node.child_by_field_name("name") @@ -347,7 +352,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc if name.is_empty() { return; } let raw = node_text(node, src).split_whitespace().collect::>().join(" "); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::TypeAlias, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::TypeAlias, node, path, &name, &name, doc)); } "const_item" | "static_item" => { let name = node.child_by_field_name("name") @@ -366,7 +371,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc } let raw = node_text(node, src).split_whitespace().collect::>().join(" "); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Variable, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Variable, node, path, &name, &name, doc)); } "macro_definition" => { let name = { @@ -380,7 +385,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc if name.is_empty() { return; } let raw = format!("macro_rules! 
{}", name); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Macro, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Macro, node, path, &name, &name, doc)); } "mod_item" => { let name = node.child_by_field_name("name") @@ -389,7 +394,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc if name.is_empty() { return; } let doc = preceding_doc_comment(node, src); let raw = format!("mod {}", name); - sigs.push(make_sig(raw, SymbolKind::Namespace, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Namespace, node, path, &name, &name, doc)); if let Some(body) = node.child_by_field_name("body") { scope.push(name); let mut cur = body.walk(); @@ -475,7 +480,7 @@ fn walk_go(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec) { if name.is_empty() { return; } let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Function, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Function, node, path, &name, &name, doc)); } "method_declaration" => { let name = node.child_by_field_name("name") @@ -499,7 +504,7 @@ fn walk_go(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec) { let qualified = if receiver_type.is_empty() { name.clone() } else { format!("{}.{}", receiver_type, name) }; let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Method, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Method, node, path, &name, &qualified, doc)); } "type_declaration" => { let mut cur = node.walk(); @@ -516,7 +521,7 @@ fn walk_go(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec) { }; let raw = first_line(&child, src); let doc = preceding_doc_comment(&child, src); - sigs.push(make_sig(raw, kind, child.start_position().row, path, &name, 
&name, doc)); + sigs.push(make_sig(raw, kind, &child, path, &name, &name, doc)); } } } @@ -536,7 +541,7 @@ fn walk_go(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec) { if name.is_empty() { continue; } let raw = node_text(&child, src).split_whitespace().collect::>().join(" "); let doc = preceding_doc_comment(&child, src); - sigs.push(make_sig(raw, SymbolKind::Variable, child.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Variable, &child, path, &name, &name, doc)); } } } @@ -611,7 +616,7 @@ fn walk_python(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, let kind = if scope.is_empty() { SymbolKind::Function } else { SymbolKind::Method }; let raw = sig_up_to_colon(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, kind, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, kind, node, path, &name, &qualified, doc)); } "class_definition" => { let name = node.child_by_field_name("name") @@ -620,7 +625,7 @@ fn walk_python(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, if name.is_empty() { return; } let raw = sig_up_to_colon(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Class, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Class, node, path, &name, &name, doc)); scope.push(name); if let Some(body) = node.child_by_field_name("body") { let mut cur = body.walk(); @@ -644,7 +649,7 @@ fn walk_python(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, .unwrap_or_default(); if !name.is_empty() && name.chars().all(|c| c.is_uppercase() || c == '_' || c.is_numeric()) { let raw = first_line(node, src); - sigs.push(make_sig(raw, SymbolKind::Variable, node.start_position().row, path, &name, &name, None)); + sigs.push(make_sig(raw, SymbolKind::Variable, node, path, &name, &name, None)); } } } @@ -737,7 +742,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut 
Vec, scop let kind = if scope.is_empty() { SymbolKind::Function } else { SymbolKind::Method }; let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, kind, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, kind, node, path, &name, &qualified, doc)); } "class_declaration" | "abstract_class_declaration" | "class" => { let name = node.child_by_field_name("name") @@ -746,7 +751,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop if name.is_empty() { return; } let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Class, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Class, node, path, &name, &name, doc)); scope.push(name); if let Some(body) = node.child_by_field_name("body") { let mut cur = body.walk(); @@ -765,7 +770,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop let qualified = scope_qualify(scope, &name); let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Method, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Method, node, path, &name, &qualified, doc)); } "interface_declaration" => { let name = node.child_by_field_name("name") @@ -774,7 +779,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop if name.is_empty() { return; } let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Interface, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Interface, node, path, &name, &name, doc)); } "type_alias_declaration" => { let name = node.child_by_field_name("name") @@ -783,7 +788,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop if name.is_empty() { return; } let 
raw = first_line(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::TypeAlias, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::TypeAlias, node, path, &name, &name, doc)); } "enum_declaration" => { let name = node.child_by_field_name("name") @@ -792,7 +797,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop if name.is_empty() { return; } let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Enum, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Enum, node, path, &name, &name, doc)); } "export_statement" | "export_clause" => { let mut cur = node.walk(); @@ -819,7 +824,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop let raw = format!("const {} = {}", name, sig_up_to_block(&val, src)); let qualified = scope_qualify(scope, &name); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Function, decl.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Function, &decl, path, &name, &qualified, doc)); } } _ => { @@ -903,7 +908,7 @@ fn walk_c_cpp( let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); let kind = if scope.is_empty() { SymbolKind::Function } else { SymbolKind::Method }; - sigs.push(make_sig(raw, kind, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, kind, node, path, &name, &qualified, doc)); } } // Don't recurse into the body — we don't want nested functions @@ -918,7 +923,7 @@ fn walk_c_cpp( if !name.is_empty() { let raw = node_text(node, src).split_whitespace().collect::>().join(" "); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Function, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Function, node, path, 
&name, &qualified, doc)); } } } @@ -933,7 +938,7 @@ fn walk_c_cpp( let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); let qualified = scope_qualify(scope, &name); - sigs.push(make_sig(raw, SymbolKind::Struct, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Struct, node, path, &name, &qualified, doc)); } } // Still walk the body for nested types @@ -947,7 +952,7 @@ fn walk_c_cpp( let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); let qualified = scope_qualify(scope, &name); - sigs.push(make_sig(raw, SymbolKind::Enum, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Enum, node, path, &name, &qualified, doc)); } } return; @@ -971,7 +976,7 @@ fn walk_c_cpp( if !name.is_empty() { let raw = node_text(node, src).split_whitespace().collect::>().join(" "); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::TypeAlias, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::TypeAlias, node, path, &name, &name, doc)); } return; } @@ -982,7 +987,7 @@ fn walk_c_cpp( let name = node_text(&name_node, src).to_string(); if !name.is_empty() { let raw = first_line(node, src); - sigs.push(make_sig(raw, SymbolKind::Variable, node.start_position().row, path, &name, &name, None)); + sigs.push(make_sig(raw, SymbolKind::Variable, node, path, &name, &name, None)); } } return; @@ -992,7 +997,7 @@ fn walk_c_cpp( let name = node_text(&name_node, src).to_string(); if !name.is_empty() { let raw = first_line(node, src); - sigs.push(make_sig(raw, SymbolKind::Macro, node.start_position().row, path, &name, &name, None)); + sigs.push(make_sig(raw, SymbolKind::Macro, node, path, &name, &name, None)); } } return; @@ -1007,7 +1012,7 @@ fn walk_c_cpp( let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); let qualified = scope_qualify(scope, &name); - 
sigs.push(make_sig(raw, SymbolKind::Class, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Class, node, path, &name, &qualified, doc)); // Walk class body for inline method definitions if let Some(body) = node.child_by_field_name("body") { scope.push(name); @@ -1031,7 +1036,7 @@ fn walk_c_cpp( let doc = preceding_doc_comment(node, src); sigs.push(make_sig( format!("namespace {}", name), SymbolKind::Namespace, - node.start_position().row, path, &name, &name, doc, + node, path, &name, &name, doc, )); scope.push(name); } diff --git a/third_party/cartographer/mapper-core/cartographer/src/git_analysis.rs b/third_party/cartographer/mapper-core/cartographer/src/git_analysis.rs index 555530a1..f7664e45 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/git_analysis.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/git_analysis.rs @@ -402,3 +402,75 @@ pub fn git_diff_files(root: &Path, c1: &str, c2: &str) -> Vec<(String, char)> { result } + +// --------------------------------------------------------------------------- +// git_ownership +// --------------------------------------------------------------------------- + +/// Dominant author per file over the last `limit` commits. "Dominant" = +/// highest raw commit count; ties broken alphabetically. Bot authors and +/// formatting-only commits are excluded (same filters as churn/cochange). +/// +/// Returns an empty map if git is unavailable or the directory is not a repo. +/// Keys are repo-relative paths matching `git log --name-only` output. 
+pub fn git_ownership(root: &Path, limit: usize) -> HashMap<String, String> {
+    let output = Command::new("git")
+        .args([
+            "-C",
+            &root.to_string_lossy(),
+            "log",
+            &format!("-n {}", limit),
+            "--name-only",
+            "--format=%x1f%an%x1f%s",
+        ])
+        .output();
+
+    let output = match output {
+        Ok(o) if o.status.success() => o,
+        _ => return HashMap::new(),
+    };
+
+    let text = String::from_utf8_lossy(&output.stdout);
+
+    // Per-file per-author commit counts. We need the raw author name here
+    // (not just the skip flag) so we parse the header locally instead of
+    // reusing `parse_header`.
+    let mut counts: HashMap<String, HashMap<String, usize>> = HashMap::new();
+    let mut current_author: Option<String> = None;
+    let mut skip_current = false;
+
+    for line in text.lines() {
+        let line = line.trim();
+        if line.starts_with('\x1f') {
+            let parts: Vec<&str> = line.splitn(3, '\x1f').collect();
+            let author = parts.get(1).copied().unwrap_or("").trim().to_string();
+            let subject = parts.get(2).copied().unwrap_or("").trim();
+            skip_current = is_bot_author(&author) || is_formatting_subject(subject);
+            current_author = if skip_current { None } else { Some(author) };
+            continue;
+        }
+        if line.is_empty() || skip_current {
+            continue;
+        }
+        if let Some(ref author) = current_author {
+            *counts
+                .entry(line.to_string())
+                .or_default()
+                .entry(author.clone())
+                .or_insert(0) += 1;
+        }
+    }
+
+    let mut owners: HashMap<String, String> = HashMap::with_capacity(counts.len());
+    for (file, authors) in counts {
+        // Pick the author with the highest count; ties → alphabetical so the
+        // result is deterministic across runs.
+ let dominant = authors + .into_iter() + .max_by(|a, b| a.1.cmp(&b.1).then_with(|| b.0.cmp(&a.0))); + if let Some((name, _)) = dominant { + owners.insert(file, name); + } + } + owners +} diff --git a/third_party/cartographer/mapper-core/cartographer/src/html_export.rs b/third_party/cartographer/mapper-core/cartographer/src/html_export.rs new file mode 100644 index 00000000..23625601 --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/src/html_export.rs @@ -0,0 +1,413 @@ +//! Self-contained interactive HTML diagram. +//! +//! One-file output: no network dependency, no external assets. The graph is +//! serialized into a ` + + +"#, + nodes_json = nodes_json, + edges_json = edges_json, + ) +} + +fn json_str(s: &str) -> String { + let mut out = String::with_capacity(s.len() + 2); + out.push('"'); + for c in s.chars() { + match c { + '"' => out.push_str("\\\""), + '\\' => out.push_str("\\\\"), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)), + c => out.push(c), + } + } + out.push('"'); + out +} + +fn json_opt_str(s: Option<&str>) -> String { + match s { + Some(v) => json_str(v), + None => "null".into(), + } +} + +fn violation_tag(vt: &LayerViolationType) -> &'static str { + match vt { + LayerViolationType::BackCall => "\"BackCall\"", + LayerViolationType::SkipCall => "\"SkipCall\"", + LayerViolationType::CircularCrossLayer => "\"CircularCrossLayer\"", + LayerViolationType::DirectForeignImport => "\"DirectForeignImport\"", + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::api::{GraphEdge, GraphMetadata}; + + fn node(id: &str, role: Option<&str>) -> GraphNode { + GraphNode { + module_id: id.into(), + path: format!("src/{}.rs", id), + language: "rust".into(), + signature_count: 3, + complexity: None, + is_bridge: None, + bridge_score: None, + degree: None, + risk_level: None, + churn: Some(5), + hotspot_score: Some(42.0), 
+            role: role.map(String::from),
+            is_dead: None,
+            unreferenced_exports: None,
+            fan_in: Some(2),
+            fan_out: Some(1),
+            cochange_partners: None,
+            cochange_entropy: None,
+            owner: Some("alice".into()),
+        }
+    }
+
+    fn fixture() -> ProjectGraphResponse {
+        ProjectGraphResponse {
+            nodes: vec![
+                node("a", Some("core")),
+                node("b", None),
+                node("c", Some("bridge")),
+            ],
+            edges: vec![
+                GraphEdge {
+                    source: "a".into(),
+                    target: "b".into(),
+                    edge_type: "import".into(),
+                    at_range: None,
+                },
+                GraphEdge {
+                    source: "b".into(),
+                    target: "c".into(),
+                    edge_type: "import".into(),
+                    at_range: None,
+                },
+            ],
+            cycles: vec![],
+            god_modules: vec![],
+            layer_violations: vec![],
+            metadata: GraphMetadata {
+                total_files: 3,
+                total_edges: 2,
+                languages: HashMap::new(),
+                generated_at: "".into(),
+                bridge_count: None,
+                cycle_count: None,
+                god_module_count: None,
+                health_score: None,
+                layer_violation_count: None,
+                architectural_drift: None,
+                hotspot_count: None,
+                dead_code_count: None,
+                unreferenced_exports_count: None,
+            },
+            cochange_pairs: vec![],
+        }
+    }
+
+    #[test]
+    fn html_contains_structure_and_embedded_graph() {
+        let g = fixture();
+        let included: Vec<String> = g.nodes.iter().map(|n| n.module_id.clone()).collect();
+        let html = render_html(&g, &included);
+        assert!(html.starts_with("<!DOCTYPE html>"));
+        assert!(html.contains("