From 911d7bfa58720fb3f2a852a8fc5930421a0aff4f Mon Sep 17 00:00:00 2001 From: Lisa Date: Thu, 16 Apr 2026 08:40:50 +0200 Subject: [PATCH 01/20] docs: add release process to CLAUDE.md Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 7ac11773..1e0c8d7c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -78,6 +78,22 @@ golangci-lint run ./ckb setup --tool=vscode ``` +## Release Process + +Releases are fully automated via `.github/workflows/release.yml`, triggered by pushing a `v*` tag. + +**Steps to release:** +1. Bump version in `internal/version/version.go`, `npm/package.json`, `testdata/review/sarif.json` +2. Update `CHANGELOG.md` +3. Merge to main, tag `vX.Y.Z`, push the tag +4. The pipeline handles everything else: + - Runs `go test -race ./...` + - GoReleaser builds cross-platform binaries and uploads to GitHub Releases + - Updates Homebrew tap (`SimplyLiz/homebrew-ckb`) + - Publishes `@tastehub/ckb` + 5 platform packages to npm + +**Do not manually `npm publish`** — the pipeline does it with checksummed binaries from GoReleaser. + ## npm Distribution (v7.0) CKB is also available via npm: From 7b445c9e6d8345bb010cc22fd742d34324c0d040 Mon Sep 17 00:00:00 2001 From: Lisa Date: Fri, 17 Apr 2026 10:56:45 +0200 Subject: [PATCH 02/20] feat(lip): enrich blast radius with LIP semantic coupling reviewPR and analyzeImpact now consume LIP's QueryBlastRadiusBatch to supplement SCIP static call graph with embedding-based semantic coupling. One round-trip prefetch for all changed files, gated behind lipSupports. Key invariant: UniqueCallerCount stays SCIP-only so review thresholds never inflate from embedding noise. Semantic callers are additive in SemanticCallerCount and carry a per-caller CouplingTier (static/semantic/ both) so consumers can distinguish certainty levels. 
Co-Authored-By: Claude Opus 4.6 --- internal/impact/enricher.go | 123 ++++++++++++++++++++++++++ internal/impact/enricher_test.go | 123 ++++++++++++++++++++++++++ internal/impact/types.go | 26 +++++- internal/lip/blast_radius.go | 66 ++++++++++++++ internal/lip/client.go | 69 +++++++++++++++ internal/query/impact.go | 39 +++++++-- internal/query/review_blastradius.go | 124 ++++++++++++++++++++++++++- 7 files changed, 560 insertions(+), 10 deletions(-) create mode 100644 internal/impact/enricher.go create mode 100644 internal/impact/enricher_test.go create mode 100644 internal/lip/blast_radius.go diff --git a/internal/impact/enricher.go b/internal/impact/enricher.go new file mode 100644 index 00000000..894c70ec --- /dev/null +++ b/internal/impact/enricher.go @@ -0,0 +1,123 @@ +package impact + +import "context" + +// BlastRadiusEnricher supplements SCIP-derived blast radius with external data +// (e.g., LIP embedding-based semantic coupling). Implementations must be safe +// for concurrent use and degrade gracefully — returning nil signals "unavailable". +type BlastRadiusEnricher interface { + // EnrichBatch takes changed file URIs and returns per-symbol blast radius + // from the external source. The map key is the symbol URI (e.g., + // "lip://local/src/auth.rs#validate_token"). Returns nil when the source + // is unavailable. + EnrichBatch(ctx context.Context, changedFileURIs []string) (map[string]*ExternalBlastRadius, error) +} + +// ExternalBlastRadius is what an enricher returns per symbol. +type ExternalBlastRadius struct { + // DirectItems are callers the external source found via static analysis. + // These overlap with SCIP's results and are used to confirm edges. + DirectItems []ExternalItem + // TransitiveItems are transitive callers from the external source. + TransitiveItems []ExternalItem + // SemanticItems are callers found via embedding similarity that may not + // appear in any static call graph (dynamic dispatch, macros, etc.). 
+ SemanticItems []ExternalSemanticItem + // RiskLevel is the external source's own risk assessment. + RiskLevel string +} + +// ExternalItem is a static caller from an external blast radius source. +type ExternalItem struct { + FileURI string + SymbolURI string + Distance int + Confidence float64 +} + +// ExternalSemanticItem is a semantically coupled symbol from an enricher. +type ExternalSemanticItem struct { + FileURI string + SymbolURI string + Similarity float32 // cosine similarity + Source string // "semantic" or "both" +} + +// MergeBlastRadius blends SCIP-derived blast radius with enricher data. +// +// Design invariant: UniqueCallerCount stays SCIP-only so that reviewPR +// thresholds (callerCount >= 3, callerCount > maxFanOut) are never inflated +// by embedding noise. Semantic callers are additive in SemanticCallerCount +// and SemanticCallers — they inform humans, not thresholds. +// +// Items with source=="both" confirm that a SCIP static edge also has embedding +// evidence. These bump ConfirmedCount but don't change UniqueCallerCount. +func MergeBlastRadius(static *BlastRadius, external *ExternalBlastRadius) *BlastRadius { + if static == nil { + return nil + } + if external == nil { + return static + } + + merged := *static // shallow copy + merged.StaticCallerCount = static.UniqueCallerCount + + // Build a set of files SCIP already knows about (from static callers). + // We use file URIs because SCIP symbol IDs and LIP symbol URIs use different + // schemes — file URI is the stable join key. 
+ staticFiles := make(map[string]struct{}) + for _, item := range external.DirectItems { + staticFiles[item.FileURI] = struct{}{} + } + for _, item := range external.TransitiveItems { + staticFiles[item.FileURI] = struct{}{} + } + + var semanticCallers []EnrichedCaller + confirmed := 0 + seen := make(map[string]struct{}) // dedup by file URI + + for _, si := range external.SemanticItems { + if _, dup := seen[si.FileURI]; dup { + continue + } + seen[si.FileURI] = struct{}{} + + switch si.Source { + case "both": + // Confirms a SCIP edge — record but don't inflate counts + confirmed++ + semanticCallers = append(semanticCallers, EnrichedCaller{ + SymbolURI: si.SymbolURI, + FileURI: si.FileURI, + Tier: CouplingBoth, + Confidence: 0.95, + Similarity: si.Similarity, + }) + case "semantic": + // New coupling not in SCIP — advisory + semanticCallers = append(semanticCallers, EnrichedCaller{ + SymbolURI: si.SymbolURI, + FileURI: si.FileURI, + Tier: CouplingSemantic, + Confidence: float64(si.Similarity), // cosine similarity as confidence proxy + Similarity: si.Similarity, + }) + } + } + + // Count only pure semantic (not "both") as additional callers + pureSemanticCount := 0 + for _, c := range semanticCallers { + if c.Tier == CouplingSemantic { + pureSemanticCount++ + } + } + + merged.SemanticCallerCount = pureSemanticCount + merged.ConfirmedCount = confirmed + merged.SemanticCallers = semanticCallers + // RiskLevel stays SCIP-derived. Semantic coupling informs the human, not the threshold. 
+ return &merged +} diff --git a/internal/impact/enricher_test.go b/internal/impact/enricher_test.go new file mode 100644 index 00000000..9e508aa7 --- /dev/null +++ b/internal/impact/enricher_test.go @@ -0,0 +1,123 @@ +package impact + +import "testing" + +func TestMergeBlastRadius_NilExternal(t *testing.T) { + static := &BlastRadius{ + ModuleCount: 3, + FileCount: 5, + UniqueCallerCount: 8, + RiskLevel: "medium", + } + got := MergeBlastRadius(static, nil) + if got != static { + t.Fatal("nil external should return static unchanged") + } +} + +func TestMergeBlastRadius_NilStatic(t *testing.T) { + got := MergeBlastRadius(nil, &ExternalBlastRadius{}) + if got != nil { + t.Fatal("nil static should return nil") + } +} + +func TestMergeBlastRadius_SemanticOnly(t *testing.T) { + static := &BlastRadius{ + ModuleCount: 2, + FileCount: 3, + UniqueCallerCount: 4, + RiskLevel: "low", + } + external := &ExternalBlastRadius{ + SemanticItems: []ExternalSemanticItem{ + {FileURI: "file:///src/a.rs", SymbolURI: "sym:a", Similarity: 0.85, Source: "semantic"}, + {FileURI: "file:///src/b.rs", SymbolURI: "sym:b", Similarity: 0.72, Source: "semantic"}, + }, + } + + got := MergeBlastRadius(static, external) + + // UniqueCallerCount must stay SCIP-only + if got.UniqueCallerCount != 4 { + t.Errorf("UniqueCallerCount = %d, want 4 (SCIP-only)", got.UniqueCallerCount) + } + if got.StaticCallerCount != 4 { + t.Errorf("StaticCallerCount = %d, want 4", got.StaticCallerCount) + } + if got.SemanticCallerCount != 2 { + t.Errorf("SemanticCallerCount = %d, want 2", got.SemanticCallerCount) + } + if got.ConfirmedCount != 0 { + t.Errorf("ConfirmedCount = %d, want 0", got.ConfirmedCount) + } + if len(got.SemanticCallers) != 2 { + t.Fatalf("SemanticCallers len = %d, want 2", len(got.SemanticCallers)) + } + for _, sc := range got.SemanticCallers { + if sc.Tier != CouplingSemantic { + t.Errorf("caller %s tier = %s, want semantic", sc.FileURI, sc.Tier) + } + } + // RiskLevel stays SCIP-derived + if 
got.RiskLevel != "low" { + t.Errorf("RiskLevel = %s, want low", got.RiskLevel) + } +} + +func TestMergeBlastRadius_BothSource(t *testing.T) { + static := &BlastRadius{ + ModuleCount: 1, + FileCount: 2, + UniqueCallerCount: 3, + RiskLevel: "low", + } + external := &ExternalBlastRadius{ + SemanticItems: []ExternalSemanticItem{ + {FileURI: "file:///src/confirmed.rs", Similarity: 0.91, Source: "both"}, + {FileURI: "file:///src/new.rs", Similarity: 0.78, Source: "semantic"}, + }, + } + + got := MergeBlastRadius(static, external) + + // "both" confirms a SCIP edge — doesn't inflate semantic count + if got.SemanticCallerCount != 1 { + t.Errorf("SemanticCallerCount = %d, want 1 (only pure semantic)", got.SemanticCallerCount) + } + if got.ConfirmedCount != 1 { + t.Errorf("ConfirmedCount = %d, want 1", got.ConfirmedCount) + } + if len(got.SemanticCallers) != 2 { + t.Fatalf("SemanticCallers len = %d, want 2 (both + semantic)", len(got.SemanticCallers)) + } + + // Check tiers + tiers := map[CouplingTier]int{} + for _, sc := range got.SemanticCallers { + tiers[sc.Tier]++ + } + if tiers[CouplingBoth] != 1 || tiers[CouplingSemantic] != 1 { + t.Errorf("tier counts = %v, want both:1, semantic:1", tiers) + } +} + +func TestMergeBlastRadius_DedupByFile(t *testing.T) { + static := &BlastRadius{ + UniqueCallerCount: 2, + RiskLevel: "low", + } + external := &ExternalBlastRadius{ + SemanticItems: []ExternalSemanticItem{ + {FileURI: "file:///src/dup.rs", Similarity: 0.80, Source: "semantic"}, + {FileURI: "file:///src/dup.rs", Similarity: 0.75, Source: "semantic"}, // same file + {FileURI: "file:///src/other.rs", Similarity: 0.70, Source: "semantic"}, + }, + } + + got := MergeBlastRadius(static, external) + + if got.SemanticCallerCount != 2 { + t.Errorf("SemanticCallerCount = %d, want 2 (deduped)", got.SemanticCallerCount) + } +} diff --git a/internal/impact/types.go b/internal/impact/types.go index e486c2ec..f0109edb 100644 --- a/internal/impact/types.go +++ b/internal/impact/types.go @@ 
-58,12 +58,36 @@ type Reference struct { IsTest bool // Whether this reference is from a test } +// CouplingTier distinguishes how a caller relationship was discovered. +type CouplingTier string + +const ( + CouplingStatic CouplingTier = "static" // SCIP call graph — high certainty + CouplingSemantic CouplingTier = "semantic" // LIP embedding similarity — lower certainty + CouplingBoth CouplingTier = "both" // confirmed by both SCIP and LIP +) + +// EnrichedCaller is a caller discovered by either static analysis or semantic similarity. +type EnrichedCaller struct { + SymbolURI string `json:"symbolUri,omitempty"` + FileURI string `json:"fileUri"` + Tier CouplingTier `json:"tier"` + Confidence float64 `json:"confidence"` // 0.0–1.0 + Similarity float32 `json:"similarity,omitempty"` // raw cosine similarity (semantic/both only) +} + // BlastRadius summarizes the spread of impact across the codebase type BlastRadius struct { ModuleCount int `json:"moduleCount"` // Number of affected modules FileCount int `json:"fileCount"` // Number of affected files - UniqueCallerCount int `json:"uniqueCallerCount"` // Number of unique callers + UniqueCallerCount int `json:"uniqueCallerCount"` // Number of unique callers (SCIP static only) RiskLevel string `json:"riskLevel"` // "low", "medium", "high" + + // Semantic enrichment from LIP (populated when LIP blast radius is available) + StaticCallerCount int `json:"staticCallerCount,omitempty"` + SemanticCallerCount int `json:"semanticCallerCount,omitempty"` + ConfirmedCount int `json:"confirmedCount,omitempty"` // callers found by both SCIP and LIP + SemanticCallers []EnrichedCaller `json:"semanticCallers,omitempty"` } // Blast radius classification thresholds diff --git a/internal/lip/blast_radius.go b/internal/lip/blast_radius.go new file mode 100644 index 00000000..37ea2050 --- /dev/null +++ b/internal/lip/blast_radius.go @@ -0,0 +1,66 @@ +package lip + +import ( + "context" + + "github.com/SimplyLiz/CodeMCP/internal/impact" +) + 
+// BlastRadiusEnricher adapts LIP's QueryBlastRadiusBatch into the +// impact.BlastRadiusEnricher interface. Safe for concurrent use (stateless +// adapter over the LIP socket RPC). +type BlastRadiusEnricher struct { + // MinScore is the cosine similarity threshold for semantic hits. + // Zero means static-only (no semantic items). Typical: 0.6. + MinScore float32 +} + +// EnrichBatch implements impact.BlastRadiusEnricher. +func (e *BlastRadiusEnricher) EnrichBatch(ctx context.Context, changedFileURIs []string) (map[string]*impact.ExternalBlastRadius, error) { + if len(changedFileURIs) == 0 { + return nil, nil + } + + entries, err := QueryBlastRadiusBatch(changedFileURIs, e.MinScore) + if entries == nil { + return nil, err + } + + out := make(map[string]*impact.ExternalBlastRadius, len(entries)) + for symbolURI, entry := range entries { + ebr := &impact.ExternalBlastRadius{ + RiskLevel: entry.RiskLevel, + } + + for _, di := range entry.DirectItems { + ebr.DirectItems = append(ebr.DirectItems, impact.ExternalItem{ + FileURI: di.FileURI, + SymbolURI: di.SymbolURI, + Distance: di.Distance, + Confidence: di.Confidence, + }) + } + + for _, ti := range entry.TransitiveItems { + ebr.TransitiveItems = append(ebr.TransitiveItems, impact.ExternalItem{ + FileURI: ti.FileURI, + SymbolURI: ti.SymbolURI, + Distance: ti.Distance, + Confidence: ti.Confidence, + }) + } + + for _, si := range entry.SemanticItems { + ebr.SemanticItems = append(ebr.SemanticItems, impact.ExternalSemanticItem{ + FileURI: si.FileURI, + SymbolURI: si.SymbolURI, + Similarity: si.Similarity, + Source: si.Source, + }) + } + + out[symbolURI] = ebr + } + + return out, nil +} diff --git a/internal/lip/client.go b/internal/lip/client.go index f23d094e..d7e37f92 100644 --- a/internal/lip/client.go +++ b/internal/lip/client.go @@ -739,6 +739,75 @@ func PruneDeleted() (int, int, error) { return result.Checked, len(result.Removed), nil } +// 
============================================================================= +// Blast radius +// ============================================================================= + +// BlastRadiusItem is a static caller from LIP's blast radius response. +type BlastRadiusItem struct { + FileURI string `json:"file_uri"` + SymbolURI string `json:"symbol_uri"` + Distance int `json:"distance"` + Confidence float64 `json:"confidence"` +} + +// BlastRadiusSemanticItem is a semantically coupled symbol from LIP. +type BlastRadiusSemanticItem struct { + FileURI string `json:"file_uri"` + SymbolURI string `json:"symbol_uri"` + Similarity float32 `json:"similarity"` + Source string `json:"source"` // "semantic" or "both" +} + +// BlastRadiusEntry is a single symbol's blast radius from LIP. +type BlastRadiusEntry struct { + SymbolURI string `json:"symbol_uri"` + DirectDependents int `json:"direct_dependents"` + TransitiveDependents int `json:"transitive_dependents"` + AffectedFiles []string `json:"affected_files"` + DirectItems []BlastRadiusItem `json:"direct_items"` + TransitiveItems []BlastRadiusItem `json:"transitive_items"` + RiskLevel string `json:"risk_level"` + Truncated bool `json:"truncated"` + SemanticItems []BlastRadiusSemanticItem `json:"semantic_items"` +} + +type blastRadiusBatchResp struct { + Results []BlastRadiusEntry `json:"results"` +} + +// QueryBlastRadiusBatch asks LIP for blast radius of all symbols in the given +// changed files. One round-trip. Returns a map keyed by symbol_uri. +// Returns nil when LIP is unavailable. +// +// min_score is the cosine similarity threshold for semantic hits. Pass 0 to +// get static-only results (no semantic items). Typical values: 0.6–0.8. 
+func QueryBlastRadiusBatch(changedFileURIs []string, minScore float32) (map[string]BlastRadiusEntry, error) { + if len(changedFileURIs) == 0 { + return nil, nil + } + req := map[string]any{ + "type": "query_blast_radius_batch", + "changed_file_uris": changedFileURIs, + } + if minScore > 0 { + req["min_score"] = minScore + } + // Budget: generous timeout — LIP needs to resolve symbols + compute embeddings + timeout := max(time.Duration(len(changedFileURIs)+1)*200*time.Millisecond, 3*time.Second) + result, _ := lipRPC(req, timeout, 8<<20, + func(r blastRadiusBatchResp) *[]BlastRadiusEntry { return &r.Results }) + if result == nil { + return nil, nil + } + // Index by symbol_uri for O(1) lookup in the merge path + out := make(map[string]BlastRadiusEntry, len(*result)) + for _, entry := range *result { + out[entry.SymbolURI] = entry + } + return out, nil +} + // ============================================================================= // Annotations // ============================================================================= diff --git a/internal/query/impact.go b/internal/query/impact.go index c306d21c..cc9a5a0f 100644 --- a/internal/query/impact.go +++ b/internal/query/impact.go @@ -113,8 +113,23 @@ type ModuleImpact struct { type BlastRadiusSummary struct { ModuleCount int `json:"moduleCount"` FileCount int `json:"fileCount"` - UniqueCallerCount int `json:"uniqueCallerCount"` - RiskLevel string `json:"riskLevel"` // "low", "medium", "high" + UniqueCallerCount int `json:"uniqueCallerCount"` // SCIP static callers only (drives thresholds) + RiskLevel string `json:"riskLevel"` // "low", "medium", "high" + + // LIP semantic enrichment (omitted when LIP is unavailable) + StaticCallerCount int `json:"staticCallerCount,omitempty"` + SemanticCallerCount int `json:"semanticCallerCount,omitempty"` + ConfirmedCount int `json:"confirmedCount,omitempty"` + SemanticCallers []SemanticCallerInfo `json:"semanticCallers,omitempty"` +} + +// SemanticCallerInfo is an 
embedding-discovered caller surfaced in the blast radius. +type SemanticCallerInfo struct { + SymbolURI string `json:"symbolUri,omitempty"` + FileURI string `json:"fileUri"` + Tier string `json:"tier"` // "semantic" or "both" + Confidence float64 `json:"confidence"` + Similarity float32 `json:"similarity,omitempty"` } // scipCallerProvider adapts SCIPAdapter to the TransitiveCallerProvider interface @@ -441,10 +456,22 @@ func (e *Engine) AnalyzeImpact(ctx context.Context, opts AnalyzeImpactOptions) ( var blastRadius *BlastRadiusSummary if result.BlastRadius != nil { blastRadius = &BlastRadiusSummary{ - ModuleCount: result.BlastRadius.ModuleCount, - FileCount: result.BlastRadius.FileCount, - UniqueCallerCount: result.BlastRadius.UniqueCallerCount, - RiskLevel: result.BlastRadius.RiskLevel, + ModuleCount: result.BlastRadius.ModuleCount, + FileCount: result.BlastRadius.FileCount, + UniqueCallerCount: result.BlastRadius.UniqueCallerCount, + RiskLevel: result.BlastRadius.RiskLevel, + StaticCallerCount: result.BlastRadius.StaticCallerCount, + SemanticCallerCount: result.BlastRadius.SemanticCallerCount, + ConfirmedCount: result.BlastRadius.ConfirmedCount, + } + for _, sc := range result.BlastRadius.SemanticCallers { + blastRadius.SemanticCallers = append(blastRadius.SemanticCallers, SemanticCallerInfo{ + SymbolURI: sc.SymbolURI, + FileURI: sc.FileURI, + Tier: string(sc.Tier), + Confidence: sc.Confidence, + Similarity: sc.Similarity, + }) } } diff --git a/internal/query/review_blastradius.go b/internal/query/review_blastradius.go index 0e9a601e..5786bf37 100644 --- a/internal/query/review_blastradius.go +++ b/internal/query/review_blastradius.go @@ -7,6 +7,8 @@ import ( "time" "github.com/SimplyLiz/CodeMCP/internal/cartographer" + "github.com/SimplyLiz/CodeMCP/internal/impact" + "github.com/SimplyLiz/CodeMCP/internal/lip" ) // checkBlastRadius checks if changed symbols have high fan-out (many callers). 
@@ -27,6 +29,18 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op } } + // Prefetch LIP blast radius for all changed files in a single round-trip. + // Returns nil when LIP is unavailable or doesn't support the message — the + // rest of the function degrades to SCIP-only blast radius unchanged. + var lipBR map[string]lip.BlastRadiusEntry + if e.lipSupports("query_blast_radius_batch") { + lipURIs := make([]string, len(changedFiles)) + for i, f := range changedFiles { + lipURIs[i] = "lip://local/" + f + } + lipBR, _ = lip.QueryBlastRadiusBatch(lipURIs, 0.6) + } + // Collect symbols from changed files, cap at 30 total. // Only include functions and methods — variable references are typically // framework wiring (cobra commands, Spring beans, Qt signals) not real callers. @@ -83,13 +97,59 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op continue } + // Merge LIP semantic enrichment into the SCIP-derived blast radius. + // Keyed by symbol's stable ID which maps to LIP's symbol_uri via the + // "lip://local/#" convention. 
+ semanticCount := 0 + if lipBR != nil { + if entry, ok := lipBRLookup(lipBR, sym.stableId, sym.file, sym.name); ok { + enriched := lipEntryToExternal(&entry) + // Convert BlastRadiusSummary → impact.BlastRadius for merge + staticBR := &impact.BlastRadius{ + ModuleCount: impactResp.BlastRadius.ModuleCount, + FileCount: impactResp.BlastRadius.FileCount, + UniqueCallerCount: impactResp.BlastRadius.UniqueCallerCount, + RiskLevel: impactResp.BlastRadius.RiskLevel, + } + merged := impact.MergeBlastRadius(staticBR, enriched) + if merged != nil { + impactResp.BlastRadius = &BlastRadiusSummary{ + ModuleCount: merged.ModuleCount, + FileCount: merged.FileCount, + UniqueCallerCount: merged.UniqueCallerCount, + RiskLevel: merged.RiskLevel, + StaticCallerCount: merged.StaticCallerCount, + SemanticCallerCount: merged.SemanticCallerCount, + ConfirmedCount: merged.ConfirmedCount, + } + for _, sc := range merged.SemanticCallers { + impactResp.BlastRadius.SemanticCallers = append( + impactResp.BlastRadius.SemanticCallers, + SemanticCallerInfo{ + SymbolURI: sc.SymbolURI, + FileURI: sc.FileURI, + Tier: string(sc.Tier), + Confidence: sc.Confidence, + Similarity: sc.Similarity, + }, + ) + } + semanticCount = merged.SemanticCallerCount + } + } + } + callerCount := impactResp.BlastRadius.UniqueCallerCount if informationalMode { // In informational mode, only surface symbols with meaningful fan-out. // Symbols with 1-2 callers are normal coupling; 3+ suggests a change // that could ripple further than expected. 
- if callerCount >= 3 { + if callerCount >= 3 || semanticCount > 0 { + msg := fmt.Sprintf("Fan-out: %s has %d callers", sym.name, callerCount) + if semanticCount > 0 { + msg += fmt.Sprintf(" (+%d semantically coupled)", semanticCount) + } hint := "" if sym.name != "" { hint = fmt.Sprintf("→ ckb explain %s", sym.name) @@ -104,13 +164,17 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op Check: "blast-radius", Severity: severity, File: sym.file, - Message: fmt.Sprintf("Fan-out: %s has %d callers", sym.name, callerCount), + Message: msg, Category: "risk", RuleID: "ckb/blast-radius/high-fanout", Hint: hint, }) } } else if callerCount > maxFanOut { + msg := fmt.Sprintf("High fan-out: %s has %d callers (threshold: %d)", sym.name, callerCount, maxFanOut) + if semanticCount > 0 { + msg += fmt.Sprintf(" (+%d semantically coupled)", semanticCount) + } hint := "" if sym.name != "" { hint = fmt.Sprintf("→ ckb explain %s", sym.name) @@ -119,7 +183,7 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op Check: "blast-radius", Severity: "warning", File: sym.file, - Message: fmt.Sprintf("High fan-out: %s has %d callers (threshold: %d)", sym.name, callerCount, maxFanOut), + Message: msg, Category: "risk", RuleID: "ckb/blast-radius/high-fanout", Hint: hint, @@ -199,3 +263,57 @@ func isFrameworkSymbol(kind, name, file string) bool { return false } + +// lipBRLookup finds a LIP blast radius entry for a CKB symbol. LIP keys entries +// by "lip://local/#" — we try that convention first, then +// fall back to scanning entries whose file prefix matches. +func lipBRLookup(lipBR map[string]lip.BlastRadiusEntry, stableId, file, name string) (lip.BlastRadiusEntry, bool) { + // Primary: exact match on lip://local/# + key := "lip://local/" + file + "#" + name + if entry, ok := lipBR[key]; ok { + return entry, true + } + // Fallback: scan for entries whose symbol_uri contains our file path. 
+ // This handles cases where LIP's symbol naming diverges from CKB's stable IDs + // (common with C++ mangled names, template specialisations). + prefix := "lip://local/" + file + "#" + for uri, entry := range lipBR { + if strings.HasPrefix(uri, prefix) && strings.Contains(uri, name) { + return entry, true + } + } + return lip.BlastRadiusEntry{}, false +} + +// lipEntryToExternal converts a LIP BlastRadiusEntry to the impact package's +// ExternalBlastRadius for use with impact.MergeBlastRadius. +func lipEntryToExternal(entry *lip.BlastRadiusEntry) *impact.ExternalBlastRadius { + ebr := &impact.ExternalBlastRadius{ + RiskLevel: entry.RiskLevel, + } + for _, di := range entry.DirectItems { + ebr.DirectItems = append(ebr.DirectItems, impact.ExternalItem{ + FileURI: di.FileURI, + SymbolURI: di.SymbolURI, + Distance: di.Distance, + Confidence: di.Confidence, + }) + } + for _, ti := range entry.TransitiveItems { + ebr.TransitiveItems = append(ebr.TransitiveItems, impact.ExternalItem{ + FileURI: ti.FileURI, + SymbolURI: ti.SymbolURI, + Distance: ti.Distance, + Confidence: ti.Confidence, + }) + } + for _, si := range entry.SemanticItems { + ebr.SemanticItems = append(ebr.SemanticItems, impact.ExternalSemanticItem{ + FileURI: si.FileURI, + SymbolURI: si.SymbolURI, + Similarity: si.Similarity, + Source: si.Source, + }) + } + return ebr +} From fd28e20465a8cea4ad6abf94aa297eacea776315 Mon Sep 17 00:00:00 2001 From: Lisa Date: Mon, 20 Apr 2026 23:35:48 +0200 Subject: [PATCH 03/20] refactor(lip): wire BlastRadiusEnricher into AnalyzeImpact, centralise helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AnalyzeImpact now enriches blast radius with LIP semantic coupling on the same lipSupports gate as reviewPR. Both paths share lip.EntryToExternal and lip.LookupSymbol — the private duplicates in review_blastradius.go are removed. BlastRadiusEnricher.EnrichBatch is now actually used. 
Co-Authored-By: Claude Sonnet 4.6 --- internal/lip/blast_radius.go | 76 ++++++++++++++++------------ internal/query/impact.go | 37 ++++++++++---- internal/query/review_blastradius.go | 66 ++++-------------------- 3 files changed, 81 insertions(+), 98 deletions(-) diff --git a/internal/lip/blast_radius.go b/internal/lip/blast_radius.go index 37ea2050..64f8aa7e 100644 --- a/internal/lip/blast_radius.go +++ b/internal/lip/blast_radius.go @@ -2,6 +2,7 @@ package lip import ( "context" + "strings" "github.com/SimplyLiz/CodeMCP/internal/impact" ) @@ -28,39 +29,52 @@ func (e *BlastRadiusEnricher) EnrichBatch(ctx context.Context, changedFileURIs [ out := make(map[string]*impact.ExternalBlastRadius, len(entries)) for symbolURI, entry := range entries { - ebr := &impact.ExternalBlastRadius{ - RiskLevel: entry.RiskLevel, - } - - for _, di := range entry.DirectItems { - ebr.DirectItems = append(ebr.DirectItems, impact.ExternalItem{ - FileURI: di.FileURI, - SymbolURI: di.SymbolURI, - Distance: di.Distance, - Confidence: di.Confidence, - }) - } - - for _, ti := range entry.TransitiveItems { - ebr.TransitiveItems = append(ebr.TransitiveItems, impact.ExternalItem{ - FileURI: ti.FileURI, - SymbolURI: ti.SymbolURI, - Distance: ti.Distance, - Confidence: ti.Confidence, - }) - } + out[symbolURI] = EntryToExternal(&entry) + } + return out, nil +} - for _, si := range entry.SemanticItems { - ebr.SemanticItems = append(ebr.SemanticItems, impact.ExternalSemanticItem{ - FileURI: si.FileURI, - SymbolURI: si.SymbolURI, - Similarity: si.Similarity, - Source: si.Source, - }) +// LookupSymbol finds the blast radius entry for a symbol within a pre-fetched +// result map. LIP keys entries by "lip://local/#" — tries exact +// match first, then falls back to scanning entries whose URI shares the file +// prefix and contains the symbol name. The fallback handles C++ mangled names +// and template specialisations where LIP's symbol URI diverges from CKB's +// stable ID. 
+func LookupSymbol(entries map[string]*impact.ExternalBlastRadius, file, name string) (*impact.ExternalBlastRadius, bool) { + key := "lip://local/" + file + "#" + name + if ebr, ok := entries[key]; ok { + return ebr, true + } + prefix := "lip://local/" + file + "#" + for uri, ebr := range entries { + if strings.HasPrefix(uri, prefix) && strings.Contains(uri[len(prefix):], name) { + return ebr, true } - - out[symbolURI] = ebr } + return nil, false +} - return out, nil +// EntryToExternal converts a BlastRadiusEntry to impact.ExternalBlastRadius. +func EntryToExternal(entry *BlastRadiusEntry) *impact.ExternalBlastRadius { + ebr := &impact.ExternalBlastRadius{RiskLevel: entry.RiskLevel} + for _, di := range entry.DirectItems { + ebr.DirectItems = append(ebr.DirectItems, impact.ExternalItem{ + FileURI: di.FileURI, SymbolURI: di.SymbolURI, + Distance: di.Distance, Confidence: di.Confidence, + }) + } + for _, ti := range entry.TransitiveItems { + ebr.TransitiveItems = append(ebr.TransitiveItems, impact.ExternalItem{ + FileURI: ti.FileURI, SymbolURI: ti.SymbolURI, + Distance: ti.Distance, Confidence: ti.Confidence, + }) + } + for _, si := range entry.SemanticItems { + ebr.SemanticItems = append(ebr.SemanticItems, impact.ExternalSemanticItem{ + FileURI: si.FileURI, SymbolURI: si.SymbolURI, + Similarity: si.Similarity, Source: si.Source, + }) + } + return ebr } + diff --git a/internal/query/impact.go b/internal/query/impact.go index cc9a5a0f..748d31e0 100644 --- a/internal/query/impact.go +++ b/internal/query/impact.go @@ -452,19 +452,36 @@ func (e *Engine) AnalyzeImpact(ctx context.Context, opts AnalyzeImpactOptions) ( docsToUpdate = e.getDocsToUpdate(symbolInfo.StableId, 5) } - // Convert blast radius + // Convert blast radius, then enrich with LIP semantic coupling when available. var blastRadius *BlastRadiusSummary if result.BlastRadius != nil { + // LIP enrichment: one round-trip for the symbol's file, keyed by symbol name. 
+ // Gated on capability so older daemons see no change. + enriched := result.BlastRadius + if e.lipSupports("query_blast_radius_batch") && symbolInfo != nil && symbolInfo.Location != nil { + fileURI := "lip://local/" + symbolInfo.Location.FileId + if lipEntries, _ := lip.QueryBlastRadiusBatch([]string{fileURI}, 0.6); lipEntries != nil { + converted := make(map[string]*impact.ExternalBlastRadius, len(lipEntries)) + for k, v := range lipEntries { + vCopy := v + converted[k] = lip.EntryToExternal(&vCopy) + } + if ext, ok := lip.LookupSymbol(converted, symbolInfo.Location.FileId, symbolInfo.Name); ok { + enriched = impact.MergeBlastRadius(result.BlastRadius, ext) + } + } + } + blastRadius = &BlastRadiusSummary{ - ModuleCount: result.BlastRadius.ModuleCount, - FileCount: result.BlastRadius.FileCount, - UniqueCallerCount: result.BlastRadius.UniqueCallerCount, - RiskLevel: result.BlastRadius.RiskLevel, - StaticCallerCount: result.BlastRadius.StaticCallerCount, - SemanticCallerCount: result.BlastRadius.SemanticCallerCount, - ConfirmedCount: result.BlastRadius.ConfirmedCount, - } - for _, sc := range result.BlastRadius.SemanticCallers { + ModuleCount: enriched.ModuleCount, + FileCount: enriched.FileCount, + UniqueCallerCount: enriched.UniqueCallerCount, + RiskLevel: enriched.RiskLevel, + StaticCallerCount: enriched.StaticCallerCount, + SemanticCallerCount: enriched.SemanticCallerCount, + ConfirmedCount: enriched.ConfirmedCount, + } + for _, sc := range enriched.SemanticCallers { blastRadius.SemanticCallers = append(blastRadius.SemanticCallers, SemanticCallerInfo{ SymbolURI: sc.SymbolURI, FileURI: sc.FileURI, diff --git a/internal/query/review_blastradius.go b/internal/query/review_blastradius.go index 5786bf37..60bd58cf 100644 --- a/internal/query/review_blastradius.go +++ b/internal/query/review_blastradius.go @@ -32,13 +32,19 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op // Prefetch LIP blast radius for all changed files in a single 
round-trip. // Returns nil when LIP is unavailable or doesn't support the message — the // rest of the function degrades to SCIP-only blast radius unchanged. - var lipBR map[string]lip.BlastRadiusEntry + var lipBR map[string]*impact.ExternalBlastRadius if e.lipSupports("query_blast_radius_batch") { lipURIs := make([]string, len(changedFiles)) for i, f := range changedFiles { lipURIs[i] = "lip://local/" + f } - lipBR, _ = lip.QueryBlastRadiusBatch(lipURIs, 0.6) + if raw, _ := lip.QueryBlastRadiusBatch(lipURIs, 0.6); raw != nil { + lipBR = make(map[string]*impact.ExternalBlastRadius, len(raw)) + for k, v := range raw { + vCopy := v + lipBR[k] = lip.EntryToExternal(&vCopy) + } + } } // Collect symbols from changed files, cap at 30 total. @@ -102,8 +108,7 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op // "lip://local/#" convention. semanticCount := 0 if lipBR != nil { - if entry, ok := lipBRLookup(lipBR, sym.stableId, sym.file, sym.name); ok { - enriched := lipEntryToExternal(&entry) + if enriched, ok := lip.LookupSymbol(lipBR, sym.file, sym.name); ok { // Convert BlastRadiusSummary → impact.BlastRadius for merge staticBR := &impact.BlastRadius{ ModuleCount: impactResp.BlastRadius.ModuleCount, @@ -264,56 +269,3 @@ func isFrameworkSymbol(kind, name, file string) bool { return false } -// lipBRLookup finds a LIP blast radius entry for a CKB symbol. LIP keys entries -// by "lip://local/#" — we try that convention first, then -// fall back to scanning entries whose file prefix matches. -func lipBRLookup(lipBR map[string]lip.BlastRadiusEntry, stableId, file, name string) (lip.BlastRadiusEntry, bool) { - // Primary: exact match on lip://local/# - key := "lip://local/" + file + "#" + name - if entry, ok := lipBR[key]; ok { - return entry, true - } - // Fallback: scan for entries whose symbol_uri contains our file path. 
- // This handles cases where LIP's symbol naming diverges from CKB's stable IDs - // (common with C++ mangled names, template specialisations). - prefix := "lip://local/" + file + "#" - for uri, entry := range lipBR { - if strings.HasPrefix(uri, prefix) && strings.Contains(uri, name) { - return entry, true - } - } - return lip.BlastRadiusEntry{}, false -} - -// lipEntryToExternal converts a LIP BlastRadiusEntry to the impact package's -// ExternalBlastRadius for use with impact.MergeBlastRadius. -func lipEntryToExternal(entry *lip.BlastRadiusEntry) *impact.ExternalBlastRadius { - ebr := &impact.ExternalBlastRadius{ - RiskLevel: entry.RiskLevel, - } - for _, di := range entry.DirectItems { - ebr.DirectItems = append(ebr.DirectItems, impact.ExternalItem{ - FileURI: di.FileURI, - SymbolURI: di.SymbolURI, - Distance: di.Distance, - Confidence: di.Confidence, - }) - } - for _, ti := range entry.TransitiveItems { - ebr.TransitiveItems = append(ebr.TransitiveItems, impact.ExternalItem{ - FileURI: ti.FileURI, - SymbolURI: ti.SymbolURI, - Distance: ti.Distance, - Confidence: ti.Confidence, - }) - } - for _, si := range entry.SemanticItems { - ebr.SemanticItems = append(ebr.SemanticItems, impact.ExternalSemanticItem{ - FileURI: si.FileURI, - SymbolURI: si.SymbolURI, - Similarity: si.Similarity, - Source: si.Source, - }) - } - return ebr -} From 1ce495bdb45ae928f642111f97b8edcd05f5a405 Mon Sep 17 00:00:00 2001 From: Lisa Date: Tue, 21 Apr 2026 00:59:14 +0200 Subject: [PATCH 04/20] fix(impact): drop unused CouplingStatic constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CouplingStatic was defined but never referenced — SCIP-only callers go into UniqueCallerCount, not EnrichedCallers. Flagged by ckb review dead-code check on develop. 
Co-Authored-By: Claude Sonnet 4.6 --- internal/impact/types.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/internal/impact/types.go b/internal/impact/types.go index f0109edb..2202993a 100644 --- a/internal/impact/types.go +++ b/internal/impact/types.go @@ -62,9 +62,8 @@ type Reference struct { type CouplingTier string const ( - CouplingStatic CouplingTier = "static" // SCIP call graph — high certainty - CouplingSemantic CouplingTier = "semantic" // LIP embedding similarity — lower certainty - CouplingBoth CouplingTier = "both" // confirmed by both SCIP and LIP + CouplingSemantic CouplingTier = "semantic" // LIP embedding similarity — lower certainty + CouplingBoth CouplingTier = "both" // confirmed by both SCIP and LIP ) // EnrichedCaller is a caller discovered by either static analysis or semantic similarity. From 78dbdb7c80dcbf9569a67fb8cbfb4cfb46699eef Mon Sep 17 00:00:00 2001 From: Lisa Date: Tue, 21 Apr 2026 15:36:13 +0200 Subject: [PATCH 05/20] feat(mcp): renderArchitecture tool + tree-sitter symbol fix Add `renderArchitecture` MCP tool that returns the project's module-level import graph as Mermaid or Graphviz (DOT), ready to paste into IDEs that render Mermaid inline. With `focus` set, returns an undirected BFS neighborhood around the anchor module; without, returns the top-N most-connected nodes. `truncated: true` flags when the node cap kicked in. Wire the tool end-to-end: Go binding `RenderArchitecture` in `internal/cartographer/bridge.go` (+ no-op stub), schema + registration in `internal/mcp/tools.go`, handler in `tool_impls_v86.go`. Vendored `lib.rs` gets a surgical patch (`mod diagram;` + FFI export) and the regenerated `cartographer.h`; new `src/diagram.rs` holds the shared renderer used by CLI and FFI. Fix link-time duplicate-symbol collisions with `go-tree-sitter` under `-tags cartographer`. 
`make build-cartographer` now runs the vendored `scripts/localize-tree-sitter-symbols.sh` after cargo's release build: it partial-links archive members so Cartographer's internal ts_*/tree_sitter_* references resolve within the archive, then marks those symbols local. cartographer_* FFI exports stay global. Beyond unblocking the build, this prevents a silent memory-corruption class of bug where Cartographer's Rust code could have bound to the consumer's tree-sitter copy at global resolution if the two versions' struct layouts ever drifted. Also relocate `ErrUnavailable` from `bridge_stub.go` into `types.go` so it's visible under both tag variants (the tagged bridge references it unconditionally). Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 29 ++ Makefile | 2 + internal/cartographer/bridge.go | 34 ++ internal/cartographer/bridge_stub.go | 8 +- internal/cartographer/types.go | 21 + internal/mcp/tool_impls_v86.go | 39 ++ internal/mcp/tools.go | 30 ++ .../cartographer/include/cartographer.h | 32 +- .../scripts/localize-tree-sitter-symbols.sh | 95 ++++ .../scripts/tests/test-localize-symbols.sh | 81 +++ .../mapper-core/cartographer/src/diagram.rs | 481 ++++++++++++++++++ .../mapper-core/cartographer/src/lib.rs | 109 +++- 12 files changed, 954 insertions(+), 7 deletions(-) create mode 100755 third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh create mode 100755 third_party/cartographer/mapper-core/cartographer/scripts/tests/test-localize-symbols.sh create mode 100644 third_party/cartographer/mapper-core/cartographer/src/diagram.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index cd66ac24..c6b641bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,35 @@ All notable changes to CKB will be documented in this file. 
## [Unreleased] +### Added + +- **`renderArchitecture` MCP tool** — returns the project's module-level + import graph as Mermaid or Graphviz (DOT), ready to paste into IDEs + that render Mermaid inline (Cursor, Claude Desktop, VS Code markdown + preview, GitHub). With `focus` set, returns an undirected BFS + neighborhood around the anchor module to `depth` (default 2); without, + returns the top-N most-connected nodes (default cap 40). Response + includes `truncated: true` when the node cap kicked in. Backed by the + new `cartographer_render_architecture` FFI export; CLI and MCP outputs + are produced by the same shared renderer. +- Go binding `cartographer.RenderArchitecture()` in `internal/cartographer/bridge.go` (+ no-op stub for the no-tag build). + +### Fixed + +- **Tree-sitter symbol collisions at link time** — `libcartographer.a` + previously exported its bundled tree-sitter runtime and grammar + symbols, which collided with `go-tree-sitter` when building CKB with + `-tags cartographer` (`ld: 246 duplicate symbols`). `make build-cartographer` + now post-processes the archive via + `scripts/localize-tree-sitter-symbols.sh` (vendored under + `third_party/cartographer/mapper-core/cartographer/scripts/`), which + partial-links archive members into one combined object and localizes + `ts_*` / `tree_sitter_*`. `cartographer_*` FFI exports stay global. + Beyond the duplicate-symbol error, this also rules out a silent + memory-corruption class of bug where Cartographer's Rust code could + have bound to the consumer's tree-sitter copy at global resolution + time if the two versions' struct layouts ever drifted. + ## [9.1.0] - 2026-04-16 ### Added diff --git a/Makefile b/Makefile index b6144073..84f7fbe2 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,8 @@ build: build-cartographer build-cartographer: @echo "Building Cartographer static library..." 
@cd $(CARTOGRAPHER_DIR) && cargo build --release + @echo "Localizing tree-sitter symbols (prevents link-time collisions with go-tree-sitter)..." + @cd $(CARTOGRAPHER_DIR) && scripts/localize-tree-sitter-symbols.sh target/release/libcartographer.a @echo "Library: $(CARTOGRAPHER_LIB)" ## Build without Cartographer (no Rust toolchain required — for CI and contributors) diff --git a/internal/cartographer/bridge.go b/internal/cartographer/bridge.go index 3ae6964b..df99288b 100644 --- a/internal/cartographer/bridge.go +++ b/internal/cartographer/bridge.go @@ -647,3 +647,37 @@ func ContextHealth(content string, opts *ContextHealthOpts) (*ContextHealthRepor } return &result, nil } + +// RenderArchitecture renders the project's import graph as a Mermaid or +// Graphviz (DOT) diagram. focus is an optional module_id or path suffix — +// when set, the diagram is a BFS neighborhood of that module up to `depth`; +// when empty, it's the top-N nodes by degree. depth=0 → 2, maxNodes=0 → 40. +func RenderArchitecture(path, format, focus string, depth, maxNodes uint32) (*RenderArchitectureResult, error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + var cFormat *C.char + if format != "" { + cFormat = C.CString(format) + defer C.free(unsafe.Pointer(cFormat)) + } + + var cFocus *C.char + if focus != "" { + cFocus = C.CString(focus) + defer C.free(unsafe.Pointer(cFocus)) + } + + resp, err := callFFI(func() *C.char { + return C.cartographer_render_architecture(cPath, cFormat, cFocus, C.uint(depth), C.uint(maxNodes)) + }) + if err != nil { + return nil, err + } + + var result RenderArchitectureResult + if err := json.Unmarshal(resp.Data, &result); err != nil { + return nil, &CartographerError{err.Error()} + } + return &result, nil +} diff --git a/internal/cartographer/bridge_stub.go b/internal/cartographer/bridge_stub.go index d77617d3..c44c89b7 100644 --- a/internal/cartographer/bridge_stub.go +++ b/internal/cartographer/bridge_stub.go @@ -5,11 +5,6 @@ // All 
functions return ErrUnavailable; callers should check Available() first. package cartographer -import "errors" - -// ErrUnavailable is returned by all functions when Cartographer is not compiled in. -var ErrUnavailable = errors.New("cartographer: not compiled in this build (use -tags cartographer)") - // Available reports whether the Cartographer library is linked into this binary. func Available() bool { return false } @@ -52,3 +47,6 @@ func QueryContext(_, _ string, _ *QueryContextOpts) (*QueryContextResult, error) func ShotgunSurgery(_ string, _, _ uint32) ([]ShotgunSurgeryEntry, error) { return nil, ErrUnavailable } func Evolution(_ string, _ uint32) (*EvolutionResult, error) { return nil, ErrUnavailable } func BlastRadius(_, _ string, _ uint32) (*BlastRadiusResult, error) { return nil, ErrUnavailable } +func RenderArchitecture(_, _, _ string, _, _ uint32) (*RenderArchitectureResult, error) { + return nil, ErrUnavailable +} diff --git a/internal/cartographer/types.go b/internal/cartographer/types.go index 6b67dd03..7b8b68f1 100644 --- a/internal/cartographer/types.go +++ b/internal/cartographer/types.go @@ -1,6 +1,13 @@ // Package cartographer provides CGo bindings to the Rust Cartographer library. package cartographer +import "errors" + +// ErrUnavailable is returned by stub builds when Cartographer is not compiled +// in. Callers check Available() first; under `-tags cartographer` this value +// is never returned but still needs to be referenceable by tool impls. 
+var ErrUnavailable = errors.New("cartographer: not compiled in this build (use -tags cartographer)") + // --------------------------------------------------------------------------- // Public types (shared between real bridge and stub builds) // --------------------------------------------------------------------------- @@ -503,3 +510,17 @@ type BlastRadiusRelated struct { } // --------------------------------------------------------------------------- +// Render architecture types +// --------------------------------------------------------------------------- + +// RenderArchitectureResult is returned by RenderArchitecture. +// Truncated is true when the node cap kicked in — callers should tighten +// focus/depth or raise maxNodes to see more. +type RenderArchitectureResult struct { + Diagram string `json:"diagram"` + Truncated bool `json:"truncated"` + Format string `json:"format"` // "mermaid" | "dot" + NodeCount int `json:"nodeCount"` +} + +// --------------------------------------------------------------------------- diff --git a/internal/mcp/tool_impls_v86.go b/internal/mcp/tool_impls_v86.go index 35c42213..eab8968e 100644 --- a/internal/mcp/tool_impls_v86.go +++ b/internal/mcp/tool_impls_v86.go @@ -112,6 +112,45 @@ func (s *MCPServer) toolGetArchitecturalEvolution(params map[string]interface{}) return NewToolResponse().Data(result).Build(), nil } +// toolRenderArchitecture renders the project's import graph as a Mermaid or +// Graphviz (DOT) diagram, suitable for inline rendering in IDEs that support +// Mermaid (Cursor, Claude Desktop, VS Code markdown preview, GitHub). +// +// With `focus` set, returns a BFS neighborhood of the given module. Without, +// returns the top-N most-connected nodes. `truncated=true` in the response +// signals that the node cap kicked in. 
+func (s *MCPServer) toolRenderArchitecture(params map[string]interface{}) (*envelope.Response, error) { + if !cartographer.Available() { + return nil, errors.NewOperationError("render architecture", cartographer.ErrUnavailable) + } + + format, _ := params["format"].(string) + if format == "" { + format = "mermaid" + } + if format != "mermaid" && format != "dot" { + return nil, errors.NewInvalidParameterError("format", "must be \"mermaid\" or \"dot\"") + } + + focus, _ := params["focus"].(string) + + var depth, maxNodes uint32 + if v, ok := params["depth"].(float64); ok && v > 0 { + depth = uint32(v) + } + if v, ok := params["max_nodes"].(float64); ok && v > 0 { + maxNodes = uint32(v) + } + + repoRoot := s.engine().GetRepoRoot() + result, err := cartographer.RenderArchitecture(repoRoot, format, focus, depth, maxNodes) + if err != nil { + return nil, errors.NewOperationError("render architecture", err) + } + + return NewToolResponse().Data(result).Build(), nil +} + // toolGetBlastRadius returns the graph-theoretic blast radius for a module/file. func (s *MCPServer) toolGetBlastRadius(params map[string]interface{}) (*envelope.Response, error) { if !cartographer.Available() { diff --git a/internal/mcp/tools.go b/internal/mcp/tools.go index 01793150..5e84eb3e 100644 --- a/internal/mcp/tools.go +++ b/internal/mcp/tools.go @@ -1963,6 +1963,35 @@ func (s *MCPServer) GetToolDefinitions() []Tool { "required": []string{"target"}, }, }, + { + Name: "renderArchitecture", + Description: "Render the project's module-level import graph as a Mermaid or Graphviz (DOT) diagram, ready to paste into IDEs that render Mermaid inline (Cursor, Claude Desktop, VS Code markdown preview, GitHub). With `focus` set, returns a BFS neighborhood (both imports and imported-by) around the given module to depth `depth`; without `focus`, returns the top-N most-connected nodes as an at-a-glance shape of the codebase. 
Response includes `truncated: true` when the node cap kicked in — tighten focus or lower depth to get a complete view.", + InputSchema: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "format": map[string]interface{}{ + "type": "string", + "description": "Output diagram format (default mermaid)", + "enum": []string{"mermaid", "dot"}, + "default": "mermaid", + }, + "focus": map[string]interface{}{ + "type": "string", + "description": "Optional anchor: module ID, repo-relative file path, or path suffix (e.g. 'server.rs'). When set, the diagram is a BFS neighborhood around this node; when absent, returns the top-N most-connected nodes.", + }, + "depth": map[string]interface{}{ + "type": "integer", + "description": "BFS depth from `focus` over undirected import edges (default 2). Ignored when `focus` is absent.", + "default": 2, + }, + "max_nodes": map[string]interface{}{ + "type": "integer", + "description": "Cap on nodes rendered; response sets `truncated: true` if the cap was hit (default 40).", + "default": 40, + }, + }, + }, + }, { Name: "queryContext", Description: "Retrieve the most relevant code context for a task or question. Runs Cartographer's PKG retrieval pipeline: BM25 content search → personalized PageRank skeleton → context health scoring. Returns a ready-to-use context bundle with token count and A–F quality grade. 
Use this before starting any non-trivial coding task.", @@ -2826,6 +2855,7 @@ func (s *MCPServer) RegisterTools() { s.tools["detectShotgunSurgery"] = s.toolDetectShotgunSurgery s.tools["getArchitecturalEvolution"] = s.toolGetArchitecturalEvolution s.tools["getBlastRadius"] = s.toolGetBlastRadius + s.tools["renderArchitecture"] = s.toolRenderArchitecture // v9.0 LIP symbol annotations s.tools["annotationSet"] = s.toolAnnotationSet s.tools["annotationGet"] = s.toolAnnotationGet diff --git a/third_party/cartographer/mapper-core/cartographer/include/cartographer.h b/third_party/cartographer/mapper-core/cartographer/include/cartographer.h index 8c65594c..cbc39e7d 100644 --- a/third_party/cartographer/mapper-core/cartographer/include/cartographer.h +++ b/third_party/cartographer/mapper-core/cartographer/include/cartographer.h @@ -378,7 +378,7 @@ char *cartographer_search_content(const char *path, const char *pattern, const c * * Parameters: * - `path` – absolute path to repo root (UTF-8 C string) - * - `pattern` – glob pattern, e.g. `"*.rs"` or `"src/**/*.go"` (C string) + * - `pattern` – glob pattern, e.g. `"*.rs"` or `"src/subdir/*.go"` (C string) * - `limit` – max files to return; 0 = unlimited * - `opts_json` – optional JSON `FindOptions` or null for defaults: * `{ modifiedSinceSecs, newerThan, minSizeBytes, maxSizeBytes, maxDepth, noIgnore }` @@ -641,4 +641,34 @@ char *cartographer_query_context(const char *path, const char *query, const char */ char *cartographer_shotgun_surgery(const char *path, uint32_t limit, uint32_t min_partners); +/** + * Render the project's import graph as a Mermaid or Graphviz (DOT) diagram. 
+ * + * Inputs: + * `path` — project root (C string) + * `format` — "mermaid" or "dot" (C string; may be null → "mermaid") + * `focus` — optional module_id or path to anchor BFS on (C string, may + * be null → top-N by degree) + * `depth` — BFS depth when `focus` is set (0 → 2; ignored without focus) + * `max_nodes` — cap on nodes in the output (0 → 40) + * + * Response shape: + * ```json + * { + * "ok": true, + * "data": { + * "diagram": "graph TD\n N0[...] --> N1[...]\n...", + * "truncated": false, + * "format": "mermaid", + * "nodeCount": 23 + * } + * } + * ``` + */ +char *cartographer_render_architecture(const char *path, + const char *format, + const char *focus, + uint32_t depth, + uint32_t max_nodes); + #endif /* CARTOGRAPHER_H */ diff --git a/third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh b/third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh new file mode 100755 index 00000000..7da03e16 --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +# +# Post-process libcartographer.a so its bundled tree-sitter runtime and +# grammar symbols are internal, not externally visible. Consumers that also +# link tree-sitter (e.g. Go projects using github.com/smacker/go-tree-sitter) +# would otherwise trip "duplicate symbol" errors at link time and — worse — +# risk Cartographer's Rust code binding to the consumer's tree-sitter copy +# if the linker resolved `ts_*` cross-archive. If the two tree-sitter +# versions drift, that route produces silent memory corruption. +# +# Approach: partial-link every .o inside the archive into one combined +# relocatable object so Cartographer's internal ts_*/tree_sitter_* refs +# resolve within the archive, then mark those symbols local so they no +# longer participate in global symbol resolution. Only the cartographer_* +# FFI entry points stay exported. 
+# +# Requires a C compiler whose linker supports `-r`, an `ar`, and an +# objcopy-style tool. `rust-objcopy` from rustup's llvm-tools-preview +# component works on both Linux (ELF) and macOS (Mach-O). +# +# Usage: localize-tree-sitter-symbols.sh + +set -euo pipefail + +ARCHIVE="${1:?usage: $0 }" +case "$ARCHIVE" in + /*) ARCHIVE_ABS="$ARCHIVE" ;; + *) ARCHIVE_ABS="$PWD/$ARCHIVE" ;; +esac + +if [[ ! -f "$ARCHIVE_ABS" ]]; then + echo "error: archive not found: $ARCHIVE_ABS" >&2 + exit 1 +fi + +pick() { + for c in "$@"; do + if command -v "$c" >/dev/null 2>&1; then echo "$c"; return 0; fi + done + return 1 +} + +# `rust-objcopy` ships in the target-specific rustlib bin dir and is not on +# PATH by default; probe it via rustc before falling through to system tools. +OBJCOPY="" +if command -v rustc >/dev/null 2>&1; then + RUST_BINDIR="$(rustc --print target-libdir 2>/dev/null)/../bin" + if [[ -x "$RUST_BINDIR/rust-objcopy" ]]; then + OBJCOPY="$RUST_BINDIR/rust-objcopy" + fi +fi +if [[ -z "$OBJCOPY" ]]; then + OBJCOPY="$(pick rust-objcopy llvm-objcopy objcopy)" || { + echo "error: no objcopy tool found (tried rust-objcopy, llvm-objcopy, objcopy)" >&2 + echo "hint: rustup component add llvm-tools-preview" >&2 + exit 1 + } +fi +CC="$(pick cc clang gcc)" || { echo "error: no C compiler found" >&2; exit 1; } +AR="$(pick llvm-ar ar)" || { echo "error: no ar found" >&2; exit 1; } + +# Mach-O symbol names carry a leading underscore; ELF does not. +case "$(uname -s)" in + Darwin) UPREFIX="_" ;; + *) UPREFIX="" ;; +esac + +WORK="$(mktemp -d)" +trap 'rm -rf "$WORK"' EXIT + +cp "$ARCHIVE_ABS" "$WORK/input.a" +( + cd "$WORK" + "$AR" x input.a + rm input.a + + # Partial link (`ld -r`) merges every member into a single relocatable + # object. `-nostdlib` prevents clang/gcc from pulling in CRT or libSystem. 
+ "$CC" -nostdlib -Wl,-r -o combined.o ./*.o + + # Localize tree-sitter runtime (`ts_*`) and grammar init symbols + # (`tree_sitter_`, plus internal `tree_sitter__external_scanner_*` + # helpers). Safe now that the combined object resolved internal refs. + "$OBJCOPY" \ + --wildcard \ + --localize-symbol="${UPREFIX}ts_*" \ + --localize-symbol="${UPREFIX}tree_sitter_*" \ + combined.o + + # Replace the archive with just the combined, localized object. + rm -f "$ARCHIVE_ABS" + "$AR" rcs "$ARCHIVE_ABS" combined.o +) + +echo "localized tree-sitter symbols in: $ARCHIVE_ABS" diff --git a/third_party/cartographer/mapper-core/cartographer/scripts/tests/test-localize-symbols.sh b/third_party/cartographer/mapper-core/cartographer/scripts/tests/test-localize-symbols.sh new file mode 100755 index 00000000..56de9e62 --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/scripts/tests/test-localize-symbols.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +# +# Smoke test for scripts/localize-tree-sitter-symbols.sh. +# +# Builds a small static archive that mirrors libcartographer.a's symbol +# shape — a tree-sitter runtime object, a grammar object, and a wrapper +# object that references them and exposes cartographer_* entry points — +# runs the script, and asserts ts_*/tree_sitter_* are no longer global +# while cartographer_* still is. 
+ +set -euo pipefail + +HERE="$(cd "$(dirname "$0")" && pwd)" +SCRIPT="$HERE/../localize-tree-sitter-symbols.sh" + +CC="${CC:-cc}" +AR="${AR:-ar}" +NM="${NM:-nm}" + +WORK="$(mktemp -d)" +trap 'rm -rf "$WORK"' EXIT +cd "$WORK" + +cat > runtime.c <<'EOF' +int ts_parser_new(void) { return 42; } +int ts_tree_root_node(int x) { return x + 1; } +EOF + +cat > grammar.c <<'EOF' +int tree_sitter_rust(void) { return 7; } +EOF + +cat > wrapper.c <<'EOF' +extern int ts_parser_new(void); +extern int tree_sitter_rust(void); +int cartographer_version(void) { return ts_parser_new() + tree_sitter_rust(); } +int cartographer_render_architecture(void) { return 0; } +EOF + +"$CC" -c -fPIC runtime.c -o runtime.o +"$CC" -c -fPIC grammar.c -o grammar.o +"$CC" -c -fPIC wrapper.c -o wrapper.o +"$AR" rcs libfixture.a runtime.o grammar.o wrapper.o + +# Mach-O prepends an underscore to C symbol names; ELF does not. +case "$(uname -s)" in + Darwin) U=_ ;; + *) U= ;; +esac + +fail() { echo "FAIL: $*" >&2; exit 1; } + +# Pre-condition: baseline archive exposes ts_* and tree_sitter_* as globals. +"$NM" -g runtime.o | grep -qE " T ${U}ts_parser_new\$" \ + || fail "baseline: ${U}ts_parser_new should be global in runtime.o" +"$NM" -g grammar.o | grep -qE " T ${U}tree_sitter_rust\$" \ + || fail "baseline: ${U}tree_sitter_rust should be global in grammar.o" + +"$SCRIPT" libfixture.a >/dev/null + +# After localization: archive should contain exactly combined.o. 
+rm -f runtime.o grammar.o wrapper.o +"$AR" x libfixture.a +[[ -f combined.o ]] || fail "expected combined.o inside archive after localization" + +GLOBAL_TS="$("$NM" -g combined.o | grep -cE " T ${U}ts_" || true)" +GLOBAL_TSL="$("$NM" -g combined.o | grep -cE " T ${U}tree_sitter_" || true)" +GLOBAL_CARTO="$("$NM" -g combined.o | grep -cE " T ${U}cartographer_" || true)" + +[[ "$GLOBAL_TS" -eq 0 ]] || fail "ts_* still global ($GLOBAL_TS)" +[[ "$GLOBAL_TSL" -eq 0 ]] || fail "tree_sitter_* still global ($GLOBAL_TSL)" +[[ "$GLOBAL_CARTO" -ge 2 ]] || fail "cartographer_* lost exports (got $GLOBAL_CARTO, want >= 2)" + +# And the localized symbols should still be present as local (t), i.e. the +# definitions weren't stripped — just made invisible to the global resolver. +LOCAL_TS="$("$NM" combined.o | grep -cE " t ${U}ts_" || true)" +LOCAL_TSL="$("$NM" combined.o | grep -cE " t ${U}tree_sitter_" || true)" +[[ "$LOCAL_TS" -ge 1 ]] || fail "ts_* definitions missing post-localization" +[[ "$LOCAL_TSL" -ge 1 ]] || fail "tree_sitter_* definitions missing post-localization" + +echo "PASS: ts_* and tree_sitter_* localized; cartographer_* still exported ($GLOBAL_CARTO symbols)" diff --git a/third_party/cartographer/mapper-core/cartographer/src/diagram.rs b/third_party/cartographer/mapper-core/cartographer/src/diagram.rs new file mode 100644 index 00000000..04aa6f2e --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/src/diagram.rs @@ -0,0 +1,481 @@ +//! Shared diagram renderer for the import graph. +//! +//! Produces Mermaid or Graphviz (DOT) from a `ProjectGraphResponse`, with two +//! node-selection modes: +//! - No focus → top-N nodes by degree (fallback; "shape of the codebase"). +//! - With focus → BFS from anchor module over import edges up to `depth` +//! ("shape of the neighborhood I'm editing"). +//! +//! Used by both the CLI (`diagram_mode`) and the FFI +//! (`cartographer_render_architecture`) so CLI output and MCP output stay in +//! lock-step. 
+ +use std::collections::{HashMap, HashSet, VecDeque}; + +use crate::api::ProjectGraphResponse; + +/// Output format requested by the caller. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DiagramFormat { + Mermaid, + Dot, +} + +impl DiagramFormat { + pub fn parse(s: &str) -> Result { + match s.to_lowercase().as_str() { + "mermaid" | "" => Ok(DiagramFormat::Mermaid), + "dot" | "graphviz" => Ok(DiagramFormat::Dot), + other => Err(format!("unknown diagram format: {other}")), + } + } +} + +/// Rendering options. `focus` is a module_id (or suffix match on a path/module_id). +#[derive(Debug, Clone)] +pub struct RenderOptions<'a> { + pub format: DiagramFormat, + pub focus: Option<&'a str>, + pub depth: usize, + pub max_nodes: usize, +} + +/// Rendered diagram plus a truncation flag so callers can tell the model to +/// tighten `focus` or lower `depth` when the cap kicked in. +#[derive(Debug, Clone)] +pub struct RenderedDiagram { + pub diagram: String, + pub truncated: bool, + pub node_count: usize, +} + +/// Render an import-graph diagram. Pure over `graph` — no I/O. +pub fn render(graph: &ProjectGraphResponse, opts: &RenderOptions) -> Result { + let max_nodes = opts.max_nodes.max(1); + + let (included, truncated) = match opts.focus { + Some(anchor) => bfs_from_anchor(graph, anchor, opts.depth, max_nodes)?, + None => top_by_degree(graph, max_nodes), + }; + + let included_set: HashSet<&str> = included.iter().map(|s| s.as_str()).collect(); + + // Map module_id -> node for stable lookup during rendering. 
+ let node_by_id: HashMap<&str, &crate::api::GraphNode> = graph + .nodes + .iter() + .map(|n| (n.module_id.as_str(), n)) + .collect(); + + let content = match opts.format { + DiagramFormat::Dot => render_dot(&included, &included_set, &node_by_id, graph), + DiagramFormat::Mermaid => render_mermaid(&included, &included_set, &node_by_id, graph), + }; + + Ok(RenderedDiagram { diagram: content, truncated, node_count: included.len() }) +} + +fn top_by_degree(graph: &ProjectGraphResponse, max_nodes: usize) -> (Vec, bool) { + let mut degree: HashMap<&str, usize> = HashMap::new(); + for edge in &graph.edges { + *degree.entry(edge.source.as_str()).or_insert(0) += 1; + *degree.entry(edge.target.as_str()).or_insert(0) += 1; + } + + let mut ranked: Vec<&crate::api::GraphNode> = graph + .nodes + .iter() + .filter(|n| degree.get(n.module_id.as_str()).copied().unwrap_or(0) > 0) + .collect(); + ranked.sort_by(|a, b| { + let da = degree.get(a.module_id.as_str()).copied().unwrap_or(0); + let db = degree.get(b.module_id.as_str()).copied().unwrap_or(0); + db.cmp(&da) + .then_with(|| a.module_id.cmp(&b.module_id)) + }); + + let truncated = ranked.len() > max_nodes; + ranked.truncate(max_nodes); + + (ranked.into_iter().map(|n| n.module_id.clone()).collect(), truncated) +} + +/// Undirected BFS from `anchor` over import edges. We treat imports as +/// bidirectional here because "the area I'm editing" includes both what I +/// import *and* what imports me — callers usually want the full neighborhood. +fn bfs_from_anchor( + graph: &ProjectGraphResponse, + anchor: &str, + depth: usize, + max_nodes: usize, +) -> Result<(Vec, bool), String> { + // Resolve anchor: accept exact module_id match, then path suffix match. 
+ let resolved = graph + .nodes + .iter() + .find(|n| n.module_id == anchor) + .or_else(|| graph.nodes.iter().find(|n| n.path == anchor)) + .or_else(|| graph.nodes.iter().find(|n| n.module_id.ends_with(anchor) || n.path.ends_with(anchor))) + .ok_or_else(|| format!("focus not found in graph: {anchor}"))?; + + let start = resolved.module_id.clone(); + + // Build an adjacency map (undirected) once. + let mut adj: HashMap<&str, Vec<&str>> = HashMap::new(); + for edge in &graph.edges { + adj.entry(edge.source.as_str()).or_default().push(edge.target.as_str()); + adj.entry(edge.target.as_str()).or_default().push(edge.source.as_str()); + } + + let mut visited: HashSet = HashSet::new(); + let mut order: Vec = Vec::new(); + let mut queue: VecDeque<(String, usize)> = VecDeque::new(); + + visited.insert(start.clone()); + order.push(start.clone()); + queue.push_back((start, 0)); + + let mut truncated = false; + + while let Some((module, d)) = queue.pop_front() { + if d >= depth { + continue; + } + if let Some(neighbors) = adj.get(module.as_str()) { + for &n in neighbors { + if visited.insert(n.to_string()) { + if order.len() >= max_nodes { + truncated = true; + // Drain the queue so we stop adding further frontier nodes. 
+ queue.clear(); + break; + } + order.push(n.to_string()); + queue.push_back((n.to_string(), d + 1)); + } + } + } + } + + Ok((order, truncated)) +} + +fn render_dot( + included: &[String], + included_set: &HashSet<&str>, + node_by_id: &HashMap<&str, &crate::api::GraphNode>, + graph: &ProjectGraphResponse, +) -> String { + let mut out = String::from("digraph cartographer {\n rankdir=LR;\n"); + for module_id in included { + let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; + let label = node.path.rsplit('/').next().unwrap_or(&node.path); + let color = role_color_dot(node.role.as_deref()); + out.push_str(&format!( + " \"{}\" [label=\"{}\\n{} fn\" shape=box style=filled fillcolor=\"{}\"];\n", + node.module_id, label, node.signature_count, color + )); + } + for edge in &graph.edges { + if included_set.contains(edge.source.as_str()) && included_set.contains(edge.target.as_str()) { + out.push_str(&format!(" \"{}\" -> \"{}\";\n", edge.source, edge.target)); + } + } + out.push('}'); + out +} + +fn render_mermaid( + included: &[String], + included_set: &HashSet<&str>, + node_by_id: &HashMap<&str, &crate::api::GraphNode>, + graph: &ProjectGraphResponse, +) -> String { + let mut out = String::from("graph TD\n"); + out.push_str(" classDef bridge fill:#f96,stroke:#333\n"); + out.push_str(" classDef core fill:#9cf,stroke:#333\n"); + out.push_str(" classDef dead fill:#ccc,stroke:#333\n"); + out.push_str(" classDef entry fill:#9f9,stroke:#333\n"); + + let id_map: HashMap<&str, usize> = included + .iter() + .enumerate() + .map(|(i, m)| (m.as_str(), i)) + .collect(); + + for module_id in included { + let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; + let i = id_map[module_id.as_str()]; + let label = node.path.rsplit('/').next().unwrap_or(&node.path); + let class_suffix = role_class_suffix(node.role.as_deref()); + out.push_str(&format!( + " N{}[\"{}\\n{} fn\"]{}\n", + i, label, node.signature_count, class_suffix + )); + } + + for edge in 
&graph.edges { + if included_set.contains(edge.source.as_str()) && included_set.contains(edge.target.as_str()) { + if let (Some(&si), Some(&ti)) = ( + id_map.get(edge.source.as_str()), + id_map.get(edge.target.as_str()), + ) { + out.push_str(&format!(" N{} --> N{}\n", si, ti)); + } + } + } + out +} + +fn role_color_dot(role: Option<&str>) -> &'static str { + match role { + Some("core") => "#9cf", + Some("bridge") => "#f96", + Some("dead") => "#ccc", + Some("entry") => "#9f9", + _ => "#fff", + } +} + +fn role_class_suffix(role: Option<&str>) -> &'static str { + match role { + Some("core") => ":::core", + Some("bridge") => ":::bridge", + Some("dead") => ":::dead", + Some("entry") => ":::entry", + _ => "", + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::api::{GraphEdge, GraphMetadata, GraphNode, ProjectGraphResponse}; + use std::collections::HashMap; + + fn node(id: &str, role: Option<&str>) -> GraphNode { + GraphNode { + module_id: id.into(), + path: format!("src/{}.rs", id), + language: "rust".into(), + signature_count: 3, + complexity: None, + is_bridge: None, + bridge_score: None, + degree: None, + risk_level: None, + churn: None, + hotspot_score: None, + role: role.map(String::from), + is_dead: None, + unreferenced_exports: None, + fan_in: None, + fan_out: None, + cochange_partners: None, + cochange_entropy: None, + } + } + + fn edge(src: &str, tgt: &str) -> GraphEdge { + GraphEdge { + source: src.into(), + target: tgt.into(), + edge_type: "import".into(), + } + } + + fn fixture() -> ProjectGraphResponse { + ProjectGraphResponse { + nodes: vec![ + node("a", Some("core")), + node("b", None), + node("c", Some("bridge")), + node("d", None), + node("isolated", None), + ], + edges: vec![edge("a", "b"), edge("b", "c"), edge("c", "d")], + cycles: vec![], + god_modules: vec![], + layer_violations: vec![], + metadata: GraphMetadata { + total_files: 5, + total_edges: 3, + languages: HashMap::new(), + generated_at: "".into(), + bridge_count: None, + 
cycle_count: None, + god_module_count: None, + health_score: None, + layer_violation_count: None, + architectural_drift: None, + hotspot_count: None, + dead_code_count: None, + unreferenced_exports_count: None, + }, + cochange_pairs: vec![], + } + } + + #[test] + fn top_n_skips_isolated_nodes() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + }).unwrap(); + assert!(!r.diagram.contains("isolated")); + assert_eq!(r.node_count, 4); + assert!(!r.truncated); + } + + #[test] + fn top_n_truncates_and_reports() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 2, + }).unwrap(); + assert!(r.truncated); + assert_eq!(r.node_count, 2); + } + + #[test] + fn focus_bfs_expands_neighborhood() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a"), + depth: 1, + max_nodes: 10, + }).unwrap(); + // depth=1 from a → reaches b but not c + assert_eq!(r.node_count, 2); + } + + #[test] + fn focus_bfs_depth_two_reaches_further() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a"), + depth: 2, + max_nodes: 10, + }).unwrap(); + assert_eq!(r.node_count, 3); // a, b, c + } + + #[test] + fn focus_accepts_path_suffix() { + let g = fixture(); + // path is "src/a.rs" — match by suffix + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a.rs"), + depth: 1, + max_nodes: 10, + }).unwrap(); + assert_eq!(r.node_count, 2); + } + + #[test] + fn focus_not_found_returns_error() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("does_not_exist"), + depth: 2, + max_nodes: 10, + }); + assert!(r.is_err()); + } + + #[test] + fn dot_output_has_role_colors() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: 
DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + }).unwrap(); + assert!(r.diagram.starts_with("digraph cartographer {")); + assert!(r.diagram.contains("#9cf")); // core color present for node a + } + + #[test] + fn format_parse_accepts_aliases_and_rejects_unknown() { + assert_eq!(DiagramFormat::parse("mermaid").unwrap(), DiagramFormat::Mermaid); + assert_eq!(DiagramFormat::parse("MERMAID").unwrap(), DiagramFormat::Mermaid); + assert_eq!(DiagramFormat::parse("").unwrap(), DiagramFormat::Mermaid); + assert_eq!(DiagramFormat::parse("dot").unwrap(), DiagramFormat::Dot); + assert_eq!(DiagramFormat::parse("graphviz").unwrap(), DiagramFormat::Dot); + assert!(DiagramFormat::parse("svg").is_err()); + } + + #[test] + fn focus_bfs_is_undirected() { + // "The area I'm editing" includes both what I import and what imports + // me. Verify BFS from a leaf picks up its importers, not just its + // imports. Here `d` is imported by `c` (edge c→d) but imports nothing. + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("d"), + depth: 1, + max_nodes: 10, + }).unwrap(); + assert_eq!(r.node_count, 2); // d + its importer c + assert!(r.diagram.contains("c.rs")); + } + + #[test] + fn focus_respects_node_cap() { + // depth=2 from a would reach {a,b,c}; cap at 2 should truncate. + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a"), + depth: 2, + max_nodes: 2, + }).unwrap(); + assert_eq!(r.node_count, 2); + assert!(r.truncated); + } + + #[test] + fn focus_bfs_handles_cycles_without_looping() { + // Add a cycle a→b→c→a and BFS should still terminate and not + // duplicate nodes in the output. + let mut g = fixture(); + g.edges.push(edge("c", "a")); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a"), + depth: 5, + max_nodes: 10, + }).unwrap(); + // a, b, c, d reachable undirected; no duplicates. 
+ assert_eq!(r.node_count, 4); + } + + #[test] + fn mermaid_output_declares_classes_and_direction() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + }).unwrap(); + assert!(r.diagram.starts_with("graph TD\n")); + assert!(r.diagram.contains("classDef core")); + assert!(r.diagram.contains("classDef bridge")); + // Role-tagged nodes carry their class suffix. + assert!(r.diagram.contains(":::core")); + assert!(r.diagram.contains(":::bridge")); + } +} diff --git a/third_party/cartographer/mapper-core/cartographer/src/lib.rs b/third_party/cartographer/mapper-core/cartographer/src/lib.rs index 95fadd21..437ac907 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/lib.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/lib.rs @@ -16,6 +16,7 @@ use std::path::{Path, PathBuf}; use rayon::prelude::*; mod api; +mod diagram; mod extractor; mod git_analysis; mod layers; @@ -1171,7 +1172,7 @@ pub extern "C" fn cartographer_search_content( /// /// Parameters: /// - `path` – absolute path to repo root (UTF-8 C string) -/// - `pattern` – glob pattern, e.g. `"*.rs"` or `"src/**/*.go"` (C string) +/// - `pattern` – glob pattern, e.g. `"*.rs"` or `"src/subdir/*.go"` (C string) /// - `limit` – max files to return; 0 = unlimited /// - `opts_json` – optional JSON `FindOptions` or null for defaults: /// `{ modifiedSinceSecs, newerThan, minSizeBytes, maxSizeBytes, maxDepth, noIgnore }` @@ -1934,3 +1935,109 @@ pub extern "C" fn cartographer_shotgun_surgery( result_to_json_ptr(Ok::<_, String>(entries)) } + +// --------------------------------------------------------------------------- +// FFI: Render Architecture Diagram (Mermaid / DOT) +// --------------------------------------------------------------------------- + +/// Render the project's import graph as a Mermaid or Graphviz (DOT) diagram. 
+/// +/// Inputs: +/// `path` — project root (C string) +/// `format` — "mermaid" or "dot" (C string; may be null → "mermaid") +/// `focus` — optional module_id or path to anchor BFS on (C string, may +/// be null → top-N by degree) +/// `depth` — BFS depth when `focus` is set (0 → 2; ignored without focus) +/// `max_nodes` — cap on nodes in the output (0 → 40) +/// +/// Response shape: +/// ```json +/// { +/// "ok": true, +/// "data": { +/// "diagram": "graph TD\n N0[...] --> N1[...]\n...", +/// "truncated": false, +/// "format": "mermaid", +/// "nodeCount": 23 +/// } +/// } +/// ``` +#[no_mangle] +pub extern "C" fn cartographer_render_architecture( + path: *const c_char, + format: *const c_char, + focus: *const c_char, + depth: u32, + max_nodes: u32, +) -> *mut c_char { + let path = match c_str_to_path(path) { + Ok(p) => p, + Err(e) => return result_to_json_ptr::(Err(e)), + }; + + let format_str = if format.is_null() { + "mermaid".to_string() + } else { + match unsafe { CStr::from_ptr(format) }.to_str() { + Ok(s) => s.to_string(), + Err(e) => return result_to_json_ptr::(Err(e.to_string())), + } + }; + let fmt = match diagram::DiagramFormat::parse(&format_str) { + Ok(f) => f, + Err(e) => return result_to_json_ptr::(Err(e)), + }; + + let focus_str = if focus.is_null() { + None + } else { + match unsafe { CStr::from_ptr(focus) }.to_str() { + Ok(s) if !s.is_empty() => Some(s.to_string()), + Ok(_) => None, + Err(e) => return result_to_json_ptr::(Err(e.to_string())), + } + }; + + let depth = if depth == 0 { 2 } else { depth as usize }; + let max_nodes = if max_nodes == 0 { 40 } else { max_nodes as usize }; + + let mapped_files = match build_mapped_files(&path) { + Ok(m) => m, + Err(e) => return result_to_json_ptr::(Err(e)), + }; + let state = ApiState::new(path.clone()); + { + let mut files = state.mapped_files.lock().unwrap(); + *files = mapped_files; + } + + let graph = match state.rebuild_graph() { + Ok(g) => g, + Err(e) => return result_to_json_ptr::(Err(e)), + }; 
+
+ let opts = diagram::RenderOptions {
+ format: fmt,
+ focus: focus_str.as_deref(),
+ depth,
+ max_nodes,
+ };
+ let rendered = match diagram::render(&graph, &opts) {
+ Ok(r) => r,
+ Err(e) => return result_to_json_ptr::(Err(e)),
+ };
+
+ let format_name = match fmt {
+ diagram::DiagramFormat::Mermaid => "mermaid",
+ diagram::DiagramFormat::Dot => "dot",
+ };
+
+ let data = serde_json::json!({
+ "diagram": rendered.diagram,
+ "truncated": rendered.truncated,
+ "format": format_name,
+ "nodeCount": rendered.node_count,
+ });
+
+ result_to_json_ptr::(Ok(data))
+}

From 10335c4a4c2ac8c4ae152c2972a17aa700d90cd7 Mon Sep 17 00:00:00 2001
From: Lisa
Date: Tue, 21 Apr 2026 15:51:45 +0200
Subject: [PATCH 06/20] feat(impact): weight analyzeImpact risk by bridge centrality
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

calculateAggregatedRisk now multiplies the weighted-mean score by 1 + max(BridgeScore)/1000 (capped at 2x) over the graph nodes matching the changed files, so a change landing on a critical architectural path is reported as riskier than the same-shape change in a leaf module. CARTOGRAPHER_STRATEGY.md had already claimed this behaviour — this commit makes it real.

The lookup matches files by both Path and ModuleID. If no changed file matches the graph (or the build is without -tags cartographer, where MapProject returns ErrUnavailable), the multiplier is 1.0 and no informational factor is appended — bridge weighting is additive, never punitive when data is missing.

A new `bridge_centrality` RiskFactor surfaces in RiskScore.Factors when the multiplier fires. Its Weight is 0 because it applies multiplicatively, not as a weighted-mean input; Value carries the raw max(BridgeScore)/1000 for consumer display.

Pure helper bridgeMultiplierFromGraph has 9 subtests covering empty inputs, path/module matching, max-across-files, 2x cap, and path-wins tiebreak. No cartographer build tag needed for the unit test.
Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 16 +++++ internal/query/impact.go | 130 +++++++++++++++++++++++++++++++--- internal/query/impact_test.go | 126 ++++++++++++++++++++++++++++++++ 3 files changed, 261 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6b641bb..d420db17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,22 @@ All notable changes to CKB will be documented in this file. ## [Unreleased] +### Changed + +- **`analyzeImpact` risk score now weighted by bridge centrality** — + `calculateAggregatedRisk` multiplies the weighted-mean score by + `1 + max(BridgeScore)/1000` (capped at 2.0) over the changed files, so a + change landing on a critical architectural path (high betweenness) is + reported as riskier than the same-shape change in a leaf module. Implements + the behaviour that `CARTOGRAPHER_STRATEGY.md` had already documented but + the code was not actually doing. Bridge lookups match by both `Path` and + `ModuleID`; if no changed file matches the graph, the multiplier is 1.0 + and no informational factor is appended. Only runs when the binary was + built with `-tags cartographer` (graph is a no-op otherwise). A new + `bridge_centrality` informational factor surfaces in `RiskScore.Factors` + when the multiplier fires; its `Weight` is 0 because it applies + multiplicatively, not as a weighted-mean input. + ### Added - **`renderArchitecture` MCP tool** — returns the project's module-level diff --git a/internal/query/impact.go b/internal/query/impact.go index 748d31e0..03f954fc 100644 --- a/internal/query/impact.go +++ b/internal/query/impact.go @@ -3,6 +3,7 @@ package query import ( "context" "fmt" + "math" "os" "os/exec" "path/filepath" @@ -455,21 +456,35 @@ func (e *Engine) AnalyzeImpact(ctx context.Context, opts AnalyzeImpactOptions) ( // Convert blast radius, then enrich with LIP semantic coupling when available. 
var blastRadius *BlastRadiusSummary if result.BlastRadius != nil { - // LIP enrichment: one round-trip for the symbol's file, keyed by symbol name. - // Gated on capability so older daemons see no change. + // LIP enrichment paths, in preference order: + // 1. query_blast_radius_symbol (v2.3+) — direct symbol-URI call, no filtering. + // 2. query_blast_radius_batch (v2.2) — fetch all symbols in the file, then LookupSymbol by name. + // Both gated on capability so older daemons degrade to SCIP-only. enriched := result.BlastRadius - if e.lipSupports("query_blast_radius_batch") && symbolInfo != nil && symbolInfo.Location != nil { - fileURI := "lip://local/" + symbolInfo.Location.FileId - if lipEntries, _ := lip.QueryBlastRadiusBatch([]string{fileURI}, 0.6); lipEntries != nil { - converted := make(map[string]*impact.ExternalBlastRadius, len(lipEntries)) - for k, v := range lipEntries { - vCopy := v - converted[k] = lip.EntryToExternal(&vCopy) + if symbolInfo != nil && symbolInfo.Location != nil { + var ext *impact.ExternalBlastRadius + switch { + case e.lipSupports("query_blast_radius_symbol"): + symURI := "lip://local/" + symbolInfo.Location.FileId + "#" + symbolInfo.Name + if entry, _ := lip.QueryBlastRadiusSymbol(symURI, 0.6); entry != nil { + ext = lip.EntryToExternal(entry) } - if ext, ok := lip.LookupSymbol(converted, symbolInfo.Location.FileId, symbolInfo.Name); ok { - enriched = impact.MergeBlastRadius(result.BlastRadius, ext) + case e.lipSupports("query_blast_radius_batch"): + fileURI := "lip://local/" + symbolInfo.Location.FileId + if lipEntries, _ := lip.QueryBlastRadiusBatch([]string{fileURI}, 0.6); lipEntries != nil { + converted := make(map[string]*impact.ExternalBlastRadius, len(lipEntries.Entries)) + for k, v := range lipEntries.Entries { + vCopy := v + converted[k] = lip.EntryToExternal(&vCopy) + } + if e, ok := lip.LookupSymbol(converted, symbolInfo.Location.FileId, symbolInfo.Name); ok { + ext = e + } } } + if ext != nil { + enriched = 
impact.MergeBlastRadius(result.BlastRadius, ext) + } } blastRadius = &BlastRadiusSummary{ @@ -1267,6 +1282,69 @@ func (e *Engine) getGitDiff(staged bool, baseBranch string) (string, error) { return string(out), nil } +// bridgeMultiplierFromGraph computes a risk multiplier from Cartographer's +// betweenness-centrality scores for the changed files. Files on critical +// architectural paths (high BridgeScore) yield a larger multiplier so that +// the same textual change is reported as riskier when it lands in a bridge. +// +// The multiplier is 1.0 + max(BridgeScore)/1000, capped at 2.0. BridgeScore +// is betweenness_centrality * 1000 (range 0-1000) per api.rs, so this maps +// a "perfect bridge" to a 2x risk amplification and a non-bridge to 1x. +// +// Matches files by both Path (exact) and ModuleID, to cover the cases where +// ChangedSymbol.File is either a repo-relative path or a module identifier. +// Returns (1.0, nil) when no nodes match — callers should treat a nil factor +// as "no adjustment, skip the factor append". 
+func bridgeMultiplierFromGraph(nodes []cartographer.GraphNode, files []string) (float64, *RiskFactor) { + if len(nodes) == 0 || len(files) == 0 { + return 1.0, nil + } + byPath := make(map[string]float64, len(nodes)) + byModule := make(map[string]float64, len(nodes)) + for _, n := range nodes { + if n.BridgeScore == nil { + continue + } + if n.Path != "" { + byPath[n.Path] = *n.BridgeScore + } + if n.ModuleID != "" { + byModule[n.ModuleID] = *n.BridgeScore + } + } + if len(byPath) == 0 && len(byModule) == 0 { + return 1.0, nil + } + + var maxScore float64 + matched := false + for _, f := range files { + if s, ok := byPath[f]; ok { + if s > maxScore { + maxScore = s + } + matched = true + continue + } + if s, ok := byModule[f]; ok { + if s > maxScore { + maxScore = s + } + matched = true + } + } + if !matched { + return 1.0, nil + } + + multiplier := math.Min(1.0+maxScore/1000.0, 2.0) + return multiplier, &RiskFactor{ + Name: "bridge_centrality", + Value: maxScore / 1000.0, // 0.0-1.0 informational + Weight: 0, // applied as multiplier, not weighted mean + } +} + // calculateAggregatedRisk computes an aggregated risk score for the change set. func (e *Engine) calculateAggregatedRisk( changedSymbols []impact.ChangedSymbol, @@ -1356,6 +1434,33 @@ func (e *Engine) calculateAggregatedRisk( score = weightedSum / totalWeight } + // Bridge-centrality adjustment: if any changed file sits on a critical + // architectural path, amplify the score. See bridgeMultiplierFromGraph + // for the multiplier shape. The graph is only fetched when the + // cartographer build tag is on; under the stub build this is a no-op. 
+ bridgeAmplified := false + if cartographer.Available() && e.repoRoot != "" { + if graph, err := cartographer.MapProject(e.repoRoot); err == nil && graph != nil { + files := make([]string, 0, len(changedSymbols)) + seen := make(map[string]struct{}, len(changedSymbols)) + for _, s := range changedSymbols { + if s.File == "" { + continue + } + if _, dup := seen[s.File]; dup { + continue + } + seen[s.File] = struct{}{} + files = append(files, s.File) + } + if mul, factor := bridgeMultiplierFromGraph(graph.Nodes, files); factor != nil { + score = math.Min(score*mul, 1.0) + factors = append(factors, *factor) + bridgeAmplified = true + } + } + } + // Determine level var level string switch { @@ -1372,6 +1477,9 @@ func (e *Engine) calculateAggregatedRisk( // Build explanation explanation := fmt.Sprintf("Change affects %d symbols across %d modules with %d direct and %d transitive impacts.", len(changedSymbols), len(modules), len(directImpact), len(transitiveImpact)) + if bridgeAmplified { + explanation += " Risk amplified by bridge centrality (change lands on a critical architectural path)." 
+ } return &RiskScore{ Level: level, diff --git a/internal/query/impact_test.go b/internal/query/impact_test.go index 387b4341..db4b98c0 100644 --- a/internal/query/impact_test.go +++ b/internal/query/impact_test.go @@ -1,13 +1,139 @@ package query import ( + "math" "testing" "time" + "github.com/SimplyLiz/CodeMCP/internal/cartographer" "github.com/SimplyLiz/CodeMCP/internal/impact" "github.com/SimplyLiz/CodeMCP/internal/telemetry" ) +func ptrF64(v float64) *float64 { return &v } + +func TestBridgeMultiplierFromGraph(t *testing.T) { + tests := []struct { + name string + nodes []cartographer.GraphNode + files []string + wantMul float64 + wantFactor bool + wantValue float64 // only checked when wantFactor=true + }{ + { + name: "no nodes", + nodes: nil, + files: []string{"a.go"}, + wantMul: 1.0, + wantFactor: false, + }, + { + name: "no files", + nodes: []cartographer.GraphNode{{Path: "a.go", BridgeScore: ptrF64(500)}}, + files: nil, + wantMul: 1.0, + wantFactor: false, + }, + { + name: "no BridgeScore populated", + nodes: []cartographer.GraphNode{ + {Path: "a.go", BridgeScore: nil}, + }, + files: []string{"a.go"}, + wantMul: 1.0, + wantFactor: false, + }, + { + name: "file does not match any node", + nodes: []cartographer.GraphNode{ + {Path: "b.go", BridgeScore: ptrF64(500)}, + }, + files: []string{"a.go"}, + wantMul: 1.0, + wantFactor: false, + }, + { + name: "match by path yields 1 + score/1000", + nodes: []cartographer.GraphNode{ + {Path: "a.go", BridgeScore: ptrF64(250)}, + }, + files: []string{"a.go"}, + wantMul: 1.25, + wantFactor: true, + wantValue: 0.25, + }, + { + name: "match by module id", + nodes: []cartographer.GraphNode{ + {ModuleID: "pkg/foo", Path: "", BridgeScore: ptrF64(600)}, + }, + files: []string{"pkg/foo"}, + wantMul: 1.6, + wantFactor: true, + wantValue: 0.6, + }, + { + name: "multiple files takes max score", + nodes: []cartographer.GraphNode{ + {Path: "low.go", BridgeScore: ptrF64(100)}, + {Path: "high.go", BridgeScore: ptrF64(800)}, + {Path: 
"mid.go", BridgeScore: ptrF64(400)}, + }, + files: []string{"low.go", "high.go", "mid.go"}, + wantMul: 1.8, + wantFactor: true, + wantValue: 0.8, + }, + { + name: "cap at 2.0 for over-1000 scores", + nodes: []cartographer.GraphNode{ + {Path: "a.go", BridgeScore: ptrF64(1500)}, + }, + files: []string{"a.go"}, + wantMul: 2.0, + wantFactor: true, + wantValue: 1.5, + }, + { + name: "path match wins over module match for same file", + nodes: []cartographer.GraphNode{ + {Path: "a.go", ModuleID: "a.go", BridgeScore: ptrF64(300)}, + {Path: "", ModuleID: "a.go", BridgeScore: ptrF64(900)}, + }, + files: []string{"a.go"}, + wantMul: 1.3, + wantFactor: true, + wantValue: 0.3, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mul, factor := bridgeMultiplierFromGraph(tt.nodes, tt.files) + if math.Abs(mul-tt.wantMul) > 1e-9 { + t.Errorf("multiplier = %v, want %v", mul, tt.wantMul) + } + if tt.wantFactor { + if factor == nil { + t.Fatalf("expected factor, got nil") + } + if factor.Name != "bridge_centrality" { + t.Errorf("factor.Name = %q, want %q", factor.Name, "bridge_centrality") + } + if factor.Weight != 0 { + t.Errorf("factor.Weight = %v, want 0 (informational)", factor.Weight) + } + if math.Abs(factor.Value-tt.wantValue) > 1e-9 { + t.Errorf("factor.Value = %v, want %v", factor.Value, tt.wantValue) + } + } else if factor != nil { + t.Errorf("expected nil factor, got %+v", factor) + } + }) + } +} + func TestFilterTestReferences(t *testing.T) { tests := []struct { name string From c3929ab7ca1c708339816668e2dfe8b36d2cb96f Mon Sep 17 00:00:00 2001 From: Lisa Date: Tue, 21 Apr 2026 15:55:55 +0200 Subject: [PATCH 07/20] feat(lip): distinguish not-indexed from zero-callers; add v2.3 symbol query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QueryBlastRadiusBatch now returns *BlastRadiusBatchResult instead of a bare map, so callers can tell "file not in the LIP index" apart from "indexed, zero callers" — same 
on-wire format, new struct wrapper exposes the not_indexed_uris list the daemon already returns. Callers in blast_radius.go and review_blastradius.go updated; existing zero- callers behaviour unchanged. New QueryBlastRadiusSymbol wraps LIP's v2.3 query_blast_radius_symbol RPC for direct symbol-URI lookups, sidestepping the batch-and-filter workaround when the daemon supports it. AnalyzeImpact dispatches to this path first under lipSupports("query_blast_radius_symbol"), and falls back to the batch path on older daemons — so old LIP builds degrade cleanly, new ones skip a round-trip. BlastRadiusEntry gains a FileURI field mirroring the daemon's response, so consumers can back-reference symbol→file without reparsing the URI. Co-Authored-By: Claude Opus 4.7 --- internal/lip/blast_radius.go | 8 ++-- internal/lip/client.go | 62 ++++++++++++++++++++++++---- internal/query/review_blastradius.go | 4 +- 3 files changed, 59 insertions(+), 15 deletions(-) diff --git a/internal/lip/blast_radius.go b/internal/lip/blast_radius.go index 64f8aa7e..8829b7f8 100644 --- a/internal/lip/blast_radius.go +++ b/internal/lip/blast_radius.go @@ -22,13 +22,13 @@ func (e *BlastRadiusEnricher) EnrichBatch(ctx context.Context, changedFileURIs [ return nil, nil } - entries, err := QueryBlastRadiusBatch(changedFileURIs, e.MinScore) - if entries == nil { + result, err := QueryBlastRadiusBatch(changedFileURIs, e.MinScore) + if result == nil { return nil, err } - out := make(map[string]*impact.ExternalBlastRadius, len(entries)) - for symbolURI, entry := range entries { + out := make(map[string]*impact.ExternalBlastRadius, len(result.Entries)) + for symbolURI, entry := range result.Entries { out[symbolURI] = EntryToExternal(&entry) } return out, nil diff --git a/internal/lip/client.go b/internal/lip/client.go index d7e37f92..b2b47209 100644 --- a/internal/lip/client.go +++ b/internal/lip/client.go @@ -762,6 +762,7 @@ type BlastRadiusSemanticItem struct { // BlastRadiusEntry is a single symbol's blast 
radius from LIP. type BlastRadiusEntry struct { SymbolURI string `json:"symbol_uri"` + FileURI string `json:"file_uri"` // input file this entry belongs to DirectDependents int `json:"direct_dependents"` TransitiveDependents int `json:"transitive_dependents"` AffectedFiles []string `json:"affected_files"` @@ -772,8 +773,17 @@ type BlastRadiusEntry struct { SemanticItems []BlastRadiusSemanticItem `json:"semantic_items"` } +// BlastRadiusBatchResult is the full response from QueryBlastRadiusBatch. +// NotIndexedURIs lists input URIs that were absent from the LIP index — +// callers can distinguish "not indexed" from "indexed but zero callers". +type BlastRadiusBatchResult struct { + Entries map[string]BlastRadiusEntry // keyed by symbol_uri + NotIndexedURIs []string // input file URIs not in index (omitted when empty) +} + type blastRadiusBatchResp struct { - Results []BlastRadiusEntry `json:"results"` + Results []BlastRadiusEntry `json:"results"` + NotIndexedURIs []string `json:"not_indexed_uris,omitempty"` } // QueryBlastRadiusBatch asks LIP for blast radius of all symbols in the given @@ -782,7 +792,7 @@ type blastRadiusBatchResp struct { // // min_score is the cosine similarity threshold for semantic hits. Pass 0 to // get static-only results (no semantic items). Typical values: 0.6–0.8. 
-func QueryBlastRadiusBatch(changedFileURIs []string, minScore float32) (map[string]BlastRadiusEntry, error) { +func QueryBlastRadiusBatch(changedFileURIs []string, minScore float32) (*BlastRadiusBatchResult, error) { if len(changedFileURIs) == 0 { return nil, nil } @@ -795,17 +805,51 @@ func QueryBlastRadiusBatch(changedFileURIs []string, minScore float32) (map[stri } // Budget: generous timeout — LIP needs to resolve symbols + compute embeddings timeout := max(time.Duration(len(changedFileURIs)+1)*200*time.Millisecond, 3*time.Second) - result, _ := lipRPC(req, timeout, 8<<20, - func(r blastRadiusBatchResp) *[]BlastRadiusEntry { return &r.Results }) - if result == nil { + raw, _ := lipRPC(req, timeout, 8<<20, + func(r blastRadiusBatchResp) *blastRadiusBatchResp { return &r }) + if raw == nil { return nil, nil } // Index by symbol_uri for O(1) lookup in the merge path - out := make(map[string]BlastRadiusEntry, len(*result)) - for _, entry := range *result { - out[entry.SymbolURI] = entry + entries := make(map[string]BlastRadiusEntry, len(raw.Results)) + for _, entry := range raw.Results { + entries[entry.SymbolURI] = entry } - return out, nil + return &BlastRadiusBatchResult{ + Entries: entries, + NotIndexedURIs: raw.NotIndexedURIs, + }, nil +} + +type blastRadiusSymbolResp struct { + Result *BlastRadiusEntry `json:"result,omitempty"` +} + +// QueryBlastRadiusSymbol asks LIP for blast radius of a single symbol (v2.3+). +// Returns (nil, nil) when the symbol's file isn't indexed or LIP is +// unavailable — callers should treat both identically (fall back to the +// static SCIP blast radius unchanged). +// +// Prefer this over QueryBlastRadiusBatch when you already have a symbol URI: +// it skips the file-level fetch-and-filter workaround and lets LIP dispatch +// directly. 
+func QueryBlastRadiusSymbol(symbolURI string, minScore float32) (*BlastRadiusEntry, error) { + if symbolURI == "" { + return nil, nil + } + req := map[string]any{ + "type": "query_blast_radius_symbol", + "symbol_uri": symbolURI, + } + if minScore > 0 { + req["min_score"] = minScore + } + raw, _ := lipRPC(req, 2*time.Second, 2<<20, + func(r blastRadiusSymbolResp) *blastRadiusSymbolResp { return &r }) + if raw == nil { + return nil, nil + } + return raw.Result, nil } // ============================================================================= diff --git a/internal/query/review_blastradius.go b/internal/query/review_blastradius.go index 60bd58cf..dfe0f418 100644 --- a/internal/query/review_blastradius.go +++ b/internal/query/review_blastradius.go @@ -39,8 +39,8 @@ func (e *Engine) checkBlastRadius(ctx context.Context, changedFiles []string, op lipURIs[i] = "lip://local/" + f } if raw, _ := lip.QueryBlastRadiusBatch(lipURIs, 0.6); raw != nil { - lipBR = make(map[string]*impact.ExternalBlastRadius, len(raw)) - for k, v := range raw { + lipBR = make(map[string]*impact.ExternalBlastRadius, len(raw.Entries)) + for k, v := range raw.Entries { vCopy := v lipBR[k] = lip.EntryToExternal(&vCopy) } From dccc2fc324c84a5113b4f284e5ccca65a20f2938 Mon Sep 17 00:00:00 2001 From: Lisa Date: Tue, 21 Apr 2026 16:39:19 +0200 Subject: [PATCH 08/20] feat(cartographer): sync vendored diagram.rs with overlays; fix localize script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vendored diagram.rs was 391 lines behind upstream — syncing pulls in the Overlays feature (cycle borders, layer-violation edge styling, hotspot sizing) so the libcartographer.a that CKB links against produces the same diagrams as the Cartographer CLI. The only vendor-local delta is `at_range: None` stripped from the test edge() fixture, since the vendored api.rs predates GraphEdge.at_range; a NOTE comment marks this for future syncs. 
All 20 diagram tests pass on the vendored copy. Also fixes a latent bug in localize-tree-sitter-symbols.sh that surfaced when rebuilding from the synced source: the script extracted archive members with `ar x`, but Cargo emits multiple members named parser.o / scanner.o (one per tree-sitter grammar crate), and `ar x` silently clobbers them. After localization the archive ended up with _tree_sitter_c and _tree_sitter_cpp as undefined references, breaking `go build -tags cartographer`. The script now feeds the archive straight to `ld -r` via -force_load (Mach-O) / --whole-archive (ELF), skipping the filesystem extraction entirely so duplicate-named members coexist in combined.o as intended. Verified: go test -tags cartographer -run TestBridgeMultiplierFromGraph links cleanly and passes. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 27 ++ .../scripts/localize-tree-sitter-symbols.sh | 26 +- .../mapper-core/cartographer/src/diagram.rs | 423 +++++++++++++++++- 3 files changed, 457 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d420db17..59229354 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,33 @@ All notable changes to CKB will be documented in this file. ## [Unreleased] +### Added + +- **Diagram overlays in `renderArchitecture` / `ckb diagram`** — the + vendored `diagram.rs` was synced from upstream Cartographer, so the + Mermaid/DOT output now decorates the base import graph with + architectural signals: cycle members get a thick red border (pivots + dashed), cycle-internal edges a heavy red arrow, layer violations pick + up per-type dashed/dotted edge styling, and hot nodes + (`hotspot_score ≥ 70`) get an orange border plus DOT size scaling. + Mermaid is border-only for hot nodes (no sizing primitive). Cycle red + takes precedence over hot orange on the same node — architectural + signal wins over performance signal. 
+ +### Fixed + +- **`localize-tree-sitter-symbols.sh` dropped grammar C parsers** — the + script extracted archive members via `ar x`, which silently clobbers + files when multiple members share a name. Cargo emits a `parser.o` + and `scanner.o` per grammar crate (tree-sitter-c, -cpp, -rust, -go, + etc.), so `ar x` left only the *last* grammar's C parser on disk, + producing a localized archive missing `_tree_sitter_c` / `_tree_sitter_cpp`. + The script now feeds the archive directly to `ld -r` with + `-force_load` (Mach-O) / `--whole-archive` (ELF), which pulls every + member in without touching the filesystem. The `rust_tree_sitter` C + ABI refs to `_tree_sitter_c` and `_tree_sitter_cpp` now resolve + inside the combined object as expected. + ### Changed - **`analyzeImpact` risk score now weighted by bridge centrality** — diff --git a/third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh b/third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh index 7da03e16..b06759be 100755 --- a/third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh +++ b/third_party/cartographer/mapper-core/cartographer/scripts/localize-tree-sitter-symbols.sh @@ -71,12 +71,28 @@ trap 'rm -rf "$WORK"' EXIT cp "$ARCHIVE_ABS" "$WORK/input.a" ( cd "$WORK" - "$AR" x input.a - rm input.a - # Partial link (`ld -r`) merges every member into a single relocatable - # object. `-nostdlib` prevents clang/gcc from pulling in CRT or libSystem. - "$CC" -nostdlib -Wl,-r -o combined.o ./*.o + # Partial link (`ld -r`) merges every archive member into a single + # relocatable object so Cartographer's internal ts_*/tree_sitter_* refs + # resolve within the combined object. 
We feed the archive directly to + # the linker with a force-load flag rather than `ar x`-extracting first, + # because Cargo emits multiple `.o` members with identical names (each + # tree-sitter grammar crate's build.rs produces its own `parser.o` / + # `scanner.o`) — `ar x` clobbers duplicates on disk, dropping the C + # parser objects for all but the last grammar. `-force_load` (Mach-O) + # and `--whole-archive` (ELF) both pull in every member unconditionally, + # preserving every instance. + # + # `-nostdlib` prevents clang/gcc from pulling in CRT or libSystem. + case "$(uname -s)" in + Darwin) + "$CC" -nostdlib -Wl,-r -o combined.o -Wl,-force_load,input.a + ;; + *) + "$CC" -nostdlib -Wl,-r -o combined.o \ + -Wl,--whole-archive input.a -Wl,--no-whole-archive + ;; + esac # Localize tree-sitter runtime (`ts_*`) and grammar init symbols # (`tree_sitter_`, plus internal `tree_sitter__external_scanner_*` diff --git a/third_party/cartographer/mapper-core/cartographer/src/diagram.rs b/third_party/cartographer/mapper-core/cartographer/src/diagram.rs index 04aa6f2e..53bc80b6 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/diagram.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/diagram.rs @@ -13,6 +13,12 @@ use std::collections::{HashMap, HashSet, VecDeque}; use crate::api::ProjectGraphResponse; +use crate::layers::LayerViolationType; + +/// Nodes with `hotspot_score` at or above this threshold get the `hot` overlay +/// (thick orange stroke in Mermaid, thicker orange border + larger size in DOT). +/// Picked to match the "top decile" of hotspots on real codebases. +const HOTSPOT_THRESHOLD: f64 = 70.0; /// Output format requested by the caller. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -49,6 +55,53 @@ pub struct RenderedDiagram { pub node_count: usize, } +/// Precomputed overlays that decorate the base import graph with architectural +/// signals: cycles (from `graph.cycles`), layer violations (from +/// `graph.layer_violations`), and hotspot nodes (from `GraphNode.hotspot_score`). +/// +/// We precompute once per `render()` so both Mermaid and DOT rendering paths +/// consult the same sets and stay visually consistent. +struct Overlays<'a> { + cycle_nodes: HashSet<&'a str>, + pivot_nodes: HashSet<&'a str>, + cycle_edges: HashSet<(&'a str, &'a str)>, + violations: HashMap<(&'a str, &'a str), &'a LayerViolationType>, +} + +fn compute_overlays(graph: &ProjectGraphResponse) -> Overlays<'_> { + let mut cycle_nodes: HashSet<&str> = HashSet::new(); + let mut pivot_nodes: HashSet<&str> = HashSet::new(); + let mut cycle_edges: HashSet<(&str, &str)> = HashSet::new(); + + for cycle in &graph.cycles { + let members: HashSet<&str> = cycle.nodes.iter().map(|s| s.as_str()).collect(); + for n in &cycle.nodes { + cycle_nodes.insert(n.as_str()); + } + if let Some(pivot) = &cycle.pivot_node { + pivot_nodes.insert(pivot.as_str()); + } + // An edge participates in this cycle iff both endpoints are cycle members. + for edge in &graph.edges { + if members.contains(edge.source.as_str()) && members.contains(edge.target.as_str()) { + cycle_edges.insert((edge.source.as_str(), edge.target.as_str())); + } + } + } + + let mut violations: HashMap<(&str, &str), &LayerViolationType> = HashMap::new(); + for v in &graph.layer_violations { + // LayerViolation.source_path/target_path are actually module_ids + // (they come from edge_tuples in api.rs, which clone edge.source/target). + violations.insert( + (v.source_path.as_str(), v.target_path.as_str()), + &v.violation_type, + ); + } + + Overlays { cycle_nodes, pivot_nodes, cycle_edges, violations } +} + /// Render an import-graph diagram. Pure over `graph` — no I/O. 
pub fn render(graph: &ProjectGraphResponse, opts: &RenderOptions) -> Result { let max_nodes = opts.max_nodes.max(1); @@ -67,9 +120,11 @@ pub fn render(graph: &ProjectGraphResponse, opts: &RenderOptions) -> Result render_dot(&included, &included_set, &node_by_id, graph), - DiagramFormat::Mermaid => render_mermaid(&included, &included_set, &node_by_id, graph), + DiagramFormat::Dot => render_dot(&included, &included_set, &node_by_id, graph, &overlays), + DiagramFormat::Mermaid => render_mermaid(&included, &included_set, &node_by_id, graph, &overlays), }; Ok(RenderedDiagram { diagram: content, truncated, node_count: included.len() }) @@ -165,21 +220,68 @@ fn render_dot( included_set: &HashSet<&str>, node_by_id: &HashMap<&str, &crate::api::GraphNode>, graph: &ProjectGraphResponse, + overlays: &Overlays, ) -> String { let mut out = String::from("digraph cartographer {\n rankdir=LR;\n"); for module_id in included { let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; let label = node.path.rsplit('/').next().unwrap_or(&node.path); - let color = role_color_dot(node.role.as_deref()); + let fill = role_color_dot(node.role.as_deref()); + + let mid = module_id.as_str(); + let is_pivot = overlays.pivot_nodes.contains(mid); + let in_cycle = overlays.cycle_nodes.contains(mid); + let score = node.hotspot_score.unwrap_or(0.0).clamp(0.0, 100.0); + let hot = score >= HOTSPOT_THRESHOLD; + + // Border: pivot > cycle > hot > default. Pivot is dashed to distinguish + // it inside a red-bordered cycle. + let (border_color, pen_width, extra_style) = if is_pivot { + ("#cc0000", 3.0, ",dashed") + } else if in_cycle { + ("#cc0000", 3.0, "") + } else if hot { + ("#ff6600", 3.0, "") + } else { + ("#333333", 1.0, "") + }; + + // Hotspot-driven sizing. score ∈ [0,100] → width ∈ [0.75, 1.80], + // height ∈ [0.50, 0.90], fontsize ∈ [10, 16]. Nodes without a score + // render at the default size. 
+ let width = 0.75 + (score / 100.0) * 1.05; + let height = 0.50 + (score / 100.0) * 0.40; + let fontsize = 10 + ((score / 100.0) * 6.0) as u32; + out.push_str(&format!( - " \"{}\" [label=\"{}\\n{} fn\" shape=box style=filled fillcolor=\"{}\"];\n", - node.module_id, label, node.signature_count, color + " \"{}\" [label=\"{}\\n{} fn\" shape=box style=\"filled{}\" fillcolor=\"{}\" color=\"{}\" penwidth={:.1} width={:.2} height={:.2} fontsize={}];\n", + node.module_id, label, node.signature_count, + extra_style, fill, border_color, pen_width, width, height, fontsize )); } for edge in &graph.edges { - if included_set.contains(edge.source.as_str()) && included_set.contains(edge.target.as_str()) { - out.push_str(&format!(" \"{}\" -> \"{}\";\n", edge.source, edge.target)); + if !(included_set.contains(edge.source.as_str()) + && included_set.contains(edge.target.as_str())) + { + continue; } + let key = (edge.source.as_str(), edge.target.as_str()); + let viol = overlays.violations.get(&key).copied(); + let in_cycle = overlays.cycle_edges.contains(&key); + + let (color, style, pen) = match viol { + Some(LayerViolationType::BackCall) + | Some(LayerViolationType::CircularCrossLayer) => ("#cc0000", "dashed", 2.5), + Some(LayerViolationType::SkipCall) => ("#ff9900", "dotted", 2.0), + Some(LayerViolationType::DirectForeignImport) => ("#cccc00", "dotted", 1.5), + None if in_cycle => ("#cc0000", "solid", 2.5), + None => ("#666666", "solid", 1.0), + }; + + out.push_str(&format!( + " \"{}\" -> \"{}\" [color=\"{}\" style={} penwidth={:.1}];\n", + edge.source, edge.target, color, style, pen + )); } out.push('}'); out @@ -190,12 +292,16 @@ fn render_mermaid( included_set: &HashSet<&str>, node_by_id: &HashMap<&str, &crate::api::GraphNode>, graph: &ProjectGraphResponse, + overlays: &Overlays, ) -> String { let mut out = String::from("graph TD\n"); out.push_str(" classDef bridge fill:#f96,stroke:#333\n"); out.push_str(" classDef core fill:#9cf,stroke:#333\n"); out.push_str(" classDef dead 
fill:#ccc,stroke:#333\n"); out.push_str(" classDef entry fill:#9f9,stroke:#333\n"); + out.push_str(" classDef cycle stroke:#c00,stroke-width:3px\n"); + out.push_str(" classDef pivot stroke:#c00,stroke-width:3px,stroke-dasharray:5 5\n"); + out.push_str(" classDef hot stroke:#f60,stroke-width:3px\n"); let id_map: HashMap<&str, usize> = included .iter() @@ -203,6 +309,9 @@ fn render_mermaid( .map(|(i, m)| (m.as_str(), i)) .collect(); + // Node declarations carry the inline role class (:::core / :::bridge / etc). + // Overlay classes (cycle/pivot/hot) are applied via separate `class` statements + // below so a node can wear multiple classes without relying on inline chaining. for module_id in included { let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; let i = id_map[module_id.as_str()]; @@ -214,15 +323,78 @@ fn render_mermaid( )); } + // Overlay class assignments. Pivot takes precedence over cycle so a pivot + // node gets the dashed border that distinguishes it inside a cycle. + for module_id in included { + let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; + let i = id_map[module_id.as_str()]; + let mid = module_id.as_str(); + let mut extras: Vec<&str> = Vec::new(); + if overlays.pivot_nodes.contains(mid) { + extras.push("pivot"); + } else if overlays.cycle_nodes.contains(mid) { + extras.push("cycle"); + } + if node.hotspot_score.unwrap_or(0.0) >= HOTSPOT_THRESHOLD { + extras.push("hot"); + } + if !extras.is_empty() { + out.push_str(&format!(" class N{} {};\n", i, extras.join(","))); + } + } + + // Edges. We emit them in source order and remember each edge's index so we + // can append `linkStyle` directives for cycle/violation edges at the end. 
+ let mut edge_index: usize = 0; + let mut link_styles: Vec<(usize, &'static str)> = Vec::new(); for edge in &graph.edges { - if included_set.contains(edge.source.as_str()) && included_set.contains(edge.target.as_str()) { - if let (Some(&si), Some(&ti)) = ( - id_map.get(edge.source.as_str()), - id_map.get(edge.target.as_str()), - ) { - out.push_str(&format!(" N{} --> N{}\n", si, ti)); + if !(included_set.contains(edge.source.as_str()) + && included_set.contains(edge.target.as_str())) + { + continue; + } + let (Some(&si), Some(&ti)) = ( + id_map.get(edge.source.as_str()), + id_map.get(edge.target.as_str()), + ) else { + continue; + }; + let key = (edge.source.as_str(), edge.target.as_str()); + let viol = overlays.violations.get(&key).copied(); + let in_cycle = overlays.cycle_edges.contains(&key); + + // Arrow: `==>` for plain cycles, `-.->` for any violation (dotted + // Mermaid arrow covers both back-calls and skip-calls visually; + // linkStyle below distinguishes them by colour/dash). 
+ let arrow = match (viol, in_cycle) { + (Some(_), _) => "-.->", + (None, true) => "==>", + (None, false) => "-->", + }; + out.push_str(&format!(" N{} {} N{}\n", si, arrow, ti)); + + let style: Option<&'static str> = match viol { + Some(LayerViolationType::BackCall) + | Some(LayerViolationType::CircularCrossLayer) => { + Some("stroke:#c00,stroke-width:2.5px,stroke-dasharray:6 3") + } + Some(LayerViolationType::SkipCall) => { + Some("stroke:#f90,stroke-width:2px,stroke-dasharray:3 3") + } + Some(LayerViolationType::DirectForeignImport) => { + Some("stroke:#cc0,stroke-width:1.5px,stroke-dasharray:2 2") } + None if in_cycle => Some("stroke:#c00,stroke-width:2.5px"), + None => None, + }; + if let Some(s) = style { + link_styles.push((edge_index, s)); } + edge_index += 1; + } + + for (idx, style) in link_styles { + out.push_str(&format!(" linkStyle {} {}\n", idx, style)); } out } @@ -250,7 +422,8 @@ fn role_class_suffix(role: Option<&str>) -> &'static str { #[cfg(test)] mod tests { use super::*; - use crate::api::{GraphEdge, GraphMetadata, GraphNode, ProjectGraphResponse}; + use crate::api::{CycleInfo, GraphEdge, GraphMetadata, GraphNode, ProjectGraphResponse}; + use crate::layers::{LayerViolation, LayerViolationType}; use std::collections::HashMap; fn node(id: &str, role: Option<&str>) -> GraphNode { @@ -277,6 +450,11 @@ mod tests { } fn edge(src: &str, tgt: &str) -> GraphEdge { + // NOTE: upstream Cartographer's GraphEdge has an `at_range: Option` + // field (LIP-style source position for doc references). This vendored copy + // predates that field — when syncing diagram.rs, strip that one line to + // stay compatible with the vendored api.rs. The overlays feature itself + // doesn't touch at_range, so the drop is test-only. GraphEdge { source: src.into(), target: tgt.into(), @@ -477,5 +655,222 @@ mod tests { // Role-tagged nodes carry their class suffix. 
assert!(r.diagram.contains(":::core")); assert!(r.diagram.contains(":::bridge")); + // Overlay classes are always declared so later `class` statements resolve. + assert!(r.diagram.contains("classDef cycle")); + assert!(r.diagram.contains("classDef pivot")); + assert!(r.diagram.contains("classDef hot")); + } + + fn cycle(nodes: &[&str], pivot: Option<&str>) -> CycleInfo { + CycleInfo { + nodes: nodes.iter().map(|s| s.to_string()).collect(), + pivot_node: pivot.map(String::from), + severity: "high".into(), + } + } + + fn violation(src: &str, tgt: &str, vt: LayerViolationType) -> LayerViolation { + LayerViolation { + source_path: src.into(), + target_path: tgt.into(), + source_layer: "x".into(), + target_layer: "y".into(), + violation_type: vt, + severity: "CRITICAL".into(), + } + } + + #[test] + fn mermaid_marks_cycle_nodes_edges_and_pivot() { + let mut g = fixture(); + g.edges.push(edge("c", "a")); // closes a → b → c → a + g.cycles.push(cycle(&["a", "b", "c"], Some("b"))); + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + }).unwrap(); + + // Cycle edges use thick arrow and pick up a linkStyle. + assert!(r.diagram.contains("==>"), "expected cycle edges to use ==>:\n{}", r.diagram); + assert!(r.diagram.contains("linkStyle"), "expected linkStyle for cycle edges"); + + // Pivot takes precedence over cycle — node b gets the pivot class. + assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("pivot")), + "expected a class statement assigning pivot:\n{}", r.diagram + ); + // Non-pivot cycle members still get the cycle class. 
+ assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("cycle")), + "expected a class statement assigning cycle:\n{}", r.diagram + ); + } + + #[test] + fn dot_marks_cycle_edges_red() { + let mut g = fixture(); + g.edges.push(edge("c", "a")); + g.cycles.push(cycle(&["a", "b", "c"], None)); + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + }).unwrap(); + + // At least one cycle edge must carry the red colour and solid style. + let cycle_edge_line = r + .diagram + .lines() + .find(|l| l.contains("\"a\" -> \"b\"") || l.contains("\"b\" -> \"c\"") || l.contains("\"c\" -> \"a\"")) + .expect("cycle edge should be rendered"); + assert!(cycle_edge_line.contains("#cc0000"), "cycle edge missing red colour: {}", cycle_edge_line); + + // Non-cycle edges stay grey. + assert!(r.diagram.contains("#666666") || !g.edges.iter().any(|e| { + let members = ["a", "b", "c"]; + !members.contains(&e.source.as_str()) || !members.contains(&e.target.as_str()) + })); + } + + #[test] + fn mermaid_marks_layer_violations() { + let mut g = fixture(); + g.layer_violations.push(violation("a", "b", LayerViolationType::BackCall)); + g.layer_violations.push(violation("b", "c", LayerViolationType::SkipCall)); + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + }).unwrap(); + + // Both violations use the dotted-violation arrow. + assert!(r.diagram.contains("-.->"), "expected dotted arrow for violations:\n{}", r.diagram); + // linkStyle distinguishes the two by colour. 
+ assert!(r.diagram.contains("stroke:#c00"), "expected red stroke for BackCall"); + assert!(r.diagram.contains("stroke:#f90"), "expected orange stroke for SkipCall"); + } + + #[test] + fn dot_marks_layer_violations_with_style_and_colour() { + let mut g = fixture(); + g.layer_violations.push(violation("a", "b", LayerViolationType::BackCall)); + g.layer_violations.push(violation("b", "c", LayerViolationType::SkipCall)); + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + }).unwrap(); + + let back = r.diagram.lines().find(|l| l.contains("\"a\" -> \"b\"")).unwrap(); + assert!(back.contains("#cc0000"), "BackCall edge missing red: {}", back); + assert!(back.contains("style=dashed"), "BackCall edge missing dashed: {}", back); + + let skip = r.diagram.lines().find(|l| l.contains("\"b\" -> \"c\"")).unwrap(); + assert!(skip.contains("#ff9900"), "SkipCall edge missing orange: {}", skip); + assert!(skip.contains("style=dotted"), "SkipCall edge missing dotted: {}", skip); + } + + #[test] + fn dot_sizes_hot_nodes_and_applies_orange_border() { + let mut g = fixture(); + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "a") { + n.hotspot_score = Some(90.0); + } + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + }).unwrap(); + + let hot_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); + // width at score=90 ≈ 0.75 + 0.9 * 1.05 = 1.695 → formatted as 1.70 + assert!(hot_line.contains("width=1.70"), "hot node width wrong: {}", hot_line); + assert!(hot_line.contains("#ff6600"), "hot node missing orange border: {}", hot_line); + + // A cold node stays at default width. 
+ let cold_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"b\" [")).unwrap(); + assert!(cold_line.contains("width=0.75"), "cold node width wrong: {}", cold_line); + } + + #[test] + fn mermaid_marks_hot_nodes_with_class() { + let mut g = fixture(); + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "a") { + n.hotspot_score = Some(90.0); + } + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + }).unwrap(); + + // `a` should get a class statement including `hot`. + assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("hot")), + "expected class statement assigning hot:\n{}", r.diagram + ); + } + + #[test] + fn cycle_border_takes_precedence_over_hot_border_in_dot() { + // A node that's both hot and in a cycle wears the cycle red border, + // not the hot orange border — architectural signal wins. + let mut g = fixture(); + g.edges.push(edge("c", "a")); + g.cycles.push(cycle(&["a", "b", "c"], None)); + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "a") { + n.hotspot_score = Some(95.0); + } + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + }).unwrap(); + + let a_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); + // Expect the cycle red colour, not the hot orange. + assert!(a_line.contains("color=\"#cc0000\""), "expected cycle red border: {}", a_line); + assert!(!a_line.contains("color=\"#ff6600\""), "hot border should not win over cycle: {}", a_line); + } + + #[test] + fn overlays_respect_max_nodes_truncation() { + // Cycle spans a,b,c but max_nodes=2 cuts the graph — the renderer must + // not reference excluded nodes in linkStyle / class statements. 
+ let mut g = fixture(); + g.edges.push(edge("c", "a")); + g.cycles.push(cycle(&["a", "b", "c"], Some("c"))); + + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 2, + }).unwrap(); + assert!(r.truncated); + assert_eq!(r.node_count, 2); + + // No linkStyle index should exceed the count of emitted edges. + let edge_count = r.diagram.lines().filter(|l| { + l.contains(" --> ") || l.contains(" ==> ") || l.contains(" -.-> ") + }).count(); + for line in r.diagram.lines().filter(|l| l.trim_start().starts_with("linkStyle")) { + let idx: usize = line.split_whitespace().nth(1).unwrap().parse().unwrap(); + assert!(idx < edge_count, "linkStyle {} refers to an edge that wasn't emitted", idx); + } } } From 111e761a9c5a2c3cb2fe9c1a3baf3701b5f264f9 Mon Sep 17 00:00:00 2001 From: Lisa Date: Tue, 21 Apr 2026 17:43:12 +0200 Subject: [PATCH 09/20] chore(vendor): full sync of Cartographer + add sync-cartographer.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vendored Cartographer tree had drifted across 10 .rs files plus Cargo.toml — diagram.rs alone was 391 lines behind. Prior syncs were done file-by-file and kept missing things. Full sync to upstream 3.0.0 closes the gap. FFI surface is purely additive (cartographer_doc_index, cartographer_doc_context, cartographer_query_docs) so CKB's existing Go bindings still link unchanged; the new FFI entry points can get Go bindings in a follow-up when we want to expose doc-graph features through MCP. Upstream's GraphEdge now carries an at_range: Option field (LIP source position for doc backtick refs) so the vendor-local at_range patch on diagram.rs tests is no longer needed. scripts/sync-cartographer.sh is now the supported path for future syncs — explicit path list rsync from an upstream checkout, so we catch every file the next time upstream changes instead of discovering drift at rebuild time. 
No local patches are applied; the script will fail loudly if one ever becomes necessary so it's obvious at review time. Verified: all 74 upstream cargo tests pass on the vendored copy; make build-cartographer + go test -tags cartographer link cleanly and the bridge-centrality test passes. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 11 + scripts/sync-cartographer.sh | 66 + .../mapper-core/cartographer/Cargo.toml | 2 +- .../cartographer/include/cartographer.h | 70 + .../mapper-core/cartographer/src/api.rs | 172 ++- .../mapper-core/cartographer/src/diagram.rs | 6 +- .../mapper-core/cartographer/src/extractor.rs | 71 +- .../mapper-core/cartographer/src/lib.rs | 331 ++++- .../mapper-core/cartographer/src/main.rs | 197 ++- .../mapper-core/cartographer/src/mapper.rs | 1210 ++++++++++++++++- .../mapper-core/cartographer/src/mcp.rs | 759 ++++++++++- .../mapper-core/cartographer/src/scanner.rs | 1 + .../mapper-core/cartographer/src/search.rs | 180 +-- .../cartographer/src/token_metrics.rs | 44 + 14 files changed, 2834 insertions(+), 286 deletions(-) create mode 100755 scripts/sync-cartographer.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 59229354..110c6003 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,17 @@ All notable changes to CKB will be documented in this file. ### Added +- **Vendored Cartographer fully synced to upstream 3.0.0** — the + vendored tree under `third_party/cartographer/mapper-core/cartographer/` + was 391 lines behind on `diagram.rs` alone, and 10 `.rs` files plus + `Cargo.toml` had drifted. Full sync brings in doc-node graph support + (`cartographer_doc_index`, `cartographer_doc_context`, `cartographer_query_docs` + FFI entry points — Go bindings can be added as a follow-up), + LIP-style `Range` / `at_range` on `GraphEdge`, PascalCase bare-identifier + resolution for doc backtick refs, and the overlays feature on diagrams. 
+ New `scripts/sync-cartographer.sh` is now the supported path for future + syncs — rsync-based, explicit path list, emits next-step commands. No + local patches needed against upstream. - **Diagram overlays in `renderArchitecture` / `ckb diagram`** — the vendored `diagram.rs` was synced from upstream Cartographer, so the Mermaid/DOT output now decorates the base import graph with diff --git a/scripts/sync-cartographer.sh b/scripts/sync-cartographer.sh new file mode 100755 index 00000000..24865778 --- /dev/null +++ b/scripts/sync-cartographer.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# +# Sync the vendored Cartographer source tree at +# third_party/cartographer/mapper-core/cartographer/ from an upstream checkout. +# +# Why vendor at all: CKB links libcartographer.a at build time. Shipping the +# Rust source alongside Go keeps reproducible builds (no network fetch, no +# version-skew between Go code and FFI surface) and lets CI build the archive +# from a pinned snapshot. +# +# Why a script: manual file-by-file copies drift silently — the vendored tree +# was 391 lines behind upstream diagram.rs when the overlays feature landed, +# and nobody noticed until rebuild. This script is the only supported path +# and should be rerun whenever upstream cuts a release worth pulling. +# +# Usage: +# scripts/sync-cartographer.sh <upstream-checkout> +# +# Example: +# scripts/sync-cartographer.sh ../../../Cartographer +# +# After running: inspect `git diff`, then rebuild: +# make build-cartographer +# go test -tags cartographer ./internal/query/... + +set -euo pipefail + +UPSTREAM="${1:?usage: $0 <upstream-checkout>}" +UPSTREAM_CART="$UPSTREAM/mapper-core/cartographer" + +if [[ ! -d "$UPSTREAM_CART" ]]; then + echo "error: $UPSTREAM_CART not found — pass the Cartographer repo root" >&2 + exit 1 +fi + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +CKB_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +VENDOR="$CKB_ROOT/third_party/cartographer/mapper-core/cartographer" + +if [[ ! 
-d "$VENDOR" ]]; then + echo "error: vendor tree not found: $VENDOR" >&2 + exit 1 +fi + +echo "syncing $UPSTREAM_CART → $VENDOR" + +# Rsync the Rust source + build config + FFI header. We explicitly list the +# paths to sync rather than mirroring the whole tree so we never pull in +# build artifacts (target/), IDE files, or editor scratch files. If upstream +# adds new top-level items (e.g. a new subcrate), add them here deliberately. +rsync -a --delete "$UPSTREAM_CART/src/" "$VENDOR/src/" +rsync -a --delete "$UPSTREAM_CART/include/" "$VENDOR/include/" +rsync -a --delete "$UPSTREAM_CART/scripts/" "$VENDOR/scripts/" +cp "$UPSTREAM_CART/Cargo.toml" "$VENDOR/Cargo.toml" +cp "$UPSTREAM_CART/Cargo.lock" "$VENDOR/Cargo.lock" +cp "$UPSTREAM_CART/build.rs" "$VENDOR/build.rs" +cp "$UPSTREAM_CART/cbindgen.toml" "$VENDOR/cbindgen.toml" + +# No local patches known at this time. If a local patch ever becomes +# necessary (e.g. upstream depends on a private crate we can't vendor), +# reapply it here AFTER the rsync and document WHY inline. + +echo "done. next steps:" +echo " 1. review: git -C $CKB_ROOT diff third_party/cartographer/" +echo " 2. build: cd $CKB_ROOT && make build-cartographer" +echo " 3. test: cd $CKB_ROOT && go test -tags cartographer ./internal/query/..." 
diff --git a/third_party/cartographer/mapper-core/cartographer/Cargo.toml b/third_party/cartographer/mapper-core/cartographer/Cargo.toml index 3a004f87..92d8dede 100644 --- a/third_party/cartographer/mapper-core/cartographer/Cargo.toml +++ b/third_party/cartographer/mapper-core/cartographer/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cartographer" -version = "2.4.0" +version = "3.0.0" edition = "2021" description = "Code Cartographer for Architectural Intelligence" authors = ["SimplyLiz"] diff --git a/third_party/cartographer/mapper-core/cartographer/include/cartographer.h b/third_party/cartographer/mapper-core/cartographer/include/cartographer.h index cbc39e7d..6ddc8df3 100644 --- a/third_party/cartographer/mapper-core/cartographer/include/cartographer.h +++ b/third_party/cartographer/mapper-core/cartographer/include/cartographer.h @@ -641,6 +641,76 @@ char *cartographer_query_context(const char *path, const char *query, const char */ char *cartographer_shotgun_surgery(const char *path, uint32_t limit, uint32_t min_partners); +/** + * Return all document-type nodes from the project graph. + * + * Input: `path` — project root (C string) + * + * Response shape: + * ```json + * { + * "ok": true, + * "data": [ + * { + * "path": "docs/architecture.md", + * "module_id": "docs/architecture.md", + * "signatures": ["# Architecture", "## Overview"], + * "imports": ["src/api.rs"], + * "edge_count": 3 + * } + * ] + * } + * ``` + */ +char *cartographer_doc_index(const char *path); + +/** + * Return a single document's structure plus skeletons of referenced code files. + * + * Inputs: + * `path` — project root (C string) + * `doc_path` — relative path to the document (C string) + * `budget` — max tokens for referenced code (0 → 4000) + * + * Response shape: + * ```json + * { + * "ok": true, + * "data": { + * "doc": { "path": "...", "moduleId": "...", "signatures": [...], "imports": [...] }, + * "referencedFiles": [{ "path": "...", "rank": 0.05, "signatures": [...] 
}], + * "totalTokens": 2100 + * } + * } + * ``` + */ +char *cartographer_doc_context(const char *path, const char *doc_path, uint32_t budget); + +/** + * Doc-biased context retrieval: search docs first, follow cross-refs into code. + * + * Inputs: + * `path` — project root (C string) + * `query` — natural language query (C string) + * `opts_json` — optional JSON: `{ "budget": 8000, "model": "claude" }` + * + * Response shape: + * ```json + * { + * "ok": true, + * "data": { + * "context": "## Doc Context for: ...\n\n...", + * "docFiles": [...], + * "codeFiles": [...], + * "focusDocs": ["docs/setup.md"], + * "totalTokens": 5200, + * "health": { "score": 81.0, "grade": "B", ... } + * } + * } + * ``` + */ +char *cartographer_query_docs(const char *path, const char *query, const char *opts_json); + /** * Render the project's import graph as a Mermaid or Graphviz (DOT) diagram. * diff --git a/third_party/cartographer/mapper-core/cartographer/src/api.rs b/third_party/cartographer/mapper-core/cartographer/src/api.rs index 9e2c77e2..b32d213b 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/api.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/api.rs @@ -120,11 +120,21 @@ pub struct GraphNode { pub cochange_entropy: Option, } +/// A source position range using LIP semantics: line is 0-based, char is UTF-8 byte offset from line start. 
+#[derive(Debug, Clone, Serialize)] +pub struct Range { + pub start_line: usize, + pub start_char: usize, + pub end_line: usize, + pub end_char: usize, +} + #[derive(Debug, Clone, Serialize)] pub struct GraphEdge { pub source: String, pub target: String, pub edge_type: String, + pub at_range: Option<Range>, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -456,6 +466,7 @@ impl ApiState { source: module_id.clone(), target, edge_type: "import".to_string(), + at_range: None, }); } } @@ -895,6 +906,13 @@ fn parse_import_parts(import: &str) -> (String, Option<String>) { // Fallback: last token let last = raw.split_whitespace().last().unwrap_or(raw); let last = last.trim_matches('"').trim_matches('\'').trim_end_matches(';'); + // Bare PascalCase identifier (e.g. from doc backtick refs) → set as symbol hint + // so resolve_import_target can match it against symbol definitions. + if !last.contains('/') && !last.contains('.') && last.len() >= 4 + && last.chars().next().map(|c| c.is_uppercase()).unwrap_or(false) + { + return (last.to_string(), Some(last.to_string())); + } (last.to_string(), None) } @@ -915,15 +933,18 @@ fn extract_js_import_symbol(lhs: &str) -> Option<String> { /// Return the last meaningful path component to use as a file-stem candidate. fn derive_module_stem(module_path: &str) -> String { - module_path + let last = module_path .split('/') .filter(|s| !s.is_empty() && *s != "." 
&& *s != "..") .last() .unwrap_or(module_path) - .trim_start_matches('@') // strip npm scope prefix - .split('-') // treat kebab-case first word as stem - .next() - .unwrap_or("") + .trim_start_matches('@'); // strip npm scope prefix + let kebab_first = last.split('-').next().unwrap_or(last); // treat kebab-case first word as stem + // Strip file extension so doc-style imports ("scanner.rs", "api/search.md") resolve correctly + Path::new(kebab_first) + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or(kebab_first) .to_string() } @@ -961,6 +982,67 @@ fn is_test_path(path: &str) -> bool { || lower.ends_with("_test.go") } +// --------------------------------------------------------------------------- +// Document helpers +// --------------------------------------------------------------------------- + +/// File extensions treated as "documents" (non-code) for doc-oriented tools. +pub const DOC_EXTENSIONS: &[&str] = &["md", "markdown", "yaml", "yml", "toml", "json"]; + +pub fn is_doc_path(path: &str) -> bool { + path.rsplit('.') + .next() + .map(|ext| DOC_EXTENSIONS.contains(&ext)) + .unwrap_or(false) +} + +/// Summary of a document node in the project graph. +#[derive(Debug, Clone, Serialize)] +pub struct DocNode { + pub path: String, + pub module_id: String, + pub signatures: Vec, + pub imports: Vec, + pub edge_count: usize, +} + +impl ApiState { + /// Return all document-type nodes from the project graph. 
+ pub fn doc_nodes(&self) -> Result, String> { + let graph = self.rebuild_graph()?; + let files = self.mapped_files.lock().map_err(|e| e.to_string())?; + + let mut docs = Vec::new(); + for node in &graph.nodes { + if !is_doc_path(&node.path) { + continue; + } + let edge_count = graph.edges.iter() + .filter(|e| e.source == node.module_id || e.target == node.module_id) + .count(); + + let (sigs, imports) = files.get(&node.module_id) + .map(|mf| ( + mf.signatures.iter().map(|s| s.raw.clone()).collect(), + mf.imports.clone(), + )) + .unwrap_or_default(); + + docs.push(DocNode { + path: node.path.clone(), + module_id: node.module_id.clone(), + signatures: sigs, + imports, + edge_count, + }); + } + + // Sort: most connected docs first + docs.sort_by(|a, b| b.edge_count.cmp(&a.edge_count)); + Ok(docs) + } +} + struct BridgeAnalysis { is_bridge: bool, bridge_score: f64, @@ -1145,6 +1227,15 @@ impl ApiState { return Some(module_id.clone()); } + // 1b. Path-suffix match for relative doc links ("api/search.md" → "docs/api/search.md"). + // Checked before the loose segment match to return an unambiguous result. + if norm_path.contains('/') || norm_path.contains('.') { + let suffix = format!("/{}", norm_path.trim_start_matches('/')); + if file.path.ends_with(&suffix) { + return Some(module_id.clone()); + } + } + // 2. 
Path segment: file path contains the module stem as a component if segment_match.is_none() && stem.len() >= 3 { let file_lower = file.path.to_lowercase(); @@ -1432,7 +1523,7 @@ impl ApiState { pub fn get_evolution(&self, days: Option) -> Result { let current_graph = self.rebuild_graph()?; let current_health = current_graph.metadata.health_score.unwrap_or(100.0); let days = days.unwrap_or(30); @@ -1441,7 +1532,7 @@ impl ApiState { .unwrap_or_default() .as_secs(); - let mut snapshots = vec![ArchitectureSnapshot { + let current_snapshot = ArchitectureSnapshot { timestamp: now, health_score: current_health, total_files: current_graph.metadata.total_files, @@ -1456,16 +1547,49 @@ impl ApiState { .iter() .max_by_key(|(_, v)| *v) .map(|(k, _)| k.clone()), - }]; - - // Trend requires multiple snapshots; this reflects current state only. - // Historical tracking is not yet implemented, so `days` has no effect. - let health_trend = if current_health >= 80.0 { - "Healthy".to_string() - } else if current_health >= 60.0 { - "Moderate".to_string() + }; + + // ── Persist snapshot to history file ────────────────────────────────── + let history_path = self.root_path.join(".cartographer_history.json"); + let mut all_snapshots: Vec<ArchitectureSnapshot> = + std::fs::read_to_string(&history_path) + .ok() + .and_then(|s| serde_json::from_str(&s).ok()) + .unwrap_or_default(); + all_snapshots.push(current_snapshot); + // Cap history to last 365 snapshots to prevent unbounded growth + if all_snapshots.len() > 365 { + let drain_count = all_snapshots.len() - 365; + all_snapshots.drain(0..drain_count); + } + if let Ok(json) = serde_json::to_string(&all_snapshots) { + let _ = std::fs::write(&history_path, json); + } + + // ── Filter to requested window ──────────────────────────────────────── + let since_epoch = now.saturating_sub(days as u64 * 86_400); + let snapshots: Vec<ArchitectureSnapshot> = all_snapshots + .into_iter() + .filter(|s| s.timestamp >= since_epoch) + .collect(); + + // ── Compute trend from first vs last snapshot 
───────────────────────── + let health_trend = if snapshots.len() >= 2 { + let first = snapshots.first().unwrap().health_score; + let last = snapshots.last().unwrap().health_score; + let delta = last - first; + if delta > 5.0 { + "Improving".to_string() + } else if delta < -5.0 { + "Degrading".to_string() + } else { + "Stable".to_string() + } } else { - "At Risk".to_string() + // Single snapshot — classify by absolute score + if current_health >= 80.0 { "Healthy".to_string() } + else if current_health >= 60.0 { "Moderate".to_string() } + else { "At Risk".to_string() } }; let mut debt_indicators = Vec::new(); @@ -1493,12 +1616,10 @@ impl ApiState { recommendations.push("Priority: Break circular dependencies".to_string()); } if current_graph.metadata.god_module_count.unwrap_or(0) > 2 { - recommendations - .push("Consider splitting large modules to improve cohesion".to_string()); + recommendations.push("Consider splitting large modules to improve cohesion".to_string()); } if recommendations.is_empty() { - recommendations - .push("Architecture is healthy - maintain current practices".to_string()); + recommendations.push("Architecture is healthy - maintain current practices".to_string()); } Ok(ArchitectureEvolution { @@ -1535,4 +1656,15 @@ mod tests { let level = CompressionLevel::default(); assert_eq!(level, CompressionLevel::Standard); } + + #[test] + fn derive_module_stem_strips_extension() { + assert_eq!(derive_module_stem("scanner.rs"), "scanner"); + assert_eq!(derive_module_stem("api/search.md"), "search"); + assert_eq!(derive_module_stem("config.yaml"), "config"); + // Normal code imports (no extension) unchanged + assert_eq!(derive_module_stem("scanner"), "scanner"); + assert_eq!(derive_module_stem("react-dom"), "react"); + assert_eq!(derive_module_stem("src/api/handler"), "handler"); + } } diff --git a/third_party/cartographer/mapper-core/cartographer/src/diagram.rs b/third_party/cartographer/mapper-core/cartographer/src/diagram.rs index 53bc80b6..54b3f876 
100644 --- a/third_party/cartographer/mapper-core/cartographer/src/diagram.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/diagram.rs @@ -450,15 +450,11 @@ mod tests { } fn edge(src: &str, tgt: &str) -> GraphEdge { - // NOTE: upstream Cartographer's GraphEdge has an `at_range: Option` - // field (LIP-style source position for doc references). This vendored copy - // predates that field — when syncing diagram.rs, strip that one line to - // stay compatible with the vendored api.rs. The overlays feature itself - // doesn't touch at_range, so the drop is test-only. GraphEdge { source: src.into(), target: tgt.into(), edge_type: "import".into(), + at_range: None, } } diff --git a/third_party/cartographer/mapper-core/cartographer/src/extractor.rs b/third_party/cartographer/mapper-core/cartographer/src/extractor.rs index 8ea126e0..171ce327 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/extractor.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/extractor.rs @@ -206,16 +206,21 @@ fn lip_uri(path: &Path, qualified: &str) -> String { feature = "lang-c", feature = "lang-cpp", ))] fn make_sig( - raw: String, kind: SymbolKind, line: usize, path: &Path, + raw: String, kind: SymbolKind, node: &Node, path: &Path, name: &str, qualified: &str, doc: Option, ) -> Signature { + let sp = node.start_position(); + let ep = node.end_position(); Signature { raw, ckb_id: Some(lip_uri(path, qualified)), symbol_name: Some(name.to_string()), qualified_name: Some(qualified.to_string()), kind, - line_start: line, + line_start: sp.row, + col_start: sp.column, + line_end: ep.row, + col_end: ep.column, confidence: CONFIDENCE_TS, doc_comment: doc, } @@ -291,7 +296,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc .unwrap_or_default(); let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Interface, node.start_position().row, path, &name, &name, doc)); + 
sigs.push(make_sig(raw, SymbolKind::Interface, node, path, &name, &name, doc)); scope.push(name); if let Some(body) = node.child_by_field_name("body") { let mut cur = body.walk(); @@ -318,7 +323,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc let kind = if scope.is_empty() { SymbolKind::Function } else { SymbolKind::Method }; let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, kind, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, kind, node, path, &name, &qualified, doc)); } "struct_item" => { let name = node.child_by_field_name("name") @@ -328,7 +333,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); let qualified = scope_qualify(scope, &name); - sigs.push(make_sig(raw, SymbolKind::Struct, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Struct, node, path, &name, &qualified, doc)); } "enum_item" => { let name = node.child_by_field_name("name") @@ -338,7 +343,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); let qualified = scope_qualify(scope, &name); - sigs.push(make_sig(raw, SymbolKind::Enum, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Enum, node, path, &name, &qualified, doc)); } "type_item" => { let name = node.child_by_field_name("name") @@ -347,7 +352,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc if name.is_empty() { return; } let raw = node_text(node, src).split_whitespace().collect::>().join(" "); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::TypeAlias, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::TypeAlias, node, path, &name, 
&name, doc)); } "const_item" | "static_item" => { let name = node.child_by_field_name("name") @@ -366,7 +371,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc } let raw = node_text(node, src).split_whitespace().collect::>().join(" "); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Variable, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Variable, node, path, &name, &name, doc)); } "macro_definition" => { let name = { @@ -380,7 +385,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc if name.is_empty() { return; } let raw = format!("macro_rules! {}", name); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Macro, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Macro, node, path, &name, &name, doc)); } "mod_item" => { let name = node.child_by_field_name("name") @@ -389,7 +394,7 @@ fn walk_rust(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, sc if name.is_empty() { return; } let doc = preceding_doc_comment(node, src); let raw = format!("mod {}", name); - sigs.push(make_sig(raw, SymbolKind::Namespace, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Namespace, node, path, &name, &name, doc)); if let Some(body) = node.child_by_field_name("body") { scope.push(name); let mut cur = body.walk(); @@ -475,7 +480,7 @@ fn walk_go(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec) { if name.is_empty() { return; } let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Function, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Function, node, path, &name, &name, doc)); } "method_declaration" => { let name = node.child_by_field_name("name") @@ -499,7 +504,7 @@ fn walk_go(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec) 
{ let qualified = if receiver_type.is_empty() { name.clone() } else { format!("{}.{}", receiver_type, name) }; let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Method, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Method, node, path, &name, &qualified, doc)); } "type_declaration" => { let mut cur = node.walk(); @@ -516,7 +521,7 @@ fn walk_go(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec) { }; let raw = first_line(&child, src); let doc = preceding_doc_comment(&child, src); - sigs.push(make_sig(raw, kind, child.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, kind, &child, path, &name, &name, doc)); } } } @@ -536,7 +541,7 @@ fn walk_go(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec) { if name.is_empty() { continue; } let raw = node_text(&child, src).split_whitespace().collect::>().join(" "); let doc = preceding_doc_comment(&child, src); - sigs.push(make_sig(raw, SymbolKind::Variable, child.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Variable, &child, path, &name, &name, doc)); } } } @@ -611,7 +616,7 @@ fn walk_python(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, let kind = if scope.is_empty() { SymbolKind::Function } else { SymbolKind::Method }; let raw = sig_up_to_colon(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, kind, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, kind, node, path, &name, &qualified, doc)); } "class_definition" => { let name = node.child_by_field_name("name") @@ -620,7 +625,7 @@ fn walk_python(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, if name.is_empty() { return; } let raw = sig_up_to_colon(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Class, node.start_position().row, path, &name, &name, doc)); + 
sigs.push(make_sig(raw, SymbolKind::Class, node, path, &name, &name, doc)); scope.push(name); if let Some(body) = node.child_by_field_name("body") { let mut cur = body.walk(); @@ -644,7 +649,7 @@ fn walk_python(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, .unwrap_or_default(); if !name.is_empty() && name.chars().all(|c| c.is_uppercase() || c == '_' || c.is_numeric()) { let raw = first_line(node, src); - sigs.push(make_sig(raw, SymbolKind::Variable, node.start_position().row, path, &name, &name, None)); + sigs.push(make_sig(raw, SymbolKind::Variable, node, path, &name, &name, None)); } } } @@ -737,7 +742,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop let kind = if scope.is_empty() { SymbolKind::Function } else { SymbolKind::Method }; let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, kind, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, kind, node, path, &name, &qualified, doc)); } "class_declaration" | "abstract_class_declaration" | "class" => { let name = node.child_by_field_name("name") @@ -746,7 +751,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop if name.is_empty() { return; } let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Class, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Class, node, path, &name, &name, doc)); scope.push(name); if let Some(body) = node.child_by_field_name("body") { let mut cur = body.walk(); @@ -765,7 +770,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop let qualified = scope_qualify(scope, &name); let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Method, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Method, node, path, 
&name, &qualified, doc)); } "interface_declaration" => { let name = node.child_by_field_name("name") @@ -774,7 +779,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop if name.is_empty() { return; } let raw = sig_up_to_block(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Interface, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Interface, node, path, &name, &name, doc)); } "type_alias_declaration" => { let name = node.child_by_field_name("name") @@ -783,7 +788,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop if name.is_empty() { return; } let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::TypeAlias, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::TypeAlias, node, path, &name, &name, doc)); } "enum_declaration" => { let name = node.child_by_field_name("name") @@ -792,7 +797,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop if name.is_empty() { return; } let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Enum, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::Enum, node, path, &name, &name, doc)); } "export_statement" | "export_clause" => { let mut cur = node.walk(); @@ -819,7 +824,7 @@ fn walk_ts(node: &Node, src: &[u8], path: &Path, sigs: &mut Vec, scop let raw = format!("const {} = {}", name, sig_up_to_block(&val, src)); let qualified = scope_qualify(scope, &name); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Function, decl.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Function, &decl, path, &name, &qualified, doc)); } } _ => { @@ -903,7 +908,7 @@ fn walk_c_cpp( let raw = sig_up_to_block(node, src); let doc = 
preceding_doc_comment(node, src); let kind = if scope.is_empty() { SymbolKind::Function } else { SymbolKind::Method }; - sigs.push(make_sig(raw, kind, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, kind, node, path, &name, &qualified, doc)); } } // Don't recurse into the body — we don't want nested functions @@ -918,7 +923,7 @@ fn walk_c_cpp( if !name.is_empty() { let raw = node_text(node, src).split_whitespace().collect::>().join(" "); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::Function, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Function, node, path, &name, &qualified, doc)); } } } @@ -933,7 +938,7 @@ fn walk_c_cpp( let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); let qualified = scope_qualify(scope, &name); - sigs.push(make_sig(raw, SymbolKind::Struct, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Struct, node, path, &name, &qualified, doc)); } } // Still walk the body for nested types @@ -947,7 +952,7 @@ fn walk_c_cpp( let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); let qualified = scope_qualify(scope, &name); - sigs.push(make_sig(raw, SymbolKind::Enum, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Enum, node, path, &name, &qualified, doc)); } } return; @@ -971,7 +976,7 @@ fn walk_c_cpp( if !name.is_empty() { let raw = node_text(node, src).split_whitespace().collect::>().join(" "); let doc = preceding_doc_comment(node, src); - sigs.push(make_sig(raw, SymbolKind::TypeAlias, node.start_position().row, path, &name, &name, doc)); + sigs.push(make_sig(raw, SymbolKind::TypeAlias, node, path, &name, &name, doc)); } return; } @@ -982,7 +987,7 @@ fn walk_c_cpp( let name = node_text(&name_node, src).to_string(); if !name.is_empty() { let raw = first_line(node, src); - 
sigs.push(make_sig(raw, SymbolKind::Variable, node.start_position().row, path, &name, &name, None)); + sigs.push(make_sig(raw, SymbolKind::Variable, node, path, &name, &name, None)); } } return; @@ -992,7 +997,7 @@ fn walk_c_cpp( let name = node_text(&name_node, src).to_string(); if !name.is_empty() { let raw = first_line(node, src); - sigs.push(make_sig(raw, SymbolKind::Macro, node.start_position().row, path, &name, &name, None)); + sigs.push(make_sig(raw, SymbolKind::Macro, node, path, &name, &name, None)); } } return; @@ -1007,7 +1012,7 @@ fn walk_c_cpp( let raw = first_line(node, src); let doc = preceding_doc_comment(node, src); let qualified = scope_qualify(scope, &name); - sigs.push(make_sig(raw, SymbolKind::Class, node.start_position().row, path, &name, &qualified, doc)); + sigs.push(make_sig(raw, SymbolKind::Class, node, path, &name, &qualified, doc)); // Walk class body for inline method definitions if let Some(body) = node.child_by_field_name("body") { scope.push(name); @@ -1031,7 +1036,7 @@ fn walk_c_cpp( let doc = preceding_doc_comment(node, src); sigs.push(make_sig( format!("namespace {}", name), SymbolKind::Namespace, - node.start_position().row, path, &name, &name, doc, + node, path, &name, &name, doc, )); scope.push(name); } diff --git a/third_party/cartographer/mapper-core/cartographer/src/lib.rs b/third_party/cartographer/mapper-core/cartographer/src/lib.rs index 437ac907..666db5d1 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/lib.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/lib.rs @@ -128,7 +128,7 @@ fn save_cache(root: &Path, head: &str, files: &HashMap) { // build_mapped_files: parallel scan + optional cache // --------------------------------------------------------------------------- -fn build_mapped_files(root: &Path) -> Result, String> { +pub(crate) fn build_mapped_files(root: &Path) -> Result, String> { // Check persistent cache first let head = git_head(root); if let Some(cached) = load_cache(root, 
&head) { @@ -1936,6 +1936,335 @@ pub extern "C" fn cartographer_shotgun_surgery( result_to_json_ptr(Ok::<_, String>(entries)) } +// --------------------------------------------------------------------------- +// FFI: Doc Index +// --------------------------------------------------------------------------- + +/// Return all document-type nodes from the project graph. +/// +/// Input: `path` — project root (C string) +/// +/// Response shape: +/// ```json +/// { +/// "ok": true, +/// "data": [ +/// { +/// "path": "docs/architecture.md", +/// "module_id": "docs/architecture.md", +/// "signatures": ["# Architecture", "## Overview"], +/// "imports": ["src/api.rs"], +/// "edge_count": 3 +/// } +/// ] +/// } +/// ``` +#[no_mangle] +pub extern "C" fn cartographer_doc_index(path: *const c_char) -> *mut c_char { + let path = match c_str_to_path(path) { + Ok(p) => p, + Err(e) => return result_to_json_ptr::(Err(e)), + }; + + let mapped_files = match build_mapped_files(&path) { + Ok(m) => m, + Err(e) => return result_to_json_ptr::(Err(e)), + }; + let state = ApiState::new(path.clone()); + { + let mut files = state.mapped_files.lock().unwrap(); + *files = mapped_files; + } + + let result = state.doc_nodes(); + result_to_json_ptr(result) +} + +// --------------------------------------------------------------------------- +// FFI: Doc Context +// --------------------------------------------------------------------------- + +/// Return a single document's structure plus skeletons of referenced code files. +/// +/// Inputs: +/// `path` — project root (C string) +/// `doc_path` — relative path to the document (C string) +/// `budget` — max tokens for referenced code (0 → 4000) +/// +/// Response shape: +/// ```json +/// { +/// "ok": true, +/// "data": { +/// "doc": { "path": "...", "moduleId": "...", "signatures": [...], "imports": [...] }, +/// "referencedFiles": [{ "path": "...", "rank": 0.05, "signatures": [...] 
}], +/// "totalTokens": 2100 +/// } +/// } +/// ``` +#[no_mangle] +pub extern "C" fn cartographer_doc_context( + path: *const c_char, + doc_path: *const c_char, + budget: u32, +) -> *mut c_char { + let path = match c_str_to_path(path) { + Ok(p) => p, + Err(e) => return result_to_json_ptr::(Err(e)), + }; + if doc_path.is_null() { + return result_to_json_ptr::(Err("null doc_path".into())); + } + let doc_path_str = unsafe { + match CStr::from_ptr(doc_path).to_str() { + Ok(s) => s.to_string(), + Err(e) => return result_to_json_ptr::(Err(e.to_string())), + } + }; + let budget = if budget == 0 { 4000 } else { budget as usize }; + + let mapped_files = match build_mapped_files(&path) { + Ok(m) => m, + Err(e) => return result_to_json_ptr::(Err(e)), + }; + let state = ApiState::new(path.clone()); + { + let mut files = state.mapped_files.lock().unwrap(); + *files = mapped_files; + } + + if let Err(e) = state.rebuild_graph() { + return result_to_json_ptr::(Err(e)); + } + + // Find the doc in mapped_files (exact match or substring) + let (module_id, doc_sigs, doc_imports, doc_path_owned) = { + let files = state.mapped_files.lock().unwrap(); + match files.iter() + .find(|(_, f)| f.path == doc_path_str || f.path.contains(&doc_path_str)) + { + Some((mid, mf)) => ( + mid.clone(), + mf.signatures.iter().map(|s| s.raw.clone()).collect::>(), + mf.imports.clone(), + mf.path.clone(), + ), + None => return result_to_json_ptr::( + Err(format!("Document not found: {}", doc_path_str)), + ), + } + }; + + // Use doc's imports as focus for ranked skeleton + let ranked = if doc_imports.is_empty() { + vec![] + } else { + state.ranked_skeleton(&doc_imports, budget).unwrap_or_default() + }; + + let total_tokens: usize = ranked.iter().map(|f| f.estimated_tokens).sum(); + + let referenced: Vec = ranked.iter().map(|f| { + serde_json::json!({ + "path": f.path, + "rank": f.rank, + "signatureCount": f.signature_count, + "estimatedTokens": f.estimated_tokens, + "signatures": f.signatures, + }) + 
}).collect(); + + let data = serde_json::json!({ + "doc": { + "path": doc_path_owned, + "moduleId": module_id, + "signatures": doc_sigs, + "imports": doc_imports, + }, + "referencedFiles": referenced, + "totalTokens": total_tokens, + }); + + result_to_json_ptr::(Ok(data)) +} + +// --------------------------------------------------------------------------- +// FFI: Query Docs (doc-biased context retrieval) +// --------------------------------------------------------------------------- + +/// Doc-biased context retrieval: search docs first, follow cross-refs into code. +/// +/// Inputs: +/// `path` — project root (C string) +/// `query` — natural language query (C string) +/// `opts_json` — optional JSON: `{ "budget": 8000, "model": "claude" }` +/// +/// Response shape: +/// ```json +/// { +/// "ok": true, +/// "data": { +/// "context": "## Doc Context for: ...\n\n...", +/// "docFiles": [...], +/// "codeFiles": [...], +/// "focusDocs": ["docs/setup.md"], +/// "totalTokens": 5200, +/// "health": { "score": 81.0, "grade": "B", ... 
} +/// } +/// } +/// ``` +#[no_mangle] +pub extern "C" fn cartographer_query_docs( + path: *const c_char, + query: *const c_char, + opts_json: *const c_char, +) -> *mut c_char { + let path = match c_str_to_path(path) { + Ok(p) => p, + Err(e) => return result_to_json_ptr::(Err(e)), + }; + if query.is_null() { + return result_to_json_ptr::(Err("null query".into())); + } + let q = unsafe { + match CStr::from_ptr(query).to_str() { + Ok(s) => s.to_string(), + Err(e) => return result_to_json_ptr::(Err(e.to_string())), + } + }; + + #[derive(serde::Deserialize, Default)] + #[serde(rename_all = "camelCase")] + struct QueryDocsOpts { + budget: Option, + model: Option, + } + + let json_opts: QueryDocsOpts = if !opts_json.is_null() { + let raw = unsafe { + match CStr::from_ptr(opts_json).to_str() { + Ok(s) => s, + Err(e) => return result_to_json_ptr::(Err(e.to_string())), + } + }; + serde_json::from_str(raw).unwrap_or_default() + } else { + QueryDocsOpts::default() + }; + + let budget = json_opts.budget.unwrap_or(8000); + let model_str = json_opts.model.unwrap_or_else(|| "claude".to_string()); + + // Step 1: BM25 search across all files + let bm25_opts = search::BM25Options { max_results: 30, ..Default::default() }; + let bm25_result = search::bm25_search(&path, &q, &bm25_opts).unwrap_or_default(); + + // Step 2: Separate into doc files and code files + let mut doc_files: Vec = Vec::new(); + let mut code_files: Vec = Vec::new(); + let mut seen = std::collections::HashSet::new(); + + for m in &bm25_result.matches { + if !seen.insert(m.path.clone()) { continue; } + if api::is_doc_path(&m.path) { + doc_files.push(m.path.clone()); + } else { + code_files.push(m.path.clone()); + } + } + + // Step 3: Build graph + follow doc cross-refs into code + let mapped_files = match build_mapped_files(&path) { + Ok(m) => m, + Err(e) => return result_to_json_ptr::(Err(e)), + }; + let state = ApiState::new(path.clone()); + { let mut f = state.mapped_files.lock().unwrap(); *f = mapped_files; } + 
if let Err(e) = state.rebuild_graph() { + return result_to_json_ptr::(Err(e)); + } + + { + let files = state.mapped_files.lock().unwrap(); + for doc_path in &doc_files { + if let Some(mf) = files.get(doc_path.as_str()) { + for imp in &mf.imports { + if !seen.contains(imp) && !api::is_doc_path(imp) { + seen.insert(imp.clone()); + code_files.push(imp.clone()); + } + } + } + } + } + + // Step 4: Ranked skeleton — docs as primary focus, code as secondary + let mut all_focus = doc_files.clone(); + all_focus.extend(code_files.iter().cloned()); + all_focus.truncate(30); + + let ranked = state.ranked_skeleton(&all_focus, budget).unwrap_or_default(); + + // Step 5: Build context text — docs first, then code + let mut doc_entries = Vec::new(); + let mut code_entries = Vec::new(); + let mut context_text = format!("## Doc Context for: {}\n\n", q); + let mut total_tokens = 0usize; + + for f in &ranked { + let entry = serde_json::json!({ + "path": f.path, + "rank": f.rank, + "signatureCount": f.signature_count, + "estimatedTokens": f.estimated_tokens, + "signatures": f.signatures, + }); + total_tokens += f.estimated_tokens; + + if api::is_doc_path(&f.path) { + context_text.push_str(&format!( + "// [DOC] {} (rank: {:.4}, {} tokens)\n", f.path, f.rank, f.estimated_tokens + )); + doc_entries.push(entry); + } else { + context_text.push_str(&format!( + "// {} (rank: {:.4}, {} tokens)\n", f.path, f.rank, f.estimated_tokens + )); + code_entries.push(entry); + } + for sig in &f.signatures { + context_text.push_str(&format!(" {}\n", sig)); + } + context_text.push('\n'); + } + + // Step 6: Health score + let sig_count: usize = ranked.iter().map(|f| f.signatures.len()).sum(); + let model = model_str.parse::().unwrap_or_default(); + let health_opts = token_metrics::HealthOpts { + model, + window_size: 0, + key_positions: token_metrics::key_positions_from_order( + &ranked.iter().map(|f| f.path.clone()).collect::>(), + &doc_files, + ), + signature_count: sig_count, + signature_tokens: 
(total_tokens as f64 * 0.85) as usize, + }; + let health = token_metrics::analyze(&context_text, &health_opts); + + let data = serde_json::json!({ + "context": context_text, + "docFiles": doc_entries, + "codeFiles": code_entries, + "focusDocs": doc_files, + "totalTokens": total_tokens, + "health": health, + }); + + result_to_json_ptr::(Ok(data)) +} + // --------------------------------------------------------------------------- // FFI: Render Architecture Diagram (Mermaid / DOT) // --------------------------------------------------------------------------- diff --git a/third_party/cartographer/mapper-core/cartographer/src/main.rs b/third_party/cartographer/mapper-core/cartographer/src/main.rs index 60fbd77d..3fca3d80 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/main.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/main.rs @@ -1,4 +1,5 @@ mod api; +mod diagram; mod extractor; mod formatter; mod token_metrics; @@ -2667,7 +2668,6 @@ fn diagram_mode(root: &Path, format: &str, output: Option<&Path>, max_nodes: usi use crate::api::ApiState; use crate::mapper::extract_skeleton; use crate::scanner::{is_ignored_path, scan_files_with_noise_tracking}; - use std::collections::HashMap; let result = scan_files_with_noise_tracking(root)?; let mapped_files: std::collections::HashMap = result @@ -2694,115 +2694,26 @@ fn diagram_mode(root: &Path, format: &str, output: Option<&Path>, max_nodes: usi let graph = state.rebuild_graph().map_err(|e| anyhow::anyhow!(e))?; - // Compute degree per node from edges - let mut degree: HashMap<&str, usize> = HashMap::new(); - for edge in &graph.edges { - *degree.entry(edge.source.as_str()).or_insert(0) += 1; - *degree.entry(edge.target.as_str()).or_insert(0) += 1; - } - - // Pick top max_nodes by degree; exclude zero-edge nodes - let mut ranked: Vec<_> = graph - .nodes - .iter() - .filter(|n| degree.get(n.module_id.as_str()).copied().unwrap_or(0) > 0) - .collect(); - ranked.sort_by(|a, b| { - let da = 
degree.get(a.module_id.as_str()).copied().unwrap_or(0); - let db = degree.get(b.module_id.as_str()).copied().unwrap_or(0); - db.cmp(&da) - }); - ranked.truncate(max_nodes); - - let included: std::collections::HashSet<&str> = - ranked.iter().map(|n| n.module_id.as_str()).collect(); - - let content = match format.to_lowercase().as_str() { - "dot" => { - let mut out = String::from("digraph cartographer {\n rankdir=LR;\n"); - for node in &ranked { - let label = node - .path - .rsplit('/') - .next() - .unwrap_or(&node.path); - let color = match node.role.as_deref() { - Some("core") => "#9cf", - Some("bridge") => "#f96", - Some("dead") => "#ccc", - Some("entry") => "#9f9", - _ => "#fff", - }; - out.push_str(&format!( - " \"{}\" [label=\"{}\\n{} fn\" shape=box style=filled fillcolor=\"{}\"];\n", - node.module_id, label, node.signature_count, color - )); - } - for edge in &graph.edges { - if included.contains(edge.source.as_str()) && included.contains(edge.target.as_str()) { - out.push_str(&format!( - " \"{}\" -> \"{}\";\n", - edge.source, edge.target - )); - } - } - out.push('}'); - out - } - _ => { - // mermaid (default) - let mut out = String::from("graph TD\n"); - out.push_str(" classDef bridge fill:#f96,stroke:#333\n"); - out.push_str(" classDef core fill:#9cf,stroke:#333\n"); - out.push_str(" classDef dead fill:#ccc,stroke:#333\n"); - out.push_str(" classDef entry fill:#9f9,stroke:#333\n"); - - // Build stable numeric IDs - let id_map: HashMap<&str, usize> = ranked - .iter() - .enumerate() - .map(|(i, n)| (n.module_id.as_str(), i)) - .collect(); - - for node in &ranked { - let i = id_map[node.module_id.as_str()]; - let label = node - .path - .rsplit('/') - .next() - .unwrap_or(&node.path); - let class_suffix = match node.role.as_deref() { - Some("core") => ":::core", - Some("bridge") => ":::bridge", - Some("dead") => ":::dead", - Some("entry") => ":::entry", - _ => "", - }; - out.push_str(&format!( - " N{}[\"{}\\n{} fn\"]{}\n", - i, label, node.signature_count, 
class_suffix - )); - } - - for edge in &graph.edges { - if included.contains(edge.source.as_str()) && included.contains(edge.target.as_str()) { - if let (Some(&si), Some(&ti)) = ( - id_map.get(edge.source.as_str()), - id_map.get(edge.target.as_str()), - ) { - out.push_str(&format!(" N{} --> N{}\n", si, ti)); - } - } - } - out - } + let fmt = diagram::DiagramFormat::parse(format).map_err(|e| anyhow::anyhow!(e))?; + let opts = diagram::RenderOptions { + format: fmt, + focus: None, + depth: 2, + max_nodes, }; + let rendered = diagram::render(&graph, &opts).map_err(|e| anyhow::anyhow!(e))?; if let Some(out_path) = output { - fs::write(out_path, &content)?; + fs::write(out_path, &rendered.diagram)?; println!("Diagram written to: {}", out_path.display()); + if rendered.truncated { + println!("(truncated to {} nodes — raise --max-nodes for more)", max_nodes); + } } else { - println!("{}", content); + println!("{}", rendered.diagram); + if rendered.truncated { + eprintln!("(truncated to {} nodes — raise --max-nodes for more)", max_nodes); + } } Ok(()) @@ -3157,33 +3068,77 @@ fn semidiff_mode(root: &Path, commit1: &str, commit2: &str) -> Result<()> { } // ============================================================================= -// MCP SERVE MODE - Start MCP server with stdio JSON-RPC transport +// SHARED HELPER: parallel file scan + persistent cache // ============================================================================= -fn mcp_serve_mode(root: &Path) -> Result<()> { - use crate::api::ApiState; - use crate::mapper::extract_skeleton; - use crate::mcp::McpServer; - use crate::scanner::{is_ignored_path, scan_files_with_noise_tracking}; - use std::sync::Arc; +/// Scan and extract skeleton for every project file, with a parallel rayon scan +/// and a git-HEAD-keyed persistent cache (.cartographer_cache.json). 
+fn build_mapped_files_cached(root: &Path) -> anyhow::Result> { + use rayon::prelude::*; + use serde::{Deserialize, Serialize}; - let result = scan_files_with_noise_tracking(root)?; - let mapped_files: std::collections::HashMap = result - .files - .iter() + #[derive(Serialize, Deserialize)] + struct MapCache { + head: String, + files: HashMap, + } + + // Compute git HEAD (empty string if not a git repo) + let head: String = std::process::Command::new("git") + .args(["-C", &root.to_string_lossy(), "rev-parse", "HEAD"]) + .output() + .ok() + .and_then(|o| if o.status.success() { Some(String::from_utf8_lossy(&o.stdout).trim().to_string()) } else { None }) + .unwrap_or_default(); + + let cache_path = root.join(".cartographer_cache.json"); + + // Try cache hit + if !head.is_empty() { + if let Ok(raw) = std::fs::read_to_string(&cache_path) { + if let Ok(cached) = serde_json::from_str::(&raw) { + if cached.head == head { + return Ok(cached.files); + } + } + } + } + + // Parallel scan + let scan = scan_files_with_noise_tracking(root).context("file scan failed")?; + let result: HashMap = scan.files + .par_iter() .filter(|p| !is_ignored_path(p)) .filter_map(|p| { let content = std::fs::read_to_string(p).ok()?; let mapped = extract_skeleton(p, &content); - let rel = p - .strip_prefix(root) - .unwrap_or(p) - .to_string_lossy() - .replace('\\', "/"); + let rel = p.strip_prefix(root).unwrap_or(p) + .to_string_lossy().replace('\\', "/"); Some((rel, mapped)) }) .collect(); + // Write cache + if !head.is_empty() { + if let Ok(json) = serde_json::to_string(&MapCache { head, files: result.clone() }) { + let _ = std::fs::write(&cache_path, json); + } + } + + Ok(result) +} + +// ============================================================================= +// MCP SERVE MODE - Start MCP server with stdio JSON-RPC transport +// ============================================================================= + +fn mcp_serve_mode(root: &Path) -> Result<()> { + use crate::api::ApiState; + use 
crate::mcp::McpServer; + use std::sync::Arc; + + let mapped_files = build_mapped_files_cached(root)?; + let state = Arc::new(ApiState::new(root.to_path_buf())); { let mut files = state.mapped_files.lock().unwrap(); diff --git a/third_party/cartographer/mapper-core/cartographer/src/mapper.rs b/third_party/cartographer/mapper-core/cartographer/src/mapper.rs index bd7291bd..d0d7647a 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/mapper.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/mapper.rs @@ -35,6 +35,8 @@ pub enum SymbolKind { EnumMember, Constructor, TypeAlias, + ConfigKey, + Endpoint, } // --------------------------------------------------------------------------- @@ -63,6 +65,15 @@ pub struct Signature { /// 0-indexed line number of this signature. #[serde(default)] pub line_start: usize, + /// Column byte offset (UTF-8) of this signature on its start line. 0-indexed. + #[serde(default)] + pub col_start: usize, + /// 0-indexed end line (inclusive). + #[serde(default)] + pub line_end: usize, + /// Column byte offset (UTF-8) of the end of this signature. 0-indexed, exclusive. + #[serde(default)] + pub col_end: usize, /// Confidence score (1–100). 30 = Tier 1 regex heuristic. 
#[serde(default = "default_confidence")] pub confidence: u8, @@ -89,6 +100,9 @@ impl Signature { qualified_name: Some(qualified_name), kind, line_start, + col_start: 0, + line_end: 0, + col_end: 0, confidence: 30, doc_comment, } @@ -433,8 +447,16 @@ pub fn extract_skeleton(path: &Path, content: &str) -> MappedFile { "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" => extract_c_cpp(rel_path, content), "rb" => extract_ruby(rel_path, content), "php" => extract_php(rel_path, content), - "md" | "txt" | "json" | "yaml" | "yml" | "toml" | "xml" | "html" | "css" | "scss" - | "less" | "svg" | "lock" => { + "cs" => extract_csharp(rel_path, content), + "swift" => extract_swift(rel_path, content), + "lua" => extract_lua(rel_path, content), + "sh" | "bash" | "zsh" | "fish" => extract_shell(rel_path, content), + "sql" => extract_sql(rel_path, content), + "md" | "markdown" => extract_markdown(rel_path, content), + "yaml" | "yml" => extract_yaml(rel_path, content), + "toml" => extract_toml(rel_path, content), + "json" => extract_json(rel_path, content), + "txt" | "xml" | "html" | "css" | "scss" | "less" | "svg" | "lock" => { return MappedFile { path: path.to_string_lossy().replace('\\', "/"), imports: Vec::new(), @@ -1713,62 +1735,1202 @@ fn extract_php(path: String, content: &str) -> MappedFile { } // --------------------------------------------------------------------------- -// Generic fallback +// C# // --------------------------------------------------------------------------- -fn extract_generic(path: String, content: &str) -> MappedFile { - let import_re = Regex::new(r"^(?:import|require|include|use)\s+.+").unwrap(); - let sig_re = Regex::new( - r"^(?:function|def|fn|func|class|struct|interface|type|enum|trait|module)\s+(\w+)", +fn extract_csharp(path: String, content: &str) -> MappedFile { + let import_re = Regex::new(r"^using\s+([\w.]+)").unwrap(); + let type_re = Regex::new( + 
r"^(?:(?:public|private|protected|internal|static|abstract|sealed|virtual|override|readonly|partial)\s+)*(?:class|interface|enum|struct|record)\s+(\w+)", + ) + .unwrap(); + let fn_re = Regex::new( + r"^(?:(?:public|private|protected|internal|static|abstract|sealed|virtual|override|readonly|async)\s+)+[\w<>\[\]?]+\s+(\w+)\s*\(", ) .unwrap(); let mut imports = Vec::new(); let mut signatures = Vec::new(); let mut doc_buf: Vec = Vec::new(); + let mut scope = ScopeTracker::new(); + let mut in_block_comment = false; for (line_idx, line) in content.lines().enumerate() { let trimmed = line.trim(); if trimmed.is_empty() { + if !in_block_comment { doc_buf.clear(); } + scope.update(line, None); + continue; + } + + if in_block_comment { + if trimmed.contains("*/") { in_block_comment = false; } + else { doc_buf.push(strip_doc_marker(trimmed)); } + scope.update(line, None); + continue; + } + + if trimmed.starts_with("/**") || trimmed.starts_with("/*") { + in_block_comment = !trimmed.contains("*/"); + doc_buf.push(strip_doc_marker(trimmed)); + scope.update(line, None); + continue; + } + + if trimmed.starts_with("///") || trimmed.starts_with("//") { + doc_buf.push(strip_doc_marker(trimmed)); + scope.update(line, None); + continue; + } + + if import_re.is_match(trimmed) { + imports.push(trimmed.to_string()); doc_buf.clear(); + scope.update(line, None); continue; } - if trimmed.starts_with("//") || trimmed.starts_with('#') { + if let Some(caps) = type_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let kind = if trimmed.contains("interface") { + SymbolKind::Interface + } else if trimmed.contains("enum") { + SymbolKind::Enum + } else if trimmed.contains("struct") { + SymbolKind::Struct + } else { + SymbolKind::Class + }; + let doc = take_doc(&mut doc_buf); + let raw = trimmed.split('{').next().unwrap_or(trimmed).trim().to_string(); + signatures.push(Signature::new(raw, kind, line_idx, &path, name.clone(), doc)); + scope.update(line, 
Some(name)); + continue; + } + + if let Some(caps) = fn_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + if !matches!(name.as_str(), "if" | "for" | "while" | "switch" | "foreach" | "catch") { + let qualified = scope.qualify(&name); + let kind = if scope.current().is_some() { SymbolKind::Method } else { SymbolKind::Function }; + let raw = trimmed.split('{').next().unwrap_or(trimmed).trim().to_string(); + let doc = take_doc(&mut doc_buf); + signatures.push(Signature::new(raw, kind, line_idx, &path, qualified, doc)); + } + scope.update(line, None); + continue; + } + + doc_buf.clear(); + scope.update(line, None); + } + + MappedFile { path, imports, signatures, docstrings: None, parameters: None, return_types: None } +} + +// --------------------------------------------------------------------------- +// Swift +// --------------------------------------------------------------------------- + +fn extract_swift(path: String, content: &str) -> MappedFile { + let import_re = Regex::new(r"^import\s+(\w+)").unwrap(); + let type_re = Regex::new( + r"^(?:(?:public|private|internal|fileprivate|open|final)\s+)*(?:class|struct|enum|protocol|actor)\s+(\w+)", + ) + .unwrap(); + let fn_re = Regex::new( + r"^(?:(?:public|private|internal|fileprivate|open|final|override|static|class|mutating|lazy)\s+)*func\s+(\w+)", + ) + .unwrap(); + let prop_re = Regex::new( + r"^(?:(?:public|private|internal|fileprivate|open|final|lazy|static)\s+)*(?:var|let)\s+(\w+)\s*:", + ) + .unwrap(); + let ext_re = Regex::new(r"^extension\s+(\w+)").unwrap(); + let alias_re = Regex::new(r"^typealias\s+(\w+)").unwrap(); + + let mut imports = Vec::new(); + let mut signatures = Vec::new(); + let mut doc_buf: Vec = Vec::new(); + let mut scope = ScopeTracker::new(); + let mut in_block_comment = false; + + for (line_idx, line) in content.lines().enumerate() { + let trimmed = line.trim(); + + if trimmed.is_empty() { + if !in_block_comment { doc_buf.clear(); } + 
scope.update(line, None); + continue; + } + + if in_block_comment { + if trimmed.contains("*/") { in_block_comment = false; } + else { doc_buf.push(strip_doc_marker(trimmed)); } + scope.update(line, None); + continue; + } + + if trimmed.starts_with("/**") || trimmed.starts_with("/*") { + in_block_comment = !trimmed.contains("*/"); + doc_buf.push(strip_doc_marker(trimmed)); + scope.update(line, None); + continue; + } + + if trimmed.starts_with("///") || trimmed.starts_with("//") { doc_buf.push(strip_doc_marker(trimmed)); + scope.update(line, None); continue; } if import_re.is_match(trimmed) { imports.push(trimmed.to_string()); doc_buf.clear(); + scope.update(line, None); continue; } - if let Some(caps) = sig_re.captures(trimmed) { + if let Some(caps) = type_re.captures(trimmed) { let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let kind = if trimmed.contains("protocol") { SymbolKind::Interface } + else if trimmed.contains("enum") { SymbolKind::Enum } + else if trimmed.contains("struct") { SymbolKind::Struct } + else { SymbolKind::Class }; let doc = take_doc(&mut doc_buf); - signatures.push(Signature::new( - trimmed.to_string(), - SymbolKind::Unknown, - line_idx, - &path, - name, - doc, - )); + let raw = trimmed.split('{').next().unwrap_or(trimmed).trim().to_string(); + signatures.push(Signature::new(raw, kind, line_idx, &path, name.clone(), doc)); + scope.update(line, Some(name)); + continue; + } + + if let Some(caps) = ext_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let doc = take_doc(&mut doc_buf); + let raw = trimmed.split('{').next().unwrap_or(trimmed).trim().to_string(); + signatures.push(Signature::new(raw, SymbolKind::Namespace, line_idx, &path, name.clone(), doc)); + scope.update(line, Some(name)); + continue; + } + + if let Some(caps) = alias_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let doc = take_doc(&mut doc_buf); + 
signatures.push(Signature::new(trimmed.to_string(), SymbolKind::TypeAlias, line_idx, &path, name, doc)); + scope.update(line, None); + continue; + } + + if let Some(caps) = fn_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let qualified = scope.qualify(&name); + let kind = if scope.current().is_some() { SymbolKind::Method } else { SymbolKind::Function }; + let raw = trimmed.split('{').next().unwrap_or(trimmed).trim().to_string(); + let doc = take_doc(&mut doc_buf); + signatures.push(Signature::new(raw, kind, line_idx, &path, qualified, doc)); + scope.update(line, None); + continue; + } + + if let Some(caps) = prop_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + if scope.current().is_some() { + let qualified = scope.qualify(&name); + let doc = take_doc(&mut doc_buf); + let raw = trimmed.split('=').next().unwrap_or(trimmed).trim().to_string(); + signatures.push(Signature::new(raw, SymbolKind::Field, line_idx, &path, qualified, doc)); + } else { + doc_buf.clear(); + } + scope.update(line, None); continue; } doc_buf.clear(); + scope.update(line, None); } - MappedFile { - path, - imports, - signatures, - docstrings: None, - parameters: None, - return_types: None, + MappedFile { path, imports, signatures, docstrings: None, parameters: None, return_types: None } +} + +// --------------------------------------------------------------------------- +// Lua +// --------------------------------------------------------------------------- + +fn extract_lua(path: String, content: &str) -> MappedFile { + let require_re = Regex::new(r#"^(?:local\s+\w+\s*=\s*)?require\s*\(?['"]([^'"]+)['"]\)?"#).unwrap(); + let fn_decl_re = Regex::new(r"^(?:local\s+)?function\s+([\w.:]+)\s*\(").unwrap(); + let fn_assign_re = Regex::new(r"^(?:local\s+)?([\w.:]+)\s*=\s*function\s*\(").unwrap(); + + let mut imports = Vec::new(); + let mut signatures = Vec::new(); + let mut doc_buf: Vec = Vec::new(); + + 
for (line_idx, line) in content.lines().enumerate() { + let trimmed = line.trim(); + + if trimmed.is_empty() { + doc_buf.clear(); + continue; + } + + if trimmed.starts_with("--") { + doc_buf.push(strip_doc_marker(trimmed)); + continue; + } + + if let Some(caps) = require_re.captures(trimmed) { + let module = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + imports.push(format!("require '{}'", module)); + doc_buf.clear(); + continue; + } + + if let Some(caps) = fn_decl_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let doc = take_doc(&mut doc_buf); + let raw = trimmed.split(')').next().map(|s| format!("{})", s)).unwrap_or_else(|| trimmed.to_string()); + signatures.push(Signature::new(raw, SymbolKind::Function, line_idx, &path, name, doc)); + continue; + } + + if let Some(caps) = fn_assign_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + if !name.is_empty() && !name.starts_with('_') { + let doc = take_doc(&mut doc_buf); + let raw = format!("function {}(...)", name); + signatures.push(Signature::new(raw, SymbolKind::Function, line_idx, &path, name, doc)); + } else { + doc_buf.clear(); + } + continue; + } + + doc_buf.clear(); + } + + MappedFile { path, imports, signatures, docstrings: None, parameters: None, return_types: None } +} + +// --------------------------------------------------------------------------- +// Shell (sh / bash / zsh / fish) +// --------------------------------------------------------------------------- + +fn extract_shell(path: String, content: &str) -> MappedFile { + let fn_paren_re = Regex::new(r"^(\w[\w-]*)\s*\(\)\s*(?:\{|$)").unwrap(); + let fn_keyword_re = Regex::new(r"^function\s+(\w[\w-]*)").unwrap(); + + let mut signatures = Vec::new(); + let mut doc_buf: Vec = Vec::new(); + + for (line_idx, line) in content.lines().enumerate() { + let trimmed = line.trim(); + + if trimmed.is_empty() { + doc_buf.clear(); + continue; + } + + if 
trimmed.starts_with('#') { + doc_buf.push(strip_doc_marker(trimmed)); + continue; + } + + if let Some(caps) = fn_keyword_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let doc = take_doc(&mut doc_buf); + signatures.push(Signature::new(format!("function {}()", name), SymbolKind::Function, line_idx, &path, name, doc)); + continue; + } + + if let Some(caps) = fn_paren_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let doc = take_doc(&mut doc_buf); + signatures.push(Signature::new(format!("{}()", name), SymbolKind::Function, line_idx, &path, name, doc)); + continue; + } + + doc_buf.clear(); + } + + MappedFile { path, imports: Vec::new(), signatures, docstrings: None, parameters: None, return_types: None } +} + +// --------------------------------------------------------------------------- +// SQL +// --------------------------------------------------------------------------- + +fn extract_sql(path: String, content: &str) -> MappedFile { + let ddl_re = Regex::new( + r"(?i)^CREATE\s+(?:OR\s+REPLACE\s+)?(?:TABLE|VIEW|FUNCTION|PROCEDURE|INDEX|TRIGGER)\s+(?:\w+\.)?(\w+)", + ) + .unwrap(); + let alter_re = Regex::new(r"(?i)^ALTER\s+TABLE\s+(?:\w+\.)?(\w+)").unwrap(); + + let mut signatures = Vec::new(); + let mut doc_buf: Vec = Vec::new(); + + for (line_idx, line) in content.lines().enumerate() { + let trimmed = line.trim(); + + if trimmed.is_empty() { + doc_buf.clear(); + continue; + } + + if trimmed.starts_with("--") { + doc_buf.push(strip_doc_marker(trimmed)); + continue; + } + + if let Some(caps) = ddl_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let upper = trimmed.to_uppercase(); + let kind = if upper.contains("TABLE") { SymbolKind::Struct } + else if upper.contains("VIEW") { SymbolKind::Class } + else if upper.contains("FUNCTION") || upper.contains("PROCEDURE") { SymbolKind::Function } + else { SymbolKind::Unknown }; + 
let doc = take_doc(&mut doc_buf); + let raw = trimmed.split('(').next().unwrap_or(trimmed).trim_end_matches(';').trim().to_string(); + signatures.push(Signature::new(raw, kind, line_idx, &path, name, doc)); + continue; + } + + if let Some(caps) = alter_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let doc = take_doc(&mut doc_buf); + signatures.push(Signature::new(trimmed.trim_end_matches(';').to_string(), SymbolKind::Struct, line_idx, &path, name, doc)); + continue; + } + + doc_buf.clear(); + } + + MappedFile { path, imports: Vec::new(), signatures, docstrings: None, parameters: None, return_types: None } +} + +// --------------------------------------------------------------------------- +// Markdown +// --------------------------------------------------------------------------- + +/// Code-file extensions used to detect file-path cross-references in docs. +const CODE_EXTENSIONS: &[&str] = &[ + "rs", "go", "py", "js", "jsx", "ts", "tsx", "mjs", "cjs", + "java", "kt", "scala", "c", "cpp", "cc", "cxx", "h", "hpp", + "rb", "php", "cs", "swift", "lua", "sh", "sql", + "yaml", "yml", "toml", "json", "md", +]; + +fn looks_like_file_path(s: &str) -> bool { + // Contains a slash or ends with a known extension + if s.contains('/') { return true; } + if let Some(dot) = s.rfind('.') { + let ext = &s[dot + 1..]; + return CODE_EXTENSIONS.contains(&ext); + } + false +} + +fn extract_markdown(path: String, content: &str) -> MappedFile { + let heading_re = Regex::new(r"^(#{1,6})\s+(.+)").unwrap(); + let link_re = Regex::new(r"\[.*?\]\(([^)]+)\)").unwrap(); + let backtick_sym_re = Regex::new(r"`([A-Z]\w{3,})`").unwrap(); + // Captures backtick file refs like `scanner.rs`, `search.md`, `config.yaml` + let backtick_file_re = Regex::new( + r"`([\w_-]+\.(?:rs|go|py|ts|tsx|js|jsx|mjs|cjs|java|kt|rb|php|cs|swift|lua|sh|sql|c|h|cpp|cc|cxx|hpp|md|yaml|yml|toml|json))`" + ).unwrap(); + let bare_path_re = 
Regex::new(r"(?:^|\s)((?:\./|src/|lib/|pkg/|cmd/|internal/)[\w/.@-]+)").unwrap(); + let frontmatter_key_re = Regex::new(r"^([\w_-]+)\s*:").unwrap(); + + let mut signatures = Vec::new(); + let mut imports = Vec::new(); + let mut seen_imports = std::collections::HashSet::new(); + + let lines: Vec<&str> = content.lines().collect(); + let mut start_line = 0; + + // --- YAML front-matter (fenced by --- at line 0) --- + if lines.first().map(|l| l.trim()) == Some("---") { + for (i, line) in lines.iter().enumerate().skip(1) { + let trimmed = line.trim(); + if trimmed == "---" { + start_line = i + 1; + break; + } + if let Some(caps) = frontmatter_key_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + if !name.is_empty() { + signatures.push(Signature::new( + trimmed.to_string(), SymbolKind::ConfigKey, i, &path, + format!("frontmatter.{}", name), None, + )); + } + } + } + } + + // --- Main pass: headings + cross-references --- + for (line_idx, line) in lines.iter().enumerate().skip(start_line) { + let trimmed = line.trim(); + + // Headings + if let Some(caps) = heading_re.captures(trimmed) { + let level = caps.get(1).map(|m| m.as_str().len()).unwrap_or(1); + let title = caps.get(2).map(|m| m.as_str().trim()).unwrap_or("").to_string(); + if title.is_empty() { continue; } + let kind = if level == 1 { SymbolKind::Namespace } else { SymbolKind::Field }; + let slug = title.to_lowercase().replace(|c: char| !c.is_alphanumeric(), "-"); + let raw = format!("{} {}", "#".repeat(level), title); + signatures.push(Signature::new(raw, kind, line_idx, &path, slug, None)); + } + + // Markdown link cross-refs: [text](target) + for caps in link_re.captures_iter(trimmed) { + let target = caps.get(1).map(|m| m.as_str()).unwrap_or(""); + // Skip URLs, anchors, and images + if target.starts_with("http") || target.starts_with('#') || target.is_empty() { + continue; + } + let target = target.split('#').next().unwrap_or(target); // strip anchor + if 
looks_like_file_path(target) && seen_imports.insert(target.to_string()) { + imports.push(target.trim_start_matches("./").to_string()); + } + } + + // Backtick PascalCase symbol refs: `ApiState`, `MappedFile` + for caps in backtick_sym_re.captures_iter(trimmed) { + let sym = caps.get(1).map(|m| m.as_str()).unwrap_or(""); + if seen_imports.insert(sym.to_string()) { + imports.push(sym.to_string()); + } + } + + // Backtick file refs: `scanner.rs`, `search.md`, `config.yaml` + for caps in backtick_file_re.captures_iter(trimmed) { + let file_ref = caps.get(1).map(|m| m.as_str()).unwrap_or(""); + if seen_imports.insert(file_ref.to_string()) { + imports.push(file_ref.to_string()); + } + } + + // Bare file paths: src/foo/bar.rs, ./lib/util.ts + for caps in bare_path_re.captures_iter(trimmed) { + let p = caps.get(1).map(|m| m.as_str()).unwrap_or(""); + let clean = p.trim_start_matches("./"); + if seen_imports.insert(clean.to_string()) { + imports.push(clean.to_string()); + } + } + } + + MappedFile { path, imports, signatures, docstrings: None, parameters: None, return_types: None } +} + +// --------------------------------------------------------------------------- +// YAML +// --------------------------------------------------------------------------- + +fn extract_yaml(path: String, content: &str) -> MappedFile { + let key_re = Regex::new(r"^(\s*)([\w_-]+)\s*:(.*)").unwrap(); + let max_depth: usize = 3; + + let mut signatures = Vec::new(); + let mut top_level_keys = Vec::new(); + + // Indent-stack: (indent_level, key_name) + let mut stack: Vec<(usize, String)> = Vec::new(); + + for (line_idx, line) in content.lines().enumerate() { + // Skip comments, empty lines, list items + let trimmed = line.trim(); + if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('-') { + continue; + } + + if let Some(caps) = key_re.captures(line) { + let indent = caps.get(1).map(|m| m.as_str().len()).unwrap_or(0); + let key = caps.get(2).map(|m| 
m.as_str()).unwrap_or("").to_string(); + let value = caps.get(3).map(|m| m.as_str().trim()).unwrap_or(""); + + if key.is_empty() { continue; } + + // Pop stack entries at same or deeper indent + while let Some(&(level, _)) = stack.last() { + if level >= indent { stack.pop(); } else { break; } + } + + // Track top-level keys for OpenAPI detection + if indent == 0 { + top_level_keys.push(key.clone()); + } + + // Build dot-path from stack + let depth = stack.len(); + if depth < max_depth { + let dot_path = if stack.is_empty() { + key.clone() + } else { + let prefix: Vec<&str> = stack.iter().map(|(_, k)| k.as_str()).collect(); + format!("{}.{}", prefix.join("."), key) + }; + + let kind = if indent == 0 { SymbolKind::Field } else { SymbolKind::ConfigKey }; + let raw = if value.is_empty() { + format!("{}:", dot_path) + } else { + format!("{}: {}", dot_path, value) + }; + signatures.push(Signature::new(raw, kind, line_idx, &path, dot_path, None)); + } + + stack.push((indent, key)); + } + } + + // --- OpenAPI detection --- + let is_openapi = top_level_keys.iter().any(|k| k == "openapi" || k == "swagger"); + if is_openapi { + extract_yaml_openapi_paths(content, &path, &mut signatures); + } + + MappedFile { path, imports: Vec::new(), signatures, docstrings: None, parameters: None, return_types: None } +} + +/// Extract OpenAPI endpoint paths from YAML content. +/// Looks for lines under `paths:` that start with `/`. 
+fn extract_yaml_openapi_paths(content: &str, path: &str, signatures: &mut Vec) { + let path_entry_re = Regex::new(r"^ (/\S+)\s*:").unwrap(); + let method_re = Regex::new(r"^ (get|post|put|patch|delete|head|options)\s*:").unwrap(); + + let mut in_paths = false; + let mut current_path = String::new(); + + for (line_idx, line) in content.lines().enumerate() { + let trimmed = line.trim(); + + // Detect `paths:` section (top-level, no indent) + if !line.starts_with(' ') && trimmed.starts_with("paths:") { + in_paths = true; + continue; + } + // Exit paths section when next top-level key appears + if in_paths && !line.starts_with(' ') && !trimmed.is_empty() { + in_paths = false; + } + + if !in_paths { continue; } + + // Path entry: /api/users: + if let Some(caps) = path_entry_re.captures(line) { + current_path = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + continue; + } + + // HTTP method under a path + if !current_path.is_empty() { + if let Some(caps) = method_re.captures(line) { + let method = caps.get(1).map(|m| m.as_str().to_uppercase()).unwrap_or_default(); + let raw = format!("{} {}", method, current_path); + let qname = format!("paths.{}.{}", current_path, method.to_lowercase()); + signatures.push(Signature::new( + raw, SymbolKind::Endpoint, line_idx, path, qname, None, + )); + } + } + } +} + +// --------------------------------------------------------------------------- +// TOML +// --------------------------------------------------------------------------- + +fn extract_toml(path: String, content: &str) -> MappedFile { + // Try structured parsing first; fall back to regex on failure. + if let Ok(table) = content.parse::() { + let mut signatures = Vec::new(); + toml_walk(&table, "", &path, &mut signatures, 0, 3); + + // Map line numbers: for each signature, find the matching line in source. + // This is a best-effort pass — qualified names are used as search keys. 
+ let lines: Vec<&str> = content.lines().collect(); + for sig in &mut signatures { + if let Some(qname) = &sig.qualified_name { + // Use the last segment as the key to search for + let search_key = qname.rsplit('.').next().unwrap_or(qname); + for (i, line) in lines.iter().enumerate() { + let trimmed = line.trim(); + if trimmed.starts_with(search_key) || trimmed.starts_with(&format!("[{}]", qname)) { + sig.line_start = i; + break; + } + } + } + } + + return MappedFile { path, imports: Vec::new(), signatures, docstrings: None, parameters: None, return_types: None }; + } + + // Fallback: regex-only for malformed TOML + let section_re = Regex::new(r"^\[([^\]]+)\]").unwrap(); + let key_re = Regex::new(r"^([\w_-]+)\s*=").unwrap(); + let mut signatures = Vec::new(); + let mut current_section = String::new(); + + for (line_idx, line) in content.lines().enumerate() { + let trimmed = line.trim(); + if trimmed.is_empty() || trimmed.starts_with('#') { continue; } + if let Some(caps) = section_re.captures(trimmed) { + let name = caps.get(1).map(|m| m.as_str().trim()).unwrap_or("").to_string(); + if name.is_empty() { continue; } + current_section = name.clone(); + signatures.push(Signature::new(trimmed.to_string(), SymbolKind::Namespace, line_idx, &path, name, None)); + } else if let Some(caps) = key_re.captures(trimmed) { + let key = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let qname = if current_section.is_empty() { + key.clone() + } else { + format!("{}.{}", current_section, key) + }; + signatures.push(Signature::new(trimmed.to_string(), SymbolKind::ConfigKey, line_idx, &path, qname, None)); + } + } + + MappedFile { path, imports: Vec::new(), signatures, docstrings: None, parameters: None, return_types: None } +} + +/// Recursively walk a parsed TOML value tree, emitting signatures. 
+fn toml_walk( + value: &toml::Value, + prefix: &str, + path: &str, + signatures: &mut Vec, + depth: usize, + max_depth: usize, +) { + if depth > max_depth { return; } + + if let Some(table) = value.as_table() { + for (key, val) in table { + let qname = if prefix.is_empty() { + key.clone() + } else { + format!("{}.{}", prefix, key) + }; + + match val { + toml::Value::Table(_) => { + // Section/table → Namespace + let raw = if depth == 0 { + format!("[{}]", qname) + } else { + format!("{}:", qname) + }; + signatures.push(Signature::new( + raw, SymbolKind::Namespace, 0, path, qname.clone(), None, + )); + toml_walk(val, &qname, path, signatures, depth + 1, max_depth); + } + toml::Value::Array(arr) if arr.first().map(|v| v.is_table()).unwrap_or(false) => { + // Array of tables (e.g. [[bin]]) + let raw = format!("[[{}]]", qname); + signatures.push(Signature::new( + raw, SymbolKind::Namespace, 0, path, qname.clone(), None, + )); + // Walk first entry only for structure discovery + if let Some(first) = arr.first() { + toml_walk(first, &qname, path, signatures, depth + 1, max_depth); + } + } + _ => { + // Leaf value → ConfigKey + let val_str = match val { + toml::Value::String(s) => format!("\"{}\"", s), + other => other.to_string(), + }; + let raw = format!("{} = {}", qname, val_str); + signatures.push(Signature::new( + raw, SymbolKind::ConfigKey, 0, path, qname, None, + )); + } + } + } + } +} + +// --------------------------------------------------------------------------- +// JSON +// --------------------------------------------------------------------------- + +/// Max JSON file size to attempt parsing (512 KB). Larger files (data fixtures, +/// generated output) are skipped to avoid slow extraction. 
+const JSON_MAX_SIZE: usize = 512 * 1024; + +fn extract_json(path: String, content: &str) -> MappedFile { + let empty = MappedFile { + path: path.clone(), imports: Vec::new(), signatures: Vec::new(), + docstrings: None, parameters: None, return_types: None, + }; + + if content.len() > JSON_MAX_SIZE { + return empty; + } + + let parsed: serde_json::Value = match serde_json::from_str(content) { + Ok(v) => v, + Err(_) => return empty, + }; + + let obj = match parsed.as_object() { + Some(o) => o, + None => return empty, + }; + + let mut signatures = Vec::new(); + let mut imports = Vec::new(); + + // Detect variant + let has_schema = obj.contains_key("$schema"); + let is_openapi = obj.contains_key("openapi") || obj.contains_key("swagger"); + let is_package_json = obj.contains_key("name") && obj.contains_key("version") + && (obj.contains_key("dependencies") || obj.contains_key("devDependencies")); + + if is_openapi { + extract_json_openapi(obj, &path, &mut signatures); + } else if has_schema { + extract_json_schema(obj, &path, &mut signatures, &mut imports); + } else if is_package_json { + extract_json_package(obj, &path, &mut signatures, &mut imports); + } else { + // Generic: top-level keys as Field, nested at depth <= 2 as ConfigKey + json_walk(obj, "", &path, &mut signatures, 0, 2); + } + + MappedFile { path, imports, signatures, docstrings: None, parameters: None, return_types: None } +} + +fn json_walk( + obj: &serde_json::Map, + prefix: &str, + path: &str, + signatures: &mut Vec, + depth: usize, + max_depth: usize, +) { + for (key, val) in obj { + let qname = if prefix.is_empty() { key.clone() } else { format!("{}.{}", prefix, key) }; + let kind = if depth == 0 { SymbolKind::Field } else { SymbolKind::ConfigKey }; + + match val { + serde_json::Value::Object(inner) if depth < max_depth => { + signatures.push(Signature::new( + format!("{}:", qname), kind, 0, path, qname.clone(), None, + )); + json_walk(inner, &qname, path, signatures, depth + 1, max_depth); + } + _ 
=> { + let val_str = match val { + serde_json::Value::String(s) => format!("\"{}\"", truncate_str(s, 60)), + serde_json::Value::Array(_) => "[...]".to_string(), + serde_json::Value::Object(_) => "{...}".to_string(), + other => other.to_string(), + }; + signatures.push(Signature::new( + format!("{}: {}", qname, val_str), kind, 0, path, qname, None, + )); + } + } + } +} + +fn truncate_str(s: &str, max: usize) -> String { + if s.len() <= max { s.to_string() } else { format!("{}...", &s[..max]) } +} + +/// Extract OpenAPI endpoints from a parsed JSON object. +fn extract_json_openapi( + obj: &serde_json::Map, + path: &str, + signatures: &mut Vec, +) { + // info.title + if let Some(info) = obj.get("info").and_then(|v| v.as_object()) { + if let Some(title) = info.get("title").and_then(|v| v.as_str()) { + signatures.push(Signature::new( + format!("info.title: \"{}\"", title), SymbolKind::Field, 0, path, + "info.title".to_string(), None, + )); + } + } + + // paths + if let Some(paths) = obj.get("paths").and_then(|v| v.as_object()) { + for (endpoint, methods) in paths { + if let Some(methods_obj) = methods.as_object() { + for method in methods_obj.keys() { + let m = method.to_uppercase(); + if matches!(m.as_str(), "GET" | "POST" | "PUT" | "PATCH" | "DELETE" | "HEAD" | "OPTIONS") { + let raw = format!("{} {}", m, endpoint); + let qname = format!("paths.{}.{}", endpoint, method); + signatures.push(Signature::new( + raw, SymbolKind::Endpoint, 0, path, qname, None, + )); + } + } + } + } + } + + // components.schemas + if let Some(components) = obj.get("components").and_then(|v| v.as_object()) { + if let Some(schemas) = components.get("schemas").and_then(|v| v.as_object()) { + for schema_name in schemas.keys() { + let qname = format!("components.schemas.{}", schema_name); + signatures.push(Signature::new( + format!("schema {}", schema_name), SymbolKind::Namespace, 0, path, + qname, None, + )); + } + } + } +} + +/// Extract JSON Schema properties and $ref imports. 
+fn extract_json_schema( + obj: &serde_json::Map, + path: &str, + signatures: &mut Vec, + imports: &mut Vec, +) { + // Title + if let Some(title) = obj.get("title").and_then(|v| v.as_str()) { + signatures.push(Signature::new( + format!("schema: {}", title), SymbolKind::Namespace, 0, path, + title.to_string(), None, + )); + } + + // Properties + if let Some(props) = obj.get("properties").and_then(|v| v.as_object()) { + for (key, val) in props { + let type_str = val.get("type").and_then(|v| v.as_str()).unwrap_or("any"); + let raw = format!("{}: {}", key, type_str); + signatures.push(Signature::new( + raw, SymbolKind::ConfigKey, 0, path, + format!("properties.{}", key), None, + )); + } + } + + // $ref values → imports + collect_json_refs(obj, imports); +} + +fn collect_json_refs(obj: &serde_json::Map, imports: &mut Vec) { + for (key, val) in obj { + if key == "$ref" { + if let Some(r) = val.as_str() { + // Only add file-path refs, not internal #/definitions/... refs + if !r.starts_with('#') && !r.is_empty() { + imports.push(r.trim_start_matches("./").to_string()); + } + } + } + match val { + serde_json::Value::Object(inner) => collect_json_refs(inner, imports), + serde_json::Value::Array(arr) => { + for item in arr { + if let Some(inner) = item.as_object() { + collect_json_refs(inner, imports); + } + } + } + _ => {} + } + } +} + +/// Extract package.json: name, scripts, dependencies as imports. 
+fn extract_json_package( + obj: &serde_json::Map, + path: &str, + signatures: &mut Vec, + imports: &mut Vec, +) { + // name + version + if let Some(name) = obj.get("name").and_then(|v| v.as_str()) { + let version = obj.get("version").and_then(|v| v.as_str()).unwrap_or("?"); + signatures.push(Signature::new( + format!("{} @ {}", name, version), SymbolKind::Namespace, 0, path, + "package".to_string(), None, + )); + } + + // scripts + if let Some(scripts) = obj.get("scripts").and_then(|v| v.as_object()) { + for (key, val) in scripts { + let cmd = val.as_str().unwrap_or("..."); + signatures.push(Signature::new( + format!("script {}: {}", key, truncate_str(cmd, 60)), + SymbolKind::ConfigKey, 0, path, + format!("scripts.{}", key), None, + )); + } + } + + // main / module entry points → imports + for field in &["main", "module", "types"] { + if let Some(entry) = obj.get(*field).and_then(|v| v.as_str()) { + imports.push(entry.trim_start_matches("./").to_string()); + } + } +} + +// --------------------------------------------------------------------------- +// Generic fallback +// --------------------------------------------------------------------------- + +fn extract_generic(path: String, content: &str) -> MappedFile { + let import_re = Regex::new(r"^(?:import|require|include|use)\s+.+").unwrap(); + let sig_re = Regex::new( + r"^(?:function|def|fn|func|class|struct|interface|type|enum|trait|module)\s+(\w+)", + ) + .unwrap(); + + let mut imports = Vec::new(); + let mut signatures = Vec::new(); + let mut doc_buf: Vec = Vec::new(); + + for (line_idx, line) in content.lines().enumerate() { + let trimmed = line.trim(); + + if trimmed.is_empty() { + doc_buf.clear(); + continue; + } + + if trimmed.starts_with("//") || trimmed.starts_with('#') { + doc_buf.push(strip_doc_marker(trimmed)); + continue; + } + + if import_re.is_match(trimmed) { + imports.push(trimmed.to_string()); + doc_buf.clear(); + continue; + } + + if let Some(caps) = sig_re.captures(trimmed) { + let name = 
caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let doc = take_doc(&mut doc_buf); + signatures.push(Signature::new( + trimmed.to_string(), + SymbolKind::Unknown, + line_idx, + &path, + name, + doc, + )); + continue; + } + + doc_buf.clear(); + } + + MappedFile { + path, + imports, + signatures, + docstrings: None, + parameters: None, + return_types: None, + } +} + +// --------------------------------------------------------------------------- +// Tests — document extractors +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod doc_tests { + use super::*; + use std::path::Path; + + // ── Markdown ────────────────────────────────────────────────────────── + + #[test] + fn markdown_headings_preserved() { + let content = "# Title\n\nSome text.\n\n## Section One\n\n### Subsection"; + let mf = extract_skeleton(Path::new("README.md"), content); + let heading_sigs: Vec<_> = mf.signatures.iter() + .filter(|s| s.kind == SymbolKind::Namespace || s.kind == SymbolKind::Field) + .collect(); + assert_eq!(heading_sigs.len(), 3); + assert_eq!(heading_sigs[0].kind, SymbolKind::Namespace); // H1 + assert_eq!(heading_sigs[1].kind, SymbolKind::Field); // H2 + assert_eq!(heading_sigs[2].kind, SymbolKind::Field); // H3 + } + + #[test] + fn markdown_link_crossrefs() { + let content = "# Guide\n\nSee [the handler](src/api/handler.rs) for details.\n\nAlso check [config](./config.toml)."; + let mf = extract_skeleton(Path::new("docs/guide.md"), content); + assert!(mf.imports.iter().any(|i| i == "src/api/handler.rs"), "should import handler.rs"); + assert!(mf.imports.iter().any(|i| i == "config.toml"), "should import config.toml"); + } + + #[test] + fn markdown_backtick_symbol_refs() { + let content = "# API\n\nThe `ApiState` struct manages the graph. 
See `MappedFile` too.\n\nIgnore `foo` (too short)."; + let mf = extract_skeleton(Path::new("docs/api.md"), content); + assert!(mf.imports.iter().any(|i| i == "ApiState"), "should import ApiState"); + assert!(mf.imports.iter().any(|i| i == "MappedFile"), "should import MappedFile"); + assert!(!mf.imports.iter().any(|i| i == "foo"), "should NOT import short names"); + } + + #[test] + fn markdown_frontmatter() { + let content = "---\ntitle: My Doc\ntags: rust, api\n---\n# Content\n\nBody text."; + let mf = extract_skeleton(Path::new("docs/post.md"), content); + let fm_sigs: Vec<_> = mf.signatures.iter().filter(|s| s.kind == SymbolKind::ConfigKey).collect(); + assert_eq!(fm_sigs.len(), 2, "should extract 2 front-matter keys"); + assert!(fm_sigs.iter().any(|s| s.qualified_name.as_deref() == Some("frontmatter.title"))); + } + + #[test] + fn markdown_skips_urls() { + let content = "# Links\n\n[Google](https://google.com)\n[Anchor](#section)"; + let mf = extract_skeleton(Path::new("README.md"), content); + assert!(mf.imports.is_empty(), "should not import URLs or anchors"); + } + + #[test] + fn markdown_bare_paths() { + let content = "# Guide\n\nEdit src/mapper.rs to change extraction."; + let mf = extract_skeleton(Path::new("CONTRIBUTING.md"), content); + assert!(mf.imports.iter().any(|i| i == "src/mapper.rs"), "should detect bare file paths"); + } + + #[test] + fn markdown_backtick_file_refs() { + let content = "| `scanner.rs` | File discovery |\n| `mapper.rs` | Extraction |\n| `api.rs` | Graph |\n\nSee `config.yaml` too."; + let mf = extract_skeleton(Path::new("docs/architecture.md"), content); + assert!(mf.imports.iter().any(|i| i == "scanner.rs"), "should import scanner.rs"); + assert!(mf.imports.iter().any(|i| i == "mapper.rs"), "should import mapper.rs"); + assert!(mf.imports.iter().any(|i| i == "api.rs"), "should import api.rs"); + assert!(mf.imports.iter().any(|i| i == "config.yaml"), "should import config.yaml"); + } + + // ── YAML 
───────────────────────────────────────────────────────────── + + #[test] + fn yaml_nested_keys() { + let content = "server:\n host: localhost\n port: 8080\ndatabase:\n name: mydb"; + let mf = extract_skeleton(Path::new("config.yaml"), content); + let qnames: Vec<_> = mf.signatures.iter().filter_map(|s| s.qualified_name.as_deref()).collect(); + assert!(qnames.contains(&"server"), "should have top-level key"); + assert!(qnames.contains(&"server.host"), "should have nested key"); + assert!(qnames.contains(&"server.port"), "should have nested key"); + assert!(qnames.contains(&"database.name"), "should have nested key"); + } + + #[test] + fn yaml_depth_cap() { + let content = "a:\n b:\n c:\n d:\n e: deep"; + let mf = extract_skeleton(Path::new("deep.yml"), content); + // Depth 3 cap means a, a.b, a.b.c are extracted; a.b.c.d and deeper are not + let qnames: Vec<_> = mf.signatures.iter().filter_map(|s| s.qualified_name.as_deref()).collect(); + assert!(qnames.contains(&"a.b.c"), "depth 3 should be included"); + assert!(!qnames.iter().any(|q| q.contains("d")), "depth 4+ should be excluded"); + } + + #[test] + fn yaml_openapi_endpoints() { + let content = "\ +openapi: 3.0.0 +info: + title: Test API +paths: + /users: + get: + post: + /users/{id}: + get: + delete: +components: + schemas:"; + let mf = extract_skeleton(Path::new("openapi.yaml"), content); + let endpoints: Vec<_> = mf.signatures.iter() + .filter(|s| s.kind == SymbolKind::Endpoint) + .collect(); + assert!(endpoints.len() >= 4, "should extract at least 4 endpoints, got {}", endpoints.len()); + assert!(endpoints.iter().any(|s| s.raw == "GET /users")); + assert!(endpoints.iter().any(|s| s.raw == "DELETE /users/{id}")); + } + + // ── TOML ───────────────────────────────────────────────────────────── + + #[test] + fn toml_sections_and_keys() { + let content = "[package]\nname = \"cartographer\"\nversion = \"3.0.0\"\n\n[dependencies]\nserde = \"1.0\""; + let mf = extract_skeleton(Path::new("Cargo.toml"), content); + 
let qnames: Vec<_> = mf.signatures.iter().filter_map(|s| s.qualified_name.as_deref()).collect(); + assert!(qnames.contains(&"package"), "should have package section"); + assert!(qnames.contains(&"package.name"), "should have package.name key"); + assert!(qnames.contains(&"dependencies"), "should have dependencies section"); + assert!(qnames.contains(&"dependencies.serde"), "should have dependencies.serde key"); + } + + #[test] + fn toml_fallback_on_bad_input() { + // Malformed TOML — should still extract what it can via regex fallback + let content = "[section]\nkey = value\n[bad\nmore = stuff"; + let mf = extract_skeleton(Path::new("bad.toml"), content); + // Regex fallback should get at least the section and key + assert!(!mf.signatures.is_empty(), "fallback should extract something"); + } + + // ── JSON ───────────────────────────────────────────────────────────── + + #[test] + fn json_generic_keys() { + let content = r#"{"name": "test", "version": 1, "config": {"debug": true}}"#; + let mf = extract_skeleton(Path::new("settings.json"), content); + assert!(!mf.signatures.is_empty(), "should extract JSON keys"); + let qnames: Vec<_> = mf.signatures.iter().filter_map(|s| s.qualified_name.as_deref()).collect(); + assert!(qnames.contains(&"name")); + assert!(qnames.contains(&"config.debug")); + } + + #[test] + fn json_openapi() { + let content = r#"{"openapi": "3.0.0", "info": {"title": "My API"}, "paths": {"/health": {"get": {}}}}"#; + let mf = extract_skeleton(Path::new("api.json"), content); + let endpoints: Vec<_> = mf.signatures.iter().filter(|s| s.kind == SymbolKind::Endpoint).collect(); + assert_eq!(endpoints.len(), 1); + assert_eq!(endpoints[0].raw, "GET /health"); + } + + #[test] + fn json_schema_properties() { + let content = r#"{"$schema": "http://json-schema.org/draft-07/schema#", "title": "User", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}}"#; + let mf = extract_skeleton(Path::new("user.schema.json"), content); + let props: 
Vec<_> = mf.signatures.iter().filter(|s| s.kind == SymbolKind::ConfigKey).collect(); + assert_eq!(props.len(), 2, "should extract 2 properties"); + } + + #[test] + fn json_package_json() { + let content = r#"{"name": "my-app", "version": "1.0.0", "main": "dist/index.js", "dependencies": {"express": "^4.18.0"}}"#; + let mf = extract_skeleton(Path::new("package.json"), content); + assert!(mf.imports.iter().any(|i| i == "dist/index.js"), "should import main entry point"); + } + + #[test] + fn json_size_guard() { + // Content > 512KB should return empty + let content = "x".repeat(600_000); + let mf = extract_skeleton(Path::new("huge.json"), &content); + assert!(mf.signatures.is_empty(), "should skip oversized JSON"); } } diff --git a/third_party/cartographer/mapper-core/cartographer/src/mcp.rs b/third_party/cartographer/mapper-core/cartographer/src/mcp.rs index c133cf8e..40109ffb 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/mcp.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/mcp.rs @@ -5,6 +5,24 @@ use crate::api::{ApiState, ModuleContextRequest}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +// --------------------------------------------------------------------------- +// watch_graph event types +// --------------------------------------------------------------------------- + +#[derive(Serialize)] +#[serde(rename_all = "snake_case")] +pub enum GraphEventKind { + FileReindexed, + GraphUpdated, +} + +#[derive(Serialize)] +pub struct GraphEvent { + pub kind: GraphEventKind, + pub path: String, + pub timestamp_ms: u64, +} + macro_rules! 
mcprop { ($type:literal, $desc:literal) => { McpProperty { @@ -661,6 +679,156 @@ impl McpServer { required: vec!["content".to_string()], }, }, + // ----------------------------------------------------------------- + // Symbol-scoped search + // ----------------------------------------------------------------- + McpTool { + name: "search_in_symbol".to_string(), + description: "Search for a pattern scoped to the body of a named function or \ + method. Returns only matches within that symbol's approximate line \ + range, filtering out occurrences elsewhere in the file. Useful for \ + \"find X only inside handleKeyMsg()\" without wading through \ + whole-file grep results." + .to_string(), + input_schema: { + let mut props = HashMap::new(); + props.insert("file".to_string(), mcprop!("string", "Relative path or filename fragment (e.g. chatview.go)")); + props.insert("symbol".to_string(), mcprop!("string", "Function or method name to scope the search to")); + props.insert("pattern".to_string(), mcprop!("string", "Regex or literal search pattern")); + props.insert("context_lines".to_string(), mcprop!("number", "Lines of context around each match (default 2)")); + McpInputSchema { + type_: "object".to_string(), + properties: props, + required: vec!["file".to_string(), "symbol".to_string(), "pattern".to_string()], + } + }, + }, + // ----------------------------------------------------------------- + // TUI key-binding map + // ----------------------------------------------------------------- + McpTool { + name: "list_key_handlers".to_string(), + description: "Extract a structured key-binding map from a TUI source file. \ + Groups all `case \"key\":` and `== \"key\"` patterns by key string \ + with surrounding context. Works for Go/Bubble Tea, Rust/crossterm, \ + and any framework using quoted key strings." 
+ .to_string(), + input_schema: { + let mut props = HashMap::new(); + props.insert("file".to_string(), mcprop!("string", "Relative path or filename fragment")); + props.insert("context_lines".to_string(), mcprop!("number", "Lines of context around each binding (default 4)")); + McpInputSchema { + type_: "object".to_string(), + properties: props, + required: vec!["file".to_string()], + } + }, + }, + // ----------------------------------------------------------------- + // State-machine mapper + // ----------------------------------------------------------------- + McpTool { + name: "map_state_machine".to_string(), + description: "Correlate state guards with nearby key handlers to produce a \ + state × handlers matrix. Given a state variable name and enum \ + prefix, returns which keys are handled in each state with guard \ + line numbers. Ideal for large TUI files with switch-on-state \ + dispatch (e.g. Bubble Tea chatview)." + .to_string(), + input_schema: { + let mut props = HashMap::new(); + props.insert("file".to_string(), mcprop!("string", "Relative path or filename fragment")); + props.insert("state_var".to_string(), mcprop!("string", "State variable expression to look for in guards (default: m.state)")); + props.insert("state_prefix".to_string(), mcprop!("string", "Enum variant prefix used to identify state constants (default: State)")); + props.insert("context_lines".to_string(), mcprop!("number", "Context lines around each guard (default 3)")); + McpInputSchema { + type_: "object".to_string(), + properties: props, + required: vec!["file".to_string()], + } + }, + }, + // ----------------------------------------------------------------- + // Incremental graph push events + // ----------------------------------------------------------------- + McpTool { + name: "watch_graph".to_string(), + description: "Watch a directory for source file changes and emit incremental \ + graph events as newline-delimited JSON to stdout. 
Each event \ + includes the kind (file_reindexed or graph_updated), the file \ + path, and a millisecond timestamp. Runs until timeout_secs \ + elapses (default 30, max 300)." + .to_string(), + input_schema: McpInputSchema { + type_: "object".to_string(), + properties: { + let mut props = HashMap::new(); + props.insert("root".to_string(), mcprop!("string", "Root directory path to watch recursively")); + props.insert("timeout_secs".to_string(), mcprop!("number", "How long to watch in seconds (default 30, max 300)")); + props + }, + required: vec!["root".to_string()], + }, + }, + // ----------------------------------------------------------------- + // Document-oriented tools + // ----------------------------------------------------------------- + McpTool { + name: "doc_index".to_string(), + description: "Return all document-type files (Markdown, YAML, TOML, JSON) \ + in the project with their extracted headings, config keys, \ + cross-reference edges, and edge counts. Useful as a table \ + of contents for the project's documentation." + .to_string(), + input_schema: McpInputSchema { + type_: "object".to_string(), + properties: HashMap::new(), + required: vec![], + }, + }, + McpTool { + name: "doc_context".to_string(), + description: "Get a single document's extracted structure plus the skeleton \ + of all code files it cross-references. Follows import edges \ + from the doc into code, ranked by relevance. Returns the doc \ + first, then supporting code — ideal for understanding what a \ + doc describes." 
+ .to_string(), + input_schema: { + let mut props = HashMap::new(); + props.insert("doc_path".to_string(), mcprop!("string", + "Path to the document file (relative to project root, or a path fragment)")); + props.insert("budget".to_string(), mcprop!("number", + "Max tokens for referenced code context (default 4000)")); + McpInputSchema { + type_: "object".to_string(), + properties: props, + required: vec!["doc_path".to_string()], + } + }, + }, + McpTool { + name: "query_docs".to_string(), + description: "Doc-biased context retrieval: searches documents first, then \ + follows cross-reference edges into the code they describe. \ + Returns a bundle with docs and supporting code separated. \ + Like query_context but prioritises documentation." + .to_string(), + input_schema: { + let mut props = HashMap::new(); + props.insert("query".to_string(), mcprop!("string", + "Natural language query or keyword to search for")); + props.insert("budget".to_string(), mcprop!("number", + "Max total tokens (default 8000)")); + props.insert("model".to_string(), mcprop!("string", + "Target model for health scoring: claude, gpt4, llama (default claude)")); + McpInputSchema { + type_: "object".to_string(), + properties: props, + required: vec!["query".to_string()], + } + }, + }, ] } @@ -1669,10 +1837,582 @@ impl McpServer { .unwrap_or_default(), }; - let report = crate::token_metrics::analyze(&content, &opts); + let mut report = crate::token_metrics::analyze(&content, &opts); + + // Populate NYX.md [commands] preset names + let nyx = crate::token_metrics::parse_nyx_commands(&self.api_state.root_path); + if !nyx.is_empty() { + let preset_names: Vec = nyx.into_keys().collect(); + report.nyx_commands = Some(preset_names); + } + + // Warn if any preset command references a file in a detected cycle + if let Some(ref preset_names_ref) = report.nyx_commands.clone() { + if let Ok(graph) = self.api_state.rebuild_graph() { + let cycle_files: std::collections::HashSet = graph.cycles.iter() + 
.flat_map(|c| c.nodes.iter().cloned()) + .collect(); + let nyx_map = crate::token_metrics::parse_nyx_commands(&self.api_state.root_path); + for preset_name in preset_names_ref { + if let Some(cmd) = nyx_map.get(preset_name) { + let references_cycle = cycle_files.iter().any(|f| cmd.contains(f.as_str())); + if references_cycle { + report.warnings.push(format!( + "preset '{}' references a file in a dependency cycle", + preset_name + )); + } + } + } + } + } + + Ok(McpToolResult { + content: vec![McpContent::text( + serde_json::to_string(&report).unwrap_or_default(), + )], + is_error: None, + }) + } + + "watch_graph" => { + use notify::{RecursiveMode, Watcher}; + use std::sync::mpsc; + use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; + + let args = &call.arguments; + let root_str = args + .get("root") + .and_then(|v| v.as_str()) + .ok_or("Missing root")? + .to_string(); + let timeout_secs = args + .get("timeout_secs") + .and_then(|v| v.as_u64()) + .unwrap_or(30) + .min(300); + + let watch_path = std::path::PathBuf::from(&root_str); + let (tx, rx) = mpsc::channel(); + + let mut watcher = notify::recommended_watcher(move |res: notify::Result| { + if let Ok(event) = res { + let _ = tx.send(event); + } + }).map_err(|e| format!("Failed to create watcher: {}", e))?; + + watcher.watch(&watch_path, RecursiveMode::Recursive) + .map_err(|e| format!("Failed to watch {}: {}", root_str, e))?; + + let source_extensions: std::collections::HashSet<&str> = + ["rs", "go", "py", "ts", "js", "dart"].iter().copied().collect(); + + let deadline = Instant::now() + Duration::from_secs(timeout_secs); + let mut event_count = 0u64; + + while Instant::now() < deadline { + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { break; } + let timeout = remaining.min(Duration::from_millis(100)); + match rx.recv_timeout(timeout) { + Ok(event) => { + for path in &event.paths { + let ext = path.extension() + .and_then(|e| e.to_str()) + .unwrap_or(""); + 
if !source_extensions.contains(ext) { + continue; + } + let timestamp_ms = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + let graph_event = GraphEvent { + kind: GraphEventKind::FileReindexed, + path: path.to_string_lossy().to_string(), + timestamp_ms, + }; + println!("{}", serde_json::to_string(&graph_event).unwrap_or_default()); + event_count += 1; + } + } + Err(mpsc::RecvTimeoutError::Timeout) => continue, + Err(mpsc::RecvTimeoutError::Disconnected) => break, + } + } + + Ok(McpToolResult { + content: vec![McpContent::text( + serde_json::to_string(&serde_json::json!({ + "events_emitted": event_count, + "timeout_secs": timeout_secs, + "root": root_str, + })).unwrap_or_default(), + )], + is_error: None, + }) + } + + // ----------------------------------------------------------------- + // search_in_symbol — scope a search to one function's body + // ----------------------------------------------------------------- + "search_in_symbol" => { + let args = &call.arguments; + let file = args.get("file").and_then(|v| v.as_str()).ok_or("Missing file")?; + let symbol = args.get("symbol").and_then(|v| v.as_str()).ok_or("Missing symbol")?; + let pattern = args.get("pattern").and_then(|v| v.as_str()).ok_or("Missing pattern")?; + let ctx = args.get("context_lines").and_then(|v| v.as_u64()).unwrap_or(2) as usize; + + // 1. Locate the file in the skeleton index + let files = self.api_state.mapped_files.lock().map(|g| g.clone()).unwrap_or_default(); + let mf = files.values() + .find(|f| f.path == file || f.path.contains(file)) + .ok_or_else(|| format!("File not found: {}", file))?; + + // 2. 
Find the symbol (symbol_name, qualified_name, or raw text) + let sig = mf.signatures.iter() + .find(|s| { + s.symbol_name.as_deref() == Some(symbol) + || s.qualified_name.as_deref() == Some(symbol) + || s.raw.contains(symbol) + }) + .ok_or_else(|| format!("Symbol '{}' not found in {}", symbol, file))?; + + let sym_start = sig.line_start; // 0-indexed + + // 3. Estimate end: next symbol's line_start, fallback +500 + let sym_end = mf.signatures.iter() + .filter(|s| s.line_start > sym_start) + .map(|s| s.line_start) + .min() + .unwrap_or(sym_start + 500); + + // 4. Content search scoped to this file by glob + let fname = std::path::Path::new(&mf.path) + .file_name().and_then(|n| n.to_str()).unwrap_or(file); + let opts = crate::search::SearchOptions { + case_sensitive: true, + context_lines: ctx, + max_results: 500, + file_glob: Some(format!("**/{}", fname)), + ..Default::default() + }; + let sr = crate::search::search_content(&self.api_state.root_path, pattern, &opts) + .map_err(|e| e)?; + + // 5. 
Filter to the symbol's estimated line range (convert 0-indexed → 1-indexed) + let in_range: Vec<_> = sr.matches.into_iter() + .filter(|m| m.line_number > sym_start && m.line_number <= sym_end + 1) + .collect(); + let match_count = in_range.len(); + + let result = serde_json::json!({ + "file": mf.path, + "symbol": symbol, + "symbol_kind": format!("{:?}", sig.kind), + "symbol_line": sym_start + 1, + "estimated_end_line": sym_end + 1, + "pattern": pattern, + "match_count": match_count, + "matches": in_range, + }); + Ok(McpToolResult { + content: vec![McpContent::text(serde_json::to_string_pretty(&result).unwrap_or_default())], + is_error: None, + }) + } + + // ----------------------------------------------------------------- + // list_key_handlers — TUI key-binding map + // ----------------------------------------------------------------- + "list_key_handlers" => { + let args = &call.arguments; + let file = args.get("file").and_then(|v| v.as_str()).ok_or("Missing file")?; + let ctx = args.get("context_lines").and_then(|v| v.as_u64()).unwrap_or(4) as usize; + + let files = self.api_state.mapped_files.lock().map(|g| g.clone()).unwrap_or_default(); + let mf = files.values() + .find(|f| f.path == file || f.path.contains(file)) + .ok_or_else(|| format!("File not found: {}", file))?; + let fname = std::path::Path::new(&mf.path) + .file_name().and_then(|n| n.to_str()).unwrap_or(file); + let glob = format!("**/{}", fname); + + // Search for both dominant key-handler syntaxes + let mut all_matches = Vec::new(); + for pattern in &[r#"case ""#, r#"== ""#] { + let opts = crate::search::SearchOptions { + case_sensitive: true, + context_lines: ctx, + max_results: 300, + file_glob: Some(glob.clone()), + ..Default::default() + }; + if let Ok(sr) = crate::search::search_content(&self.api_state.root_path, pattern, &opts) { + all_matches.extend(sr.matches); + } + } + + // Group by extracted key string (BTreeMap keeps keys sorted) + let mut key_map: std::collections::BTreeMap> = + 
std::collections::BTreeMap::new(); + for m in &all_matches { + if let Some(key) = extract_quoted_key(&m.line) { + key_map.entry(key).or_default().push(serde_json::json!({ + "line": m.line_number, + "text": m.line.trim(), + "before_context": m.before_context, + "after_context": m.after_context, + })); + } + } + + let handlers: Vec<_> = key_map.iter().map(|(k, v)| serde_json::json!({ + "key": k, + "occurrences": v, + })).collect(); + + let result = serde_json::json!({ + "file": mf.path, + "handler_count": handlers.len(), + "handlers": handlers, + }); + Ok(McpToolResult { + content: vec![McpContent::text(serde_json::to_string_pretty(&result).unwrap_or_default())], + is_error: None, + }) + } + + // ----------------------------------------------------------------- + // map_state_machine — state × key-handlers matrix + // ----------------------------------------------------------------- + "map_state_machine" => { + let args = &call.arguments; + let file = args.get("file").and_then(|v| v.as_str()).ok_or("Missing file")?; + let state_var = args.get("state_var").and_then(|v| v.as_str()).unwrap_or("m.state").to_string(); + let state_prefix = args.get("state_prefix").and_then(|v| v.as_str()).unwrap_or("State").to_string(); + + let files = self.api_state.mapped_files.lock().map(|g| g.clone()).unwrap_or_default(); + let mf = files.values() + .find(|f| f.path == file || f.path.contains(file)) + .ok_or_else(|| format!("File not found: {}", file))?; + let fname = std::path::Path::new(&mf.path) + .file_name().and_then(|n| n.to_str()).unwrap_or(file); + let glob = format!("**/{}", fname); + + // Helper: build SearchOptions for this file + let make_opts = |max: usize| crate::search::SearchOptions { + case_sensitive: true, + max_results: max, + file_glob: Some(glob.clone()), + ..Default::default() + }; + + // 1. 
Find all state enum variants by searching for the prefix
+            let mut known_states: Vec<String> = Vec::new();
+            if let Ok(sr) = crate::search::search_content(
+                &self.api_state.root_path, &state_prefix, &make_opts(300))
+            {
+                for m in &sr.matches {
+                    let mut pos = 0usize;
+                    while pos < m.line.len() {
+                        if let Some(idx) = m.line[pos..].find(&state_prefix as &str) {
+                            let abs = pos + idx;
+                            let rest = &m.line[abs..];
+                            let end = rest.find(|c: char| !c.is_alphanumeric() && c != '_')
+                                .unwrap_or(rest.len());
+                            let name = &rest[..end];
+                            if name.len() > state_prefix.len() {
+                                let name = name.to_string();
+                                if !known_states.contains(&name) {
+                                    known_states.push(name);
+                                }
+                            }
+                            pos = abs + 1;
+                        } else {
+                            break;
+                        }
+                    }
+                }
+            }
+
+            // 2. Find all state guard locations: `state_var == `
+            let guard_pattern = format!("{} == ", state_var);
+            let mut guard_map: HashMap<String, Vec<usize>> = HashMap::new();
+            if let Ok(sr) = crate::search::search_content(
+                &self.api_state.root_path, &guard_pattern, &make_opts(500))
+            {
+                for m in &sr.matches {
+                    for state in &known_states {
+                        if m.line.contains(state.as_str()) {
+                            guard_map.entry(state.clone()).or_default().push(m.line_number);
+                        }
+                    }
+                }
+            }
+
+            // 3. Collect all key handler matches
+            let mut all_key_matches = Vec::new();
+            for pattern in &[r#"case ""#, r#"== ""#] {
+                if let Ok(sr) = crate::search::search_content(
+                    &self.api_state.root_path, pattern, &make_opts(500))
+                {
+                    all_key_matches.extend(sr.matches);
+                }
+            }
+
+            // 4.
For each state, attribute key handlers within WINDOW lines of a guard
+            const WINDOW: usize = 60;
+            let mut state_handlers: serde_json::Map<String, serde_json::Value> =
+                serde_json::Map::new();
+
+            for state in &known_states {
+                let guard_lines = guard_map.get(state).cloned().unwrap_or_default();
+                let mut keys: Vec<String> = Vec::new();
+                let mut handler_details: Vec<serde_json::Value> = Vec::new();
+
+                for &guard_ln in &guard_lines {
+                    for km in &all_key_matches {
+                        if km.line_number > guard_ln && km.line_number < guard_ln + WINDOW {
+                            if let Some(key) = extract_quoted_key(&km.line) {
+                                if !keys.contains(&key) {
+                                    keys.push(key.clone());
+                                    handler_details.push(serde_json::json!({
+                                        "key": key,
+                                        "line": km.line_number,
+                                        "text": km.line.trim(),
+                                    }));
+                                }
+                            }
+                        }
+                    }
+                }
+
+                state_handlers.insert(state.clone(), serde_json::json!({
+                    "guard_lines": guard_lines,
+                    "keys": keys,
+                    "handlers": handler_details,
+                }));
+            }
+
+            let result = serde_json::json!({
+                "file": mf.path,
+                "state_var": state_var,
+                "state_prefix": state_prefix,
+                "states": known_states,
+                "state_handlers": state_handlers,
+            });
+            Ok(McpToolResult {
+                content: vec![McpContent::text(serde_json::to_string_pretty(&result).unwrap_or_default())],
+                is_error: None,
+            })
+        }
+
+        // -----------------------------------------------------------------
+        // doc_index — list all document nodes with structure + edges
+        // -----------------------------------------------------------------
+        "doc_index" => {
+            let docs = self.api_state.doc_nodes()?;
             Ok(McpToolResult {
                 content: vec![McpContent::text(
-                    serde_json::to_string_pretty(&report).unwrap_or_default(),
+                    serde_json::to_string_pretty(&docs).unwrap_or_default(),
+                )],
+                is_error: None,
+            })
+        }
+
+        // -----------------------------------------------------------------
+        // doc_context — single doc + referenced code skeletons
+        // -----------------------------------------------------------------
+        "doc_context" => {
+            let args = &call.arguments;
+            let doc_path = args.get("doc_path").and_then(|v| v.as_str())
+
.ok_or("Missing doc_path")?; + let budget = args.get("budget").and_then(|v| v.as_u64()).unwrap_or(4000) as usize; + + // Rebuild graph so edges exist + if let Err(e) = self.api_state.rebuild_graph() { + return Err(e); + } + + // Find the doc in mapped_files (exact match or substring) + let files = self.api_state.mapped_files.lock().map_err(|e| e.to_string())?; + let (module_id, mf) = files.iter() + .find(|(_, f)| f.path == doc_path || f.path.contains(doc_path)) + .ok_or_else(|| format!("Document not found: {}", doc_path))?; + + let doc_sigs: Vec = mf.signatures.iter().map(|s| s.raw.clone()).collect(); + let doc_imports = mf.imports.clone(); + let doc_path_owned = mf.path.clone(); + let module_id_owned = module_id.clone(); + + // Drop the lock before calling ranked_skeleton + drop(files); + + // Use the doc's imports as focus files for ranked skeleton + let focus: Vec = doc_imports.clone(); + let ranked = if focus.is_empty() { + vec![] + } else { + self.api_state.ranked_skeleton(&focus, budget).unwrap_or_default() + }; + + let total_tokens: usize = ranked.iter().map(|f| f.estimated_tokens).sum(); + + let referenced: Vec = ranked.iter().map(|f| { + serde_json::json!({ + "path": f.path, + "rank": f.rank, + "signatureCount": f.signature_count, + "estimatedTokens": f.estimated_tokens, + "signatures": f.signatures, + }) + }).collect(); + + let result = serde_json::json!({ + "doc": { + "path": doc_path_owned, + "moduleId": module_id_owned, + "signatures": doc_sigs, + "imports": doc_imports, + }, + "referencedFiles": referenced, + "totalTokens": total_tokens, + }); + + Ok(McpToolResult { + content: vec![McpContent::text( + serde_json::to_string_pretty(&result).unwrap_or_default(), + )], + is_error: None, + }) + } + + // ----------------------------------------------------------------- + // query_docs — doc-biased context retrieval + // ----------------------------------------------------------------- + "query_docs" => { + let args = &call.arguments; + let query = 
args.get("query").and_then(|v| v.as_str()) + .ok_or("Missing query")?.to_string(); + let budget = args.get("budget").and_then(|v| v.as_u64()).unwrap_or(8000) as usize; + let model_str = args.get("model").and_then(|v| v.as_str()) + .unwrap_or("claude").to_string(); + + // Rebuild graph + if let Err(e) = self.api_state.rebuild_graph() { + return Err(e); + } + + // Step 1: BM25 search across all files + let bm25_opts = crate::search::BM25Options { + max_results: 30, + ..Default::default() + }; + let bm25_result = crate::search::bm25_search( + &self.api_state.root_path, &query, &bm25_opts, + ).unwrap_or_default(); + + // Step 2: Separate into doc files and code files + let mut doc_files: Vec = Vec::new(); + let mut code_files: Vec = Vec::new(); + let mut seen = std::collections::HashSet::new(); + + for m in &bm25_result.matches { + if !seen.insert(m.path.clone()) { continue; } + if crate::api::is_doc_path(&m.path) { + doc_files.push(m.path.clone()); + } else { + code_files.push(m.path.clone()); + } + } + + // Step 3: Follow doc cross-refs into code + let files = self.api_state.mapped_files.lock().map_err(|e| e.to_string())?; + let mut ref_code: Vec = Vec::new(); + for doc_path in &doc_files { + if let Some(mf) = files.get(doc_path.as_str()) { + for imp in &mf.imports { + if !seen.contains(imp) && !crate::api::is_doc_path(imp) { + seen.insert(imp.clone()); + ref_code.push(imp.clone()); + } + } + } + } + drop(files); + + // Merge: doc imports come after direct code hits + code_files.extend(ref_code); + + // Step 4: Build ranked skeleton — docs as primary focus, code as secondary + let mut all_focus = doc_files.clone(); + all_focus.extend(code_files.iter().cloned()); + all_focus.truncate(30); + + let ranked = self.api_state.ranked_skeleton(&all_focus, budget) + .unwrap_or_default(); + + // Step 5: Build context text — docs first, then code + let mut doc_entries = Vec::new(); + let mut code_entries = Vec::new(); + let mut context_text = format!("## Doc Context for: 
{}\n\n", query); + let mut total_tokens = 0usize; + + for f in &ranked { + let entry = serde_json::json!({ + "path": f.path, + "rank": f.rank, + "signatureCount": f.signature_count, + "estimatedTokens": f.estimated_tokens, + "signatures": f.signatures, + }); + total_tokens += f.estimated_tokens; + + if crate::api::is_doc_path(&f.path) { + context_text.push_str(&format!( + "// [DOC] {} (rank: {:.4}, {} tokens)\n", f.path, f.rank, f.estimated_tokens + )); + doc_entries.push(entry); + } else { + context_text.push_str(&format!( + "// {} (rank: {:.4}, {} tokens)\n", f.path, f.rank, f.estimated_tokens + )); + code_entries.push(entry); + } + for sig in &f.signatures { + context_text.push_str(&format!(" {}\n", sig)); + } + context_text.push('\n'); + } + + // Step 6: Health score + let sig_count: usize = ranked.iter().map(|f| f.signatures.len()).sum(); + let model = model_str.parse::().unwrap_or_default(); + let health_opts = crate::token_metrics::HealthOpts { + model, + window_size: 0, + key_positions: crate::token_metrics::key_positions_from_order( + &ranked.iter().map(|f| f.path.clone()).collect::>(), + &doc_files, + ), + signature_count: sig_count, + signature_tokens: (total_tokens as f64 * 0.85) as usize, + }; + let health = crate::token_metrics::analyze(&context_text, &health_opts); + + let result = serde_json::json!({ + "context": context_text, + "docFiles": doc_entries, + "codeFiles": code_entries, + "focusDocs": doc_files, + "totalTokens": total_tokens, + "health": health, + }); + + Ok(McpToolResult { + content: vec![McpContent::text( + serde_json::to_string_pretty(&result).unwrap_or_default(), )], is_error: None, }) @@ -1937,6 +2677,21 @@ impl McpServer { } } +/// Extract the first double-quoted token from a line of code. +/// e.g. `case "ctrl+c":` → Some("ctrl+c"), `key == "up"` → Some("up"). +/// Returns None if no quoted token ≤ 30 chars is found. +fn extract_quoted_key(line: &str) -> Option { + let start = line.find('"')? 
+ 1; + let rest = &line[start..]; + let end = rest.find('"')?; + let key = &rest[..end]; + if !key.is_empty() && key.len() <= 30 { + Some(key.to_string()) + } else { + None + } +} + fn jsonrpc_ok(id: &Option, result: serde_json::Value) -> String { serde_json::to_string(&serde_json::json!({ "jsonrpc": "2.0", diff --git a/third_party/cartographer/mapper-core/cartographer/src/scanner.rs b/third_party/cartographer/mapper-core/cartographer/src/scanner.rs index 9e5a18e7..b33aacd9 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/scanner.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/scanner.rs @@ -153,6 +153,7 @@ pub const IGNORED_FILES: &[&str] = &[ // Cartographer runtime state files ".cartographer_cache.json", ".cartographer_watch_state.json", + ".cartographer_history.json", ]; // Patterns for extension-based blocking diff --git a/third_party/cartographer/mapper-core/cartographer/src/search.rs b/third_party/cartographer/mapper-core/cartographer/src/search.rs index 6934f9d1..07b1a1a4 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/search.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/search.rs @@ -4,6 +4,7 @@ //! block) unless `no_ignore` is set, in which case raw `walkdir` is used. use crate::scanner::{is_ignored_path, scan_files_with_noise_tracking}; +use rayon::prelude::*; use regex::{Regex, RegexBuilder}; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; @@ -480,100 +481,121 @@ pub fn search_content( let file_list = enumerate_files(root, opts.no_ignore)?; - let mut matches: Vec = Vec::new(); - let mut files_with_m: Vec = Vec::new(); - let mut files_without_m: Vec = Vec::new(); - let mut file_counts: Vec = Vec::new(); - let mut files_searched: usize = 0; - let mut truncated = false; + // ── Parallel file processing ───────────────────────────────────────────── + // Local result carrier — one per file that passes filters and is readable. 
+    enum FileResult {
+        Matches(Vec<ContentMatch>),
+        WithMatch(String),
+        WithoutMatch(String),
+        Count(FileCount),
+        Searched, // read but no matches (keeps files_searched count accurate)
+    }

-    'files: for abs_path in &file_list {
-        let rel = rel_path(root, abs_path);
+    let per_file: Vec<FileResult> = file_list
+        .par_iter()
+        .filter_map(|abs_path| {
+            let rel = rel_path(root, abs_path);

-        // search_path prefix filter
-        if let Some(ref sp) = opts.search_path {
-            let sp = sp.trim_end_matches('/');
-            if !rel.starts_with(&format!("{}/", sp)) && rel != sp {
-                continue;
+            // search_path prefix filter
+            if let Some(ref sp) = opts.search_path {
+                let sp = sp.trim_end_matches('/');
+                if !rel.starts_with(&format!("{}/", sp)) && rel != sp {
+                    return None;
+                }
             }
-        }

-        // include/exclude glob
-        if let Some(ref gre) = include_filter {
-            if !gre.is_match(&rel) { continue; }
-        }
-        if let Some(ref gre) = exclude_filter {
-            if gre.is_match(&rel) { continue; }
-        }
+            // include/exclude glob
+            if let Some(ref gre) = include_filter {
+                if !gre.is_match(&rel) { return None; }
+            }
+            if let Some(ref gre) = exclude_filter {
+                if gre.is_match(&rel) { return None; }
+            }

-        let content = match std::fs::read_to_string(abs_path) {
-            Ok(c) => c,
-            Err(_) => continue, // binary or unreadable — skip silently
-        };
+            let content = std::fs::read_to_string(abs_path).ok()?;
+            let lines: Vec<&str> = content.lines().collect();

-        files_searched += 1;
-        let lines: Vec<&str> = content.lines().collect();
-
-        // ── count_only mode ──────────────────────────────────────────────────
-        if opts.count_only {
-            let count = lines.iter().filter(|&&l| line_matches(&all_res, l, opts.invert_match)).count();
-            file_counts.push(FileCount { path: rel, count });
-            if file_counts.len() >= cap {
-                truncated = true;
-                break 'files;
+            // ── count_only ───────────────────────────────────────────────────
+            if opts.count_only {
+                let count = lines.iter().filter(|&&l| line_matches(&all_res, l, opts.invert_match)).count();
+                return Some(FileResult::Count(FileCount { path: rel, count }));
             }
-            continue;
-        }

-        // ── files_with_matches / files_without_match mode ────────────────────
-        if opts.files_with_matches || opts.files_without_match {
-            let has = lines.iter().any(|&l| line_matches(&all_res, l, opts.invert_match));
-            if opts.files_with_matches && has {
-                files_with_m.push(rel.clone());
-                if files_with_m.len() >= cap { truncated = true; break 'files; }
+            // ── files_with_matches / files_without_match ─────────────────────
+            if opts.files_with_matches || opts.files_without_match {
+                let has = lines.iter().any(|&l| line_matches(&all_res, l, opts.invert_match));
+                return match (opts.files_with_matches && has, opts.files_without_match && !has) {
+                    (true, _) => Some(FileResult::WithMatch(rel)),
+                    (_, true) => Some(FileResult::WithoutMatch(rel)),
+                    _ => Some(FileResult::Searched),
+                };
             }
-            if opts.files_without_match && !has {
-                files_without_m.push(rel);
-                if files_without_m.len() >= cap { truncated = true; break 'files; }
+
+            // ── normal match mode ────────────────────────────────────────────
+            let mut file_matches: Vec<ContentMatch> = Vec::new();
+            for (idx, &line) in lines.iter().enumerate() {
+                if !line_matches(&all_res, line, opts.invert_match) { continue; }
+
+                let spans: Vec<_> = all_res.iter().flat_map(|re| re.find_iter(line)).collect();
+                let matched_texts: Vec<String> = if opts.only_matching {
+                    spans.iter().map(|m| m.as_str().to_string()).collect()
+                } else {
+                    vec![]
+                };
+                let match_ranges: Vec<[usize; 2]> = spans.iter()
+                    .map(|m| [m.start(), m.end()])
+                    .collect();
+
+                file_matches.push(ContentMatch {
+                    path: rel.clone(),
+                    line_number: idx + 1,
+                    line: if opts.only_matching { String::new() } else { line.to_string() },
+                    matched_texts,
+                    match_ranges,
+                    before_context: context_slice(&lines, idx, before_ctx, true),
+                    after_context: context_slice(&lines, idx, after_ctx, false),
+                });
             }
-            continue;
-        }

-        // ── normal match mode ────────────────────────────────────────────────
-        for (idx, &line) in lines.iter().enumerate() {
-            if !line_matches(&all_res, line, opts.invert_match) {
-                continue;
+            if file_matches.is_empty() {
+                Some(FileResult::Searched)
+            } else {
+                Some(FileResult::Matches(file_matches))
             }
+        })
+        .collect();

-            // Collect all match spans once — used for both only_matching text and ranges.
-            let spans: Vec<_> = all_res.iter()
-                .flat_map(|re| re.find_iter(line))
-                .collect();
+    // ── Phase 2: flatten results and enforce hard cap ─────────────────────────
+    let mut matches: Vec<ContentMatch> = Vec::new();
+    let mut files_with_m: Vec<String> = Vec::new();
+    let mut files_without_m: Vec<String> = Vec::new();
+    let mut file_counts: Vec<FileCount> = Vec::new();
+    let mut files_searched: usize = per_file.len();
+    let mut truncated = false;

-            let matched_texts: Vec<String> = if opts.only_matching {
-                spans.iter().map(|m| m.as_str().to_string()).collect()
-            } else {
-                vec![]
-            };
-
-            let match_ranges: Vec<[usize; 2]> = spans.iter()
-                .map(|m| [m.start(), m.end()])
-                .collect();
-
-            matches.push(ContentMatch {
-                path: rel.clone(),
-                line_number: idx + 1,
-                line: if opts.only_matching { String::new() } else { line.to_string() },
-                matched_texts,
-                match_ranges,
-                before_context: context_slice(&lines, idx, before_ctx, true),
-                after_context: context_slice(&lines, idx, after_ctx, false),
-            });
-
-            if matches.len() >= cap {
-                truncated = true;
-                break 'files;
+    for result in per_file {
+        match result {
+            FileResult::Count(fc) => {
+                if file_counts.len() < cap { file_counts.push(fc); }
+                else { truncated = true; }
+            }
+            FileResult::WithMatch(path) => {
+                if files_with_m.len() < cap { files_with_m.push(path); }
+                else { truncated = true; }
+            }
+            FileResult::WithoutMatch(path) => {
+                if files_without_m.len() < cap { files_without_m.push(path); }
+                else { truncated = true; }
+            }
+            FileResult::Matches(mut file_matches) => {
+                let remaining = cap.saturating_sub(matches.len());
+                if file_matches.len() > remaining {
+                    file_matches.truncate(remaining);
+                    truncated = true;
+                }
+                matches.extend(file_matches);
             }
+
FileResult::Searched => {} } } diff --git a/third_party/cartographer/mapper-core/cartographer/src/token_metrics.rs b/third_party/cartographer/mapper-core/cartographer/src/token_metrics.rs index 95e47c71..040b705b 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/token_metrics.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/token_metrics.rs @@ -148,6 +148,9 @@ pub struct ContextHealthReport { // Actionable pub warnings: Vec, pub recommendations: Vec, + + // NYX.md [commands] preset names (populated when root path is known) + pub nyx_commands: Option>, } // --------------------------------------------------------------------------- @@ -418,7 +421,48 @@ pub fn analyze(content: &str, opts: &HealthOpts) -> ContextHealthReport { metrics: m, warnings, recommendations, + nyx_commands: None, + } +} + +/// Parse the `[commands]` section from `NYX.md` at `root/NYX.md`. +/// +/// Scans for a `[commands]` section header, then reads `key = "value"` lines +/// until the next `[section]` header or EOF. Returns a map of preset name → command string. 
+pub fn parse_nyx_commands(root: &std::path::Path) -> std::collections::HashMap<String, String> {
+    let mut map = std::collections::HashMap::new();
+    let nyx_path = root.join("NYX.md");
+    let text = match std::fs::read_to_string(&nyx_path) {
+        Ok(t) => t,
+        Err(_) => return map,
+    };
+
+    let mut in_commands = false;
+    for line in text.lines() {
+        let trimmed = line.trim();
+        if trimmed.starts_with('[') && trimmed.ends_with(']') {
+            in_commands = trimmed == "[commands]";
+            continue;
+        }
+        if !in_commands {
+            continue;
+        }
+        // Parse `key = "value"` lines
+        if let Some(eq_pos) = trimmed.find('=') {
+            let key = trimmed[..eq_pos].trim().to_string();
+            let raw_val = trimmed[eq_pos + 1..].trim();
+            // Strip surrounding quotes if present
+            let value = if raw_val.starts_with('"') && raw_val.ends_with('"') && raw_val.len() >= 2 {
+                raw_val[1..raw_val.len() - 1].to_string()
+            } else {
+                raw_val.to_string()
+            };
+            if !key.is_empty() {
+                map.insert(key, value);
+            }
+        }
     }
+    map
 }

 /// Compute key module positions from an ordered list of module IDs and a list of

From ddb90eb0415d3320b4d886a4e63779fa60fb9c5a Mon Sep 17 00:00:00 2001
From: Lisa
Date: Tue, 21 Apr 2026 18:01:31 +0200
Subject: [PATCH 10/20] chore: gitignore mcp .cartographer_cache.json
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MCP's cartographer cache is pinned to the current commit hash and
regenerated on every startup — not source, shouldn't track.
Co-Authored-By: Claude Opus 4.7 --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 3aaefdee..a575087a 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,6 @@ testdata/**/pubspec.lock # Vendored Cartographer Rust build artifacts third_party/cartographer/mapper-core/cartographer/target/ + +# MCP runtime caches (pinned to commit hash, regenerated on startup) +internal/mcp/.cartographer_cache.json From 2e1a9082b821e704d4e4568a5c30c9123104019f Mon Sep 17 00:00:00 2001 From: Lisa Date: Tue, 21 Apr 2026 21:14:55 +0200 Subject: [PATCH 11/20] fix(cartographer): resolve rebuild_graph mutex deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ApiState::rebuild_graph held the mapped_files lock across its loop and then called resolve_import_target, which re-acquired the same non-reentrant std::sync::Mutex — any repo with a resolvable import deadlocked. diagram and health hung on CKB itself (1093 files) and the Go bridge's cartographer.MapProject blocked under the same condition. Split the resolver: a public method that locks and a private helper that takes the already-held map; rebuild_graph now calls the helper. Fix contributed upstream with a regression test; pulled back into the vendor tree via scripts/sync-cartographer.sh — that sync also brings in the upstream call_graph, diagram_export, and html_export modules along with diagram/main updates from upstream [Unreleased]. 
--- CHANGELOG.md | 12 + .../mapper-core/cartographer/src/api.rs | 48 +- .../cartographer/src/call_graph.rs | 728 ++++++++ .../mapper-core/cartographer/src/diagram.rs | 1598 ++++++++++++++++- .../cartographer/src/diagram_export.rs | 170 ++ .../cartographer/src/git_analysis.rs | 72 + .../cartographer/src/html_export.rs | 413 +++++ .../mapper-core/cartographer/src/lib.rs | 12 + .../mapper-core/cartographer/src/main.rs | 199 +- 9 files changed, 3210 insertions(+), 42 deletions(-) create mode 100644 third_party/cartographer/mapper-core/cartographer/src/call_graph.rs create mode 100644 third_party/cartographer/mapper-core/cartographer/src/diagram_export.rs create mode 100644 third_party/cartographer/mapper-core/cartographer/src/html_export.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 110c6003..008e7b07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,18 @@ All notable changes to CKB will be documented in this file. ### Fixed +- **Vendored Cartographer `rebuild_graph` deadlock** — upstream + `ApiState::rebuild_graph` held the `mapped_files` Mutex across its + loop and then called `resolve_import_target`, which re-acquired the + same non-reentrant `std::sync::Mutex`. Any project with a resolvable + import deadlocked — the `cartographer diagram` / `cartographer health` + CLIs hung, and the Go bridge's `cartographer.MapProject` would block + any time CKB fed it a repo with imports. Fixed in the vendored tree + (and contributed back upstream) by splitting the resolver: a public + method that locks, and a private helper that takes the already-held + map; `rebuild_graph` now calls the helper. Discovered during + end-to-end smoke testing against CKB itself (1093 files). Regression + test added upstream. - **`localize-tree-sitter-symbols.sh` dropped grammar C parsers** — the script extracted archive members via `ar x`, which silently clobbers files when multiple members share a name. 
Cargo emits a `parser.o` diff --git a/third_party/cartographer/mapper-core/cartographer/src/api.rs b/third_party/cartographer/mapper-core/cartographer/src/api.rs index b32d213b..c963bcf8 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/api.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/api.rs @@ -118,6 +118,9 @@ pub struct GraphNode { pub cochange_partners: Option, /// Shannon entropy of co-change distribution (higher = more scattered changes). pub cochange_entropy: Option, + /// Dominant git author by commit count (bot/format commits excluded). + /// Populated by `enrich_with_git`. Powers the `--color-by=owner` diagram mode. + pub owner: Option, } /// A source position range using LIP semantics: line is 0-based, char is UTF-8 byte offset from line start. @@ -458,10 +461,14 @@ impl ApiState { fan_out: None, cochange_partners: None, cochange_entropy: None, + owner: None, }); for import in &file.imports { - if let Some(target) = self.resolve_import_target(import, module_id) { + // `rebuild_graph` already holds the `mapped_files` lock; call the + // map-taking helper directly. Calling `resolve_import_target` here + // would re-enter the non-reentrant Mutex and deadlock. + if let Some(target) = Self::resolve_import_target_in(&files, import, module_id) { edges.push(GraphEdge { source: module_id.clone(), target, @@ -1201,7 +1208,17 @@ impl ApiState { fn resolve_import_target(&self, import: &str, source: &str) -> Option { let files = self.mapped_files.lock().ok()?; + Self::resolve_import_target_in(&files, import, source) + } + // Same lookup as `resolve_import_target` but takes the already-locked map. + // Used by `rebuild_graph` (which holds the lock for the whole rebuild) to + // avoid re-entering the non-reentrant Mutex and deadlocking. 
+ fn resolve_import_target_in( + files: &HashMap, + import: &str, + source: &str, + ) -> Option { let (module_path, symbol_hint) = parse_import_parts(import); let stem = derive_module_stem(&module_path); @@ -1667,4 +1684,33 @@ mod tests { assert_eq!(derive_module_stem("react-dom"), "react"); assert_eq!(derive_module_stem("src/api/handler"), "handler"); } + + // Regression test: before the fix, rebuild_graph held the mapped_files + // Mutex across its inner loop and then called resolve_import_target, + // which re-acquired the same non-reentrant Mutex → deadlock on any + // project with at least one resolvable import. Any resolved edge is + // enough to prove the hang is gone; correctness of the graph content + // is covered elsewhere. + #[test] + fn rebuild_graph_does_not_deadlock_on_imports() { + let state = ApiState::new(std::path::PathBuf::from("/test")); + { + let mut files = state.mapped_files.lock().unwrap(); + files.insert( + "a".to_string(), + MappedFile::from_minimal("a.rs".to_string(), vec!["b".to_string()]), + ); + files.insert( + "b".to_string(), + MappedFile::from_minimal("b.rs".to_string(), vec![]), + ); + } + let graph = state.rebuild_graph().expect("rebuild_graph must return"); + assert_eq!(graph.nodes.len(), 2); + assert!( + graph.edges.iter().any(|e| e.source == "a" && e.target == "b"), + "expected resolved a->b edge, got edges: {:?}", + graph.edges + ); + } } diff --git a/third_party/cartographer/mapper-core/cartographer/src/call_graph.rs b/third_party/cartographer/mapper-core/cartographer/src/call_graph.rs new file mode 100644 index 00000000..0a9ae8d6 --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/src/call_graph.rs @@ -0,0 +1,728 @@ +//! File-local call-graph extraction for Rust and Python. +//! +//! Given one source file, produce (caller, callee) edges between functions +//! defined *in that file*. Calls into other files or the stdlib are dropped +//! 
and reported as `unresolved_count` — the goal is "what does this file do
+//! internally", not project-wide call tracing (that's a much bigger job and
+//! would need cross-file resolution).
+//!
+//! Output is shaped as a `ProjectGraphResponse` so the existing diagram
+//! renderers (Mermaid/DOT/ASCII + focus/depth/max_nodes) work unchanged.
+//!
+//! Gated on `lang-rust` / `lang-python` Cargo features, matching the rest of
+//! the tree-sitter surface in `extractor.rs`.
+
+use std::path::Path;
+
+use crate::api::{GraphEdge, GraphMetadata, GraphNode, ProjectGraphResponse};
+
+#[cfg(any(feature = "lang-rust", feature = "lang-python"))]
+use tree_sitter::{Node, Parser};
+
+/// Aggregated call graph for a single source file.
+#[derive(Debug, Clone)]
+pub struct FileCallGraph {
+    /// Every function/method defined in the file, in source order.
+    pub functions: Vec<FunctionInfo>,
+    /// Caller → callee edges, both as qualified names from `functions`.
+    pub calls: Vec<(String, String)>,
+    /// Number of call sites where the callee could not be matched to a
+    /// function defined in this file (external, stdlib, or unresolved).
+    pub unresolved_count: usize,
+    /// Language tag ("rust" / "python") for the project graph we emit.
+    pub language: &'static str,
+}
+
+/// One function / method definition in the file being analysed.
+#[derive(Debug, Clone)]
+pub struct FunctionInfo {
+    /// Qualified name: `Type::method` in Rust, `Class.method` in Python, plain
+    /// function name at file scope.
+    pub qualified: String,
+    /// Bare method name — used for simple-name resolution when a callee only
+    /// names the method (e.g. `self.foo()`).
+    pub simple: String,
+    /// 1-based line number of the definition.
+    pub line: u32,
+    /// "fn" for free functions, "method" for impl/class members.
+    pub kind: &'static str,
+}
+
+/// Build a call graph for the given file. Returns `Ok(None)` when the file
+/// extension isn't one we extract call graphs for (currently `.rs`/`.py`).
+/// Returns `Err` on unreadable files or parser init failures.
+pub fn build_file_call_graph(path: &Path, source: &str) -> Result<Option<FileCallGraph>, String> {
+    let ext = path
+        .extension()
+        .and_then(|e| e.to_str())
+        .map(|s| s.to_lowercase())
+        .unwrap_or_default();
+
+    match ext.as_str() {
+        #[cfg(feature = "lang-rust")]
+        "rs" => Ok(Some(extract_rust(source)?)),
+        #[cfg(feature = "lang-python")]
+        "py" => Ok(Some(extract_python(source)?)),
+        _ => Ok(None),
+    }
+}
+
+/// Wrap a `FileCallGraph` in a `ProjectGraphResponse` so `diagram::render()`
+/// can consume it directly. Each function becomes a node whose `module_id` is
+/// the qualified name; each call becomes an edge.
+pub fn to_project_graph(cg: &FileCallGraph, path: &Path) -> ProjectGraphResponse {
+    let path_str = path.to_string_lossy().into_owned();
+    let nodes: Vec<GraphNode> = cg
+        .functions
+        .iter()
+        .map(|f| GraphNode {
+            module_id: f.qualified.clone(),
+            // Render path shows "file.rs:Type::method" so the diagram carries
+            // enough info for a reader to find the function without inspecting
+            // module_id separately.
+            path: format!("{}:{}", path_str, f.qualified),
+            language: cg.language.to_string(),
+            // 1 "signature" = 1 function; used for hotspot sizing in DOT.
+            signature_count: 1,
+            complexity: None,
+            is_bridge: None,
+            bridge_score: None,
+            degree: None,
+            risk_level: None,
+            churn: None,
+            hotspot_score: None,
+            role: Some(f.kind.to_string()),
+            is_dead: None,
+            unreferenced_exports: None,
+            fan_in: None,
+            fan_out: None,
+            cochange_partners: None,
+            cochange_entropy: None,
+            owner: None,
+        })
+        .collect();
+
+    let edges: Vec<GraphEdge> = cg
+        .calls
+        .iter()
+        .map(|(src, tgt)| GraphEdge {
+            source: src.clone(),
+            target: tgt.clone(),
+            edge_type: "call".into(),
+            at_range: None,
+        })
+        .collect();
+
+    let mut languages = std::collections::HashMap::new();
+    languages.insert(cg.language.to_string(), nodes.len());
+
+    let total_edges = edges.len();
+    ProjectGraphResponse {
+        nodes,
+        edges,
+        cycles: vec![],
+        god_modules: vec![],
+        layer_violations: vec![],
+        metadata: GraphMetadata {
+            total_files: 1,
+            total_edges,
+            languages,
+            generated_at: String::new(),
+            bridge_count: None,
+            cycle_count: None,
+            god_module_count: None,
+            health_score: None,
+            layer_violation_count: None,
+            architectural_drift: None,
+            hotspot_count: None,
+            dead_code_count: None,
+            unreferenced_exports_count: None,
+        },
+        cochange_pairs: vec![],
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Rust
+// ---------------------------------------------------------------------------
+
+#[cfg(feature = "lang-rust")]
+fn extract_rust(source: &str) -> Result<FileCallGraph, String> {
+    let mut parser = Parser::new();
+    let lang = tree_sitter_rust::language();
+    parser
+        .set_language(&lang)
+        .map_err(|e| format!("tree-sitter rust init failed: {e}"))?;
+    let tree = parser
+        .parse(source.as_bytes(), None)
+        .ok_or_else(|| "tree-sitter parse returned None".to_string())?;
+    let src = source.as_bytes();
+
+    // Pass 1 — enumerate function definitions with scope stack.
+ let mut functions: Vec = Vec::new(); + let mut scope: Vec = Vec::new(); + collect_rust_functions(&tree.root_node(), src, &mut scope, &mut functions); + + // Pass 2 — for each function, walk its body and resolve call sites. + let resolver = Resolver::new(&functions); + let mut calls: Vec<(String, String)> = Vec::new(); + let mut unresolved_count: usize = 0; + let mut scope: Vec = Vec::new(); + collect_rust_calls( + &tree.root_node(), + src, + &mut scope, + &resolver, + &mut calls, + &mut unresolved_count, + ); + + Ok(FileCallGraph { + functions, + calls, + unresolved_count, + language: "rust", + }) +} + +#[cfg(feature = "lang-rust")] +fn collect_rust_functions( + node: &Node, + src: &[u8], + scope: &mut Vec, + out: &mut Vec, +) { + match node.kind() { + "impl_item" => { + let type_name = node + .child_by_field_name("type") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + // Drop generics for scope key so `impl Foo` and `impl Foo` match. + let base = type_name.split('<').next().unwrap_or(&type_name).trim().to_string(); + scope.push(base); + if let Some(body) = node.child_by_field_name("body") { + let mut cur = body.walk(); + for child in body.children(&mut cur) { + collect_rust_functions(&child, src, scope, out); + } + } + scope.pop(); + } + "function_item" => { + let name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + if name.is_empty() { + return; + } + let kind = if scope.is_empty() { "fn" } else { "method" }; + let qualified = qualify(scope, &name, "::"); + out.push(FunctionInfo { + qualified, + simple: name, + line: (node.start_position().row as u32) + 1, + kind, + }); + } + _ => { + let mut cur = node.walk(); + for child in node.children(&mut cur) { + collect_rust_functions(&child, src, scope, out); + } + } + } +} + +#[cfg(feature = "lang-rust")] +fn collect_rust_calls( + node: &Node, + src: &[u8], + scope: &mut Vec, + resolver: &Resolver, + out: &mut Vec<(String, String)>, + 
unresolved: &mut usize, +) { + match node.kind() { + "impl_item" => { + let type_name = node + .child_by_field_name("type") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + let base = type_name.split('<').next().unwrap_or(&type_name).trim().to_string(); + scope.push(base); + if let Some(body) = node.child_by_field_name("body") { + let mut cur = body.walk(); + for child in body.children(&mut cur) { + collect_rust_calls(&child, src, scope, resolver, out, unresolved); + } + } + scope.pop(); + } + "function_item" => { + let name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + if name.is_empty() { + return; + } + let caller_qual = qualify(scope, &name, "::"); + if let Some(body) = node.child_by_field_name("body") { + walk_rust_body(&body, src, &caller_qual, resolver, out, unresolved); + } + } + _ => { + let mut cur = node.walk(); + for child in node.children(&mut cur) { + collect_rust_calls(&child, src, scope, resolver, out, unresolved); + } + } + } +} + +#[cfg(feature = "lang-rust")] +fn walk_rust_body( + node: &Node, + src: &[u8], + caller: &str, + resolver: &Resolver, + out: &mut Vec<(String, String)>, + unresolved: &mut usize, +) { + if node.kind() == "call_expression" { + let callee_raw = node + .child_by_field_name("function") + .map(|n| rust_callee_name(&n, src)) + .unwrap_or_default(); + if !callee_raw.is_empty() { + match resolver.resolve(&callee_raw) { + Some(target) => { + if target != caller { + out.push((caller.to_string(), target)); + } + } + None => *unresolved += 1, + } + } + } + + // Recurse into children regardless — nested call expressions, closures, and + // blocks all legitimately hold more calls. + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk_rust_body(&child, src, caller, resolver, out, unresolved); + } +} + +/// Best-effort callee name extraction from a `call_expression`'s `function` +/// node. 
Returns the shortest form that a human reader would recognize: +/// foo() → "foo" +/// mod::foo() → "foo" +/// x.method() → "method" +/// Type::assoc() → "assoc" +/// Macros aren't call_expressions in tree-sitter-rust so we don't see them. +#[cfg(feature = "lang-rust")] +fn rust_callee_name(node: &Node, src: &[u8]) -> String { + match node.kind() { + "identifier" => node_text(node, src).to_string(), + "field_expression" => node + .child_by_field_name("field") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(), + "scoped_identifier" => node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(), + "generic_function" => node + .child_by_field_name("function") + .map(|n| rust_callee_name(&n, src)) + .unwrap_or_default(), + _ => String::new(), + } +} + +// --------------------------------------------------------------------------- +// Python +// --------------------------------------------------------------------------- + +#[cfg(feature = "lang-python")] +fn extract_python(source: &str) -> Result { + let mut parser = Parser::new(); + let lang = tree_sitter_python::language(); + parser + .set_language(&lang) + .map_err(|e| format!("tree-sitter python init failed: {e}"))?; + let tree = parser + .parse(source.as_bytes(), None) + .ok_or_else(|| "tree-sitter parse returned None".to_string())?; + let src = source.as_bytes(); + + let mut functions: Vec = Vec::new(); + let mut scope: Vec = Vec::new(); + collect_python_functions(&tree.root_node(), src, &mut scope, &mut functions); + + let resolver = Resolver::new(&functions); + let mut calls: Vec<(String, String)> = Vec::new(); + let mut unresolved_count: usize = 0; + let mut scope: Vec = Vec::new(); + collect_python_calls( + &tree.root_node(), + src, + &mut scope, + &resolver, + &mut calls, + &mut unresolved_count, + ); + + Ok(FileCallGraph { + functions, + calls, + unresolved_count, + language: "python", + }) +} + +#[cfg(feature = "lang-python")] +fn 
collect_python_functions( + node: &Node, + src: &[u8], + scope: &mut Vec, + out: &mut Vec, +) { + match node.kind() { + "class_definition" => { + let class_name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + scope.push(class_name); + if let Some(body) = node.child_by_field_name("body") { + let mut cur = body.walk(); + for child in body.children(&mut cur) { + collect_python_functions(&child, src, scope, out); + } + } + scope.pop(); + } + "function_definition" => { + let name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + if name.is_empty() { + return; + } + let kind = if scope.is_empty() { "fn" } else { "method" }; + let qualified = qualify(scope, &name, "."); + out.push(FunctionInfo { + qualified, + simple: name, + line: (node.start_position().row as u32) + 1, + kind, + }); + } + "decorated_definition" => { + // Decorated functions wrap the real definition in the last child. 
+ let mut cur = node.walk(); + let children: Vec = node.children(&mut cur).collect(); + if let Some(def) = children.last() { + collect_python_functions(def, src, scope, out); + } + } + _ => { + let mut cur = node.walk(); + for child in node.children(&mut cur) { + collect_python_functions(&child, src, scope, out); + } + } + } +} + +#[cfg(feature = "lang-python")] +fn collect_python_calls( + node: &Node, + src: &[u8], + scope: &mut Vec, + resolver: &Resolver, + out: &mut Vec<(String, String)>, + unresolved: &mut usize, +) { + match node.kind() { + "class_definition" => { + let class_name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + scope.push(class_name); + if let Some(body) = node.child_by_field_name("body") { + let mut cur = body.walk(); + for child in body.children(&mut cur) { + collect_python_calls(&child, src, scope, resolver, out, unresolved); + } + } + scope.pop(); + } + "function_definition" => { + let name = node + .child_by_field_name("name") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(); + if name.is_empty() { + return; + } + let caller_qual = qualify(scope, &name, "."); + if let Some(body) = node.child_by_field_name("body") { + walk_python_body(&body, src, &caller_qual, resolver, out, unresolved); + } + } + "decorated_definition" => { + let mut cur = node.walk(); + let children: Vec = node.children(&mut cur).collect(); + if let Some(def) = children.last() { + collect_python_calls(def, src, scope, resolver, out, unresolved); + } + } + _ => { + let mut cur = node.walk(); + for child in node.children(&mut cur) { + collect_python_calls(&child, src, scope, resolver, out, unresolved); + } + } + } +} + +#[cfg(feature = "lang-python")] +fn walk_python_body( + node: &Node, + src: &[u8], + caller: &str, + resolver: &Resolver, + out: &mut Vec<(String, String)>, + unresolved: &mut usize, +) { + if node.kind() == "call" { + let callee_raw = node + .child_by_field_name("function") + 
.map(|n| python_callee_name(&n, src)) + .unwrap_or_default(); + if !callee_raw.is_empty() { + match resolver.resolve(&callee_raw) { + Some(target) => { + if target != caller { + out.push((caller.to_string(), target)); + } + } + None => *unresolved += 1, + } + } + } + + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk_python_body(&child, src, caller, resolver, out, unresolved); + } +} + +#[cfg(feature = "lang-python")] +fn python_callee_name(node: &Node, src: &[u8]) -> String { + match node.kind() { + "identifier" => node_text(node, src).to_string(), + "attribute" => node + .child_by_field_name("attribute") + .map(|n| node_text(&n, src).to_string()) + .unwrap_or_default(), + _ => String::new(), + } +} + +// --------------------------------------------------------------------------- +// Shared helpers +// --------------------------------------------------------------------------- + +#[cfg(any(feature = "lang-rust", feature = "lang-python"))] +fn node_text<'a>(node: &Node, src: &'a [u8]) -> &'a str { + std::str::from_utf8(&src[node.start_byte()..node.end_byte()]).unwrap_or("") +} + +fn qualify(scope: &[String], name: &str, sep: &str) -> String { + if scope.is_empty() { + name.to_string() + } else { + format!("{}{}{}", scope.join(sep), sep, name) + } +} + +/// Resolves a raw callee token ("foo", "method", "thing") against a known set +/// of locally-defined functions. Rules: +/// - Exact qualified match wins (e.g. "Foo::bar" → "Foo::bar"). +/// - Unique simple-name match wins when the raw token is just a bare name. +/// - Otherwise: unresolved. +/// +/// We intentionally avoid any fancier disambiguation (type inference, receiver +/// tracking) — that would need the full type system. Unique-simple is enough +/// for the "here's how functions in this file relate" use case. 
+struct Resolver<'a> { + by_qualified: std::collections::HashMap<&'a str, &'a str>, + by_simple: std::collections::HashMap<&'a str, Vec<&'a str>>, +} + +impl<'a> Resolver<'a> { + fn new(functions: &'a [FunctionInfo]) -> Self { + let mut by_qualified: std::collections::HashMap<&str, &str> = + std::collections::HashMap::new(); + let mut by_simple: std::collections::HashMap<&str, Vec<&str>> = + std::collections::HashMap::new(); + for f in functions { + by_qualified.insert(f.qualified.as_str(), f.qualified.as_str()); + by_simple + .entry(f.simple.as_str()) + .or_default() + .push(f.qualified.as_str()); + } + Resolver { by_qualified, by_simple } + } + + fn resolve(&self, raw: &str) -> Option { + if let Some(q) = self.by_qualified.get(raw) { + return Some((*q).to_string()); + } + if let Some(list) = self.by_simple.get(raw) { + if list.len() == 1 { + return Some(list[0].to_string()); + } + } + None + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[cfg(feature = "lang-rust")] + #[test] + fn rust_resolves_free_function_calls() { + let src = r#" +fn a() { b(); c(); } +fn b() { c(); } +fn c() {} +"#; + let cg = build_file_call_graph(&PathBuf::from("x.rs"), src).unwrap().unwrap(); + assert_eq!(cg.functions.len(), 3); + assert_eq!(cg.language, "rust"); + assert!(cg.calls.contains(&("a".into(), "b".into()))); + assert!(cg.calls.contains(&("a".into(), "c".into()))); + assert!(cg.calls.contains(&("b".into(), "c".into()))); + assert_eq!(cg.unresolved_count, 0); + } + + #[cfg(feature = "lang-rust")] + #[test] + fn rust_method_calls_resolve_via_simple_name() { + let src = r#" +struct S; +impl S { + fn foo(&self) { self.bar(); } + fn bar(&self) {} +} +"#; + let cg = build_file_call_graph(&PathBuf::from("x.rs"), src).unwrap().unwrap(); + assert!(cg.functions.iter().any(|f| 
f.qualified == "S::foo")); + assert!(cg.functions.iter().any(|f| f.qualified == "S::bar")); + assert!(cg.calls.contains(&("S::foo".into(), "S::bar".into())), + "missing method call edge in {:?}", cg.calls); + } + + #[cfg(feature = "lang-rust")] + #[test] + fn rust_external_calls_increment_unresolved() { + let src = r#" +fn a() { println!(); std::mem::swap(&mut 1, &mut 2); unknown(); } +"#; + let cg = build_file_call_graph(&PathBuf::from("x.rs"), src).unwrap().unwrap(); + // println! is a macro, not a call_expression, so it doesn't count. + // std::mem::swap and unknown are call_expressions with no local match. + assert!(cg.unresolved_count >= 2, "expected 2+ unresolved, got {}", cg.unresolved_count); + assert!(cg.calls.is_empty()); + } + + #[cfg(feature = "lang-rust")] + #[test] + fn rust_self_recursion_is_dropped() { + let src = r#" +fn loop_forever() { loop_forever(); } +"#; + let cg = build_file_call_graph(&PathBuf::from("x.rs"), src).unwrap().unwrap(); + // A self-edge wouldn't break rendering, but it's never interesting — + // we drop it so the diagram doesn't loop on a single node. 
+ assert!(cg.calls.is_empty(), "self-recursion should not emit edges: {:?}", cg.calls); + } + + #[cfg(feature = "lang-python")] + #[test] + fn python_resolves_free_function_calls() { + let src = "\ +def a(): + b() + c() +def b(): + c() +def c(): + pass +"; + let cg = build_file_call_graph(&PathBuf::from("x.py"), src).unwrap().unwrap(); + assert_eq!(cg.language, "python"); + assert_eq!(cg.functions.len(), 3); + assert!(cg.calls.contains(&("a".into(), "b".into()))); + assert!(cg.calls.contains(&("a".into(), "c".into()))); + assert!(cg.calls.contains(&("b".into(), "c".into()))); + } + + #[cfg(feature = "lang-python")] + #[test] + fn python_method_calls_via_attribute() { + let src = "\ +class S: + def foo(self): + self.bar() + def bar(self): + pass +"; + let cg = build_file_call_graph(&PathBuf::from("x.py"), src).unwrap().unwrap(); + assert!(cg.functions.iter().any(|f| f.qualified == "S.foo")); + assert!(cg.functions.iter().any(|f| f.qualified == "S.bar")); + assert!( + cg.calls.contains(&("S.foo".into(), "S.bar".into())), + "missing method edge: {:?}", cg.calls + ); + } + + #[test] + fn unknown_extension_returns_none() { + let cg = build_file_call_graph(&PathBuf::from("x.xyz"), "whatever").unwrap(); + assert!(cg.is_none()); + } + + #[cfg(feature = "lang-rust")] + #[test] + fn to_project_graph_emits_node_per_function() { + let src = r#" +fn a() { b(); } +fn b() {} +"#; + let cg = build_file_call_graph(&PathBuf::from("x.rs"), src).unwrap().unwrap(); + let pg = to_project_graph(&cg, &PathBuf::from("x.rs")); + assert_eq!(pg.nodes.len(), 2); + assert_eq!(pg.edges.len(), 1); + assert!(pg.nodes.iter().any(|n| n.module_id == "a")); + assert!(pg.nodes.iter().any(|n| n.module_id == "b")); + assert_eq!(pg.edges[0].edge_type, "call"); + } +} diff --git a/third_party/cartographer/mapper-core/cartographer/src/diagram.rs b/third_party/cartographer/mapper-core/cartographer/src/diagram.rs index 54b3f876..bef694f9 100644 --- 
a/third_party/cartographer/mapper-core/cartographer/src/diagram.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/diagram.rs @@ -12,7 +12,7 @@ use std::collections::{HashMap, HashSet, VecDeque}; -use crate::api::ProjectGraphResponse; +use crate::api::{is_doc_path, ProjectGraphResponse}; use crate::layers::LayerViolationType; /// Nodes with `hotspot_score` at or above this threshold get the `hot` overlay @@ -25,6 +25,10 @@ const HOTSPOT_THRESHOLD: f64 = 70.0; pub enum DiagramFormat { Mermaid, Dot, + /// Terminal-friendly indented tree with box-drawing characters. + /// Always rooted at a single node: `focus` if set, else the blast-radius + /// epicenter, else the most-connected node in the graph. + Ascii, } impl DiagramFormat { @@ -32,18 +36,84 @@ impl DiagramFormat { match s.to_lowercase().as_str() { "mermaid" | "" => Ok(DiagramFormat::Mermaid), "dot" | "graphviz" => Ok(DiagramFormat::Dot), + "ascii" | "tree" | "text" => Ok(DiagramFormat::Ascii), other => Err(format!("unknown diagram format: {other}")), } } } /// Rendering options. `focus` is a module_id (or suffix match on a path/module_id). +/// +/// Selection precedence: `blast_radius` > `focus` > top-by-degree. #[derive(Debug, Clone)] pub struct RenderOptions<'a> { pub format: DiagramFormat, pub focus: Option<&'a str>, pub depth: usize, pub max_nodes: usize, + /// When `Some(threshold)`, overlay dotted purple edges for every co-change + /// pair whose `coupling_score >= threshold` and whose both endpoints are in + /// the included node set. `None` disables the overlay (default). + pub show_cochange: Option, + /// When `Some(target)`, override selection: included = {target} ∪ direct + /// dependencies ∪ direct dependents. The target module renders as an + /// epicenter (bold red fill). `None` uses the focus/top-by-degree path. 
+ pub blast_radius: Option<&'a str>, + /// When `true`, filter the selection to the doc subgraph: all document + /// nodes (markdown/YAML/TOML/JSON) plus every code file they directly + /// reference. Docs render with a distinct shape regardless of this flag. + pub docs_only: bool, + /// When `Some(n)`, collapse the graph to folder granularity at path depth + /// `n` before rendering. All files whose path shares the same first `n` + /// directory components become a single folder node; edges are aggregated + /// (self-loops dropped, counts summed). Combines with focus/blast-radius — + /// selection happens after collapsing, so anchors must match folder ids. + pub group_by_folder_depth: Option, + /// When `true`, replace role-based node fills with owner-derived colors + /// (dominant git author mapped to a stable palette). Nodes without an + /// `owner` value fall through to the default (white/grey). Overlay borders + /// (cycle/pivot/hot/epicenter) still take precedence. + pub color_by_owner: bool, +} + +impl<'a> RenderOptions<'a> { + /// Convenience constructor that fills every new overlay option with `None`. + /// Intended for call sites that only care about the base top-by-degree / + /// focused rendering and don't want to list every overlay field. + pub fn basic(format: DiagramFormat, max_nodes: usize) -> Self { + Self { + format, + focus: None, + depth: 2, + max_nodes, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + } + } +} + +/// Hash an author name into the shared palette. Palette picked for reasonable +/// contrast on white and for staying distinguishable when several owners +/// appear in the same diagram. Stable across runs — the same owner always +/// lands on the same color. +fn owner_color(owner: &str) -> &'static str { + // 10-color palette. Order matters — first entries are the most visually + // distinct from each other; later entries fall back to neighbors. 
+ const PALETTE: &[&str] = &[ + "#a6cee3", "#b2df8a", "#fb9a99", "#fdbf6f", + "#cab2d6", "#ffff99", "#1f78b4", "#33a02c", + "#e31a1c", "#ff7f00", + ]; + // FNV-1a 32-bit hash; good-enough distribution for a handful of owners. + let mut h: u32 = 0x811c9dc5; + for b in owner.bytes() { + h ^= b as u32; + h = h.wrapping_mul(0x01000193); + } + PALETTE[(h as usize) % PALETTE.len()] } /// Rendered diagram plus a truncation flag so callers can tell the model to @@ -53,11 +123,17 @@ pub struct RenderedDiagram { pub diagram: String, pub truncated: bool, pub node_count: usize, + /// Module ids that made it into the render. Exposed so downstream + /// exporters (e.g. the interactive HTML builder) can reuse the selection + /// without re-running focus/blast-radius logic. + pub included: Vec, } /// Precomputed overlays that decorate the base import graph with architectural /// signals: cycles (from `graph.cycles`), layer violations (from -/// `graph.layer_violations`), and hotspot nodes (from `GraphNode.hotspot_score`). +/// `graph.layer_violations`), co-change pairs (from `graph.cochange_pairs`), +/// hotspot nodes (from `GraphNode.hotspot_score`), and an epicenter marker for +/// blast-radius renderings. /// /// We precompute once per `render()` so both Mermaid and DOT rendering paths /// consult the same sets and stay visually consistent. @@ -66,9 +142,20 @@ struct Overlays<'a> { pivot_nodes: HashSet<&'a str>, cycle_edges: HashSet<(&'a str, &'a str)>, violations: HashMap<(&'a str, &'a str), &'a LayerViolationType>, + /// Co-change pairs above threshold, keyed by (file_a, file_b). We don't + /// key symmetrically here — the renderer iterates this map and filters by + /// `included_set`, treating each pair as a single undirected coupling edge. + cochange: HashMap<(&'a str, &'a str), f64>, + /// The target of a blast-radius selection, if any. Rendered as an + /// epicenter (bold red fill) so the "you are here" is unambiguous. 
+ epicenter: Option<&'a str>, } -fn compute_overlays(graph: &ProjectGraphResponse) -> Overlays<'_> { +fn compute_overlays<'a>( + graph: &'a ProjectGraphResponse, + show_cochange: Option, + epicenter: Option<&'a str>, +) -> Overlays<'a> { let mut cycle_nodes: HashSet<&str> = HashSet::new(); let mut pivot_nodes: HashSet<&str> = HashSet::new(); let mut cycle_edges: HashSet<(&str, &str)> = HashSet::new(); @@ -99,16 +186,279 @@ fn compute_overlays(graph: &ProjectGraphResponse) -> Overlays<'_> { ); } - Overlays { cycle_nodes, pivot_nodes, cycle_edges, violations } + let mut cochange: HashMap<(&str, &str), f64> = HashMap::new(); + if let Some(threshold) = show_cochange { + for p in &graph.cochange_pairs { + if p.coupling_score >= threshold { + cochange.insert((p.file_a.as_str(), p.file_b.as_str()), p.coupling_score); + } + } + } + + Overlays { cycle_nodes, pivot_nodes, cycle_edges, violations, cochange, epicenter } +} + +/// Doc-map selection. Included = all doc nodes ∪ every code file they connect +/// to (either as source or target of an edge). Docs are identified via +/// `api::is_doc_path`. Ordered by edge count descending so the most-connected +/// docs survive `max_nodes` truncation first. +fn docs_only_selection( + graph: &ProjectGraphResponse, + max_nodes: usize, +) -> (Vec, bool) { + let doc_ids: HashSet<&str> = graph + .nodes + .iter() + .filter(|n| is_doc_path(&n.path)) + .map(|n| n.module_id.as_str()) + .collect(); + + let mut neighbors: HashSet<&str> = HashSet::new(); + for edge in &graph.edges { + if doc_ids.contains(edge.source.as_str()) { + neighbors.insert(edge.target.as_str()); + } + if doc_ids.contains(edge.target.as_str()) { + neighbors.insert(edge.source.as_str()); + } + } + + // Rank each candidate by its edge count in the full graph so truncation + // keeps the most-connected nodes. Doc nodes are listed before code + // neighbors so a heavy truncation still shows the docs themselves. 
+ let mut degree: HashMap<&str, usize> = HashMap::new(); + for edge in &graph.edges { + *degree.entry(edge.source.as_str()).or_insert(0) += 1; + *degree.entry(edge.target.as_str()).or_insert(0) += 1; + } + + let mut docs: Vec<&str> = doc_ids.iter().copied().collect(); + docs.sort_by(|a, b| { + degree.get(b).copied().unwrap_or(0) + .cmp(°ree.get(a).copied().unwrap_or(0)) + .then_with(|| a.cmp(b)) + }); + + let mut code: Vec<&str> = neighbors.difference(&doc_ids).copied().collect(); + code.sort_by(|a, b| { + degree.get(b).copied().unwrap_or(0) + .cmp(°ree.get(a).copied().unwrap_or(0)) + .then_with(|| a.cmp(b)) + }); + + let mut ordered: Vec = docs.iter().map(|s| s.to_string()).collect(); + ordered.extend(code.iter().map(|s| s.to_string())); + + let truncated = ordered.len() > max_nodes; + ordered.truncate(max_nodes); + (ordered, truncated) +} + +/// Blast-radius selection. Included = {target} ∪ direct deps ∪ direct dependents, +/// capped at `max_nodes`. Computed purely from the graph — no `ApiState` needed. +/// +/// The target is resolved with the same rules as `bfs_from_anchor`: exact +/// module_id, exact path, then path/module_id suffix match. +fn blast_radius_selection( + graph: &ProjectGraphResponse, + target: &str, + max_nodes: usize, +) -> Result<(Vec, bool), String> { + let resolved = graph + .nodes + .iter() + .find(|n| n.module_id == target) + .or_else(|| graph.nodes.iter().find(|n| n.path == target)) + .or_else(|| { + graph + .nodes + .iter() + .find(|n| n.module_id.ends_with(target) || n.path.ends_with(target)) + }) + .ok_or_else(|| format!("blast-radius target not found in graph: {target}"))?; + + let epicenter_id = resolved.module_id.clone(); + let mut included: Vec = Vec::new(); + let mut seen: HashSet = HashSet::new(); + + // Epicenter first so it stays in the output even under truncation. + included.push(epicenter_id.clone()); + seen.insert(epicenter_id.clone()); + + // Direct dependencies: where epicenter is the source. 
+ for edge in &graph.edges { + if edge.source == epicenter_id && seen.insert(edge.target.clone()) { + included.push(edge.target.clone()); + } + } + // Direct dependents: where epicenter is the target. + for edge in &graph.edges { + if edge.target == epicenter_id && seen.insert(edge.source.clone()) { + included.push(edge.source.clone()); + } + } + + let truncated = included.len() > max_nodes; + included.truncate(max_nodes); + Ok((included, truncated)) +} + +/// Collapse a project graph to folder granularity. All files whose path shares +/// the same first `depth` directory components are merged into a single folder +/// node; edges are aggregated, with intra-folder self-loops dropped. Signature +/// counts sum; hotspot score is the max across member files. Language is set +/// to `"folder"` so renderers can give folder nodes a distinct shape. +/// +/// `depth` of 0 collapses everything to a single root — not useful, so we +/// treat 0 as "don't collapse". `depth` beyond any file's directory depth just +/// keeps that file as its own node (folder = its full parent path). +fn collapse_by_folder(graph: &ProjectGraphResponse, depth: usize) -> ProjectGraphResponse { + use crate::api::{GraphMetadata, GraphNode, GraphEdge}; + + fn folder_key(path: &str, depth: usize) -> String { + let parts: Vec<&str> = path.split('/').collect(); + // File sits at parts[parts.len()-1]; directories are parts[0..len-1]. + let dir_parts = &parts[..parts.len().saturating_sub(1)]; + let take = depth.min(dir_parts.len()); + if take == 0 { + // File sits at the root — group under "(root)" so it's one folder. + "(root)".to_string() + } else { + dir_parts[..take].join("/") + } + } + + // Map each module_id to its folder id. + let mut member_folder: HashMap = HashMap::new(); + // Aggregate per-folder state. 
+ let mut folder_files: HashMap> = HashMap::new(); + + for node in &graph.nodes { + let fid = folder_key(&node.path, depth); + member_folder.insert(node.module_id.clone(), fid.clone()); + folder_files.entry(fid).or_default().push(node); + } + + // Build folder nodes. We stash the file count in `signature_count`'s sibling + // field `fan_in` so the renderer can show "N files" — but simpler: encode it + // directly in the label via a dedicated render branch keyed off language. + let mut nodes: Vec = Vec::with_capacity(folder_files.len()); + for (fid, files) in &folder_files { + let signature_count: usize = files.iter().map(|n| n.signature_count).sum(); + let hotspot_score = files + .iter() + .filter_map(|n| n.hotspot_score) + .fold(None::, |acc, v| Some(acc.map_or(v, |a| a.max(v)))); + // fan_in repurposed to carry member file count for the renderer label. + let member_count: usize = files.len(); + + nodes.push(GraphNode { + module_id: fid.clone(), + path: fid.clone(), + language: "folder".into(), + signature_count, + complexity: None, + is_bridge: None, + bridge_score: None, + degree: None, + risk_level: None, + churn: None, + hotspot_score, + role: None, + is_dead: None, + unreferenced_exports: None, + fan_in: Some(member_count), + fan_out: None, + cochange_partners: None, + cochange_entropy: None, + owner: None, + }); + } + + // Aggregate edges. (src_folder, tgt_folder) → count. Drop self-loops. 
+ let mut edge_counts: HashMap<(String, String), u32> = HashMap::new(); + for e in &graph.edges { + let Some(sf) = member_folder.get(&e.source) else { continue }; + let Some(tf) = member_folder.get(&e.target) else { continue }; + if sf == tf { + continue; + } + *edge_counts.entry((sf.clone(), tf.clone())).or_insert(0) += 1; + } + + let edges: Vec = edge_counts + .into_iter() + .map(|((src, tgt), _)| GraphEdge { + source: src, + target: tgt, + edge_type: "import".into(), + at_range: None, + }) + .collect(); + + // Cycles/violations/cochange don't survive collapse — they describe the + // file-level graph and would be ambiguous at folder granularity. Callers + // who want those overlays should render the file-level view. + ProjectGraphResponse { + nodes, + edges, + cycles: vec![], + god_modules: vec![], + layer_violations: vec![], + metadata: GraphMetadata { + total_files: graph.metadata.total_files, + total_edges: graph.metadata.total_edges, + languages: HashMap::new(), + generated_at: graph.metadata.generated_at.clone(), + bridge_count: None, + cycle_count: None, + god_module_count: None, + health_score: None, + layer_violation_count: None, + architectural_drift: None, + hotspot_count: None, + dead_code_count: None, + unreferenced_exports_count: None, + }, + cochange_pairs: vec![], + } } /// Render an import-graph diagram. Pure over `graph` — no I/O. pub fn render(graph: &ProjectGraphResponse, opts: &RenderOptions) -> Result { let max_nodes = opts.max_nodes.max(1); - let (included, truncated) = match opts.focus { - Some(anchor) => bfs_from_anchor(graph, anchor, opts.depth, max_nodes)?, - None => top_by_degree(graph, max_nodes), + // Folder collapse happens before anything else so focus/blast_radius and + // overlays all see the collapsed view. Overlays derived from the file-level + // graph (cycles, violations, cochange) are intentionally dropped. 
+ let collapsed: Option = opts + .group_by_folder_depth + .filter(|&d| d > 0) + .map(|d| collapse_by_folder(graph, d)); + let graph: &ProjectGraphResponse = collapsed.as_ref().unwrap_or(graph); + + // Selection precedence: blast_radius > focus > docs_only > top-by-degree. + // Blast radius resolves the target and overrides everything else so + // callers don't have to null neighboring options. + let (included, truncated, epicenter) = match (opts.blast_radius, opts.focus, opts.docs_only) { + (Some(target), _, _) => { + let (inc, trunc) = blast_radius_selection(graph, target, max_nodes)?; + // `inc[0]` is the epicenter module_id (pushed first in selection). + let epi = inc.first().cloned(); + (inc, trunc, epi) + } + (None, Some(anchor), _) => { + let (inc, trunc) = bfs_from_anchor(graph, anchor, opts.depth, max_nodes)?; + (inc, trunc, None) + } + (None, None, true) => { + let (inc, trunc) = docs_only_selection(graph, max_nodes); + (inc, trunc, None) + } + (None, None, false) => { + let (inc, trunc) = top_by_degree(graph, max_nodes); + (inc, trunc, None) + } }; let included_set: HashSet<&str> = included.iter().map(|s| s.as_str()).collect(); @@ -120,14 +470,66 @@ pub fn render(graph: &ProjectGraphResponse, opts: &RenderOptions) -> Result = epicenter + .as_deref() + .and_then(|id| node_by_id.get_key_value(id).map(|(k, _)| *k)); + + let overlays = compute_overlays(graph, opts.show_cochange, epicenter_ref); let content = match opts.format { - DiagramFormat::Dot => render_dot(&included, &included_set, &node_by_id, graph, &overlays), - DiagramFormat::Mermaid => render_mermaid(&included, &included_set, &node_by_id, graph, &overlays), + DiagramFormat::Dot => render_dot(&included, &included_set, &node_by_id, graph, &overlays, opts.color_by_owner), + DiagramFormat::Mermaid => render_mermaid(&included, &included_set, &node_by_id, graph, &overlays, opts.color_by_owner), + DiagramFormat::Ascii => render_ascii( + &included, &included_set, &node_by_id, graph, &overlays, + 
opts.focus, opts.blast_radius, opts.depth, + ), }; - Ok(RenderedDiagram { diagram: content, truncated, node_count: included.len() }) + let node_count = included.len(); + Ok(RenderedDiagram { diagram: content, truncated, node_count, included }) +} + +/// Build the node selection without rendering. Exposed so the HTML exporter +/// (and other future non-text renderers) can reuse the same selection rules +/// as Mermaid/DOT — focus, blast-radius, docs-only, folder-collapse. +/// +/// Returns `(graph_to_render, included_module_ids, truncated)`. When folder +/// collapsing is active, `graph_to_render` is an owned collapsed +/// `ProjectGraphResponse`; otherwise it's `None` and the caller uses the +/// original graph. +pub fn select_for_render( + graph: &ProjectGraphResponse, + opts: &RenderOptions, +) -> Result<(Option, Vec, bool), String> { + let max_nodes = opts.max_nodes.max(1); + let collapsed: Option = opts + .group_by_folder_depth + .filter(|&d| d > 0) + .map(|d| collapse_by_folder(graph, d)); + let g: &ProjectGraphResponse = collapsed.as_ref().unwrap_or(graph); + + let (included, truncated, _epi) = match (opts.blast_radius, opts.focus, opts.docs_only) { + (Some(target), _, _) => { + let (inc, trunc) = blast_radius_selection(g, target, max_nodes)?; + let epi = inc.first().cloned(); + (inc, trunc, epi) + } + (None, Some(anchor), _) => { + let (inc, trunc) = bfs_from_anchor(g, anchor, opts.depth, max_nodes)?; + (inc, trunc, None) + } + (None, None, true) => { + let (inc, trunc) = docs_only_selection(g, max_nodes); + (inc, trunc, None) + } + (None, None, false) => { + let (inc, trunc) = top_by_degree(g, max_nodes); + (inc, trunc, None) + } + }; + Ok((collapsed, included, truncated)) } fn top_by_degree(graph: &ProjectGraphResponse, max_nodes: usize) -> (Vec, bool) { @@ -221,30 +623,40 @@ fn render_dot( node_by_id: &HashMap<&str, &crate::api::GraphNode>, graph: &ProjectGraphResponse, overlays: &Overlays, + color_by_owner: bool, ) -> String { let mut out = 
String::from("digraph cartographer {\n rankdir=LR;\n"); for module_id in included { let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; let label = node.path.rsplit('/').next().unwrap_or(&node.path); - let fill = role_color_dot(node.role.as_deref()); + let fill = if color_by_owner { + node.owner.as_deref().map(owner_color).unwrap_or("#fff") + } else { + role_color_dot(node.role.as_deref()) + }; let mid = module_id.as_str(); + let is_epicenter = overlays.epicenter == Some(mid); let is_pivot = overlays.pivot_nodes.contains(mid); let in_cycle = overlays.cycle_nodes.contains(mid); let score = node.hotspot_score.unwrap_or(0.0).clamp(0.0, 100.0); let hot = score >= HOTSPOT_THRESHOLD; - // Border: pivot > cycle > hot > default. Pivot is dashed to distinguish - // it inside a red-bordered cycle. - let (border_color, pen_width, extra_style) = if is_pivot { - ("#cc0000", 3.0, ",dashed") + // Epicenter overrides everything — this is the "you are here" marker + // for blast-radius renderings. Otherwise: pivot > cycle > hot > default. + // Pivot is dashed to distinguish it inside a red-bordered cycle. + let (fill_override, border_color, pen_width, extra_style) = if is_epicenter { + (Some("#ff3333"), "#660000", 4.0, "") + } else if is_pivot { + (None, "#cc0000", 3.0, ",dashed") } else if in_cycle { - ("#cc0000", 3.0, "") + (None, "#cc0000", 3.0, "") } else if hot { - ("#ff6600", 3.0, "") + (None, "#ff6600", 3.0, "") } else { - ("#333333", 1.0, "") + (None, "#333333", 1.0, "") }; + let actual_fill = fill_override.unwrap_or(fill); // Hotspot-driven sizing. score ∈ [0,100] → width ∈ [0.75, 1.80], // height ∈ [0.50, 0.90], fontsize ∈ [10, 16]. 
Nodes without a score @@ -253,11 +665,45 @@ fn render_dot( let height = 0.50 + (score / 100.0) * 0.40; let fontsize = 10 + ((score / 100.0) * 6.0) as u32; - out.push_str(&format!( - " \"{}\" [label=\"{}\\n{} fn\" shape=box style=\"filled{}\" fillcolor=\"{}\" color=\"{}\" penwidth={:.1} width={:.2} height={:.2} fontsize={}];\n", - node.module_id, label, node.signature_count, - extra_style, fill, border_color, pen_width, width, height, fontsize - )); + // Doc nodes render as `shape=note` with a light yellow fill so readers + // can distinguish documentation from code at a glance. Folder-collapsed + // nodes use `shape=folder` with a light blue fill and a "(N files)" + // count inline in the label. Epicenter fill still wins when set. + let is_doc = is_doc_path(&node.path); + let is_folder = node.language == "folder"; + let shape = if is_folder { + "folder" + } else if is_doc { + "note" + } else { + "box" + }; + let final_fill = if fill_override.is_some() { + actual_fill + } else if is_folder { + "#d6e9ff" + } else if is_doc { + "#fff4c0" + } else { + actual_fill + }; + let unit_label = if is_doc { "sec" } else { "fn" }; + + if is_folder { + let files = node.fan_in.unwrap_or(0); + let folder_label = if node.module_id == "(root)" { "(root)" } else { label }; + out.push_str(&format!( + " \"{}\" [label=\"{}/\\n{} files, {} fn\" shape={} style=\"filled{}\" fillcolor=\"{}\" color=\"{}\" penwidth={:.1} width={:.2} height={:.2} fontsize={}];\n", + node.module_id, folder_label, files, node.signature_count, + shape, extra_style, final_fill, border_color, pen_width, width, height, fontsize + )); + } else { + out.push_str(&format!( + " \"{}\" [label=\"{}\\n{} {}\" shape={} style=\"filled{}\" fillcolor=\"{}\" color=\"{}\" penwidth={:.1} width={:.2} height={:.2} fontsize={}];\n", + node.module_id, label, node.signature_count, unit_label, + shape, extra_style, final_fill, border_color, pen_width, width, height, fontsize + )); + } } for edge in &graph.edges { if 
!(included_set.contains(edge.source.as_str()) @@ -283,6 +729,21 @@ fn render_dot( edge.source, edge.target, color, style, pen )); } + + // Co-change overlay edges. `constraint=false` keeps DOT's layout engine + // from treating these as part of the import DAG — they'd otherwise pull + // unrelated nodes together and blow up the layout. Rendered bidirectionally + // as `arrowhead=none` to signal these are coupling, not dependency. + for ((a, b), score) in &overlays.cochange { + if !(included_set.contains(a) && included_set.contains(b)) { + continue; + } + out.push_str(&format!( + " \"{}\" -> \"{}\" [color=\"#8844cc\" style=dotted penwidth={:.1} arrowhead=none constraint=false label=\"{:.2}\" fontsize=9 fontcolor=\"#8844cc\"];\n", + a, b, 1.0 + score * 2.0, score + )); + } + out.push('}'); out } @@ -293,6 +754,7 @@ fn render_mermaid( node_by_id: &HashMap<&str, &crate::api::GraphNode>, graph: &ProjectGraphResponse, overlays: &Overlays, + color_by_owner: bool, ) -> String { let mut out = String::from("graph TD\n"); out.push_str(" classDef bridge fill:#f96,stroke:#333\n"); @@ -302,6 +764,9 @@ fn render_mermaid( out.push_str(" classDef cycle stroke:#c00,stroke-width:3px\n"); out.push_str(" classDef pivot stroke:#c00,stroke-width:3px,stroke-dasharray:5 5\n"); out.push_str(" classDef hot stroke:#f60,stroke-width:3px\n"); + out.push_str(" classDef epicenter fill:#f33,stroke:#600,stroke-width:4px,color:#fff\n"); + out.push_str(" classDef doc fill:#fff4c0,stroke:#aa8,stroke-dasharray:3 2\n"); + out.push_str(" classDef folder fill:#d6e9ff,stroke:#468,stroke-width:2px\n"); let id_map: HashMap<&str, usize> = included .iter() @@ -312,32 +777,81 @@ fn render_mermaid( // Node declarations carry the inline role class (:::core / :::bridge / etc). // Overlay classes (cycle/pivot/hot) are applied via separate `class` statements // below so a node can wear multiple classes without relying on inline chaining. 
+ // Doc nodes use Mermaid stadium shape `([...])` + "sec" label; folder nodes + // use subroutine shape `[[...]]` + "N files, M fn" label. for module_id in included { let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; let i = id_map[module_id.as_str()]; let label = node.path.rsplit('/').next().unwrap_or(&node.path); - let class_suffix = role_class_suffix(node.role.as_deref()); - out.push_str(&format!( - " N{}[\"{}\\n{} fn\"]{}\n", - i, label, node.signature_count, class_suffix - )); + let is_doc = is_doc_path(&node.path); + let is_folder = node.language == "folder"; + // In owner-color mode we drop the role class suffix so the per-node + // `style` directive we emit below wins without fighting the classDef. + let class_suffix = if color_by_owner { "" } else { role_class_suffix(node.role.as_deref()) }; + if is_folder { + let files = node.fan_in.unwrap_or(0); + let folder_label = if node.module_id == "(root)" { "(root)" } else { label }; + out.push_str(&format!( + " N{}[[\"{}/\\n{} files, {} fn\"]]{}\n", + i, folder_label, files, node.signature_count, class_suffix + )); + } else { + let unit_label = if is_doc { "sec" } else { "fn" }; + let (open, close) = if is_doc { ("([\"", "\"])") } else { ("[\"", "\"]") }; + out.push_str(&format!( + " N{}{}{}\\n{} {}{}{}\n", + i, open, label, node.signature_count, unit_label, close, class_suffix + )); + } + } + + // Owner coloring emits per-node style directives. Overlay borders + // (cycle/pivot/hot/epicenter) are applied via stroke-only classes, so + // they don't collide with the fill we set here. + if color_by_owner { + for module_id in included { + let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; + let i = id_map[module_id.as_str()]; + if let Some(owner) = node.owner.as_deref() { + out.push_str(&format!( + " style N{} fill:{},stroke:#333\n", + i, owner_color(owner) + )); + } + } } - // Overlay class assignments. 
Pivot takes precedence over cycle so a pivot - // node gets the dashed border that distinguishes it inside a cycle. + // Overlay class assignments. Epicenter wins outright so blast-radius + // renderings have an unambiguous "you are here" marker. Otherwise pivot + // takes precedence over cycle so pivots are visually distinguishable + // inside a cycle. for module_id in included { let Some(node) = node_by_id.get(module_id.as_str()) else { continue }; let i = id_map[module_id.as_str()]; let mid = module_id.as_str(); let mut extras: Vec<&str> = Vec::new(); - if overlays.pivot_nodes.contains(mid) { + if overlays.epicenter == Some(mid) { + extras.push("epicenter"); + } else if overlays.pivot_nodes.contains(mid) { extras.push("pivot"); } else if overlays.cycle_nodes.contains(mid) { extras.push("cycle"); } - if node.hotspot_score.unwrap_or(0.0) >= HOTSPOT_THRESHOLD { + if node.hotspot_score.unwrap_or(0.0) >= HOTSPOT_THRESHOLD + && overlays.epicenter != Some(mid) + { extras.push("hot"); } + // Doc nodes get the `doc` overlay class on top of whatever else they + // wear. Epicenter still wins visually because `class` statements apply + // in order and later declarations override earlier ones in Mermaid. + if is_doc_path(&node.path) && overlays.epicenter != Some(mid) { + extras.push("doc"); + } + // Folder nodes get the `folder` overlay class (blue fill + thick stroke). + if node.language == "folder" && overlays.epicenter != Some(mid) { + extras.push("folder"); + } if !extras.is_empty() { out.push_str(&format!(" class N{} {};\n", i, extras.join(","))); } @@ -393,12 +907,256 @@ fn render_mermaid( edge_index += 1; } + // Co-change overlay edges. Mermaid lacks a directionless arrow; we use + // `---` (plain line) so readers don't mistake these for imports. Each gets + // a linkStyle directive that dashes them purple. 
+ for ((a, b), score) in &overlays.cochange { + if !(included_set.contains(a) && included_set.contains(b)) { + continue; + } + let (Some(&ai), Some(&bi)) = (id_map.get(a), id_map.get(b)) else { + continue; + }; + // Mermaid uses `---|label|` for edge labels on undirected-style lines. + out.push_str(&format!(" N{} ---|{:.2}| N{}\n", ai, score, bi)); + link_styles.push(( + edge_index, + "stroke:#84c,stroke-width:2px,stroke-dasharray:2 4", + )); + edge_index += 1; + } + for (idx, style) in link_styles { out.push_str(&format!(" linkStyle {} {}\n", idx, style)); } out } +/// Render a terminal-friendly indented tree. Always single-rooted — the idea +/// is "what does this one module reach, and where does it fit" which falls +/// apart if we emit a forest. Cycles are broken with a `↑ seen` marker so the +/// output stays bounded and readable. +/// +/// Root selection: explicit `focus` → blast_radius epicenter → first node in +/// `included` (which is top-by-degree #1 under the default selection). +#[allow(clippy::too_many_arguments)] +fn render_ascii( + included: &[String], + included_set: &HashSet<&str>, + node_by_id: &HashMap<&str, &crate::api::GraphNode>, + graph: &ProjectGraphResponse, + overlays: &Overlays, + focus: Option<&str>, + blast_radius: Option<&str>, + depth: usize, +) -> String { + // Directed adjacency over included edges only. We walk imports in their + // natural direction (source → target) so the tree reads "X depends on Y". + let mut adj: HashMap<&str, Vec<&str>> = HashMap::new(); + for edge in &graph.edges { + let s = edge.source.as_str(); + let t = edge.target.as_str(); + if included_set.contains(s) && included_set.contains(t) { + adj.entry(s).or_default().push(t); + } + } + for targets in adj.values_mut() { + targets.sort(); + targets.dedup(); + } + + // Pick the root. Fall back through explicit > epicenter > best by out-degree. 
+ // + // "First included" as the fallback is what top_by_degree gives us (#1 by + // total degree), but for a tree that's the wrong signal: a node with high + // in-degree and zero out-degree would render as a lone root with an + // empty subtree. We want the node that *reaches* the most, so we rank + // included nodes by out-degree within included_set before falling back. + let root: &str = match (focus, blast_radius, overlays.epicenter, included.first()) { + (Some(anchor), _, _, _) => { + // Re-resolve the same way bfs_from_anchor did so we land on the + // actual module_id (the anchor may have been a path suffix). + included + .iter() + .find(|m| m.as_str() == anchor) + .map(|s| s.as_str()) + .or_else(|| { + included + .iter() + .find(|m| { + node_by_id + .get(m.as_str()) + .map(|n| n.path.ends_with(anchor) || n.module_id.ends_with(anchor)) + .unwrap_or(false) + }) + .map(|s| s.as_str()) + }) + .or_else(|| included.first().map(|s| s.as_str())) + .unwrap_or("") + } + (None, Some(_), Some(epi), _) => epi, + (None, None, _, Some(_)) => { + let best = included + .iter() + .map(|m| m.as_str()) + .max_by_key(|m| adj.get(m).map(|v| v.len()).unwrap_or(0)); + best.unwrap_or("") + } + _ => "", + }; + + if root.is_empty() { + return String::from("(empty graph)\n"); + } + + // DFS with visited tracking. `depth` from RenderOptions is the traversal + // cap; 0 means "just the root". We default the practical cap to 32 if the + // caller passed 0, so top-by-degree invocations still produce useful output. + let effective_depth = if depth == 0 && focus.is_none() { 32 } else { depth }; + + let mut out = String::new(); + // Header line: the root itself, un-prefixed. 
+ out.push_str(&ascii_label(root, node_by_id, overlays)); + out.push('\n'); + + let mut visited: HashSet<&str> = HashSet::new(); + visited.insert(root); + + let children: Vec<&str> = adj.get(root).cloned().unwrap_or_default(); + for (i, child) in children.iter().enumerate() { + let is_last = i + 1 == children.len(); + ascii_walk( + child, + &adj, + node_by_id, + overlays, + &mut visited, + &mut out, + "", + is_last, + 1, + effective_depth, + ); + } + + // Orphans: other included nodes not reachable from the root. Report as a + // flat tail so the user sees them without losing the tree structure. + let mut orphans: Vec<&str> = included + .iter() + .map(|s| s.as_str()) + .filter(|m| !visited.contains(m)) + .collect(); + if !orphans.is_empty() { + orphans.sort(); + out.push_str("\n(disconnected)\n"); + for (i, m) in orphans.iter().enumerate() { + let is_last = i + 1 == orphans.len(); + let branch = if is_last { "└── " } else { "├── " }; + out.push_str(branch); + out.push_str(&ascii_label(m, node_by_id, overlays)); + out.push('\n'); + } + } + + out +} + +#[allow(clippy::too_many_arguments)] +fn ascii_walk<'a>( + node: &'a str, + adj: &HashMap<&'a str, Vec<&'a str>>, + node_by_id: &HashMap<&'a str, &crate::api::GraphNode>, + overlays: &Overlays, + visited: &mut HashSet<&'a str>, + out: &mut String, + prefix: &str, + is_last: bool, + current_depth: usize, + max_depth: usize, +) { + let branch = if is_last { "└── " } else { "├── " }; + out.push_str(prefix); + out.push_str(branch); + + if visited.contains(node) { + // Cycle or re-entry — emit a terminator so the output stays bounded. 
+ out.push_str(&ascii_label(node, node_by_id, overlays)); + out.push_str(" ↑ seen\n"); + return; + } + visited.insert(node); + + out.push_str(&ascii_label(node, node_by_id, overlays)); + out.push('\n'); + + if current_depth >= max_depth { + return; + } + + let children: Vec<&str> = adj.get(node).cloned().unwrap_or_default(); + if children.is_empty() { + return; + } + + let child_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " }); + for (i, child) in children.iter().enumerate() { + let last = i + 1 == children.len(); + ascii_walk( + child, + adj, + node_by_id, + overlays, + visited, + out, + &child_prefix, + last, + current_depth + 1, + max_depth, + ); + } +} + +fn ascii_label( + module_id: &str, + node_by_id: &HashMap<&str, &crate::api::GraphNode>, + overlays: &Overlays, +) -> String { + let Some(node) = node_by_id.get(module_id) else { + return module_id.to_string(); + }; + let name = node.path.rsplit('/').next().unwrap_or(&node.path); + let unit = if is_doc_path(&node.path) { "sec" } else { "fn" }; + + // Overlay markers — mirror what Mermaid/DOT apply, but flattened into + // ASCII-safe suffixes. + let mut tags: Vec<&str> = Vec::new(); + if overlays.epicenter == Some(module_id) { + tags.push("★ epicenter"); + } + if overlays.cycle_nodes.contains(module_id) { + tags.push("◉ cycle"); + } + if overlays.pivot_nodes.contains(module_id) { + tags.push("✦ pivot"); + } + let is_hot = node.hotspot_score.unwrap_or(0.0) >= HOTSPOT_THRESHOLD; + if is_hot { + tags.push("♨ hot"); + } + if let Some(role) = node.role.as_deref() { + // Role is a short word (core/bridge/dead/entry); inline it plainly. 
+ tags.push(role); + } + + let tag_suffix = if tags.is_empty() { + String::new() + } else { + format!(" [{}]", tags.join(", ")) + }; + + format!("{} ({} {}){}", name, node.signature_count, unit, tag_suffix) +} + fn role_color_dot(role: Option<&str>) -> &'static str { match role { Some("core") => "#9cf", @@ -446,6 +1204,7 @@ mod tests { fan_out: None, cochange_partners: None, cochange_entropy: None, + owner: None, } } @@ -498,6 +1257,11 @@ mod tests { focus: None, depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); assert!(!r.diagram.contains("isolated")); assert_eq!(r.node_count, 4); @@ -512,6 +1276,11 @@ mod tests { focus: None, depth: 2, max_nodes: 2, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); assert!(r.truncated); assert_eq!(r.node_count, 2); @@ -525,6 +1294,11 @@ mod tests { focus: Some("a"), depth: 1, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); // depth=1 from a → reaches b but not c assert_eq!(r.node_count, 2); @@ -538,6 +1312,11 @@ mod tests { focus: Some("a"), depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); assert_eq!(r.node_count, 3); // a, b, c } @@ -551,6 +1330,11 @@ mod tests { focus: Some("a.rs"), depth: 1, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); assert_eq!(r.node_count, 2); } @@ -563,6 +1347,11 @@ mod tests { focus: Some("does_not_exist"), depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }); assert!(r.is_err()); } @@ -575,6 +1364,11 @@ mod 
tests { focus: None, depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); assert!(r.diagram.starts_with("digraph cartographer {")); assert!(r.diagram.contains("#9cf")); // core color present for node a @@ -601,6 +1395,11 @@ mod tests { focus: Some("d"), depth: 1, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); assert_eq!(r.node_count, 2); // d + its importer c assert!(r.diagram.contains("c.rs")); @@ -615,6 +1414,11 @@ mod tests { focus: Some("a"), depth: 2, max_nodes: 2, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); assert_eq!(r.node_count, 2); assert!(r.truncated); @@ -631,6 +1435,11 @@ mod tests { focus: Some("a"), depth: 5, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); // a, b, c, d reachable undirected; no duplicates. assert_eq!(r.node_count, 4); @@ -644,6 +1453,11 @@ mod tests { focus: None, depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); assert!(r.diagram.starts_with("graph TD\n")); assert!(r.diagram.contains("classDef core")); @@ -687,6 +1501,11 @@ mod tests { focus: None, depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); // Cycle edges use thick arrow and pick up a linkStyle. @@ -716,6 +1535,11 @@ mod tests { focus: None, depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); // At least one cycle edge must carry the red colour and solid style. 
@@ -744,6 +1568,11 @@ mod tests { focus: None, depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); // Both violations use the dotted-violation arrow. @@ -764,6 +1593,11 @@ mod tests { focus: None, depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); let back = r.diagram.lines().find(|l| l.contains("\"a\" -> \"b\"")).unwrap(); @@ -787,6 +1621,11 @@ mod tests { focus: None, depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); let hot_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); @@ -810,6 +1649,11 @@ mod tests { focus: None, depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); // `a` should get a class statement including `hot`. 
@@ -835,6 +1679,11 @@ mod tests { focus: None, depth: 2, max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); let a_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); @@ -843,6 +1692,223 @@ mod tests { assert!(!a_line.contains("color=\"#ff6600\""), "hot border should not win over cycle: {}", a_line); } + fn cochange_pair(a: &str, b: &str, score: f64) -> crate::api::CoChangePair { + crate::api::CoChangePair { + file_a: a.into(), + file_b: b.into(), + count: 3, + coupling_score: score, + } + } + + #[test] + fn cochange_overlay_off_by_default() { + let mut g = fixture(); + g.cochange_pairs.push(cochange_pair("a", "c", 0.9)); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // No undirected-style edge, no purple link styling. + assert!(!r.diagram.contains("---|")); + assert!(!r.diagram.contains("stroke:#84c")); + } + + #[test] + fn cochange_overlay_renders_above_threshold_mermaid() { + let mut g = fixture(); + g.cochange_pairs.push(cochange_pair("a", "c", 0.9)); + g.cochange_pairs.push(cochange_pair("a", "d", 0.2)); // below threshold + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: Some(0.5), + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // 0.9 pair shows up with the score label; 0.2 pair filtered out. + assert!(r.diagram.contains("---|0.90|"), "missing cochange line:\n{}", r.diagram); + assert!(!r.diagram.contains("---|0.20|")); + // linkStyle appends the purple dash style. 
+ assert!(r.diagram.contains("stroke:#84c")); + } + + #[test] + fn cochange_overlay_renders_above_threshold_dot() { + let mut g = fixture(); + g.cochange_pairs.push(cochange_pair("a", "c", 0.9)); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: Some(0.5), + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // Purple edge with arrowhead=none and constraint=false so it doesn't + // warp the DAG layout. + let line = r.diagram.lines().find(|l| l.contains("\"a\" -> \"c\"") && l.contains("#8844cc")).unwrap(); + assert!(line.contains("arrowhead=none"), "cochange edge must be undirected: {}", line); + assert!(line.contains("constraint=false"), "cochange edge must not constrain layout: {}", line); + } + + #[test] + fn cochange_overlay_skips_pairs_with_excluded_endpoint() { + // `isolated` is dropped by the selection stage; any cochange pair + // involving it must not appear as an edge referencing a missing node. + let mut g = fixture(); + g.cochange_pairs.push(cochange_pair("a", "isolated", 0.9)); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: Some(0.5), + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(!r.diagram.contains("---|"), "cochange to excluded node must not render"); + } + + #[test] + fn blast_radius_selects_epicenter_deps_and_dependents() { + // fixture: a -> b -> c -> d, plus isolated. Blast radius of `b`: + // {b} ∪ {c} (dependency) ∪ {a} (dependent) = {a, b, c}. 
+ let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: Some("b"), + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert_eq!(r.node_count, 3); + assert!(r.diagram.contains("a.rs")); + assert!(r.diagram.contains("b.rs")); + assert!(r.diagram.contains("c.rs")); + assert!(!r.diagram.contains("d.rs")); + // Epicenter class applied to b. + assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("epicenter")), + "expected epicenter class assignment:\n{}", r.diagram + ); + } + + #[test] + fn blast_radius_marks_epicenter_in_dot() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: Some("b"), + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // Epicenter node `b` gets the bold red fill; other nodes don't. + let b_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"b\" [")).unwrap(); + assert!(b_line.contains("fillcolor=\"#ff3333\""), "epicenter missing red fill: {}", b_line); + let a_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); + assert!(!a_line.contains("fillcolor=\"#ff3333\""), "non-epicenter got epicenter fill: {}", a_line); + } + + #[test] + fn blast_radius_overrides_focus() { + // When both are set, blast_radius wins. Fixture: a -> b -> c -> d. + // With blast_radius=d: {d} ∪ {} ∪ {c} = {d, c}. Focus=a would give + // {a, b} at depth=1 — verify we get the blast set, not the BFS set. 
+ let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: Some("a"), + depth: 1, + max_nodes: 10, + show_cochange: None, + blast_radius: Some("d"), + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert_eq!(r.node_count, 2); + assert!(r.diagram.contains("d.rs")); + assert!(r.diagram.contains("c.rs")); + assert!(!r.diagram.contains("a.rs")); + assert!(!r.diagram.contains("b.rs")); + } + + #[test] + fn blast_radius_accepts_path_suffix() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: Some("b.rs"), + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert_eq!(r.node_count, 3); + } + + #[test] + fn blast_radius_unknown_target_errors() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: Some("does_not_exist"), + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }); + assert!(r.is_err()); + let err = r.unwrap_err(); + assert!(err.contains("blast-radius target not found"), "wrong error: {}", err); + } + + #[test] + fn basic_constructor_matches_manual_defaults() { + let opts = RenderOptions::basic(DiagramFormat::Mermaid, 42); + assert_eq!(opts.format, DiagramFormat::Mermaid); + assert!(opts.focus.is_none()); + assert_eq!(opts.depth, 2); + assert_eq!(opts.max_nodes, 42); + assert!(opts.show_cochange.is_none()); + assert!(opts.blast_radius.is_none()); + assert!(!opts.docs_only); + assert!(opts.group_by_folder_depth.is_none()); + assert!(!opts.color_by_owner); + } + #[test] fn overlays_respect_max_nodes_truncation() { // Cycle spans a,b,c but max_nodes=2 cuts the graph — the renderer must @@ -856,6 +1922,11 @@ mod tests { focus: None, depth: 2, max_nodes: 2, + 
show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, }).unwrap(); assert!(r.truncated); assert_eq!(r.node_count, 2); @@ -869,4 +1940,465 @@ mod tests { assert!(idx < edge_count, "linkStyle {} refers to an edge that wasn't emitted", idx); } } + + // ---------- doc-map (Phase 3.2) ---------------------------------------- + + fn doc_node(id: &str, ext: &str) -> GraphNode { + let mut n = node(id, None); + n.path = format!("docs/{}.{}", id, ext); + n.language = "markdown".into(); + n + } + + fn fixture_with_docs() -> ProjectGraphResponse { + // a.rs <- README.md (doc → code), config.yaml isolated from code edges, + // plus b/c/d/isolated from the base fixture. README references a.rs. + let mut g = fixture(); + g.nodes.push(doc_node("README", "md")); + g.nodes.push(doc_node("config", "yaml")); + g.edges.push(edge("README", "a")); // README references code file a + g + } + + #[test] + fn mermaid_doc_node_uses_stadium_shape_and_sec_label() { + let g = fixture_with_docs(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // Stadium shape `([...])` for docs, `sec` unit label, and `doc` classDef. + assert!(r.diagram.contains("classDef doc"), "missing doc classDef:\n{}", r.diagram); + assert!( + r.diagram.lines().any(|l| l.contains("([\"README.md")), + "doc node missing stadium shape:\n{}", r.diagram + ); + assert!(r.diagram.contains("sec\"])"), "doc node missing sec unit:\n{}", r.diagram); + // Non-doc node still uses square bracket + fn. + assert!( + r.diagram.lines().any(|l| l.contains("[\"a.rs") && l.contains("fn\"]")), + "code node shape/unit regressed:\n{}", r.diagram + ); + // Doc class is applied via a class statement. 
+ assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("doc")), + "doc class not assigned:\n{}", r.diagram + ); + } + + #[test] + fn dot_doc_node_uses_note_shape_and_yellow_fill() { + let g = fixture_with_docs(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + let doc_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"README\" [")).unwrap(); + assert!(doc_line.contains("shape=note"), "doc not shape=note: {}", doc_line); + assert!(doc_line.contains("#fff4c0"), "doc not yellow fill: {}", doc_line); + assert!(doc_line.contains("sec\""), "doc missing sec unit: {}", doc_line); + + let code_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); + assert!(code_line.contains("shape=box"), "code shape regressed: {}", code_line); + assert!(code_line.contains("fn\""), "code unit regressed: {}", code_line); + } + + #[test] + fn docs_only_selects_docs_and_their_neighbors() { + // README references a.rs. docs_only should yield {README, config, a}: + // both docs plus the one code neighbor. b/c/d/isolated are excluded. 
+ let g = fixture_with_docs(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: true, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(r.diagram.contains("README.md"), "missing README:\n{}", r.diagram); + assert!(r.diagram.contains("config.yaml"), "missing config:\n{}", r.diagram); + assert!(r.diagram.contains("a.rs"), "missing referenced code file a.rs:\n{}", r.diagram); + assert!(!r.diagram.contains("b.rs"), "b.rs should not render in docs_only:\n{}", r.diagram); + assert!(!r.diagram.contains("c.rs"), "c.rs should not render in docs_only:\n{}", r.diagram); + assert!(!r.diagram.contains("d.rs"), "d.rs should not render in docs_only:\n{}", r.diagram); + assert_eq!(r.node_count, 3); + } + + #[test] + fn docs_only_blast_radius_wins_over_docs_only() { + // Selection precedence: blast_radius > focus > docs_only > top. When + // both blast_radius and docs_only are set, blast_radius selection + // applies — docs_only is ignored. + let g = fixture_with_docs(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: Some("b"), + docs_only: true, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // Blast radius of b in the base edges: {a, b, c}. 
+ assert!(r.diagram.contains("a.rs")); + assert!(r.diagram.contains("b.rs")); + assert!(r.diagram.contains("c.rs")); + assert!(!r.diagram.contains("README.md"), "docs_only should be overridden:\n{}", r.diagram); + } + + // ---------- folder-collapsed view (Phase 3.3) -------------------------- + + fn node_at(id: &str, path: &str) -> GraphNode { + let mut n = node(id, None); + n.path = path.into(); + n + } + + fn fixture_with_folders() -> ProjectGraphResponse { + // Layout: + // src/api/users.rs (api_users) → src/db/sql.rs (db_sql) + // src/api/posts.rs (api_posts) → src/db/sql.rs + // src/api/users.rs → src/api/posts.rs (intra-folder, must be dropped) + // tests/foo.rs (tests_foo) → src/api/users.rs + // Depth 1 groups: {src, tests}; edges src↔src dropped, src↔tests kept, + // tests→src kept. + let mut g = ProjectGraphResponse { + nodes: vec![ + node_at("api_users", "src/api/users.rs"), + node_at("api_posts", "src/api/posts.rs"), + node_at("db_sql", "src/db/sql.rs"), + node_at("tests_foo", "tests/foo.rs"), + ], + edges: vec![ + edge("api_users", "db_sql"), + edge("api_posts", "db_sql"), + edge("api_users", "api_posts"), + edge("tests_foo", "api_users"), + ], + cycles: vec![], + god_modules: vec![], + layer_violations: vec![], + metadata: GraphMetadata { + total_files: 4, + total_edges: 4, + languages: HashMap::new(), + generated_at: "".into(), + bridge_count: None, + cycle_count: None, + god_module_count: None, + health_score: None, + layer_violation_count: None, + architectural_drift: None, + hotspot_count: None, + dead_code_count: None, + unreferenced_exports_count: None, + }, + cochange_pairs: vec![], + }; + // Give api_users a hotspot score so we can assert max-folding. 
+ if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "api_users") { + n.hotspot_score = Some(85.0); + } + g + } + + #[test] + fn folder_collapse_depth_one_groups_top_level_dirs() { + let g = fixture_with_folders(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: Some(1), + color_by_owner: false, + }).unwrap(); + // Two folder nodes: `src` and `tests`. The individual files must not + // appear by filename (only the folder labels do). + assert_eq!(r.node_count, 2); + assert!(r.diagram.contains("src/"), "folder label missing:\n{}", r.diagram); + assert!(r.diagram.contains("tests/"), "folder label missing:\n{}", r.diagram); + assert!(!r.diagram.contains("users.rs")); + assert!(!r.diagram.contains("posts.rs")); + assert!(!r.diagram.contains("sql.rs")); + // Subroutine shape + folder class applied. + assert!(r.diagram.contains("classDef folder")); + assert!(r.diagram.contains("[["), "missing subroutine shape:\n{}", r.diagram); + assert!( + r.diagram.lines().any(|l| l.trim_start().starts_with("class N") && l.contains("folder")), + "folder class not assigned:\n{}", r.diagram + ); + } + + #[test] + fn folder_collapse_drops_intra_folder_edges_and_aggregates() { + let g = fixture_with_folders(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: Some(1), + color_by_owner: false, + }).unwrap(); + // Expected edges after collapse: tests → src (1). The src→src edges + // from the file graph must be dropped. The src→src count is non-zero + // in the file graph, but at folder granularity it's a self-loop. 
+ let edge_count = r.diagram.lines().filter(|l| l.contains(" -> ")).count(); + assert_eq!(edge_count, 1, "expected exactly 1 folder edge:\n{}", r.diagram); + assert!(r.diagram.contains("\"tests\" -> \"src\""), "expected tests→src:\n{}", r.diagram); + // Folder shape + fill. + let src_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"src\" [")).unwrap(); + assert!(src_line.contains("shape=folder"), "folder shape missing: {}", src_line); + assert!(src_line.contains("#d6e9ff"), "folder fill missing: {}", src_line); + // `src` contains 3 files with 3+3+3 = 9 fn. + assert!(src_line.contains("3 files"), "file count missing: {}", src_line); + assert!(src_line.contains("9 fn"), "fn sum missing: {}", src_line); + } + + #[test] + fn folder_collapse_depth_two_separates_api_from_db() { + let g = fixture_with_folders(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: Some(2), + color_by_owner: false, + }).unwrap(); + // Groups: src/api, src/db, tests. api→db edges collapse into one. + // Mermaid labels use the folder *tail*, not the full path, to keep the + // labels readable — the full folder id remains in the node id. + assert_eq!(r.node_count, 3); + assert!(r.diagram.contains("api/"), "api/ label missing:\n{}", r.diagram); + assert!(r.diagram.contains("db/"), "db/ label missing:\n{}", r.diagram); + assert!(r.diagram.contains("tests/"), "tests/ label missing:\n{}", r.diagram); + // api (6 fn from api_users+api_posts) and db (3 fn from db_sql) are separate. 
+ assert!(r.diagram.contains("2 files, 6 fn"), "api aggregation wrong:\n{}", r.diagram); + assert!(r.diagram.contains("1 files, 3 fn"), "db aggregation wrong:\n{}", r.diagram); + } + + // ---------- ownership coloring (Phase 1.6) ----------------------------- + + #[test] + fn owner_color_is_stable_and_within_palette() { + // Two calls for the same owner must yield the same color. + let c1 = owner_color("alice"); + let c2 = owner_color("alice"); + assert_eq!(c1, c2); + // All colors must start with `#` and be 7 chars (hex triple). + assert_eq!(c1.len(), 7); + assert!(c1.starts_with('#')); + // Different owners hash to (likely) different palette entries — at + // minimum: both are valid palette entries even if they collide. + let c_bob = owner_color("bob"); + assert_eq!(c_bob.len(), 7); + assert!(c_bob.starts_with('#')); + } + + #[test] + fn mermaid_color_by_owner_emits_style_and_drops_role_class() { + let mut g = fixture(); + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "a") { + n.owner = Some("alice".into()); + } + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "b") { + n.owner = Some("bob".into()); + } + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: true, + }).unwrap(); + // Role class suffixes must be dropped so the per-node `style` wins. + assert!(!r.diagram.contains(":::core"), "role class leaked:\n{}", r.diagram); + assert!(!r.diagram.contains(":::bridge"), "role class leaked:\n{}", r.diagram); + // Owner colors are applied via explicit `style` lines. 
+ let alice = owner_color("alice"); + let bob = owner_color("bob"); + assert!( + r.diagram.contains(&format!("fill:{}", alice)), + "alice color missing:\n{}", r.diagram + ); + assert!( + r.diagram.contains(&format!("fill:{}", bob)), + "bob color missing:\n{}", r.diagram + ); + } + + #[test] + fn dot_color_by_owner_paints_fillcolor() { + let mut g = fixture(); + if let Some(n) = g.nodes.iter_mut().find(|n| n.module_id == "a") { + n.owner = Some("alice".into()); + } + let r = render(&g, &RenderOptions { + format: DiagramFormat::Dot, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: true, + }).unwrap(); + let alice = owner_color("alice"); + let a_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"a\" [")).unwrap(); + assert!( + a_line.contains(&format!("fillcolor=\"{}\"", alice)), + "alice fill missing: {}", a_line + ); + // Nodes without an owner fall back to the default white. + let b_line = r.diagram.lines().find(|l| l.trim_start().starts_with("\"b\" [")).unwrap(); + assert!(b_line.contains("fillcolor=\"#fff\""), "default fill missing: {}", b_line); + } + + #[test] + fn folder_collapse_depth_zero_is_noop() { + let g = fixture_with_folders(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Mermaid, + focus: None, + depth: 2, + max_nodes: 20, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: Some(0), + color_by_owner: false, + }).unwrap(); + // Depth=0 falls back to the uncollapsed graph — every file node renders. 
+ assert_eq!(r.node_count, 4); + assert!(r.diagram.contains("users.rs")); + assert!(r.diagram.contains("sql.rs")); + } + + #[test] + fn ascii_format_parses() { + assert_eq!(DiagramFormat::parse("ascii").unwrap(), DiagramFormat::Ascii); + assert_eq!(DiagramFormat::parse("tree").unwrap(), DiagramFormat::Ascii); + assert_eq!(DiagramFormat::parse("TEXT").unwrap(), DiagramFormat::Ascii); + } + + #[test] + fn ascii_renders_top_by_degree_as_tree() { + // Default selection: top-by-degree picks a highly connected root. + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Ascii, + focus: None, + depth: 2, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // Tree characters must show up somewhere below the root line. + assert!(r.diagram.contains("├── ") || r.diagram.contains("└── "), + "expected tree glyphs in:\n{}", r.diagram); + // Every included node should appear at least once. + assert!(r.diagram.contains("a.rs")); + assert!(r.diagram.contains("b.rs")); + } + + #[test] + fn ascii_rooted_on_focus() { + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Ascii, + focus: Some("a"), + depth: 3, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + // First line is the root, un-prefixed. + let first = r.diagram.lines().next().unwrap(); + assert!(first.starts_with("a.rs"), "root line wrong: {first:?}"); + assert!(!first.starts_with("├") && !first.starts_with("└")); + } + + #[test] + fn ascii_breaks_cycles_with_seen_marker() { + // Build a->b->a cycle so the walker must stop re-entering `a`. 
+ let mut g = fixture(); + g.edges.push(edge("b", "a")); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Ascii, + focus: Some("a"), + depth: 5, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(r.diagram.contains("↑ seen"), + "expected cycle marker in ascii output:\n{}", r.diagram); + } + + #[test] + fn ascii_respects_depth_cap() { + // a->b->c->d chain; depth=1 from a should reach b but not c. + let g = fixture(); + let r = render(&g, &RenderOptions { + format: DiagramFormat::Ascii, + focus: Some("a"), + depth: 1, + max_nodes: 10, + show_cochange: None, + blast_radius: None, + docs_only: false, + group_by_folder_depth: None, + color_by_owner: false, + }).unwrap(); + assert!(r.diagram.contains("a.rs")); + assert!(r.diagram.contains("b.rs")); + // depth=1 selection via bfs_from_anchor only *includes* a and b, so + // c.rs must not appear in the ascii tree either. + assert!(!r.diagram.contains("c.rs"), "depth cap not respected:\n{}", r.diagram); + } } diff --git a/third_party/cartographer/mapper-core/cartographer/src/diagram_export.rs b/third_party/cartographer/mapper-core/cartographer/src/diagram_export.rs new file mode 100644 index 00000000..61da410e --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/src/diagram_export.rs @@ -0,0 +1,170 @@ +//! Render a diagram to SVG/PNG by shelling out to an external converter. +//! +//! We pick the converter based on the *source* format (Mermaid vs DOT) — not +//! on a user-visible flag — so callers just say "write to foo.svg" and we do +//! the right thing: +//! +//! Mermaid + .svg/.png → `mmdc` (Mermaid CLI, npm-installed) +//! DOT + .svg/.png → `dot` (Graphviz binary) +//! +//! If the target extension isn't `.svg`/`.png`, we treat the write as a +//! passthrough — the caller's diagram text lands at `target` unchanged. 
+ +use std::io::Write; +use std::path::Path; +use std::process::{Command, Stdio}; + +use crate::diagram::DiagramFormat; + +/// What `export_diagram` did, so the CLI can print a matching status line. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ExportKind { + /// Wrote the diagram source straight to disk (no converter invoked). + Source, + /// Rendered via `mmdc` (Mermaid → SVG or PNG). + MermaidSvg, + MermaidPng, + /// Rendered via Graphviz `dot`. + DotSvg, + DotPng, +} + +/// Write `content` (diagram source in `source_format`) to `target`, converting +/// to SVG/PNG on the way if the target extension calls for it. +/// +/// Errors if a converter is needed but the binary is missing from `$PATH` — +/// returning a message that tells the user how to install it. +pub fn export_diagram( + content: &str, + source_format: DiagramFormat, + target: &Path, +) -> Result { + let ext = target + .extension() + .and_then(|e| e.to_str()) + .map(|s| s.to_lowercase()) + .unwrap_or_default(); + + match (source_format, ext.as_str()) { + (_, "svg") | (_, "png") => convert(content, source_format, target, &ext), + _ => { + std::fs::write(target, content).map_err(|e| e.to_string())?; + Ok(ExportKind::Source) + } + } +} + +fn convert( + content: &str, + source_format: DiagramFormat, + target: &Path, + ext: &str, +) -> Result { + match source_format { + DiagramFormat::Mermaid => export_mermaid(content, target, ext), + DiagramFormat::Dot => export_dot(content, target, ext), + // ASCII trees are text-only by design — there's no sensible converter + // that turns them into a raster/vector. Tell the user to pick a + // different format explicitly instead of silently writing the text. 
+ DiagramFormat::Ascii => Err( + "ASCII diagrams can't be rendered to .svg/.png — use `--format mermaid` or `--format dot` for image output, or write to a text extension.".to_string(), + ), + } +} + +fn export_mermaid(content: &str, target: &Path, ext: &str) -> Result { + // mmdc reads from a file and writes to a path; it can't read stdin. + let tmp = tempfile(".mmd")?; + std::fs::write(&tmp, content).map_err(|e| e.to_string())?; + + let status = Command::new("mmdc") + .args([ + "-i", + tmp.to_str().ok_or("tempfile path not UTF-8")?, + "-o", + target.to_str().ok_or("target path not UTF-8")?, + ]) + .status() + .map_err(|_| { + "`mmdc` not found on PATH. Install via `npm install -g @mermaid-js/mermaid-cli`." + .to_string() + })?; + + // Remove the tmp regardless of outcome — leaving `.mmd` files around on + // failure mostly confuses users; the error message below is enough. + let _ = std::fs::remove_file(&tmp); + + if !status.success() { + return Err(format!("mmdc exited with status {}", status)); + } + Ok(if ext == "png" { ExportKind::MermaidPng } else { ExportKind::MermaidSvg }) +} + +fn export_dot(content: &str, target: &Path, ext: &str) -> Result { + // `dot` accepts stdin — no tempfile needed. + let mut child = Command::new("dot") + .args(["-T", ext, "-o", target.to_str().ok_or("target path not UTF-8")?]) + .stdin(Stdio::piped()) + .spawn() + .map_err(|_| { + "`dot` not found on PATH. Install Graphviz (e.g. `brew install graphviz`).".to_string() + })?; + + { + let stdin = child + .stdin + .as_mut() + .ok_or_else(|| "could not open dot stdin".to_string())?; + stdin + .write_all(content.as_bytes()) + .map_err(|e| e.to_string())?; + } + + let status = child.wait().map_err(|e| e.to_string())?; + if !status.success() { + return Err(format!("dot exited with status {}", status)); + } + Ok(if ext == "png" { ExportKind::DotPng } else { ExportKind::DotSvg }) +} + +/// Allocate a unique path in the system temp dir with the given extension. 
+/// We avoid pulling in the `tempfile` crate for one call — the path is used +/// immediately and removed in the happy path. +fn tempfile(ext: &str) -> Result { + let mut path = std::env::temp_dir(); + let pid = std::process::id(); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + path.push(format!("cartographer-{}-{}{}", pid, nanos, ext)); + Ok(path) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn passthrough_for_non_image_extension() { + let dir = std::env::temp_dir(); + let target = dir.join(format!("cartographer-test-{}.mmd", std::process::id())); + let kind = export_diagram("graph TD\n A --> B", DiagramFormat::Mermaid, &target).unwrap(); + assert_eq!(kind, ExportKind::Source); + let written = std::fs::read_to_string(&target).unwrap(); + assert!(written.contains("graph TD")); + let _ = std::fs::remove_file(&target); + } + + #[test] + fn passthrough_for_dot_source_with_dot_extension() { + let dir = std::env::temp_dir(); + let target = dir.join(format!("cartographer-test-{}.dot", std::process::id())); + let kind = export_diagram("digraph G { A -> B }", DiagramFormat::Dot, &target).unwrap(); + assert_eq!(kind, ExportKind::Source); + let _ = std::fs::remove_file(&target); + } + + // mmdc / dot may not be installed in CI, so we don't drive actual + // conversion in unit tests. The shell-out paths are exercised manually. 
+} diff --git a/third_party/cartographer/mapper-core/cartographer/src/git_analysis.rs b/third_party/cartographer/mapper-core/cartographer/src/git_analysis.rs index 555530a1..f7664e45 100644 --- a/third_party/cartographer/mapper-core/cartographer/src/git_analysis.rs +++ b/third_party/cartographer/mapper-core/cartographer/src/git_analysis.rs @@ -402,3 +402,75 @@ pub fn git_diff_files(root: &Path, c1: &str, c2: &str) -> Vec<(String, char)> { result } + +// --------------------------------------------------------------------------- +// git_ownership +// --------------------------------------------------------------------------- + +/// Dominant author per file over the last `limit` commits. "Dominant" = +/// highest raw commit count; ties broken alphabetically. Bot authors and +/// formatting-only commits are excluded (same filters as churn/cochange). +/// +/// Returns an empty map if git is unavailable or the directory is not a repo. +/// Keys are repo-relative paths matching `git log --name-only` output. +pub fn git_ownership(root: &Path, limit: usize) -> HashMap { + let output = Command::new("git") + .args([ + "-C", + &root.to_string_lossy(), + "log", + &format!("-n {}", limit), + "--name-only", + "--format=%x1f%an%x1f%s", + ]) + .output(); + + let output = match output { + Ok(o) if o.status.success() => o, + _ => return HashMap::new(), + }; + + let text = String::from_utf8_lossy(&output.stdout); + + // Per-file per-author commit counts. We need the raw author name here + // (not just the skip flag) so we parse the header locally instead of + // reusing `parse_header`. 
+ let mut counts: HashMap> = HashMap::new(); + let mut current_author: Option = None; + let mut skip_current = false; + + for line in text.lines() { + let line = line.trim(); + if line.starts_with('\x1f') { + let parts: Vec<&str> = line.splitn(3, '\x1f').collect(); + let author = parts.get(1).copied().unwrap_or("").trim().to_string(); + let subject = parts.get(2).copied().unwrap_or("").trim(); + skip_current = is_bot_author(&author) || is_formatting_subject(subject); + current_author = if skip_current { None } else { Some(author) }; + continue; + } + if line.is_empty() || skip_current { + continue; + } + if let Some(ref author) = current_author { + *counts + .entry(line.to_string()) + .or_default() + .entry(author.clone()) + .or_insert(0) += 1; + } + } + + let mut owners: HashMap = HashMap::with_capacity(counts.len()); + for (file, authors) in counts { + // Pick the author with the highest count; ties → alphabetical so the + // result is deterministic across runs. + let dominant = authors + .into_iter() + .max_by(|a, b| a.1.cmp(&b.1).then_with(|| b.0.cmp(&a.0))); + if let Some((name, _)) = dominant { + owners.insert(file, name); + } + } + owners +} diff --git a/third_party/cartographer/mapper-core/cartographer/src/html_export.rs b/third_party/cartographer/mapper-core/cartographer/src/html_export.rs new file mode 100644 index 00000000..23625601 --- /dev/null +++ b/third_party/cartographer/mapper-core/cartographer/src/html_export.rs @@ -0,0 +1,413 @@ +//! Self-contained interactive HTML diagram. +//! +//! One-file output: no network dependency, no external assets. The graph is +//! 
serialized into a ` + + +"#, + nodes_json = nodes_json, + edges_json = edges_json, + ) +} + +fn json_str(s: &str) -> String { + let mut out = String::with_capacity(s.len() + 2); + out.push('"'); + for c in s.chars() { + match c { + '"' => out.push_str("\\\""), + '\\' => out.push_str("\\\\"), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)), + c => out.push(c), + } + } + out.push('"'); + out +} + +fn json_opt_str(s: Option<&str>) -> String { + match s { + Some(v) => json_str(v), + None => "null".into(), + } +} + +fn violation_tag(vt: &LayerViolationType) -> &'static str { + match vt { + LayerViolationType::BackCall => "\"BackCall\"", + LayerViolationType::SkipCall => "\"SkipCall\"", + LayerViolationType::CircularCrossLayer => "\"CircularCrossLayer\"", + LayerViolationType::DirectForeignImport => "\"DirectForeignImport\"", + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::api::{GraphEdge, GraphMetadata}; + + fn node(id: &str, role: Option<&str>) -> GraphNode { + GraphNode { + module_id: id.into(), + path: format!("src/{}.rs", id), + language: "rust".into(), + signature_count: 3, + complexity: None, + is_bridge: None, + bridge_score: None, + degree: None, + risk_level: None, + churn: Some(5), + hotspot_score: Some(42.0), + role: role.map(String::from), + is_dead: None, + unreferenced_exports: None, + fan_in: Some(2), + fan_out: Some(1), + cochange_partners: None, + cochange_entropy: None, + owner: Some("alice".into()), + } + } + + fn fixture() -> ProjectGraphResponse { + ProjectGraphResponse { + nodes: vec![ + node("a", Some("core")), + node("b", None), + node("c", Some("bridge")), + ], + edges: vec![ + GraphEdge { + source: "a".into(), + target: "b".into(), + edge_type: "import".into(), + at_range: None, + }, + GraphEdge { + source: "b".into(), + target: "c".into(), + edge_type: "import".into(), + at_range: None, + }, + ], + cycles: 
vec![], + god_modules: vec![], + layer_violations: vec![], + metadata: GraphMetadata { + total_files: 3, + total_edges: 2, + languages: HashMap::new(), + generated_at: "".into(), + bridge_count: None, + cycle_count: None, + god_module_count: None, + health_score: None, + layer_violation_count: None, + architectural_drift: None, + hotspot_count: None, + dead_code_count: None, + unreferenced_exports_count: None, + }, + cochange_pairs: vec![], + } + } + + #[test] + fn html_contains_structure_and_embedded_graph() { + let g = fixture(); + let included: Vec = g.nodes.iter().map(|n| n.module_id.clone()).collect(); + let html = render_html(&g, &included); + assert!(html.starts_with("")); + assert!(html.contains("