From 8c36c86d150ebe8d65a0288ca7af9461b830326e Mon Sep 17 00:00:00 2001 From: avfirsov Date: Sat, 13 Jun 2026 10:21:00 +0300 Subject: [PATCH 1/2] feat(cli): add daemonless `gortex analyze` command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose graph analyzers on the CLI without a running daemon. `gortex analyze --kind KIND --path PATH [--format json|text]` indexes the path entirely in-process (no daemon, no socket) and runs the analyzer against the fresh graph — handy for CI pipelines and one-shot scripts. Supported kinds: `synthesizers`, `resolution_outcomes`. To avoid duplicating logic between the MCP tool and the CLI, the analyzer cores are extracted into a new `internal/analyzer` package as pure calculations (graph in, struct out): - `AnalyzeSynthesizers` - `AnalyzeResolutionOutcomes` / `ClassifyUnresolved` The existing MCP handlers (`handleAnalyzeSynthesizers`, `handleAnalyzeResolutionOutcomes`) are refactored to call these cores; their JSON/compact output shapes are unchanged and the existing MCP-layer tests pass as-is. The resolution-outcome taxonomy constants now live in `internal/analyzer` with thin aliases kept in the mcp package. Tests: unit tests for both cores + an end-to-end index→analyze CLI test. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/gortex/analyze.go | 154 +++++++++++++ cmd/gortex/analyze_test.go | 86 ++++++++ internal/analyzer/resolution_outcomes.go | 193 +++++++++++++++++ internal/analyzer/resolution_outcomes_test.go | 93 ++++++++ internal/analyzer/synthesizers.go | 117 ++++++++++ internal/analyzer/synthesizers_test.go | 84 ++++++++ .../mcp/tools_analyze_resolution_outcomes.go | 202 ++---------------- internal/mcp/tools_analyze_synthesizers.go | 81 ++----- 8 files changed, 762 insertions(+), 248 deletions(-) create mode 100644 cmd/gortex/analyze.go create mode 100644 cmd/gortex/analyze_test.go create mode 100644 internal/analyzer/resolution_outcomes.go create mode 100644 internal/analyzer/resolution_outcomes_test.go create mode 100644 internal/analyzer/synthesizers.go create mode 100644 internal/analyzer/synthesizers_test.go diff --git a/cmd/gortex/analyze.go b/cmd/gortex/analyze.go new file mode 100644 index 00000000..b3c70fa9 --- /dev/null +++ b/cmd/gortex/analyze.go @@ -0,0 +1,154 @@ +package main + +// PURPOSE — daemonless `gortex analyze` cobra command: indexes a repository +// path entirely in-process (no daemon socket) and runs one of the supported +// analyzer kinds against the resulting graph, printing either JSON or a +// human-readable text summary. +// RATIONALE — gives CI pipelines and one-shot scripts access to graph +// analytics without requiring a running daemon; the full indexing pipeline +// runs in the calling process and exits when done. 
+// KEYWORDS — analyze, daemonless, synthesizers, resolution_outcomes, CLI + +import ( + "context" + "encoding/json" + "fmt" + "runtime" + + "github.com/spf13/cobra" + "go.uber.org/zap" + + "github.com/zzet/gortex/internal/analyzer" + "github.com/zzet/gortex/internal/config" + "github.com/zzet/gortex/internal/graph" + "github.com/zzet/gortex/internal/indexer" + "github.com/zzet/gortex/internal/parser" + "github.com/zzet/gortex/internal/parser/languages" +) + +var ( + analyzeKind string + analyzePath string + analyzeFormat string +) + +// supportedAnalyzeKinds lists the analyzer kinds accepted by the --kind flag. +var supportedAnalyzeKinds = []string{ + "synthesizers", + "resolution_outcomes", +} + +var analyzeCmd = &cobra.Command{ + Use: "analyze", + Short: "Index a repository in-process and run an analyzer (no daemon required)", + Long: `Indexes the repository at --path entirely in-process — no daemon, no socket — +then runs the specified --kind analyzer and prints results. + +Supported kinds: + synthesizers — Synthesized edge groups by framework-dispatch pass + resolution_outcomes — Taxonomy of unresolved call/reference edges`, + RunE: runAnalyze, +} + +func init() { + analyzeCmd.Flags().StringVar(&analyzeKind, "kind", "", "analyzer kind: synthesizers|resolution_outcomes (required)") + analyzeCmd.Flags().StringVar(&analyzePath, "path", ".", "repository path to index") + analyzeCmd.Flags().StringVar(&analyzeFormat, "format", "text", "output format: json|text") + _ = analyzeCmd.MarkFlagRequired("kind") + rootCmd.AddCommand(analyzeCmd) +} + +// runAnalyze is the RunE for analyzeCmd. It loads config, builds the graph + +// registry + parser in-process, indexes the target path, then dispatches to +// the requested analyzer kind. +func runAnalyze(cmd *cobra.Command, _ []string) error { + // Validate --kind early so users get a clear error before any indexing work. 
+ if !isSupportedKind(analyzeKind) { + return fmt.Errorf("unsupported --kind %q; supported kinds: synthesizers, resolution_outcomes", analyzeKind) + } + + cfg, err := config.Load(cfgFile) + if err != nil { + return fmt.Errorf("loading config: %w", err) + } + + // Mirror the index.go pattern: default Workers to NumCPU when config + // leaves it at zero. + if cfg.Index.Workers == 0 { + cfg.Index.Workers = runtime.NumCPU() + } + + // Build the in-process graph + indexer (no daemon involved). Mirrors the + // registry construction in `gortex index`. + logger := zap.NewNop() + g := graph.New() + reg := parser.NewRegistry() + languages.RegisterAll(reg) + languages.RegisterCustomGrammars(reg, cfg.Index.Grammars, logger) + languages.RegisterExtractorPlugins(reg, cfg.Index.ExtractorPlugins, logger) + languages.RegisterFallbackChunkers(reg, cfg.Index.FallbackChunkers, logger) + idx := indexer.New(g, reg, cfg.Index, logger) + + ctx := context.Background() + if _, err := idx.IndexCtx(ctx, analyzePath); err != nil { + return fmt.Errorf("indexing %s: %w", analyzePath, err) + } + + // Dispatch to the requested analyzer kind. + switch analyzeKind { + case "synthesizers": + return runSynthesizers(cmd, g) + case "resolution_outcomes": + return runResolutionOutcomes(cmd, g) + default: + // Unreachable — validated above, but keeps the compiler happy. + return fmt.Errorf("unsupported kind: %s", analyzeKind) + } +} + +// isSupportedKind returns true if kind is in supportedAnalyzeKinds. +func isSupportedKind(kind string) bool { + for _, k := range supportedAnalyzeKinds { + if k == kind { + return true + } + } + return false +} + +// runSynthesizers analyzes synthesized edge groups and prints them. 
+func runSynthesizers(cmd *cobra.Command, g graph.Store) error { + result := analyzer.AnalyzeSynthesizers(g) + + switch analyzeFormat { + case "json": + enc := json.NewEncoder(cmd.OutOrStdout()) + enc.SetIndent("", " ") + return enc.Encode(result) + default: + fmt.Fprintf(cmd.OutOrStdout(), "synthesizers: groups=%d total_edges=%d\n", + len(result.Synthesizers), result.TotalEdges) + for _, row := range result.Synthesizers { + fmt.Fprintf(cmd.OutOrStdout(), " %s: edges=%d\n", row.Name, row.Edges) + } + return nil + } +} + +// runResolutionOutcomes analyzes unresolved edge taxonomy and prints it. +func runResolutionOutcomes(cmd *cobra.Command, g graph.Store) error { + result := analyzer.AnalyzeResolutionOutcomes(g, "", 50) + + switch analyzeFormat { + case "json": + enc := json.NewEncoder(cmd.OutOrStdout()) + enc.SetIndent("", " ") + return enc.Encode(result) + default: + fmt.Fprintf(cmd.OutOrStdout(), "resolution_outcomes: total=%d\n", result.Total) + for reason, count := range result.ByReason { + fmt.Fprintf(cmd.OutOrStdout(), " %s: %d\n", reason, count) + } + return nil + } +} diff --git a/cmd/gortex/analyze_test.go b/cmd/gortex/analyze_test.go new file mode 100644 index 00000000..17490ecf --- /dev/null +++ b/cmd/gortex/analyze_test.go @@ -0,0 +1,86 @@ +package main + +// PURPOSE — integration tests for the daemonless `gortex analyze` command: +// validates kind validation and an end-to-end index+analyze cycle producing +// machine-readable JSON. +// KEYWORDS — analyze, CLI, daemonless, integration + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/spf13/cobra" +) + +func TestRunAnalyze_UnsupportedKind(t *testing.T) { + analyzeKind = "bogus" + analyzePath = "." 
+ analyzeFormat = "json" + cmd := &cobra.Command{} + cmd.SetOut(&bytes.Buffer{}) + if err := runAnalyze(cmd, nil); err == nil { + t.Fatal("expected error for unsupported --kind") + } +} + +func TestRunAnalyze_SynthesizersE2E(t *testing.T) { + dir := t.TempDir() + src := "package p\n\nfunc A() { B() }\n\nfunc B() {}\n" + if err := os.WriteFile(filepath.Join(dir, "main.go"), []byte(src), 0o644); err != nil { + t.Fatal(err) + } + + analyzeKind = "synthesizers" + analyzePath = dir + analyzeFormat = "json" + + var out bytes.Buffer + cmd := &cobra.Command{} + cmd.SetOut(&out) + if err := runAnalyze(cmd, nil); err != nil { + t.Fatalf("runAnalyze: %v", err) + } + + var res map[string]any + if err := json.Unmarshal(out.Bytes(), &res); err != nil { + t.Fatalf("invalid JSON output: %v\n%s", err, out.String()) + } + if _, ok := res["synthesizers"]; !ok { + t.Errorf("expected \"synthesizers\" key in output, got: %s", out.String()) + } + if _, ok := res["total_edges"]; !ok { + t.Errorf("expected \"total_edges\" key in output, got: %s", out.String()) + } +} + +func TestRunAnalyze_ResolutionOutcomesE2E(t *testing.T) { + dir := t.TempDir() + // A call to an undefined function leaves an unresolved edge the + // resolution_outcomes analyzer can classify. 
+ src := "package p\n\nfunc A() { missingFunc() }\n" + if err := os.WriteFile(filepath.Join(dir, "main.go"), []byte(src), 0o644); err != nil { + t.Fatal(err) + } + + analyzeKind = "resolution_outcomes" + analyzePath = dir + analyzeFormat = "json" + + var out bytes.Buffer + cmd := &cobra.Command{} + cmd.SetOut(&out) + if err := runAnalyze(cmd, nil); err != nil { + t.Fatalf("runAnalyze: %v", err) + } + + var res map[string]any + if err := json.Unmarshal(out.Bytes(), &res); err != nil { + t.Fatalf("invalid JSON output: %v\n%s", err, out.String()) + } + if _, ok := res["by_reason"]; !ok { + t.Errorf("expected \"by_reason\" key in output, got: %s", out.String()) + } +} diff --git a/internal/analyzer/resolution_outcomes.go b/internal/analyzer/resolution_outcomes.go new file mode 100644 index 00000000..5b818897 --- /dev/null +++ b/internal/analyzer/resolution_outcomes.go @@ -0,0 +1,193 @@ +package analyzer + +// PURPOSE — pure computation core for the resolution-outcomes analyzer: +// classifies every unresolved call/reference edge by the structured reason +// the resolver gave up and returns a per-reason rollup plus example rows. +// RATIONALE — extracted from the MCP handler so the taxonomy logic is +// independently testable and reusable across surfaces (MCP, CLI, etc.). +// KEYWORDS — resolution_outcomes, unresolved, taxonomy, pure, calculation + +import ( + "strings" + + "github.com/zzet/gortex/internal/graph" +) + +// Resolution-outcome taxonomy constants. These are the canonical source of +// the taxonomy; the MCP layer aliases them so both surfaces agree. +const ( + // OutcomeAmbiguousMultiMatch: two or more same-name, same-language + // definitions exist — the resolver punted. + OutcomeAmbiguousMultiMatch = "ambiguous_multi_match" + // OutcomeCandidateOutOfScope: exactly one same-language definition + // exists but the edge stayed unresolved. 
+ OutcomeCandidateOutOfScope = "candidate_out_of_scope" + // OutcomeCrossLanguageOnly: the only definitions are in a different + // language family. + OutcomeCrossLanguageOnly = "cross_language_only" + // OutcomeStubOnly: the name matches only stub/external-placeholder nodes. + OutcomeStubOnly = "stub_only" + // OutcomeNoDefinition: no definition of this name exists in the graph. + OutcomeNoDefinition = "no_definition" +) + +// ResolutionRow is one unresolved edge in the result. +// JSON field names mirror the MCP output shape exactly. +type ResolutionRow struct { + From string `json:"from"` + To string `json:"to"` + Kind string `json:"edge_kind"` + Name string `json:"name"` + Reason string `json:"reason"` + Candidates int `json:"candidates"` +} + +// ResolutionOutcomesResult is the return type of AnalyzeResolutionOutcomes. +// JSON field names mirror the MCP output shape exactly. +type ResolutionOutcomesResult struct { + ByReason map[string]int `json:"by_reason"` + Total int `json:"total"` + Rows []ResolutionRow `json:"rows"` +} + +// AnalyzeResolutionOutcomes classifies every unresolved call/reference edge +// in the graph by the structured reason the resolver gave up. reasonFilter +// restricts the returned rows to a single outcome; limit caps the row count. +// It is a pure Calculation: no side effects, no I/O. +func AnalyzeResolutionOutcomes(g graph.Store, reasonFilter string, limit int) ResolutionOutcomesResult { + type pending struct { + edge *graph.Edge + name string + } + var todo []pending + fromIDs := map[string]struct{}{} + for _, kind := range []graph.EdgeKind{graph.EdgeCalls, graph.EdgeReferences} { + for e := range g.EdgesByKind(kind) { + if e == nil || !graph.IsUnresolvedTarget(e.To) { + continue + } + name := graph.UnresolvedName(e.To) + if name == "" { + continue + } + // A receiver-qualified placeholder (`unresolved::*.foo`) keeps + // its method name after the dot; normalise to the bare name. 
+ if i := strings.LastIndexByte(name, '.'); i >= 0 && i+1 < len(name) { + name = name[i+1:] + } + todo = append(todo, pending{edge: e, name: name}) + if e.From != "" { + fromIDs[e.From] = struct{}{} + } + } + } + fromList := make([]string, 0, len(fromIDs)) + for id := range fromIDs { + fromList = append(fromList, id) + } + fromNodes := g.GetNodesByIDs(fromList) + + byReason := map[string]int{} + var rows []ResolutionRow + + // Memoise classification by (name, caller-language). + type classKey struct{ name, lang string } + type classVal struct { + reason string + ncand int + } + classCache := map[classKey]classVal{} + + for _, p := range todo { + fromLang := "" + if n := fromNodes[p.edge.From]; n != nil { + fromLang = n.Language + } + key := classKey{name: p.name, lang: fromLang} + cv, ok := classCache[key] + if !ok { + cv.reason, cv.ncand = ClassifyUnresolved(g, p.name, fromLang) + classCache[key] = cv + } + reason, ncand := cv.reason, cv.ncand + byReason[reason]++ + if reasonFilter != "" && reason != reasonFilter { + continue + } + if len(rows) < limit { + rows = append(rows, ResolutionRow{ + From: p.edge.From, To: p.edge.To, Kind: string(p.edge.Kind), + Name: p.name, Reason: reason, Candidates: ncand, + }) + } + } + + total := 0 + for _, n := range byReason { + total += n + } + return ResolutionOutcomesResult{ByReason: byReason, Total: total, Rows: rows} +} + +// ClassifyUnresolved returns the structured suppression reason for an +// unresolved name relative to the caller's language, plus the number of +// real (non-stub) definition candidates considered. It is a pure Calculation. 
+func ClassifyUnresolved(g graph.Store, name, fromLang string) (reason string, candidates int) { + var realSameLang, realOtherLang, stubs int + for _, n := range g.FindNodesByName(name) { + if n == nil { + continue + } + if graph.IsStub(n.ID) { + stubs++ + continue + } + if !nodeIsDefinitionKind(n.Kind) { + continue + } + if fromLang != "" && n.Language != "" && !sameLanguageFamily(fromLang, n.Language) { + realOtherLang++ + continue + } + realSameLang++ + } + switch { + case realSameLang >= 2: + return OutcomeAmbiguousMultiMatch, realSameLang + case realSameLang == 1: + return OutcomeCandidateOutOfScope, 1 + case realOtherLang >= 1: + return OutcomeCrossLanguageOnly, realOtherLang + case stubs >= 1: + return OutcomeStubOnly, 0 + default: + return OutcomeNoDefinition, 0 + } +} + +// nodeIsDefinitionKind reports whether a node kind is a callable/type +// definition an unresolved call or reference could legitimately bind to. +func nodeIsDefinitionKind(k graph.NodeKind) bool { + switch k { + case graph.KindFunction, graph.KindMethod, graph.KindType, + graph.KindInterface, graph.KindVariable, graph.KindConstant, graph.KindField: + return true + } + return false +} + +// sameLanguageFamily folds the TS/JS pair so a cross-file TS→JS reference +// is not mis-reported as a cross-language suppression. +func sameLanguageFamily(a, b string) bool { + if a == b { + return true + } + norm := func(l string) string { + switch l { + case "javascript", "typescript", "tsx", "jsx": + return "jsts" + } + return l + } + return norm(a) == norm(b) +} diff --git a/internal/analyzer/resolution_outcomes_test.go b/internal/analyzer/resolution_outcomes_test.go new file mode 100644 index 00000000..c27a17e2 --- /dev/null +++ b/internal/analyzer/resolution_outcomes_test.go @@ -0,0 +1,93 @@ +package analyzer_test + +// PURPOSE — shape tests for AnalyzeResolutionOutcomes: verify the function +// classifies unresolved edges correctly and returns the right struct shape. 
+// RATIONALE — tests are MCP-layer-free so the core logic is independently +// verifiable; they mirror the taxonomy asserted in the MCP-layer tests. +// KEYWORDS — resolution_outcomes, unit, shape + +import ( + "testing" + + "github.com/zzet/gortex/internal/analyzer" + "github.com/zzet/gortex/internal/graph" +) + +func TestAnalyzeResolutionOutcomes_Shape(t *testing.T) { + g := newTestGraph() + // caller (go) + g.AddNode(&graph.Node{ID: "a.go::caller", Kind: graph.KindFunction, Name: "caller", FilePath: "a.go", Language: "go"}) + + // unresolved edge — no definition in graph at all. + g.AddEdge(&graph.Edge{From: "a.go::caller", To: "unresolved::ghost", Kind: graph.EdgeCalls, FilePath: "a.go", Line: 5}) + + res := analyzer.AnalyzeResolutionOutcomes(g, "", 50) + if res.Total == 0 { + t.Fatal("expected total > 0") + } + if res.Rows == nil { + t.Fatal("expected rows not nil") + } + if len(res.Rows) == 0 { + t.Fatal("expected at least one row") + } +} + +func TestAnalyzeResolutionOutcomes_Taxonomy(t *testing.T) { + g := newTestGraph() + g.AddNode(&graph.Node{ID: "a.go::caller", Kind: graph.KindFunction, Name: "caller", FilePath: "a.go", Language: "go"}) + + // ambiguous_multi_match: two same-name go funcs named "doThing". + g.AddNode(&graph.Node{ID: "x.go::doThing", Kind: graph.KindFunction, Name: "doThing", FilePath: "x.go", Language: "go"}) + g.AddNode(&graph.Node{ID: "y.go::doThing", Kind: graph.KindFunction, Name: "doThing", FilePath: "y.go", Language: "go"}) + g.AddEdge(&graph.Edge{From: "a.go::caller", To: "unresolved::doThing", Kind: graph.EdgeCalls, FilePath: "a.go", Line: 2}) + + // candidate_out_of_scope: exactly one same-lang def named "single". + g.AddNode(&graph.Node{ID: "z.go::single", Kind: graph.KindFunction, Name: "single", FilePath: "z.go", Language: "go"}) + g.AddEdge(&graph.Edge{From: "a.go::caller", To: "unresolved::single", Kind: graph.EdgeCalls, FilePath: "a.go", Line: 3}) + + // cross_language_only: only a python def named "pyOnly". 
+ g.AddNode(&graph.Node{ID: "p.py::pyOnly", Kind: graph.KindFunction, Name: "pyOnly", FilePath: "p.py", Language: "python"}) + g.AddEdge(&graph.Edge{From: "a.go::caller", To: "unresolved::pyOnly", Kind: graph.EdgeCalls, FilePath: "a.go", Line: 4}) + + // no_definition: nothing named "ghost". + g.AddEdge(&graph.Edge{From: "a.go::caller", To: "unresolved::ghost", Kind: graph.EdgeCalls, FilePath: "a.go", Line: 5}) + + res := analyzer.AnalyzeResolutionOutcomes(g, "", 50) + check := func(reason string, want int) { + t.Helper() + got := res.ByReason[reason] + if got != want { + t.Errorf("by_reason[%q] = %d, want %d", reason, got, want) + } + } + check("ambiguous_multi_match", 1) + check("candidate_out_of_scope", 1) + check("cross_language_only", 1) + check("no_definition", 1) +} + +func TestAnalyzeResolutionOutcomes_ReasonFilter(t *testing.T) { + g := newTestGraph() + g.AddNode(&graph.Node{ID: "a.go::caller", Kind: graph.KindFunction, Name: "caller", FilePath: "a.go", Language: "go"}) + g.AddEdge(&graph.Edge{From: "a.go::caller", To: "unresolved::ghost", Kind: graph.EdgeCalls, FilePath: "a.go", Line: 5}) + + res := analyzer.AnalyzeResolutionOutcomes(g, "no_definition", 50) + if len(res.Rows) != 1 { + t.Fatalf("reason filter: want 1 row, got %d", len(res.Rows)) + } + if res.Rows[0].Reason != "no_definition" { + t.Errorf("row reason = %q", res.Rows[0].Reason) + } +} + +func TestClassifyUnresolved_NoDefinition(t *testing.T) { + g := newTestGraph() + reason, candidates := analyzer.ClassifyUnresolved(g, "ghost", "go") + if reason != "no_definition" { + t.Errorf("expected no_definition, got %q", reason) + } + if candidates != 0 { + t.Errorf("expected 0 candidates, got %d", candidates) + } +} diff --git a/internal/analyzer/synthesizers.go b/internal/analyzer/synthesizers.go new file mode 100644 index 00000000..ca719aaa --- /dev/null +++ b/internal/analyzer/synthesizers.go @@ -0,0 +1,117 @@ +package analyzer + +// PURPOSE — pure computation core for the synthesizers analyzer: 
groups +// every synthesized edge by the framework-dispatch pass that produced it, +// returning a structured result the MCP layer and CLI can both consume +// without duplicating logic. +// RATIONALE — extracted from the MCP handler so the aggregation is +// independently testable and reusable across surfaces (MCP, CLI, etc.). +// KEYWORDS — synthesizers, framework-dispatch, pure, calculation + +import ( + "sort" + + "github.com/zzet/gortex/internal/graph" +) + +const ( + // metaSynthesizedByKey is the Edge.Meta key stamped by the synthesizer engine. + metaSynthesizedByKey = "synthesized_by" + // metaProvenanceKey is the Edge.Meta key carrying provenance info. + metaProvenanceKey = "provenance" + // maxSamples is the maximum number of edge samples kept per synthesizer group. + maxSamples = 5 +) + +// SynthesizerSample is one example edge from a synthesizer group. +type SynthesizerSample struct { + From string `json:"from"` + To string `json:"to"` + Kind string `json:"kind"` + Via string `json:"via,omitempty"` +} + +// SynthesizerRow is one synthesizer group in the result. +// JSON field names are intentionally kept stable — callers rely on them. +type SynthesizerRow struct { + Name string `json:"synthesizer"` + Provenance string `json:"provenance"` + Edges int `json:"edges"` + ByKind map[string]int `json:"by_kind"` + Samples []SynthesizerSample `json:"samples,omitempty"` +} + +// SynthesizersResult is the return type of AnalyzeSynthesizers. +// JSON field names mirror the MCP output shape exactly. +type SynthesizersResult struct { + Synthesizers []*SynthesizerRow `json:"synthesizers"` + TotalEdges int `json:"total_edges"` +} + +// SynthesizersOption configures AnalyzeSynthesizers. +type SynthesizersOption func(*synthConfig) + +type synthConfig struct { + nameFilter string +} + +// WithSynthesizerNameFilter restricts the result to a single synthesizer name. 
+func WithSynthesizerNameFilter(name string) SynthesizersOption { + return func(c *synthConfig) { c.nameFilter = name } +} + +// AnalyzeSynthesizers groups every synthesized edge in the graph by the +// synthesizer that produced it and returns a sorted, structured result. +// It is a pure Calculation: no side effects, no I/O. +func AnalyzeSynthesizers(g graph.Store, opts ...SynthesizersOption) SynthesizersResult { + cfg := &synthConfig{} + for _, o := range opts { + o(cfg) + } + + rows := map[string]*SynthesizerRow{} + for _, e := range g.AllEdges() { + if e == nil || e.Meta == nil { + continue + } + by, _ := e.Meta[metaSynthesizedByKey].(string) + if by == "" { + continue + } + if cfg.nameFilter != "" && by != cfg.nameFilter { + continue + } + row, ok := rows[by] + if !ok { + prov, _ := e.Meta[metaProvenanceKey].(string) + row = &SynthesizerRow{Name: by, Provenance: prov, ByKind: map[string]int{}} + rows[by] = row + } + row.Edges++ + row.ByKind[string(e.Kind)]++ + if len(row.Samples) < maxSamples { + via, _ := e.Meta["via"].(string) + row.Samples = append(row.Samples, SynthesizerSample{ + From: e.From, + To: e.To, + Kind: string(e.Kind), + Via: via, + }) + } + } + + out := make([]*SynthesizerRow, 0, len(rows)) + total := 0 + for _, r := range rows { + total += r.Edges + out = append(out, r) + } + sort.Slice(out, func(i, j int) bool { + if out[i].Edges != out[j].Edges { + return out[i].Edges > out[j].Edges + } + return out[i].Name < out[j].Name + }) + + return SynthesizersResult{Synthesizers: out, TotalEdges: total} +} diff --git a/internal/analyzer/synthesizers_test.go b/internal/analyzer/synthesizers_test.go new file mode 100644 index 00000000..1047bf93 --- /dev/null +++ b/internal/analyzer/synthesizers_test.go @@ -0,0 +1,84 @@ +package analyzer_test + +// PURPOSE — shape tests for AnalyzeSynthesizers: verify the function +// returns the correct JSON-matching struct given a graph with synthesized +// edges. 
+// RATIONALE — these tests live here rather than in mcp/ so the core logic +// is independently verifiable without the MCP layer. +// KEYWORDS — synthesizers, unit, shape + +import ( + "testing" + + "github.com/zzet/gortex/internal/analyzer" + "github.com/zzet/gortex/internal/graph" +) + +func newTestGraph() graph.Store { + return graph.New() +} + +func addSynthEdge(g graph.Store, from, to, by, via string) { + g.AddEdge(&graph.Edge{ + From: from, To: to, Kind: graph.EdgeCalls, + Meta: map[string]any{ + "synthesized_by": by, + "provenance": "heuristic", + "via": via, + }, + }) +} + +func TestAnalyzeSynthesizers_Shape(t *testing.T) { + g := newTestGraph() + addSynthEdge(g, "a.go::A", "b.go::B", "event-channel", "event.channel") + addSynthEdge(g, "a.go::A", "c.go::C", "event-channel", "event.channel") + addSynthEdge(g, "cli.go::run", "svc.go::Handle", "grpc-stub", "grpc.stub") + + res := analyzer.AnalyzeSynthesizers(g) + if res.TotalEdges != 3 { + t.Fatalf("expected TotalEdges=3, got %d", res.TotalEdges) + } + if len(res.Synthesizers) != 2 { + t.Fatalf("expected 2 synthesizer groups, got %d", len(res.Synthesizers)) + } + // Sorted by edges desc: event-channel first. 
+ first := res.Synthesizers[0] + if first.Name != "event-channel" { + t.Errorf("expected event-channel first, got %q", first.Name) + } + if first.Edges != 2 { + t.Errorf("expected 2 edges for event-channel, got %d", first.Edges) + } + if first.Provenance != "heuristic" { + t.Errorf("expected heuristic provenance, got %q", first.Provenance) + } +} + +func TestAnalyzeSynthesizers_NameFilter(t *testing.T) { + g := newTestGraph() + addSynthEdge(g, "a.go::A", "b.go::B", "event-channel", "event.channel") + addSynthEdge(g, "cli.go::run", "svc.go::Handle", "grpc-stub", "grpc.stub") + + res := analyzer.AnalyzeSynthesizers(g, analyzer.WithSynthesizerNameFilter("grpc-stub")) + if len(res.Synthesizers) != 1 { + t.Fatalf("expected 1 group with name filter, got %d", len(res.Synthesizers)) + } + if res.Synthesizers[0].Name != "grpc-stub" { + t.Errorf("name filter failed: %q", res.Synthesizers[0].Name) + } +} + +func TestAnalyzeSynthesizers_NoSynthEdges(t *testing.T) { + g := newTestGraph() + // plain non-synthesized edge + g.AddEdge(&graph.Edge{From: "x.go::X", To: "y.go::Y", Kind: graph.EdgeCalls}) + + res := analyzer.AnalyzeSynthesizers(g) + if res.TotalEdges != 0 { + t.Fatalf("expected 0 total_edges, got %d", res.TotalEdges) + } + if len(res.Synthesizers) != 0 { + t.Fatalf("expected no synthesizer groups, got %d", len(res.Synthesizers)) + } +} diff --git a/internal/mcp/tools_analyze_resolution_outcomes.go b/internal/mcp/tools_analyze_resolution_outcomes.go index ea1bbd81..3ad187bf 100644 --- a/internal/mcp/tools_analyze_resolution_outcomes.go +++ b/internal/mcp/tools_analyze_resolution_outcomes.go @@ -7,214 +7,58 @@ import ( "strings" "github.com/mark3labs/mcp-go/mcp" - "github.com/zzet/gortex/internal/graph" + + "github.com/zzet/gortex/internal/analyzer" ) -// Structured resolver-suppression taxonomy. When the resolver leaves a -// call / reference edge on an `unresolved::` placeholder it records no -// reason — an agent only sees that the edge is unresolved, not *why*. 
-// This analyzer reconstructs the why from the graph: for each unresolved -// edge it looks up the name's definition candidates and classifies the -// outcome. The reasons reflect Gortex's name-based resolver model (not a -// C++ overload set), so the taxonomy is honest about how this resolver -// actually gives up. +// Structured resolver-suppression taxonomy. The canonical constants live in +// internal/analyzer; these aliases keep existing MCP-package call sites and +// tests compiling against a single source of truth. const ( - // outcomeAmbiguousMultiMatch: two or more same-name, same-language - // definitions exist — the resolver could not pick one and punted. - outcomeAmbiguousMultiMatch = "ambiguous_multi_match" - // outcomeCandidateOutOfScope: exactly one same-language definition - // exists but the edge stayed unresolved — it was outside the caller's - // resolution scope (cross-package guard, reachability prune, or a - // receiver-type mismatch). - outcomeCandidateOutOfScope = "candidate_out_of_scope" - // outcomeCrossLanguageOnly: the only definitions of this name are in a - // different language family, so the language gate suppressed the link. - outcomeCrossLanguageOnly = "cross_language_only" - // outcomeStubOnly: the name matches only stub / external-placeholder - // nodes — no real definition is indexed. - outcomeStubOnly = "stub_only" - // outcomeNoDefinition: no definition of this name exists in the graph - // at all — a genuinely external or un-indexed target. 
- outcomeNoDefinition = "no_definition" + outcomeAmbiguousMultiMatch = analyzer.OutcomeAmbiguousMultiMatch + outcomeCandidateOutOfScope = analyzer.OutcomeCandidateOutOfScope + outcomeCrossLanguageOnly = analyzer.OutcomeCrossLanguageOnly + outcomeStubOnly = analyzer.OutcomeStubOnly + outcomeNoDefinition = analyzer.OutcomeNoDefinition ) // handleAnalyzeResolutionOutcomes classifies every unresolved call / // reference edge by the structured reason the resolver gave up, and // returns a per-reason rollup plus example rows. Optional `reason` // filters to one outcome; optional `limit` caps the example rows. +// +// The classification itself lives in +// internal/analyzer.AnalyzeResolutionOutcomes so the same logic backs both +// this MCP tool and the `gortex analyze` CLI. func (s *Server) handleAnalyzeResolutionOutcomes(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) { args := req.GetArguments() reasonFilter := strings.TrimSpace(stringArg(args, "reason")) limit := intArg(args, "limit", 50) - type row struct { - From string `json:"from"` - To string `json:"to"` - Kind string `json:"edge_kind"` - Name string `json:"name"` - Reason string `json:"reason"` - Candidates int `json:"candidates"` - } - - // Collect unresolved edges + the From IDs (for language lookup). - type pending struct { - edge *graph.Edge - name string - } - var todo []pending - fromIDs := map[string]struct{}{} - for _, kind := range []graph.EdgeKind{graph.EdgeCalls, graph.EdgeReferences} { - for e := range s.graph.EdgesByKind(kind) { - if e == nil || !graph.IsUnresolvedTarget(e.To) { - continue - } - name := graph.UnresolvedName(e.To) - if name == "" { - continue - } - // A receiver-qualified placeholder (`unresolved::*.foo`) keeps - // its method name after the dot; normalise to the bare name. 
- if i := strings.LastIndexByte(name, '.'); i >= 0 && i+1 < len(name) { - name = name[i+1:] - } - todo = append(todo, pending{edge: e, name: name}) - if e.From != "" { - fromIDs[e.From] = struct{}{} - } - } - } - fromList := make([]string, 0, len(fromIDs)) - for id := range fromIDs { - fromList = append(fromList, id) - } - fromNodes := s.graph.GetNodesByIDs(fromList) - - byReason := map[string]int{} - var rows []row - // Memoise classification by (name, caller-language): the same - // unresolved name is referenced from many sites — every call to one - // missing function — and classifyUnresolved is pure given that pair, - // so this collapses a FindNodesByName-per-edge into one per distinct - // (name, lang). - type classKey struct{ name, lang string } - type classVal struct { - reason string - ncand int - } - classCache := map[classKey]classVal{} - for _, p := range todo { - fromLang := "" - if n := fromNodes[p.edge.From]; n != nil { - fromLang = n.Language - } - key := classKey{name: p.name, lang: fromLang} - cv, ok := classCache[key] - if !ok { - cv.reason, cv.ncand = s.classifyUnresolved(p.name, fromLang) - classCache[key] = cv - } - reason, ncand := cv.reason, cv.ncand - byReason[reason]++ - if reasonFilter != "" && reason != reasonFilter { - continue - } - if len(rows) < limit { - rows = append(rows, row{ - From: p.edge.From, To: p.edge.To, Kind: string(p.edge.Kind), - Name: p.name, Reason: reason, Candidates: ncand, - }) - } - } + result := analyzer.AnalyzeResolutionOutcomes(s.graph, reasonFilter, limit) if isCompact(req) { var b strings.Builder - reasons := make([]string, 0, len(byReason)) - for r := range byReason { + reasons := make([]string, 0, len(result.ByReason)) + for r := range result.ByReason { reasons = append(reasons, r) } - sort.Slice(reasons, func(i, j int) bool { return byReason[reasons[i]] > byReason[reasons[j]] }) + sort.Slice(reasons, func(i, j int) bool { return result.ByReason[reasons[i]] > result.ByReason[reasons[j]] }) for _, r := range 
reasons { b.WriteString(r) b.WriteString(": ") - b.WriteString(strconv.Itoa(byReason[r])) + b.WriteString(strconv.Itoa(result.ByReason[r])) b.WriteByte('\n') } - if len(byReason) == 0 { + if len(result.ByReason) == 0 { b.WriteString("no unresolved edges\n") } return mcp.NewToolResultText(b.String()), nil } - total := 0 - for _, n := range byReason { - total += n - } return s.respondJSONOrTOON(ctx, req, map[string]any{ - "by_reason": byReason, - "total": total, - "rows": rows, + "by_reason": result.ByReason, + "total": result.Total, + "rows": result.Rows, }) } - -// classifyUnresolved returns the structured suppression reason for an -// unresolved name relative to the caller's language, plus the number of -// real (non-stub) definition candidates considered. -func (s *Server) classifyUnresolved(name, fromLang string) (reason string, candidates int) { - var realSameLang, realOtherLang, stubs int - for _, n := range s.graph.FindNodesByName(name) { - if n == nil { - continue - } - if graph.IsStub(n.ID) { - stubs++ - continue - } - if !nodeIsDefinitionKind(n.Kind) { - continue - } - if fromLang != "" && n.Language != "" && !sameLanguageFamily(fromLang, n.Language) { - realOtherLang++ - continue - } - realSameLang++ - } - switch { - case realSameLang >= 2: - return outcomeAmbiguousMultiMatch, realSameLang - case realSameLang == 1: - return outcomeCandidateOutOfScope, 1 - case realOtherLang >= 1: - return outcomeCrossLanguageOnly, realOtherLang - case stubs >= 1: - return outcomeStubOnly, 0 - default: - return outcomeNoDefinition, 0 - } -} - -// nodeIsDefinitionKind reports whether a node kind is a callable / type -// definition an unresolved call or reference could legitimately bind to. 
-func nodeIsDefinitionKind(k graph.NodeKind) bool { - switch k { - case graph.KindFunction, graph.KindMethod, graph.KindType, - graph.KindInterface, graph.KindVariable, graph.KindConstant, graph.KindField: - return true - } - return false -} - -// sameLanguageFamily folds the TS/JS pair so a cross-file TS→JS reference -// is not mis-reported as a cross-language suppression. -func sameLanguageFamily(a, b string) bool { - if a == b { - return true - } - norm := func(l string) string { - switch l { - case "javascript", "typescript", "tsx", "jsx": - return "jsts" - } - return l - } - return norm(a) == norm(b) -} diff --git a/internal/mcp/tools_analyze_synthesizers.go b/internal/mcp/tools_analyze_synthesizers.go index f1a70025..2731db9b 100644 --- a/internal/mcp/tools_analyze_synthesizers.go +++ b/internal/mcp/tools_analyze_synthesizers.go @@ -2,20 +2,12 @@ package mcp import ( "context" - "sort" "strconv" "strings" "github.com/mark3labs/mcp-go/mcp" -) -// Edge.Meta keys the framework dynamic-dispatch synthesizer engine -// stamps (mirrors resolver.MetaSynthesizedBy / MetaProvenance — kept as -// literals here so the MCP layer doesn't depend on the resolver package -// just for two string constants). -const ( - metaSynthesizedByKey = "synthesized_by" - metaProvenanceKey = "provenance" + "github.com/zzet/gortex/internal/analyzer" ) // handleAnalyzeSynthesizers rolls up the framework dynamic-dispatch @@ -27,72 +19,23 @@ const ( // Temporal proxy → activity, event-channel emit → listener, native // bridge call → implementation) separately from compiler-verified ones. // +// The aggregation itself lives in internal/analyzer.AnalyzeSynthesizers so +// the same logic backs both this MCP tool and the `gortex analyze` CLI. +// // Optional `name` filters to a single synthesizer. 
func (s *Server) handleAnalyzeSynthesizers(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) { args := req.GetArguments() nameFilter := strings.TrimSpace(stringArg(args, "name")) - type sample struct { - From string `json:"from"` - To string `json:"to"` - Kind string `json:"kind"` - Via string `json:"via,omitempty"` + var opts []analyzer.SynthesizersOption + if nameFilter != "" { + opts = append(opts, analyzer.WithSynthesizerNameFilter(nameFilter)) } - type synthRow struct { - Name string `json:"synthesizer"` - Provenance string `json:"provenance"` - Edges int `json:"edges"` - ByKind map[string]int `json:"by_kind"` - Samples []sample `json:"samples,omitempty"` - } - const maxSamples = 5 - rows := map[string]*synthRow{} - for _, e := range s.graph.AllEdges() { - if e == nil || e.Meta == nil { - continue - } - by, _ := e.Meta[metaSynthesizedByKey].(string) - if by == "" { - continue - } - if nameFilter != "" && by != nameFilter { - continue - } - row, ok := rows[by] - if !ok { - prov, _ := e.Meta[metaProvenanceKey].(string) - row = &synthRow{Name: by, Provenance: prov, ByKind: map[string]int{}} - rows[by] = row - } - row.Edges++ - row.ByKind[string(e.Kind)]++ - if len(row.Samples) < maxSamples { - via, _ := e.Meta["via"].(string) - row.Samples = append(row.Samples, sample{ - From: e.From, - To: e.To, - Kind: string(e.Kind), - Via: via, - }) - } - } - - out := make([]*synthRow, 0, len(rows)) - total := 0 - for _, r := range rows { - total += r.Edges - out = append(out, r) - } - sort.Slice(out, func(i, j int) bool { - if out[i].Edges != out[j].Edges { - return out[i].Edges > out[j].Edges - } - return out[i].Name < out[j].Name - }) + result := analyzer.AnalyzeSynthesizers(s.graph, opts...) 
if isCompact(req) { var b strings.Builder - for _, r := range out { + for _, r := range result.Synthesizers { b.WriteString(r.Name) b.WriteString(": ") b.WriteString(strconv.Itoa(r.Edges)) @@ -100,14 +43,14 @@ func (s *Server) handleAnalyzeSynthesizers(ctx context.Context, req mcp.CallTool b.WriteString(r.Provenance) b.WriteString(")\n") } - if len(out) == 0 { + if len(result.Synthesizers) == 0 { b.WriteString("no synthesized edges\n") } return mcp.NewToolResultText(b.String()), nil } return s.respondJSONOrTOON(ctx, req, map[string]any{ - "synthesizers": out, - "total_edges": total, + "synthesizers": result.Synthesizers, + "total_edges": result.TotalEdges, }) } From a3dc7633cf5076fd6ffa7c4aa6370b108af7afe9 Mon Sep 17 00:00:00 2001 From: avfirsov Date: Sat, 13 Jun 2026 12:01:44 +0300 Subject: [PATCH 2/2] fix(cli): explicitly discard fmt.Fprintf return in analyze text output (errcheck) golangci-lint errcheck flagged the text-format print paths in cmd/gortex/analyze.go. Ignore the return explicitly, matching the codebase idiom.
Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/gortex/analyze.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/gortex/analyze.go b/cmd/gortex/analyze.go index b3c70fa9..8e21e9bc 100644 --- a/cmd/gortex/analyze.go +++ b/cmd/gortex/analyze.go @@ -126,10 +126,10 @@ func runSynthesizers(cmd *cobra.Command, g graph.Store) error { enc.SetIndent("", " ") return enc.Encode(result) default: - fmt.Fprintf(cmd.OutOrStdout(), "synthesizers: groups=%d total_edges=%d\n", + _, _ = fmt.Fprintf(cmd.OutOrStdout(), "synthesizers: groups=%d total_edges=%d\n", len(result.Synthesizers), result.TotalEdges) for _, row := range result.Synthesizers { - fmt.Fprintf(cmd.OutOrStdout(), " %s: edges=%d\n", row.Name, row.Edges) + _, _ = fmt.Fprintf(cmd.OutOrStdout(), " %s: edges=%d\n", row.Name, row.Edges) } return nil } @@ -145,9 +145,9 @@ func runResolutionOutcomes(cmd *cobra.Command, g graph.Store) error { enc.SetIndent("", " ") return enc.Encode(result) default: - fmt.Fprintf(cmd.OutOrStdout(), "resolution_outcomes: total=%d\n", result.Total) + _, _ = fmt.Fprintf(cmd.OutOrStdout(), "resolution_outcomes: total=%d\n", result.Total) for reason, count := range result.ByReason { - fmt.Fprintf(cmd.OutOrStdout(), " %s: %d\n", reason, count) + _, _ = fmt.Fprintf(cmd.OutOrStdout(), " %s: %d\n", reason, count) } return nil }