From 5a50c5c84f77509b6815b3e141ec679699b0a148 Mon Sep 17 00:00:00 2001 From: Evgenii Danilin Date: Tue, 17 Mar 2026 21:14:56 +0700 Subject: [PATCH] enriched_overview_analysis --- internal/config/config.go | 15 +++++ internal/config/config_test.go | 11 +++ internal/indexer/indexer.go | 77 +++++++++++++++++++++ internal/prompts/prompts.go | 34 ++++++++++ internal/walker/walker.go | 118 +++++++++++++++++++++++++++++---- internal/walker/walker_test.go | 38 +++++++++-- 6 files changed, 275 insertions(+), 18 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 94a22e6..18566e5 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -31,6 +31,7 @@ type IndexerConfig struct { MaxFileSize int64 `yaml:"max_file_size"` IgnorePatterns []string `yaml:"ignore_patterns"` Workers int `yaml:"workers"` + TreeFileDepth int `yaml:"tree_file_depth"` } type ProviderConfig struct { @@ -51,9 +52,11 @@ type PromptsConfig struct { ProjectStructureAnalysis string `yaml:"project_structure_analysis"` SourceCodeAnalysis string `yaml:"source_code_analysis"` DirectoryAnalysis string `yaml:"directory_analysis"` + EnrichedOverviewAnalysis string `yaml:"enriched_overview_analysis"` } const DefaultMaxFileSize = 1048576 // 1 MB +const DefaultTreeFileDepth = 3 var envVarRegexp = regexp.MustCompile(`\$\{(\w+)\}`) @@ -188,6 +191,9 @@ func merge(home, project *Config) *Config { if project.Indexer.Workers != 0 { cfg.Indexer.Workers = project.Indexer.Workers } + if project.Indexer.TreeFileDepth != 0 { + cfg.Indexer.TreeFileDepth = project.Indexer.TreeFileDepth + } // IgnorePatterns: append project patterns to home patterns if len(project.Indexer.IgnorePatterns) > 0 { @@ -204,6 +210,9 @@ func merge(home, project *Config) *Config { if project.Prompts.DirectoryAnalysis != "" { cfg.Prompts.DirectoryAnalysis = project.Prompts.DirectoryAnalysis } + if project.Prompts.EnrichedOverviewAnalysis != "" { + cfg.Prompts.EnrichedOverviewAnalysis = 
project.Prompts.EnrichedOverviewAnalysis + } return &cfg } @@ -226,6 +235,9 @@ func setDefaults(cfg *Config) { if cfg.Indexer.Workers <= 0 { cfg.Indexer.Workers = 2 } + if cfg.Indexer.TreeFileDepth <= 0 { + cfg.Indexer.TreeFileDepth = DefaultTreeFileDepth + } if cfg.Prompts.ProjectStructureAnalysis == "" { cfg.Prompts.ProjectStructureAnalysis = prompts.DefaultProjectStructureAnalysis } @@ -235,6 +247,9 @@ func setDefaults(cfg *Config) { if cfg.Prompts.DirectoryAnalysis == "" { cfg.Prompts.DirectoryAnalysis = prompts.DefaultDirectoryAnalysis } + if cfg.Prompts.EnrichedOverviewAnalysis == "" { + cfg.Prompts.EnrichedOverviewAnalysis = prompts.DefaultEnrichedOverviewAnalysis + } } // pathToName converts an absolute path to a safe identifier. diff --git a/internal/config/config_test.go b/internal/config/config_test.go index adb32c8..b6891ea 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -541,6 +541,9 @@ func TestLoad_PromptsDefaultsWhenEmpty(t *testing.T) { if cfg.Prompts.DirectoryAnalysis != prompts.DefaultDirectoryAnalysis { t.Error("expected default DirectoryAnalysis prompt") } + if cfg.Prompts.EnrichedOverviewAnalysis != prompts.DefaultEnrichedOverviewAnalysis { + t.Error("expected default EnrichedOverviewAnalysis prompt") + } } func TestLoad_PromptsFromConfig(t *testing.T) { @@ -549,6 +552,7 @@ prompts: project_structure_analysis: "Custom structure: ${CONTENT}" source_code_analysis: "Custom code: ${CONTENT}" directory_analysis: "Custom dir: ${DIR_PATH}" + enriched_overview_analysis: "Custom enriched: ${FILE_TREE}" ` path := writeTestConfig(t, yml) @@ -565,6 +569,9 @@ prompts: if cfg.Prompts.DirectoryAnalysis != "Custom dir: ${DIR_PATH}" { t.Errorf("directory_analysis = %q, want custom", cfg.Prompts.DirectoryAnalysis) } + if cfg.Prompts.EnrichedOverviewAnalysis != "Custom enriched: ${FILE_TREE}" { + t.Errorf("enriched_overview_analysis = %q, want custom", cfg.Prompts.EnrichedOverviewAnalysis) + } } func 
TestLoad_VectorSizeDefault(t *testing.T) { @@ -645,6 +652,7 @@ prompts: project_structure_analysis: "home-structure" source_code_analysis: "home-code" directory_analysis: "home-dir" + enriched_overview_analysis: "home-enriched" ` homePath := writeTestConfig(t, homeYml) @@ -667,4 +675,7 @@ prompts: if cfg.Prompts.DirectoryAnalysis != "home-dir" { t.Errorf("directory_analysis = %q, want %q", cfg.Prompts.DirectoryAnalysis, "home-dir") } + if cfg.Prompts.EnrichedOverviewAnalysis != "home-enriched" { + t.Errorf("enriched_overview_analysis = %q, want %q", cfg.Prompts.EnrichedOverviewAnalysis, "home-enriched") + } } diff --git a/internal/indexer/indexer.go b/internal/indexer/indexer.go index 16b792e..4952c8f 100644 --- a/internal/indexer/indexer.go +++ b/internal/indexer/indexer.go @@ -324,6 +324,20 @@ func (t *dirTracker) results() (indexed, skipped, errors int) { return int(t.indexed.Load()), int(t.skipped.Load()), int(t.errors.Load()) } +// topLevelSummaries returns summaries of top-level directories (direct children of root). +func (t *dirTracker) topLevelSummaries() map[string]string { + t.mu.Lock() + defer t.mu.Unlock() + + result := make(map[string]string) + for dir, summary := range t.dirSummary { + if filepath.Dir(dir) == "." { + result[dir] = summary + } + } + return result +} + // Run executes the full indexing cycle for the project. 
func Run(configPath string, force bool, logger *slog.Logger) error { cfg, err := config.Load(configPath) @@ -402,6 +416,7 @@ func Run(configPath string, force bool, logger *slog.Logger) error { RootPath: rootPath, MaxFileSize: cfg.Indexer.MaxFileSize, IgnorePatterns: cfg.Indexer.IgnorePatterns, + TreeFileDepth: cfg.Indexer.TreeFileDepth, }) if err != nil { return fmt.Errorf("walking project: %w", err) @@ -522,6 +537,10 @@ func Run(configPath string, force bool, logger *slog.Logger) error { logger.Info(fmt.Sprintf("Found %d items to analyze (%d files, %d dirs)", totalItems, len(walkResult.Files), totalDirs)) + // Collect root-level file summaries for Stage 3 (enriched overview) + var rootFilesMu sync.Mutex + var rootFileSummaries []*fileInfo + for _, relPath := range walkResult.Files { absPath := filepath.Join(rootPath, relPath) @@ -542,6 +561,14 @@ func Run(configPath string, force bool, logger *slog.Logger) error { logger.Debug("file skipped (unchanged)", "file", relPath, "hash", hash) skippedCount++ tracker.fileCompleted(relPath, existing.Summary, existing.FileHash) + if filepath.Dir(relPath) == "." { + rootFilesMu.Lock() + rootFileSummaries = append(rootFileSummaries, &fileInfo{ + filePath: relPath, + summary: existing.Summary, + }) + rootFilesMu.Unlock() + } continue } @@ -628,6 +655,14 @@ func Run(configPath string, force bool, logger *slog.Logger) error { ) tracker.fileCompleted(relPath, analysis.Summary, hash) + if filepath.Dir(relPath) == "."
{ + rootFilesMu.Lock() + rootFileSummaries = append(rootFileSummaries, &fileInfo{ + filePath: relPath, + summary: analysis.Summary, + }) + rootFilesMu.Unlock() + } indexedCount.Add(1) }(relPath, content, hash) } @@ -635,6 +670,35 @@ func Run(configPath string, force bool, logger *slog.Logger) error { dirIndexed, dirSkipped, dirErrors := tracker.results() + // --- Stage 3: Enriched project overview --- + logger.Info("\n--- Stage 3: Enriched project overview ---") + + topLevelDirs := tracker.topLevelSummaries() + if len(topLevelDirs) > 0 || len(rootFileSummaries) > 0 { + topDirText := buildTopLevelDirsSummariesText(topLevelDirs) + rootFilesText := buildFilesSummariesText(rootFileSummaries) + + enrichedPrompt := prompts.Render(cfg.Prompts.EnrichedOverviewAnalysis, map[string]string{ + "FILE_TREE": walkResult.Tree, + "TOP_LEVEL_DIRS_SUMMARIES": topDirText, + "ROOT_FILES_SUMMARIES": rootFilesText, + }) + + logger.Info("Generating enriched project overview...") + enrichedOverview, err := llm.GenerateContent(enrichedPrompt) + if err != nil { + logger.Error(fmt.Sprintf("Error generating enriched overview: %v", err)) + } else { + if err := os.WriteFile(overviewPath, []byte(enrichedOverview), 0o644); err != nil { + logger.Error(fmt.Sprintf("Error saving enriched overview: %v", err)) + } else { + logger.Info(fmt.Sprintf("Enriched project overview saved to %s", overviewPath)) + } + } + } else { + logger.Info("Skipping enriched overview (no directory/file summaries available)") + } + // --- Summary --- logger.Info("\n=== Indexing complete ===") logger.Info(fmt.Sprintf("Total files: %d", len(walkResult.Files))) @@ -750,6 +814,19 @@ func buildFilesSummariesText(files []*fileInfo) string { return sb.String() } +// buildTopLevelDirsSummariesText formats top-level directory summaries for the enriched overview prompt. 
+func buildTopLevelDirsSummariesText(dirs map[string]string) string { + if len(dirs) == 0 { + return "(no directories)" + } + var lines []string + for dir, summary := range dirs { + lines = append(lines, "- "+dir+"/: "+summary) + } + sort.Strings(lines) + return strings.Join(lines, "\n") +} + // buildSubdirsSummariesText formats subdirectory summaries for the LLM prompt. func buildSubdirsSummariesText(childDirs []string, dirSummary map[string]string) string { if len(childDirs) == 0 { diff --git a/internal/prompts/prompts.go b/internal/prompts/prompts.go index 26ec152..e516011 100644 --- a/internal/prompts/prompts.go +++ b/internal/prompts/prompts.go @@ -73,6 +73,40 @@ Based on the files and subdirectories above, provide: Respond in JSON format.` +// DefaultEnrichedOverviewAnalysis is the built-in prompt for enriched project overview generation +// based on deep analysis of all source files and directories. +const DefaultEnrichedOverviewAnalysis = `You are a senior software architect. Generate a comprehensive project overview based on deep analysis of the codebase. + +## File Tree + +${FILE_TREE} + +## Top-Level Directory Summaries +[[[ +${TOP_LEVEL_DIRS_SUMMARIES} +]]] + +## Root-Level File Summaries +[[[ +${ROOT_FILES_SUMMARIES} +]]] + +## Instructions + +You have access to the project file tree AND semantic summaries of all top-level directories and root-level files, produced by deep analysis of every source file. + +Based on this information, produce a comprehensive project overview: + +1. **Framework / Platform** — What programming language(s), framework(s), or platform is this project built on? +2. **Architecture** — What architectural style does the project follow? Include specific patterns observed in the codebase. +3. **Modules / Components** — List the main modules, packages, or top-level components with detailed descriptions of their purpose and responsibilities. +4. **Domains** — Identify the business domains or bounded contexts present in the project. 
+5. **Patterns** — Note any recognizable design patterns or conventions. + +## Output Format + +Write a structured overview in Markdown. Be more detailed than a simple file-tree analysis — you have actual semantic knowledge about what each component does. Focus on accuracy and completeness. No more than 500 words.` + // Render replaces all occurrences of ${KEY} variables in the template // with the provided values. Unknown variables are left as-is. func Render(template string, vars map[string]string) string { diff --git a/internal/walker/walker.go b/internal/walker/walker.go index 15996cb..736f53e 100644 --- a/internal/walker/walker.go +++ b/internal/walker/walker.go @@ -16,6 +16,7 @@ type Options struct { RootPath string MaxFileSize int64 IgnorePatterns []string + TreeFileDepth int // max depth at which files are shown in the tree (0 = root only) } // Result contains the output of walking the file system. @@ -89,6 +90,7 @@ func Walk(opts Options) (*Result, error) { } var files []string + fileSizes := make(map[string]int64) err = filepath.WalkDir(rootPath, func(path string, d os.DirEntry, err error) error { if err != nil { return nil // skip entries with errors @@ -156,6 +158,7 @@ func Walk(opts Options) (*Result, error) { } files = append(files, relPath) + fileSizes[relPath] = fi.Size() return nil }) if err != nil { @@ -166,7 +169,7 @@ func Walk(opts Options) (*Result, error) { return &Result{ Files: files, - Tree: buildTree(files), + Tree: buildTree(files, fileSizes, opts.TreeFileDepth), }, nil } @@ -264,13 +267,18 @@ func isBinaryContent(path string) bool { // treeNode represents a node in the file tree structure. 
type treeNode struct { - name string - children map[string]*treeNode - order []string // maintains sorted insertion order + name string + children map[string]*treeNode + order []string // maintains sorted insertion order + isFile bool // true for file leaf nodes + totalSize int64 // recursive total size in bytes + extCounts map[string]int // recursive file counts by extension } // buildTree creates a text tree representation from a sorted list of file paths. -func buildTree(files []string) string { +// Directories show metadata (file counts by extension, total size). +// Files are shown up to treeFileDepth levels deep; deeper files are omitted. +func buildTree(files []string, fileSizes map[string]int64, treeFileDepth int) string { if len(files) == 0 { return ".\n" } @@ -280,7 +288,7 @@ func buildTree(files []string) string { for _, f := range files { parts := strings.Split(f, string(filepath.Separator)) current := root - for _, part := range parts { + for i, part := range parts { if _, exists := current.children[part]; !exists { child := &treeNode{ name: part, @@ -290,19 +298,59 @@ func buildTree(files []string) string { current.order = append(current.order, part) } current = current.children[part] + + // Mark leaf node (file) with size and extension + if i == len(parts)-1 { + current.isFile = true + current.totalSize = fileSizes[f] + ext := strings.ToLower(filepath.Ext(part)) + if ext == "" { + ext = part // extensionless files: Makefile, Dockerfile + } + current.extCounts = map[string]int{ext: 1} + } } } + computeTreeMeta(root) + var sb strings.Builder sb.WriteString(".\n") - writeTree(&sb, root, "") + writeTree(&sb, root, "", 0, treeFileDepth) return sb.String() } -func writeTree(sb *strings.Builder, n *treeNode, prefix string) { - for i, name := range n.order { +// computeTreeMeta aggregates totalSize and extCounts from leaves to root (post-order). 
+func computeTreeMeta(n *treeNode) { + for _, name := range n.order { + child := n.children[name] + computeTreeMeta(child) + n.totalSize += child.totalSize + for ext, count := range child.extCounts { + if n.extCounts == nil { + n.extCounts = make(map[string]int) + } + n.extCounts[ext] += count + } + } +} + +// writeTree recursively writes the tree. Files are shown up to maxFileDepth +// levels deep; at deeper levels, only directories are shown. +func writeTree(sb *strings.Builder, n *treeNode, prefix string, depth int, maxFileDepth int) { + // Filter visible children: skip files deeper than maxFileDepth + var visible []string + for _, name := range n.order { + child := n.children[name] + if depth > maxFileDepth && child.isFile { + continue + } + visible = append(visible, name) + } + + for i, name := range visible { child := n.children[name] - isLast := i == len(n.order)-1 + isLast := i == len(visible)-1 connector := "├── " childPrefix := "│ " @@ -311,9 +359,53 @@ func writeTree(sb *strings.Builder, n *treeNode, prefix string) { childPrefix = " " } - sb.WriteString(prefix + connector + name + "\n") - if len(child.children) > 0 { - writeTree(sb, child, prefix+childPrefix) + if child.isFile { + sb.WriteString(prefix + connector + name + "\n") + } else { + sb.WriteString(prefix + connector + name + " " + formatDirMeta(child) + "\n") + writeTree(sb, child, prefix+childPrefix, depth+1, maxFileDepth) } } } + +// formatDirMeta formats directory metadata as "(3 .go, 2 .yaml, 1.2 KB)". 
+func formatDirMeta(n *treeNode) string { + if len(n.extCounts) == 0 { + return "(empty)" + } + + // Sort extensions by count descending, then alphabetically + type extEntry struct { + ext string + count int + } + entries := make([]extEntry, 0, len(n.extCounts)) + for ext, count := range n.extCounts { + entries = append(entries, extEntry{ext, count}) + } + sort.Slice(entries, func(i, j int) bool { + if entries[i].count != entries[j].count { + return entries[i].count > entries[j].count + } + return entries[i].ext < entries[j].ext + }) + + var parts []string + for _, e := range entries { + parts = append(parts, fmt.Sprintf("%d %s", e.count, e.ext)) + } + + return "(" + strings.Join(parts, ", ") + ", " + formatSize(n.totalSize) + ")" +} + +// formatSize formats bytes into a human-readable string. +func formatSize(bytes int64) string { + switch { + case bytes >= 1<<20: + return fmt.Sprintf("%.1f MB", float64(bytes)/float64(1<<20)) + case bytes >= 1<<10: + return fmt.Sprintf("%.1f KB", float64(bytes)/float64(1<<10)) + default: + return fmt.Sprintf("%d B", bytes) + } +} diff --git a/internal/walker/walker_test.go b/internal/walker/walker_test.go index b263fc9..d177f58 100644 --- a/internal/walker/walker_test.go +++ b/internal/walker/walker_test.go @@ -247,14 +247,42 @@ func TestWalk_TreeFormat(t *testing.T) { expectedLines := []string{ ".", - "├── cmd", - "│ ├── indexer", + "├── cmd (2 .go, 24 B)", + "│ ├── indexer (1 .go, 12 B)", + "│ └── mcp (1 .go, 12 B)", + "├── go.mod", + "└── internal (1 .go, 14 B)", + " └── config (1 .go, 14 B)", + } + expected := strings.Join(expectedLines, "\n") + "\n" + + if result.Tree != expected { + t.Errorf("tree mismatch:\ngot:\n%s\nwant:\n%s", result.Tree, expected) + } +} + +func TestWalk_TreeFileDepth(t *testing.T) { + root := t.TempDir() + createFile(t, root, "cmd/indexer/main.go", "package main") + createFile(t, root, "cmd/mcp/main.go", "package main") + createFile(t, root, "go.mod", "module test") + createFile(t, root, 
"internal/config/config.go", "package config") + + result, err := Walk(Options{RootPath: root, TreeFileDepth: 3}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + expectedLines := []string{ + ".", + "├── cmd (2 .go, 24 B)", + "│ ├── indexer (1 .go, 12 B)", "│ │ └── main.go", - "│ └── mcp", + "│ └── mcp (1 .go, 12 B)", "│ └── main.go", "├── go.mod", - "└── internal", - " └── config", + "└── internal (1 .go, 14 B)", + " └── config (1 .go, 14 B)", " └── config.go", } expected := strings.Join(expectedLines, "\n") + "\n"