-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcodebase-map.py
More file actions
286 lines (235 loc) · 9.16 KB
/
codebase-map.py
File metadata and controls
286 lines (235 loc) · 9.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
#!/usr/bin/env python3
"""
codebase-map.py — Generate a structured codebase map artifact
Produces a codebase-map.md summarizing the tracked files in the git
repository, grouped by directory and file type. The artifact lands in
the current Copilot session's files/ directory so build-session-index.py
picks it up automatically on the next index pass.
Usage:
python3 codebase-map.py # Write to session files/ dir
python3 codebase-map.py --stdout # Print to stdout only
python3 codebase-map.py --output PATH # Write to an explicit path
python3 codebase-map.py --repo PATH # Use a different repo root
python3 codebase-map.py --no-write # Dry-run: show target path only
"""
import argparse
import os
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
# Fix Windows console encoding: switch stdout and stderr to UTF-8, replacing
# any character that cannot be encoded. Streams that have no reconfigure()
# method are skipped.
if os.name == "nt":
    for _s in (sys.stdout, sys.stderr):
        if not hasattr(_s, "reconfigure"):
            continue
        _s.reconfigure(encoding="utf-8", errors="replace")

# Directory under the user's home that holds one subdirectory per session.
SESSION_STATE = Path.home().joinpath(".copilot", "session-state")
# ─── Discovery helpers ────────────────────────────────────────────────────────
def find_git_root(start: Path | None = None) -> Path | None:
"""Walk up from *start* (defaults to cwd) to locate the git repository root."""
current = (start or Path.cwd()).resolve()
for candidate in [current, *current.parents]:
if (candidate / ".git").exists():
return candidate
return None
def get_session_files_dir() -> Path | None:
    """Return the ``files/`` directory of the most recently modified session.

    Sessions are the subdirectories of ``SESSION_STATE``; the one with the
    largest modification time wins. The ``files/`` directory inside it is
    created when it does not exist yet, so calling this function has a
    filesystem side effect.

    Returns ``None`` when ``SESSION_STATE`` does not exist or contains no
    subdirectories.
    """
    if not SESSION_STATE.exists():
        return None

    session_dirs = [entry for entry in SESSION_STATE.iterdir() if entry.is_dir()]
    if not session_dirs:
        return None

    # max() keeps the first entry among equal mtimes, the same winner a
    # stable descending sort would put at index 0.
    newest = max(session_dirs, key=lambda entry: entry.stat().st_mtime)
    target = newest / "files"
    target.mkdir(parents=True, exist_ok=True)
    return target
def get_git_last_commit_date(repo_root: Path) -> str:
    """Return the committer date of the latest commit in ISO 8601 form.

    The value comes from ``git log -1 --format=%cI`` run inside *repo_root*.
    Using the commit date rather than wall-clock time keeps the artifact
    deterministic: identical repo state → identical output.

    Returns an empty string when git cannot be run, times out (5 seconds),
    or exits with a non-zero status.
    """
    command = ["git", "log", "-1", "--format=%cI"]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=5,
            cwd=str(repo_root),
        )
    except Exception:
        # Best effort: the date is optional metadata, so any failure to
        # obtain it is reported as "unknown" rather than raised.
        return ""
    if completed.returncode != 0:
        return ""
    return completed.stdout.strip()
# ─── File enumeration ─────────────────────────────────────────────────────────
def ls_files(repo_root: Path, timeout: int = 10) -> list[str]:
    """Return all git-tracked files relative to *repo_root*.

    Runs ``git ls-files -z`` inside *repo_root*. The ``-z`` flag makes git
    emit NUL-terminated, unquoted paths; without it git wraps paths that
    contain special characters (and, with the default ``core.quotePath``,
    non-ASCII bytes) in double quotes with backslash escapes, which would
    leak into the generated map. Splitting on NUL also preserves file names
    with leading or trailing whitespace.

    Args:
        repo_root: Directory in which to run git.
        timeout: Seconds to wait for git before giving up.

    Returns:
        The tracked paths exactly as git reports them, or an empty list when
        git cannot be run, times out, or exits with a non-zero status, so
        callers don't need to handle subprocess exceptions.
    """
    try:
        result = subprocess.run(
            ["git", "ls-files", "-z"],
            capture_output=True,
            timeout=timeout,
            cwd=str(repo_root),
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: git missing or cwd unusable; SubprocessError: timeout;
        # ValueError: invalid arguments (e.g. NUL byte in the cwd string).
        return []
    if result.returncode != 0:
        return []
    # Decode explicitly as UTF-8 instead of relying on the locale encoding;
    # undecodable bytes are replaced rather than aborting the listing.
    listing = result.stdout.decode("utf-8", errors="replace")
    return [path for path in listing.split("\0") if path]
# ─── Map generation ───────────────────────────────────────────────────────────
def group_files(files: list[str]) -> dict[str, list[str]]:
    """Bucket *files* by their first path component.

    Paths with a single component (files at the repository root) go under
    the key ``'.'``. Both the keys of the returned dict and the paths inside
    each bucket are in sorted order.
    """
    buckets: dict[str, list[str]] = {}
    for name in sorted(files):
        segments = Path(name).parts
        key = "." if len(segments) <= 1 else segments[0]
        buckets.setdefault(key, []).append(name)
    return {key: buckets[key] for key in sorted(buckets)}
def ext_summary(files: list[str], cap: int = 6) -> str:
    """Return a compact extension tally string, e.g. '.md×3, .py×12'.

    Extensions are listed in sorted order; a count is shown only when it is
    greater than one, and files without a suffix are tallied as '(no ext)'.

    Args:
        files: Paths whose suffixes are counted.
        cap: Maximum number of extensions to list. Any remainder is folded
            into a trailing '+N more' item. Values below zero are treated
            as zero.

    Returns:
        The comma-separated summary; an empty string for no files.
    """
    counts: dict[str, int] = defaultdict(int)
    for f in files:
        counts[Path(f).suffix or "(no ext)"] += 1
    parts = [f"{ext}×{n}" if n > 1 else ext for ext, n in sorted(counts.items())]

    # Clamp so a zero or negative cap lists nothing instead of slicing from
    # the end, and join once so no separator dangles when nothing is listed.
    shown = parts[: max(cap, 0)]
    hidden = len(parts) - len(shown)
    if hidden:
        shown.append(f"+{hidden} more")
    return ", ".join(shown)
def generate_map(repo_root: Path, files: list[str]) -> str:
    """Return the full text of a codebase-map.md for *repo_root*.

    The document consists of a title, a short metadata paragraph (last
    commit date when available, repository path, tracked-file count), one
    "File Tree by Directory" subsection per top-level group listing every
    file, a summary table of file counts per group, and a footer.

    Args:
        repo_root: Repository root; its name is used in the title and it is
            passed to the commit-date lookup.
        files: Tracked file paths relative to the repository root.

    Returns:
        The Markdown text, terminated by a single newline.
    """
    groups = group_files(files)
    commit_date = get_git_last_commit_date(repo_root)
    # Two trailing spaces are Markdown's hard line break; they keep each
    # metadata entry on its own rendered line within one paragraph.
    hard_break = "  "

    # The optional commit line is appended conditionally rather than
    # filtered out afterwards, so the blank separator lines around the
    # metadata paragraph survive.
    lines: list[str] = [f"# Codebase Map — {repo_root.name}", ""]
    if commit_date:
        lines.append(f"Last commit: {commit_date}{hard_break}")
    lines += [
        f"Repository: `{repo_root}`{hard_break}",
        f"Total tracked files: {len(files)}",
        "",
        "## File Tree by Directory",
        "",
    ]

    for group, gfiles in groups.items():
        label = f"`{group}/`" if group != "." else "`./` (root)"
        lines.append(f"### {label} ({len(gfiles)} files — {ext_summary(gfiles)})")
        lines.append("")
        lines.extend(f"- `{f}`" for f in gfiles)
        lines.append("")

    lines += [
        "## Summary",
        "",
        "| Directory | Files |",
        "|-----------|-------|",
    ]
    for group, gfiles in groups.items():
        label = f"`{group}/`" if group != "." else "`./`"
        lines.append(f"| {label} | {len(gfiles)} |")

    lines += [
        "",
        "*Auto-generated by codebase-map.py — do not edit manually.*",
    ]
    return "\n".join(lines) + "\n"
# ─── CLI ──────────────────────────────────────────────────────────────────────
def resolve_output_path(args_output: str | None) -> Path | None:
"""Determine where to write the artifact.
Returns None when no explicit path is given and no active Copilot session
can be found. Callers must handle None rather than silently polluting the
tools directory.
"""
if args_output:
return Path(args_output).resolve()
files_dir = get_session_files_dir()
if files_dir:
return files_dir / "codebase-map.md"
return None
def main() -> int:
parser = argparse.ArgumentParser(description="Generate codebase-map.md from git-tracked files.")
parser.add_argument(
"--stdout",
action="store_true",
help="Print map to stdout instead of writing a file.",
)
parser.add_argument(
"--output",
metavar="PATH",
help="Write to an explicit file path.",
)
parser.add_argument(
"--repo",
metavar="PATH",
help="Repository root (defaults to git root of cwd).",
)
parser.add_argument(
"--no-write",
action="store_true",
help="Dry-run: show the target path without writing.",
)
args = parser.parse_args()
repo_root = Path(args.repo).resolve() if args.repo else find_git_root()
if repo_root is None:
print(
"Error: not in a git repository. Use --repo to specify root.",
file=sys.stderr,
)
return 1
# When --repo is explicit, verify it's actually a git repo
if args.repo and not (repo_root / ".git").exists():
# Try git rev-parse as a fallback (bare repos, worktrees)
try:
r = subprocess.run(
["git", "rev-parse", "--git-dir"],
capture_output=True,
cwd=str(repo_root),
timeout=5,
)
if r.returncode != 0:
print(
f"Error: {repo_root} is not a git repository.",
file=sys.stderr,
)
return 1
except Exception:
print(
f"Error: {repo_root} is not a git repository.",
file=sys.stderr,
)
return 1
files = ls_files(repo_root)
if not files:
print(
f"Warning: no tracked files found in {repo_root} — map may be empty.",
file=sys.stderr,
)
content = generate_map(repo_root, files)
if args.stdout:
sys.stdout.write(content)
return 0
out_path = resolve_output_path(args.output)
if out_path is None:
print(
"Error: no active Copilot session found and no --output given. Use --output PATH or --stdout.",
file=sys.stderr,
)
return 1
if args.no_write:
print(f"Would write to: {out_path}")
return 0
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text(content, encoding="utf-8")
n_dirs = len(group_files(files))
print(f"✅ codebase-map.md → {out_path}")
print(f" {len(files)} files across {n_dirs} top-level directories")
return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())