diff --git a/README.md b/README.md index 80c2716..0fa9fb7 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Diffguard turns "is this code good?" into a set of numbers an agent can iterate - Cognitive complexity ≤ 10 per function - Function bodies ≤ 50 lines, files ≤ 500 lines - No new dependency cycles or Stable Dependencies Principle violations +- No unused (dead) symbols introduced by the diff - Tier‑1 mutation kill rate ≥ 90% (tests actually catch logic changes) Run diffguard, read the violations, change the code, run again — loop until it exits 0. The metrics become the spec. The agent has something objective to optimize for rather than guessing at taste, and you get a reproducible definition of "good enough" instead of having to re‑judge every diff by eye. Also useful for traditional human-written CI, but the real lift is on AI-generated PRs where line-by-line review doesn't scale. @@ -141,6 +142,21 @@ Builds a directed graph of internal package imports from changed packages and re Cross-references git history with complexity scores. Functions that are both complex AND frequently modified are the highest-risk targets for bugs. Reports the top 10 by `commits * complexity`. +### Dead Code + +Flags non-exported functions, variables, and constants that are declared in the diff but have no references in their analyzable scope. Useful for catching code that was added "just in case" but never wired up, or that became orphaned when its sole caller was removed in the same diff. + +Scope is deliberately conservative to avoid false positives: + +| Language | Scope scanned for references | Symbols considered | +|------------|------------------------------|-------------------------------------------------------| +| Go | All `*.go` in the package directory (including `_test.go`) | unexported free functions, package-level `var`/`const` | +| TypeScript | The single source file | non-`export`ed functions, classes, top-level `const`/`let`/`var` | + +Skipped on purpose: exported / public symbols (may be consumed externally), methods (may satisfy interfaces), types (uses through embedding/casting are noisy), and well-known runtime/test entry points (`init`, `main` in `package main`, `TestXxx`, `BenchmarkXxx`, `ExampleXxx`, `FuzzXxx`). + +Severity: **WARN**. Dead-code detection is heuristic — symbols can be referenced via reflection, framework registration, or codegen the detector can't see — so findings nudge the reviewer to verify rather than block the build outright. Use `--skip-deadcode` to disable the check entirely. + ### Mutation Testing Applies mutations to changed code and runs tests to verify they catch the change: @@ -206,6 +222,7 @@ Flags: --function-size-threshold int Maximum lines per function (default 50) --file-size-threshold int Maximum lines per file (default 500) --skip-mutation Skip mutation testing + --skip-deadcode Skip dead code (unused symbol) detection --skip-generated Skip files marked as generated (for example `Code generated ... DO NOT EDIT`) (default true) --mutation-sample-rate float Percentage of mutants to test, 0-100 (default 100) --test-timeout duration Per-mutant go test timeout (default 30s) @@ -337,6 +354,11 @@ Violations: 12 functions analyzed | Top churn*complexity score: 440 [WARN] Warnings: pkg/handler/routes.go:45:HandleRequest commits=20 complexity=22 score=440 [WARN] + +=== Dead Code === +1 unused symbol detected in changed code [WARN] +Warnings: + pkg/auth/token.go:208:legacyDecode unused func "legacyDecode" [WARN] ``` ## License diff --git a/cmd/diffguard/main.go b/cmd/diffguard/main.go index 7526957..e49e353 100644 --- a/cmd/diffguard/main.go +++ b/cmd/diffguard/main.go @@ -12,6 +12,7 @@ import ( "github.com/0xPolygon/diffguard/internal/churn" "github.com/0xPolygon/diffguard/internal/complexity" + "github.com/0xPolygon/diffguard/internal/deadcode" "github.com/0xPolygon/diffguard/internal/deps" "github.com/0xPolygon/diffguard/internal/diff" "github.com/0xPolygon/diffguard/internal/lang" @@ -28,6 +29,7 @@ func main() { flag.IntVar(&cfg.FunctionSizeThreshold, "function-size-threshold", 50, "Maximum lines per function") flag.IntVar(&cfg.FileSizeThreshold, "file-size-threshold", 500, "Maximum lines per file") flag.BoolVar(&cfg.SkipMutation, "skip-mutation", false, "Skip mutation testing") + flag.BoolVar(&cfg.SkipDeadCode, "skip-deadcode", false, "Skip dead code (unused symbol) detection") flag.BoolVar(&cfg.SkipGenerated, "skip-generated", true, "Skip files marked as generated (for example `Code generated ... DO NOT EDIT`)") flag.Float64Var(&cfg.MutationSampleRate, "mutation-sample-rate", 100, "Percentage of mutants to test, 0-100") flag.DurationVar(&cfg.TestTimeout, "test-timeout", 30*time.Second, "Per-mutant test binary timeout (e.g. 60s, 2m)") @@ -70,6 +72,7 @@ type Config struct { FunctionSizeThreshold int FileSizeThreshold int SkipMutation bool + SkipDeadCode bool SkipGenerated bool MutationSampleRate float64 TestTimeout time.Duration @@ -292,6 +295,14 @@ func runAnalyses(repoPath string, d *diff.Result, cfg Config, l lang.Language) ( } sections = append(sections, churnSection) + if !cfg.SkipDeadCode { + deadcodeSection, err := deadcode.Analyze(repoPath, d, l.DeadCodeDetector()) + if err != nil { + return nil, fmt.Errorf("dead code analysis: %w", err) + } + sections = append(sections, deadcodeSection) + } + if !cfg.SkipMutation { mutationSection, err := mutation.Analyze(repoPath, d, l, mutation.Options{ SampleRate: cfg.MutationSampleRate, diff --git a/cmd/diffguard/main_test.go b/cmd/diffguard/main_test.go index f559f56..766506d 100644 --- a/cmd/diffguard/main_test.go +++ b/cmd/diffguard/main_test.go @@ -270,6 +270,7 @@ func (s stubLanguage) MutantGenerator() lang.MutantGenerator { return func (s stubLanguage) MutantApplier() lang.MutantApplier { return nil } func (s stubLanguage) AnnotationScanner() lang.AnnotationScanner { return nil } func (s stubLanguage) TestRunner() lang.TestRunner { return nil } +func (s stubLanguage) DeadCodeDetector() lang.DeadCodeDetector { return nil } // initTempGoRepo creates a minimal git repo with a single committed Go // file on main, plus an additional file on HEAD so the diff has content. diff --git a/internal/deadcode/deadcode.go b/internal/deadcode/deadcode.go new file mode 100644 index 0000000..3d97d3c --- /dev/null +++ b/internal/deadcode/deadcode.go @@ -0,0 +1,108 @@ +// Package deadcode runs a language's DeadCodeDetector across a diff's +// changed files and produces the "Dead Code" report section. +// +// The per-language detection logic (AST/CST walk, reference counting) lives +// in the language back-end (for Go: internal/lang/goanalyzer/deadcode.go; +// for TypeScript: internal/lang/tsanalyzer/deadcode.go). This package is a +// thin orchestrator: it iterates over changed files, calls the detector on +// each, and formats the resulting unused-symbol list. +// +// Dead-code findings are reported as WARN (not FAIL) because the detector +// is conservative but not omniscient — symbols can be referenced via +// reflection, framework registration, or codegen the detector can't see. +// Treating them as warnings nudges the human to verify rather than blocking +// the build outright. +package deadcode + +import ( + "fmt" + "sort" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + "github.com/0xPolygon/diffguard/internal/report" +) + +// Analyze runs detector across every file in d and returns the "Dead Code" +// report section. detector == nil produces a PASS section with a summary +// noting that the language has no detector wired up — useful as a hedge +// when adding the feature to languages incrementally. +func Analyze(repoPath string, d *diff.Result, detector lang.DeadCodeDetector) (report.Section, error) { + if detector == nil { + return report.Section{ + Name: "Dead Code", + Summary: "No dead code detector available for this language", + Severity: report.SeverityPass, + }, nil + } + + var results []lang.UnusedSymbol + for _, fc := range d.Files { + found, err := detector.FindDeadCode(repoPath, fc) + if err != nil { + return report.Section{}, fmt.Errorf("dead code analysis %s: %w", fc.Path, err) + } + results = append(results, found...) + } + return buildSection(results), nil +} + +// buildSection turns a list of unused symbols into the "Dead Code" section. +// An empty list yields a PASS; any unused symbols flip the section to WARN. +// Findings are sorted by file path then line so the output is deterministic. +func buildSection(results []lang.UnusedSymbol) report.Section { + if len(results) == 0 { + return report.Section{ + Name: "Dead Code", + Summary: "No unused symbols detected in changed code", + Severity: report.SeverityPass, + } + } + + findings := make([]report.Finding, 0, len(results)) + for _, r := range results { + findings = append(findings, report.Finding{ + File: r.File, + Line: r.Line, + Function: r.Name, + Message: fmt.Sprintf("unused %s %q", r.Kind, r.Name), + Value: 1, + Severity: report.SeverityWarn, + }) + } + sort.SliceStable(findings, func(i, j int) bool { + if findings[i].File != findings[j].File { + return findings[i].File < findings[j].File + } + return findings[i].Line < findings[j].Line + }) + + summary := fmt.Sprintf("%d unused %s detected in changed code", + len(results), pluralize("symbol", len(results))) + + return report.Section{ + Name: "Dead Code", + Summary: summary, + Severity: report.SeverityWarn, + Findings: findings, + Stats: map[string]any{ + "unused_symbols": len(results), + "by_kind": countByKind(results), + }, + } +} + +func countByKind(results []lang.UnusedSymbol) map[string]int { + out := map[string]int{} + for _, r := range results { + out[r.Kind]++ + } + return out +} + +func pluralize(word string, n int) string { + if n == 1 { + return word + } + return word + "s" +} diff --git a/internal/deadcode/deadcode_test.go b/internal/deadcode/deadcode_test.go new file mode 100644 index 0000000..cc09b26 --- /dev/null +++ b/internal/deadcode/deadcode_test.go @@ -0,0 +1,144 @@ +package deadcode + +import ( + "errors" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + "github.com/0xPolygon/diffguard/internal/report" +) + +// stubDetector is a minimal in-memory DeadCodeDetector that replays a fixed +// list of unused symbols (per file path) without touching the filesystem. +// Lets the orchestrator tests run without spinning up a real Go toolchain. +type stubDetector struct { + byPath map[string][]lang.UnusedSymbol + err error +} + +func (s *stubDetector) FindDeadCode(_ string, fc diff.FileChange) ([]lang.UnusedSymbol, error) { + if s.err != nil { + return nil, s.err + } + return s.byPath[fc.Path], nil +} + +func TestAnalyze_NilDetectorPasses(t *testing.T) { + d := &diff.Result{Files: []diff.FileChange{{Path: "a.go"}}} + s, err := Analyze("/repo", d, nil) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + if s.Severity != report.SeverityPass { + t.Errorf("severity = %v, want PASS", s.Severity) + } + if s.Name != "Dead Code" { + t.Errorf("name = %q, want Dead Code", s.Name) + } +} + +func TestAnalyze_NoDeadCodePasses(t *testing.T) { + d := &diff.Result{Files: []diff.FileChange{{Path: "a.go"}}} + det := &stubDetector{byPath: map[string][]lang.UnusedSymbol{}} + s, err := Analyze("/repo", d, det) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + if s.Severity != report.SeverityPass { + t.Errorf("severity = %v, want PASS", s.Severity) + } +} + +func TestAnalyze_DeadCodeWarns(t *testing.T) { + d := &diff.Result{Files: []diff.FileChange{{Path: "a.go"}, {Path: "b.go"}}} + det := &stubDetector{ + byPath: map[string][]lang.UnusedSymbol{ + "a.go": {{File: "a.go", Line: 5, Name: "foo", Kind: "func"}}, + "b.go": {{File: "b.go", Line: 10, Name: "bar", Kind: "var"}}, + }, + } + s, err := Analyze("/repo", d, det) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + if s.Severity != report.SeverityWarn { + t.Errorf("severity = %v, want WARN", s.Severity) + } + if len(s.Findings) != 2 { + t.Fatalf("expected 2 findings, got %d", len(s.Findings)) + } + // Findings should be sorted by file then line. + if s.Findings[0].File != "a.go" || s.Findings[1].File != "b.go" { + t.Errorf("findings not sorted by file: %+v", s.Findings) + } + for _, f := range s.Findings { + if f.Severity != report.SeverityWarn { + t.Errorf("finding severity = %v, want WARN", f.Severity) + } + } +} + +func TestAnalyze_DetectorErrorPropagates(t *testing.T) { + d := &diff.Result{Files: []diff.FileChange{{Path: "a.go"}}} + det := &stubDetector{err: errors.New("boom")} + _, err := Analyze("/repo", d, det) + if err == nil { + t.Fatal("expected error, got nil") + } +} + +func TestBuildSection_StatsByKind(t *testing.T) { + results := []lang.UnusedSymbol{ + {File: "a.go", Line: 1, Name: "f1", Kind: "func"}, + {File: "a.go", Line: 2, Name: "f2", Kind: "func"}, + {File: "b.go", Line: 3, Name: "v1", Kind: "var"}, + } + s := buildSection(results) + stats, ok := s.Stats.(map[string]any) + if !ok { + t.Fatalf("stats wrong type: %T", s.Stats) + } + if stats["unused_symbols"] != 3 { + t.Errorf("unused_symbols = %v, want 3", stats["unused_symbols"]) + } + byKind, ok := stats["by_kind"].(map[string]int) + if !ok { + t.Fatalf("by_kind wrong type: %T", stats["by_kind"]) + } + if byKind["func"] != 2 || byKind["var"] != 1 { + t.Errorf("by_kind = %+v, want func:2 var:1", byKind) + } +} + +func TestBuildSection_FindingMessage(t *testing.T) { + s := buildSection([]lang.UnusedSymbol{ + {File: "a.go", Line: 1, Name: "foo", Kind: "func"}, + }) + if len(s.Findings) != 1 { + t.Fatalf("got %d findings, want 1", len(s.Findings)) + } + want := `unused func "foo"` + if s.Findings[0].Message != want { + t.Errorf("message = %q, want %q", s.Findings[0].Message, want) + } + if s.Findings[0].Function != "foo" { + t.Errorf("function = %q, want foo", s.Findings[0].Function) + } +} + +func TestPluralize(t *testing.T) { + tests := []struct { + n int + want string + }{ + {0, "symbols"}, + {1, "symbol"}, + {2, "symbols"}, + } + for _, tt := range tests { + if got := pluralize("symbol", tt.n); got != tt.want { + t.Errorf("pluralize(%d) = %q, want %q", tt.n, got, tt.want) + } + } +} diff --git a/internal/lang/goanalyzer/deadcode.go b/internal/lang/goanalyzer/deadcode.go new file mode 100644 index 0000000..9e36755 --- /dev/null +++ b/internal/lang/goanalyzer/deadcode.go @@ -0,0 +1,303 @@ +package goanalyzer + +import ( + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "strings" + "unicode" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// deadcodeImpl is the Go implementation of lang.DeadCodeDetector. +// +// Strategy: collect every non-exported, non-method top-level symbol declared +// inside the diff's changed regions, then count references to each name +// across every .go file in the package directory (including _test.go files, +// since test-only usage still counts as "used"). A symbol with zero +// references is reported as dead. +// +// The detector is deliberately scoped to: +// - Non-exported names — exported symbols may be consumed by external +// packages or external repos and we can't see those without a whole- +// codebase scan that would still miss reflective use. +// - Free functions, package-level vars, and package-level consts — methods +// are skipped because they may satisfy interfaces, types are skipped +// because tracking type uses through embedded fields and conversions is +// too coarse to be useful at this granularity. +// - Functions other than init / main / TestXxx / BenchmarkXxx / ExampleXxx +// / FuzzXxx — these are entry points called by the runtime or test +// framework, not by other Go code, so a "no references" result is a +// false positive by construction. +type deadcodeImpl struct{} + +// FindDeadCode reports unused symbols declared in fc's changed regions. +// Parse errors are swallowed (returning nil) to match the rest of the +// goanalyzer, which treats unparseable files as "skip silently". +func (deadcodeImpl) FindDeadCode(repoPath string, fc diff.FileChange) ([]lang.UnusedSymbol, error) { + absPath := filepath.Join(repoPath, fc.Path) + fset, file, err := parseFile(absPath, parser.SkipObjectResolution) + if err != nil { + return nil, nil + } + + candidates := collectDeadCodeCandidates(fset, file, fc) + if len(candidates) == 0 { + return nil, nil + } + + pkgDir := filepath.Dir(absPath) + refs := scanGoPackageReferences(pkgDir) + + var results []lang.UnusedSymbol + for _, c := range candidates { + if refs[c.Name] > 0 { + continue + } + results = append(results, c) + } + return results, nil +} + +// collectDeadCodeCandidates walks the top-level declarations of file and +// returns the ones that fall inside fc's changed regions, are non-exported, +// and aren't framework-special (init/main/TestXxx/...). +func collectDeadCodeCandidates(fset *token.FileSet, file *ast.File, fc diff.FileChange) []lang.UnusedSymbol { + pkgName := "" + if file.Name != nil { + pkgName = file.Name.Name + } + var out []lang.UnusedSymbol + for _, decl := range file.Decls { + out = append(out, candidatesFromDecl(fset, decl, fc, pkgName)...) + } + return out +} + +func candidatesFromDecl(fset *token.FileSet, decl ast.Decl, fc diff.FileChange, pkgName string) []lang.UnusedSymbol { + switch d := decl.(type) { + case *ast.FuncDecl: + return funcCandidate(fset, d, fc, pkgName) + case *ast.GenDecl: + return genDeclCandidates(fset, d, fc) + } + return nil +} + +// funcCandidate returns a single-element slice for non-exported free +// functions whose declaration line is in the diff. Methods (Recv != nil) +// are skipped — they may satisfy an interface that our scan can't see. So +// are init / main / TestXxx / BenchmarkXxx / ExampleXxx / FuzzXxx because +// the runtime or test framework calls them, not user Go code. +func funcCandidate(fset *token.FileSet, fn *ast.FuncDecl, fc diff.FileChange, pkgName string) []lang.UnusedSymbol { + if fn.Recv != nil { + return nil + } + if fn.Name == nil { + return nil + } + name := fn.Name.Name + if isExported(name) { + return nil + } + if isFrameworkSpecialFunc(name, pkgName) { + return nil + } + startLine := fset.Position(fn.Name.Pos()).Line + if !fc.ContainsLine(startLine) { + return nil + } + return []lang.UnusedSymbol{{ + File: fc.Path, + Line: startLine, + Name: name, + Kind: "func", + }} +} + +// genDeclCandidates handles `var`, `const`, and `type` blocks. Each spec +// inside a block is checked independently — `var ( a = 1; b = 2 )` produces +// up to two candidates depending on which lines are in the diff. Types are +// skipped: tracking type uses through embedding, conversions, and assertions +// is more involved than identifier counting and likely to be noisy. +func genDeclCandidates(fset *token.FileSet, decl *ast.GenDecl, fc diff.FileChange) []lang.UnusedSymbol { + var kind string + switch decl.Tok { + case token.VAR: + kind = "var" + case token.CONST: + kind = "const" + default: + return nil + } + var out []lang.UnusedSymbol + for _, spec := range decl.Specs { + vs, ok := spec.(*ast.ValueSpec) + if !ok { + continue + } + out = append(out, valueSpecCandidates(fset, vs, fc, kind)...) + } + return out +} + +func valueSpecCandidates(fset *token.FileSet, vs *ast.ValueSpec, fc diff.FileChange, kind string) []lang.UnusedSymbol { + var out []lang.UnusedSymbol + for _, name := range vs.Names { + if name == nil || name.Name == "_" { + continue + } + if isExported(name.Name) { + continue + } + line := fset.Position(name.Pos()).Line + if !fc.ContainsLine(line) { + continue + } + out = append(out, lang.UnusedSymbol{ + File: fc.Path, + Line: line, + Name: name.Name, + Kind: kind, + }) + } + return out +} + +// isExported reports whether the identifier is exported (starts with an +// uppercase letter). Mirrors token.IsExported but doesn't pull the package +// in for one trivial check. +func isExported(name string) bool { + if name == "" { + return false + } + r := []rune(name)[0] + return unicode.IsUpper(r) +} + +// isFrameworkSpecialFunc reports whether a function name is called by the +// Go runtime or the testing framework rather than by other Go source. Such +// functions have no callers in source code — flagging them would be a +// guaranteed false positive. +// +// - init / main: package-level entry points. +// - TestXxx, BenchmarkXxx, ExampleXxx, FuzzXxx: discovered by go test via +// reflection over the test binary's symbol table. +func isFrameworkSpecialFunc(name, pkgName string) bool { + switch name { + case "init": + return true + case "main": + return pkgName == "main" + } + for _, prefix := range []string{"Test", "Benchmark", "Example", "Fuzz"} { + if !strings.HasPrefix(name, prefix) { + continue + } + // "TestMain" and bare "Test" both qualify; what disqualifies is e.g. + // "Testify" where the next rune is lowercase. Per go test conventions, + // the function name must be `` followed by either nothing or + // an uppercase rune. + rest := name[len(prefix):] + if rest == "" { + return true + } + first := []rune(rest)[0] + if unicode.IsUpper(first) || unicode.IsDigit(first) { + return true + } + } + return false +} + +// scanGoPackageReferences walks every .go file in pkgDir (top-level only, +// not subdirectories — those are separate packages) and returns a map from +// identifier name to the number of NON-DECLARATION uses. The declaration +// site itself is excluded so a symbol that's declared exactly once and +// never used has refs[name] == 0. +// +// Returns an empty (non-nil) map on directory read failure so callers can +// continue without a nil check. +func scanGoPackageReferences(pkgDir string) map[string]int { + refs := map[string]int{} + entries, err := os.ReadDir(pkgDir) + if err != nil { + return refs + } + for _, e := range entries { + if e.IsDir() { + continue + } + if !strings.HasSuffix(e.Name(), ".go") { + continue + } + countFileReferences(filepath.Join(pkgDir, e.Name()), refs) + } + return refs +} + +// countFileReferences parses path and increments refs for every Ident node +// that is not itself a top-level declaration site. Selector expressions +// `pkg.Foo` count Foo as a reference (we walk the whole file regardless of +// where the Ident sits). +func countFileReferences(path string, refs map[string]int) { + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, path, nil, parser.SkipObjectResolution) + if err != nil { + return + } + declSites := collectTopLevelDeclSites(file) + ast.Inspect(file, func(n ast.Node) bool { + ident, ok := n.(*ast.Ident) + if !ok { + return true + } + if declSites[ident] { + return true + } + refs[ident.Name]++ + return true + }) +} + +// collectTopLevelDeclSites returns the set of *ast.Ident nodes that are the +// name of a top-level declaration in file. Used to subtract declarations +// from the reference count so a symbol declared once and used once has +// refs[name] == 1 (not 2). Local declarations inside function bodies are +// not tracked because they're caught by the Go compiler already (unused +// locals are a build error). +func collectTopLevelDeclSites(file *ast.File) map[*ast.Ident]bool { + sites := map[*ast.Ident]bool{} + for _, decl := range file.Decls { + switch d := decl.(type) { + case *ast.FuncDecl: + if d.Name != nil { + sites[d.Name] = true + } + case *ast.GenDecl: + for _, spec := range d.Specs { + addSpecDeclSites(spec, sites) + } + } + } + return sites +} + +func addSpecDeclSites(spec ast.Spec, sites map[*ast.Ident]bool) { + switch s := spec.(type) { + case *ast.ValueSpec: + for _, name := range s.Names { + if name != nil { + sites[name] = true + } + } + case *ast.TypeSpec: + if s.Name != nil { + sites[s.Name] = true + } + } +} diff --git a/internal/lang/goanalyzer/deadcode_test.go b/internal/lang/goanalyzer/deadcode_test.go new file mode 100644 index 0000000..71ece51 --- /dev/null +++ b/internal/lang/goanalyzer/deadcode_test.go @@ -0,0 +1,297 @@ +package goanalyzer + +import ( + "os" + "path/filepath" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +// fileChange is a small helper to avoid the verbose literal struct in every +// test. The single-region path covers all our cases — multi-region overlap +// is exercised by the diff package's own tests. +func fileChange(path string, startLine, endLine int) diff.FileChange { + return diff.FileChange{ + Path: path, + Regions: []diff.ChangedRegion{{StartLine: startLine, EndLine: endLine}}, + } +} + +// writePkg writes a single Go file into a temp directory and returns the +// directory plus the absolute path. Caller is responsible for nothing — +// t.TempDir() handles cleanup. +func writePkg(t *testing.T, filename, content string) string { + t.Helper() + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, filename), []byte(content), 0644); err != nil { + t.Fatal(err) + } + return dir +} + +// writeFile writes content to dir/filename and returns nothing — used to +// add a sibling file to a package after the initial setup. +func writeFile(t *testing.T, dir, filename, content string) { + t.Helper() + if err := os.WriteFile(filepath.Join(dir, filename), []byte(content), 0644); err != nil { + t.Fatal(err) + } +} + +func TestFindDeadCode_UnusedUnexportedFunction(t *testing.T) { + dir := writePkg(t, "a.go", `package main + +func unused() {} + +func main() { + _ = 1 +} +`) + got, err := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + if err != nil { + t.Fatalf("FindDeadCode: %v", err) + } + if len(got) != 1 { + t.Fatalf("got %d unused symbols, want 1: %+v", len(got), got) + } + if got[0].Name != "unused" || got[0].Kind != "func" { + t.Errorf("got %+v, want unused/func", got[0]) + } +} + +func TestFindDeadCode_UsedFunction(t *testing.T) { + dir := writePkg(t, "a.go", `package main + +func used() {} + +func main() { + used() +} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + if len(got) != 0 { + t.Errorf("expected no unused symbols, got %+v", got) + } +} + +func TestFindDeadCode_ExportedSkipped(t *testing.T) { + dir := writePkg(t, "a.go", `package p + +// Exported but unused inside the package — we skip exported names because +// external packages might import them. +func PublicUnused() {} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + if len(got) != 0 { + t.Errorf("exported names should be skipped, got %+v", got) + } +} + +func TestFindDeadCode_MethodSkipped(t *testing.T) { + dir := writePkg(t, "a.go", `package p + +type t struct{} + +func (t) unused() {} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + if len(got) != 0 { + t.Errorf("methods should be skipped (may satisfy interfaces), got %+v", got) + } +} + +func TestFindDeadCode_FrameworkSpecialFuncs(t *testing.T) { + dir := writePkg(t, "a.go", `package main + +func init() {} +func main() {} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + if len(got) != 0 { + t.Errorf("init/main in package main should be skipped, got %+v", got) + } +} + +func TestFindDeadCode_TestFunctionsSkipped(t *testing.T) { + // Even though the candidate file is a regular .go file, a function named + // TestFoo there is treated as a test entry point and skipped — the + // detector's framework-special check is name-based, not path-based. + dir := writePkg(t, "a.go", `package p + +func TestFoo() {} +func BenchmarkBar() {} +func ExampleBaz() {} +func FuzzQux() {} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + // Note: TestFoo etc. are exported anyway, so they'd be skipped on the + // exported check before reaching the framework-special branch. This test + // pins that behavior — exported test-shaped names stay quiet. + if len(got) != 0 { + t.Errorf("test-shaped exported names should be skipped, got %+v", got) + } +} + +func TestFindDeadCode_MainOutsideMainPackageNotSkipped(t *testing.T) { + // `main` is special only in package main. In any other package it's an + // ordinary unexported function and should be flagged when unused. + dir := writePkg(t, "a.go", `package notmain + +func main() {} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + if len(got) != 1 || got[0].Name != "main" { + t.Errorf("main in non-main package should be flagged, got %+v", got) + } +} + +func TestFindDeadCode_UsedFromOtherFileInPackage(t *testing.T) { + dir := writePkg(t, "a.go", `package p + +func helper() {} +`) + writeFile(t, dir, "b.go", `package p + +func consumer() { + helper() +} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + if len(got) != 0 { + t.Errorf("helper used from sibling file should not be flagged, got %+v", got) + } +} + +func TestFindDeadCode_UsedOnlyFromTestFile(t *testing.T) { + // Test files are part of the package for reference scanning purposes. + // A function used only by tests is considered "used". + dir := writePkg(t, "a.go", `package p + +func helper() int { return 1 } +`) + writeFile(t, dir, "a_test.go", `package p + +import "testing" + +func TestFoo(t *testing.T) { + helper() +} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + if len(got) != 0 { + t.Errorf("helper used only from tests should not be flagged, got %+v", got) + } +} + +func TestFindDeadCode_UnusedVarsAndConsts(t *testing.T) { + dir := writePkg(t, "a.go", `package p + +var unusedVar = 1 +const unusedConst = "x" +var ( + usedVar = 2 + noTouch = 3 +) + +func sink() int { return usedVar } +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + + names := map[string]string{} + for _, u := range got { + names[u.Name] = u.Kind + } + if names["unusedVar"] != "var" { + t.Errorf("unusedVar missing or wrong kind: %+v", got) + } + if names["unusedConst"] != "const" { + t.Errorf("unusedConst missing or wrong kind: %+v", got) + } + if names["noTouch"] != "var" { + t.Errorf("noTouch in multi-spec block missing: %+v", got) + } + if _, ok := names["usedVar"]; ok { + t.Errorf("usedVar should not be flagged, got %+v", got) + } +} + +func TestFindDeadCode_OutsideChangedRegionIgnored(t *testing.T) { + // `unused` is on line 3; if the diff only covers lines 100+ we should + // not report it as a candidate. + dir := writePkg(t, "a.go", `package p + +func unused() {} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 100, 200)) + if len(got) != 0 { + t.Errorf("symbol outside changed region should be skipped, got %+v", got) + } +} + +func TestFindDeadCode_UnderscoreIdentifierSkipped(t *testing.T) { + // `var _ = something` is the canonical "compile-time use" pattern — we + // should not flag the underscore identifier (it isn't a symbol). + dir := writePkg(t, "a.go", `package p + +import "fmt" + +var _ = fmt.Println +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + if len(got) != 0 { + t.Errorf("underscore identifier should not be flagged, got %+v", got) + } +} + +func TestFindDeadCode_ParseErrorReturnsNoResults(t *testing.T) { + dir := writePkg(t, "a.go", `this is not valid go`) + got, err := deadcodeImpl{}.FindDeadCode(dir, fileChange("a.go", 1, 100)) + if err != nil { + t.Errorf("parse error should be swallowed, got err=%v", err) + } + if len(got) != 0 { + t.Errorf("expected no results on parse error, got %+v", got) + } +} + +func TestIsExported(t *testing.T) { + tests := []struct { + name string + want bool + }{ + {"Foo", true}, + {"foo", false}, + {"_foo", false}, + {"", false}, + } + for _, tt := range tests { + if got := isExported(tt.name); got != tt.want { + t.Errorf("isExported(%q) = %v, want %v", tt.name, got, tt.want) + } + } +} + +func TestIsFrameworkSpecialFunc(t *testing.T) { + tests := []struct { + name, pkg string + want bool + }{ + {"init", "anything", true}, + {"main", "main", true}, + {"main", "other", false}, + {"Test", "p", true}, + {"TestFoo", "p", true}, + {"Testify", "p", false}, // lowercase rune after Test + {"BenchmarkBar", "p", true}, + {"ExampleBaz", "p", true}, + {"FuzzQux", "p", true}, + {"Test1", "p", true}, // digit qualifies per `go test` + {"plain", "p", false}, + } + for _, tt := range tests { + if got := isFrameworkSpecialFunc(tt.name, tt.pkg); got != tt.want { + t.Errorf("isFrameworkSpecialFunc(%q, %q) = %v, want %v", tt.name, tt.pkg, got, tt.want) + } + } +} diff --git a/internal/lang/goanalyzer/goanalyzer.go b/internal/lang/goanalyzer/goanalyzer.go index 6585305..fc6846a 100644 --- a/internal/lang/goanalyzer/goanalyzer.go +++ b/internal/lang/goanalyzer/goanalyzer.go @@ -42,6 +42,7 @@ func (*Language) MutantGenerator() lang.MutantGenerator { return mutan func (*Language) MutantApplier() lang.MutantApplier { return mutantApplierImpl{} } func (*Language) AnnotationScanner() lang.AnnotationScanner { return annotationScannerImpl{} } func (*Language) TestRunner() lang.TestRunner { return testRunnerImpl{} } +func (*Language) DeadCodeDetector() lang.DeadCodeDetector { return deadcodeImpl{} } // isGoTestFile matches the historical internal/diff check: any path ending // in `_test.go` is a test file. No magic, no parse. diff --git a/internal/lang/goanalyzer/goanalyzer_test.go b/internal/lang/goanalyzer/goanalyzer_test.go index d68cea3..54c62bf 100644 --- a/internal/lang/goanalyzer/goanalyzer_test.go +++ b/internal/lang/goanalyzer/goanalyzer_test.go @@ -232,6 +232,14 @@ func TestLanguage_Accessors_ReturnWorkingImpls(t *testing.T) { if tr == nil { t.Fatal("TestRunner returned nil") } + + dd := l.DeadCodeDetector() + if dd == nil { + t.Fatal("DeadCodeDetector returned nil") + } + if _, err := dd.FindDeadCode(dir, fc); err != nil { + t.Fatalf("FindDeadCode: %v", err) + } } // TestScanPackageImports_ParsesAndReportsEdges exercises the happy path and diff --git a/internal/lang/lang.go b/internal/lang/lang.go index 79e0661..db1cfe6 100644 --- a/internal/lang/lang.go +++ b/internal/lang/lang.go @@ -189,6 +189,35 @@ type TestRunner interface { RunTest(cfg TestRunConfig) (killed bool, output string, err error) } +// UnusedSymbol describes a symbol declared in a diff's changed region that +// has no references within its analyzable scope. Reported by the dead code +// detector as a warning — false positives are possible (reflective use, +// frameworks that wire symbols at runtime) so the human is the final judge. +type UnusedSymbol struct { + File string + Line int + Name string + // Kind is the language-agnostic declaration kind: "func", "var", "const", + // "type", "class". Languages reuse the same set so the orchestrator can + // format a uniform message. + Kind string +} + +// DeadCodeDetector finds non-exported symbols declared inside a diff's +// changed regions that have no references in their analyzable scope. The +// implementation defines the scope: Go scans the whole package directory +// (including _test.go files); TypeScript scans the single source file +// because non-exported symbols there are file-local. +// +// The detector is deliberately conservative: it only considers symbols that +// CANNOT be referenced from outside their scope, so a "no references" result +// is a real signal rather than a guess. Exported / public symbols, methods +// (which may satisfy interfaces), and well-known framework hooks (init, +// main, TestXxx, etc.) are skipped. +type DeadCodeDetector interface { + FindDeadCode(repoPath string, fc diff.FileChange) ([]UnusedSymbol, error) +} + // Language is the top-level per-language interface. Every language // implementation exposes its sub-components through this one type so the // orchestrator can iterate `for _, l := range lang.All()` and read out any @@ -204,4 +233,5 @@ type Language interface { MutantApplier() MutantApplier AnnotationScanner() AnnotationScanner TestRunner() TestRunner + DeadCodeDetector() DeadCodeDetector } diff --git a/internal/lang/registry_test.go b/internal/lang/registry_test.go index 53deeac..ebc1e6d 100644 --- a/internal/lang/registry_test.go +++ b/internal/lang/registry_test.go @@ -21,6 +21,7 @@ func (f *fakeLang) MutantGenerator() MutantGenerator { return nil } func (f *fakeLang) MutantApplier() MutantApplier { return nil } func (f *fakeLang) AnnotationScanner() AnnotationScanner { return nil } func (f *fakeLang) TestRunner() TestRunner { return nil } +func (f *fakeLang) DeadCodeDetector() DeadCodeDetector { return nil } // Silence the unused-import check — the import is kept so that fakeLang // remains plug-compatible with the analyzer interfaces that reference the diff --git a/internal/lang/tsanalyzer/deadcode.go b/internal/lang/tsanalyzer/deadcode.go new file mode 100644 index 0000000..335967e --- /dev/null +++ b/internal/lang/tsanalyzer/deadcode.go @@ -0,0 +1,220 @@ +package tsanalyzer + +import ( + "path/filepath" + "sort" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// deadcodeImpl is the TypeScript implementation of lang.DeadCodeDetector. +// +// Strategy: walk the program's direct children, collect every non-exported +// top-level declaration (function, lexical, class) whose name sits in a +// changed region of the diff, then count `identifier` references in the +// rest of the file. A non-exported TypeScript symbol is file-local — it +// cannot be imported from another module — so a "no references in this +// file" result is conclusive. +// +// Skipped on purpose: +// - Exported declarations (`export function ...`, `export const ...`, …) +// because they may be consumed by external modules. +// - Type-only declarations (interface_declaration, type_alias_declaration, +// enum). Tracking their uses through type positions, generics, and +// casts is more involved and likely noisy at the file granularity used +// here. +// - Class methods. The class itself is the candidate; flagging an +// individual method requires understanding which methods satisfy +// interfaces or are overridden, which a CST walk can't do reliably. +// - Local variables inside functions. tsc with `noUnusedLocals` and most +// linters already catch these. +type deadcodeImpl struct{} + +// FindDeadCode reports unused symbols declared in fc's changed regions for +// a TypeScript file. Parse failures return (nil, nil) to match the rest of +// tsanalyzer's "skip silently" convention. +func (deadcodeImpl) FindDeadCode(repoPath string, fc diff.FileChange) ([]lang.UnusedSymbol, error) { + absPath := filepath.Join(repoPath, fc.Path) + tree, src, err := parseFile(absPath) + if err != nil { + return nil, nil + } + defer tree.Close() + + candidates, declSites := collectTSDeadCodeCandidates(tree.RootNode(), src, fc) + if len(candidates) == 0 { + return nil, nil + } + + refs := countTSReferences(tree.RootNode(), src, declSites) + + var results []lang.UnusedSymbol + for _, c := range candidates { + if refs[c.Name] > 0 { + continue + } + results = append(results, c) + } + sort.SliceStable(results, func(i, j int) bool { + if results[i].Line != results[j].Line { + return results[i].Line < results[j].Line + } + return results[i].Name < results[j].Name + }) + return results, nil +} + +// collectTSDeadCodeCandidates returns: +// - candidates: non-exported top-level declarations whose name token sits +// in the diff's changed regions +// - declSites: byte-position set of EVERY top-level declaration name +// token (exported and not). This second set is what the reference +// counter uses to avoid double-counting the declaration itself as a +// reference to itself. +func collectTSDeadCodeCandidates(root *sitter.Node, src []byte, fc diff.FileChange) ([]lang.UnusedSymbol, map[uint32]bool) { + declSites := map[uint32]bool{} + var candidates []lang.UnusedSymbol + + for i := 0; i < int(root.NamedChildCount()); i++ { + child := root.NamedChild(i) + if child == nil { + continue + } + processTopLevelNode(child, src, fc, &candidates, declSites, false) + } + return candidates, declSites +} + +// processTopLevelNode handles one direct child of the program node. The +// `exported` flag controls whether discovered names should be added to the +// candidates list — when true (we're inside an export_statement), names go +// only into declSites. +func processTopLevelNode(n *sitter.Node, src []byte, fc diff.FileChange, candidates *[]lang.UnusedSymbol, declSites map[uint32]bool, exported bool) { + switch n.Type() { + case "export_statement": + recurseExportStatement(n, src, fc, candidates, declSites) + case "function_declaration", "generator_function_declaration": + recordNamed(n, src, fc, "func", candidates, declSites, exported) + case "class_declaration", "abstract_class_declaration": + recordNamed(n, src, fc, "class", candidates, declSites, exported) + case "lexical_declaration", "variable_declaration": + recordDeclaratorList(n, src, fc, candidates, declSites, exported) + } +} + +// recurseExportStatement walks the children of an export_statement and +// processes each as a top-level node with `exported=true`. Names discovered +// inside go into declSites only — never into candidates. +func recurseExportStatement(n *sitter.Node, src []byte, fc diff.FileChange, candidates *[]lang.UnusedSymbol, declSites map[uint32]bool) { + for i := 0; i < int(n.NamedChildCount()); i++ { + c := n.NamedChild(i) + if c == nil { + continue + } + processTopLevelNode(c, src, fc, candidates, declSites, true) + } +} + +// recordDeclaratorList handles a const/let/var statement, which can hold +// multiple declarators (`const a = 1, b = 2`). Each declarator is processed +// independently so per-name candidacy is correct. +func recordDeclaratorList(n *sitter.Node, src []byte, fc diff.FileChange, candidates *[]lang.UnusedSymbol, declSites map[uint32]bool, exported bool) { + for i := 0; i < int(n.NamedChildCount()); i++ { + c := n.NamedChild(i) + if c == nil || c.Type() != "variable_declarator" { + continue + } + recordVariableDeclarator(c, src, fc, candidates, declSites, exported) + } +} + +// recordNamed handles a node whose declared name lives in its `name` field +// (function_declaration, class_declaration, …). Adds the name's byte +// position to declSites unconditionally; appends to candidates only when +// the node is non-exported AND the name token sits in a changed region. +func recordNamed(n *sitter.Node, src []byte, fc diff.FileChange, kind string, candidates *[]lang.UnusedSymbol, declSites map[uint32]bool, exported bool) { + name := n.ChildByFieldName("name") + if name == nil { + return + } + declSites[name.StartByte()] = true + if exported { + return + } + line := nodeLine(name) + if !fc.ContainsLine(line) { + return + } + *candidates = append(*candidates, lang.UnusedSymbol{ + File: fc.Path, + Line: line, + Name: nodeText(name, src), + Kind: kind, + }) +} + +// recordVariableDeclarator handles one entry inside a const/let/var +// statement. We only honor plain-identifier names — destructuring patterns +// (`const { a } = obj`) are skipped because their used-ness depends on +// per-property analysis we don't do here. +func recordVariableDeclarator(declarator *sitter.Node, src []byte, fc diff.FileChange, candidates *[]lang.UnusedSymbol, declSites map[uint32]bool, exported bool) { + name := declarator.ChildByFieldName("name") + if name == nil || name.Type() != "identifier" { + return + } + declSites[name.StartByte()] = true + if exported { + return + } + line := nodeLine(name) + if !fc.ContainsLine(line) { + return + } + *candidates = append(*candidates, lang.UnusedSymbol{ + File: fc.Path, + Line: line, + Name: nodeText(name, src), + Kind: kindForDeclarator(declarator), + }) +} + +// kindForDeclarator returns "func" when the declarator's value is a +// function expression / arrow, otherwise "var" (or "const" / "let" — but we +// keep the kind set small and uniform with Go). This lets the report make +// the more useful distinction "unused arrow function" vs "unused +// variable". +func kindForDeclarator(declarator *sitter.Node) string { + value := declarator.ChildByFieldName("value") + if value == nil { + return "var" + } + switch value.Type() { + case "arrow_function", "function_expression", "function", + "generator_function": + return "func" + } + return "var" +} + +// countTSReferences walks every node in the tree and returns a map from +// identifier name → reference count. An identifier is a reference if it is +// NOT one of the declaration name tokens recorded by the candidate +// collector. Property accesses (`obj.foo`) use `property_identifier`, not +// `identifier`, and so don't pollute the count. +func countTSReferences(root *sitter.Node, src []byte, declSites map[uint32]bool) map[string]int { + refs := map[string]int{} + walk(root, func(n *sitter.Node) bool { + if n.Type() != "identifier" { + return true + } + if declSites[n.StartByte()] { + return true + } + refs[nodeText(n, src)]++ + return true + }) + return refs +} diff --git a/internal/lang/tsanalyzer/deadcode_test.go b/internal/lang/tsanalyzer/deadcode_test.go new file mode 100644 index 0000000..9ad7ea6 --- /dev/null +++ b/internal/lang/tsanalyzer/deadcode_test.go @@ -0,0 +1,199 @@ +package tsanalyzer + +import ( + "os" + "path/filepath" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +func tsFileChange(path string, startLine, endLine int) diff.FileChange { + return diff.FileChange{ + Path: path, + Regions: []diff.ChangedRegion{{StartLine: startLine, EndLine: endLine}}, + } +} + +func writeTSFile(t *testing.T, filename, content string) string { + t.Helper() + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, filename), []byte(content), 0644); err != nil { + t.Fatal(err) + } + return dir +} + +func TestFindDeadCode_UnusedFunction(t *testing.T) { + dir := writeTSFile(t, "a.ts", `function unused() { return 1; } + +function used() { return 2; } + +console.log(used()); +`) + got, err := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + if err != nil { + t.Fatalf("FindDeadCode: %v", err) + } + if len(got) != 1 { + t.Fatalf("got %d unused symbols, want 1: %+v", len(got), got) + } + if got[0].Name != "unused" || got[0].Kind != "func" { + t.Errorf("got %+v, want unused/func", got[0]) + } +} + +func TestFindDeadCode_ExportedSkipped(t *testing.T) { + dir := writeTSFile(t, "a.ts", `export function publicUnused() { return 1; } + +export const value = 42; +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + if len(got) != 0 { + t.Errorf("exported names should be skipped, got %+v", got) + } +} + +func TestFindDeadCode_UnusedConst(t *testing.T) { + dir := writeTSFile(t, "a.ts", `const unused = 5; +const used = 10; +console.log(used); +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + if len(got) != 1 { + t.Fatalf("got %d, want 1: %+v", len(got), got) + } + if got[0].Name != "unused" || got[0].Kind != "var" { + t.Errorf("got %+v, want unused/var", got[0]) + } +} + +func TestFindDeadCode_UnusedArrowFunctionConst(t *testing.T) { + // `const helper = () => ...` should be treated as a function (kind "func"), + // not a plain variable, so the report message reads naturally. + dir := writeTSFile(t, "a.ts", `const helper = (x: number) => x + 1; +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + if len(got) != 1 { + t.Fatalf("got %d, want 1: %+v", len(got), got) + } + if got[0].Name != "helper" || got[0].Kind != "func" { + t.Errorf("got %+v, want helper/func", got[0]) + } +} + +func TestFindDeadCode_UnusedClass(t *testing.T) { + dir := writeTSFile(t, "a.ts", `class Unused { + foo(): void {} +} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + if len(got) != 1 { + t.Fatalf("got %d, want 1: %+v", len(got), got) + } + if got[0].Name != "Unused" || got[0].Kind != "class" { + t.Errorf("got %+v, want Unused/class", got[0]) + } +} + +func TestFindDeadCode_UsedClass(t *testing.T) { + dir := writeTSFile(t, "a.ts", `class Used {} + +const x = new Used(); +console.log(x); +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + if len(got) != 0 { + t.Errorf("Used should not be flagged, got %+v", got) + } +} + +func TestFindDeadCode_OutsideChangedRegionIgnored(t *testing.T) { + dir := writeTSFile(t, "a.ts", `function unused() { return 1; } +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 100, 200)) + if len(got) != 0 { + t.Errorf("outside changed region should be skipped, got %+v", got) + } +} + +func TestFindDeadCode_ReferenceFromInsideAnotherFunction(t *testing.T) { + dir := writeTSFile(t, "a.ts", `function helper() { return 1; } + +export function api() { + return helper(); +} +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + if len(got) != 0 { + t.Errorf("helper used inside api should not be flagged, got %+v", got) + } +} + +func TestFindDeadCode_DestructuredVarSkipped(t *testing.T) { + // const { a, b } = obj — the destructuring pattern is not an identifier + // node in tree-sitter, so we conservatively skip these. Tracking which + // individual fields are "used" needs per-property analysis we don't do. + dir := writeTSFile(t, "a.ts", `const obj = { a: 1, b: 2 }; +const { a, b } = obj; +console.log(a, b); +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + // We expect no flag for the destructured names; obj is used so it's not + // flagged either. The exact assertion is "a" and "b" are not in results. + for _, r := range got { + if r.Name == "a" || r.Name == "b" { + t.Errorf("destructured name %q should not be flagged: %+v", r.Name, got) + } + } +} + +func TestFindDeadCode_MultipleDeclaratorsInOneStmt(t *testing.T) { + dir := writeTSFile(t, "a.ts", `let a = 1, b = 2, c = 3; +console.log(a); +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + + names := map[string]bool{} + for _, r := range got { + names[r.Name] = true + } + if names["a"] { + t.Errorf("a is used, should not be flagged: %+v", got) + } + if !names["b"] || !names["c"] { + t.Errorf("b and c are unused, should be flagged: %+v", got) + } +} + +func TestFindDeadCode_PropertyAccessIsNotAReference(t *testing.T) { + // `obj.foo` reads foo as a property_identifier, not an identifier — so + // a top-level `function foo` that's only matched by a property access + // should still be flagged as unused. + dir := writeTSFile(t, "a.ts", `function foo() { return 1; } + +const obj = { foo: 42 }; +console.log(obj.foo); +`) + got, _ := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + flagged := false + for _, r := range got { + if r.Name == "foo" && r.Kind == "func" { + flagged = true + } + } + if !flagged { + t.Errorf("function foo (only matched by property access) should be flagged, got %+v", got) + } +} + +func TestFindDeadCode_ParseErrorTolerated(t *testing.T) { + // Tree-sitter parses partial trees on broken input, so the call may + // either return nothing or whatever it managed to extract. Either way + // it must not error out. + dir := writeTSFile(t, "a.ts", `function broken( {`) + _, err := deadcodeImpl{}.FindDeadCode(dir, tsFileChange("a.ts", 1, 100)) + if err != nil { + t.Errorf("parse error should be tolerated, got err=%v", err) + } +} diff --git a/internal/lang/tsanalyzer/tsanalyzer.go b/internal/lang/tsanalyzer/tsanalyzer.go index 28aa500..14acf56 100644 --- a/internal/lang/tsanalyzer/tsanalyzer.go +++ b/internal/lang/tsanalyzer/tsanalyzer.go @@ -46,6 +46,7 @@ func (*Language) MutantGenerator() lang.MutantGenerator { return mutan func (*Language) MutantApplier() lang.MutantApplier { return mutantApplierImpl{} } func (*Language) AnnotationScanner() lang.AnnotationScanner { return annotationScannerImpl{} } func (*Language) TestRunner() lang.TestRunner { return newTestRunner() } +func (*Language) DeadCodeDetector() lang.DeadCodeDetector { return deadcodeImpl{} } // isTSTestFile reports whether path is a TypeScript test file. // diff --git a/internal/mutation/orchestration_race_test.go b/internal/mutation/orchestration_race_test.go index 36d2fed..c783d75 100644 --- a/internal/mutation/orchestration_race_test.go +++ b/internal/mutation/orchestration_race_test.go @@ -154,4 +154,5 @@ func (l *fakeLanguage) MutantApplier() lang.MutantApplier { return l.appli func (l *fakeLanguage) AnnotationScanner() lang.AnnotationScanner { panic("not used") } -func (l *fakeLanguage) TestRunner() lang.TestRunner { return l.runner } +func (l *fakeLanguage) TestRunner() lang.TestRunner { return l.runner } +func (l *fakeLanguage) DeadCodeDetector() lang.DeadCodeDetector { panic("not used") }