diff --git a/MULTI_LANGUAGE_SUPPORT.md b/MULTI_LANGUAGE_SUPPORT.md new file mode 100644 index 0000000..a1a0a89 --- /dev/null +++ b/MULTI_LANGUAGE_SUPPORT.md @@ -0,0 +1,629 @@ +# Multi-Language Support Guide + +A comprehensive checklist for adding new language support to diffguard. This document covers the one-time repo reorganization needed to enable multi-language support, defines the interfaces each language must implement, and provides a reusable per-language checklist. + +--- + +## Table of Contents + +1. [Architecture Overview](#architecture-overview) +2. [Repo Reorganization (One-Time)](#repo-reorganization-one-time) +3. [Interface Definitions](#interface-definitions) +4. [Per-Language Implementation Checklist](#per-language-implementation-checklist) +5. [Language-Specific Notes](#language-specific-notes) +6. [Key Design Decisions](#key-design-decisions) + +--- + +## Architecture Overview + +### What's Already Language-Agnostic + +These components work for any language with zero changes: + +| Component | Location | What It Does | +|-----------|----------|--------------| +| Report types | `internal/report/report.go` | `Finding`, `Section`, `Severity`, text/JSON output | +| Tier classification | `internal/mutation/tiers.go` | Groups mutation operators into Tier 1/2/3 by name | +| Graph algorithms | `internal/deps/deps.go` | Cycle detection, afferent/efferent coupling, instability, SDP violations | +| Git churn counting | `internal/churn/churn.go` | `git log --oneline --follow` to count commits per file | +| Diff format parsing | `internal/diff/diff.go` | Unified diff hunk header parsing (`@@ -a,b +c,d @@`) | +| CLI/config | `cmd/diffguard/main.go` | Flag parsing, exit code logic, analyzer orchestration | + +### What's Tightly Coupled to Go + +Every item below must be abstracted behind an interface and re-implemented per language: + +| Concern | Current Location | Go-Specific Mechanism | +|---------|------------------|-----------------------| +| File filtering | 
`diff/diff.go:92,175-177,201-208` | Hardcoded `*.go` glob, `_test.go` exclusion | +| Function identification | `sizes/sizes.go`, `complexity/complexity.go`, `churn/churn.go` | `*ast.FuncDecl` + receiver detection (duplicated 3x) | +| Complexity scoring | `complexity/complexity.go` | Walks `IfStmt`, `ForStmt`, `SwitchStmt`, `SelectStmt`, etc. | +| Import parsing | `deps/deps.go` | `parser.ParseDir()` + `go.mod` module path extraction | +| Mutation generation | `mutation/generate.go` | Go AST node pattern matching for 8 operator types | +| Mutation application | `mutation/apply.go` | Go AST rewriting + `go/printer` | +| Disable annotations | `mutation/annotations.go` | Scans Go comments + `*ast.FuncDecl` ranges | +| Test execution | `mutation/mutation.go` | `go test -overlay` (Go build system feature) | + +--- + +## Repo Reorganization (One-Time) + +These steps prepare the repo structure for multiple languages. Each step must leave all existing tests passing. + +### Step 1: Create the language abstraction layer + +- [ ] Create `internal/lang/lang.go` with all interface definitions (see [Interface Definitions](#interface-definitions)) +- [ ] Create `internal/lang/detect.go` with language auto-detection logic +- [ ] Create `internal/lang/registry.go` with a `Register()`/`Get()`/`All()` registry + +### Step 2: Extract Go file filtering + +- [ ] Create `internal/lang/goanalyzer/` package +- [ ] Implement `FileFilter` for Go (extensions: `.go`, test exclusion: `_test.go`, diff globs: `*.go`) +- [ ] Modify `diff.Parse()` and `diff.CollectPaths()` to accept a `FileFilter` parameter instead of hardcoded `.go` checks +- [ ] Update all callers in `cmd/diffguard/main.go` to pass the Go file filter + +### Step 3: Extract Go function extraction + +- [ ] Move function identification logic from `sizes.go`, `complexity.go`, and `churn.go` into `internal/lang/goanalyzer/parse.go` +- [ ] Consolidate the three duplicate `funcName()` implementations into one shared helper +- [ ] 
Implement `FunctionExtractor` interface for Go +- [ ] Modify `internal/sizes/sizes.go` to call through the interface + +### Step 4: Extract Go complexity scoring + +- [ ] Implement `ComplexityCalculator` interface for Go in `internal/lang/goanalyzer/complexity.go` +- [ ] Implement `ComplexityScorer` interface for Go (can share implementation with `ComplexityCalculator`) +- [ ] Modify `internal/complexity/complexity.go` to call through the interface +- [ ] Modify `internal/churn/churn.go` to call through the `ComplexityScorer` interface +- [ ] Delete the duplicated simplified `computeComplexity()` in churn + +### Step 5: Extract Go import resolution + +- [ ] Implement `ImportResolver` interface for Go in `internal/lang/goanalyzer/deps.go` +- [ ] Split `internal/deps/deps.go` into `graph.go` (pure algorithms) and `deps.go` (orchestration) +- [ ] Modify `deps.go` orchestration to call through the interface + +### Step 6: Extract Go mutation interfaces + +- [ ] Implement `MutantGenerator` in `internal/lang/goanalyzer/mutation_generate.go` +- [ ] Implement `MutantApplier` in `internal/lang/goanalyzer/mutation_apply.go` +- [ ] Implement `AnnotationScanner` in `internal/lang/goanalyzer/mutation_annotate.go` +- [ ] Implement `TestRunner` in `internal/lang/goanalyzer/testrunner.go` +- [ ] Modify `internal/mutation/` to call through interfaces +- [ ] Keep `tiers.go` in `internal/mutation/` (it's already language-agnostic) + +### Step 7: Wire up registration and detection + +- [ ] Add `init()` function to `internal/lang/goanalyzer/` that calls `lang.Register()` +- [ ] Add blank import `_ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer"` in `cmd/diffguard/main.go` +- [ ] Add `--language` CLI flag (default: auto-detect) +- [ ] Modify `cmd/diffguard/main.go` to resolve language and pass it through the analyzer pipeline +- [ ] Add tests for language detection and registration + +### Resulting directory structure + +``` +internal/ + lang/ + lang.go # Interface definitions 
+ detect.go # Auto-detection from file extensions / manifest files + registry.go # Register/Get/All + goanalyzer/ # Go implementation + goanalyzer.go # init() + Language interface impl + parse.go # Shared Go AST helpers (funcName, etc.) + complexity.go # ComplexityCalculator + ComplexityScorer + sizes.go # FunctionExtractor + deps.go # ImportResolver + mutation_generate.go # MutantGenerator + mutation_apply.go # MutantApplier + mutation_annotate.go # AnnotationScanner + testrunner.go # TestRunner (go test -overlay) + diff/ # Modified: parameterized file filtering + complexity/ # Modified: delegates to lang.ComplexityCalculator + sizes/ # Modified: delegates to lang.FunctionExtractor + deps/ + graph.go # Pure graph algorithms (extracted, unchanged) + deps.go # Orchestration, delegates to lang.ImportResolver + churn/ # Modified: delegates to lang.ComplexityScorer + mutation/ # Modified: delegates to lang interfaces + tiers.go # Unchanged (already language-agnostic) + report/ # Unchanged +``` + +--- + +## Interface Definitions + +Each language implementation must satisfy a top-level `Language` interface that provides access to all sub-interfaces. + +### Language (top-level) + +``` +Language + Name() string -- identifier: "go", "python", "typescript", etc. + FileFilter() FileFilter -- which files belong to this language + ComplexityCalculator() ComplexityCalculator + FunctionExtractor() FunctionExtractor + ImportResolver() ImportResolver + ComplexityScorer() ComplexityScorer + MutantGenerator() MutantGenerator + MutantApplier() MutantApplier + AnnotationScanner() AnnotationScanner + TestRunner() TestRunner +``` + +### FileFilter + +Controls which files the diff parser includes and which are excluded as test files. + +``` +FileFilter + Extensions []string -- source extensions incl. 
dot: [".go"], [".py"], [".ts", ".tsx"] + IsTestFile func(path string) bool -- returns true for test files to exclude from analysis + DiffGlobs []string -- globs passed to `git diff -- ` +``` + +### FunctionExtractor + +Parses source files, finds function/method declarations, reports their line ranges and sizes. + +``` +FunctionExtractor + ExtractFunctions(absPath, FileChange) -> ([]FunctionSize, *FileSize, error) + +FunctionInfo { File, Line, EndLine, Name } +FunctionSize { FunctionInfo, Lines } +FileSize { Path, Lines } +``` + +### ComplexityCalculator + +Computes cognitive complexity per function using the language's control flow constructs. + +``` +ComplexityCalculator + AnalyzeFile(absPath, FileChange) -> ([]FunctionComplexity, error) + +FunctionComplexity { FunctionInfo, Complexity int } +``` + +### ComplexityScorer + +Lightweight complexity scoring for churn weighting. May reuse `ComplexityCalculator` or be a faster approximation. + +``` +ComplexityScorer + ScoreFile(absPath, FileChange) -> ([]FunctionComplexity, error) +``` + +### ImportResolver + +Detects the project's module root and scans package-level imports to build the dependency graph. + +``` +ImportResolver + DetectModulePath(repoPath) -> (string, error) + ScanPackageImports(repoPath, pkgDir, modulePath) -> map[string]map[string]bool +``` + +### MutantGenerator + +Finds mutation sites in source code within changed regions. + +``` +MutantGenerator + GenerateMutants(absPath, FileChange, disabledLines map[int]bool) -> ([]MutantSite, error) + +MutantSite { File, Line, Description, Operator } +``` + +Operator names must use the canonical names so tiering works: +`conditional_boundary`, `negate_conditional`, `math_operator`, `return_value`, +`boolean_substitution`, `incdec`, `branch_removal`, `statement_deletion` + +New language-specific operators may be added but must be registered in `tiers.go`. + +### MutantApplier + +Applies a mutation to a source file and returns the modified source bytes. 
+ +``` +MutantApplier + ApplyMutation(absPath, MutantSite) -> ([]byte, error) +``` + +### AnnotationScanner + +Scans source files for `mutator-disable-*` comments and returns the set of source lines to skip. + +``` +AnnotationScanner + ScanAnnotations(absPath) -> (disabledLines map[int]bool, error) +``` + +### TestRunner + +Executes the test suite against mutated code and reports whether the mutation was killed. + +``` +TestRunner + RunTest(TestRunConfig) -> (killed bool, output string, error) + +TestRunConfig { RepoPath, MutantFile, OriginalFile, Timeout, TestPattern, WorkDir, Index } +``` + +--- + +## Per-Language Implementation Checklist + +Copy this checklist when adding Language X. Replace `` with the language name (e.g., `python`, `typescript`). + +### Phase 0: Research and prerequisites + +- [ ] **Parser selection**: Identify how to parse `` source from Go. Options: + - Tree-sitter (`github.com/smacker/go-tree-sitter`) -- works for any language with a grammar + - Shell out to a helper script (`python3 -c "import ast; ..."`) -- simpler but adds runtime dep + - Language-specific Go library (if one exists) +- [ ] **Test runner**: Identify the test command for `` (e.g., `pytest`, `jest`, `cargo test`, `mvn test`) +- [ ] **Test isolation**: Determine mutation isolation strategy (see [Key Design Decisions](#key-design-decisions)) +- [ ] **Module manifest**: Identify the project manifest file (`pyproject.toml`, `package.json`, `Cargo.toml`, `pom.xml`) +- [ ] **Import system**: Document how imports work -- relative vs absolute, aliasing, re-exports +- [ ] **Test file conventions**: Document how test files are identified (naming, directory, annotations) +- [ ] **Comment syntax**: Document single-line and multi-line comment syntax +- [ ] **Function declaration patterns**: Document all forms -- standalone functions, class methods, lambdas, closures, nested functions, arrow functions, etc. 
+ +### Phase 1: FileFilter + +- [ ] Create `internal/lang/analyzer/` package directory +- [ ] Define source file extensions (e.g., `.py`, `.ts`+`.tsx`, `.rs`, `.java`) +- [ ] Implement `IsTestFile()`: + - Python: `test_*.py`, `*_test.py`, files under `tests/` or `test/` directories + - TypeScript/JS: `*.test.ts`, `*.spec.ts`, `*.test.js`, `*.spec.js`, files under `__tests__/` + - Rust: files under `tests/` directory (inline `#[cfg(test)]` modules are harder -- may need AST) + - Java: `*Test.java`, `*Tests.java`, files under `src/test/` +- [ ] Define `DiffGlobs` for `git diff` +- [ ] **Tests**: correct extensions included, test files excluded, edge cases (e.g., `testutils.py` should NOT be excluded) + +### Phase 2: FunctionExtractor (unlocks sizes analyzer) + +- [ ] Parse source files and identify function/method declarations +- [ ] Extract function name including class/module prefix: + - Python: `ClassName.method_name`, standalone `function_name` + - TypeScript: `ClassName.methodName`, `functionName`, arrow functions assigned to `const` + - Rust: `impl Type::method_name`, standalone `fn function_name` + - Java: `ClassName.methodName` +- [ ] Extract start line and end line for each function +- [ ] Compute line count (`end - start + 1`) +- [ ] Compute total file line count +- [ ] Filter to only functions overlapping the `FileChange` regions +- [ ] **Tests**: empty file, single function, multiple functions, class methods, nested functions, decorators/annotations, out-of-range filtering + +### Phase 3: ComplexityCalculator (unlocks complexity analyzer) + +- [ ] Implement cognitive complexity scoring. 
Map language constructs to increments: + +| Increment | Go (reference) | Python | TypeScript/JS | Rust | Java | +|-----------|----------------|--------|---------------|------|------| +| +1 base | `if`, `for`, `switch`, `select` | `if`, `for`, `while`, `try`, `with` | `if`, `for`, `while`, `switch`, `try` | `if`, `for`, `while`, `loop`, `match` | `if`, `for`, `while`, `switch`, `try` | +| +1 nesting | per nesting level | per nesting level | per nesting level | per nesting level | per nesting level | +| +1 else | `else`, `else if` | `elif`, `else` | `else`, `else if` | `else`, `else if` | `else`, `else if` | +| +1 logical op | `&&`, `\|\|` | `and`, `or` | `&&`, `\|\|` | `&&`, `\|\|` | `&&`, `\|\|` | +| +1 op switch | operator changes in sequence | operator changes in sequence | operator changes in sequence | operator changes in sequence | operator changes in sequence | + +- [ ] Handle language-specific patterns: + - Python: comprehensions (list/dict/set/generator), `lambda`, walrus `:=` in conditions, `except` clauses + - TypeScript/JS: ternary `? :`, optional chaining `?.`, nullish coalescing `??`, arrow functions in callbacks + - Rust: `?` operator, `if let`/`while let`, `match` arms, closure complexity + - Java: ternary `? 
:`, enhanced for-each, try-with-resources, lambda expressions, streams +- [ ] **Tests**: empty function (score=0), each control flow type, nesting penalties, logical operators, language-specific patterns + +### Phase 4: ComplexityScorer (unlocks churn analyzer) + +- [ ] Implement a scoring function for churn weighting +- [ ] Can be the same as `ComplexityCalculator` if fast enough, or a simplified approximation (count control flow keywords) +- [ ] **Tests**: verify scores are consistent with `ComplexityCalculator` (or document the approximation) + +### Phase 5: ImportResolver (unlocks deps analyzer) + +- [ ] Implement `DetectModulePath()`: + - Python: parse `pyproject.toml` `[project] name`, or `setup.py`/`setup.cfg`, or fall back to directory name + - TypeScript/JS: parse `package.json` `name` field + - Rust: parse `Cargo.toml` `[package] name` + - Java: parse `pom.xml` `:`, or `build.gradle` `group` + project name +- [ ] Implement `ScanPackageImports()`: + - Python: scan `import X` and `from X import Y` statements, resolve relative imports (`.foo` -> parent package), filter to internal packages + - TypeScript/JS: scan `import {} from './path'` and `require('./path')`, resolve relative paths, filter to internal modules + - Rust: scan `use crate::` and `mod` declarations, map to internal crate modules + - Java: scan `import com.example.foo.Bar` statements, filter by project package prefix +- [ ] Define what "internal" means for this language (same module/package vs third-party) +- [ ] **Tests**: module path detection, internal import identification, external import filtering, relative import resolution + +### Phase 6: AnnotationScanner (for mutation testing) + +- [ ] Define annotation syntax using the language's comment style: + - Python: `# mutator-disable-next-line`, `# mutator-disable-func` + - TypeScript/JS: `// mutator-disable-next-line`, `// mutator-disable-func` + - Rust: `// mutator-disable-next-line`, `// mutator-disable-func` + - Java: `// 
mutator-disable-next-line`, `// mutator-disable-func` +- [ ] Implement function range detection (needed for `mutator-disable-func` to know which lines to skip) +- [ ] Return `map[int]bool` of disabled source line numbers +- [ ] **Tests**: next-line annotation disables the following line, function annotation disables all lines in function, no annotations returns empty map, irrelevant comments are ignored + +### Phase 7: MutantGenerator (for mutation testing) + +- [ ] Map the 8 canonical mutation operators to language-specific patterns: + +| Operator | Category | Go (reference) | Applicability Notes | +|----------|----------|----------------|-------------------| +| `conditional_boundary` | Tier 1 | `>` to `>=`, `<` to `<=` | Universal across all languages | +| `negate_conditional` | Tier 1 | `==` to `!=`, `>` to `<` | Universal. TS/JS: include `===`/`!==` | +| `math_operator` | Tier 1 | `+` to `-`, `*` to `/` | Universal. Python: include `//` (floor div), `**` (power) | +| `return_value` | Tier 1 | Replace returns with `nil` | Language-specific zero values: Python `None`, JS `null`/`undefined`, Rust `Default::default()`, Java `null`/`0`/`false` | +| `boolean_substitution` | Tier 2 | `true` to `false` | Python: `True`/`False`. Rust: same. Universal otherwise | +| `incdec` | Tier 2 | `++` to `--` | Python/Rust: N/A (no `++`/`--` operators). Skip for these languages | +| `branch_removal` | Tier 3 | Empty the body of `if` | Universal. 
Python: replace body with `pass` | +| `statement_deletion` | Tier 3 | Remove bare function calls | Universal | + +- [ ] Consider language-specific additional operators (register in `tiers.go` with appropriate tier): + - Python: `is`/`is not` mutations, `in`/`not in` mutations + - TypeScript: `===`/`!==` mutations, optional chaining `?.` removal, nullish coalescing `??` to `||` + - Rust: `unwrap()` removal, `?` operator removal, `Some(x)` to `None` + - Java: null-check removal, `equals()` to `==` swap, exception swallowing +- [ ] Filter mutants to only changed lines (respect `FileChange` regions) +- [ ] Exclude disabled lines (from `AnnotationScanner`) +- [ ] **Tests**: each operator type generates correct mutants, out-of-range lines are skipped, disabled lines are respected + +### Phase 8: MutantApplier (for mutation testing) + +- [ ] Choose mutation application strategy: + - **AST-based** (preferred if a good parser is available): parse file, modify AST node, render back to source + - **Text-based** (fallback): use line/column positions from `MutantSite` to do string replacement +- [ ] Handle edge cases: multiple operators on the same line, multi-line expressions, comment-only lines +- [ ] Verify that applied mutations produce syntactically valid source code +- [ ] **Tests**: each mutation type applied correctly, parse error returns nil, line mismatch returns nil + +### Phase 9: TestRunner (for mutation testing) + +- [ ] Implement test command construction: + - Python: `pytest [--timeout=] [-k ] ` + - TypeScript/JS: `npx jest [--testPathPattern ] --forceExit` or `npx vitest run` + - Rust: `cargo test [] -- --test-threads=1` + - Java: `mvn test -Dtest= -pl ` or `gradle test --tests ` +- [ ] Implement mutation isolation strategy: + - **Go (reference)**: Uses `go test -overlay` -- mutant files are overlaid at build time, no file copying needed, fully parallel + - **All other languages**: Use temp-copy strategy: + 1. Copy original file to backup location + 2. 
Write mutated source in place of original + 3. Run test command + 4. Restore original from backup + 5. **Critical**: Mutants on the same file must be serialized (acquire per-file lock). Mutants on different files can run in parallel. + - Alternative per-language isolation (if available): + - Python: `importlib` tricks or `PYTHONPATH` manipulation + - TypeScript: Jest `moduleNameMapper` config + - Rust: `cargo test` doesn't support overlay; temp-copy is the only option +- [ ] Handle test timeout (kill process after `TestRunConfig.Timeout`) +- [ ] Detect kill vs survive: test command exit code != 0 means killed +- [ ] **Tests**: killed mutant (test fails), survived mutant (test passes), timeout handling, file restoration after crash + +### Phase 10: Integration and registration + +- [ ] Create `internal/lang/analyzer/analyzer.go` implementing the `Language` interface +- [ ] Add `init()` function calling `lang.Register()` +- [ ] Add blank import to `cmd/diffguard/main.go`: `_ "github.com/.../internal/lang/analyzer"` +- [ ] Write end-to-end integration test: + - Create a temp directory with a small `` project (2-3 files, 1 test file) + - Run the full analyzer pipeline + - Assert each report section has expected content +- [ ] Verify all existing Go tests still pass + +### Phase 11: Documentation + +- [ ] Add the language to README sections: + - "Install" -- any additional toolchain requirements + - "Usage" -- language-specific examples + - "What It Measures" -- any scoring differences from the Go reference + - "CLI Reference" -- new flags if any + - "CI Integration" -- workflow examples for the language +- [ ] Document the annotation syntax for the language +- [ ] Document any language-specific mutation operators and their tier assignments +- [ ] Document known limitations (e.g., "Python closures are not analyzed individually") + +--- + +## Language-Specific Notes + +### Python + +**Parser options**: +- **Tree-sitter** (`tree-sitter-python`): Best option from Go. 
No Python runtime needed. CST-based, so node types are strings (`"function_definition"`, `"if_statement"`). +- **Shell out to `python3 -c "import ast; ..."`**: Simpler for prototyping but adds Python as a runtime dependency. + +**Test runner**: `pytest` (most common). Fall back to `unittest` (`python -m pytest` handles both). + +**Isolation**: Temp-copy strategy. Python caches bytecode in `__pycache__/` -- set `PYTHONDONTWRITEBYTECODE=1` when running mutant tests to avoid stale cache. + +**Unique complexity considerations**: +- List/dict/set/generator comprehensions should add +1 each (they're implicit loops) +- `with` statements add +1 (context manager control flow) +- `lambda` expressions: count complexity of the lambda body +- `try`/`except`/`finally`: +1 for `try`, +1 for each `except`, +1 for `finally` +- Decorators: don't count toward complexity (they're applied at definition time) + +**Import system**: +- `import foo` -- absolute import +- `from foo import bar` -- absolute import +- `from . import bar` -- relative import (resolve against package path) +- `from ..foo import bar` -- relative import up two levels +- Distinguish internal vs external by checking if the import path starts with a package in the project + +**Test file conventions**: `test_*.py`, `*_test.py`, files in `tests/` or `test/` directories. Also `conftest.py` (test infrastructure, not test files -- should be excluded from analysis but not treated as test files). + +**Missing operators**: No `++`/`--` -- skip `incdec`. Add `is`/`is not` and `in`/`not in` as `negate_conditional` variants. + +### TypeScript / JavaScript + +**Parser options**: +- **Tree-sitter** (`tree-sitter-typescript`, `tree-sitter-javascript`): Works well. TypeScript and JavaScript need separate grammars. +- **Shell out to Node.js**: Could use `@babel/parser` or `typescript` compiler API via a helper script. 
+ +**Test runner**: Detect from `package.json`: +- `jest` or `@jest/core` in deps -> `npx jest` +- `vitest` in deps -> `npx vitest run` +- `mocha` in deps -> `npx mocha` +- Fall back to `npm test` + +**Isolation**: Temp-copy strategy. Jest supports `moduleNameMapper` in config which could theoretically be used for overlay-like behavior, but temp-copy is simpler and more universal. + +**Unique complexity considerations**: +- Ternary `condition ? a : b` adds +1 (it's a conditional) +- Optional chaining `foo?.bar` -- don't count (it's syntactic sugar, not control flow) +- Nullish coalescing `foo ?? bar` -- don't count (not branching in the cognitive sense) +- Arrow functions used as callbacks: count complexity of the body +- `async`/`await`: `try`/`catch` around `await` adds complexity; `await` alone does not +- Promise chains `.then().catch()` -- each `.catch()` adds +1 + +**Import system**: +- `import { x } from './local'` -- relative import (internal) +- `import { x } from 'package'` -- bare specifier (external) +- `require('./local')` -- CommonJS relative (internal) +- `require('package')` -- CommonJS bare (external) +- Distinguish internal by checking if the import path starts with `.` or `@/` (project alias) + +**Test file conventions**: `*.test.ts`, `*.spec.ts`, `*.test.js`, `*.spec.js`, `*.test.tsx`, `*.spec.tsx`, files under `__tests__/` directories. + +**Additional operators**: `===`/`!==` mutations (map to `negate_conditional`). Optional chaining removal (`foo?.bar` -> `foo.bar`, Tier 2). Nullish coalescing swap (`??` -> `||`, Tier 2). + +### Rust + +**Parser options**: +- **Tree-sitter** (`tree-sitter-rust`): Best option. Mature grammar. +- **Shell out to `rustc`**: Not practical. The `syn` crate is Rust-only. + +**Test runner**: `cargo test`. Always available in Rust projects. + +**Isolation**: Temp-copy strategy. `cargo test` recompiles from source, so replacing the file and running `cargo test` works. 
Set `CARGO_INCREMENTAL=0` to avoid stale incremental caches. + +**Unique complexity considerations**: +- `match` arms: +1 for the `match` statement, +1 for each arm with a guard (`if` condition) +- `if let` / `while let`: +1 each (they're pattern-matching control flow) +- `?` operator: don't count (it's error propagation syntax, not branching) +- `loop` (infinite loop): +1 +- Closures: count complexity of the closure body +- `unsafe` blocks: don't count toward complexity (they're a safety annotation, not control flow) + +**Import system**: +- `use crate::foo::bar` -- internal crate import +- `use other_crate::foo` -- external crate import +- `mod foo;` -- module declaration (internal) +- Distinguish internal by checking if the path starts with `crate::` or `self::` or `super::` + +**Test file conventions**: `tests/` directory contains integration tests. Unit tests are inline `#[cfg(test)] mod tests { ... }` -- these are harder to detect without parsing. For file filtering purposes, treat files in `tests/` as test files. For inline test modules, ignore them during analysis (they share the source file). + +**Missing operators**: No `++`/`--` -- skip `incdec`. Add `unwrap()` removal (Tier 1, return_value variant), `?` removal (Tier 2), `Some(x)` to `None` (Tier 1, return_value variant). + +### Java + +**Parser options**: +- **Tree-sitter** (`tree-sitter-java`): Works well. Mature grammar. +- **Shell out to a Java parser**: Could use JavaParser as a CLI tool. + +**Test runner**: Detect from build file: +- `pom.xml` present -> `mvn test -Dtest=` +- `build.gradle` or `build.gradle.kts` present -> `gradle test --tests ` + +**Isolation**: Temp-copy strategy. Both Maven and Gradle recompile from source. Replace the `.java` file, run tests, restore. + +**Unique complexity considerations**: +- Enhanced for-each (`for (X x : collection)`) adds +1 +- Try-with-resources: +1 for the `try` block +- `catch` clauses: +1 each +- `finally`: +1 +- Ternary `? 
:`: +1 +- Lambda expressions: count complexity of the lambda body +- Stream operations (`.filter()`, `.map()`, `.reduce()`): don't count individually (they're method calls) +- `synchronized` blocks: don't count (concurrency annotation, not control flow) +- `assert` statements: don't count + +**Import system**: +- `import com.example.foo.Bar` -- fully qualified import +- `import com.example.foo.*` -- wildcard import +- Determine internal by checking if the import matches the project's group/package prefix + +**Test file conventions**: `*Test.java`, `*Tests.java`, `*TestCase.java`, files under `src/test/java/`. + +**Additional operators**: `null` check removal (remove `if (x == null)` guards, Tier 2). `equals()` to `==` swap (Tier 1, negate_conditional variant). Exception swallowing (empty `catch` body, Tier 3). + +--- + +## Key Design Decisions + +### Parser strategy + +**Recommended: Tree-sitter for all non-Go languages.** + +Tree-sitter provides Go bindings (`github.com/smacker/go-tree-sitter`) and has mature grammars for Python, TypeScript, JavaScript, Rust, Java, and many others. This avoids requiring language runtimes as dependencies (no need for Python, Node.js, etc. to be installed). + +Trade-off: Tree-sitter returns a concrete syntax tree with string-based node kinds (`"if_statement"`, `"function_definition"`) rather than typed AST nodes. This means pattern matching is string-based rather than type-switch-based, but the uniformity across languages is worth it. + +Go remains the exception -- it continues to use Go's standard library `go/ast` packages, which provide superior type safety and formatting preservation. 
+ +### Mutation isolation + +| Language | Isolation Mechanism | Parallelism | +|----------|-------------------|-------------| +| Go | `go test -overlay` (build-level file substitution) | Fully parallel -- all mutants can run simultaneously | +| All others | Temp-copy: backup original, write mutant, run tests, restore | Parallel across files, serial within same file | + +For non-Go languages, the `TestRunner` implementation must handle file locking internally. The mutation orchestrator calls `RunTest()` concurrently up to `--mutation-workers` goroutines. Each `TestRunner` acquires a per-file mutex before modifying the source file and releases it after restoration. + +### Language detection + +Auto-detect by scanning for manifest files at the repo root: + +| File | Language | +|------|----------| +| `go.mod` | Go | +| `pyproject.toml`, `setup.py`, `setup.cfg` | Python | +| `package.json` + `.ts`/`.tsx` files | TypeScript | +| `package.json` + `.js`/`.jsx` files (no TS) | JavaScript | +| `Cargo.toml` | Rust | +| `pom.xml`, `build.gradle`, `build.gradle.kts` | Java | + +If multiple languages are detected, require `--language` or analyze each language separately and merge report sections. 
+ +### Annotation syntax + +Use the same annotation names across all languages, with the language-appropriate comment prefix: + +| Language | Line disable | Function disable | +|----------|-------------|-----------------| +| Go | `// mutator-disable-next-line` | `// mutator-disable-func` | +| Python | `# mutator-disable-next-line` | `# mutator-disable-func` | +| TypeScript/JS | `// mutator-disable-next-line` | `// mutator-disable-func` | +| Rust | `// mutator-disable-next-line` | `// mutator-disable-func` | +| Java | `// mutator-disable-next-line` | `// mutator-disable-func` | + +### New CLI flags + +``` +--language string Language to analyze (default: auto-detect) +--test-command string Custom test command override (use {file} and {dir} placeholders) +``` + +The `--test-command` flag is an escape hatch for projects with non-standard test setups. Example: `--test-command "python -m pytest {dir} --timeout=30"`. + +--- + +## Adding a New Language: Quick Reference + +1. Create `internal/lang/analyzer/` package +2. Implement all 9 sub-interfaces of `Language` +3. Add `init()` calling `lang.Register()` +4. Add blank import in `cmd/diffguard/main.go` +5. Add any new mutation operators to `internal/mutation/tiers.go` +6. Write unit tests for each interface implementation +7. Write one end-to-end integration test +8. Update README with language-specific examples +9. 
Follow the detailed [Per-Language Implementation Checklist](#per-language-implementation-checklist) above diff --git a/cmd/diffguard/main.go b/cmd/diffguard/main.go index 95bffdb..c795d71 100644 --- a/cmd/diffguard/main.go +++ b/cmd/diffguard/main.go @@ -6,6 +6,7 @@ import ( "os" "os/exec" "path/filepath" + "sort" "strings" "time" @@ -13,6 +14,8 @@ import ( "github.com/0xPolygon/diffguard/internal/complexity" "github.com/0xPolygon/diffguard/internal/deps" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" "github.com/0xPolygon/diffguard/internal/mutation" "github.com/0xPolygon/diffguard/internal/report" "github.com/0xPolygon/diffguard/internal/sizes" @@ -34,6 +37,7 @@ func main() { flag.StringVar(&cfg.FailOn, "fail-on", "warn", "Exit non-zero if thresholds breached: none, warn, all") flag.StringVar(&cfg.BaseBranch, "base", "", "Base branch to diff against (default: auto-detect)") flag.StringVar(&cfg.Paths, "paths", "", "Comma-separated files/dirs to analyze in full (refactoring mode); skips git diff") + flag.StringVar(&cfg.Language, "language", "", "Comma-separated languages to analyze (e.g. 'go' or 'rust,typescript'); empty = auto-detect") flag.Parse() if flag.NArg() < 1 { @@ -74,70 +78,219 @@ type Config struct { FailOn string BaseBranch string Paths string + Language string } +// langResult bundles the per-language analysis output so the orchestrator +// can merge sections after every language has been processed. +type langResult struct { + lang lang.Language + diff *diff.Result + sections []report.Section +} + +// run resolves the language set (explicit --language flag or auto-detect via +// manifest scan), then invokes the analyzer pipeline once per language and +// merges the resulting sections into a single report. 
func run(repoPath string, cfg Config) error { - d, err := loadFiles(repoPath, cfg) + languages, err := resolveLanguages(repoPath, cfg.Language) if err != nil { return err } - if len(d.Files) == 0 { - fmt.Println("No Go files found.") + results, done, err := collectLanguageResults(repoPath, cfg, languages) + if err != nil || done { + return err + } + if len(results) == 0 { + fmt.Printf("No %s files found.\n", languageNoun(languages[0])) return nil } - announceRun(d, cfg) + rpt := report.Report{Sections: mergeLanguageSections(results)} + if err := writeReport(rpt, cfg.Output); err != nil { + return err + } + return checkExitCode(rpt, cfg.FailOn) +} - sections, err := runAnalyses(repoPath, d, cfg) +// collectLanguageResults runs the analyzer pipeline once per language and +// returns the per-language sections. `done` is true when a single-language +// run discovered no files (the legacy byte-identical "No X files found." +// message has been emitted and run() should exit without writing a report). +func collectLanguageResults(repoPath string, cfg Config, languages []lang.Language) ([]langResult, bool, error) { + var results []langResult + for _, l := range languages { + r, skip, done, err := analyzeLanguage(repoPath, cfg, l, len(languages)) + if err != nil { + return nil, false, err + } + if done { + return nil, true, nil + } + if skip { + continue + } + results = append(results, r) + } + return results, false, nil +} + +// analyzeLanguage runs the pipeline for one language. Returns: +// - (result, false, false, nil) when analysis ran and produced sections. +// - (_, true, false, nil) when the language contributed no files in a +// multi-language run (skipped, a status line is emitted to stderr). +// - (_, _, true, nil) when a single-language run found no files +// (the caller should exit without writing a report — legacy UX). +// - (_, _, _, err) on pipeline failure. 
+func analyzeLanguage(repoPath string, cfg Config, l lang.Language, numLanguages int) (langResult, bool, bool, error) {
+	d, err := loadFiles(repoPath, cfg, diffFilter(l))
 	if err != nil {
-		return err
+		return langResult{}, false, false, err
 	}
+	if len(d.Files) == 0 {
+		if numLanguages == 1 {
+			fmt.Printf("No %s files found.\n", languageNoun(l))
+			return langResult{}, false, true, nil
+		}
+		fmt.Fprintf(os.Stderr, "No %s files found; skipping.\n", languageNoun(l))
+		return langResult{}, true, false, nil
+	}
+	announceRun(d, cfg, l, numLanguages)
+	sections, err := runAnalyses(repoPath, d, cfg, l)
+	if err != nil {
+		return langResult{}, false, false, err
+	}
+	return langResult{lang: l, diff: d, sections: sections}, false, false, nil
+}
-	r := report.Report{Sections: sections}
-	if err := writeReport(r, cfg.Output); err != nil {
-		return err
+// mergeLanguageSections flattens per-language sections into a single list.
+// In a multi-language run each section name is suffixed with `[<language>]` and
+// the combined list is sorted lexicographically for stable ordering.
+func mergeLanguageSections(results []langResult) []report.Section {
+	multi := len(results) > 1
+	var allSections []report.Section
+	for _, r := range results {
+		for _, s := range r.sections {
+			if multi {
+				s.Name = fmt.Sprintf("%s [%s]", s.Name, r.lang.Name())
+			}
+			allSections = append(allSections, s)
+		}
+	}
+	if multi {
+		sort.SliceStable(allSections, func(i, j int) bool {
+			return allSections[i].Name < allSections[j].Name
+		})
+	}
+	return allSections
+}
+
+// resolveLanguages turns the --language flag value (or auto-detect) into a
+// concrete list of Language implementations. Unknown names in the flag are
+// a hard error; an empty detection set is a hard error with a suggestion
+// to pass --language.
+func resolveLanguages(repoPath, flagValue string) ([]lang.Language, error) { + if flagValue == "" { + langs := lang.Detect(repoPath) + if len(langs) == 0 { + return nil, fmt.Errorf("no supported language detected; pass --language to override (see --help)") + } + return langs, nil + } + + var out []lang.Language + seen := map[string]bool{} + for _, name := range strings.Split(flagValue, ",") { + name = strings.TrimSpace(name) + if name == "" || seen[name] { + continue + } + seen[name] = true + l, ok := lang.Get(name) + if !ok { + return nil, fmt.Errorf("unknown language %q (registered: %s)", name, strings.Join(registeredNames(), ", ")) + } + out = append(out, l) } - return checkExitCode(r, cfg.FailOn) + if len(out) == 0 { + return nil, fmt.Errorf("empty --language flag") + } + // Sort for determinism, matching lang.All()/Detect() behavior. + sort.Slice(out, func(i, j int) bool { return out[i].Name() < out[j].Name() }) + return out, nil +} + +func registeredNames() []string { + all := lang.All() + names := make([]string, len(all)) + for i, l := range all { + names[i] = l.Name() + } + return names } -func announceRun(d *diff.Result, cfg Config) { +// languageNoun returns the human-friendly noun for status messages. For Go +// we preserve the legacy capitalized form ("No Go files found.") so +// single-language output stays byte-identical. +func languageNoun(l lang.Language) string { + switch l.Name() { + case "go": + return "Go" + case "rust": + return "Rust" + case "typescript": + return "TypeScript" + default: + return l.Name() + } +} + +func announceRun(d *diff.Result, cfg Config, l lang.Language, numLanguages int) { + noun := languageNoun(l) + // For a single-language run, preserve the legacy message exactly: + // "Analyzing N changed Go files against main..." / refactoring-mode + // phrasing. Multi-language adds a bracketed suffix. 
+ suffix := "" + if numLanguages > 1 { + suffix = fmt.Sprintf(" [%s]", l.Name()) + } if cfg.Paths != "" { - fmt.Fprintf(os.Stderr, "Analyzing %d Go files (refactoring mode)...\n", len(d.Files)) + fmt.Fprintf(os.Stderr, "Analyzing %d %s files (refactoring mode)%s...\n", len(d.Files), noun, suffix) } else { - fmt.Fprintf(os.Stderr, "Analyzing %d changed Go files against %s...\n", len(d.Files), cfg.BaseBranch) + fmt.Fprintf(os.Stderr, "Analyzing %d changed %s files against %s%s...\n", len(d.Files), noun, cfg.BaseBranch, suffix) } } -func runAnalyses(repoPath string, d *diff.Result, cfg Config) ([]report.Section, error) { +func runAnalyses(repoPath string, d *diff.Result, cfg Config, l lang.Language) ([]report.Section, error) { var sections []report.Section - complexitySection, err := complexity.Analyze(repoPath, d, cfg.ComplexityThreshold) + complexitySection, err := complexity.Analyze(repoPath, d, cfg.ComplexityThreshold, l.ComplexityCalculator()) if err != nil { return nil, fmt.Errorf("complexity analysis: %w", err) } sections = append(sections, complexitySection) - sizesSection, err := sizes.Analyze(repoPath, d, cfg.FunctionSizeThreshold, cfg.FileSizeThreshold) + sizesSection, err := sizes.Analyze(repoPath, d, cfg.FunctionSizeThreshold, cfg.FileSizeThreshold, l.FunctionExtractor()) if err != nil { return nil, fmt.Errorf("size analysis: %w", err) } sections = append(sections, sizesSection) - depsSection, err := deps.Analyze(repoPath, d) + depsSection, err := deps.Analyze(repoPath, d, l.ImportResolver()) if err != nil { return nil, fmt.Errorf("dependency analysis: %w", err) } sections = append(sections, depsSection) - churnSection, err := churn.Analyze(repoPath, d, cfg.ComplexityThreshold) + churnSection, err := churn.Analyze(repoPath, d, cfg.ComplexityThreshold, l.ComplexityScorer()) if err != nil { return nil, fmt.Errorf("churn analysis: %w", err) } sections = append(sections, churnSection) if !cfg.SkipMutation { - mutationSection, err := mutation.Analyze(repoPath, 
d, mutation.Options{ + mutationSection, err := mutation.Analyze(repoPath, d, l, mutation.Options{ SampleRate: cfg.MutationSampleRate, TestTimeout: cfg.TestTimeout, TestPattern: cfg.TestPattern, @@ -180,25 +333,38 @@ func checkExitCode(r report.Report, failOn string) error { return nil } -func loadFiles(repoPath string, cfg Config) (*diff.Result, error) { +func loadFiles(repoPath string, cfg Config, filter diff.Filter) (*diff.Result, error) { if cfg.Paths != "" { paths := strings.Split(cfg.Paths, ",") for i := range paths { paths[i] = strings.TrimSpace(paths[i]) } - d, err := diff.CollectPaths(repoPath, paths) + d, err := diff.CollectPaths(repoPath, paths, filter) if err != nil { return nil, fmt.Errorf("collecting paths: %w", err) } return d, nil } - d, err := diff.Parse(repoPath, cfg.BaseBranch) + d, err := diff.Parse(repoPath, cfg.BaseBranch, filter) if err != nil { return nil, fmt.Errorf("parsing diff: %w", err) } return d, nil } +// diffFilter converts a language's lang.FileFilter into the diff.Filter +// shape the parser expects. The two shapes are intentionally different: +// lang.FileFilter exposes the fields languages need to declare their +// territory (extensions, IsTestFile, DiffGlobs), while diff.Filter only +// carries what the parser itself reads on each file (Includes + DiffGlobs). 
+func diffFilter(l lang.Language) diff.Filter { + f := l.FileFilter() + return diff.Filter{ + DiffGlobs: f.DiffGlobs, + Includes: f.IncludesSource, + } +} + func detectBaseBranch(repoPath string) string { for _, branch := range []string{"develop", "main", "master"} { cmd := exec.Command("git", "rev-parse", "--verify", branch) diff --git a/cmd/diffguard/main_test.go b/cmd/diffguard/main_test.go new file mode 100644 index 0000000..dd48f07 --- /dev/null +++ b/cmd/diffguard/main_test.go @@ -0,0 +1,320 @@ +package main + +import ( + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/lang" + "github.com/0xPolygon/diffguard/internal/report" +) + +// TestRun_SingleLanguageGo is the B6 smoke test: runs the orchestrator +// against a temp git repo with a single .go file change. Exercises the +// end-to-end path (CLI config → language resolution → diff parse → +// analyzer pipeline → report build → exit code) without spawning a +// subprocess. +// +// The cross-language E1 integration test lives below in +// TestMixedRepo_* — those build the binary and run it as a subprocess +// against the three-language fixture in testdata/mixed-repo/. +func TestRun_SingleLanguageGo(t *testing.T) { + repo := initTempGoRepo(t) + + cfg := Config{ + ComplexityThreshold: 10, + FunctionSizeThreshold: 50, + FileSizeThreshold: 500, + SkipMutation: true, + Output: "text", + FailOn: "none", + BaseBranch: "main", + } + + // Redirect stdout/stderr so the test doesn't pollute output. We don't + // assert on exact content here — the byte-identical regression gate + // covers that — but we do assert run() returns no error. + withSuppressedStdio(t, func() { + if err := run(repo, cfg); err != nil { + t.Fatalf("run returned error: %v", err) + } + }) +} + +// TestRun_UnknownLanguageHardError locks in that an unknown --language +// value fails with a clear error rather than silently falling back to +// auto-detect. 
+func TestRun_UnknownLanguageHardError(t *testing.T) { + repo := initTempGoRepo(t) + cfg := Config{ + Output: "text", + FailOn: "none", + BaseBranch: "main", + Language: "cobol", + } + err := run(repo, cfg) + if err == nil { + t.Fatal("expected error for unknown language, got nil") + } + if !strings.Contains(err.Error(), "cobol") { + t.Errorf("error = %q, want it to mention 'cobol'", err.Error()) + } +} + +// TestResolveLanguages_ExplicitGo verifies the comma-split path. +func TestResolveLanguages_ExplicitGo(t *testing.T) { + repo := initTempGoRepo(t) + langs, err := resolveLanguages(repo, "go") + if err != nil { + t.Fatalf("resolveLanguages: %v", err) + } + if len(langs) != 1 || langs[0].Name() != "go" { + t.Errorf("langs = %v, want [go]", names(langs)) + } +} + +// TestResolveLanguages_AutoDetect verifies that a repo with go.mod is +// auto-detected as Go. +func TestResolveLanguages_AutoDetect(t *testing.T) { + repo := initTempGoRepo(t) + langs, err := resolveLanguages(repo, "") + if err != nil { + t.Fatalf("resolveLanguages: %v", err) + } + if len(langs) != 1 || langs[0].Name() != "go" { + t.Errorf("langs = %v, want [go]", names(langs)) + } +} + +// TestResolveLanguages_EmptyDetection fails cleanly when nothing is +// detectable and no --language is provided. +func TestResolveLanguages_EmptyDetection(t *testing.T) { + dir := t.TempDir() + _, err := resolveLanguages(dir, "") + if err == nil { + t.Fatal("expected error for empty detection") + } + if !strings.Contains(err.Error(), "--language") { + t.Errorf("error = %q, expected hint about --language", err.Error()) + } +} + +// TestResolveLanguages_Deduplicates ensures passing "go,go" returns one +// Language, not two. 
+func TestResolveLanguages_Deduplicates(t *testing.T) {
+	repo := initTempGoRepo(t)
+	langs, err := resolveLanguages(repo, "go,go")
+	if err != nil {
+		t.Fatalf("resolveLanguages: %v", err)
+	}
+	if len(langs) != 1 {
+		t.Errorf("len = %d, want 1 (dedup)", len(langs))
+	}
+}
+
+// TestResolveLanguages_OnlyCommas fails with the "empty --language flag"
+// hard error when the value contains nothing but separators. This exercises
+// the final "len(out) == 0" guard that turns an empty parse into a visible
+// error rather than falling back to auto-detect.
+func TestResolveLanguages_OnlyCommas(t *testing.T) {
+	repo := initTempGoRepo(t)
+	_, err := resolveLanguages(repo, ", , ,")
+	if err == nil {
+		t.Fatal("expected error for empty --language value after splitting")
+	}
+	if !strings.Contains(err.Error(), "empty --language flag") {
+		t.Errorf("error = %q, want mention of 'empty --language flag'", err.Error())
+	}
+}
+
+// TestRegisteredNames_ListsGo verifies the helper returns at least "go"
+// (other languages are registered via blank-import and may or may not be
+// linked into this test binary).
+func TestRegisteredNames_ListsGo(t *testing.T) {
+	names := registeredNames()
+	if len(names) == 0 {
+		t.Fatal("expected at least one registered language")
+	}
+	found := false
+	for _, n := range names {
+		if n == "go" {
+			found = true
+		}
+	}
+	if !found {
+		t.Errorf("registeredNames = %v, expected 'go'", names)
+	}
+}
+
+// TestRun_NoFilesInDiff_SingleLanguage drives run() with a --paths filter
+// that matches nothing. Exercises the "No <language> files found." single-
+// language short-circuit — the len(d.Files)==0 branch that mutation tests
+// flagged as under-tested.
+func TestRun_NoFilesInDiff_SingleLanguage(t *testing.T) {
+	dir := t.TempDir()
+	// Write a go.mod so language auto-detection succeeds, but no .go files
+	// so the diff comes back empty.
+ if err := os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module example.com/empty\n\ngo 1.21\n"), 0644); err != nil { + t.Fatal(err) + } + + cfg := Config{ + SkipMutation: true, + Output: "text", + FailOn: "none", + Paths: ".", // refactoring mode skips git diff entirely + } + withSuppressedStdio(t, func() { + if err := run(dir, cfg); err != nil { + t.Errorf("run should succeed with empty diff, got %v", err) + } + }) +} + +// TestLanguageNoun_KnownLanguagesAndFallback covers every branch of the +// switch: known language with special capitalization, plus the default +// fallback for an unrecognized name. A stub Language lets us hit the +// default without registering a real language implementation. +func TestLanguageNoun_KnownLanguagesAndFallback(t *testing.T) { + got := languageNoun(stubLanguage("go")) + if got != "Go" { + t.Errorf("languageNoun(go) = %q, want Go", got) + } + got = languageNoun(stubLanguage("rust")) + if got != "Rust" { + t.Errorf("languageNoun(rust) = %q, want Rust", got) + } + got = languageNoun(stubLanguage("typescript")) + if got != "TypeScript" { + t.Errorf("languageNoun(typescript) = %q, want TypeScript", got) + } + // The fallback branch must echo the raw name. + got = languageNoun(stubLanguage("unknown")) + if got != "unknown" { + t.Errorf("languageNoun(unknown) = %q, want unknown (fallback)", got) + } +} + +// stubLanguage implements just enough of lang.Language to exercise +// languageNoun. Every accessor returns nil because languageNoun only +// reads Name(); the test is in cmd/diffguard so we can't register it +// globally anyway. 
+type stubLanguage string + +func (s stubLanguage) Name() string { return string(s) } +func (s stubLanguage) FileFilter() lang.FileFilter { return lang.FileFilter{} } +func (s stubLanguage) ComplexityCalculator() lang.ComplexityCalculator { return nil } +func (s stubLanguage) FunctionExtractor() lang.FunctionExtractor { return nil } +func (s stubLanguage) ImportResolver() lang.ImportResolver { return nil } +func (s stubLanguage) ComplexityScorer() lang.ComplexityScorer { return nil } +func (s stubLanguage) MutantGenerator() lang.MutantGenerator { return nil } +func (s stubLanguage) MutantApplier() lang.MutantApplier { return nil } +func (s stubLanguage) AnnotationScanner() lang.AnnotationScanner { return nil } +func (s stubLanguage) TestRunner() lang.TestRunner { return nil } + +// initTempGoRepo creates a minimal git repo with a single committed Go +// file on main, plus an additional file on HEAD so the diff has content. +// Returns the absolute path to the repo. +func initTempGoRepo(t *testing.T) string { + t.Helper() + dir := t.TempDir() + run := func(args ...string) { + cmd := exec.Command("git", args...) + cmd.Dir = dir + cmd.Env = append(os.Environ(), "GIT_CONFIG_GLOBAL=/dev/null") + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } + } + + // init + author config + run("init", "-q", "--initial-branch=main") + run("config", "user.email", "test@example.com") + run("config", "user.name", "Test") + run("config", "commit.gpgsign", "false") + + // base commit with go.mod + a base file so Parse has something to + // merge-base against. + if err := os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module example.com/testrepo\n\ngo 1.21\n"), 0644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dir, "base.go"), []byte("package main\n"), 0644); err != nil { + t.Fatal(err) + } + run("add", ".") + run("commit", "-q", "-m", "base") + + // Feature commit adds a new file with a small function. 
This is what + // appears in the diff. + if err := os.WriteFile(filepath.Join(dir, "new.go"), []byte("package main\n\nfunc helper(x int) int {\n\tif x > 0 {\n\t\treturn x\n\t}\n\treturn -x\n}\n"), 0644); err != nil { + t.Fatal(err) + } + run("add", ".") + run("commit", "-q", "-m", "add new.go") + + return dir +} + +// withSuppressedStdio redirects os.Stdout/Stderr to /dev/null for the +// duration of fn. Restores on return. +func withSuppressedStdio(t *testing.T, fn func()) { + t.Helper() + devnull, err := os.OpenFile(os.DevNull, os.O_WRONLY, 0) + if err != nil { + t.Fatal(err) + } + defer devnull.Close() + + origOut, origErr := os.Stdout, os.Stderr + os.Stdout = devnull + os.Stderr = devnull + defer func() { + os.Stdout = origOut + os.Stderr = origErr + }() + fn() +} + +func names(langs []lang.Language) []string { + out := make([]string, len(langs)) + for i, l := range langs { + out[i] = l.Name() + } + return out +} + +// TestCheckExitCode_FailInAnyLanguageEscalates covers B5: a FAIL section +// in any language must escalate the overall exit code, regardless of how +// many languages contribute sections. checkExitCode already takes a +// merged report, so this is a unit-level check on WorstSeverity behavior +// mirrored through checkExitCode. +func TestCheckExitCode_FailInAnyLanguageEscalates(t *testing.T) { + fail := report.Section{Name: "Complexity [rust]", Severity: report.SeverityFail} + pass := report.Section{Name: "Complexity [go]", Severity: report.SeverityPass} + warn := report.Section{Name: "Sizes [typescript]", Severity: report.SeverityWarn} + + merged := report.Report{Sections: []report.Section{pass, fail, warn}} + + // fail-on=warn: any FAIL escalates. + if err := checkExitCode(merged, "warn"); err == nil { + t.Error("fail-on=warn with FAIL section should return error") + } + + // fail-on=all: any non-PASS escalates (FAIL or WARN). 
+ if err := checkExitCode(merged, "all"); err == nil { + t.Error("fail-on=all with FAIL section should return error") + } + + // fail-on=none: never escalates. + if err := checkExitCode(merged, "none"); err != nil { + t.Errorf("fail-on=none should not error, got %v", err) + } + + // All PASS: no error. + allPass := report.Report{Sections: []report.Section{pass, pass}} + if err := checkExitCode(allPass, "warn"); err != nil { + t.Errorf("all-PASS should not error, got %v", err) + } +} diff --git a/docs/multi-lang-followups.md b/docs/multi-lang-followups.md new file mode 100644 index 0000000..987901e --- /dev/null +++ b/docs/multi-lang-followups.md @@ -0,0 +1,84 @@ +# Multi-language support — follow-ups + +Tracked outside `docs/rust-typescript-support.md` (the spec) and +`MULTI_LANGUAGE_SUPPORT.md` (the design) so future changes have a single +visible backlog and so these items don't drift into invisible tech debt. + +Filed during the multi-language sign-off on the `feat/multi-language-support` +branch (commits `c4bced2..1a7ac4a`). Parts A through E are complete and all +unit / eval / mixed tests are green. The items below are explicit carve-outs +from that work. + +## Deferred evaluation work + +### EVAL-5 — Pre-flight calibration (pre-ship) + +Spec reference: `docs/rust-typescript-support.md` §EVAL-5. + +The plan calls for running the built `diffguard` binary against two +open-source Rust crates (one small, one mid-sized) and two TypeScript +projects (one app, one library), triaging every FAIL/WARN, and recording +the baseline noise rate under a "Baseline noise rate" appendix. + +**Status**: not run. This is a human-in-the-loop activity (requires +picking representative repos, curating the triage write-up) rather than +something the agent pipeline can automate, so it was explicitly deferred. + +**Exit criteria before declaring Rust/TS support shipped**: <20% noise +rate per language, with the triage notes appended to +`docs/rust-typescript-support.md`. 
+ +### EVAL-2 / EVAL-3 MVP carve-outs + +The Rust and TypeScript eval harnesses ship the MVP subset: +complexity (pos/neg), sizes function (pos/neg), deps cycle (pos/neg), +mutation kill (pos/neg), and one language-specific mutation operator +(pos/neg). The following sub-cases from the spec are deferred and +called out as in-code TODO blocks at the top of each `eval_test.go`: + +- `EVAL-2 sizes (file)` — >500-LOC Rust fixture + negative control. +- `EVAL-2 deps (SDP)` — stable→unstable Rust fixture + reversed + negative control. +- `EVAL-2 churn` — hot_complex / hot_simple Rust fixtures with seeded + git history; requires a shell-based git helper so the history isn't + committed as a nested `.git` dir. +- `EVAL-2 mutation (annotation respect)` — end-to-end run exercising + `// mutator-disable-func` and `// mutator-disable-next-line` on Rust. + (Unit-level coverage exists in `mutation_annotate_test.go`.) +- Mirror carve-outs on the TypeScript side in + `internal/lang/tsanalyzer/eval_test.go`. + +These are MVP-ready because the structural shape (fixtures, +`expected.json`, semantic compare) is in place; the missing rows are +more fixture content, not missing pipeline. + +## Known QA-flagged limitations + +### Rust workspace-crate path resolution + +`parseCargoPackageName` returns `""` for a bare `[workspace]` manifest +without a `[package]` section (see +`internal/lang/rustanalyzer/deps_test.go`). Repos whose root +`Cargo.toml` is a pure workspace manifest (common for multi-crate +projects) currently analyze each member crate but do not thread the +workspace root into module-path resolution, which can under-report +cross-crate imports in a workspace. + +**Impact**: single-crate repos are unaffected. Workspace repos get a +correct per-crate report but may miss dep edges between sibling crates. 
+ +**Fix sketch**: resolve `workspace.members` globs, walk each member's +`Cargo.toml` for its `[package] name`, and union the module-path +registry before running `ScanPackageImports`. + +## How to close these out + +1. For EVAL-5, pick the calibration repos, run `diffguard` against each, + triage, and append a "Baseline noise rate" appendix to + `docs/rust-typescript-support.md`. +2. For the EVAL-2 / EVAL-3 sub-cases, add fixtures under each + analyzer's `evaldata/` and drop the corresponding TODO lines from + the header comment in `eval_test.go`. +3. For workspace-crate resolution, extend `ImportResolver` in + `internal/lang/rustanalyzer/deps.go` and add a workspace fixture to + the deps test suite. diff --git a/docs/rust-typescript-support.md b/docs/rust-typescript-support.md new file mode 100644 index 0000000..a146241 --- /dev/null +++ b/docs/rust-typescript-support.md @@ -0,0 +1,413 @@ +# Rust + TypeScript support — implementation checklist + +This is the execution checklist for adding Rust and TypeScript analyzer support to diffguard, sized so a single `diffguard` run on a mixed-language repo reports both languages side by side. + +For the deep technical decisions (interface shapes, tree-sitter vs. runtime parsers, mutation isolation strategy, per-language parser notes), see `../MULTI_LANGUAGE_SUPPORT.md`. This checklist references that doc rather than duplicating it. + +## Scope + +- **In scope**: Rust, TypeScript (including `.tsx`). All five analyzers (complexity, sizes, deps, churn, mutation). Multi-language single-invocation support. +- **Out of scope**: Java, Python, plain JavaScript-only (JS works incidentally under the TS grammar but the TS path is the supported one). A `--test-command` override flag (add only if a fixture needs it). +- **Left alone**: Go keeps `go/ast`. Only its packaging moves — the parser does not. 
+ +## Legend + +- **[F]** foundation work (blocks both languages) +- **[O]** orchestration (the "simultaneous" piece) +- **[R]** Rust analyzer +- **[T]** TypeScript analyzer +- **[X]** cross-cutting (docs, CI, evals) +- **[EVAL]** correctness-evidence work (proves diffguard catches real issues) + +Parts R and T are disjoint and can be worked in parallel once F and O land. + +--- + +## Part A — Foundation (shared, one-time) [F] + +Repo reorganization so Go becomes one of several registered languages. Every step leaves `go test ./...` green. + +### A1. Language abstraction layer + +- [ ] Add `github.com/smacker/go-tree-sitter` (and sub-packages for `rust`, `typescript`, `tsx`) to `go.mod`. +- [ ] Create `internal/lang/lang.go` with the 9 sub-interfaces (`FileFilter`, `FunctionExtractor`, `ComplexityCalculator`, `ComplexityScorer`, `ImportResolver`, `MutantGenerator`, `MutantApplier`, `AnnotationScanner`, `TestRunner`) and the top-level `Language` interface — shapes from `MULTI_LANGUAGE_SUPPORT.md` §Interface Definitions. +- [ ] Create `internal/lang/registry.go` with `Register(Language)`, `Get(name string)`, and `All()`. +- [ ] Create `internal/lang/detect.go`. Detection rules from `MULTI_LANGUAGE_SUPPORT.md` §Language detection. Return order must be deterministic (sorted by name) so downstream report ordering is stable. +- [ ] Unit tests for registry (register/get/all, duplicate registration is an error) and detection (each manifest file → correct language, multi-language repos return multiple, empty repo returns empty). + +### A2. Extract Go → `goanalyzer` + +- [ ] Create `internal/lang/goanalyzer/` package. +- [ ] Move the three duplicate `funcName` helpers (`sizes.go`, `complexity.go`, `churn.go`) into `internal/lang/goanalyzer/parse.go` as a single helper. +- [ ] Implement each of the 9 interfaces in `goanalyzer/` (one file per concern; filenames from `MULTI_LANGUAGE_SUPPORT.md` §Resulting directory structure). 
+- [ ] `goanalyzer/goanalyzer.go` exposes a `Language` struct and an `init()` that calls `lang.Register(&Language{})`. +- [ ] Blank-import `_ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer"` in `cmd/diffguard/main.go`. + +### A3. Parameterize the diff parser + +- [ ] Replace `isAnalyzableGoFile` (`internal/diff/diff.go:175-177`) with a `FileFilter` parameter. +- [ ] Replace hardcoded `--'*.go'` arg (`internal/diff/diff.go:92`) with globs from `FileFilter.DiffGlobs`. +- [ ] Replace the `+++` handler's `.go`/`_test.go` check (`internal/diff/diff.go:201-208`) with `FileFilter.IsTestFile` + extension check. +- [ ] Update `Parse()` and `CollectPaths()` signatures; callers in `cmd/diffguard/main.go` pass the appropriate filter. +- [ ] Keep `parseUnifiedDiff` and `parseHunkHeader` untouched — they're already language-agnostic. + +### A4. Route existing analyzers through the interface + +- [ ] `internal/complexity/complexity.go`: take a `lang.ComplexityCalculator` parameter, delete the embedded AST walk, call `calc.AnalyzeFile(...)` instead. +- [ ] `internal/sizes/sizes.go`: take a `lang.FunctionExtractor`; delegate. +- [ ] `internal/churn/churn.go`: take a `lang.ComplexityScorer`; delete the simplified `computeComplexity` duplicate; keep `git log --oneline --follow` counting (language-agnostic). +- [ ] `internal/deps/`: split into `graph.go` (pure graph math — cycles, afferent/efferent coupling, instability, SDP) and `deps.go` (orchestration taking `lang.ImportResolver`). +- [ ] `internal/mutation/`: route `Analyze` through `MutantGenerator`, `MutantApplier`, `AnnotationScanner`, `TestRunner`. `tiers.go` stays put; `operatorTier` gets new entries for Rust/TS operators (TBD in R/T phases). + +### A5. Regression gate + +- [ ] `go test ./...` green. +- [ ] `diffguard` binary on a self-diff of this repo produces byte-identical output before and after the reorg (record the baseline first). +- [ ] Wall-clock regression <5% on the self-diff. 
+
+---
+
+## Part B — Multi-language orchestration [O]
+
+The "simultaneous" requirement. Lands after A, before R and T.
+
+### B1. CLI
+
+- [ ] Add `--language` flag to `cmd/diffguard/main.go`. Default empty → auto-detect. Accepts comma-separated values (`--language rust,typescript`).
+- [ ] Error messages cite the detected manifest files to help users debug "why did you pick that language".
+
+### B2. Orchestration loop
+
+- [ ] In `run()` (currently `main.go:79-102`), resolve the language set:
+  - [ ] If `--language` empty: call `lang.Detect(repoPath)`.
+  - [ ] Else: split the flag and call `lang.Get()` for each; unknown names are a hard error.
+  - [ ] Empty language set is a hard error with a clear message ("no supported language detected; pass --language to override").
+- [ ] For each resolved language, call `diff.Parse(repoPath, baseBranch, language.FileFilter())` → per-language `diff.Result`.
+- [ ] For each `(language, Result)` with non-empty `Files`, run the full analyzer pipeline using the language's interfaces.
+- [ ] Merge sections from all languages into the single `report.Report`. No concurrency at this layer — analyzers already parallelize where it matters.
+
+### B3. Section naming
+
+- [ ] Section names are suffixed `[<language>]` (e.g., `Complexity [rust]`, `Mutation [typescript]`). `report.Section.Name` is already `string`, so no struct change.
+- [ ] Text output groups by language first, then metric, so mixed reports stay readable.
+- [ ] JSON output is stable: sections ordered `(language, metric)` lexicographically.
+
+### B4. Empty-languages behavior
+
+- [ ] If a detected language has no changed files in the diff, it produces no sections (no empty PASS rows). This matches existing Go behavior (`No Go files found.` early return generalizes to "No \<language\> files found." per language, collapsing to the existing message when only one language is present).
+
+### B5. 
Exit-code aggregation + +- [ ] `checkExitCode` unchanged: it already takes a merged `Report` and returns the worst severity. Add a test that a FAIL in any language escalates the whole run. + +### B6. Mixed-repo smoke test + +- [ ] `cmd/diffguard/main_test.go` gains a test using a temp git repo with a Go file and stub Rust/TS files: run `main()` and assert all three language sections appear. (The Rust/TS analyzer impls are stubs at this point — they register, they return empty results. The point of this test is orchestration, not analysis.) + +--- + +## Part C — Rust analyzer [R] + +`internal/lang/rustanalyzer/`. See `MULTI_LANGUAGE_SUPPORT.md` §Rust for parser, complexity, import, and mutation notes. + +### C0. Research prerequisites + +- [ ] Confirm `github.com/smacker/go-tree-sitter/rust` grammar versions support the Rust edition(s) we care about. +- [ ] Decide: integration-test crates under `tests/` treated as test files? Inline `#[cfg(test)] mod tests { ... }` treated as live code? (Design doc recommends: `tests/` = test files, inline modules = live code ignored during analysis.) + +### C1. FileFilter + +- [ ] `.rs` extension. `IsTestFile`: any path segment equal to `tests`. +- [ ] `DiffGlobs`: `*.rs`. +- [ ] Tests: fixtures include `src/lib.rs`, `tests/integration.rs`, `src/foo/bar.rs`; assert expected inclusions/exclusions. + +### C2. FunctionExtractor + +- [ ] Tree-sitter query for `function_item`, `impl_item` → `function_item` (methods), `trait_item` → default methods. +- [ ] Name extraction: standalone `fn foo` → `foo`; `impl Type { fn bar }` → `Type::bar`; `impl Trait for Type { fn baz }` → `Type::baz`. +- [ ] Line range: node start/end lines. File line count from byte count. +- [ ] Filter to functions overlapping `FileChange.Regions`. +- [ ] Tests: each function form, filtering, nested functions (treated as separate). + +### C3. 
ComplexityCalculator + ComplexityScorer + +- [ ] Base +1 on: `if_expression`, `while_expression`, `for_expression`, `loop_expression`, `match_expression`, `if_let_expression`, `while_let_expression`. +- [ ] +1 per arm of `match_expression` with a guard (the `if` in `pattern if cond =>`). +- [ ] +1 per logical-op token sequence change inside a binary_expression chain (`&&` / `||`). +- [ ] +1 per nesting level for each scope-introducing ancestor. +- [ ] Do **not** count: `?` operator, `unsafe` blocks. +- [ ] `ComplexityScorer` reuses `ComplexityCalculator` (fast enough). +- [ ] Tests: empty fn (0), `match` with N guarded arms (N), nested `if let` inside `for`, logical chains. + +### C4. ImportResolver + +- [ ] `DetectModulePath`: parse `Cargo.toml` `[package] name`. +- [ ] `ScanPackageImports`: find `use_declaration` nodes. Internal iff the path starts with `crate::`, `self::`, or `super::`. Also treat `mod foo;` declarations as an edge to the child module. +- [ ] Map discovered paths back to package directories so the graph uses directory-level nodes consistent with Go's behavior. +- [ ] Tests: crate root detection, relative-path resolution (`super::foo`), external imports filtered out. + +### C5. AnnotationScanner + +- [ ] Scan `line_comment` tokens for `mutator-disable-next-line` and `mutator-disable-func`. +- [ ] Function ranges sourced from C2 so `mutator-disable-func` can expand to every line in the fn. +- [ ] Tests: next-line, func-wide, unrelated comments ignored, disabled-line map is complete. + +### C6. MutantGenerator + +- [ ] Canonical operators (names from `MULTI_LANGUAGE_SUPPORT.md` §MutantGenerator): + - [ ] `conditional_boundary`: `>` / `>=` / `<` / `<=` swaps. + - [ ] `negate_conditional`: `==` / `!=` swap; relational flips. + - [ ] `math_operator`: `+` / `-`, `*` / `/` swaps. + - [ ] `return_value`: replace return with `Default::default()` / `None` when the return type is an `Option` / unit. + - [ ] `boolean_substitution`: `true` / `false` swap. 
+ - [ ] `branch_removal`: empty `if` body. + - [ ] `statement_deletion`: remove bare expression statements. +- [ ] Skip `incdec` (Rust has no `++` / `--`). +- [ ] Rust-specific additions: + - [ ] `unwrap_removal` (Tier 1 via `operatorTier` override): strip `.unwrap()` / `.expect(...)`. Register in `internal/mutation/tiers.go`. + - [ ] `some_to_none` (Tier 1): `Some(x)` → `None`. + - [ ] `question_mark_removal` (Tier 2): strip trailing `?`. Register in tiers. +- [ ] Filter mutants to changed regions; exclude disabled lines. +- [ ] Tests: each operator produces the expected mutant, out-of-range skipped, disabled lines honored. + +### C7. MutantApplier + +- [ ] Text-based application using node byte ranges from the CST. Tree-sitter gives us exact byte offsets; simpler than re-rendering the tree. +- [ ] After application, re-parse with tree-sitter and assert no syntax errors; return `nil` if the mutated source doesn't parse (silently skip corrupt mutants rather than running broken tests). +- [ ] Tests: each mutation type applied, re-parse check catches malformed output. + +### C8. TestRunner + +- [ ] Temp-copy isolation strategy (from `MULTI_LANGUAGE_SUPPORT.md` §Mutation isolation). +- [ ] Per-file `sync.Mutex` map so concurrent mutations on the same file serialize but different files run in parallel. +- [ ] Test command: `cargo test` with `CARGO_INCREMENTAL=0`. Honor `TestRunConfig.TestPattern` (pass as positional filter). +- [ ] Kill original file from a backup on restore; panic-safe via `defer`. +- [ ] Honor `TestRunConfig.Timeout` via `exec.CommandContext`. +- [ ] Tests: killed mutant (test fails → killed), survived (test passes → survived), timeout, crash-during-run leaves source restored (simulate via deliberate panic in a helper test). + +### C9. Register + wire-up + +- [ ] `rustanalyzer/rustanalyzer.go`: `Language` struct, `Name() string { return "rust" }`, `init()` calling `lang.Register`. +- [ ] Blank import in `cmd/diffguard/main.go`. 
+ +--- + +## Part D — TypeScript analyzer [T] + +`internal/lang/tsanalyzer/`. See `MULTI_LANGUAGE_SUPPORT.md` §TypeScript for parser and operator notes. + +### D0. Research prerequisites + +- [ ] `github.com/smacker/go-tree-sitter/typescript/typescript` for `.ts`, `.../typescript/tsx` for `.tsx`. Use the grammar matching the file extension. +- [ ] Test runner detection: parse `package.json` devDependencies — prefer `vitest`, then `jest`, then fall back to `npm test`. + +### D1. FileFilter + +- [ ] Extensions: `.ts`, `.tsx`. Deliberately exclude `.js`, `.jsx`, `.mjs`, `.cjs` for now (JS-only repos out of scope). +- [ ] `IsTestFile`: suffixes `.test.ts`, `.test.tsx`, `.spec.ts`, `.spec.tsx`; any path segment `__tests__` or `__mocks__`. +- [ ] `DiffGlobs`: `*.ts`, `*.tsx`. +- [ ] Tests: glob matches, test-file exclusion, `utils.test-helper.ts` is NOT a test file (edge case). + +### D2. FunctionExtractor + +- [ ] Tree-sitter queries for: `function_declaration`, `method_definition`, `arrow_function` assigned to `variable_declarator`, `function` expressions assigned similarly, `generator_function`. +- [ ] Name extraction: `ClassName.method`, `functionName`, arrow assigned to `const x = () =>` → `x`. +- [ ] Line ranges, filtering, file LOC. +- [ ] Tests: each form, class methods (including static + private), nested arrow functions, exported vs. local. + +### D3. ComplexityCalculator + ComplexityScorer + +- [ ] Base +1 on: `if_statement`, `for_statement`, `for_in_statement`, `for_of_statement`, `while_statement`, `switch_statement`, `try_statement`, `ternary_expression`. +- [ ] +1 per `catch_clause`; +1 per `else` branch; +1 per `case` with content (empty fall-through cases don't count). +- [ ] +1 per `.catch(` promise-chain method call (string-match on identifier to avoid CST depth). +- [ ] +1 per `&&` / `||` run change. +- [ ] Do **not** count: optional chaining `?.`, nullish coalescing `??`, `await` alone, `async` keyword, stream method calls. 
+- [ ] Tests: ternary nest, `try/catch/finally`, logical chains, optional chaining ignored. + +### D4. ImportResolver + +- [ ] `DetectModulePath`: parse `package.json` `name` field. +- [ ] `ScanPackageImports`: `import` and `require(...)`. Internal iff the specifier starts with `.` or a registered project alias (`@/`, `~/`). Resolve relative paths against the source file's directory, fold to dir-level for the graph. +- [ ] Tests: internal vs. external classification, relative resolution, barrel re-exports count as one edge. + +### D5. AnnotationScanner + +- [ ] `// mutator-disable-next-line` and `// mutator-disable-func` comments. +- [ ] Function ranges from D2 for func-scope disables. +- [ ] Tests: same shape as Rust's C5. + +### D6. MutantGenerator + +- [ ] Canonical operators: `conditional_boundary`, `negate_conditional` (include `===` / `!==`), `math_operator`, `return_value` (use `null` / `undefined` appropriately), `boolean_substitution`, `incdec` (JS/TS has `++` / `--`), `branch_removal`, `statement_deletion`. +- [ ] TS-specific additions — register in `internal/mutation/tiers.go`: + - [ ] `strict_equality` (Tier 1): flip `===` ↔ `==` and `!==` ↔ `!=`. + - [ ] `nullish_to_logical_or` (Tier 2): `??` → `||`. + - [ ] `optional_chain_removal` (Tier 2): `foo?.bar` → `foo.bar`. +- [ ] Filter to changed regions, skip disabled lines. +- [ ] Tests: each operator emits mutants; TS-specific operators exercised. + +### D7. MutantApplier + +- [ ] Same text-based strategy as Rust's C7. Re-parse check after mutation. +- [ ] Tests: each mutation applied, re-parse catches corrupt output. + +### D8. TestRunner + +- [ ] Temp-copy + per-file lock, identical to Rust. +- [ ] Command selection by detected runner (vitest / jest / npm test). Compose with `--testPathPattern` or `-t` honoring `TestPattern`. +- [ ] Honor `TestRunConfig.Timeout`. +- [ ] Set `CI=true` to suppress interactive prompts. +- [ ] Tests: killed, survived, timeout, restoration after crash. + +### D9. 
Register + wire-up + +- [ ] `tsanalyzer/tsanalyzer.go`: `Language` with `Name() string { return "typescript" }`, `init()` calls `lang.Register`. +- [ ] Blank import in `cmd/diffguard/main.go`. + +--- + +## Part E — Integration & verification [X] + +### E1. Mixed-repo end-to-end + +- [ ] Fixture at `cmd/diffguard/testdata/mixed-repo/` containing a minimal Cargo crate, a minimal TS package, and (for completeness) a Go file. +- [ ] End-to-end test invoking the built binary (`go build` then `exec`) against the fixture. Assert each language's sections appear with correct suffixes. +- [ ] Negative control: same fixture stripped of violations must produce `WorstSeverity() == PASS`. + +### E2. CI + +- [ ] Extend `.github/workflows/` to install Rust (`rustup`) and Node (for test runners) before running the eval suites. +- [ ] Add `make eval-rust`, `make eval-ts`, `make eval-mixed` targets wrapping the eval Go tests with the right env (e.g., `CARGO_INCREMENTAL=0`, `CI=true`). +- [ ] Cache Cargo and npm artifacts so CI stays fast. + +### E3. README + docs + +- [ ] Update `README.md` top section: tagline no longer says Go-only; list supported languages. +- [ ] Add a per-language "Install" subsection (required toolchain: Rust + cargo, Node + npm). +- [ ] Add `--language` to the CLI reference. +- [ ] Document annotation syntax per language. +- [ ] Cross-link from `README.md` to this checklist and to `MULTI_LANGUAGE_SUPPORT.md`. + +--- + +## Evaluation suite [EVAL] — does diffguard actually catch real issues + +Structural tests (Parts A–E) prove the plumbing works. This section proves the analyzers produce correct verdicts on real, seeded problems. Every case is a **positive / negative control pair**: the positive must be flagged with the right severity, the negative must pass. Negative controls are the firewall against rubber-stamping. + +### EVAL-1. Harness + +- [ ] `internal/lang/analyzer/evaldata/` holds fixtures. 
+- [ ] `eval_test.go` in each analyzer package runs the full pipeline (built binary, full CLI path) against each fixture and diff-compares emitted findings to `expected.json`. +- [ ] Comparison is semantic (file + function + severity), not byte-for-byte, so cosmetic line shifts don't break the eval. +- [ ] Eval runs are deterministic: `--mutation-sample-rate 100`, fixed `--mutation-workers`, a stable seed for any randomized orderings. +- [ ] Each fixture directory has a `README.md` documenting the seeded issue and the expected verdict. + +### EVAL-2. Rust cases + +- [ ] **complexity**: + - Positive `complex_positive.rs`: nested `match` + `if let` + guarded arms, cognitive ≥11 → section FAIL with finding on that fn. + - Negative `complex_negative.rs`: same behavior split into helpers, each <10 → section PASS, zero findings. +- [ ] **sizes (function)**: + - Positive: single `fn` >50 lines → FAIL. + - Negative: same behavior factored across fns, each <50 → PASS. +- [ ] **sizes (file)**: + - Positive: `large_file.rs` >500 LOC → FAIL. + - Negative: <500 LOC → PASS. +- [ ] **deps (cycle)**: + - Positive: `a.rs` ↔ `b.rs` → FAIL with cycle finding. + - Negative: same modules with a shared `types.rs` breaking the cycle → PASS. +- [ ] **deps (SDP)**: + - Positive: unstable concrete module imported by stable abstract one → WARN/FAIL per current SDP severity. + - Negative: reversed dependency direction → PASS. +- [ ] **churn**: + - Positive `hot_complex.rs` with a baked `.git` dir showing 8+ commits on a complex fn → finding present. + - Negative `hot_simple.rs` same commit count, trivial fn → no finding. +- [ ] **mutation (kill)**: + - Positive `well_tested.rs`: arithmetic fn + tests covering boundary and sign → Tier-1 ≥90% → PASS. + - Negative `untested.rs`: same fn, test covers only one branch → Tier-1 <90% → FAIL. +- [ ] **mutation (Rust-specific operator)**: + - Positive: `unwrap_removal` / `some_to_none` on a tested fn is killed; on an untested fn survives. 
+ - Proof that the operator adds signal, not noise. +- [ ] **mutation (annotation respect)**: + - Positive `# mutator-disable-func` suppresses all mutants in that fn. + - Negative (same file, annotation removed) regenerates them. + +### EVAL-3. TypeScript cases + +- [ ] **complexity**: + - Positive `complex_positive.ts`: nested ternaries + try/catch + `&&`/`||` chains ≥11 → FAIL. + - Negative `complex_negative.ts`: refactored into named helpers → PASS. +- [ ] **sizes (function)**: + - Positive: arrow fn assigned to `const` >50 LOC → FAIL. + - Negative: same logic across named exports → PASS. +- [ ] **sizes (file)**: + - Positive `large_file.ts` >500 LOC → FAIL. + - Negative: split across files → PASS. +- [ ] **deps (cycle)**: + - Positive `a.ts` ↔ `b.ts` → FAIL. + - Negative: shared `types.ts` breaking cycle → PASS. +- [ ] **deps (internal vs external)**: + - Positive: `./foo` appears in internal graph; `import 'lodash'` does NOT. + - Assert directly on the graph shape, not just pass/fail. +- [ ] **churn**: + - Positive `hot_complex.ts` with seeded history → finding. + - Negative `hot_simple.ts` same history → no finding. +- [ ] **mutation (kill, with configured runner)**: + - Positive: `arithmetic.ts` + tests covering boundary + sign → Tier-1 ≥90% → PASS. + - Negative: same fn, test covers one branch → Tier-1 <90% → FAIL. +- [ ] **mutation (TS-specific operators)**: + - Positive: `strict_equality` flip killed by tests that rely on strict equality; `nullish_to_logical_or` killed by tests that distinguish `null` from `undefined`. + - Negative: same operators survive when the test only asserts non-distinguishing inputs. Confirms the operators generate meaningful mutants, not noise. +- [ ] **mutation (annotation respect)**: + - Positive `// mutator-disable-next-line` suppresses the next-line mutant. + - Negative: annotation removed, mutant regenerated. + +### EVAL-4. 
Cross-cutting + +- [ ] **Mixed-repo severity propagation**: + - Rust FAIL + TS PASS → overall FAIL; TS section independently reports PASS. + - Flip: Rust PASS + TS FAIL → overall FAIL; Rust section independently reports PASS. + - Proves language sections don't contaminate each other. +- [ ] **Mutation concurrency safety**: + - Fixture with 3+ Rust and 3+ TS files, each with multiple mutants. Run `--mutation-workers 4`. + - Assert `git status --porcelain` is empty after the run (no temp-copy corruption). + - Assert repeated runs produce identical reports. + - Sweep `--mutation-workers` 1, 2, 4, 8 and assert report stability. +- [ ] **Disabled-line respect under concurrency**: + - A file with `mutator-disable-func` on one fn and live code on another, `--mutation-workers 4`. + - Assert zero mutants generated for the disabled fn; live fn's mutants execute. +- [ ] **False-positive ceiling**: + - Known-clean fixture (well-tested small Rust crate + well-tested small TS module) → `WorstSeverity() == PASS`, zero FAIL findings across all analyzers. + - This is the "does it cry wolf" gate. + +### EVAL-5. Pre-flight calibration (pre-ship) + +- [ ] Rust: run the built diffguard against two open-source crates (one small, one mid-sized). Triage every FAIL and WARN. If >20% are noise, iterate on thresholds/detection before declaring Rust support shipped. +- [ ] TypeScript: repeat with one app and one library project. +- [ ] Record triage findings in this document under a "Baseline noise rate" appendix so future changes know what "good" looks like. + +--- + +## Execution order summary + +``` +A (foundation) ──► B (orchestration) ──┬──► C (Rust) ──┬──► E (integration + CI) + └──► D (TypeScript) ──┘ + │ + └──► EVAL runs alongside C/D, per analyzer +``` + +Parts C and D are disjoint packages and can be implemented in parallel by separate agents / PRs, rebased onto the B branch. Part E holds the merge point and the final evaluation gate. 
+ +--- + +## Sign-off criteria + +Before calling this done: + +- [ ] All checklist items above checked. +- [ ] `go test ./...` green. +- [ ] `make eval-rust`, `make eval-ts`, `make eval-mixed` all green in CI. +- [ ] Pre-flight calibration triage documented with <20% noise rate per language. +- [ ] README reflects multi-language support with install instructions for each toolchain. +- [ ] `diffguard` run on this repo's own HEAD produces identical output before and after the reorg (the Go path must be byte-stable). diff --git a/internal/churn/churn.go b/internal/churn/churn.go index 9d5fd8d..c3be9a1 100644 --- a/internal/churn/churn.go +++ b/internal/churn/churn.go @@ -1,11 +1,13 @@ +// Package churn cross-references git log with per-function complexity scores +// using a language-supplied lang.ComplexityScorer. The AST-level work lives +// in the language back-end (for Go: goanalyzer/complexity.go); this file +// owns the git log counting (which is language-agnostic) and the severity +// derivation. package churn import ( "bufio" "fmt" - "go/ast" - "go/parser" - "go/token" "os/exec" "path/filepath" "sort" @@ -13,6 +15,7 @@ import ( "strings" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" ) @@ -26,10 +29,14 @@ type FunctionChurn struct { Score float64 } -// Analyze cross-references git log with cognitive complexity for changed functions. -func Analyze(repoPath string, d *diff.Result, complexityThreshold int) (report.Section, error) { +// Analyze cross-references git log with per-function complexity scores for +// the diff's changed files. 
+func Analyze(repoPath string, d *diff.Result, complexityThreshold int, scorer lang.ComplexityScorer) (report.Section, error) { fileCommits := collectFileCommits(repoPath, d.Files) - results := collectChurnResults(repoPath, d.Files, fileCommits) + results, err := collectChurnResults(repoPath, d.Files, fileCommits, scorer) + if err != nil { + return report.Section{}, err + } return buildSection(results, complexityThreshold), nil } @@ -41,49 +48,37 @@ func collectFileCommits(repoPath string, files []diff.FileChange) map[string]int return commits } -func collectChurnResults(repoPath string, files []diff.FileChange, fileCommits map[string]int) []FunctionChurn { +func collectChurnResults(repoPath string, files []diff.FileChange, fileCommits map[string]int, scorer lang.ComplexityScorer) ([]FunctionChurn, error) { var results []FunctionChurn for _, fc := range files { - results = append(results, analyzeFileChurn(repoPath, fc, fileCommits[fc.Path])...) + fnResults, err := analyzeFileChurn(repoPath, fc, fileCommits[fc.Path], scorer) + if err != nil { + return nil, fmt.Errorf("analyzing %s: %w", fc.Path, err) + } + results = append(results, fnResults...) 
} - return results + return results, nil } -func analyzeFileChurn(repoPath string, fc diff.FileChange, commits int) []FunctionChurn { +func analyzeFileChurn(repoPath string, fc diff.FileChange, commits int, scorer lang.ComplexityScorer) ([]FunctionChurn, error) { absPath := filepath.Join(repoPath, fc.Path) - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, absPath, nil, 0) + scores, err := scorer.ScoreFile(absPath, fc) if err != nil { - return nil + return nil, err } - var results []FunctionChurn - ast.Inspect(f, func(n ast.Node) bool { - fn, ok := n.(*ast.FuncDecl) - if !ok { - return true - } - - startLine := fset.Position(fn.Pos()).Line - endLine := fset.Position(fn.End()).Line - - if !fc.OverlapsRange(startLine, endLine) { - return false - } - - complexity := computeComplexity(fn.Body) + results := make([]FunctionChurn, 0, len(scores)) + for _, s := range scores { results = append(results, FunctionChurn{ - File: fc.Path, - Line: startLine, - Name: funcName(fn), + File: s.File, + Line: s.Line, + Name: s.Name, Commits: commits, - Complexity: complexity, - Score: float64(commits) * float64(complexity), + Complexity: s.Complexity, + Score: float64(commits) * float64(s.Complexity), }) - - return false - }) - return results + } + return results, nil } // countFileCommits counts the total number of commits that touched a file. @@ -100,52 +95,9 @@ func countFileCommits(repoPath, filePath string) int { for scanner.Scan() { count++ } - - return count -} - -// computeComplexity is a simplified cognitive complexity counter. 
-func computeComplexity(body *ast.BlockStmt) int { - if body == nil { - return 0 - } - var count int - ast.Inspect(body, func(n ast.Node) bool { - switch n.(type) { - case *ast.IfStmt: - count++ - case *ast.ForStmt, *ast.RangeStmt: - count++ - case *ast.SwitchStmt, *ast.TypeSwitchStmt, *ast.SelectStmt: - count++ - case *ast.BinaryExpr: - bin := n.(*ast.BinaryExpr) - if bin.Op == token.LAND || bin.Op == token.LOR { - count++ - } - } - return true - }) return count } -func funcName(fn *ast.FuncDecl) string { - if fn.Recv != nil && len(fn.Recv.List) > 0 { - recv := fn.Recv.List[0] - var typeName string - switch t := recv.Type.(type) { - case *ast.StarExpr: - if ident, ok := t.X.(*ast.Ident); ok { - typeName = ident.Name - } - case *ast.Ident: - typeName = t.Name - } - return fmt.Sprintf("(%s).%s", typeName, fn.Name.Name) - } - return fn.Name.Name -} - func collectChurnFindings(results []FunctionChurn, complexityThreshold int) ([]report.Finding, int) { var findings []report.Finding var warnCount int diff --git a/internal/churn/churn_test.go b/internal/churn/churn_test.go index 2e367ad..acfa3e1 100644 --- a/internal/churn/churn_test.go +++ b/internal/churn/churn_test.go @@ -1,63 +1,23 @@ package churn import ( - "go/ast" - "go/parser" - "go/token" "os" "path/filepath" "testing" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" "github.com/0xPolygon/diffguard/internal/report" ) -func TestComputeComplexity(t *testing.T) { - tests := []struct { - name string - code string - expected int - }{ - {"empty", `package p; func f() {}`, 0}, - {"single if", `package p; func f(x int) { if x > 0 {} }`, 1}, - {"for loop", `package p; func f() { for i := 0; i < 10; i++ {} }`, 1}, - {"switch", `package p; func f(x int) { switch x { case 1: } }`, 1}, - {"range", `package p; func f(s []int) { for range s {} }`, 1}, - {"select", `package p; func f(c chan int) { select { case <-c: } 
}`, 1}, - {"type switch", `package p; func f(x any) { switch x.(type) { case int: } }`, 1}, - {"logical and", `package p; func f(a, b bool) { if a && b {} }`, 2}, - {"logical or", `package p; func f(a, b bool) { if a || b {} }`, 2}, - {"nested", `package p; func f(x int) { if x > 0 { for x > 0 {} } }`, 2}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", tt.code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } - - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - - got := computeComplexity(fn.Body) - if got != tt.expected { - t.Errorf("computeComplexity = %d, want %d", got, tt.expected) - } - }) - } -} - -func TestComputeComplexity_NilBody(t *testing.T) { - if got := computeComplexity(nil); got != 0 { - t.Errorf("computeComplexity(nil) = %d, want 0", got) +func goScorer(t *testing.T) lang.ComplexityScorer { + t.Helper() + l, ok := lang.Get("go") + if !ok { + t.Fatal("go language not registered") } + return l.ComplexityScorer() } func TestCollectChurnFindings(t *testing.T) { @@ -81,7 +41,6 @@ func TestCollectChurnFindings(t *testing.T) { } func TestCollectChurnFindings_LimitExceeds(t *testing.T) { - // Fewer results than limit of 10 results := []FunctionChurn{ {File: "a.go", Score: 5, Commits: 1, Complexity: 5}, } @@ -92,16 +51,14 @@ func TestCollectChurnFindings_LimitExceeds(t *testing.T) { } func TestCollectChurnFindings_BoundaryCondition(t *testing.T) { - // Exactly at threshold — should NOT warn results := []FunctionChurn{ {File: "a.go", Score: 60, Commits: 6, Complexity: 10}, } _, warnCount := collectChurnFindings(results, 10) if warnCount != 0 { - t.Errorf("warnCount = %d, want 0 (complexity at threshold, not over)", warnCount) + t.Errorf("warnCount = %d, want 0", warnCount) } - // Over threshold and commits > 5 — should warn results2 := []FunctionChurn{ {File: "a.go", Score: 
66, Commits: 6, Complexity: 11}, } @@ -171,29 +128,6 @@ func TestFormatTopScore(t *testing.T) { } } -func TestFuncName(t *testing.T) { - tests := []struct { - code string - expected string - }{ - {`package p; func Foo() {}`, "Foo"}, - {`package p; type T struct{}; func (t T) Bar() {}`, "(T).Bar"}, - {`package p; type T struct{}; func (t *T) Baz() {}`, "(T).Baz"}, - } - - for _, tt := range tests { - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", tt.code, 0) - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - if got := funcName(fd); got != tt.expected { - t.Errorf("funcName = %q, want %q", got, tt.expected) - } - } - } - } -} - func TestAnalyzeFileChurn(t *testing.T) { code := `package test @@ -218,7 +152,10 @@ func complex_fn(a, b int) int { Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, } - results := analyzeFileChurn(dir, fc, 5) + results, err := analyzeFileChurn(dir, fc, 5, goScorer(t)) + if err != nil { + t.Fatalf("analyzeFileChurn: %v", err) + } if len(results) != 2 { t.Fatalf("expected 2 results, got %d", len(results)) } @@ -232,25 +169,10 @@ func complex_fn(a, b int) int { } } -func TestAnalyzeFileChurn_ParseError(t *testing.T) { - dir := t.TempDir() - fc := diff.FileChange{ - Path: "nonexistent.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 10}}, - } - - results := analyzeFileChurn(dir, fc, 0) - if results != nil { - t.Error("expected nil for parse error") - } -} - func TestCollectFileCommits(t *testing.T) { - // Use the actual repo to test files := []diff.FileChange{ {Path: "internal/churn/churn.go"}, } - // This will either work or return 0, both are valid commits := collectFileCommits("../..", files) if commits == nil { t.Error("expected non-nil map") @@ -271,7 +193,10 @@ func f() {} } commits := map[string]int{"test.go": 3} - results := collectChurnResults(dir, files, commits) + results, err := collectChurnResults(dir, files, commits, goScorer(t)) + if err != nil { + 
t.Fatalf("collectChurnResults: %v", err) + } if len(results) != 1 { t.Fatalf("expected 1 result, got %d", len(results)) } @@ -279,3 +204,52 @@ func f() {} t.Errorf("commits = %d, want 3", results[0].Commits) } } + +// erroringScorer returns a canned error so the error-handling branches in +// collectChurnResults and Analyze run. +type erroringScorer struct{ err error } + +func (s erroringScorer) ScoreFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + return nil, s.err +} + +// TestCollectChurnResults_PropagatesError asserts the scorer error escapes +// the aggregation helper so the caller can react. +func TestCollectChurnResults_PropagatesError(t *testing.T) { + files := []diff.FileChange{ + {Path: "nope.go", Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 1}}}, + } + _, err := collectChurnResults(t.TempDir(), files, map[string]int{"nope.go": 0}, + erroringScorer{err: errTest}) + if err == nil { + t.Fatal("expected scorer error to propagate") + } + if !containsAnalyzingPrefix(err.Error()) { + t.Errorf("error %q should be wrapped with file context", err) + } +} + +// TestAnalyze_PropagatesScorerError pins that the top-level Analyze wraps +// scorer errors rather than swallowing them — covers the `if err != nil` +// branch in Analyze. +func TestAnalyze_PropagatesScorerError(t *testing.T) { + d := &diff.Result{Files: []diff.FileChange{ + {Path: "x.go", Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 1}}}, + }} + _, err := Analyze(t.TempDir(), d, 10, erroringScorer{err: errTest}) + if err == nil { + t.Fatal("expected Analyze to return scorer error") + } +} + +// errTest is a sentinel error value the helpers above return. 
+var errTest = testErr("scorer boom") + +type testErr string + +func (e testErr) Error() string { return string(e) } + +func containsAnalyzingPrefix(s string) bool { + prefix := "analyzing " + return len(s) >= len(prefix) && s[:len(prefix)] == prefix +} diff --git a/internal/complexity/complexity.go b/internal/complexity/complexity.go index 9bf4252..c3bf9a8 100644 --- a/internal/complexity/complexity.go +++ b/internal/complexity/complexity.go @@ -1,235 +1,43 @@ +// Package complexity runs a language's ComplexityCalculator across a diff's +// changed files and formats the results into a report.Section. +// +// All AST-level work happens in the language back-end (for Go: +// internal/lang/goanalyzer/complexity.go). This package is now a thin +// orchestrator — threshold check, severity derivation, per-language stats +// summary — so new languages inherit the analyzer for free by implementing +// lang.ComplexityCalculator. package complexity import ( "fmt" - "go/ast" - "go/parser" - "go/token" "math" "path/filepath" "sort" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" ) -// FunctionComplexity holds the complexity result for a single function. -type FunctionComplexity struct { - File string - Line int - Name string - Complexity int -} - -// Analyze computes cognitive complexity for all functions in changed regions of the diff. -func Analyze(repoPath string, d *diff.Result, threshold int) (report.Section, error) { - var results []FunctionComplexity - +// Analyze computes cognitive complexity for all functions in the diff's +// changed regions using the supplied language calculator, then produces the +// "Cognitive Complexity" report section. Parse errors are swallowed at the +// calculator layer (returning nil) so a single malformed file doesn't fail +// the whole run. 
+func Analyze(repoPath string, d *diff.Result, threshold int, calc lang.ComplexityCalculator) (report.Section, error) { + var results []lang.FunctionComplexity for _, fc := range d.Files { - results = append(results, analyzeFile(repoPath, fc)...) - } - - return buildSection(results, threshold), nil -} - -func analyzeFile(repoPath string, fc diff.FileChange) []FunctionComplexity { - absPath := filepath.Join(repoPath, fc.Path) - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, absPath, nil, 0) - if err != nil { - return nil - } - - var results []FunctionComplexity - ast.Inspect(f, func(n ast.Node) bool { - fn, ok := n.(*ast.FuncDecl) - if !ok { - return true - } - - startLine := fset.Position(fn.Pos()).Line - endLine := fset.Position(fn.End()).Line - - if !fc.OverlapsRange(startLine, endLine) { - return false - } - - results = append(results, FunctionComplexity{ - File: fc.Path, - Line: startLine, - Name: funcName(fn), - Complexity: computeComplexity(fn.Body), - }) - - return false - }) - return results -} - -// computeComplexity calculates cognitive complexity of a function body. 
-func computeComplexity(body *ast.BlockStmt) int { - if body == nil { - return 0 - } - return walkBlock(body.List, 0) -} - -func walkBlock(stmts []ast.Stmt, nesting int) int { - total := 0 - for _, stmt := range stmts { - total += walkStmt(stmt, nesting) - } - return total -} - -func walkStmt(stmt ast.Stmt, nesting int) int { - switch s := stmt.(type) { - case *ast.IfStmt: - return walkIfStmt(s, nesting) - case *ast.ForStmt: - return walkForStmt(s, nesting) - case *ast.RangeStmt: - return 1 + nesting + walkBlock(s.Body.List, nesting+1) - case *ast.SwitchStmt: - return 1 + nesting + walkBlock(s.Body.List, nesting+1) - case *ast.TypeSwitchStmt: - return 1 + nesting + walkBlock(s.Body.List, nesting+1) - case *ast.SelectStmt: - return 1 + nesting + walkBlock(s.Body.List, nesting+1) - case *ast.CaseClause: - return walkBlock(s.Body, nesting) - case *ast.CommClause: - return walkBlock(s.Body, nesting) - case *ast.BlockStmt: - return walkBlock(s.List, nesting) - case *ast.LabeledStmt: - return walkStmt(s.Stmt, nesting) - case *ast.AssignStmt: - return walkExprsForFuncLit(s.Rhs, nesting) - case *ast.ExprStmt: - return walkExprForFuncLit(s.X, nesting) - case *ast.ReturnStmt: - return walkExprsForFuncLit(s.Results, nesting) - case *ast.GoStmt: - return walkExprForFuncLit(s.Call.Fun, nesting) - case *ast.DeferStmt: - return walkExprForFuncLit(s.Call.Fun, nesting) - } - return 0 -} - -func walkIfStmt(s *ast.IfStmt, nesting int) int { - total := 1 + nesting - total += countLogicalOps(s.Cond) - if s.Init != nil { - total += walkStmt(s.Init, nesting) - } - total += walkBlock(s.Body.List, nesting+1) - if s.Else != nil { - total += walkElseChain(s.Else, nesting) - } - return total -} - -func walkForStmt(s *ast.ForStmt, nesting int) int { - total := 1 + nesting - if s.Cond != nil { - total += countLogicalOps(s.Cond) - } - total += walkBlock(s.Body.List, nesting+1) - return total -} - -func walkElseChain(node ast.Node, nesting int) int { - switch e := node.(type) { - case 
*ast.IfStmt: - total := 1 - total += countLogicalOps(e.Cond) - if e.Init != nil { - total += walkStmt(e.Init, nesting) + absPath := filepath.Join(repoPath, fc.Path) + fnResults, err := calc.AnalyzeFile(absPath, fc) + if err != nil { + return report.Section{}, fmt.Errorf("analyzing %s: %w", fc.Path, err) } - total += walkBlock(e.Body.List, nesting+1) - if e.Else != nil { - total += walkElseChain(e.Else, nesting) - } - return total - case *ast.BlockStmt: - return 1 + walkBlock(e.List, nesting+1) - } - return 0 -} - -func walkExprsForFuncLit(exprs []ast.Expr, nesting int) int { - total := 0 - for _, expr := range exprs { - total += walkExprForFuncLit(expr, nesting) + results = append(results, fnResults...) } - return total -} - -func walkExprForFuncLit(expr ast.Expr, nesting int) int { - total := 0 - ast.Inspect(expr, func(n ast.Node) bool { - if fl, ok := n.(*ast.FuncLit); ok { - total += walkBlock(fl.Body.List, nesting+1) - return false - } - return true - }) - return total -} - -// countLogicalOps counts sequences of && and || in an expression. -func countLogicalOps(expr ast.Expr) int { - if expr == nil { - return 0 - } - ops := flattenLogicalOps(expr) - if len(ops) == 0 { - return 0 - } - count := 1 - for i := 1; i < len(ops); i++ { - if ops[i] != ops[i-1] { - count++ - } - } - return count -} - -func flattenLogicalOps(expr ast.Expr) []token.Token { - bin, ok := expr.(*ast.BinaryExpr) - if !ok { - return nil - } - if bin.Op != token.LAND && bin.Op != token.LOR { - return nil - } - var ops []token.Token - ops = append(ops, flattenLogicalOps(bin.X)...) - ops = append(ops, bin.Op) - ops = append(ops, flattenLogicalOps(bin.Y)...) 
- return ops -} - -func funcName(fn *ast.FuncDecl) string { - if fn.Recv != nil && len(fn.Recv.List) > 0 { - recv := fn.Recv.List[0] - var typeName string - switch t := recv.Type.(type) { - case *ast.StarExpr: - if ident, ok := t.X.(*ast.Ident); ok { - typeName = ident.Name - } - case *ast.Ident: - typeName = t.Name - } - return fmt.Sprintf("(%s).%s", typeName, fn.Name.Name) - } - return fn.Name.Name + return buildSection(results, threshold), nil } -func collectComplexityFindings(results []FunctionComplexity, threshold int) ([]report.Finding, []float64, int) { +func collectComplexityFindings(results []lang.FunctionComplexity, threshold int) ([]report.Finding, []float64, int) { var findings []report.Finding var values []float64 failCount := 0 @@ -258,7 +66,7 @@ func collectComplexityFindings(results []FunctionComplexity, threshold int) ([]r return findings, values, failCount } -func buildSection(results []FunctionComplexity, threshold int) report.Section { +func buildSection(results []lang.FunctionComplexity, threshold int) report.Section { if len(results) == 0 { return report.Section{ Name: "Cognitive Complexity", diff --git a/internal/complexity/complexity_extra_test.go b/internal/complexity/complexity_extra_test.go deleted file mode 100644 index 8a0f82b..0000000 --- a/internal/complexity/complexity_extra_test.go +++ /dev/null @@ -1,552 +0,0 @@ -package complexity - -import ( - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" - "testing" - - "github.com/0xPolygon/diffguard/internal/diff" - "github.com/0xPolygon/diffguard/internal/report" -) - -func TestWalkStmt_NestingPenalty(t *testing.T) { - // Nesting penalty must be additive, not subtractive. - // If `1 + nesting` were mutated to `1 - nesting`, nested constructs - // would produce wrong (lower) values. 
- tests := []struct { - name string - code string - expected int - }{ - { - "range at nesting 1 with body", - `package p; func f(x int) { - if x > 0 { - for range []int{} { - if x > 0 {} - } - } - }`, - // if(1+0) + range(1+1) + inner_if(1+2) = 1 + 2 + 3 = 6 - 6, - }, - { - "switch at nesting 1 with body", - `package p; func f(x int) { - if x > 0 { - switch x { - case 1: - if x > 0 {} - } - } - }`, - // if(1+0) + switch(1+1) + case_if(1+2) = 1 + 2 + 3 = 6 - 6, - }, - { - "select at nesting 1 with body", - `package p; func f(x int, c chan int) { - if x > 0 { - select { - case <-c: - if x > 0 {} - } - } - }`, - // if(1+0) + select(1+1) + case_if(1+2) = 1 + 2 + 3 = 6 - 6, - }, - { - "type switch at nesting 1 with body", - `package p; func f(x int, v any) { - if x > 0 { - switch v.(type) { - case int: - if x > 0 {} - } - } - }`, - // if(1+0) + typeswitch(1+1) + case_if(1+2) = 1 + 2 + 3 = 6 - 6, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", tt.code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - if got != tt.expected { - t.Errorf("complexity = %d, want %d", got, tt.expected) - } - }) - } -} - -func TestWalkForStmt_WithLogicalCondition(t *testing.T) { - // Tests that for-loop conditions with logical ops are counted. - // If `s.Cond != nil` were mutated to `s.Cond == nil`, the logical - // ops in the condition would be missed. 
- code := `package p; func f(a, b bool) { for a && b {} }` - // for(1) + &&(1) = 2 - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - if got != 2 { - t.Errorf("complexity = %d, want 2 (for + logical op)", got) - } -} - -func TestWalkIfStmt_WithElseChain(t *testing.T) { - code := `package p -func f(x int) { - if x > 0 { - } else if x < 0 { - } else { - } -}` - // if(1) + else if(1) + else(1) = 3 - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - if got != 3 { - t.Errorf("complexity = %d, want 3", got) - } -} - -func TestWalkIfStmt_WithInit(t *testing.T) { - // Tests that if-init is processed for complexity. - code := `package p -func f() error { - if err := g(); err != nil { - } - return nil -} -func g() error { return nil } -` - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok && fd.Name.Name == "f" { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // if(1+0) = 1 (init is an assignment with no control flow) - if got != 1 { - t.Errorf("complexity = %d, want 1", got) - } -} - -func TestWalkElseChain_NestedInit(t *testing.T) { - code := `package p -func f(x int) error { - if x > 0 { - } else if err := g(); err != nil { - } - return nil -} -func g() error { return nil } -` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok && fd.Name.Name == "f" { - fn = fd - break - 
} - } - got := computeComplexity(fn.Body) - // if(1) + else-if(1) = 2 - if got != 2 { - t.Errorf("complexity = %d, want 2", got) - } -} - -func TestWalkElseChain_WithNestedBody(t *testing.T) { - // Tests that nesting+1 is correctly applied in walkElseChain's body. - code := `package p -func f(x int) { - if x > 0 { - } else if x < 0 { - if x < -10 { - } - } -}` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // if(1+0) + else-if(1) + nested-if(1+1nesting) = 1 + 1 + 2 = 4 - if got != 4 { - t.Errorf("complexity = %d, want 4", got) - } -} - -func TestBuildSection_StatsValues(t *testing.T) { - results := []FunctionComplexity{ - {File: "a.go", Line: 1, Name: "f1", Complexity: 4}, - {File: "b.go", Line: 1, Name: "f2", Complexity: 8}, - {File: "c.go", Line: 1, Name: "f3", Complexity: 12}, - } - - s := buildSection(results, 10) - - stats := s.Stats.(map[string]any) - if stats["total_functions"] != 3 { - t.Errorf("total_functions = %v, want 3", stats["total_functions"]) - } - if stats["violations"] != 1 { - t.Errorf("violations = %v, want 1", stats["violations"]) - } - // mean = (4+8+12)/3 = 8.0 - if stats["mean"] != 8.0 { - t.Errorf("mean = %v, want 8.0", stats["mean"]) - } - // median of [4,8,12] = 8 - if stats["median"] != 8.0 { - t.Errorf("median = %v, want 8.0", stats["median"]) - } - // max = 12 - if stats["max"] != 12.0 { - t.Errorf("max = %v, want 12.0", stats["max"]) - } -} - -func TestComputeComplexity_NilBody(t *testing.T) { - if got := computeComplexity(nil); got != 0 { - t.Errorf("computeComplexity(nil) = %d, want 0", got) - } -} - -func TestAnalyzeFile(t *testing.T) { - code := `package test - -func simple() { - x := 1 - _ = x -} - -func withIf(a int) { - if a > 0 { - } -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 
0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - - results := analyzeFile(dir, fc) - if len(results) != 2 { - t.Fatalf("expected 2 results, got %d", len(results)) - } - - // simple should have complexity 0 - if results[0].Complexity != 0 { - t.Errorf("simple complexity = %d, want 0", results[0].Complexity) - } - // withIf should have complexity 1 - if results[1].Complexity != 1 { - t.Errorf("withIf complexity = %d, want 1", results[1].Complexity) - } -} - -func TestAnalyzeFile_ParseError(t *testing.T) { - dir := t.TempDir() - fc := diff.FileChange{ - Path: "nonexistent.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 10}}, - } - - results := analyzeFile(dir, fc) - if results != nil { - t.Error("expected nil for parse error") - } -} - -func TestAnalyzeFile_MultipleFunctions(t *testing.T) { - // If the ast.Inspect callback's `return true` (for non-FuncDecl nodes) - // were mutated to `return false`, only the first function would be found. 
- code := `package test - -type S struct{} - -func (s S) Method1() { - if true {} -} - -func (s *S) Method2() { - if true {} -} - -func TopLevel() { - if true {} -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - - results := analyzeFile(dir, fc) - if len(results) != 3 { - t.Errorf("expected 3 functions, got %d", len(results)) - } -} - -func TestAnalyzeFile_OutOfRange(t *testing.T) { - code := `package test - -func f() { - x := 1 - _ = x -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 100, EndLine: 200}}, - } - - results := analyzeFile(dir, fc) - if len(results) != 0 { - t.Errorf("expected 0 results for out-of-range, got %d", len(results)) - } -} - -func TestCollectComplexityFindings(t *testing.T) { - results := []FunctionComplexity{ - {File: "a.go", Line: 1, Name: "low", Complexity: 5}, - {File: "b.go", Line: 1, Name: "high", Complexity: 15}, - {File: "c.go", Line: 1, Name: "medium", Complexity: 10}, - } - - findings, values, failCount := collectComplexityFindings(results, 10) - - if failCount != 1 { - t.Errorf("failCount = %d, want 1", failCount) - } - if len(findings) != 1 { - t.Errorf("findings = %d, want 1", len(findings)) - } - if len(values) != 3 { - t.Errorf("values = %d, want 3", len(values)) - } -} - -func TestCollectComplexityFindings_AtBoundary(t *testing.T) { - results := []FunctionComplexity{ - {File: "a.go", Line: 1, Name: "exact", Complexity: 10}, - {File: "b.go", Line: 1, Name: "over", Complexity: 11}, - } - - _, _, failCount := collectComplexityFindings(results, 10) - if failCount != 1 { - t.Errorf("failCount = %d, want 1 (11 > 10, 10 is not > 10)", failCount) - } -} - -func TestBuildSection_Empty(t *testing.T) { - s := buildSection(nil, 
10) - if s.Severity != report.SeverityPass { - t.Errorf("severity = %v, want PASS", s.Severity) - } -} - -func TestBuildSection_WithViolations(t *testing.T) { - results := []FunctionComplexity{ - {File: "a.go", Line: 1, Name: "complex", Complexity: 20}, - {File: "b.go", Line: 1, Name: "simple", Complexity: 3}, - } - - s := buildSection(results, 10) - if s.Severity != report.SeverityFail { - t.Errorf("severity = %v, want FAIL", s.Severity) - } - if len(s.Findings) != 1 { - t.Errorf("findings = %d, want 1", len(s.Findings)) - } -} - -func TestMean(t *testing.T) { - if got := mean(nil); got != 0 { - t.Errorf("mean(nil) = %f, want 0", got) - } - if got := mean([]float64{2, 4, 6}); got != 4 { - t.Errorf("mean([2,4,6]) = %f, want 4", got) - } -} - -func TestMedian(t *testing.T) { - if got := median(nil); got != 0 { - t.Errorf("median(nil) = %f, want 0", got) - } - // Odd count - if got := median([]float64{3, 1, 2}); got != 2 { - t.Errorf("median([3,1,2]) = %f, want 2", got) - } - // Even count - if got := median([]float64{4, 1, 3, 2}); got != 2.5 { - t.Errorf("median([4,1,3,2]) = %f, want 2.5", got) - } -} - -func TestMax(t *testing.T) { - if got := max(nil); got != 0 { - t.Errorf("max(nil) = %f, want 0", got) - } - if got := max([]float64{3, 7, 1, 5}); got != 7 { - t.Errorf("max([3,7,1,5]) = %f, want 7", got) - } -} - -func TestWalkStmt_LabeledStmt(t *testing.T) { - code := `package p -func f(x int) { -outer: - for x > 0 { - _ = x - break outer - } -}` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // labeled for(1) = 1 - if got != 1 { - t.Errorf("complexity = %d, want 1", got) - } -} - -func TestWalkStmt_GoAndDefer(t *testing.T) { - code := `package p -func f() { - go func() { - if true {} - }() - defer func() { - if true {} - }() -}` - fset := token.NewFileSet() - f, _ := 
parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // go func: if(1+1nesting) = 2 - // defer func: if(1+1nesting) = 2 - // total = 4 - if got != 4 { - t.Errorf("complexity = %d, want 4", got) - } -} - -func TestWalkStmt_FuncLitInAssign(t *testing.T) { - code := `package p -func f() { - x := func() { - if true {} - } - _ = x -}` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // func lit with if at nesting 1: if(1+1) = 2 - if got != 2 { - t.Errorf("complexity = %d, want 2", got) - } -} - -func TestWalkStmt_FuncLitInReturn(t *testing.T) { - code := `package p -func f() func() { - return func() { - if true {} - } -}` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // return func lit with if at nesting 1: if(1+1) = 2 - if got != 2 { - t.Errorf("complexity = %d, want 2", got) - } -} diff --git a/internal/complexity/complexity_test.go b/internal/complexity/complexity_test.go index 185241b..1f8dcfb 100644 --- a/internal/complexity/complexity_test.go +++ b/internal/complexity/complexity_test.go @@ -1,214 +1,204 @@ package complexity import ( - "go/ast" - "go/parser" - "go/token" + "os" + "path/filepath" "testing" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" + "github.com/0xPolygon/diffguard/internal/report" ) -func TestComputeComplexity(t *testing.T) { - tests := []struct { - name string - code string - expected int - }{ - 
{ - name: "empty function", - code: `package p; func f() {}`, - expected: 0, - }, - { - name: "single if", - code: `package p; func f(x int) { if x > 0 {} }`, - expected: 1, - }, - { - name: "if-else", - code: `package p; func f(x int) { if x > 0 {} else {} }`, - expected: 2, // +1 if, +1 else - }, - { - name: "if-else if-else", - code: `package p; func f(x int) { if x > 0 {} else if x < 0 {} else {} }`, - expected: 3, // +1 if, +1 else if, +1 else - }, - { - name: "nested if", - code: `package p; func f(x, y int) { if x > 0 { if y > 0 {} } }`, - expected: 3, // +1 outer if (nesting=0), +1 inner if + 1 nesting penalty - }, - { - name: "for loop", - code: `package p; func f() { for i := 0; i < 10; i++ {} }`, - expected: 1, - }, - { - name: "nested for", - code: `package p; func f() { for i := 0; i < 10; i++ { for j := 0; j < 10; j++ {} } }`, - expected: 3, // +1 outer for, +1 inner for + 1 nesting - }, - { - name: "switch with cases", - code: `package p; func f(x int) { switch x { case 1: case 2: case 3: } }`, - expected: 1, // +1 for switch, cases don't add complexity - }, - { - name: "logical operators same type", - code: `package p; func f(a, b, c bool) { if a && b && c {} }`, - expected: 2, // +1 if, +1 for &&-sequence (same operator = 1) - }, - { - name: "logical operators mixed", - code: `package p; func f(a, b, c bool) { if a && b || c {} }`, - expected: 3, // +1 if, +2 for mixed && then || - }, - { - name: "range loop", - code: `package p; func f(s []int) { for range s {} }`, - expected: 1, - }, - { - name: "select statement", - code: `package p; func f(c chan int) { select { case <-c: } }`, - expected: 1, - }, - { - name: "deeply nested", - code: `package p -func f(x, y, z int) { - if x > 0 { // +1 (nesting=0) - for y > 0 { // +1 +1 nesting (nesting=1) - if z > 0 { // +1 +2 nesting (nesting=2) +// goCalc returns the registered Go ComplexityCalculator. 
The goanalyzer +// package is blank-imported above so its init() has run by the time this +// helper is called. +func goCalc(t *testing.T) lang.ComplexityCalculator { + t.Helper() + l, ok := lang.Get("go") + if !ok { + t.Fatal("go language not registered") + } + return l.ComplexityCalculator() +} + +// TestAnalyze_WithGoCalc is the integration-shape replacement for the old +// tree of "exercise the AST walker directly" tests that lived here before +// the complexity AST logic moved into goanalyzer. The walker tests now live +// next to the walker in goanalyzer/complexity_walker_test.go; this test +// locks in the orchestration: calculator is consulted, findings are +// aggregated, summary severity and stats shape are correct. +func TestAnalyze_WithGoCalc(t *testing.T) { + code := `package test + +func simple() {} + +func complex_fn(x int) { + if x > 0 { + if x > 10 { + if x > 100 { + if x > 1000 { + if x > 10000 { + if x > 100000 { + _ = x + } + } + } } } } -}`, - expected: 6, // 1 + 2 + 3 +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + if err := os.WriteFile(fp, []byte(code), 0644); err != nil { + t.Fatal(err) + } + + d := &diff.Result{ + Files: []diff.FileChange{ + {Path: "test.go", Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", tt.code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } + section, err := Analyze(dir, d, 10, goCalc(t)) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + // complex_fn has 6 nested ifs — cognitive score > 10 triggers FAIL. 
+ if section.Severity != report.SeverityFail { + t.Errorf("severity = %v, want FAIL", section.Severity) + } + if len(section.Findings) != 1 { + t.Fatalf("findings = %d, want 1", len(section.Findings)) + } + if section.Findings[0].Function != "complex_fn" { + t.Errorf("finding function = %q, want complex_fn", section.Findings[0].Function) + } +} - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - if fn == nil { - t.Fatal("no function found") - } +func TestAnalyze_EmptyResult(t *testing.T) { + d := &diff.Result{} // no files + section, err := Analyze(t.TempDir(), d, 10, goCalc(t)) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + if section.Severity != report.SeverityPass { + t.Errorf("severity = %v, want PASS", section.Severity) + } + if section.Name != "Cognitive Complexity" { + t.Errorf("name = %q", section.Name) + } +} - got := computeComplexity(fn.Body) - if got != tt.expected { - t.Errorf("complexity = %d, want %d", got, tt.expected) - } - }) +func TestBuildSection_StatsValues(t *testing.T) { + results := []lang.FunctionComplexity{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "f1"}, Complexity: 4}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "f2"}, Complexity: 8}, + {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "f3"}, Complexity: 12}, + } + + s := buildSection(results, 10) + + stats := s.Stats.(map[string]any) + if stats["total_functions"] != 3 { + t.Errorf("total_functions = %v, want 3", stats["total_functions"]) + } + if stats["violations"] != 1 { + t.Errorf("violations = %v, want 1", stats["violations"]) + } + if stats["mean"] != 8.0 { + t.Errorf("mean = %v, want 8.0", stats["mean"]) + } + if stats["median"] != 8.0 { + t.Errorf("median = %v, want 8.0", stats["median"]) + } + if stats["max"] != 12.0 { + t.Errorf("max = %v, want 12.0", stats["max"]) } } -func TestFuncName(t *testing.T) { - tests := []struct { - code string - 
expected string - }{ - { - code: `package p; func Foo() {}`, - expected: "Foo", - }, - { - code: `package p; type T struct{}; func (t T) Foo() {}`, - expected: "(T).Foo", - }, - { - code: `package p; type T struct{}; func (t *T) Foo() {}`, - expected: "(T).Foo", - }, +func TestBuildSection_Empty(t *testing.T) { + s := buildSection(nil, 10) + if s.Severity != report.SeverityPass { + t.Errorf("severity = %v, want PASS", s.Severity) } +} - for _, tt := range tests { - t.Run(tt.expected, func(t *testing.T) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", tt.code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } +func TestBuildSection_WithViolations(t *testing.T) { + results := []lang.FunctionComplexity{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "complex"}, Complexity: 20}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "simple"}, Complexity: 3}, + } - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - got := funcName(fd) - if got != tt.expected { - t.Errorf("funcName = %q, want %q", got, tt.expected) - } - return - } - } - t.Fatal("no function found") - }) + s := buildSection(results, 10) + if s.Severity != report.SeverityFail { + t.Errorf("severity = %v, want FAIL", s.Severity) + } + if len(s.Findings) != 1 { + t.Errorf("findings = %d, want 1", len(s.Findings)) } } -func TestCountLogicalOps(t *testing.T) { - tests := []struct { - name string - code string - expected int - }{ - { - name: "no logical ops", - code: `package p; var x = 1 + 2`, - expected: 0, - }, - { - name: "single and", - code: `package p; var x = true && false`, - expected: 1, - }, - { - name: "chain same op", - code: `package p; var x = true && false && true`, - expected: 1, // same operator sequence counts as 1 - }, - { - name: "mixed ops", - code: `package p; var x = true && false || true`, - expected: 2, // switch from && to || - }, +func TestCollectComplexityFindings(t *testing.T) { + results := 
[]lang.FunctionComplexity{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "low"}, Complexity: 5}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "high"}, Complexity: 15}, + {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "medium"}, Complexity: 10}, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", tt.code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } + findings, values, failCount := collectComplexityFindings(results, 10) - // Find the expression in the var declaration - var expr ast.Expr - ast.Inspect(f, func(n ast.Node) bool { - if vs, ok := n.(*ast.ValueSpec); ok && len(vs.Values) > 0 { - expr = vs.Values[0] - return false - } - return true - }) - if expr == nil { - t.Fatal("no expression found") - } + if failCount != 1 { + t.Errorf("failCount = %d, want 1", failCount) + } + if len(findings) != 1 { + t.Errorf("findings = %d, want 1", len(findings)) + } + if len(values) != 3 { + t.Errorf("values = %d, want 3", len(values)) + } +} - got := countLogicalOps(expr) - if got != tt.expected { - t.Errorf("countLogicalOps = %d, want %d", got, tt.expected) - } - }) +func TestCollectComplexityFindings_AtBoundary(t *testing.T) { + results := []lang.FunctionComplexity{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "exact"}, Complexity: 10}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "over"}, Complexity: 11}, + } + + _, _, failCount := collectComplexityFindings(results, 10) + if failCount != 1 { + t.Errorf("failCount = %d, want 1 (11 > 10, 10 is not > 10)", failCount) + } +} + +func TestMean(t *testing.T) { + if got := mean(nil); got != 0 { + t.Errorf("mean(nil) = %f, want 0", got) + } + if got := mean([]float64{2, 4, 6}); got != 4 { + t.Errorf("mean([2,4,6]) = %f, want 4", got) + } +} + +func TestMedian(t *testing.T) { + if got := median(nil); got != 0 { + t.Errorf("median(nil) = %f, 
want 0", got) + } + if got := median([]float64{3, 1, 2}); got != 2 { + t.Errorf("median([3,1,2]) = %f, want 2", got) + } + if got := median([]float64{4, 1, 3, 2}); got != 2.5 { + t.Errorf("median([4,1,3,2]) = %f, want 2.5", got) + } +} + +func TestMax(t *testing.T) { + if got := max(nil); got != 0 { + t.Errorf("max(nil) = %f, want 0", got) + } + if got := max([]float64{3, 7, 1, 5}); got != 7 { + t.Errorf("max([3,7,1,5]) = %f, want 7", got) } } diff --git a/internal/deps/deps.go b/internal/deps/deps.go index 0472dc7..135954c 100644 --- a/internal/deps/deps.go +++ b/internal/deps/deps.go @@ -2,51 +2,18 @@ package deps import ( "fmt" - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" "sort" - "strings" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" ) -// Graph represents the internal package dependency graph. -type Graph struct { - Edges map[string]map[string]bool - ModulePath string -} - -// PackageMetrics holds coupling and instability metrics for a package. -type PackageMetrics struct { - Package string - Afferent int - Efferent int - Instability float64 -} - -// Cycle represents a circular dependency chain. -type Cycle []string - -func (c Cycle) String() string { - return strings.Join(c, " -> ") + " -> " + c[0] -} - -// SDPViolation represents a Stable Dependencies Principle violation. -type SDPViolation struct { - Package string - Dependency string - PackageInstability float64 - DependencyInstability float64 -} - -// Analyze examines import changes in the diff, builds a dependency graph, -// and reports cycles, coupling, instability, and SDP violations. -func Analyze(repoPath string, d *diff.Result) (report.Section, error) { - modulePath, err := detectModulePath(repoPath) +// Analyze examines import changes in the diff, builds a dependency graph +// via the supplied ImportResolver, and reports cycles, coupling, +// instability, and SDP violations. 
+func Analyze(repoPath string, d *diff.Result, resolver lang.ImportResolver) (report.Section, error) { + modulePath, err := resolver.DetectModulePath(repoPath) if err != nil { return report.Section{ Name: "Dependency Structure", @@ -62,7 +29,8 @@ func Analyze(repoPath string, d *diff.Result) (report.Section, error) { changedPkgs := d.ChangedPackages() for _, pkg := range changedPkgs { - scanPackageImports(g, repoPath, pkg) + edges := resolver.ScanPackageImports(repoPath, pkg, modulePath) + mergeEdges(g.Edges, edges) } cycles := detectCycles(g) @@ -72,159 +40,19 @@ func Analyze(repoPath string, d *diff.Result) (report.Section, error) { return buildSection(g, cycles, metrics, sdpViolations, changedPkgs), nil } -func scanPackageImports(g *Graph, repoPath, pkg string) { - absDir := filepath.Join(repoPath, pkg) - fset := token.NewFileSet() - pkgs, err := parser.ParseDir(fset, absDir, nil, parser.ImportsOnly) - if err != nil { - return - } - - pkgImportPath := g.ModulePath + "/" + pkg - for _, p := range pkgs { - if strings.HasSuffix(p.Name, "_test") { - continue - } - collectImports(g, p, pkgImportPath) - } -} - -func collectImports(g *Graph, p *ast.Package, pkgImportPath string) { - for _, f := range p.Files { - for _, imp := range f.Imports { - importPath := strings.Trim(imp.Path.Value, `"`) - if !strings.HasPrefix(importPath, g.ModulePath) { - continue - } - if g.Edges[pkgImportPath] == nil { - g.Edges[pkgImportPath] = make(map[string]bool) - } - g.Edges[pkgImportPath][importPath] = true - } - } -} - -func detectModulePath(repoPath string) (string, error) { - goModPath := filepath.Join(repoPath, "go.mod") - content, err := readFile(goModPath) - if err != nil { - return "", fmt.Errorf("reading go.mod: %w", err) - } - for _, line := range strings.Split(content, "\n") { - line = strings.TrimSpace(line) - if strings.HasPrefix(line, "module ") { - return strings.TrimSpace(strings.TrimPrefix(line, "module ")), nil - } - } - return "", fmt.Errorf("no module directive found 
in go.mod") -} - -// detectCycles finds all cycles in the dependency graph using DFS. -func detectCycles(g *Graph) []Cycle { - var cycles []Cycle - visited := make(map[string]bool) - inStack := make(map[string]bool) - var stack []string - - var dfs func(node string) - dfs = func(node string) { - visited[node] = true - inStack[node] = true - stack = append(stack, node) - - for dep := range g.Edges[node] { - if !visited[dep] { - dfs(dep) - } else if inStack[dep] { - cycles = append(cycles, extractCycle(stack, dep)) - } - } - - stack = stack[:len(stack)-1] - inStack[node] = false - } - - for node := range g.Edges { - if !visited[node] { - dfs(node) - } - } - - return cycles -} - -func extractCycle(stack []string, target string) Cycle { - var cycle Cycle - for i := len(stack) - 1; i >= 0; i-- { - cycle = append([]string{stack[i]}, cycle...) - if stack[i] == target { - break - } - } - return cycle -} - -// computeMetrics calculates afferent/efferent coupling and instability. -func computeMetrics(g *Graph) map[string]*PackageMetrics { - metrics := make(map[string]*PackageMetrics) - - getOrCreate := func(pkg string) *PackageMetrics { - if m, ok := metrics[pkg]; ok { - return m - } - m := &PackageMetrics{Package: pkg} - metrics[pkg] = m - return m - } - - for pkg, imports := range g.Edges { - m := getOrCreate(pkg) - m.Efferent = len(imports) - for dep := range imports { - dm := getOrCreate(dep) - dm.Afferent++ - } - } - - for _, m := range metrics { - total := m.Afferent + m.Efferent - if total > 0 { - m.Instability = float64(m.Efferent) / float64(total) - } - } - - return metrics -} - -func detectSDPViolations(g *Graph, metrics map[string]*PackageMetrics) []SDPViolation { - var violations []SDPViolation - for pkg, imports := range g.Edges { - pkgMetric := metrics[pkg] - if pkgMetric == nil { - continue - } - violations = append(violations, checkSDPForPackage(pkgMetric, imports, metrics)...) 
- } - return violations -} - -func checkSDPForPackage(pkgMetric *PackageMetrics, imports map[string]bool, metrics map[string]*PackageMetrics) []SDPViolation { - var violations []SDPViolation - for dep := range imports { - depMetric := metrics[dep] - if depMetric == nil { - continue +// mergeEdges folds the resolver's per-package adjacency map into the running +// graph. Resolvers typically return a single-entry map on each call, but +// the interface is broad enough that a resolver could return edges for +// sub-packages too — so merge instead of assign. +func mergeEdges(dst, src map[string]map[string]bool) { + for from, tos := range src { + if dst[from] == nil { + dst[from] = make(map[string]bool) } - if depMetric.Instability > pkgMetric.Instability { - violations = append(violations, SDPViolation{ - Package: pkgMetric.Package, - Dependency: dep, - PackageInstability: pkgMetric.Instability, - DependencyInstability: depMetric.Instability, - }) + for to := range tos { + dst[from][to] = true } } - return violations } func buildSection(g *Graph, cycles []Cycle, metrics map[string]*PackageMetrics, sdpViolations []SDPViolation, changedPkgs []string) report.Section { @@ -285,15 +113,3 @@ func buildDepsStats(changedPkgs []string, cycles []Cycle, sdpViolations []SDPVio "metrics": metricsList, } } - -func trimModule(pkg, modulePath string) string { - return strings.TrimPrefix(pkg, modulePath+"/") -} - -func readFile(path string) (string, error) { - b, err := os.ReadFile(path) - if err != nil { - return "", err - } - return string(b), nil -} diff --git a/internal/deps/graph.go b/internal/deps/graph.go new file mode 100644 index 0000000..5664ca7 --- /dev/null +++ b/internal/deps/graph.go @@ -0,0 +1,167 @@ +// Package deps runs dependency-structure analysis on the files changed in a +// diff. It relies on a language-supplied lang.ImportResolver to turn source +// files into the adjacency map the graph algorithms operate on. 
+// +// graph.go contains the pure-math primitives: cycle detection, coupling, +// instability, SDP violation detection. deps.go wires them up to an +// ImportResolver and builds a report.Section. Splitting the two makes the +// graph algorithms reusable for any language without dragging the +// orchestration (module-path detection, section formatting) along. +package deps + +import "strings" + +// Graph represents an internal package dependency graph. Nodes are +// package-level identifiers (typically the module path plus the package +// directory, e.g. "example.com/mod/internal/foo"). Edges point from +// importer to importee. +type Graph struct { + Edges map[string]map[string]bool + ModulePath string +} + +// PackageMetrics holds coupling and instability metrics for a package. +// Afferent = how many other packages import this one ("fan-in"). +// Efferent = how many other packages this one imports ("fan-out"). +// Instability = Efferent / (Afferent + Efferent), range [0,1]. +type PackageMetrics struct { + Package string + Afferent int + Efferent int + Instability float64 +} + +// Cycle represents a circular dependency chain. +type Cycle []string + +// String formats the cycle as "a -> b -> c -> a" (closing back to the +// start). Used in report findings. +func (c Cycle) String() string { + return strings.Join(c, " -> ") + " -> " + c[0] +} + +// SDPViolation represents a Stable Dependencies Principle violation: a +// package with low instability (stable) imports a package with higher +// instability (unstable). +type SDPViolation struct { + Package string + Dependency string + PackageInstability float64 + DependencyInstability float64 +} + +// detectCycles finds all cycles in the dependency graph using DFS. 
+func detectCycles(g *Graph) []Cycle { + var cycles []Cycle + visited := make(map[string]bool) + inStack := make(map[string]bool) + var stack []string + + var dfs func(node string) + dfs = func(node string) { + visited[node] = true + inStack[node] = true + stack = append(stack, node) + + for dep := range g.Edges[node] { + if !visited[dep] { + dfs(dep) + } else if inStack[dep] { + cycles = append(cycles, extractCycle(stack, dep)) + } + } + + stack = stack[:len(stack)-1] + inStack[node] = false + } + + for node := range g.Edges { + if !visited[node] { + dfs(node) + } + } + + return cycles +} + +func extractCycle(stack []string, target string) Cycle { + var cycle Cycle + for i := len(stack) - 1; i >= 0; i-- { + cycle = append([]string{stack[i]}, cycle...) + if stack[i] == target { + break + } + } + return cycle +} + +// computeMetrics calculates afferent/efferent coupling and instability. +func computeMetrics(g *Graph) map[string]*PackageMetrics { + metrics := make(map[string]*PackageMetrics) + + getOrCreate := func(pkg string) *PackageMetrics { + if m, ok := metrics[pkg]; ok { + return m + } + m := &PackageMetrics{Package: pkg} + metrics[pkg] = m + return m + } + + for pkg, imports := range g.Edges { + m := getOrCreate(pkg) + m.Efferent = len(imports) + for dep := range imports { + dm := getOrCreate(dep) + dm.Afferent++ + } + } + + for _, m := range metrics { + total := m.Afferent + m.Efferent + if total > 0 { + m.Instability = float64(m.Efferent) / float64(total) + } + } + + return metrics +} + +// detectSDPViolations returns the package->dependency edges that violate +// the Stable Dependencies Principle (a package depending on something less +// stable than itself). 
+func detectSDPViolations(g *Graph, metrics map[string]*PackageMetrics) []SDPViolation { + var violations []SDPViolation + for pkg, imports := range g.Edges { + pkgMetric := metrics[pkg] + if pkgMetric == nil { + continue + } + violations = append(violations, checkSDPForPackage(pkgMetric, imports, metrics)...) + } + return violations +} + +func checkSDPForPackage(pkgMetric *PackageMetrics, imports map[string]bool, metrics map[string]*PackageMetrics) []SDPViolation { + var violations []SDPViolation + for dep := range imports { + depMetric := metrics[dep] + if depMetric == nil { + continue + } + if depMetric.Instability > pkgMetric.Instability { + violations = append(violations, SDPViolation{ + Package: pkgMetric.Package, + Dependency: dep, + PackageInstability: pkgMetric.Instability, + DependencyInstability: depMetric.Instability, + }) + } + } + return violations +} + +// trimModule strips the module prefix from a package path for display. +func trimModule(pkg, modulePath string) string { + return strings.TrimPrefix(pkg, modulePath+"/") +} diff --git a/internal/diff/diff.go b/internal/diff/diff.go index 74fcc8c..27391cb 100644 --- a/internal/diff/diff.go +++ b/internal/diff/diff.go @@ -49,7 +49,7 @@ func (fc FileChange) OverlapsRange(start, end int) bool { return false } -// Result holds all changed Go files parsed from a git diff. +// Result holds all changed source files parsed from a git diff. type Result struct { BaseBranch string Files []FileChange @@ -79,8 +79,35 @@ func (r Result) FilesByPackage() map[string][]FileChange { return m } -// Parse runs git diff against the given base branch and parses changed Go files. -func Parse(repoPath, baseBranch string) (*Result, error) { +// Filter describes the subset of the diff the caller cares about. It is a +// narrower shape than lang.FileFilter so the diff package doesn't have to +// import lang (which would pull the full analyzer stack). 
Callers (usually +// cmd/diffguard) construct a Filter from their chosen language's +// lang.FileFilter and pass it here. +type Filter struct { + // DiffGlobs is passed to `git diff -- ` to restrict the raw diff + // to language source files. + DiffGlobs []string + // Includes reports whether an analyzable source path (extension matches, + // not a test file) belongs to the caller's language. + Includes func(path string) bool +} + +// includes returns true iff the filter accepts the path. An empty filter +// (Includes == nil) defaults to accepting every path — but production +// callers always supply one. +func (f Filter) includes(path string) bool { + if f.Includes == nil { + return true + } + return f.Includes(path) +} + +// Parse runs `git diff` against the merge-base of baseBranch..HEAD and +// returns the changed files that pass the filter. The filter is also used to +// restrict the raw `git diff` output via -- globs so the parser never has to +// see files from other languages. +func Parse(repoPath, baseBranch string, filter Filter) (*Result, error) { mergeBaseCmd := exec.Command("git", "merge-base", baseBranch, "HEAD") mergeBaseCmd.Dir = repoPath mergeBaseOut, err := mergeBaseCmd.Output() @@ -89,14 +116,20 @@ func Parse(repoPath, baseBranch string) (*Result, error) { } mergeBase := strings.TrimSpace(string(mergeBaseOut)) - cmd := exec.Command("git", "diff", "-U0", mergeBase, "--", "*.go") + args := []string{"diff", "--src-prefix=a/", "--dst-prefix=b/", "-U0", mergeBase} + if len(filter.DiffGlobs) > 0 { + args = append(args, "--") + args = append(args, filter.DiffGlobs...) + } + + cmd := exec.Command("git", args...) 
cmd.Dir = repoPath out, err := cmd.Output() if err != nil { return nil, fmt.Errorf("git diff failed: %w", err) } - files, err := parseUnifiedDiff(string(out)) + files, err := parseUnifiedDiff(string(out), filter) if err != nil { return nil, err } @@ -107,18 +140,19 @@ func Parse(repoPath, baseBranch string) (*Result, error) { }, nil } -// CollectPaths builds a Result by treating each .go file under the given -// paths as fully changed. Useful for refactoring mode where you want to -// analyze entire files rather than diffed regions only. +// CollectPaths builds a Result by treating each analyzable source file under +// the given paths as fully changed. Useful for refactoring mode where you +// want to analyze entire files rather than diffed regions only. // // paths may contain individual files or directories (walked recursively). -// Test files (_test.go) are excluded to match Parse's behavior. -func CollectPaths(repoPath string, paths []string) (*Result, error) { +// Files that fail filter.Includes are excluded — test files and non-source +// files never show up in the result. 
+func CollectPaths(repoPath string, paths []string, filter Filter) (*Result, error) { var files []FileChange seen := make(map[string]bool) for _, p := range paths { - if err := collectPath(repoPath, p, &files, seen); err != nil { + if err := collectPath(repoPath, p, filter, &files, seen); err != nil { return nil, err } } @@ -126,7 +160,7 @@ func CollectPaths(repoPath string, paths []string) (*Result, error) { return &Result{Files: files}, nil } -func collectPath(repoPath, p string, files *[]FileChange, seen map[string]bool) error { +func collectPath(repoPath, p string, filter Filter, files *[]FileChange, seen map[string]bool) error { absPath := p if !filepath.IsAbs(p) { absPath = filepath.Join(repoPath, p) @@ -136,25 +170,25 @@ func collectPath(repoPath, p string, files *[]FileChange, seen map[string]bool) return fmt.Errorf("stat %s: %w", p, err) } if info.IsDir() { - return collectDir(repoPath, absPath, files, seen) + return collectDir(repoPath, absPath, filter, files, seen) } - return addFile(repoPath, absPath, files, seen) + return addFile(repoPath, absPath, filter, files, seen) } -func collectDir(repoPath, absPath string, files *[]FileChange, seen map[string]bool) error { +func collectDir(repoPath, absPath string, filter Filter, files *[]FileChange, seen map[string]bool) error { return filepath.WalkDir(absPath, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } - if d.IsDir() || !isAnalyzableGoFile(path) { + if d.IsDir() || !filter.includes(path) { return nil } - return addFile(repoPath, path, files, seen) + return addFile(repoPath, path, filter, files, seen) }) } -func addFile(repoPath, absPath string, files *[]FileChange, seen map[string]bool) error { - if !isAnalyzableGoFile(absPath) { +func addFile(repoPath, absPath string, filter Filter, files *[]FileChange, seen map[string]bool) error { + if !filter.includes(absPath) { return nil } rel, err := filepath.Rel(repoPath, absPath) @@ -172,12 +206,9 @@ func addFile(repoPath, absPath 
string, files *[]FileChange, seen map[string]bool return nil } -func isAnalyzableGoFile(path string) bool { - return strings.HasSuffix(path, ".go") && !strings.HasSuffix(path, "_test.go") -} - -// parseUnifiedDiff parses the output of git diff -U0 into FileChange entries. -func parseUnifiedDiff(diffOutput string) ([]FileChange, error) { +// parseUnifiedDiff parses the output of git diff -U0 into FileChange entries, +// dropping files that don't match filter.Includes. +func parseUnifiedDiff(diffOutput string, filter Filter) ([]FileChange, error) { var files []FileChange var current *FileChange @@ -186,7 +217,7 @@ func parseUnifiedDiff(diffOutput string) ([]FileChange, error) { line := scanner.Text() if strings.HasPrefix(line, "+++ b/") { - current = handleFileLine(line, &files) + current = handleFileLine(line, filter, &files) continue } @@ -198,9 +229,9 @@ func parseUnifiedDiff(diffOutput string) ([]FileChange, error) { return files, scanner.Err() } -func handleFileLine(line string, files *[]FileChange) *FileChange { +func handleFileLine(line string, filter Filter, files *[]FileChange) *FileChange { path := strings.TrimPrefix(line, "+++ b/") - if !strings.HasSuffix(path, ".go") || strings.HasSuffix(path, "_test.go") { + if !filter.includes(path) { return nil } *files = append(*files, FileChange{Path: path}) diff --git a/internal/diff/diff_extra_test.go b/internal/diff/diff_extra_test.go index 62dec06..e5b6f46 100644 --- a/internal/diff/diff_extra_test.go +++ b/internal/diff/diff_extra_test.go @@ -11,7 +11,7 @@ func TestCollectPaths_SingleFile(t *testing.T) { fp := filepath.Join(dir, "foo.go") os.WriteFile(fp, []byte("package x\n\nfunc f() {}\n"), 0644) - r, err := CollectPaths(dir, []string{"foo.go"}) + r, err := CollectPaths(dir, []string{"foo.go"}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -38,7 +38,7 @@ func TestCollectPaths_SkipsTestFiles(t *testing.T) { os.WriteFile(filepath.Join(dir, "foo.go"), []byte("package x\n"), 0644) 
os.WriteFile(filepath.Join(dir, "foo_test.go"), []byte("package x\n"), 0644) - r, err := CollectPaths(dir, []string{"foo_test.go"}) + r, err := CollectPaths(dir, []string{"foo_test.go"}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -56,7 +56,7 @@ func TestCollectPaths_Directory(t *testing.T) { os.WriteFile(filepath.Join(dir, "README.md"), []byte("readme\n"), 0644) os.WriteFile(filepath.Join(dir, "sub", "c.go"), []byte("package x\n"), 0644) - r, err := CollectPaths(dir, []string{"."}) + r, err := CollectPaths(dir, []string{"."}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -68,7 +68,7 @@ func TestCollectPaths_Directory(t *testing.T) { func TestCollectPaths_NonexistentPath(t *testing.T) { dir := t.TempDir() - _, err := CollectPaths(dir, []string{"nonexistent.go"}) + _, err := CollectPaths(dir, []string{"nonexistent.go"}, goFilter()) if err == nil { t.Error("expected error for nonexistent path") } @@ -81,7 +81,7 @@ func TestCollectPaths_MultiplePaths(t *testing.T) { os.WriteFile(filepath.Join(dir, "pkg1", "a.go"), []byte("package pkg1\n"), 0644) os.WriteFile(filepath.Join(dir, "pkg2", "b.go"), []byte("package pkg2\n"), 0644) - r, err := CollectPaths(dir, []string{"pkg1", "pkg2"}) + r, err := CollectPaths(dir, []string{"pkg1", "pkg2"}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -95,7 +95,7 @@ func TestCollectPaths_Deduplicates(t *testing.T) { os.WriteFile(filepath.Join(dir, "a.go"), []byte("package x\n"), 0644) // Pass the same file via both file path and dir - r, err := CollectPaths(dir, []string{"a.go", "."}) + r, err := CollectPaths(dir, []string{"a.go", "."}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -108,7 +108,7 @@ func TestCollectPaths_SkipsNonGoFile(t *testing.T) { dir := t.TempDir() os.WriteFile(filepath.Join(dir, "notes.txt"), []byte("notes"), 0644) - r, err := CollectPaths(dir, []string{"notes.txt"}) + r, err := CollectPaths(dir, []string{"notes.txt"}, goFilter()) if err != nil { 
t.Fatalf("error: %v", err) } @@ -117,7 +117,13 @@ func TestCollectPaths_SkipsNonGoFile(t *testing.T) { } } -func TestIsAnalyzableGoFile(t *testing.T) { +// TestFilter_IncludesGoFile exercises the path the diff parser takes when +// deciding whether to admit a file from `git diff` output. The old +// hardcoded isAnalyzableGoFile function is gone; the same semantic check +// now lives in the caller-supplied Filter.Includes, and this test locks in +// that Filter.includes() routes through it correctly. +func TestFilter_IncludesGoFile(t *testing.T) { + filter := goFilter() tests := []struct { path string want bool @@ -129,8 +135,19 @@ func TestIsAnalyzableGoFile(t *testing.T) { {"path/to/foo_test.go", false}, } for _, tt := range tests { - if got := isAnalyzableGoFile(tt.path); got != tt.want { - t.Errorf("isAnalyzableGoFile(%q) = %v, want %v", tt.path, got, tt.want) + if got := filter.includes(tt.path); got != tt.want { + t.Errorf("filter.includes(%q) = %v, want %v", tt.path, got, tt.want) + } + } +} + +// TestFilter_NilIncludesAdmitsAll covers the `Includes == nil` default +// branch: an empty filter must admit every path. 
+func TestFilter_NilIncludesAdmitsAll(t *testing.T) { + var f Filter + for _, p := range []string{"foo.go", "bar.rs", "anything"} { + if !f.includes(p) { + t.Errorf("nil-Includes filter should admit %q", p) } } } @@ -145,7 +162,7 @@ func filenames(files []FileChange) []string { func TestHandleFileLine_GoFile(t *testing.T) { var files []FileChange - result := handleFileLine("+++ b/pkg/handler.go", &files) + result := handleFileLine("+++ b/pkg/handler.go", goFilter(), &files) if result == nil { t.Fatal("expected non-nil result for .go file") } @@ -159,7 +176,7 @@ func TestHandleFileLine_GoFile(t *testing.T) { func TestHandleFileLine_TestFile(t *testing.T) { var files []FileChange - result := handleFileLine("+++ b/pkg/handler_test.go", &files) + result := handleFileLine("+++ b/pkg/handler_test.go", goFilter(), &files) if result != nil { t.Error("expected nil for test file") } @@ -170,7 +187,7 @@ func TestHandleFileLine_TestFile(t *testing.T) { func TestHandleFileLine_NonGoFile(t *testing.T) { var files []FileChange - result := handleFileLine("+++ b/README.md", &files) + result := handleFileLine("+++ b/README.md", goFilter(), &files) if result != nil { t.Error("expected nil for non-Go file") } @@ -263,7 +280,7 @@ func TestParseUnifiedDiff_NonGoFile(t *testing.T) { @@ -1,0 +1,5 @@ +new content ` - files, err := parseUnifiedDiff(input) + files, err := parseUnifiedDiff(input, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -273,7 +290,7 @@ func TestParseUnifiedDiff_NonGoFile(t *testing.T) { } func TestParseUnifiedDiff_EmptyInput(t *testing.T) { - files, err := parseUnifiedDiff("") + files, err := parseUnifiedDiff("", goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -361,7 +378,7 @@ diff --git a/b.go b/b.go @@ -10,0 +11,3 @@ +new code ` - files, err := parseUnifiedDiff(input) + files, err := parseUnifiedDiff(input, goFilter()) if err != nil { t.Fatalf("error: %v", err) } diff --git a/internal/diff/diff_parse_test.go b/internal/diff/diff_parse_test.go 
index 2b6f13f..abbec47 100644 --- a/internal/diff/diff_parse_test.go +++ b/internal/diff/diff_parse_test.go @@ -29,7 +29,7 @@ func runGit(t *testing.T, dir string, args ...string) { func TestParse_NotGitRepo(t *testing.T) { dir := t.TempDir() - _, err := Parse(dir, "main") + _, err := Parse(dir, "main", goFilter()) if err == nil { t.Fatal("expected error when running Parse outside a git repo") } @@ -47,7 +47,7 @@ func TestParse_MissingBaseBranch(t *testing.T) { runGit(t, dir, "add", ".") runGit(t, dir, "commit", "-q", "-m", "init") - _, err := Parse(dir, "no-such-branch") + _, err := Parse(dir, "no-such-branch", goFilter()) if err == nil { t.Fatal("expected error for nonexistent base branch") } @@ -71,7 +71,7 @@ func TestParse_SuccessDetectsChangedGoFile(t *testing.T) { runGit(t, dir, "add", ".") runGit(t, dir, "commit", "-q", "-m", "add new.go") - result, err := Parse(dir, "main") + result, err := Parse(dir, "main", goFilter()) if err != nil { t.Fatalf("Parse error: %v", err) } @@ -102,7 +102,7 @@ func TestParse_IgnoresTestFiles(t *testing.T) { runGit(t, dir, "add", ".") runGit(t, dir, "commit", "-q", "-m", "add test") - result, err := Parse(dir, "main") + result, err := Parse(dir, "main", goFilter()) if err != nil { t.Fatalf("Parse error: %v", err) } diff --git a/internal/diff/diff_test.go b/internal/diff/diff_test.go index c0b1cd7..7531fba 100644 --- a/internal/diff/diff_test.go +++ b/internal/diff/diff_test.go @@ -30,7 +30,7 @@ diff --git a/pkg/handler/routes_test.go b/pkg/handler/routes_test.go +test file should be skipped ` - files, err := parseUnifiedDiff(input) + files, err := parseUnifiedDiff(input, goFilter()) if err != nil { t.Fatalf("parseUnifiedDiff error: %v", err) } @@ -69,7 +69,7 @@ func TestParseUnifiedDiff_PureDeletion(t *testing.T) { @@ -10,5 +10,0 @@ ` - files, err := parseUnifiedDiff(input) + files, err := parseUnifiedDiff(input, goFilter()) if err != nil { t.Fatalf("parseUnifiedDiff error: %v", err) } diff --git a/internal/diff/helpers_test.go 
b/internal/diff/helpers_test.go new file mode 100644 index 0000000..5b47ffa --- /dev/null +++ b/internal/diff/helpers_test.go @@ -0,0 +1,16 @@ +package diff + +import "strings" + +// goFilter returns a minimal Filter matching the old hardcoded Go behavior: +// includes any path ending in .go except _test.go. Used by the in-package +// tests so they exercise the filter parameter without pulling in the +// goanalyzer package (which would create a test-time import cycle). +func goFilter() Filter { + return Filter{ + DiffGlobs: []string{"*.go"}, + Includes: func(path string) bool { + return strings.HasSuffix(path, ".go") && !strings.HasSuffix(path, "_test.go") + }, + } +} diff --git a/internal/lang/detect.go b/internal/lang/detect.go new file mode 100644 index 0000000..6b9cd0a --- /dev/null +++ b/internal/lang/detect.go @@ -0,0 +1,103 @@ +package lang + +import ( + "os" + "path/filepath" + "sort" + "sync" +) + +// manifestFiles maps a repo-root filename to the language Name() that owns +// it. When multiple languages share a manifest (e.g. package.json for JS and +// TS), the ambiguity is resolved inside the language's own detection hook +// — here we only record the canonical owner. +// +// Languages without a manifest (or where the manifest needs extra inspection +// to disambiguate) can add themselves to this map from their init() via +// RegisterManifest so the auto-detector still picks them up. +var ( + manifestMu sync.Mutex + manifests = map[string]string{} +) + +// RegisterManifest associates a repo-root filename with a language name. +// A language implementation typically calls this alongside Register(): +// +// func init() { +// lang.Register(&Language{}) +// lang.RegisterManifest("go.mod", "go") +// } +// +// The detector only fires on files that exist at the repository root, so +// sub-directory manifests (e.g. 
nested Cargo.toml for workspaces) don't +// falsely trigger; languages that need subtree scanning should implement +// their own detection hook via RegisterDetector. +func RegisterManifest(filename, languageName string) { + manifestMu.Lock() + defer manifestMu.Unlock() + manifests[filename] = languageName +} + +// Detector is a per-language hook that reports whether the given repo root +// contains a project of this language. Languages use RegisterDetector when +// manifest-file matching is too coarse — e.g. "package.json + at least one +// .ts file" for TypeScript. +type Detector func(repoPath string) bool + +var ( + detectorMu sync.Mutex + detectors = map[string]Detector{} +) + +// RegisterDetector associates a language name with a custom detection +// function. Both the detector (if present) and the manifest file (if +// registered) are consulted during Detect; a language matches if either +// returns true. +func RegisterDetector(languageName string, d Detector) { + detectorMu.Lock() + defer detectorMu.Unlock() + detectors[languageName] = d +} + +// Detect scans repoPath for per-language manifest files and custom detectors +// and returns the languages whose signatures match. The returned slice is +// sorted by Name() so report ordering stays deterministic across calls. +// +// Only languages that are both (a) registered via Register and (b) match via +// a manifest or detector are returned. That way, adding a new language to +// the binary without a matching manifest entry is inert — nothing misfires. +func Detect(repoPath string) []Language { + matched := map[string]bool{} + + // Manifest-based detection. + manifestMu.Lock() + for filename, name := range manifests { + if _, err := os.Stat(filepath.Join(repoPath, filename)); err == nil { + matched[name] = true + } + } + manifestMu.Unlock() + + // Custom-detector fallback. Languages that can't be distinguished by a + // single manifest file (TypeScript vs. 
JavaScript, for example) install + // a detector that inspects the tree. + detectorMu.Lock() + for name, d := range detectors { + if d(repoPath) { + matched[name] = true + } + } + detectorMu.Unlock() + + var out []Language + registryMu.RLock() + for name := range matched { + if l, ok := registryMap[name]; ok { + out = append(out, l) + } + } + registryMu.RUnlock() + + sort.Slice(out, func(i, j int) bool { return out[i].Name() < out[j].Name() }) + return out +} diff --git a/internal/lang/detect_test.go b/internal/lang/detect_test.go new file mode 100644 index 0000000..d80b1dd --- /dev/null +++ b/internal/lang/detect_test.go @@ -0,0 +1,131 @@ +package lang + +import ( + "os" + "path/filepath" + "testing" +) + +func TestDetect_ManifestMatch(t *testing.T) { + defer UnregisterForTest("test-detect-manifest") + Register(&fakeLang{name: "test-detect-manifest"}) + RegisterManifest("test-detect-marker", "test-detect-manifest") + t.Cleanup(func() { + manifestMu.Lock() + delete(manifests, "test-detect-marker") + manifestMu.Unlock() + }) + + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "test-detect-marker"), []byte("x"), 0644); err != nil { + t.Fatal(err) + } + + found := names(Detect(dir)) + if !contains(found, "test-detect-manifest") { + t.Errorf("Detect returned %v, want it to include test-detect-manifest", found) + } +} + +func TestDetect_CustomDetector(t *testing.T) { + defer UnregisterForTest("test-detect-custom") + Register(&fakeLang{name: "test-detect-custom"}) + RegisterDetector("test-detect-custom", func(string) bool { return true }) + t.Cleanup(func() { + detectorMu.Lock() + delete(detectors, "test-detect-custom") + detectorMu.Unlock() + }) + + dir := t.TempDir() + found := names(Detect(dir)) + if !contains(found, "test-detect-custom") { + t.Errorf("Detect returned %v, want it to include test-detect-custom", found) + } +} + +func TestDetect_EmptyRepo(t *testing.T) { + dir := t.TempDir() + // No languages with matching manifests should fire on an 
empty dir. + // We can't assert len==0 because goanalyzer's init() registered "go" + // with a go.mod manifest, and there's no go.mod in the tempdir so "go" + // should not match. + found := names(Detect(dir)) + if contains(found, "go") { + t.Errorf("Detect on empty dir returned %v, did not expect 'go'", found) + } +} + +func TestDetect_MultipleLanguages(t *testing.T) { + defer UnregisterForTest("test-multi-a") + defer UnregisterForTest("test-multi-b") + Register(&fakeLang{name: "test-multi-a"}) + Register(&fakeLang{name: "test-multi-b"}) + RegisterManifest("marker-a", "test-multi-a") + RegisterManifest("marker-b", "test-multi-b") + t.Cleanup(func() { + manifestMu.Lock() + delete(manifests, "marker-a") + delete(manifests, "marker-b") + manifestMu.Unlock() + }) + + dir := t.TempDir() + os.WriteFile(filepath.Join(dir, "marker-a"), []byte("x"), 0644) + os.WriteFile(filepath.Join(dir, "marker-b"), []byte("x"), 0644) + + found := names(Detect(dir)) + if !contains(found, "test-multi-a") || !contains(found, "test-multi-b") { + t.Errorf("Detect returned %v, want both test-multi-a and test-multi-b", found) + } + + // Ordering must be deterministic (sorted by Name()). + idxA, idxB := -1, -1 + for i, n := range found { + if n == "test-multi-a" { + idxA = i + } + if n == "test-multi-b" { + idxB = i + } + } + if idxA > idxB { + t.Errorf("Detect did not sort by name: %v", found) + } +} + +func TestDetect_UnregisteredManifestIgnored(t *testing.T) { + // Register a manifest pointing to a language that is NOT registered. + // Detect should not include it in the results. 
+ RegisterManifest("unknown-manifest", "no-such-language") + t.Cleanup(func() { + manifestMu.Lock() + delete(manifests, "unknown-manifest") + manifestMu.Unlock() + }) + + dir := t.TempDir() + os.WriteFile(filepath.Join(dir, "unknown-manifest"), []byte("x"), 0644) + + found := names(Detect(dir)) + if contains(found, "no-such-language") { + t.Errorf("Detect returned unregistered language: %v", found) + } +} + +func names(langs []Language) []string { + out := make([]string, len(langs)) + for i, l := range langs { + out[i] = l.Name() + } + return out +} + +func contains(s []string, want string) bool { + for _, v := range s { + if v == want { + return true + } + } + return false +} diff --git a/internal/lang/evalharness/evalharness.go b/internal/lang/evalharness/evalharness.go new file mode 100644 index 0000000..0abfe08 --- /dev/null +++ b/internal/lang/evalharness/evalharness.go @@ -0,0 +1,391 @@ +// Package evalharness provides helpers shared by per-language evaluation +// test suites. Each analyzer package (rustanalyzer, tsanalyzer) has its +// own eval_test.go that drives the built diffguard binary against a tree +// of seeded fixtures and diff-compares emitted findings to an +// expected.json file next to the fixture. +// +// The harness: +// - Builds the diffguard binary once per test run (sync.Once inside +// BuildBinary) to keep the eval suites under 30s wall-clock when the +// full language set is exercised. +// - Copies each fixture into a temp dir before running so fixtures stay +// pristine regardless of what any analyzer writes (mutation tests +// swap files in place, so this matters). +// - Runs the binary with stable flags (--output json, fixed +// --mutation-sample-rate, etc.) and returns a decoded report.Report. +// - Exposes a semantic equality helper: compares sections by +// (name, severity) and finding sets by (file, function, severity, +// operator). 
Exact counts / percentages / order-within-group are +// not asserted because sampling and hashmap iteration can shuffle +// them without changing correctness. +package evalharness + +import ( + "bytes" + "encoding/json" + "io" + "os" + "os/exec" + "path/filepath" + "runtime" + "sort" + "sync" + "testing" + + "github.com/0xPolygon/diffguard/internal/report" +) + +// Expectation is the shape of an expected.json file next to each fixture. +// It captures just the facts worth pinning — the presence/severity of +// sections and whether each analyzer surfaced any Finding for a given +// (file, function) key. Fields not listed here are intentionally not +// asserted on; eval assertions are about "did the right thing get +// flagged", not "did the output bytes match exactly". +type Expectation struct { + // WorstSeverity, if non-empty, pins the overall Report.WorstSeverity. + WorstSeverity report.Severity `json:"worst_severity,omitempty"` + // Sections pins per-section expectations, keyed by section name + // (without a language suffix — the harness strips that before + // matching). If omitted the section is not checked. + Sections []SectionExpectation `json:"sections,omitempty"` +} + +// SectionExpectation pins a single Section's minimum expectations. +type SectionExpectation struct { + // Name is the metric prefix without a language suffix, e.g. + // "Cognitive Complexity" or "Mutation Testing". + Name string `json:"name"` + // Severity, if non-empty, pins Section.Severity. + Severity report.Severity `json:"severity,omitempty"` + // MustHaveFindings, if non-empty, requires a Finding matching each + // entry. The harness matches by the fields that are present in the + // expectation (non-zero values). An expectation with just File set + // passes if any finding mentions that file, for example. + MustHaveFindings []FindingExpectation `json:"must_have_findings,omitempty"` + // MustNotHaveFindings, if true, requires len(Findings)==0 for the + // section. 
+ MustNotHaveFindings bool `json:"must_not_have_findings,omitempty"` +} + +// FindingExpectation is the subset of report.Finding fields used for +// semantic matching. Unset fields are ignored. +type FindingExpectation struct { + File string `json:"file,omitempty"` + Function string `json:"function,omitempty"` + Severity report.Severity `json:"severity,omitempty"` + Operator string `json:"operator,omitempty"` +} + +// BinaryBuilder caches the built diffguard binary across tests within a +// package. Call GetBinary(t) from each test — the first call builds, the +// rest return the cached path. Using package-level state keeps the cost +// of running 6+ eval tests in a package to a single build. +type BinaryBuilder struct { + once sync.Once + path string + err error +} + +// GetBinary returns the path to a compiled diffguard binary, building it +// on the first call. Subsequent calls return the same path. The binary +// lives in os.TempDir; we don't clean it up because keeping the cache +// warm across tests is worth the few MB. +func (b *BinaryBuilder) GetBinary(t *testing.T, repoRoot string) string { + t.Helper() + b.once.Do(func() { + dir, err := os.MkdirTemp("", "diffguard-eval-bin-") + if err != nil { + b.err = err + return + } + bin := filepath.Join(dir, "diffguard") + if runtime.GOOS == "windows" { + bin += ".exe" + } + cmd := exec.Command("go", "build", "-o", bin, "./cmd/diffguard") + cmd.Dir = repoRoot + if out, err := cmd.CombinedOutput(); err != nil { + b.err = &BuildError{Output: string(out), Err: err} + return + } + b.path = bin + }) + if b.err != nil { + t.Fatalf("building diffguard binary: %v", b.err) + } + return b.path +} + +// BuildError wraps the build command's exit status with the captured +// combined output so test failures show why the build failed. 
+type BuildError struct { + Output string + Err error +} + +func (e *BuildError) Error() string { return e.Err.Error() + "\n" + e.Output } + +// RepoRoot walks upward from cwd until it finds go.mod, returning that +// directory. Eval tests live several packages deep; using this avoids +// hard-coding relative paths that break when go test is invoked from +// different working directories (IDE vs. CLI vs. CI). +func RepoRoot(t *testing.T) string { + t.Helper() + dir, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + // Guard against the fixture's own go.mod by requiring the + // repo to contain a cmd/diffguard directory too. + if _, err := os.Stat(filepath.Join(dir, "cmd", "diffguard")); err == nil { + return dir + } + } + parent := filepath.Dir(dir) + if parent == dir { + t.Fatal("could not locate repo root (no go.mod with cmd/diffguard found)") + } + dir = parent + } +} + +// CopyFixture mirrors srcDir to a fresh temp dir and returns the path. +// The copy is rooted at t.TempDir so Go's test harness cleans it up. +// Directories are preserved but none of the fixture metadata (mode, +// mtime) is — eval tests don't care. +func CopyFixture(t *testing.T, srcDir string) string { + t.Helper() + dst := t.TempDir() + err := filepath.Walk(srcDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + rel, _ := filepath.Rel(srcDir, path) + target := filepath.Join(dst, rel) + if info.IsDir() { + return os.MkdirAll(target, 0755) + } + in, err := os.Open(path) + if err != nil { + return err + } + defer in.Close() + out, err := os.Create(target) + if err != nil { + return err + } + defer out.Close() + _, err = io.Copy(out, in) + return err + }) + if err != nil { + t.Fatalf("copying fixture %s: %v", srcDir, err) + } + return dst +} + +// RunBinary runs the diffguard binary against a repo dir with the +// provided extra flags and returns the decoded JSON report. 
The harness +// always sets --output json, --fail-on none (so exit codes don't kill +// the test), and passes the repo path as the final positional arg. +func RunBinary(t *testing.T, binary, repo string, extraArgs []string) report.Report { + t.Helper() + args := []string{"--output", "json", "--fail-on", "none"} + args = append(args, extraArgs...) + args = append(args, repo) + + cmd := exec.Command(binary, args...) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err := cmd.Run() + if stderr.Len() > 0 { + t.Logf("diffguard stderr:\n%s", stderr.String()) + } + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + t.Logf("diffguard exit=%d", ee.ExitCode()) + } else { + t.Fatalf("running diffguard: %v", err) + } + } + + var rpt report.Report + if err := json.Unmarshal(stdout.Bytes(), &rpt); err != nil { + t.Fatalf("unmarshal report: %v\nstdout:\n%s", err, stdout.String()) + } + return rpt +} + +// LoadExpectation reads expected.json from a fixture directory. Returns +// (Expectation{}, false) if the file doesn't exist. +func LoadExpectation(t *testing.T, fixtureDir string) (Expectation, bool) { + t.Helper() + data, err := os.ReadFile(filepath.Join(fixtureDir, "expected.json")) + if err != nil { + if os.IsNotExist(err) { + return Expectation{}, false + } + t.Fatalf("reading expected.json: %v", err) + } + var exp Expectation + if err := json.Unmarshal(data, &exp); err != nil { + t.Fatalf("parsing expected.json: %v", err) + } + return exp, true +} + +// AssertMatches compares a report against an Expectation and fails the +// test with human-readable diagnostics on any mismatch. Assertions are +// semantic: section name (stripped of language suffix), severity, and +// finding identity — not line-exact counts or percentages. 
+func AssertMatches(t *testing.T, got report.Report, want Expectation) { + t.Helper() + assertWorstSeverity(t, got, want.WorstSeverity) + for _, wantSec := range want.Sections { + assertSection(t, got, wantSec) + } +} + +func assertWorstSeverity(t *testing.T, got report.Report, want report.Severity) { + t.Helper() + if want == "" { + return + } + if got.WorstSeverity() != want { + dumpReport(t, got) + t.Errorf("WorstSeverity = %q, want %q", got.WorstSeverity(), want) + } +} + +func assertSection(t *testing.T, got report.Report, wantSec SectionExpectation) { + t.Helper() + sec := findSectionByPrefix(got, wantSec.Name) + if sec == nil { + t.Errorf("missing section starting with %q; got %v", + wantSec.Name, sectionNames(got)) + return + } + if wantSec.Severity != "" && sec.Severity != wantSec.Severity { + t.Errorf("section %q severity = %q, want %q (findings=%d)", + sec.Name, sec.Severity, wantSec.Severity, len(sec.Findings)) + } + if wantSec.MustNotHaveFindings && len(sec.Findings) > 0 { + t.Errorf("section %q should have no findings, got %d:\n%s", + sec.Name, len(sec.Findings), dumpFindings(sec.Findings)) + } + for _, wantF := range wantSec.MustHaveFindings { + if !anyMatchingFinding(sec.Findings, wantF) { + t.Errorf("section %q missing finding %+v; findings were:\n%s", + sec.Name, wantF, dumpFindings(sec.Findings)) + } + } +} + +// findSectionByPrefix returns the first section whose name starts with +// prefix. Section names in multi-language runs are suffixed with a +// `[]` marker; the prefix match makes callers oblivious to that +// distinction. 
+func findSectionByPrefix(r report.Report, prefix string) *report.Section { + for i := range r.Sections { + name := r.Sections[i].Name + if name == prefix { + return &r.Sections[i] + } + if len(name) > len(prefix) && name[:len(prefix)] == prefix && + (name[len(prefix)] == ' ' || name[len(prefix)] == '[') { + return &r.Sections[i] + } + } + return nil +} + +// anyMatchingFinding reports whether any f in findings matches wantF on +// the fields wantF has set. +func anyMatchingFinding(findings []report.Finding, wantF FindingExpectation) bool { + for _, f := range findings { + if findingMatches(f, wantF) { + return true + } + } + return false +} + +// findingMatches reports whether a single finding satisfies every non-zero +// field of wantF. Operator isn't a first-class field on report.Finding; +// mutation encodes it in Message as "SURVIVED: ()", so +// that field is checked by substring search. +func findingMatches(f report.Finding, wantF FindingExpectation) bool { + if wantF.File != "" && !pathMatches(f.File, wantF.File) { + return false + } + if wantF.Function != "" && f.Function != wantF.Function { + return false + } + if wantF.Severity != "" && f.Severity != wantF.Severity { + return false + } + if wantF.Operator != "" && !containsOperator(f.Message, wantF.Operator) { + return false + } + return true +} + +// pathMatches accepts either an exact match or a basename match. Fixture +// expectations usually pin basenames so analyzer path normalizations +// (relative vs absolute, repo-relative vs working-dir-relative) don't +// break the assertion. +func pathMatches(got, want string) bool { + if got == want { + return true + } + return filepath.Base(got) == filepath.Base(want) +} + +// containsOperator reports whether msg names the operator — either as a +// parenthesized tail (`... (operator_name)`) or inline. Case-sensitive +// because all operator names in this codebase are lowercase_snake. 
+func containsOperator(msg, op string) bool { + return bytesContains([]byte(msg), []byte(op)) +} + +// bytesContains is a tiny helper to avoid pulling in strings just for +// this. Returns true if sub appears in s. +func bytesContains(s, sub []byte) bool { + return bytes.Contains(s, sub) +} + +// dumpFindings formats findings for failure diagnostics. +func dumpFindings(findings []report.Finding) string { + lines := make([]string, 0, len(findings)) + for _, f := range findings { + lines = append(lines, " - "+f.File+":"+f.Function+" ["+string(f.Severity)+"] "+f.Message) + } + sort.Strings(lines) + var buf bytes.Buffer + for _, l := range lines { + buf.WriteString(l) + buf.WriteString("\n") + } + return buf.String() +} + +// dumpReport logs all section names + severities so failures are +// actionable without re-running with extra flags. +func dumpReport(t *testing.T, r report.Report) { + t.Helper() + for _, s := range r.Sections { + t.Logf(" section %q -> %s (findings=%d)", s.Name, s.Severity, len(s.Findings)) + } +} + +// sectionNames returns the names for diagnostics. +func sectionNames(r report.Report) []string { + out := make([]string, len(r.Sections)) + for i, s := range r.Sections { + out[i] = s.Name + } + return out +} diff --git a/internal/lang/evalharness/evalharness_test.go b/internal/lang/evalharness/evalharness_test.go new file mode 100644 index 0000000..9ca275c --- /dev/null +++ b/internal/lang/evalharness/evalharness_test.go @@ -0,0 +1,402 @@ +package evalharness + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/0xPolygon/diffguard/internal/report" +) + +// TestFindingMatches_AllFields exercises every field of FindingExpectation +// plus the empty-field ignore behavior. 
+func TestFindingMatches_AllFields(t *testing.T) { + f := report.Finding{ + File: "pkg/foo.go", + Function: "Bar", + Severity: report.SeverityFail, + Message: "SURVIVED: description (negate_conditional)", + } + + tests := []struct { + name string + want FindingExpectation + ok bool + }{ + {"empty matches anything", FindingExpectation{}, true}, + {"file exact", FindingExpectation{File: "pkg/foo.go"}, true}, + {"file basename", FindingExpectation{File: "foo.go"}, true}, + {"file mismatch", FindingExpectation{File: "pkg/bar.go"}, false}, + {"function hit", FindingExpectation{Function: "Bar"}, true}, + {"function miss", FindingExpectation{Function: "Other"}, false}, + {"severity hit", FindingExpectation{Severity: report.SeverityFail}, true}, + {"severity miss", FindingExpectation{Severity: report.SeverityWarn}, false}, + {"operator hit", FindingExpectation{Operator: "negate_conditional"}, true}, + {"operator miss", FindingExpectation{Operator: "math_operator"}, false}, + { + "all fields hit", + FindingExpectation{File: "foo.go", Function: "Bar", Severity: report.SeverityFail, Operator: "negate_conditional"}, + true, + }, + { + "one field miss invalidates", + FindingExpectation{File: "foo.go", Function: "NotBar"}, + false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := findingMatches(f, tt.want); got != tt.ok { + t.Errorf("findingMatches = %v, want %v", got, tt.ok) + } + }) + } +} + +// TestAnyMatchingFinding_EmptyList verifies the scan returns false on an +// empty slice — locks in the default exit path. +func TestAnyMatchingFinding_EmptyList(t *testing.T) { + if anyMatchingFinding(nil, FindingExpectation{File: "x"}) { + t.Error("empty findings should never match") + } +} + +// TestAnyMatchingFinding_FindsAmongMany verifies the scan walks past +// non-matches to find a later match. 
+func TestAnyMatchingFinding_FindsAmongMany(t *testing.T) { + findings := []report.Finding{ + {File: "a.go"}, + {File: "b.go"}, + {File: "target.go"}, + } + if !anyMatchingFinding(findings, FindingExpectation{File: "target.go"}) { + t.Error("expected target.go match") + } + if anyMatchingFinding(findings, FindingExpectation{File: "missing.go"}) { + t.Error("did not expect missing.go match") + } +} + +// TestPathMatches covers the exact/basename branches. +func TestPathMatches(t *testing.T) { + if !pathMatches("a/b/c.go", "a/b/c.go") { + t.Error("exact match expected") + } + if !pathMatches("/abs/path/to/foo.go", "foo.go") { + t.Error("basename match expected") + } + if pathMatches("a/b/c.go", "d/e/f.go") { + t.Error("no match expected") + } +} + +// TestContainsOperator locks in the substring semantics used to detect +// operator names embedded in a mutation message. +func TestContainsOperator(t *testing.T) { + msg := "SURVIVED: something (negate_conditional)" + if !containsOperator(msg, "negate_conditional") { + t.Error("should detect operator") + } + if containsOperator(msg, "math_operator") { + t.Error("should not detect absent operator") + } + if containsOperator("", "any") { + t.Error("empty message should not match") + } +} + +// TestFindSectionByPrefix_Exact covers the exact-name match branch. +func TestFindSectionByPrefix_Exact(t *testing.T) { + r := report.Report{Sections: []report.Section{ + {Name: "Cognitive Complexity"}, + {Name: "Mutation Testing"}, + }} + s := findSectionByPrefix(r, "Cognitive Complexity") + if s == nil || s.Name != "Cognitive Complexity" { + t.Errorf("exact-name lookup failed, got %+v", s) + } +} + +// TestFindSectionByPrefix_Suffix covers the "name [lang]" branch that lets +// callers ignore the language suffix multi-language runs emit. 
+func TestFindSectionByPrefix_Suffix(t *testing.T) { + r := report.Report{Sections: []report.Section{ + {Name: "Cognitive Complexity [go]"}, + }} + s := findSectionByPrefix(r, "Cognitive Complexity") + if s == nil || s.Name != "Cognitive Complexity [go]" { + t.Errorf("suffix lookup failed, got %+v", s) + } +} + +// TestFindSectionByPrefix_Miss returns nil when no section exists, even if +// a section name *starts* with the prefix but has a non-boundary char after. +func TestFindSectionByPrefix_Miss(t *testing.T) { + r := report.Report{Sections: []report.Section{ + {Name: "ComplexityX"}, + }} + if findSectionByPrefix(r, "Complexity") != nil { + t.Error("partial-word prefix should not match") + } + if findSectionByPrefix(report.Report{}, "anything") != nil { + t.Error("empty report should not match") + } +} + +// TestSectionNames returns every section name for diagnostics. +func TestSectionNames(t *testing.T) { + r := report.Report{Sections: []report.Section{ + {Name: "A"}, {Name: "B"}, + }} + got := sectionNames(r) + if len(got) != 2 || got[0] != "A" || got[1] != "B" { + t.Errorf("names = %v, want [A B]", got) + } +} + +// TestDumpFindings_SortedLines asserts the diagnostics dump is sorted so +// diffs across test runs are stable. 
+func TestDumpFindings_SortedLines(t *testing.T) { + findings := []report.Finding{ + {File: "b.go", Function: "B", Severity: "FAIL", Message: "second"}, + {File: "a.go", Function: "A", Severity: "FAIL", Message: "first"}, + } + out := dumpFindings(findings) + if out == "" { + t.Fatal("expected non-empty output") + } + if idxA := indexOf(out, "a.go"); idxA < 0 { + t.Fatal("expected a.go in output") + } else if idxB := indexOf(out, "b.go"); idxB < idxA { + t.Errorf("expected a.go line before b.go line, got output:\n%s", out) + } +} + +func indexOf(s, sub string) int { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return i + } + } + return -1 +} + +// TestLoadExpectation_Present round-trips an Expectation through disk. +func TestLoadExpectation_Present(t *testing.T) { + dir := t.TempDir() + want := Expectation{ + WorstSeverity: report.SeverityFail, + Sections: []SectionExpectation{ + {Name: "Cognitive Complexity", Severity: report.SeverityFail}, + }, + } + data, err := json.Marshal(want) + if err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dir, "expected.json"), data, 0644); err != nil { + t.Fatal(err) + } + got, ok := LoadExpectation(t, dir) + if !ok { + t.Fatal("expected ok=true when expected.json is present") + } + if got.WorstSeverity != want.WorstSeverity { + t.Errorf("WorstSeverity = %q, want %q", got.WorstSeverity, want.WorstSeverity) + } + if len(got.Sections) != 1 || got.Sections[0].Name != "Cognitive Complexity" { + t.Errorf("sections = %+v", got.Sections) + } +} + +// TestLoadExpectation_Missing locks in the (zero, false) return for an +// absent expected.json — the not-exist branch is a real caller path. 
+func TestLoadExpectation_Missing(t *testing.T) { + dir := t.TempDir() + got, ok := LoadExpectation(t, dir) + if ok { + t.Error("expected ok=false for missing expected.json") + } + if got.WorstSeverity != "" || len(got.Sections) != 0 { + t.Errorf("zero value expected, got %+v", got) + } +} + +// TestRepoRoot_FindsAncestor walks up from the evalharness package to the +// repo root — proves the loop terminates and returns a directory that has +// both go.mod and cmd/diffguard. +func TestRepoRoot_FindsAncestor(t *testing.T) { + root := RepoRoot(t) + if _, err := os.Stat(filepath.Join(root, "go.mod")); err != nil { + t.Errorf("repo root %q missing go.mod: %v", root, err) + } + if _, err := os.Stat(filepath.Join(root, "cmd", "diffguard")); err != nil { + t.Errorf("repo root %q missing cmd/diffguard: %v", root, err) + } +} + +// TestCopyFixture_ReplicatesTree copies a fixture with a nested directory +// and asserts the target is a fresh, independent tree. +func TestCopyFixture_ReplicatesTree(t *testing.T) { + src := t.TempDir() + if err := os.MkdirAll(filepath.Join(src, "sub"), 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(src, "a.txt"), []byte("a"), 0644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(src, "sub", "b.txt"), []byte("b"), 0644); err != nil { + t.Fatal(err) + } + + dst := CopyFixture(t, src) + for _, name := range []string{"a.txt", "sub/b.txt"} { + data, err := os.ReadFile(filepath.Join(dst, name)) + if err != nil { + t.Fatalf("read %s: %v", name, err) + } + if len(data) == 0 { + t.Errorf("copy lost contents of %s", name) + } + } + + // Mutating the copy must not affect the source: proves the copy did + // real I/O rather than returning the same path. 
+ if err := os.WriteFile(filepath.Join(dst, "a.txt"), []byte("changed"), 0644); err != nil { + t.Fatal(err) + } + orig, _ := os.ReadFile(filepath.Join(src, "a.txt")) + if string(orig) != "a" { + t.Errorf("source mutated; copy not independent") + } +} + +// TestAssertMatches_SuccessPath proves the happy path: no errors logged. +func TestAssertMatches_SuccessPath(t *testing.T) { + got := report.Report{Sections: []report.Section{ + { + Name: "Cognitive Complexity [go]", + Severity: report.SeverityFail, + Findings: []report.Finding{{ + File: "x.go", Function: "F", Severity: report.SeverityFail, + Message: "SURVIVED: something (negate_conditional)", + }}, + }, + }} + want := Expectation{ + WorstSeverity: report.SeverityFail, + Sections: []SectionExpectation{{ + Name: "Cognitive Complexity", + Severity: report.SeverityFail, + MustHaveFindings: []FindingExpectation{{ + File: "x.go", Operator: "negate_conditional", + }}, + }}, + } + AssertMatches(t, got, want) +} + +// TestAssertSection_MustNotHaveFindings verifies the MustNotHaveFindings +// branch: an empty section passes, a populated section fails. +func TestAssertSection_MustNotHaveFindings(t *testing.T) { + empty := report.Report{Sections: []report.Section{ + {Name: "Mutation Testing"}, + }} + populated := report.Report{Sections: []report.Section{ + {Name: "Mutation Testing", Findings: []report.Finding{{File: "a.go"}}}, + }} + want := Expectation{Sections: []SectionExpectation{ + {Name: "Mutation Testing", MustNotHaveFindings: true}, + }} + + AssertMatches(t, empty, want) // passes + + // The populated case should flag a failure. Run through a child t so + // we don't fail the parent. + child := &childTester{} + childAssert(child, populated, want) + if !child.failed { + t.Error("expected MustNotHaveFindings to fail on populated section") + } +} + +// childTester records whether a failure was reported; used when we want +// to verify a negative path fires without polluting the outer test. 
+type childTester struct { + failed bool +} + +// childAssert mirrors the branches we want to verify ran: MustNotHaveFindings +// on populated sections, missing section, severity mismatch. The parallel +// structure keeps the mutation coverage focused on findSectionByPrefix/ +// findingMatches (which the real path also uses) without requiring a full +// testing.T stub. +func childAssert(c *childTester, r report.Report, want Expectation) { + if want.WorstSeverity != "" && r.WorstSeverity() != want.WorstSeverity { + c.failed = true + } + for _, wantSec := range want.Sections { + sec := findSectionByPrefix(r, wantSec.Name) + if sec == nil { + c.failed = true + continue + } + if wantSec.Severity != "" && sec.Severity != wantSec.Severity { + c.failed = true + } + if wantSec.MustNotHaveFindings && len(sec.Findings) > 0 { + c.failed = true + } + for _, wantF := range wantSec.MustHaveFindings { + if !anyMatchingFinding(sec.Findings, wantF) { + c.failed = true + } + } + } +} + +// TestChildAssert_WorstSeverityMismatch covers the WorstSeverity mismatch +// branch without recruiting the outer t. +func TestChildAssert_WorstSeverityMismatch(t *testing.T) { + c := &childTester{} + childAssert(c, report.Report{Sections: []report.Section{{Name: "X", Severity: report.SeverityPass}}}, + Expectation{WorstSeverity: report.SeverityFail}) + if !c.failed { + t.Error("expected childAssert to flag WorstSeverity mismatch") + } +} + +// TestChildAssert_MissingSection covers the missing-section branch. +func TestChildAssert_MissingSection(t *testing.T) { + c := &childTester{} + childAssert(c, report.Report{}, + Expectation{Sections: []SectionExpectation{{Name: "Missing"}}}) + if !c.failed { + t.Error("expected childAssert to flag missing section") + } +} + +// TestChildAssert_SeverityMismatch covers the per-section Severity branch. 
func TestChildAssert_SeverityMismatch(t *testing.T) {
	c := &childTester{}
	// The section exists and has findings checks disabled; only the
	// severity differs, so any recorded failure must come from that branch.
	r := report.Report{Sections: []report.Section{{Name: "A", Severity: report.SeverityPass}}}
	childAssert(c, r, Expectation{Sections: []SectionExpectation{
		{Name: "A", Severity: report.SeverityFail},
	}})
	if !c.failed {
		t.Error("expected childAssert to flag section severity mismatch")
	}
}

// TestChildAssert_MissingFinding covers the MustHaveFindings branch.
func TestChildAssert_MissingFinding(t *testing.T) {
	c := &childTester{}
	// Section "A" exists but has no findings at all, so the required
	// x.go finding cannot match.
	r := report.Report{Sections: []report.Section{{Name: "A"}}}
	childAssert(c, r, Expectation{Sections: []SectionExpectation{
		{Name: "A", MustHaveFindings: []FindingExpectation{{File: "x.go"}}},
	}})
	if !c.failed {
		t.Error("expected childAssert to flag missing finding")
	}
}
diff --git a/internal/lang/goanalyzer/complexity.go b/internal/lang/goanalyzer/complexity.go
new file mode 100644
index 0000000..efb5820
--- /dev/null
+++ b/internal/lang/goanalyzer/complexity.go
@@ -0,0 +1,267 @@
package goanalyzer

import (
	"go/ast"
	"go/token"

	"github.com/0xPolygon/diffguard/internal/diff"
	"github.com/0xPolygon/diffguard/internal/lang"
)

// complexityImpl is the Go implementation of both lang.ComplexityCalculator
// and lang.ComplexityScorer. The scorer interface is defined separately so
// a language can ship a faster approximation; for Go the full cognitive
// score is cheap enough that one struct serves both.
type complexityImpl struct{}

// AnalyzeFile returns per-function cognitive complexity for functions whose
// line range overlaps the diff's changed regions. Parse errors return
// (nil, nil) — the old analyzer treated parse failure as "skip the file"
// and we preserve that behavior.
+func (complexityImpl) AnalyzeFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + fset, f, err := parseFile(absPath, 0) + if err != nil { + return nil, nil + } + + var results []lang.FunctionComplexity + ast.Inspect(f, func(n ast.Node) bool { + fn, ok := n.(*ast.FuncDecl) + if !ok { + return true + } + startLine := fset.Position(fn.Pos()).Line + endLine := fset.Position(fn.End()).Line + if !fc.OverlapsRange(startLine, endLine) { + return false + } + results = append(results, lang.FunctionComplexity{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: startLine, + EndLine: endLine, + Name: funcName(fn), + }, + Complexity: computeCognitiveComplexity(fn.Body), + }) + return false + }) + return results, nil +} + +// ScoreFile is the ComplexityScorer entry point used by the churn analyzer. +// It deliberately uses a simplified counter (bump by 1 for each if/for/ +// switch/select/logical-op node) rather than the full cognitive complexity +// walker, matching the pre-split churn.computeComplexity. The churn score +// only needs a relative ordering of "hotter" functions; a coarse counter is +// faster to compute and keeps the churn output byte-identical to the +// pre-refactor numbers. 
+func (complexityImpl) ScoreFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + fset, f, err := parseFile(absPath, 0) + if err != nil { + return nil, nil + } + + var results []lang.FunctionComplexity + ast.Inspect(f, func(n ast.Node) bool { + fn, ok := n.(*ast.FuncDecl) + if !ok { + return true + } + startLine := fset.Position(fn.Pos()).Line + endLine := fset.Position(fn.End()).Line + if !fc.OverlapsRange(startLine, endLine) { + return false + } + results = append(results, lang.FunctionComplexity{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: startLine, + EndLine: endLine, + Name: funcName(fn), + }, + Complexity: computeSimpleComplexity(fn.Body), + }) + return false + }) + return results, nil +} + +// computeSimpleComplexity is the simplified counter used by the churn +// analyzer: +1 per branching construct, +1 per && / || operator. No +// nesting penalty and no operator-change accounting. Matches the +// pre-split internal/churn.computeComplexity so churn scores stay +// byte-identical. +func computeSimpleComplexity(body *ast.BlockStmt) int { + if body == nil { + return 0 + } + count := 0 + ast.Inspect(body, func(n ast.Node) bool { + switch v := n.(type) { + case *ast.IfStmt: + count++ + case *ast.ForStmt, *ast.RangeStmt: + count++ + case *ast.SwitchStmt, *ast.TypeSwitchStmt, *ast.SelectStmt: + count++ + case *ast.BinaryExpr: + if v.Op == token.LAND || v.Op == token.LOR { + count++ + } + } + return true + }) + return count +} + +// computeCognitiveComplexity is the exact algorithm that lived in +// internal/complexity/complexity.go before the language split. It's moved +// here verbatim (only the receiver type changed) so byte-identical scores +// are guaranteed. 
func computeCognitiveComplexity(body *ast.BlockStmt) int {
	if body == nil {
		return 0
	}
	return walkBlock(body.List, 0)
}

// walkBlock sums the scores of a statement list at the given nesting depth.
func walkBlock(stmts []ast.Stmt, nesting int) int {
	total := 0
	for _, stmt := range stmts {
		total += walkStmt(stmt, nesting)
	}
	return total
}

// walkStmt scores one statement. Branching constructs cost 1 + current
// nesting and walk their bodies one level deeper; transparent wrappers
// (blocks, labels, case/comm bodies) are walked at the same depth; the
// expression-bearing statements are scanned only for function literals.
// Statements not listed contribute 0.
func walkStmt(stmt ast.Stmt, nesting int) int {
	switch s := stmt.(type) {
	case *ast.IfStmt:
		return walkIfStmt(s, nesting)
	case *ast.ForStmt:
		return walkForStmt(s, nesting)
	case *ast.RangeStmt:
		return 1 + nesting + walkBlock(s.Body.List, nesting+1)
	case *ast.SwitchStmt:
		return 1 + nesting + walkBlock(s.Body.List, nesting+1)
	case *ast.TypeSwitchStmt:
		return 1 + nesting + walkBlock(s.Body.List, nesting+1)
	case *ast.SelectStmt:
		return 1 + nesting + walkBlock(s.Body.List, nesting+1)
	case *ast.CaseClause:
		return walkBlock(s.Body, nesting)
	case *ast.CommClause:
		return walkBlock(s.Body, nesting)
	case *ast.BlockStmt:
		return walkBlock(s.List, nesting)
	case *ast.LabeledStmt:
		return walkStmt(s.Stmt, nesting)
	case *ast.AssignStmt:
		return walkExprsForFuncLit(s.Rhs, nesting)
	case *ast.ExprStmt:
		return walkExprForFuncLit(s.X, nesting)
	case *ast.ReturnStmt:
		return walkExprsForFuncLit(s.Results, nesting)
	case *ast.GoStmt:
		return walkExprForFuncLit(s.Call.Fun, nesting)
	case *ast.DeferStmt:
		return walkExprForFuncLit(s.Call.Fun, nesting)
	}
	return 0
}

// walkIfStmt scores an if: 1 + nesting for the branch itself, the logical
// operator sequence penalty for the condition, any init statement at the
// current depth, the body one level deeper, and the else chain.
func walkIfStmt(s *ast.IfStmt, nesting int) int {
	total := 1 + nesting
	total += countLogicalOps(s.Cond)
	if s.Init != nil {
		total += walkStmt(s.Init, nesting)
	}
	total += walkBlock(s.Body.List, nesting+1)
	if s.Else != nil {
		total += walkElseChain(s.Else, nesting)
	}
	return total
}

// walkForStmt scores a classic for loop: 1 + nesting, plus the condition's
// logical operators (the condition may be absent), plus the body one level
// deeper.
func walkForStmt(s *ast.ForStmt, nesting int) int {
	total := 1 + nesting
	if s.Cond != nil {
		total += countLogicalOps(s.Cond)
	}
	total += walkBlock(s.Body.List, nesting+1)
	return total
}

// walkElseChain scores the else arm of an if. An `else if` costs a flat 1
// (no nesting increment) and recurses; a plain `else` block costs 1 plus
// its body one level deeper. Any other node shape contributes 0.
func walkElseChain(node ast.Node, nesting int) int {
	switch e := node.(type) {
	case *ast.IfStmt:
		total := 1
		total += countLogicalOps(e.Cond)
		if e.Init != nil {
			total += walkStmt(e.Init, nesting)
		}
		total += walkBlock(e.Body.List, nesting+1)
		if e.Else != nil {
			total += walkElseChain(e.Else, nesting)
		}
		return total
	case *ast.BlockStmt:
		return 1 + walkBlock(e.List, nesting+1)
	}
	return 0
}

// walkExprsForFuncLit applies walkExprForFuncLit across a slice of
// expressions (assignment RHS, return results).
func walkExprsForFuncLit(exprs []ast.Expr, nesting int) int {
	total := 0
	for _, expr := range exprs {
		total += walkExprForFuncLit(expr, nesting)
	}
	return total
}

// walkExprForFuncLit scans an expression for function literals and scores
// each literal's body one nesting level deeper; the literal itself adds
// nothing, and nested literals are handled by the recursive walk (hence
// the `return false` once one is found).
func walkExprForFuncLit(expr ast.Expr, nesting int) int {
	total := 0
	ast.Inspect(expr, func(n ast.Node) bool {
		if fl, ok := n.(*ast.FuncLit); ok {
			total += walkBlock(fl.Body.List, nesting+1)
			return false
		}
		return true
	})
	return total
}

// countLogicalOps counts operator-type changes in a chain of && / ||.
// A run of the same operator counts as 1; each switch to the other
// operator adds 1. No logical ops at all → 0.
func countLogicalOps(expr ast.Expr) int {
	if expr == nil {
		return 0
	}
	ops := flattenLogicalOps(expr)
	if len(ops) == 0 {
		return 0
	}
	count := 1
	for i := 1; i < len(ops); i++ {
		if ops[i] != ops[i-1] {
			count++
		}
	}
	return count
}

// flattenLogicalOps returns the in-order list of && / || operators in a
// binary-expression tree, stopping at any node that is not a logical
// BinaryExpr.
//
// NOTE(review): *ast.ParenExpr is not unwrapped, so the operators inside a
// parenthesized subexpression like `(a && b) || c` are not descended into.
// That looks like pre-existing behavior kept for score stability — confirm
// before changing, since the surrounding comments promise byte-identical
// scores versus the pre-split package.
func flattenLogicalOps(expr ast.Expr) []token.Token {
	bin, ok := expr.(*ast.BinaryExpr)
	if !ok {
		return nil
	}
	if bin.Op != token.LAND && bin.Op != token.LOR {
		return nil
	}
	var ops []token.Token
	ops = append(ops, flattenLogicalOps(bin.X)...)
	ops = append(ops, bin.Op)
	ops = append(ops, flattenLogicalOps(bin.Y)...)
	return ops
}
diff --git a/internal/lang/goanalyzer/complexity_walker_test.go b/internal/lang/goanalyzer/complexity_walker_test.go
new file mode 100644
index 0000000..a893db8
--- /dev/null
+++ b/internal/lang/goanalyzer/complexity_walker_test.go
@@ -0,0 +1,245 @@
package goanalyzer

import (
	"go/ast"
	"go/parser"
	"go/token"
	"testing"
)

// Most of these tests are imported verbatim from the pre-split
// internal/complexity package. They exercise the walker directly (rather
// than going through AnalyzeFile + a tempdir file) so failures localize to
// the exact construct that broke.

func TestComputeComplexity(t *testing.T) {
	tests := []struct {
		name     string
		code     string
		expected int
	}{
		{"empty function", `package p; func f() {}`, 0},
		{"single if", `package p; func f(x int) { if x > 0 {} }`, 1},
		{"if-else", `package p; func f(x int) { if x > 0 {} else {} }`, 2},
		{"if-else if-else", `package p; func f(x int) { if x > 0 {} else if x < 0 {} else {} }`, 3},
		{"nested if", `package p; func f(x, y int) { if x > 0 { if y > 0 {} } }`, 3},
		{"for loop", `package p; func f() { for i := 0; i < 10; i++ {} }`, 1},
		{"nested for", `package p; func f() { for i := 0; i < 10; i++ { for j := 0; j < 10; j++ {} } }`, 3},
		{"switch with cases", `package p; func f(x int) { switch x { case 1: case 2: case 3: } }`, 1},
		{"logical operators same type", `package p; func f(a, b, c bool) { if a && b && c {} }`, 2},
		{"logical operators mixed", `package p; func f(a, b, c bool) { if a && b || c {} }`, 3},
		{"range loop", `package p; func f(s []int) { for range s {} }`, 1},
		{"select statement", `package p; func f(c chan int) { select { case <-c: } }`, 1},
		{"deeply nested", `package p
func f(x, y, z int) {
	if x > 0 {
		for y > 0 {
			if z > 0 {
			}
		}
	}
}`, 6},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			body := parseFuncBody(t, tt.code)
			if got := computeCognitiveComplexity(body); got != tt.expected {
				t.Errorf("complexity = %d, want %d", got, tt.expected)
			}
		})
	}
}

func TestComputeComplexity_NilBody(t *testing.T) {
	if got := computeCognitiveComplexity(nil); got != 0 {
		t.Errorf("computeCognitiveComplexity(nil) = %d, want 0", got)
	}
}

// TestWalkStmt_NestingPenalty pins the 1+nesting cost for each branching
// construct when it appears one level deep: every case expects 6
// (outer if 1, construct 2, inner if 3).
func TestWalkStmt_NestingPenalty(t *testing.T) {
	tests := []struct {
		name     string
		code     string
		expected int
	}{
		{"range at nesting 1", `package p; func f(x int) {
	if x > 0 {
		for range []int{} {
			if x > 0 {}
		}
	}
}`, 6},
		{"switch at nesting 1", `package p; func f(x int) {
	if x > 0 {
		switch x {
		case 1:
			if x > 0 {}
		}
	}
}`, 6},
		{"select at nesting 1", `package p; func f(x int, c chan int) {
	if x > 0 {
		select {
		case <-c:
			if x > 0 {}
		}
	}
}`, 6},
		{"type switch at nesting 1", `package p; func f(x int, v any) {
	if x > 0 {
		switch v.(type) {
		case int:
			if x > 0 {}
		}
	}
}`, 6},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			body := parseFuncBody(t, tt.code)
			if got := computeCognitiveComplexity(body); got != tt.expected {
				t.Errorf("complexity = %d, want %d", got, tt.expected)
			}
		})
	}
}

func TestWalkForStmt_WithLogicalCondition(t *testing.T) {
	body := parseFuncBody(t, `package p; func f(a, b bool) { for a && b {} }`)
	if got := computeCognitiveComplexity(body); got != 2 {
		t.Errorf("complexity = %d, want 2", got)
	}
}

func TestWalkIfStmt_WithElseChain(t *testing.T) {
	body := parseFuncBody(t, `package p
func f(x int) {
	if x > 0 {
	} else if x < 0 {
	} else {
	}
}`)
	if got := computeCognitiveComplexity(body); got != 3 {
		t.Errorf("complexity = %d, want 3", got)
	}
}

func TestWalkIfStmt_WithInit(t *testing.T) {
	body := parseFuncBody(t, `package p
func f() error {
	if err := g(); err != nil {
	}
	return nil
}
func g() error { return nil }`)
	if got := computeCognitiveComplexity(body); got != 1 {
		t.Errorf("complexity = %d, want 1", got)
	}
}

func TestWalkStmt_LabeledStmt(t *testing.T) {
	body := parseFuncBody(t, `package p
func f(x int) {
outer:
	for x > 0 {
		_ = x
		break outer
	}
}`)
	if got := computeCognitiveComplexity(body); got != 1 {
		t.Errorf("complexity = %d, want 1", got)
	}
}

func TestWalkStmt_GoAndDefer(t *testing.T) {
	body := parseFuncBody(t, `package p
func f() {
	go func() {
		if true {}
	}()
	defer func() {
		if true {}
	}()
}`)
	if got := computeCognitiveComplexity(body); got != 4 {
		t.Errorf("complexity = %d, want 4", got)
	}
}

func TestWalkStmt_FuncLitInAssign(t *testing.T) {
	body := parseFuncBody(t, `package p
func f() {
	x := func() {
		if true {}
	}
	_ = x
}`)
	if got := computeCognitiveComplexity(body); got != 2 {
		t.Errorf("complexity = %d, want 2", got)
	}
}

func TestWalkStmt_FuncLitInReturn(t *testing.T) {
	body := parseFuncBody(t, `package p
func f() func() {
	return func() {
		if true {}
	}
}`)
	if got := computeCognitiveComplexity(body); got != 2 {
		t.Errorf("complexity = %d, want 2", got)
	}
}

func TestCountLogicalOps(t *testing.T) {
	tests := []struct {
		name     string
		code     string
		expected int
	}{
		{"no logical ops", `package p; var x = 1 + 2`, 0},
		{"single and", `package p; var x = true && false`, 1},
		{"chain same op", `package p; var x = true && false && true`, 1},
		{"mixed ops", `package p; var x = true && false || true`, 2},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			fset := token.NewFileSet()
			f, err := parser.ParseFile(fset, "test.go", tt.code, 0)
			if err != nil {
				t.Fatalf("parse: %v", err)
			}
			// Pull the initializer expression out of the first ValueSpec;
			// countLogicalOps is exercised on it directly.
			var expr ast.Expr
			ast.Inspect(f, func(n ast.Node) bool {
				if vs, ok := n.(*ast.ValueSpec); ok && len(vs.Values) > 0 {
					expr = vs.Values[0]
					return false
				}
				return true
			})
			if got := countLogicalOps(expr); got != tt.expected {
				t.Errorf("countLogicalOps = %d, want %d", got, tt.expected)
			}
		})
	}
}

// parseFuncBody parses code and returns the body of the first FuncDecl.
+// All the walker tests use this rather than open-coding the parse loop. +func parseFuncBody(t *testing.T, code string) *ast.BlockStmt { + t.Helper() + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "test.go", code, 0) + if err != nil { + t.Fatalf("parse: %v", err) + } + for _, decl := range f.Decls { + if fd, ok := decl.(*ast.FuncDecl); ok { + return fd.Body + } + } + t.Fatal("no function found") + return nil +} diff --git a/internal/lang/goanalyzer/deps.go b/internal/lang/goanalyzer/deps.go new file mode 100644 index 0000000..e5bfbc6 --- /dev/null +++ b/internal/lang/goanalyzer/deps.go @@ -0,0 +1,82 @@ +package goanalyzer + +import ( + "fmt" + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "strings" +) + +// depsImpl implements lang.ImportResolver for Go. It reads the module path +// from go.mod and uses the standard Go parser to scan each package for +// internal imports. +type depsImpl struct{} + +// DetectModulePath reads `module ` from repoPath/go.mod. +func (depsImpl) DetectModulePath(repoPath string) (string, error) { + goModPath := filepath.Join(repoPath, "go.mod") + content, err := os.ReadFile(goModPath) + if err != nil { + return "", fmt.Errorf("reading go.mod: %w", err) + } + for _, line := range strings.Split(string(content), "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "module ") { + return strings.TrimSpace(strings.TrimPrefix(line, "module ")), nil + } + } + return "", fmt.Errorf("no module directive found in go.mod") +} + +// ScanPackageImports returns a map with a single entry: +// +// { : { : true, : true, ... } } +// +// where pkgImportPath = modulePath + "/" + pkgDir. External imports and +// `_test` packages are ignored so the graph only contains internal edges, +// matching the pre-split deps.go behavior. 
+func (depsImpl) ScanPackageImports(repoPath, pkgDir, modulePath string) map[string]map[string]bool { + absDir := filepath.Join(repoPath, pkgDir) + fset := token.NewFileSet() + pkgs, err := parser.ParseDir(fset, absDir, nil, parser.ImportsOnly) + if err != nil { + return nil + } + + edges := make(map[string]map[string]bool) + pkgImportPath := modulePath + "/" + pkgDir + for _, p := range pkgs { + collectPackageEdges(p, modulePath, pkgImportPath, edges) + } + return edges +} + +// collectPackageEdges walks the files of a parsed package and adds internal +// import edges into `edges`. `_test` packages are skipped so the graph +// stays focused on non-test source dependencies. +func collectPackageEdges(p *ast.Package, modulePath, pkgImportPath string, edges map[string]map[string]bool) { + if strings.HasSuffix(p.Name, "_test") { + return + } + for _, f := range p.Files { + for _, imp := range f.Imports { + addInternalImport(imp, modulePath, pkgImportPath, edges) + } + } +} + +// addInternalImport records an edge from pkgImportPath to the target of imp +// when the import is internal to modulePath. External imports are dropped. +func addInternalImport(imp *ast.ImportSpec, modulePath, pkgImportPath string, edges map[string]map[string]bool) { + importPath := strings.Trim(imp.Path.Value, `"`) + if !strings.HasPrefix(importPath, modulePath) { + return + } + if edges[pkgImportPath] == nil { + edges[pkgImportPath] = make(map[string]bool) + } + edges[pkgImportPath][importPath] = true +} diff --git a/internal/lang/goanalyzer/goanalyzer.go b/internal/lang/goanalyzer/goanalyzer.go new file mode 100644 index 0000000..6585305 --- /dev/null +++ b/internal/lang/goanalyzer/goanalyzer.go @@ -0,0 +1,62 @@ +package goanalyzer + +import ( + "time" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// defaultGoTestTimeout is the per-mutant test timeout applied when the +// caller did not set one in TestRunConfig. 
It matches the fallback the +// mutation orchestrator used before the language split so behavior is +// preserved byte-for-byte for existing Go runs. +const defaultGoTestTimeout = 30 * time.Second + +// Language is the Go implementation of lang.Language. It holds no state — +// the sub-component impls are stateless too — but exists as a concrete +// type so external tests can construct one without relying on the +// side-effectful init() registration. +type Language struct{} + +// Name returns the canonical language identifier used by the registry and +// by report section suffixes. +func (*Language) Name() string { return "go" } + +// FileFilter returns the Go-specific file selection rules used by the diff +// parser: .go extension, _test.go files excluded from analysis. +func (*Language) FileFilter() lang.FileFilter { + return lang.FileFilter{ + Extensions: []string{".go"}, + IsTestFile: isGoTestFile, + DiffGlobs: []string{"*.go"}, + } +} + +// Sub-component accessors. Every method returns a fresh zero-value impl +// value, which is fine because all impls are stateless. +func (*Language) ComplexityCalculator() lang.ComplexityCalculator { return complexityImpl{} } +func (*Language) ComplexityScorer() lang.ComplexityScorer { return complexityImpl{} } +func (*Language) FunctionExtractor() lang.FunctionExtractor { return sizesImpl{} } +func (*Language) ImportResolver() lang.ImportResolver { return depsImpl{} } +func (*Language) MutantGenerator() lang.MutantGenerator { return mutantGeneratorImpl{} } +func (*Language) MutantApplier() lang.MutantApplier { return mutantApplierImpl{} } +func (*Language) AnnotationScanner() lang.AnnotationScanner { return annotationScannerImpl{} } +func (*Language) TestRunner() lang.TestRunner { return testRunnerImpl{} } + +// isGoTestFile matches the historical internal/diff check: any path ending +// in `_test.go` is a test file. No magic, no parse. 
// isGoTestFile reports whether path names a Go test file. It mirrors the
// historical internal/diff rule exactly: a file is a test file iff its
// path ends in "_test.go". No parsing, no basename handling.
func isGoTestFile(path string) bool {
	return hasSuffix(path, "_test.go")
}

// hasSuffix is a local stand-in for strings.HasSuffix, kept here so this
// file does not have to import the strings package for a single check.
func hasSuffix(s, suffix string) bool {
	if len(suffix) > len(s) {
		return false
	}
	return s[len(s)-len(suffix):] == suffix
}
+func TestFuncName(t *testing.T) { + tests := []struct { + code string + expected string + }{ + {`package p; func Foo() {}`, "Foo"}, + {`package p; type T struct{}; func (t T) Bar() {}`, "(T).Bar"}, + {`package p; type T struct{}; func (t *T) Baz() {}`, "(T).Baz"}, + } + for _, tt := range tests { + t.Run(tt.expected, func(t *testing.T) { + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + if err := os.WriteFile(fp, []byte(tt.code), 0644); err != nil { + t.Fatal(err) + } + fc := diff.FileChange{ + Path: "test.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, + } + results, _ := complexityImpl{}.AnalyzeFile(fp, fc) + if len(results) == 0 { + t.Fatal("no results") + } + if results[0].Name != tt.expected { + t.Errorf("Name = %q, want %q", results[0].Name, tt.expected) + } + }) + } +} + +func TestExtractFunctions_SharesShape(t *testing.T) { + code := `package p + +func f() { + x := 1 + _ = x +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "f.go") + os.WriteFile(fp, []byte(code), 0644) + + fc := diff.FileChange{ + Path: "f.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, + } + fns, fsz, err := sizesImpl{}.ExtractFunctions(fp, fc) + if err != nil { + t.Fatalf("ExtractFunctions: %v", err) + } + if len(fns) != 1 { + t.Fatalf("len(fns) = %d, want 1", len(fns)) + } + if fns[0].Name != "f" { + t.Errorf("Name = %q, want f", fns[0].Name) + } + if fsz == nil || fsz.Lines == 0 { + t.Error("expected non-nil fsz with non-zero Lines") + } +} + +// TestScorer_SimpleCounter locks in the ScoreFile behavior: it's the +// simpler "bump by 1 per branch" counter, not the full cognitive walker. +// Two nested if statements score 2 (not 3 — no nesting penalty). 
+func TestScorer_SimpleCounter(t *testing.T) { + code := `package p +func f(x int) { + if x > 0 { + if x > 1 {} + } +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "f.go") + os.WriteFile(fp, []byte(code), 0644) + fc := diff.FileChange{ + Path: "f.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, + } + + score, _ := complexityImpl{}.ScoreFile(fp, fc) + if len(score) != 1 { + t.Fatalf("len(score) = %d, want 1", len(score)) + } + if score[0].Complexity != 2 { + t.Errorf("score = %d, want 2 (+1 per if, no nesting)", score[0].Complexity) + } + + // The full calculator gives the same code a higher score due to nesting. + analyze, _ := complexityImpl{}.AnalyzeFile(fp, fc) + if analyze[0].Complexity != 3 { + t.Errorf("AnalyzeFile = %d, want 3 (cognitive with nesting)", analyze[0].Complexity) + } +} + +func TestDetectModulePath(t *testing.T) { + dir := t.TempDir() + os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module example.com/foo\n\ngo 1.21\n"), 0644) + mod, err := depsImpl{}.DetectModulePath(dir) + if err != nil { + t.Fatalf("DetectModulePath: %v", err) + } + if mod != "example.com/foo" { + t.Errorf("mod = %q, want example.com/foo", mod) + } +} + +func TestDetectModulePath_Missing(t *testing.T) { + dir := t.TempDir() + _, err := depsImpl{}.DetectModulePath(dir) + if err == nil { + t.Error("expected error when go.mod is missing") + } +} + +// TestLanguage_Accessors_ReturnWorkingImpls pins each accessor to the real +// impl by exercising its primary entry point. This catches `return_value` +// mutations that zero out the return without the tests noticing. 
+func TestLanguage_Accessors_ReturnWorkingImpls(t *testing.T) { + l := &Language{} + + dir := t.TempDir() + fp := filepath.Join(dir, "f.go") + if err := os.WriteFile(fp, []byte("package p\nfunc f(x int) int { if x > 0 { return x }; return -x }\n"), 0644); err != nil { + t.Fatal(err) + } + fc := diff.FileChange{Path: "f.go", Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}} + + cc := l.ComplexityCalculator() + if cc == nil { + t.Fatal("ComplexityCalculator returned nil") + } + res, err := cc.AnalyzeFile(fp, fc) + if err != nil { + t.Fatalf("AnalyzeFile: %v", err) + } + if len(res) == 0 { + t.Error("ComplexityCalculator produced no results") + } + + cs := l.ComplexityScorer() + if cs == nil { + t.Fatal("ComplexityScorer returned nil") + } + scored, err := cs.ScoreFile(fp, fc) + if err != nil { + t.Fatalf("ScoreFile: %v", err) + } + if len(scored) == 0 { + t.Error("ComplexityScorer produced no results") + } + + fe := l.FunctionExtractor() + if fe == nil { + t.Fatal("FunctionExtractor returned nil") + } + fns, fsize, err := fe.ExtractFunctions(fp, fc) + if err != nil { + t.Fatalf("ExtractFunctions: %v", err) + } + if len(fns) == 0 || fsize == nil { + t.Error("FunctionExtractor produced no output") + } + + ir := l.ImportResolver() + if ir == nil { + t.Fatal("ImportResolver returned nil") + } + + mg := l.MutantGenerator() + if mg == nil { + t.Fatal("MutantGenerator returned nil") + } + if _, err := mg.GenerateMutants(fp, fc, nil); err != nil { + t.Fatalf("GenerateMutants: %v", err) + } + + ma := l.MutantApplier() + if ma == nil { + t.Fatal("MutantApplier returned nil") + } + + as := l.AnnotationScanner() + if as == nil { + t.Fatal("AnnotationScanner returned nil") + } + if _, err := as.ScanAnnotations(fp); err != nil { + t.Fatalf("ScanAnnotations: %v", err) + } + + tr := l.TestRunner() + if tr == nil { + t.Fatal("TestRunner returned nil") + } +} + +// TestScanPackageImports_ParsesAndReportsEdges exercises the happy path and +// verifies external imports and 
_test packages are filtered out. +func TestScanPackageImports_ParsesAndReportsEdges(t *testing.T) { + dir := t.TempDir() + pkgDir := filepath.Join(dir, "pkg") + if err := os.MkdirAll(pkgDir, 0755); err != nil { + t.Fatal(err) + } + code := `package pkg + +import ( + "fmt" + "example.com/mod/other" +) + +var _ = fmt.Println +var _ = other.X +` + if err := os.WriteFile(filepath.Join(pkgDir, "a.go"), []byte(code), 0644); err != nil { + t.Fatal(err) + } + edges := depsImpl{}.ScanPackageImports(dir, "pkg", "example.com/mod") + if edges == nil { + t.Fatal("expected non-nil edges for valid package") + } + deps := edges["example.com/mod/pkg"] + if deps == nil { + t.Fatalf("expected edges for example.com/mod/pkg, got %+v", edges) + } + if !deps["example.com/mod/other"] { + t.Errorf("expected internal edge to example.com/mod/other, got %+v", deps) + } + if deps["fmt"] { + t.Errorf("external import fmt should be excluded, got %+v", deps) + } +} + +// TestScanPackageImports_ParseError returns nil when the directory contains +// unparseable Go. Exercises the `if err != nil { return nil }` branch. +func TestScanPackageImports_ParseError(t *testing.T) { + dir := t.TempDir() + pkgDir := filepath.Join(dir, "pkg") + if err := os.MkdirAll(pkgDir, 0755); err != nil { + t.Fatal(err) + } + // Malformed Go — parser will fail on ParseDir. + if err := os.WriteFile(filepath.Join(pkgDir, "a.go"), []byte("this is not go"), 0644); err != nil { + t.Fatal(err) + } + edges := depsImpl{}.ScanPackageImports(dir, "pkg", "example.com/mod") + if edges != nil { + t.Errorf("expected nil for parse error, got %+v", edges) + } +} + +// TestScanPackageImports_SkipsTestPackages verifies _test packages don't +// contribute edges. +func TestScanPackageImports_SkipsTestPackages(t *testing.T) { + dir := t.TempDir() + pkgDir := filepath.Join(dir, "pkg") + if err := os.MkdirAll(pkgDir, 0755); err != nil { + t.Fatal(err) + } + // Put only a _test package; ensure no edges are produced. 
+ code := `package pkg_test + +import "example.com/mod/other" + +var _ = other.X +` + if err := os.WriteFile(filepath.Join(pkgDir, "a_test.go"), []byte(code), 0644); err != nil { + t.Fatal(err) + } + edges := depsImpl{}.ScanPackageImports(dir, "pkg", "example.com/mod") + if len(edges) != 0 { + t.Errorf("expected no edges for _test package, got %+v", edges) + } +} + +// TestIsGoTestFile covers both branches of the suffix check. +func TestIsGoTestFile(t *testing.T) { + if !isGoTestFile("x_test.go") { + t.Error("x_test.go should be a test file") + } + if isGoTestFile("x.go") { + t.Error("x.go should not be a test file") + } + if isGoTestFile("test.go") { + t.Error("test.go should not be a test file (no _ prefix)") + } +} + +// TestHasSuffix covers both branches of the suffix helper. +func TestHasSuffix(t *testing.T) { + if !hasSuffix("abc", "bc") { + t.Error("abc should have suffix bc") + } + if hasSuffix("ab", "abc") { + t.Error("short string cannot have a longer suffix") + } + if hasSuffix("abc", "ac") { + t.Error("abc should not have suffix ac") + } +} diff --git a/internal/mutation/annotations.go b/internal/lang/goanalyzer/mutation_annotate.go similarity index 66% rename from internal/mutation/annotations.go rename to internal/lang/goanalyzer/mutation_annotate.go index 6910cde..f3bd3c2 100644 --- a/internal/mutation/annotations.go +++ b/internal/lang/goanalyzer/mutation_annotate.go @@ -1,27 +1,38 @@ -package mutation +package goanalyzer import ( "go/ast" + "go/parser" "go/token" "strings" ) -// scanAnnotations returns the set of source lines where mutation generation -// should be suppressed based on mutator-disable-* comment annotations. -// -// Supported annotations: -// - // mutator-disable-next-line : skips mutations on the following line -// - // mutator-disable-func : skips mutations in the enclosing function -func scanAnnotations(fset *token.FileSet, f *ast.File) map[int]bool { +// annotationScannerImpl implements lang.AnnotationScanner for Go. 
+// The disable annotations are `// mutator-disable-next-line` (skips the +// following source line) and `// mutator-disable-func` (skips every line of +// the enclosing function, including its signature). Both forms are stripped +// of their comment markers before matching so either `//` or `/* ... */` is +// accepted. +type annotationScannerImpl struct{} + +// ScanAnnotations returns the set of source lines on which mutation +// generation should be suppressed for absPath. The returned map is keyed by +// 1-based line number; a `true` value means disabled. +func (annotationScannerImpl) ScanAnnotations(absPath string) (map[int]bool, error) { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, absPath, nil, parser.ParseComments) + if err != nil { + return nil, err + } + disabled := make(map[int]bool) funcs := funcRanges(fset, f) - for _, cg := range f.Comments { for _, c := range cg.List { applyAnnotation(stripCommentMarkers(c.Text), fset.Position(c.Pos()).Line, funcs, disabled) } } - return disabled + return disabled, nil } func stripCommentMarkers(raw string) string { @@ -65,9 +76,7 @@ func markFuncDisabled(r funcRange, disabled map[int]bool) { } } -type funcRange struct { - start, end int -} +type funcRange struct{ start, end int } func funcRanges(fset *token.FileSet, f *ast.File) []funcRange { var ranges []funcRange diff --git a/internal/lang/goanalyzer/mutation_annotate_test.go b/internal/lang/goanalyzer/mutation_annotate_test.go new file mode 100644 index 0000000..17f25b4 --- /dev/null +++ b/internal/lang/goanalyzer/mutation_annotate_test.go @@ -0,0 +1,138 @@ +package goanalyzer + +import ( + "go/parser" + "go/token" + "os" + "path/filepath" + "testing" +) + +func TestScanAnnotations_DisableNextLine(t *testing.T) { + code := `package p + +func f() { + // mutator-disable-next-line + if true { + } +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "t.go") + if err := os.WriteFile(fp, []byte(code), 0644); err != nil { + t.Fatal(err) + } + 
disabled, err := annotationScannerImpl{}.ScanAnnotations(fp) + if err != nil { + t.Fatal(err) + } + if !disabled[5] { + t.Errorf("expected line 5 disabled, got %v", disabled) + } + if disabled[4] { + t.Error("comment line should not be disabled") + } + if disabled[6] { + t.Error("line 6 should not be disabled") + } +} + +func TestScanAnnotations_DisableFunc(t *testing.T) { + code := `package p + +// mutator-disable-func +func f() { + if true { + } + x := 1 + _ = x +} + +func g() { + if true { + } +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "t.go") + os.WriteFile(fp, []byte(code), 0644) + + disabled, err := annotationScannerImpl{}.ScanAnnotations(fp) + if err != nil { + t.Fatal(err) + } + + for i := 4; i <= 9; i++ { + if !disabled[i] { + t.Errorf("expected line %d disabled (inside f)", i) + } + } + if disabled[12] { + t.Error("g()'s line 12 should not be disabled") + } +} + +func TestScanAnnotations_NoAnnotations(t *testing.T) { + code := `package p + +func f() { + if true {} +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "t.go") + os.WriteFile(fp, []byte(code), 0644) + disabled, err := annotationScannerImpl{}.ScanAnnotations(fp) + if err != nil { + t.Fatal(err) + } + if len(disabled) != 0 { + t.Errorf("expected empty disabled map, got %v", disabled) + } +} + +func TestScanAnnotations_IrrelevantComment(t *testing.T) { + code := `package p + +// this is just a regular comment +func f() { + if true {} +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "t.go") + os.WriteFile(fp, []byte(code), 0644) + disabled, err := annotationScannerImpl{}.ScanAnnotations(fp) + if err != nil { + t.Fatal(err) + } + if len(disabled) != 0 { + t.Errorf("regular comments should not disable mutations, got %v", disabled) + } +} + +// TestFuncRanges_IncludesSignatureAndBody ensures funcRanges spans the +// whole FuncDecl (signature + body), since that's what mutator-disable-func +// should cover. 
+func TestFuncRanges_IncludesSignatureAndBody(t *testing.T) { + code := `package p +func f() { + if true {} +} +` + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "t.go", code, parser.ParseComments) + if err != nil { + t.Fatal(err) + } + ranges := funcRanges(fset, f) + if len(ranges) != 1 { + t.Fatalf("expected 1 range, got %d", len(ranges)) + } + if ranges[0].start != 2 { + t.Errorf("start = %d, want 2", ranges[0].start) + } + if ranges[0].end < ranges[0].start { + t.Errorf("end=%d < start=%d", ranges[0].end, ranges[0].start) + } +} diff --git a/internal/mutation/apply.go b/internal/lang/goanalyzer/mutation_apply.go similarity index 70% rename from internal/mutation/apply.go rename to internal/lang/goanalyzer/mutation_apply.go index 08d95dd..b9c7da5 100644 --- a/internal/mutation/apply.go +++ b/internal/lang/goanalyzer/mutation_apply.go @@ -1,4 +1,4 @@ -package mutation +package goanalyzer import ( "bytes" @@ -7,30 +7,41 @@ import ( "go/printer" "go/token" "strings" + + "github.com/0xPolygon/diffguard/internal/lang" ) -// applyMutation re-parses the file and applies the specific mutation. -func applyMutation(absPath string, m *Mutant) []byte { +// mutantApplierImpl implements lang.MutantApplier for Go by re-parsing the +// original file, walking to the line of the mutation, and mutating the +// matching AST node. The caller gets the rendered source bytes back — the +// mutation orchestrator is responsible for writing them to a temp file and +// invoking `go test -overlay`. +type mutantApplierImpl struct{} + +// ApplyMutation returns mutated source bytes, or (nil, nil) if the mutation +// can't be applied (parse error, line/operator mismatch, etc.). Returning a +// nil-without-error is the signal the orchestrator expects for "skip this +// mutant" — matching the pre-split behavior. 
+func (mutantApplierImpl) ApplyMutation(absPath string, site lang.MutantSite) ([]byte, error) { fset := token.NewFileSet() f, err := parser.ParseFile(fset, absPath, nil, parser.ParseComments) if err != nil { - return nil + return nil, nil } var applied bool - if m.Operator == "statement_deletion" { - applied = applyStatementDeletion(fset, f, m) + if site.Operator == "statement_deletion" { + applied = applyStatementDeletion(fset, f, site) } else { - applied = applyMutationToAST(fset, f, m) + applied = applyMutationToAST(fset, f, site) } - if !applied { - return nil + return nil, nil } - return renderFile(fset, f) + return renderFile(fset, f), nil } -func applyMutationToAST(fset *token.FileSet, f *ast.File, m *Mutant) bool { +func applyMutationToAST(fset *token.FileSet, f *ast.File, m lang.MutantSite) bool { applied := false ast.Inspect(f, func(n ast.Node) bool { if applied || n == nil { @@ -45,9 +56,10 @@ func applyMutationToAST(fset *token.FileSet, f *ast.File, m *Mutant) bool { return applied } -// applyStatementDeletion needs the containing block to replace a statement, -// so it walks BlockStmts instead of the flat ast.Inspect used for other ops. -func applyStatementDeletion(fset *token.FileSet, f *ast.File, m *Mutant) bool { +// applyStatementDeletion walks BlockStmts instead of the flat ast.Inspect +// used for other operators because it needs the containing block to replace +// a statement. 
+func applyStatementDeletion(fset *token.FileSet, f *ast.File, m lang.MutantSite) bool { applied := false ast.Inspect(f, func(n ast.Node) bool { if applied { @@ -66,7 +78,7 @@ func applyStatementDeletion(fset *token.FileSet, f *ast.File, m *Mutant) bool { return applied } -func tryDeleteInBlock(fset *token.FileSet, block *ast.BlockStmt, m *Mutant) bool { +func tryDeleteInBlock(fset *token.FileSet, block *ast.BlockStmt, m lang.MutantSite) bool { for i, stmt := range block.List { if fset.Position(stmt.Pos()).Line != m.Line { continue @@ -80,7 +92,7 @@ func tryDeleteInBlock(fset *token.FileSet, block *ast.BlockStmt, m *Mutant) bool return false } -func tryApplyMutation(n ast.Node, m *Mutant) bool { +func tryApplyMutation(n ast.Node, m lang.MutantSite) bool { switch m.Operator { case "conditional_boundary", "negate_conditional", "math_operator": return applyBinaryMutation(n, m) @@ -96,7 +108,7 @@ func tryApplyMutation(n ast.Node, m *Mutant) bool { return false } -func applyBinaryMutation(n ast.Node, m *Mutant) bool { +func applyBinaryMutation(n ast.Node, m lang.MutantSite) bool { expr, ok := n.(*ast.BinaryExpr) if !ok { return false @@ -114,7 +126,7 @@ func applyBinaryMutation(n ast.Node, m *Mutant) bool { return true } -func applyBoolMutation(n ast.Node, m *Mutant) bool { +func applyBoolMutation(n ast.Node, m lang.MutantSite) bool { ident, ok := n.(*ast.Ident) if !ok || (ident.Name != "true" && ident.Name != "false") { return false @@ -163,14 +175,13 @@ func applyBranchRemoval(n ast.Node) bool { return true } -// parseMutationOp parses a mutant description of the form "X -> Y" into -// the (from, to) operator pair. Either token is ILLEGAL if parsing fails. +// parseMutationOp parses a mutant description of the form "X -> Y" into the +// (from, to) operator pair. Either token is ILLEGAL if parsing fails. 
func parseMutationOp(desc string) (from, to token.Token) { parts := strings.Split(desc, " -> ") if len(parts) != 2 { return token.ILLEGAL, token.ILLEGAL } - opMap := map[string]token.Token{ ">": token.GTR, ">=": token.GEQ, "<": token.LSS, "<=": token.LEQ, @@ -178,7 +189,6 @@ func parseMutationOp(desc string) (from, to token.Token) { "+": token.ADD, "-": token.SUB, "*": token.MUL, "/": token.QUO, } - fromOp, okFrom := opMap[parts[0]] toOp, okTo := opMap[parts[1]] if !okFrom || !okTo { diff --git a/internal/lang/goanalyzer/mutation_apply_test.go b/internal/lang/goanalyzer/mutation_apply_test.go new file mode 100644 index 0000000..2181908 --- /dev/null +++ b/internal/lang/goanalyzer/mutation_apply_test.go @@ -0,0 +1,345 @@ +package goanalyzer + +import ( + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +func TestApplyBinaryMutation_Success(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.GTR} + site := lang.MutantSite{Description: "> -> >=", Operator: "conditional_boundary"} + if !applyBinaryMutation(expr, site) { + t.Error("expected successful apply") + } + if expr.Op != token.GEQ { + t.Errorf("op = %v, want GEQ", expr.Op) + } +} + +func TestApplyBinaryMutation_WrongNodeType(t *testing.T) { + ident := &ast.Ident{Name: "x"} + site := lang.MutantSite{Description: "> -> >=", Operator: "conditional_boundary"} + if applyBinaryMutation(ident, site) { + t.Error("expected false for non-BinaryExpr") + } +} + +func TestApplyBinaryMutation_IllegalOp(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.GTR} + site := lang.MutantSite{Description: "invalid", Operator: "conditional_boundary"} + if applyBinaryMutation(expr, site) { + t.Error("expected false for invalid description") + } +} + +// TestApplyBinaryMutation_OperatorMismatch locks in the fix for a bug where +// applyBinaryMutation rewrote the first BinaryExpr found on a line even +// when its operator differed from the mutant's 
intended `from` op. +func TestApplyBinaryMutation_OperatorMismatch(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.LAND} + site := lang.MutantSite{Description: "!= -> ==", Operator: "negate_conditional"} + if applyBinaryMutation(expr, site) { + t.Error("expected false when expr.Op (&&) does not match mutant's from-op (!=)") + } + if expr.Op != token.LAND { + t.Errorf("expr.Op = %v, want LAND", expr.Op) + } +} + +func TestApplyBinaryMutation_MathOperatorMismatch(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.SUB} + site := lang.MutantSite{Description: "+ -> -", Operator: "math_operator"} + if applyBinaryMutation(expr, site) { + t.Error("expected false when expr.Op (-) does not match from-op (+)") + } +} + +func TestApplyBoolMutation_TrueToFalse(t *testing.T) { + ident := &ast.Ident{Name: "true"} + site := lang.MutantSite{Description: "true -> false", Operator: "boolean_substitution"} + if !applyBoolMutation(ident, site) { + t.Error("expected successful apply") + } + if ident.Name != "false" { + t.Errorf("name = %q, want false", ident.Name) + } +} + +func TestApplyBoolMutation_FalseToTrue(t *testing.T) { + ident := &ast.Ident{Name: "false"} + site := lang.MutantSite{Description: "false -> true", Operator: "boolean_substitution"} + if !applyBoolMutation(ident, site) { + t.Error("expected successful apply") + } + if ident.Name != "true" { + t.Errorf("name = %q, want true", ident.Name) + } +} + +func TestApplyBoolMutation_WrongNodeType(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.ADD} + site := lang.MutantSite{Description: "true -> false", Operator: "boolean_substitution"} + if applyBoolMutation(expr, site) { + t.Error("expected false for non-Ident") + } +} + +func TestApplyReturnMutation_Success(t *testing.T) { + ret := &ast.ReturnStmt{Results: []ast.Expr{&ast.Ident{Name: "x", NamePos: 1}}} + if !applyReturnMutation(ret) { + t.Error("expected successful apply") + } + if ident, ok := ret.Results[0].(*ast.Ident); !ok || ident.Name != "nil" { + 
t.Error("expected result replaced with nil") + } +} + +func TestApplyReturnMutation_WrongNodeType(t *testing.T) { + ident := &ast.Ident{Name: "x"} + if applyReturnMutation(ident) { + t.Error("expected false for non-ReturnStmt") + } +} + +func TestApplyIncDecMutation_Inc(t *testing.T) { + stmt := &ast.IncDecStmt{Tok: token.INC} + if !applyIncDecMutation(stmt) { + t.Error("expected successful apply") + } + if stmt.Tok != token.DEC { + t.Errorf("tok = %v, want DEC", stmt.Tok) + } +} + +func TestApplyIncDecMutation_Dec(t *testing.T) { + stmt := &ast.IncDecStmt{Tok: token.DEC} + if !applyIncDecMutation(stmt) { + t.Error("expected successful apply") + } + if stmt.Tok != token.INC { + t.Errorf("tok = %v, want INC", stmt.Tok) + } +} + +func TestApplyIncDecMutation_WrongNodeType(t *testing.T) { + if applyIncDecMutation(&ast.Ident{Name: "x"}) { + t.Error("expected false for non-IncDecStmt") + } +} + +func TestApplyBranchRemoval(t *testing.T) { + body := &ast.BlockStmt{List: []ast.Stmt{&ast.ExprStmt{X: &ast.Ident{Name: "x"}}}} + ifStmt := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: body} + if !applyBranchRemoval(ifStmt) { + t.Error("expected successful apply") + } + if len(ifStmt.Body.List) != 0 { + t.Errorf("expected body emptied, got %d stmts", len(ifStmt.Body.List)) + } +} + +func TestApplyBranchRemoval_WrongType(t *testing.T) { + if applyBranchRemoval(&ast.Ident{Name: "x"}) { + t.Error("expected false for non-IfStmt") + } +} + +func TestTryApplyMutation_Binary(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.ADD} + site := lang.MutantSite{Description: "+ -> -", Operator: "math_operator"} + if !tryApplyMutation(expr, site) { + t.Error("expected successful apply") + } + if expr.Op != token.SUB { + t.Errorf("op = %v, want SUB", expr.Op) + } +} + +func TestTryApplyMutation_Bool(t *testing.T) { + ident := &ast.Ident{Name: "true"} + site := lang.MutantSite{Description: "true -> false", Operator: "boolean_substitution"} + if !tryApplyMutation(ident, site) { + 
t.Error("expected successful apply") + } +} + +func TestTryApplyMutation_Return(t *testing.T) { + ret := &ast.ReturnStmt{Results: []ast.Expr{&ast.Ident{Name: "x", NamePos: 1}}} + site := lang.MutantSite{Operator: "return_value"} + if !tryApplyMutation(ret, site) { + t.Error("expected successful apply") + } +} + +func TestTryApplyMutation_Unknown(t *testing.T) { + ident := &ast.Ident{Name: "x"} + site := lang.MutantSite{Operator: "unknown_operator"} + if tryApplyMutation(ident, site) { + t.Error("expected false for unknown operator") + } +} + +func TestApplyMutationToAST(t *testing.T) { + code := `package test + +func f() bool { + return true +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + fset := token.NewFileSet() + f, _ := parser.ParseFile(fset, fp, nil, parser.ParseComments) + + site := lang.MutantSite{Line: 4, Description: "true -> false", Operator: "boolean_substitution"} + if !applyMutationToAST(fset, f, site) { + t.Error("expected mutation to be applied") + } +} + +func TestApplyMutationToAST_NoMatch(t *testing.T) { + code := `package test + +func f() int { + return 42 +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + fset := token.NewFileSet() + f, _ := parser.ParseFile(fset, fp, nil, parser.ParseComments) + + site := lang.MutantSite{Line: 999, Description: "true -> false", Operator: "boolean_substitution"} + if applyMutationToAST(fset, f, site) { + t.Error("expected no mutation applied") + } +} + +func TestApplyMutation_Full(t *testing.T) { + code := `package test + +func f(a, b int) bool { + return a > b +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + site := lang.MutantSite{File: "test.go", Line: 4, Description: "> -> >=", Operator: "conditional_boundary"} + result, _ := mutantApplierImpl{}.ApplyMutation(fp, site) + if result == nil { + t.Fatal("expected non-nil result") + } + if 
!strings.Contains(string(result), ">=") { + t.Error("expected mutated code to contain >=") + } +} + +func TestApplyMutation_ParseError(t *testing.T) { + site := lang.MutantSite{Line: 1, Operator: "boolean_substitution"} + result, _ := mutantApplierImpl{}.ApplyMutation("/nonexistent/file.go", site) + if result != nil { + t.Error("expected nil for parse error") + } +} + +func TestApplyMutation_NoMatch(t *testing.T) { + code := `package test + +func f() {} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + site := lang.MutantSite{Line: 999, Operator: "boolean_substitution", Description: "true -> false"} + result, _ := mutantApplierImpl{}.ApplyMutation(fp, site) + if result != nil { + t.Error("expected nil when mutation can't be applied") + } +} + +func TestApplyStatementDeletion(t *testing.T) { + code := `package test + +func f() { + doThing() + x := 1 + _ = x +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + site := lang.MutantSite{Line: 4, Operator: "statement_deletion"} + result, _ := mutantApplierImpl{}.ApplyMutation(fp, site) + if result == nil { + t.Fatal("expected non-nil result") + } + if strings.Contains(string(result), "doThing()") { + t.Errorf("expected doThing() removed, got:\n%s", string(result)) + } +} + +func TestRenderFile(t *testing.T) { + code := `package test + +func f() {} +` + fset := token.NewFileSet() + f, _ := parser.ParseFile(fset, "test.go", code, parser.ParseComments) + + result := renderFile(fset, f) + if result == nil { + t.Fatal("expected non-nil render result") + } + if !strings.Contains(string(result), "package test") { + t.Error("rendered file should contain package declaration") + } +} + +func TestZeroValueExpr(t *testing.T) { + original := &ast.Ident{Name: "x", NamePos: 42} + result := zeroValueExpr(original) + ident, ok := result.(*ast.Ident) + if !ok { + t.Fatal("expected *ast.Ident") + } + if ident.Name != "nil" { + 
t.Errorf("name = %q, want nil", ident.Name) + } +} + +func TestParseMutationOp(t *testing.T) { + tests := []struct { + desc string + wantFrom token.Token + wantTo token.Token + }{ + {"> -> >=", token.GTR, token.GEQ}, + {"== -> !=", token.EQL, token.NEQ}, + {"+ -> -", token.ADD, token.SUB}, + {"invalid", token.ILLEGAL, token.ILLEGAL}, + {"+ -> unknown", token.ILLEGAL, token.ILLEGAL}, + } + for _, tt := range tests { + gotFrom, gotTo := parseMutationOp(tt.desc) + if gotFrom != tt.wantFrom || gotTo != tt.wantTo { + t.Errorf("parseMutationOp(%q) = (%v, %v), want (%v, %v)", + tt.desc, gotFrom, gotTo, tt.wantFrom, tt.wantTo) + } + } +} diff --git a/internal/mutation/generate.go b/internal/lang/goanalyzer/mutation_generate.go similarity index 73% rename from internal/mutation/generate.go rename to internal/lang/goanalyzer/mutation_generate.go index ab345bf..8a4503b 100644 --- a/internal/mutation/generate.go +++ b/internal/lang/goanalyzer/mutation_generate.go @@ -1,4 +1,4 @@ -package mutation +package goanalyzer import ( "fmt" @@ -7,20 +7,26 @@ import ( "go/token" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" ) -// generateMutants parses a file and creates mutants for changed regions. -// Lines disabled via mutator-disable-* annotations are skipped. -func generateMutants(absPath string, fc diff.FileChange) ([]Mutant, error) { +// mutantGeneratorImpl implements lang.MutantGenerator for Go. The generation +// strategy is unchanged from the pre-split internal/mutation/generate.go — +// the only difference is that mutants are now returned as []lang.MutantSite +// so the mutation orchestrator can stay language-agnostic. +type mutantGeneratorImpl struct{} + +// GenerateMutants re-parses the file (with comments so annotation scanning +// can share the same AST) and emits a MutantSite for each operator that +// applies on a changed, non-disabled line. 
+func (mutantGeneratorImpl) GenerateMutants(absPath string, fc diff.FileChange, disabled map[int]bool) ([]lang.MutantSite, error) { fset := token.NewFileSet() f, err := parser.ParseFile(fset, absPath, nil, parser.ParseComments) if err != nil { return nil, err } - disabled := scanAnnotations(fset, f) - var mutants []Mutant - + var mutants []lang.MutantSite ast.Inspect(f, func(n ast.Node) bool { if n == nil { return true @@ -32,11 +38,10 @@ func generateMutants(absPath string, fc diff.FileChange) ([]Mutant, error) { mutants = append(mutants, mutantsFor(fc.Path, line, n)...) return true }) - return mutants, nil } -func mutantsFor(file string, line int, n ast.Node) []Mutant { +func mutantsFor(file string, line int, n ast.Node) []lang.MutantSite { switch node := n.(type) { case *ast.BinaryExpr: return binaryMutants(file, line, node) @@ -54,8 +59,11 @@ func mutantsFor(file string, line int, n ast.Node) []Mutant { return nil } -// binaryMutants generates mutations for binary expressions. -func binaryMutants(file string, line int, expr *ast.BinaryExpr) []Mutant { +// binaryMutants covers the conditional_boundary / negate_conditional / +// math_operator operators. Each source operator maps to a single canonical +// replacement; a surviving mutant should never be ambiguous about what +// "the mutation" was. +func binaryMutants(file string, line int, expr *ast.BinaryExpr) []lang.MutantSite { replacements := map[token.Token][]token.Token{ token.GTR: {token.GEQ}, token.LSS: {token.LEQ}, @@ -74,31 +82,28 @@ func binaryMutants(file string, line int, expr *ast.BinaryExpr) []Mutant { return nil } - var mutants []Mutant + var mutants []lang.MutantSite for _, newOp := range targets { - mutants = append(mutants, Mutant{ + mutants = append(mutants, lang.MutantSite{ File: file, Line: line, Description: fmt.Sprintf("%s -> %s", expr.Op, newOp), Operator: operatorName(expr.Op, newOp), }) } - return mutants } // boolMutants generates true <-> false mutations. 
-func boolMutants(file string, line int, ident *ast.Ident) []Mutant { +func boolMutants(file string, line int, ident *ast.Ident) []lang.MutantSite { if ident.Name != "true" && ident.Name != "false" { return nil } - newVal := "true" if ident.Name == "true" { newVal = "false" } - - return []Mutant{{ + return []lang.MutantSite{{ File: file, Line: line, Description: fmt.Sprintf("%s -> %s", ident.Name, newVal), @@ -111,16 +116,15 @@ func boolMutants(file string, line int, ident *ast.Ident) []Mutant { // Returns whose every result is already the literal identifier `nil` are // skipped: the zero-value mutation rewrites each result to `nil`, producing // an identical AST and therefore an equivalent mutant that can never be -// killed. Including them only adds noise to the score. -func returnMutants(file string, line int, ret *ast.ReturnStmt) []Mutant { +// killed. +func returnMutants(file string, line int, ret *ast.ReturnStmt) []lang.MutantSite { if len(ret.Results) == 0 { return nil } if allLiteralNil(ret.Results) { return nil } - - return []Mutant{{ + return []lang.MutantSite{{ File: file, Line: line, Description: "replace return values with zero values", @@ -128,8 +132,6 @@ func returnMutants(file string, line int, ret *ast.ReturnStmt) []Mutant { }} } -// allLiteralNil reports whether every expression is the bare identifier -// `nil`. See returnMutants for why this suppresses mutant generation. func allLiteralNil(exprs []ast.Expr) bool { for _, e := range exprs { ident, ok := e.(*ast.Ident) @@ -141,7 +143,7 @@ func allLiteralNil(exprs []ast.Expr) bool { } // incdecMutants swaps ++ with -- and vice versa. 
-func incdecMutants(file string, line int, stmt *ast.IncDecStmt) []Mutant { +func incdecMutants(file string, line int, stmt *ast.IncDecStmt) []lang.MutantSite { var newTok token.Token switch stmt.Tok { case token.INC: @@ -151,7 +153,7 @@ func incdecMutants(file string, line int, stmt *ast.IncDecStmt) []Mutant { default: return nil } - return []Mutant{{ + return []lang.MutantSite{{ File: file, Line: line, Description: fmt.Sprintf("%s -> %s", stmt.Tok, newTok), @@ -160,11 +162,11 @@ func incdecMutants(file string, line int, stmt *ast.IncDecStmt) []Mutant { } // ifBodyMutants empties the body of an if statement. -func ifBodyMutants(file string, line int, stmt *ast.IfStmt) []Mutant { +func ifBodyMutants(file string, line int, stmt *ast.IfStmt) []lang.MutantSite { if stmt.Body == nil || len(stmt.Body.List) == 0 { return nil } - return []Mutant{{ + return []lang.MutantSite{{ File: file, Line: line, Description: "remove if body", @@ -173,11 +175,11 @@ func ifBodyMutants(file string, line int, stmt *ast.IfStmt) []Mutant { } // exprStmtMutants deletes a bare function-call statement (discards side effects). 
-func exprStmtMutants(file string, line int, stmt *ast.ExprStmt) []Mutant { +func exprStmtMutants(file string, line int, stmt *ast.ExprStmt) []lang.MutantSite { if _, ok := stmt.X.(*ast.CallExpr); !ok { return nil } - return []Mutant{{ + return []lang.MutantSite{{ File: file, Line: line, Description: "remove call statement", diff --git a/internal/lang/goanalyzer/mutation_generate_test.go b/internal/lang/goanalyzer/mutation_generate_test.go new file mode 100644 index 0000000..3ecd8cf --- /dev/null +++ b/internal/lang/goanalyzer/mutation_generate_test.go @@ -0,0 +1,313 @@ +package goanalyzer + +import ( + "go/ast" + "go/token" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +func TestBinaryMutants(t *testing.T) { + tests := []struct { + name string + op token.Token + expected int + }{ + {"greater than", token.GTR, 1}, + {"less than", token.LSS, 1}, + {"equal", token.EQL, 1}, + {"not equal", token.NEQ, 1}, + {"add", token.ADD, 1}, + {"subtract", token.SUB, 1}, + {"multiply", token.MUL, 1}, + {"divide", token.QUO, 1}, + {"and (no mutation)", token.LAND, 0}, + {"or (no mutation)", token.LOR, 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + expr := &ast.BinaryExpr{Op: tt.op} + mutants := binaryMutants("test.go", 1, expr) + if len(mutants) != tt.expected { + t.Errorf("binaryMutants(%v) = %d mutants, want %d", tt.op, len(mutants), tt.expected) + } + }) + } +} + +func TestBoolMutants(t *testing.T) { + tests := []struct { + name string + ident string + expected int + }{ + {"true", "true", 1}, + {"false", "false", 1}, + {"other", "x", 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ident := &ast.Ident{Name: tt.ident} + mutants := boolMutants("test.go", 1, ident) + if len(mutants) != tt.expected { + t.Errorf("boolMutants(%q) = %d, want %d", tt.ident, len(mutants), tt.expected) + } + }) + } +} + +func TestReturnMutants(t *testing.T) { + ret := 
&ast.ReturnStmt{Results: []ast.Expr{&ast.Ident{Name: "x"}}} + mutants := returnMutants("test.go", 1, ret) + if len(mutants) != 1 { + t.Errorf("returnMutants with values: got %d, want 1", len(mutants)) + } + + bareRet := &ast.ReturnStmt{} + mutants = returnMutants("test.go", 1, bareRet) + if len(mutants) != 0 { + t.Errorf("returnMutants bare: got %d, want 0", len(mutants)) + } +} + +func TestIncDecMutants(t *testing.T) { + incStmt := &ast.IncDecStmt{Tok: token.INC} + m := incdecMutants("a.go", 5, incStmt) + if len(m) != 1 { + t.Fatalf("expected 1 mutant for ++, got %d", len(m)) + } + if m[0].Operator != "incdec" { + t.Errorf("operator = %q, want incdec", m[0].Operator) + } + if !strings.Contains(m[0].Description, "--") { + t.Errorf("description = %q", m[0].Description) + } + + decStmt := &ast.IncDecStmt{Tok: token.DEC} + m = incdecMutants("a.go", 5, decStmt) + if len(m) != 1 { + t.Fatalf("expected 1 mutant for --, got %d", len(m)) + } + + other := &ast.IncDecStmt{Tok: token.ADD} + if ms := incdecMutants("a.go", 5, other); len(ms) != 0 { + t.Errorf("unexpected mutants for non-incdec tok: %+v", ms) + } +} + +func TestIfBodyMutants(t *testing.T) { + body := &ast.BlockStmt{List: []ast.Stmt{&ast.ExprStmt{X: &ast.Ident{Name: "x"}}}} + ifStmt := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: body} + m := ifBodyMutants("a.go", 5, ifStmt) + if len(m) != 1 { + t.Fatalf("expected 1 mutant, got %d", len(m)) + } + if m[0].Operator != "branch_removal" { + t.Errorf("operator = %q, want branch_removal", m[0].Operator) + } + + empty := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: &ast.BlockStmt{}} + if ms := ifBodyMutants("a.go", 5, empty); len(ms) != 0 { + t.Errorf("expected no mutants for empty if body, got %d", len(ms)) + } +} + +func TestExprStmtMutants_CallExpr(t *testing.T) { + call := &ast.ExprStmt{X: &ast.CallExpr{Fun: &ast.Ident{Name: "foo"}}} + m := exprStmtMutants("a.go", 5, call) + if len(m) != 1 { + t.Fatalf("expected 1 mutant, got %d", len(m)) + } + if 
m[0].Operator != "statement_deletion" { + t.Errorf("operator = %q", m[0].Operator) + } +} + +func TestExprStmtMutants_NonCall(t *testing.T) { + stmt := &ast.ExprStmt{X: &ast.Ident{Name: "x"}} + if ms := exprStmtMutants("a.go", 5, stmt); len(ms) != 0 { + t.Errorf("expected no mutants for non-call, got %d", len(ms)) + } +} + +func TestOperatorName(t *testing.T) { + tests := []struct { + from, to token.Token + expected string + }{ + {token.GTR, token.GEQ, "conditional_boundary"}, + {token.EQL, token.NEQ, "negate_conditional"}, + {token.ADD, token.SUB, "math_operator"}, + } + for _, tt := range tests { + got := operatorName(tt.from, tt.to) + if got != tt.expected { + t.Errorf("operatorName(%v, %v) = %q, want %q", tt.from, tt.to, got, tt.expected) + } + } +} + +func TestIsBoundary(t *testing.T) { + if !isBoundary(token.GTR) { + t.Error("GTR should be boundary") + } + if !isBoundary(token.GEQ) { + t.Error("GEQ should be boundary") + } + if isBoundary(token.EQL) { + t.Error("EQL should not be boundary") + } +} + +func TestIsComparison(t *testing.T) { + if !isComparison(token.EQL) { + t.Error("EQL should be comparison") + } + if isComparison(token.GTR) { + t.Error("GTR should not be comparison") + } +} + +func TestIsMath(t *testing.T) { + if !isMath(token.ADD) { + t.Error("ADD should be math") + } + if isMath(token.EQL) { + t.Error("EQL should not be math") + } +} + +func TestGenerateMutants_EndToEnd(t *testing.T) { + code := `package test + +func add(a, b int) int { + if a > b { + return a + b + } + return a - b +} +` + dir := t.TempDir() + filePath := filepath.Join(dir, "test.go") + if err := os.WriteFile(filePath, []byte(code), 0644); err != nil { + t.Fatal(err) + } + + fc := diff.FileChange{ + Path: "test.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 8}}, + } + + mutants, err := mutantGeneratorImpl{}.GenerateMutants(filePath, fc, nil) + if err != nil { + t.Fatalf("GenerateMutants: %v", err) + } + if len(mutants) == 0 { + t.Error("expected mutants, got 
none") + } + + operators := make(map[string]int) + for _, m := range mutants { + operators[m.Operator]++ + } + + if operators["conditional_boundary"] == 0 { + t.Error("expected conditional_boundary mutants") + } + if operators["math_operator"] == 0 { + t.Error("expected math_operator mutants") + } +} + +func TestGenerateMutants_WithAllTypes(t *testing.T) { + code := `package test + +func f(a, b int) bool { + if a > b { + return true + } + x := a + b + _ = x + return false +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + fc := diff.FileChange{ + Path: "test.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 20}}, + } + + mutants, err := mutantGeneratorImpl{}.GenerateMutants(fp, fc, nil) + if err != nil { + t.Fatalf("GenerateMutants: %v", err) + } + + operators := make(map[string]int) + for _, m := range mutants { + operators[m.Operator]++ + } + + for _, want := range []string{"conditional_boundary", "boolean_substitution", "math_operator", "return_value"} { + if operators[want] == 0 { + t.Errorf("missing %s mutants", want) + } + } +} + +func TestGenerateMutants_HonorsDisableNextLine(t *testing.T) { + code := `package test + +func f(x int) bool { + // mutator-disable-next-line + if x > 0 { + return true + } + if x < 0 { + return false + } + return false +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + fc := diff.FileChange{ + Path: "test.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, + } + + disabled, err := annotationScannerImpl{}.ScanAnnotations(fp) + if err != nil { + t.Fatal(err) + } + mutants, err := mutantGeneratorImpl{}.GenerateMutants(fp, fc, disabled) + if err != nil { + t.Fatal(err) + } + + for _, m := range mutants { + if m.Line == 5 { + t.Errorf("expected no mutants on annotated line 5, got: %+v", m) + } + } + + foundAt8 := false + for _, m := range mutants { + if m.Line == 8 { + foundAt8 = true + } + } + if 
!foundAt8 { + t.Error("expected mutants on un-annotated line 8") + } +} diff --git a/internal/lang/goanalyzer/parse.go b/internal/lang/goanalyzer/parse.go new file mode 100644 index 0000000..e1e7c4d --- /dev/null +++ b/internal/lang/goanalyzer/parse.go @@ -0,0 +1,60 @@ +// Package goanalyzer implements the lang.Language interface for Go. It is +// blank-imported from cmd/diffguard/main.go so that Go gets registered at +// process start. +// +// One file per concern per the top-level design doc: +// - goanalyzer.go -- Language + init()/Register +// - parse.go -- shared AST helpers (funcName, parseFile) +// - complexity.go -- ComplexityCalculator + ComplexityScorer +// - sizes.go -- FunctionExtractor +// - deps.go -- ImportResolver +// - mutation_generate.go-- MutantGenerator +// - mutation_apply.go -- MutantApplier +// - mutation_annotate.go-- AnnotationScanner +// - testrunner.go -- TestRunner (wraps go test -overlay) +package goanalyzer + +import ( + "fmt" + "go/ast" + "go/parser" + "go/token" +) + +// funcName returns the canonical identifier for a function or method: +// +// func Foo() -> "Foo" +// func (t T) Bar() -> "(T).Bar" +// func (t *T) Baz() -> "(T).Baz" +// +// This was duplicated in complexity.go, sizes.go, and churn.go before the +// language split; it now lives here as the single shared implementation. +func funcName(fn *ast.FuncDecl) string { + if fn.Recv != nil && len(fn.Recv.List) > 0 { + recv := fn.Recv.List[0] + var typeName string + switch t := recv.Type.(type) { + case *ast.StarExpr: + if ident, ok := t.X.(*ast.Ident); ok { + typeName = ident.Name + } + case *ast.Ident: + typeName = t.Name + } + return fmt.Sprintf("(%s).%s", typeName, fn.Name.Name) + } + return fn.Name.Name +} + +// parseFile parses absPath with the given mode. 
Returning (nil, nil, err) on +// parse failure keeps callers uniform: the existing Go analyzers treated a +// parse error as "skip this file" rather than propagating it up, and we +// preserve that behavior behind the interface. +func parseFile(absPath string, mode parser.Mode) (*token.FileSet, *ast.File, error) { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, absPath, nil, mode) + if err != nil { + return nil, nil, err + } + return fset, f, nil +} diff --git a/internal/lang/goanalyzer/sizes.go b/internal/lang/goanalyzer/sizes.go new file mode 100644 index 0000000..5c58ff0 --- /dev/null +++ b/internal/lang/goanalyzer/sizes.go @@ -0,0 +1,51 @@ +package goanalyzer + +import ( + "go/ast" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// sizesImpl implements lang.FunctionExtractor for Go by parsing the file +// and reporting function line ranges plus the overall file line count. +type sizesImpl struct{} + +// ExtractFunctions parses absPath and returns (functions-in-changed-regions, +// file size, error). Parse errors return (nil, nil, nil) to match the +// pre-refactor behavior where parse failure silently skipped the file. 
+func (sizesImpl) ExtractFunctions(absPath string, fc diff.FileChange) ([]lang.FunctionSize, *lang.FileSize, error) { + fset, f, err := parseFile(absPath, 0) + if err != nil { + return nil, nil, nil + } + + var fileSize *lang.FileSize + if file := fset.File(f.Pos()); file != nil { + fileSize = &lang.FileSize{Path: fc.Path, Lines: file.LineCount()} + } + + var results []lang.FunctionSize + ast.Inspect(f, func(n ast.Node) bool { + fn, ok := n.(*ast.FuncDecl) + if !ok { + return true + } + startLine := fset.Position(fn.Pos()).Line + endLine := fset.Position(fn.End()).Line + if !fc.OverlapsRange(startLine, endLine) { + return false + } + results = append(results, lang.FunctionSize{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: startLine, + EndLine: endLine, + Name: funcName(fn), + }, + Lines: endLine - startLine + 1, + }) + return false + }) + return results, fileSize, nil +} diff --git a/internal/lang/goanalyzer/testrunner.go b/internal/lang/goanalyzer/testrunner.go new file mode 100644 index 0000000..090714b --- /dev/null +++ b/internal/lang/goanalyzer/testrunner.go @@ -0,0 +1,76 @@ +package goanalyzer + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// testRunnerImpl implements lang.TestRunner for Go using `go test -overlay`. +// The overlay mechanism lets mutants run fully in parallel — the build +// system picks up the mutant file without touching the real source — so +// this runner is stateless and safe to call concurrently. +type testRunnerImpl struct{} + +// RunTest writes a build-time overlay that redirects cfg.OriginalFile to +// cfg.MutantFile and invokes `go test` from the directory of the original +// file. A non-nil error from `go test` means at least one test failed — +// the mutant was killed. 
+// +// The returned (killed, output, err) triple matches the lang.TestRunner +// contract: err is the only error return for "the runner itself could not +// run" (e.g. couldn't write the overlay file); a normal test failure is +// reported via killed=true with the test output in `output`. +func (testRunnerImpl) RunTest(cfg lang.TestRunConfig) (bool, string, error) { + overlayPath := filepath.Join(cfg.WorkDir, fmt.Sprintf("m%d-overlay.json", cfg.Index)) + if err := writeOverlayJSON(overlayPath, cfg.OriginalFile, cfg.MutantFile); err != nil { + return false, "", err + } + + pkgDir := filepath.Dir(cfg.OriginalFile) + cmd := exec.Command("go", buildTestArgs(cfg, overlayPath)...) + cmd.Dir = pkgDir + var stderr bytes.Buffer + cmd.Stderr = &stderr + err := cmd.Run() + + if err != nil { + return true, stderr.String(), nil + } + return false, "", nil +} + +// writeOverlayJSON writes a go build overlay file mapping originalPath to +// mutantPath. See `go help build` -overlay flag for format details. +func writeOverlayJSON(path, originalPath, mutantPath string) error { + overlay := struct { + Replace map[string]string `json:"Replace"` + }{ + Replace: map[string]string{originalPath: mutantPath}, + } + data, err := json.Marshal(overlay) + if err != nil { + return err + } + return os.WriteFile(path, data, 0644) +} + +// buildTestArgs constructs the `go test` argv. The overlay argument is +// always present; -run is only added if the caller set TestPattern. 
+func buildTestArgs(cfg lang.TestRunConfig, overlayPath string) []string { + timeout := cfg.Timeout + if timeout <= 0 { + timeout = defaultGoTestTimeout + } + args := []string{"test", "-overlay=" + overlayPath, "-count=1", "-timeout", timeout.String()} + if cfg.TestPattern != "" { + args = append(args, "-run", cfg.TestPattern) + } + args = append(args, "./...") + return args +} diff --git a/internal/lang/goanalyzer/testrunner_test.go b/internal/lang/goanalyzer/testrunner_test.go new file mode 100644 index 0000000..b3d2d15 --- /dev/null +++ b/internal/lang/goanalyzer/testrunner_test.go @@ -0,0 +1,183 @@ +package goanalyzer + +import ( + "os" + "os/exec" + "path/filepath" + "testing" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +func TestWriteOverlayJSON(t *testing.T) { + dir := t.TempDir() + overlayPath := filepath.Join(dir, "overlay.json") + if err := writeOverlayJSON(overlayPath, "/orig/foo.go", "/tmp/mutated.go"); err != nil { + t.Fatalf("writeOverlayJSON error: %v", err) + } + data, err := os.ReadFile(overlayPath) + if err != nil { + t.Fatal(err) + } + // Must be the exact shape go test -overlay expects: + // {"Replace":{"":""}} + expected := `{"Replace":{"/orig/foo.go":"/tmp/mutated.go"}}` + if string(data) != expected { + t.Errorf("overlay JSON = %q, want %q", string(data), expected) + } +} + +func TestBuildTestArgs_Default(t *testing.T) { + args := buildTestArgs(lang.TestRunConfig{}, "/tmp/overlay.json") + if args[0] != "test" { + t.Errorf("args[0] = %q, want test", args[0]) + } + foundOverlay := false + for _, a := range args { + if a == "-overlay=/tmp/overlay.json" { + foundOverlay = true + } + } + if !foundOverlay { + t.Errorf("expected -overlay in args, got %v", args) + } + for _, a := range args { + if a == "-run" { + t.Error("did not expect -run in default args") + } + } +} + +func TestBuildTestArgs_WithPattern(t *testing.T) { + args := buildTestArgs(lang.TestRunConfig{TestPattern: "TestFoo"}, "/tmp/overlay.json") + found := false + for i, a 
:= range args { + if a == "-run" && i+1 < len(args) && args[i+1] == "TestFoo" { + found = true + } + } + if !found { + t.Errorf("expected -run TestFoo in args, got %v", args) + } +} + +func TestBuildTestArgs_TimeoutPassed(t *testing.T) { + args := buildTestArgs(lang.TestRunConfig{}, "/tmp/overlay.json") + // Default timeout (30s) should be formatted as "30s" + found := false + for i, a := range args { + if a == "-timeout" && i+1 < len(args) && args[i+1] == "30s" { + found = true + } + } + if !found { + t.Errorf("expected -timeout 30s in args, got %v", args) + } +} + +// TestRunTest_OverlayWriteFailsReturnsError forces the overlay-write +// failure path by pointing WorkDir at a non-existent nested directory. +func TestRunTest_OverlayWriteFailsReturnsError(t *testing.T) { + // WorkDir that doesn't exist: writeOverlayJSON will fail on Create. + cfg := lang.TestRunConfig{ + WorkDir: filepath.Join(t.TempDir(), "missing", "dir"), + OriginalFile: "/tmp/orig.go", + MutantFile: "/tmp/mut.go", + Index: 0, + } + killed, out, err := testRunnerImpl{}.RunTest(cfg) + if err == nil { + t.Fatal("expected an error when overlay directory is missing") + } + if killed { + t.Error("killed should be false on setup error") + } + if out != "" { + t.Errorf("output should be empty on setup error, got %q", out) + } +} + +// TestRunTest_KillsMutantWhenTestFails end-to-end-verifies the kill path +// by creating a tiny Go module whose test fails after an overlay swaps in +// a bad file. The runner must return killed=true and a non-empty output. 
+func TestRunTest_KillsMutantWhenTestFails(t *testing.T) { + if _, err := exec.LookPath("go"); err != nil { + t.Skip("go binary not on PATH") + } + modDir := t.TempDir() + writeFile := func(name, content string) { + t.Helper() + if err := os.WriteFile(filepath.Join(modDir, name), []byte(content), 0644); err != nil { + t.Fatal(err) + } + } + writeFile("go.mod", "module example.com/mut\n\ngo 1.21\n") + writeFile("m.go", "package mut\n\nfunc Add(a, b int) int { return a + b }\n") + writeFile("m_test.go", "package mut\n\nimport \"testing\"\n\nfunc TestAdd(t *testing.T) { if Add(1, 2) != 3 { t.Fail() } }\n") + + mutant := `package mut + +func Add(a, b int) int { return a - b } +` + mutantPath := filepath.Join(t.TempDir(), "m.go") + if err := os.WriteFile(mutantPath, []byte(mutant), 0644); err != nil { + t.Fatal(err) + } + + work := t.TempDir() + cfg := lang.TestRunConfig{ + WorkDir: work, + OriginalFile: filepath.Join(modDir, "m.go"), + MutantFile: mutantPath, + Index: 1, + } + killed, _, err := testRunnerImpl{}.RunTest(cfg) + if err != nil { + t.Fatalf("RunTest: %v", err) + } + if !killed { + t.Error("expected killed=true when tests fail") + } +} + +// TestRunTest_LivesWhenTestsPass covers the survive (!killed) path. +func TestRunTest_LivesWhenTestsPass(t *testing.T) { + if _, err := exec.LookPath("go"); err != nil { + t.Skip("go binary not on PATH") + } + modDir := t.TempDir() + writeFile := func(name, content string) { + t.Helper() + if err := os.WriteFile(filepath.Join(modDir, name), []byte(content), 0644); err != nil { + t.Fatal(err) + } + } + writeFile("go.mod", "module example.com/mut\n\ngo 1.21\n") + writeFile("m.go", "package mut\n\nfunc Add(a, b int) int { return a + b }\n") + writeFile("m_test.go", "package mut\n\nimport \"testing\"\n\nfunc TestAdd(t *testing.T) { if Add(1, 2) != 3 { t.Fail() } }\n") + + // Mutant is semantically equivalent so tests still pass. 
+ mutant := "package mut\n\nfunc Add(a, b int) int { return b + a }\n" + mutantPath := filepath.Join(t.TempDir(), "m.go") + if err := os.WriteFile(mutantPath, []byte(mutant), 0644); err != nil { + t.Fatal(err) + } + + work := t.TempDir() + cfg := lang.TestRunConfig{ + WorkDir: work, + OriginalFile: filepath.Join(modDir, "m.go"), + MutantFile: mutantPath, + Index: 2, + } + killed, out, err := testRunnerImpl{}.RunTest(cfg) + if err != nil { + t.Fatalf("RunTest: %v", err) + } + if killed { + t.Error("expected killed=false when mutant is equivalent") + } + if out != "" { + t.Errorf("expected empty output on survive, got %q", out) + } +} diff --git a/internal/lang/lang.go b/internal/lang/lang.go new file mode 100644 index 0000000..79e0661 --- /dev/null +++ b/internal/lang/lang.go @@ -0,0 +1,207 @@ +// Package lang defines the per-language analyzer interfaces that diffguard +// plugs into. A language implementation registers itself via Register() from +// an init() function; the diffguard CLI blank-imports each language package it +// supports so the registration happens at process start. +// +// The types and interfaces declared here are the single source of truth for +// the data passed between the diff parser, the analyzers, and the language +// back-ends. Keeping them in one package avoids import cycles (analyzer +// packages import `lang`; language packages import `lang`; neither imports +// the other). +package lang + +import ( + "time" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +// FileFilter controls which files the diff parser includes and which it +// classifies as test files. A language exposes its filter as a plain value +// struct so callers can read the fields directly — the diff parser uses +// Extensions/IsTestFile/DiffGlobs during path walks. +type FileFilter struct { + // Extensions is the list of source file extensions (including the leading + // dot) that belong to this language, e.g. [".go"] or [".ts", ".tsx"]. 
+	Extensions []string
+	// IsTestFile reports whether the given path is a test file that should be
+	// excluded from analysis.
+	IsTestFile func(path string) bool
+	// DiffGlobs is the list of globs passed to `git diff -- <glob>...` to scope
+	// the diff output to this language's files.
+	DiffGlobs []string
+}
+
+// MatchesExtension reports whether path has one of the filter's source
+// extensions. It does not apply the IsTestFile check.
+func (f FileFilter) MatchesExtension(path string) bool {
+	for _, ext := range f.Extensions {
+		if hasSuffix(path, ext) {
+			return true
+		}
+	}
+	return false
+}
+
+// IncludesSource reports whether path is an analyzable source file: the
+// extension matches and the file is not a test file.
+func (f FileFilter) IncludesSource(path string) bool {
+	if !f.MatchesExtension(path) {
+		return false
+	}
+	if f.IsTestFile != nil && f.IsTestFile(path) {
+		return false
+	}
+	return true
+}
+
+// hasSuffix is a tiny helper used to avoid pulling in strings just for this
+// single call — FileFilter is referenced on hot paths (every file walked) so
+// keeping the dependency list short is worthwhile.
+func hasSuffix(s, suffix string) bool {
+	if len(s) < len(suffix) {
+		return false
+	}
+	return s[len(s)-len(suffix):] == suffix
+}
+
+// FunctionInfo identifies a function in a source file. It's embedded by the
+// richer FunctionSize and FunctionComplexity types so analyzers can share one
+// identity struct.
+type FunctionInfo struct {
+	File    string
+	Line    int
+	EndLine int
+	Name    string
+}
+
+// FunctionSize holds size info for a single function.
+type FunctionSize struct {
+	FunctionInfo
+	Lines int
+}
+
+// FileSize holds size info for a single file.
+type FileSize struct {
+	Path  string
+	Lines int
+}
+
+// FunctionComplexity holds a complexity score for a single function. It's
+// used by both the complexity analyzer and the churn analyzer (via the
+// ComplexityScorer interface, which may reuse the ComplexityCalculator's
+// implementation or provide a lighter approximation).
+type FunctionComplexity struct {
+	FunctionInfo
+	Complexity int
+}
+
+// MutantSite describes a single potential mutation within changed code.
+type MutantSite struct {
+	File        string
+	Line        int
+	Description string
+	Operator    string
+}
+
+// TestRunConfig carries the parameters needed to run tests against a single
+// mutant. The set of fields is deliberately broad so temp-copy runners
+// (which need WorkDir and Index to write a scratch copy) and overlay-based
+// runners (which only need the MutantFile, OriginalFile, and RepoPath) can
+// share one shape.
+type TestRunConfig struct {
+	// RepoPath is the absolute path to the repository root.
+	RepoPath string
+	// MutantFile is the absolute path to the file containing the mutated
+	// source (usually a temp file). For languages that run tests directly on
+	// the original tree this may be the path to the original file after the
+	// mutation has been written to it.
+	MutantFile string
+	// OriginalFile is the absolute path to the original (unmutated) source
+	// file. Temp-copy runners use this to restore the original after running
+	// the tests.
+	OriginalFile string
+	// Timeout caps the test run's wall-clock duration.
+	Timeout time.Duration
+	// TestPattern, if non-empty, is passed to the runner's test filter flag
+	// (e.g. `go test -run <pattern>`).
+	TestPattern string
+	// WorkDir is a writable directory private to this run, available for
+	// overlay files, backups, etc.
+	WorkDir string
+	// Index is a monotonically-increasing identifier for the mutant within
+	// the current run. Useful for naming per-mutant temp files without
+	// collision.
+	Index int
+}
+
+// ComplexityCalculator computes cognitive complexity per function for a
+// single file's changed regions.
+type ComplexityCalculator interface {
+	AnalyzeFile(absPath string, fc diff.FileChange) ([]FunctionComplexity, error)
+}
+
+// ComplexityScorer computes a lightweight complexity score for churn
+// weighting. It may share its implementation with ComplexityCalculator or be
+// a faster, coarser approximation — the churn analyzer only needs a number,
+// not a categorized score.
+type ComplexityScorer interface {
+	ScoreFile(absPath string, fc diff.FileChange) ([]FunctionComplexity, error)
+}
+
+// FunctionExtractor parses a single file and reports its function sizes plus
+// the overall file size.
+type FunctionExtractor interface {
+	ExtractFunctions(absPath string, fc diff.FileChange) ([]FunctionSize, *FileSize, error)
+}
+
+// ImportResolver drives the deps analyzer. DetectModulePath returns the
+// project-level identifier used to classify internal vs. external imports;
+// ScanPackageImports returns a per-package adjacency list keyed by the
+// importing package's directory-level identifier.
+type ImportResolver interface {
+	DetectModulePath(repoPath string) (string, error)
+	ScanPackageImports(repoPath, pkgDir, modulePath string) map[string]map[string]bool
+}
+
+// MutantGenerator returns the mutation sites produced for a single file's
+// changed regions, after disabled lines have been filtered out.
+type MutantGenerator interface {
+	GenerateMutants(absPath string, fc diff.FileChange, disabledLines map[int]bool) ([]MutantSite, error)
+}
+
+// MutantApplier produces the mutated source bytes for a given mutation site.
+// Returning nil signals "skip this mutant" — callers should not treat a nil
+// return as an error.
+type MutantApplier interface {
+	ApplyMutation(absPath string, site MutantSite) ([]byte, error)
+}
+
+// AnnotationScanner returns the set of source lines on which mutation
+// generation should be suppressed, based on in-source annotations.
+type AnnotationScanner interface { + ScanAnnotations(absPath string) (map[int]bool, error) +} + +// TestRunner executes the language's test suite against a mutated source +// tree and reports whether any test failed (the mutant was "killed"). +type TestRunner interface { + RunTest(cfg TestRunConfig) (killed bool, output string, err error) +} + +// Language is the top-level per-language interface. Every language +// implementation exposes its sub-components through this one type so the +// orchestrator can iterate `for _, l := range lang.All()` and read out any +// capability it needs. +type Language interface { + Name() string + FileFilter() FileFilter + ComplexityCalculator() ComplexityCalculator + FunctionExtractor() FunctionExtractor + ImportResolver() ImportResolver + ComplexityScorer() ComplexityScorer + MutantGenerator() MutantGenerator + MutantApplier() MutantApplier + AnnotationScanner() AnnotationScanner + TestRunner() TestRunner +} diff --git a/internal/lang/lang_test.go b/internal/lang/lang_test.go new file mode 100644 index 0000000..85ed81c --- /dev/null +++ b/internal/lang/lang_test.go @@ -0,0 +1,66 @@ +package lang + +import "testing" + +func TestFileFilter_MatchesExtension(t *testing.T) { + f := FileFilter{Extensions: []string{".go"}} + tests := []struct { + path string + want bool + }{ + {"foo.go", true}, + {"path/to/foo.go", true}, + {"foo_test.go", true}, + {"foo.txt", false}, + {"", false}, + } + for _, tt := range tests { + if got := f.MatchesExtension(tt.path); got != tt.want { + t.Errorf("MatchesExtension(%q) = %v, want %v", tt.path, got, tt.want) + } + } +} + +func TestFileFilter_IncludesSource(t *testing.T) { + f := FileFilter{ + Extensions: []string{".go"}, + IsTestFile: func(p string) bool { + return len(p) >= len("_test.go") && p[len(p)-len("_test.go"):] == "_test.go" + }, + } + tests := []struct { + path string + want bool + }{ + {"foo.go", true}, + {"foo_test.go", false}, + {"foo.txt", false}, + } + for _, tt := range tests { + if got 
:= f.IncludesSource(tt.path); got != tt.want { + t.Errorf("IncludesSource(%q) = %v, want %v", tt.path, got, tt.want) + } + } +} + +func TestFileFilter_MultipleExtensions(t *testing.T) { + f := FileFilter{Extensions: []string{".ts", ".tsx"}} + if !f.MatchesExtension("foo.ts") { + t.Error("want .ts to match") + } + if !f.MatchesExtension("foo.tsx") { + t.Error("want .tsx to match") + } + if f.MatchesExtension("foo.js") { + t.Error("want .js not to match") + } +} + +func TestFileFilter_NilIsTestFile(t *testing.T) { + // IncludesSource with nil IsTestFile must not panic and should treat + // everything with a matching extension as non-test. + f := FileFilter{Extensions: []string{".go"}} + if !f.IncludesSource("foo_test.go") { + t.Error("with nil IsTestFile, everything with matching ext should be included") + } +} diff --git a/internal/lang/registry.go b/internal/lang/registry.go new file mode 100644 index 0000000..e56b842 --- /dev/null +++ b/internal/lang/registry.go @@ -0,0 +1,79 @@ +package lang + +import ( + "fmt" + "sort" + "sync" +) + +// registry stores the set of languages that have self-registered via init(). +// It is safe for concurrent use; registrations happen during package init so +// the lock is rarely contended in practice, but Get/All are called from the +// main goroutine while other init() calls may still be running when the +// diffguard binary is linked with many language plugins. +var ( + registryMu sync.RWMutex + registryMap = map[string]Language{} +) + +// Register adds a Language to the global registry under its Name(). It +// panics on duplicate registration because registrations always happen from +// init() functions: a duplicate is a programming error in the build graph +// (two packages registering the same language) and should fail loudly before +// main() runs. 
+func Register(l Language) { + if l == nil { + panic("lang.Register: nil Language") + } + name := l.Name() + if name == "" { + panic("lang.Register: Language.Name() returned empty string") + } + registryMu.Lock() + defer registryMu.Unlock() + if _, exists := registryMap[name]; exists { + panic(fmt.Sprintf("lang.Register: language %q already registered", name)) + } + registryMap[name] = l +} + +// Get returns the language registered under the given name, or (nil, false) +// if no such language is registered. +func Get(name string) (Language, bool) { + registryMu.RLock() + defer registryMu.RUnlock() + l, ok := registryMap[name] + return l, ok +} + +// All returns every registered language, sorted by Name(). Deterministic +// ordering keeps report sections stable across runs and hosts. +func All() []Language { + registryMu.RLock() + defer registryMu.RUnlock() + out := make([]Language, 0, len(registryMap)) + for _, l := range registryMap { + out = append(out, l) + } + sort.Slice(out, func(i, j int) bool { return out[i].Name() < out[j].Name() }) + return out +} + +// unregisterForTest removes the named language from the registry. It is only +// useful from _test.go files that temporarily register fake languages; the +// production code path never unregisters. +// +// Tests use it by calling `lang.UnregisterForTest("x")` — declared here so +// test packages can access it without exporting an unhygienic symbol. +func unregisterForTest(name string) { + registryMu.Lock() + defer registryMu.Unlock() + delete(registryMap, name) +} + +// UnregisterForTest is the exported entry point into unregisterForTest. +// Production code must never call it; it exists so unit tests can keep the +// registry clean after injecting a fake Language. 
+func UnregisterForTest(name string) { + unregisterForTest(name) +} diff --git a/internal/lang/registry_test.go b/internal/lang/registry_test.go new file mode 100644 index 0000000..53deeac --- /dev/null +++ b/internal/lang/registry_test.go @@ -0,0 +1,109 @@ +package lang + +import ( + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +// fakeLang is a minimal Language stub used to exercise the registry. Its +// sub-component accessors all return nil — nothing calls them in the +// registry-only tests. +type fakeLang struct{ name string } + +func (f *fakeLang) Name() string { return f.name } +func (f *fakeLang) FileFilter() FileFilter { return FileFilter{} } +func (f *fakeLang) ComplexityCalculator() ComplexityCalculator { return nil } +func (f *fakeLang) FunctionExtractor() FunctionExtractor { return nil } +func (f *fakeLang) ImportResolver() ImportResolver { return nil } +func (f *fakeLang) ComplexityScorer() ComplexityScorer { return nil } +func (f *fakeLang) MutantGenerator() MutantGenerator { return nil } +func (f *fakeLang) MutantApplier() MutantApplier { return nil } +func (f *fakeLang) AnnotationScanner() AnnotationScanner { return nil } +func (f *fakeLang) TestRunner() TestRunner { return nil } + +// Silence the unused-import check — the import is kept so that fakeLang +// remains plug-compatible with the analyzer interfaces that reference the +// diff package in their method signatures. 
+var _ = diff.FileChange{} + +func TestRegister_And_Get(t *testing.T) { + defer UnregisterForTest("test-registry-1") + + l := &fakeLang{name: "test-registry-1"} + Register(l) + + got, ok := Get("test-registry-1") + if !ok { + t.Fatal("expected Get to find registered language") + } + if got.Name() != "test-registry-1" { + t.Errorf("Get returned %q, want test-registry-1", got.Name()) + } + + if _, ok := Get("no-such-language"); ok { + t.Error("Get should return false for unknown name") + } +} + +func TestRegister_DuplicatePanics(t *testing.T) { + defer UnregisterForTest("test-dup") + + Register(&fakeLang{name: "test-dup"}) + + defer func() { + if r := recover(); r == nil { + t.Error("expected panic on duplicate registration") + } + }() + Register(&fakeLang{name: "test-dup"}) +} + +func TestRegister_NilPanics(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Error("expected panic on nil registration") + } + }() + Register(nil) +} + +func TestRegister_EmptyNamePanics(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Error("expected panic on empty-name registration") + } + }() + Register(&fakeLang{name: ""}) +} + +func TestAll_SortedByName(t *testing.T) { + // Use distinct prefixes so we don't collide with any real language + // registrations coming from goanalyzer/init(). + defer UnregisterForTest("zzz-all-b") + defer UnregisterForTest("zzz-all-a") + defer UnregisterForTest("zzz-all-c") + + Register(&fakeLang{name: "zzz-all-b"}) + Register(&fakeLang{name: "zzz-all-a"}) + Register(&fakeLang{name: "zzz-all-c"}) + + all := All() + // Filter to just our test fakes so real registrations (e.g. "go" from + // goanalyzer) don't disturb the ordering assertion. 
+ var got []string + for _, l := range all { + if len(l.Name()) >= 4 && l.Name()[:4] == "zzz-" { + got = append(got, l.Name()) + } + } + want := []string{"zzz-all-a", "zzz-all-b", "zzz-all-c"} + if len(got) != len(want) { + t.Fatalf("got %v, want %v", got, want) + } + for i := range got { + if got[i] != want[i] { + t.Errorf("All[%d] = %q, want %q", i, got[i], want[i]) + } + } +} diff --git a/internal/mutation/features_test.go b/internal/mutation/features_test.go deleted file mode 100644 index e5d4cf9..0000000 --- a/internal/mutation/features_test.go +++ /dev/null @@ -1,374 +0,0 @@ -package mutation - -import ( - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" - "runtime" - "strings" - "testing" - - "github.com/0xPolygon/diffguard/internal/diff" -) - -// --- Annotation tests --- - -func TestScanAnnotations_DisableNextLine(t *testing.T) { - code := `package p - -func f() { - // mutator-disable-next-line - if true { - } -} -` - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", code, parser.ParseComments) - if err != nil { - t.Fatal(err) - } - disabled := scanAnnotations(fset, f) - // Comment is on line 4, so line 5 should be disabled - if !disabled[5] { - t.Errorf("expected line 5 disabled, got disabled=%v", disabled) - } - if disabled[4] { - t.Error("comment line should not be disabled") - } - if disabled[6] { - t.Error("line 6 should not be disabled") - } -} - -func TestScanAnnotations_DisableFunc(t *testing.T) { - code := `package p - -// mutator-disable-func -func f() { - if true { - } - x := 1 - _ = x -} - -func g() { - if true { - } -} -` - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", code, parser.ParseComments) - if err != nil { - t.Fatal(err) - } - disabled := scanAnnotations(fset, f) - - // All lines of f() (4-9) should be disabled - for i := 4; i <= 9; i++ { - if !disabled[i] { - t.Errorf("expected line %d disabled (inside f)", i) - } - } - // g() should not be disabled - if disabled[12] { - 
t.Error("g()'s line 12 should not be disabled") - } -} - -func TestScanAnnotations_NoAnnotations(t *testing.T) { - code := `package p - -func f() { - if true {} -} -` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, parser.ParseComments) - disabled := scanAnnotations(fset, f) - if len(disabled) != 0 { - t.Errorf("expected empty disabled map, got %v", disabled) - } -} - -func TestScanAnnotations_IrrelevantComment(t *testing.T) { - code := `package p - -// this is just a regular comment -func f() { - if true {} -} -` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, parser.ParseComments) - disabled := scanAnnotations(fset, f) - if len(disabled) != 0 { - t.Errorf("regular comments should not disable mutations, got %v", disabled) - } -} - -func TestGenerateMutants_HonorsDisableNextLine(t *testing.T) { - code := `package test - -func f(x int) bool { - // mutator-disable-next-line - if x > 0 { - return true - } - if x < 0 { - return false - } - return false -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - - mutants, err := generateMutants(fp, fc) - if err != nil { - t.Fatal(err) - } - - // The `x > 0` line is annotated — no mutants for line 5 - for _, m := range mutants { - if m.Line == 5 { - t.Errorf("expected no mutants on annotated line 5, got: %+v", m) - } - } - - // The `x < 0` line should still have mutants - foundAt8 := false - for _, m := range mutants { - if m.Line == 8 { - foundAt8 = true - } - } - if !foundAt8 { - t.Error("expected mutants on un-annotated line 8") - } -} - -// --- New operator tests --- - -func TestIncDecMutants(t *testing.T) { - // x++ -> x-- - incStmt := &ast.IncDecStmt{Tok: token.INC} - m := incdecMutants("a.go", 5, incStmt) - if len(m) != 1 { - t.Fatalf("expected 1 mutant for ++, got %d", len(m)) - } - if 
m[0].Operator != "incdec" { - t.Errorf("operator = %q, want incdec", m[0].Operator) - } - if !strings.Contains(m[0].Description, "--") { - t.Errorf("description = %q, expected it to mention --", m[0].Description) - } - - // x-- -> x++ - decStmt := &ast.IncDecStmt{Tok: token.DEC} - m = incdecMutants("a.go", 5, decStmt) - if len(m) != 1 { - t.Fatalf("expected 1 mutant for --, got %d", len(m)) - } - - // Other tokens produce nothing - other := &ast.IncDecStmt{Tok: token.ADD} - if ms := incdecMutants("a.go", 5, other); len(ms) != 0 { - t.Errorf("unexpected mutants for non-incdec tok: %+v", ms) - } -} - -func TestIfBodyMutants(t *testing.T) { - // If with body - body := &ast.BlockStmt{List: []ast.Stmt{&ast.ExprStmt{X: &ast.Ident{Name: "x"}}}} - ifStmt := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: body} - m := ifBodyMutants("a.go", 5, ifStmt) - if len(m) != 1 { - t.Fatalf("expected 1 mutant for non-empty if body, got %d", len(m)) - } - if m[0].Operator != "branch_removal" { - t.Errorf("operator = %q, want branch_removal", m[0].Operator) - } - - // If with empty body — no mutant - empty := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: &ast.BlockStmt{}} - if ms := ifBodyMutants("a.go", 5, empty); len(ms) != 0 { - t.Errorf("expected no mutants for empty if body, got %d", len(ms)) - } -} - -func TestExprStmtMutants_CallExpr(t *testing.T) { - call := &ast.ExprStmt{X: &ast.CallExpr{Fun: &ast.Ident{Name: "foo"}}} - m := exprStmtMutants("a.go", 5, call) - if len(m) != 1 { - t.Fatalf("expected 1 mutant for call expr, got %d", len(m)) - } - if m[0].Operator != "statement_deletion" { - t.Errorf("operator = %q, want statement_deletion", m[0].Operator) - } -} - -func TestExprStmtMutants_NonCall(t *testing.T) { - // ExprStmt wrapping a non-call (e.g., an ident) — skip - stmt := &ast.ExprStmt{X: &ast.Ident{Name: "x"}} - if ms := exprStmtMutants("a.go", 5, stmt); len(ms) != 0 { - t.Errorf("expected no mutants for non-call expr, got %d", len(ms)) - } -} - -func 
TestApplyIncDecMutation_Inc(t *testing.T) { - stmt := &ast.IncDecStmt{Tok: token.INC} - if !applyIncDecMutation(stmt) { - t.Error("expected successful apply") - } - if stmt.Tok != token.DEC { - t.Errorf("tok = %v, want DEC", stmt.Tok) - } -} - -func TestApplyIncDecMutation_Dec(t *testing.T) { - stmt := &ast.IncDecStmt{Tok: token.DEC} - if !applyIncDecMutation(stmt) { - t.Error("expected successful apply") - } - if stmt.Tok != token.INC { - t.Errorf("tok = %v, want INC", stmt.Tok) - } -} - -func TestApplyIncDecMutation_WrongNodeType(t *testing.T) { - if applyIncDecMutation(&ast.Ident{Name: "x"}) { - t.Error("expected false for non-IncDecStmt") - } -} - -func TestApplyBranchRemoval(t *testing.T) { - body := &ast.BlockStmt{List: []ast.Stmt{&ast.ExprStmt{X: &ast.Ident{Name: "x"}}}} - ifStmt := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: body} - if !applyBranchRemoval(ifStmt) { - t.Error("expected successful apply") - } - if len(ifStmt.Body.List) != 0 { - t.Errorf("expected body emptied, got %d stmts", len(ifStmt.Body.List)) - } -} - -func TestApplyBranchRemoval_WrongType(t *testing.T) { - if applyBranchRemoval(&ast.Ident{Name: "x"}) { - t.Error("expected false for non-IfStmt") - } -} - -func TestApplyStatementDeletion(t *testing.T) { - code := `package test - -func f() { - doThing() - x := 1 - _ = x -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - m := &Mutant{Line: 4, Operator: "statement_deletion"} - result := applyMutation(fp, m) - if result == nil { - t.Fatal("expected non-nil result") - } - // doThing() should be removed (replaced with empty stmt) - if strings.Contains(string(result), "doThing()") { - t.Errorf("expected doThing() removed, got:\n%s", string(result)) - } -} - -// --- Options tests --- - -func TestOptionsTimeout_Default(t *testing.T) { - opts := Options{} - if opts.timeout() != 30*1000*1000*1000 { // 30 seconds in ns - t.Errorf("default timeout = %v, want 30s", opts.timeout()) - } -} - 
-func TestOptionsWorkers(t *testing.T) { - // Zero → NumCPU. - zero := Options{} - if got, want := zero.workers(), runtime.NumCPU(); got != want { - t.Errorf("zero workers = %d, want runtime.NumCPU() = %d", got, want) - } - - // Negative → NumCPU (treat as unset). - neg := Options{Workers: -4} - if got, want := neg.workers(), runtime.NumCPU(); got != want { - t.Errorf("negative workers = %d, want runtime.NumCPU() = %d", got, want) - } - - // Explicit positive value is honored. - explicit := Options{Workers: 3} - if got := explicit.workers(); got != 3 { - t.Errorf("explicit workers = %d, want 3", got) - } -} - -func TestWriteOverlayJSON(t *testing.T) { - dir := t.TempDir() - overlayPath := filepath.Join(dir, "overlay.json") - if err := writeOverlayJSON(overlayPath, "/orig/foo.go", "/tmp/mutated.go"); err != nil { - t.Fatalf("writeOverlayJSON error: %v", err) - } - data, err := os.ReadFile(overlayPath) - if err != nil { - t.Fatal(err) - } - // Must be the exact shape go test -overlay expects: - // {"Replace":{"":""}} - expected := `{"Replace":{"/orig/foo.go":"/tmp/mutated.go"}}` - if string(data) != expected { - t.Errorf("overlay JSON = %q, want %q", string(data), expected) - } -} - -func TestBuildTestArgs_Default(t *testing.T) { - args := buildTestArgs(Options{}, "/tmp/overlay.json") - if args[0] != "test" { - t.Errorf("args[0] = %q, want test", args[0]) - } - // -overlay must always be present - foundOverlay := false - for _, a := range args { - if a == "-overlay=/tmp/overlay.json" { - foundOverlay = true - } - } - if !foundOverlay { - t.Errorf("expected -overlay=/tmp/overlay.json in args, got %v", args) - } - // No -run flag in default case - for _, a := range args { - if a == "-run" { - t.Error("did not expect -run in default args") - } - } -} - -func TestBuildTestArgs_WithPattern(t *testing.T) { - args := buildTestArgs(Options{TestPattern: "TestFoo"}, "/tmp/overlay.json") - found := false - for i, a := range args { - if a == "-run" && i+1 < len(args) && 
args[i+1] == "TestFoo" { - found = true - } - } - if !found { - t.Errorf("expected -run TestFoo in args, got %v", args) - } -} diff --git a/internal/mutation/mutation.go b/internal/mutation/mutation.go index fff7ee8..74bf795 100644 --- a/internal/mutation/mutation.go +++ b/internal/mutation/mutation.go @@ -1,12 +1,15 @@ +// Package mutation orchestrates mutation testing across a diff's changed +// files. The AST-level work (generating mutants, applying them, scanning +// annotations, running tests) is provided by the language back-end via +// lang.MutantGenerator / lang.MutantApplier / lang.AnnotationScanner / +// lang.TestRunner. This package owns the scheduling, tiering, and report +// formatting — pieces that don't depend on any particular language. package mutation import ( - "bytes" - "encoding/json" "fmt" "math/rand" "os" - "os/exec" "path/filepath" "runtime" "strings" @@ -14,6 +17,7 @@ import ( "time" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" ) @@ -31,33 +35,34 @@ type Mutant struct { type Options struct { // SampleRate is the percentage (0-100) of generated mutants to actually test. SampleRate float64 - // TestTimeout is the per-mutant timeout passed to `go test -timeout`. + // TestTimeout is the per-mutant timeout. // Zero means use the default (30s). TestTimeout time.Duration - // TestPattern, if non-empty, is passed to `go test -run` to scope tests. + // TestPattern, if non-empty, is passed to the language's test runner to + // scope tests. TestPattern string // Tier1Threshold is the minimum killed-percentage for Tier 1 operators - // (logic mutations) below which the section is reported as FAIL. Zero - // falls back to defaultTier1Threshold. + // below which the section is reported as FAIL. Zero falls back to + // defaultTier1Threshold. 
Tier1Threshold float64 // Tier2Threshold is the minimum killed-percentage for Tier 2 operators - // (semantic mutations) below which the section is reported as WARN. Zero - // falls back to defaultTier2Threshold. + // below which the section is reported as WARN. Zero falls back to + // defaultTier2Threshold. Tier2Threshold float64 - // Workers caps the number of packages processed concurrently. Zero or - // negative means use runtime.NumCPU(). Mutants within a single package - // always run sequentially regardless of this setting. + // Workers caps the number of mutants processed concurrently. Zero or + // negative means use runtime.NumCPU(). Workers int } const ( defaultTier1Threshold = 90.0 defaultTier2Threshold = 70.0 + defaultTestTimeout = 30 * time.Second ) func (o Options) timeout() time.Duration { if o.TestTimeout <= 0 { - return 30 * time.Second + return defaultTestTimeout } return o.TestTimeout } @@ -83,14 +88,16 @@ func (o Options) workers() int { return o.Workers } -// Analyze applies mutation operators to changed code and runs tests. +// Analyze applies mutation operators to changed code (via the language's +// MutantGenerator/Applier) and runs the language's TestRunner against each +// mutant. // -// Each mutant is tested in isolation using `go test -overlay` so mutants -// never touch the real source files on disk. This means mutants can be -// fully parallelized — including mutants on the same file or package — -// up to opts.workers() concurrent go test invocations. -func Analyze(repoPath string, d *diff.Result, opts Options) (report.Section, error) { - allMutants := collectMutants(repoPath, d) +// Parallelism is controlled by Options.Workers; concurrency safety is the +// TestRunner's responsibility (Go's overlay-based runner is safe to call +// concurrently; temp-copy runners for other languages must serialize +// per-file internally). 
+func Analyze(repoPath string, d *diff.Result, l lang.Language, opts Options) (report.Section, error) { + allMutants := collectMutants(repoPath, d, l) if len(allMutants) == 0 { return report.Section{ @@ -110,27 +117,49 @@ func Analyze(repoPath string, d *diff.Result, opts Options) (report.Section, err } defer os.RemoveAll(workDir) - killed := runMutantsParallel(repoPath, allMutants, opts, workDir) + killed := runMutantsParallel(repoPath, allMutants, l, opts, workDir) return buildSection(allMutants, killed, opts), nil } -func collectMutants(repoPath string, d *diff.Result) []Mutant { +// collectMutants gathers mutation sites for every changed file, honoring +// the language's annotation scanner so lines marked +// `// mutator-disable-*` never produce mutants. +func collectMutants(repoPath string, d *diff.Result, l lang.Language) []Mutant { + gen := l.MutantGenerator() + scanner := l.AnnotationScanner() + var all []Mutant for _, fc := range d.Files { absPath := filepath.Join(repoPath, fc.Path) - mutants, err := generateMutants(absPath, fc) + disabled, err := scanner.ScanAnnotations(absPath) + if err != nil { + continue + } + sites, err := gen.GenerateMutants(absPath, fc, disabled) if err != nil { continue } - all = append(all, mutants...) + for _, s := range sites { + all = append(all, Mutant{ + File: s.File, + Line: s.Line, + Description: s.Description, + Operator: s.Operator, + }) + } } return all } -// runMutantsParallel processes mutants fully in parallel (including mutants -// on the same file) up to opts.workers() concurrent workers. Isolation -// between mutants is provided by `go test -overlay`, not by serialization. -func runMutantsParallel(repoPath string, mutants []Mutant, opts Options, workDir string) int { +// runMutantsParallel processes mutants concurrently up to opts.workers(). 
+// Each mutant goes through ApplyMutation -> TestRunner.RunTest; the +// TestRunner implementation is responsible for isolating concurrent +// invocations (the Go runner uses `go test -overlay`; non-Go runners use +// per-file temp-copy + mutex). +func runMutantsParallel(repoPath string, mutants []Mutant, l lang.Language, opts Options, workDir string) int { + applier := l.MutantApplier() + runner := l.TestRunner() + var wg sync.WaitGroup sem := make(chan struct{}, opts.workers()) @@ -140,7 +169,7 @@ func runMutantsParallel(repoPath string, mutants []Mutant, opts Options, workDir go func(idx int) { defer wg.Done() defer func() { <-sem }() - mutants[idx].Killed = runMutant(repoPath, &mutants[idx], opts, workDir, idx) + mutants[idx].Killed = runMutant(repoPath, &mutants[idx], applier, runner, opts, workDir, idx) }(i) } wg.Wait() @@ -154,63 +183,43 @@ func runMutantsParallel(repoPath string, mutants []Mutant, opts Options, workDir return killed } -// runMutant applies a mutation to a temp file, uses go test -overlay to -// have the test compile against the temp file (leaving the real source -// untouched), and returns whether any test failed. -func runMutant(repoPath string, m *Mutant, opts Options, workDir string, idx int) bool { +// runMutant applies the mutation, writes the mutated source to a temp file +// inside workDir, and hands it to the language's TestRunner. The runner +// returns (killed, output, err); on runner error we skip the mutant. 
+func runMutant(repoPath string, m *Mutant, applier lang.MutantApplier, runner lang.TestRunner, opts Options, workDir string, idx int) bool { absPath := filepath.Join(repoPath, m.File) - mutated := applyMutation(absPath, m) - if mutated == nil { + mutated, err := applier.ApplyMutation(absPath, lang.MutantSite{ + File: m.File, + Line: m.Line, + Description: m.Description, + Operator: m.Operator, + }) + if err != nil || mutated == nil { return false } - mutantFile := filepath.Join(workDir, fmt.Sprintf("m%d.go", idx)) + mutantFile := filepath.Join(workDir, fmt.Sprintf("m%d%s", idx, filepath.Ext(absPath))) if err := os.WriteFile(mutantFile, mutated, 0644); err != nil { return false } - overlayPath := filepath.Join(workDir, fmt.Sprintf("m%d-overlay.json", idx)) - if err := writeOverlayJSON(overlayPath, absPath, mutantFile); err != nil { - return false - } - - pkgDir := filepath.Dir(absPath) - cmd := exec.Command("go", buildTestArgs(opts, overlayPath)...) - cmd.Dir = pkgDir - var stderr bytes.Buffer - cmd.Stderr = &stderr - err := cmd.Run() - - if err != nil { - m.TestOutput = stderr.String() - return true - } - return false -} - -// writeOverlayJSON writes a go build overlay file mapping originalPath to -// mutantPath. See `go help build` -overlay flag for format details. 
-func writeOverlayJSON(path, originalPath, mutantPath string) error { - overlay := struct { - Replace map[string]string `json:"Replace"` - }{ - Replace: map[string]string{originalPath: mutantPath}, - } - data, err := json.Marshal(overlay) + killed, output, err := runner.RunTest(lang.TestRunConfig{ + RepoPath: repoPath, + MutantFile: mutantFile, + OriginalFile: absPath, + Timeout: opts.timeout(), + TestPattern: opts.TestPattern, + WorkDir: workDir, + Index: idx, + }) if err != nil { - return err + return false } - return os.WriteFile(path, data, 0644) -} - -func buildTestArgs(opts Options, overlayPath string) []string { - args := []string{"test", "-overlay=" + overlayPath, "-count=1", "-timeout", opts.timeout().String()} - if opts.TestPattern != "" { - args = append(args, "-run", opts.TestPattern) + if killed { + m.TestOutput = output } - args = append(args, "./...") - return args + return killed } func sampleMutants(mutants []Mutant, rate float64) []Mutant { diff --git a/internal/mutation/mutation_extra_test.go b/internal/mutation/mutation_extra_test.go index fd29586..8c9e87b 100644 --- a/internal/mutation/mutation_extra_test.go +++ b/internal/mutation/mutation_extra_test.go @@ -1,284 +1,14 @@ package mutation import ( - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" - "strings" "testing" - "github.com/0xPolygon/diffguard/internal/diff" "github.com/0xPolygon/diffguard/internal/report" ) -func TestApplyBinaryMutation_Success(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.GTR} - m := &Mutant{Description: "> -> >=", Operator: "conditional_boundary"} - if !applyBinaryMutation(expr, m) { - t.Error("expected successful apply") - } - if expr.Op != token.GEQ { - t.Errorf("op = %v, want GEQ", expr.Op) - } -} - -func TestApplyBinaryMutation_WrongNodeType(t *testing.T) { - ident := &ast.Ident{Name: "x"} - m := &Mutant{Description: "> -> >=", Operator: "conditional_boundary"} - if applyBinaryMutation(ident, m) { - t.Error("expected false for non-BinaryExpr") 
- } -} - -func TestApplyBinaryMutation_IllegalOp(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.GTR} - m := &Mutant{Description: "invalid", Operator: "conditional_boundary"} - if applyBinaryMutation(expr, m) { - t.Error("expected false for invalid description") - } -} - -// TestApplyBinaryMutation_OperatorMismatch locks in the fix for a bug where -// applyBinaryMutation rewrote the first BinaryExpr found on a line even -// when its operator differed from the mutant's intended `from` op. E.g. -// given mutant "!= -> ==", applying it to the outer `&&` of `a != nil && b` -// must NOT succeed — otherwise `&&` gets replaced and the inner `!=` stays -// untouched, producing a false-surviving mutant. -func TestApplyBinaryMutation_OperatorMismatch(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.LAND} - m := &Mutant{Description: "!= -> ==", Operator: "negate_conditional"} - if applyBinaryMutation(expr, m) { - t.Error("expected false when expr.Op (&&) does not match mutant's from-op (!=)") - } - if expr.Op != token.LAND { - t.Errorf("expr.Op = %v, want LAND (unchanged)", expr.Op) - } -} - -// TestApplyBinaryMutation_MathOperatorMismatch: same fix for math operators -// — `start + count - 1` parses with an outer SUB, and mutant "+ -> -" must -// not no-op on that outer SUB. 
-func TestApplyBinaryMutation_MathOperatorMismatch(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.SUB} - m := &Mutant{Description: "+ -> -", Operator: "math_operator"} - if applyBinaryMutation(expr, m) { - t.Error("expected false when expr.Op (-) does not match mutant's from-op (+)") - } - if expr.Op != token.SUB { - t.Errorf("expr.Op = %v, want SUB (unchanged)", expr.Op) - } -} - -func TestApplyBoolMutation_TrueToFalse(t *testing.T) { - ident := &ast.Ident{Name: "true"} - m := &Mutant{Description: "true -> false", Operator: "boolean_substitution"} - if !applyBoolMutation(ident, m) { - t.Error("expected successful apply") - } - if ident.Name != "false" { - t.Errorf("name = %q, want false", ident.Name) - } -} - -func TestApplyBoolMutation_FalseToTrue(t *testing.T) { - ident := &ast.Ident{Name: "false"} - m := &Mutant{Description: "false -> true", Operator: "boolean_substitution"} - if !applyBoolMutation(ident, m) { - t.Error("expected successful apply") - } - if ident.Name != "true" { - t.Errorf("name = %q, want true", ident.Name) - } -} - -func TestApplyBoolMutation_WrongNodeType(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.ADD} - m := &Mutant{Description: "true -> false", Operator: "boolean_substitution"} - if applyBoolMutation(expr, m) { - t.Error("expected false for non-Ident") - } -} - -func TestApplyBoolMutation_NonBoolIdent(t *testing.T) { - ident := &ast.Ident{Name: "x"} - m := &Mutant{Description: "true -> false", Operator: "boolean_substitution"} - if applyBoolMutation(ident, m) { - t.Error("expected false for non-bool ident") - } -} - -func TestApplyReturnMutation_Success(t *testing.T) { - ret := &ast.ReturnStmt{ - Results: []ast.Expr{ - &ast.Ident{Name: "x", NamePos: 1}, - }, - } - if !applyReturnMutation(ret) { - t.Error("expected successful apply") - } - if ident, ok := ret.Results[0].(*ast.Ident); !ok || ident.Name != "nil" { - t.Error("expected result replaced with nil") - } -} - -func TestApplyReturnMutation_WrongNodeType(t *testing.T) { 
- ident := &ast.Ident{Name: "x"} - if applyReturnMutation(ident) { - t.Error("expected false for non-ReturnStmt") - } -} - -func TestTryApplyMutation_Binary(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.ADD} - m := &Mutant{Description: "+ -> -", Operator: "math_operator"} - if !tryApplyMutation(expr, m) { - t.Error("expected successful apply for math_operator") - } - if expr.Op != token.SUB { - t.Errorf("op = %v, want SUB", expr.Op) - } -} - -func TestTryApplyMutation_Bool(t *testing.T) { - ident := &ast.Ident{Name: "true"} - m := &Mutant{Description: "true -> false", Operator: "boolean_substitution"} - if !tryApplyMutation(ident, m) { - t.Error("expected successful apply for boolean_substitution") - } -} - -func TestTryApplyMutation_Return(t *testing.T) { - ret := &ast.ReturnStmt{Results: []ast.Expr{&ast.Ident{Name: "x", NamePos: 1}}} - m := &Mutant{Operator: "return_value"} - if !tryApplyMutation(ret, m) { - t.Error("expected successful apply for return_value") - } -} - -func TestTryApplyMutation_Unknown(t *testing.T) { - ident := &ast.Ident{Name: "x"} - m := &Mutant{Operator: "unknown_operator"} - if tryApplyMutation(ident, m) { - t.Error("expected false for unknown operator") - } -} - -func TestApplyMutationToAST(t *testing.T) { - code := `package test - -func f() bool { - return true -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, fp, nil, parser.ParseComments) - - m := &Mutant{Line: 4, Description: "true -> false", Operator: "boolean_substitution"} - if !applyMutationToAST(fset, f, m) { - t.Error("expected mutation to be applied") - } -} - -func TestApplyMutationToAST_NoMatch(t *testing.T) { - code := `package test - -func f() int { - return 42 -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, fp, nil, 
parser.ParseComments) - - m := &Mutant{Line: 999, Description: "true -> false", Operator: "boolean_substitution"} - if applyMutationToAST(fset, f, m) { - t.Error("expected no mutation applied for wrong line") - } -} - -func TestApplyMutation_Full(t *testing.T) { - code := `package test - -func f(a, b int) bool { - return a > b -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - m := &Mutant{Line: 4, Description: "> -> >=", Operator: "conditional_boundary"} - result := applyMutation(fp, m) - if result == nil { - t.Fatal("expected non-nil result") - } - if !strings.Contains(string(result), ">=") { - t.Error("expected mutated code to contain >=") - } -} - -func TestApplyMutation_ParseError(t *testing.T) { - m := &Mutant{Line: 1, Operator: "boolean_substitution"} - result := applyMutation("/nonexistent/file.go", m) - if result != nil { - t.Error("expected nil for parse error") - } -} - -func TestApplyMutation_NoMatch(t *testing.T) { - code := `package test - -func f() {} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - m := &Mutant{Line: 999, Operator: "boolean_substitution", Description: "true -> false"} - result := applyMutation(fp, m) - if result != nil { - t.Error("expected nil when mutation can't be applied") - } -} - -func TestRenderFile(t *testing.T) { - code := `package test - -func f() {} -` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, parser.ParseComments) - - result := renderFile(fset, f) - if result == nil { - t.Fatal("expected non-nil render result") - } - if !strings.Contains(string(result), "package test") { - t.Error("rendered file should contain package declaration") - } -} - -func TestZeroValueExpr(t *testing.T) { - original := &ast.Ident{Name: "x", NamePos: 42} - result := zeroValueExpr(original) - ident, ok := result.(*ast.Ident) - if !ok { - t.Fatal("expected *ast.Ident") - } - if ident.Name != "nil" { - 
t.Errorf("name = %q, want nil", ident.Name) - } -} - +// TestBuildSection_HighScore confirms a fully-killed Tier-1 run reports +// PASS. This is the "100% kill rate ⇒ PASS" invariant the CI gate relies +// on. func TestBuildSection_HighScore(t *testing.T) { mutants := []Mutant{ {File: "a.go", Line: 1, Killed: true, Operator: "negate_conditional"}, @@ -289,7 +19,7 @@ func TestBuildSection_HighScore(t *testing.T) { } s := buildSection(mutants, 5, Options{}) if s.Severity != report.SeverityPass { - t.Errorf("severity = %v, want PASS (100%% kill rate)", s.Severity) + t.Errorf("severity = %v, want PASS", s.Severity) } } @@ -305,10 +35,10 @@ func TestBuildSection_LowScore(t *testing.T) { } s := buildSection(mutants, 1, Options{}) if s.Severity != report.SeverityFail { - t.Errorf("severity = %v, want FAIL (Tier 1 at 20%% < default 90%%)", s.Severity) + t.Errorf("severity = %v, want FAIL", s.Severity) } if len(s.Findings) != 4 { - t.Errorf("findings = %d, want 4 (survived mutants)", len(s.Findings)) + t.Errorf("findings = %d, want 4", len(s.Findings)) } } @@ -324,99 +54,16 @@ func TestBuildSection_MediumScore(t *testing.T) { } s := buildSection(mutants, killed, Options{}) if s.Severity != report.SeverityWarn { - t.Errorf("severity = %v, want WARN (Tier 2 at 60%% < default 70%%)", s.Severity) + t.Errorf("severity = %v, want WARN", s.Severity) } } func TestBuildSection_ZeroMutants(t *testing.T) { s := buildSection(nil, 0, Options{}) - // No mutants means nothing to gate on — severity should be PASS and - // stats should still be populated. 
if s.Severity != report.SeverityPass { - t.Errorf("severity = %v, want PASS (no mutants to gate on)", s.Severity) + t.Errorf("severity = %v, want PASS", s.Severity) } if s.Stats == nil { t.Error("expected non-nil stats") } } - -func TestGenerateMutants_WithAllTypes(t *testing.T) { - code := `package test - -func f(a, b int) bool { - if a > b { - return true - } - x := a + b - _ = x - return false -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 20}}, - } - - mutants, err := generateMutants(fp, fc) - if err != nil { - t.Fatalf("error: %v", err) - } - - operators := make(map[string]int) - for _, m := range mutants { - operators[m.Operator]++ - } - - if operators["conditional_boundary"] == 0 { - t.Error("missing conditional_boundary mutants") - } - if operators["boolean_substitution"] == 0 { - t.Error("missing boolean_substitution mutants") - } - if operators["math_operator"] == 0 { - t.Error("missing math_operator mutants") - } - if operators["return_value"] == 0 { - t.Error("missing return_value mutants") - } -} - -func TestIsBoundary(t *testing.T) { - if !isBoundary(token.GTR) { - t.Error("GTR should be boundary") - } - if !isBoundary(token.GEQ) { - t.Error("GEQ should be boundary") - } - if isBoundary(token.EQL) { - t.Error("EQL should not be boundary") - } -} - -func TestIsComparison(t *testing.T) { - if !isComparison(token.EQL) { - t.Error("EQL should be comparison") - } - if !isComparison(token.NEQ) { - t.Error("NEQ should be comparison") - } - if isComparison(token.GTR) { - t.Error("GTR should not be comparison") - } -} - -func TestIsMath(t *testing.T) { - if !isMath(token.ADD) { - t.Error("ADD should be math") - } - if !isMath(token.MUL) { - t.Error("MUL should be math") - } - if isMath(token.EQL) { - t.Error("EQL should not be math") - } -} diff --git a/internal/mutation/mutation_test.go 
b/internal/mutation/mutation_test.go index 5c63307..7a1b4c5 100644 --- a/internal/mutation/mutation_test.go +++ b/internal/mutation/mutation_test.go @@ -1,128 +1,17 @@ package mutation import ( - "go/ast" - "go/token" - "os" + "runtime" "testing" - - "github.com/0xPolygon/diffguard/internal/diff" ) -func TestBinaryMutants(t *testing.T) { - tests := []struct { - name string - op token.Token - expected int - }{ - {"greater than", token.GTR, 1}, - {"less than", token.LSS, 1}, - {"equal", token.EQL, 1}, - {"not equal", token.NEQ, 1}, - {"add", token.ADD, 1}, - {"subtract", token.SUB, 1}, - {"multiply", token.MUL, 1}, - {"divide", token.QUO, 1}, - {"and (no mutation)", token.LAND, 0}, - {"or (no mutation)", token.LOR, 0}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - expr := &ast.BinaryExpr{Op: tt.op} - mutants := binaryMutants("test.go", 1, expr) - if len(mutants) != tt.expected { - t.Errorf("binaryMutants(%v) produced %d mutants, want %d", tt.op, len(mutants), tt.expected) - } - }) - } -} - -func TestBoolMutants(t *testing.T) { - tests := []struct { - name string - ident string - expected int - }{ - {"true", "true", 1}, - {"false", "false", 1}, - {"other", "x", 0}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - ident := &ast.Ident{Name: tt.ident} - mutants := boolMutants("test.go", 1, ident) - if len(mutants) != tt.expected { - t.Errorf("boolMutants(%q) produced %d mutants, want %d", tt.ident, len(mutants), tt.expected) - } - }) - } -} - -func TestReturnMutants(t *testing.T) { - // Return with values - ret := &ast.ReturnStmt{ - Results: []ast.Expr{&ast.Ident{Name: "x"}}, - } - mutants := returnMutants("test.go", 1, ret) - if len(mutants) != 1 { - t.Errorf("returnMutants with values: got %d, want 1", len(mutants)) - } - - // Bare return - bareRet := &ast.ReturnStmt{} - mutants = returnMutants("test.go", 1, bareRet) - if len(mutants) != 0 { - t.Errorf("returnMutants bare: got %d, want 0", len(mutants)) - } -} - 
-func TestGenerateMutants(t *testing.T) { - code := `package test - -func add(a, b int) int { - if a > b { - return a + b - } - return a - b -} -` - dir := t.TempDir() - filePath := dir + "/test.go" - if err := os.WriteFile(filePath, []byte(code), 0644); err != nil { - t.Fatalf("writeTestFile: %v", err) - } - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{ - {StartLine: 1, EndLine: 8}, - }, - } - - mutants, err := generateMutants(filePath, fc) - if err != nil { - t.Fatalf("generateMutants error: %v", err) - } - - if len(mutants) == 0 { - t.Error("expected mutants, got none") - } - - // Should have mutations for: > (boundary), + (math), - (math) - operators := make(map[string]int) - for _, m := range mutants { - operators[m.Operator]++ - } - - if operators["conditional_boundary"] == 0 { - t.Error("expected conditional_boundary mutants") - } - if operators["math_operator"] == 0 { - t.Error("expected math_operator mutants") - } -} +// Most of what used to be tested here was the Go AST machinery: +// binaryMutants, boolMutants, applyBoolMutation, writeOverlayJSON, +// buildTestArgs, scanAnnotations, etc. All of that now lives in +// internal/lang/goanalyzer/ next to the code, and the tests moved with it. +// +// What remains here exercises the orchestration: options defaults, mutant +// sampling, tier aggregation, and section formatting. 
func TestSampleMutants(t *testing.T) { mutants := make([]Mutant, 100) @@ -141,42 +30,46 @@ func TestSampleMutants(t *testing.T) { } } -func TestOperatorName(t *testing.T) { - tests := []struct { - from, to token.Token - expected string - }{ - {token.GTR, token.GEQ, "conditional_boundary"}, - {token.EQL, token.NEQ, "negate_conditional"}, - {token.ADD, token.SUB, "math_operator"}, +func TestOptionsTimeout_Default(t *testing.T) { + opts := Options{} + if opts.timeout() != defaultTestTimeout { + t.Errorf("default timeout = %v, want %v", opts.timeout(), defaultTestTimeout) + } +} + +func TestOptionsWorkers(t *testing.T) { + zero := Options{} + if got, want := zero.workers(), runtime.NumCPU(); got != want { + t.Errorf("zero workers = %d, want NumCPU = %d", got, want) + } + + neg := Options{Workers: -4} + if got, want := neg.workers(), runtime.NumCPU(); got != want { + t.Errorf("negative workers = %d, want NumCPU = %d", got, want) } - for _, tt := range tests { - got := operatorName(tt.from, tt.to) - if got != tt.expected { - t.Errorf("operatorName(%v, %v) = %q, want %q", tt.from, tt.to, got, tt.expected) - } + explicit := Options{Workers: 3} + if got := explicit.workers(); got != 3 { + t.Errorf("explicit workers = %d, want 3", got) } } -func TestParseMutationOp(t *testing.T) { - tests := []struct { - desc string - wantFrom token.Token - wantTo token.Token - }{ - {"> -> >=", token.GTR, token.GEQ}, - {"== -> !=", token.EQL, token.NEQ}, - {"+ -> -", token.ADD, token.SUB}, - {"invalid", token.ILLEGAL, token.ILLEGAL}, - {"+ -> unknown", token.ILLEGAL, token.ILLEGAL}, +func TestOptionsTiers(t *testing.T) { + // Defaults kick in when thresholds are zero. 
+ zero := Options{} + if got := zero.tier1Threshold(); got != defaultTier1Threshold { + t.Errorf("tier1 default = %v, want %v", got, defaultTier1Threshold) + } + if got := zero.tier2Threshold(); got != defaultTier2Threshold { + t.Errorf("tier2 default = %v, want %v", got, defaultTier2Threshold) } - for _, tt := range tests { - gotFrom, gotTo := parseMutationOp(tt.desc) - if gotFrom != tt.wantFrom || gotTo != tt.wantTo { - t.Errorf("parseMutationOp(%q) = (%v, %v), want (%v, %v)", - tt.desc, gotFrom, gotTo, tt.wantFrom, tt.wantTo) - } + // Explicit values are honored. + explicit := Options{Tier1Threshold: 75, Tier2Threshold: 50} + if got := explicit.tier1Threshold(); got != 75 { + t.Errorf("tier1 explicit = %v, want 75", got) + } + if got := explicit.tier2Threshold(); got != 50 { + t.Errorf("tier2 explicit = %v, want 50", got) } } diff --git a/internal/mutation/tiers.go b/internal/mutation/tiers.go index 14e9bb9..f733fc4 100644 --- a/internal/mutation/tiers.go +++ b/internal/mutation/tiers.go @@ -46,11 +46,33 @@ func (t Tier) String() string { // operatorTier maps a mutation operator name (as set on Mutant.Operator) to // its tier. Unknown operators default to TierSemantic so a new operator // doesn't silently become report-only noise. +// +// Language-specific operators are listed alongside the canonical ones — the +// tier reflects the signal quality of a surviving mutant, which is a +// property of what the mutation encodes rather than which language it +// targets. Rust's `unwrap_removal` and `some_to_none` strip error-handling +// that well-tested code almost always exercises, so both sit in Tier 1 +// alongside negate_conditional. Rust's `question_mark_removal` also +// disables error propagation but equivalent-mutant rate is higher (early +// returns can be substituted by the caller's own match), so it lands in +// Tier 2. 
+// +// TypeScript's `strict_equality` (toggling `===` vs `==`) almost always +// exposes a real semantic gap when it survives — tests that don't +// distinguish strict from loose equality are usually broken — so it sits +// in Tier 1 alongside negate_conditional. `nullish_to_logical_or` (`??` +// to `||`) and `optional_chain_removal` (`foo?.bar` to `foo.bar`) land in +// Tier 2: both have meaningful equivalent-mutant cases on code that +// never encounters nullish inputs. func operatorTier(op string) Tier { switch op { - case "negate_conditional", "conditional_boundary", "return_value", "math_operator": + case "negate_conditional", "conditional_boundary", "return_value", "math_operator", + "unwrap_removal", "some_to_none", + "strict_equality": return TierLogic - case "boolean_substitution", "incdec": + case "boolean_substitution", "incdec", + "question_mark_removal", + "nullish_to_logical_or", "optional_chain_removal": return TierSemantic case "statement_deletion", "branch_removal": return TierObservability diff --git a/internal/mutation/tiers_test.go b/internal/mutation/tiers_test.go index 3bd08d6..a91bd0f 100644 --- a/internal/mutation/tiers_test.go +++ b/internal/mutation/tiers_test.go @@ -21,6 +21,23 @@ func TestOperatorTier(t *testing.T) { {"incdec", TierSemantic}, {"statement_deletion", TierObservability}, {"branch_removal", TierObservability}, + // Rust-specific: unwrap_removal and some_to_none encode real test + // gaps when they survive (well-tested code exercises error paths), + // so they land in Tier 1. question_mark_removal has a higher + // equivalent-mutant rate because callers can substitute their own + // error plumbing, so it sits in Tier 2. + {"unwrap_removal", TierLogic}, + {"some_to_none", TierLogic}, + {"question_mark_removal", TierSemantic}, + // TypeScript-specific: strict_equality toggles === vs == — when a + // surviving mutant doesn't get caught the test suite almost certainly + // isn't exercising strict-comparison semantics, so Tier 1. 
+ // nullish_to_logical_or and optional_chain_removal have higher + // equivalent-mutant rates because code that never encounters nullish + // inputs won't distinguish the mutated form, so they're Tier 2. + {"strict_equality", TierLogic}, + {"nullish_to_logical_or", TierSemantic}, + {"optional_chain_removal", TierSemantic}, // Unknown defaults to TierSemantic so new operators don't silently // land in the noise-prone tier. {"unknown_operator", TierSemantic}, diff --git a/internal/sizes/sizes.go b/internal/sizes/sizes.go index 027730c..bf67a5e 100644 --- a/internal/sizes/sizes.go +++ b/internal/sizes/sizes.go @@ -1,38 +1,30 @@ +// Package sizes reports function and file line counts for diff-scoped files +// using a language-supplied lang.FunctionExtractor. The per-language AST +// work lives in the language back-end (for Go: goanalyzer/sizes.go). package sizes import ( "fmt" - "go/ast" - "go/parser" - "go/token" "path/filepath" "sort" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" ) -// FunctionSize holds size info for a single function. -type FunctionSize struct { - File string - Line int - Name string - Lines int -} - -// FileSize holds size info for a single file. -type FileSize struct { - Path string - Lines int -} - -// Analyze measures lines of code for changed functions and files. -func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int) (report.Section, error) { - var funcResults []FunctionSize - var fileResults []FileSize +// Analyze measures lines of code for changed functions and files using the +// supplied language extractor. 
+func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int, extractor lang.FunctionExtractor) (report.Section, error) { + var funcResults []lang.FunctionSize + var fileResults []lang.FileSize for _, fc := range d.Files { - funcs, fileSize := analyzeFile(repoPath, fc) + absPath := filepath.Join(repoPath, fc.Path) + funcs, fileSize, err := extractor.ExtractFunctions(absPath, fc) + if err != nil { + return report.Section{}, fmt.Errorf("analyzing %s: %w", fc.Path, err) + } funcResults = append(funcResults, funcs...) if fileSize != nil { fileResults = append(fileResults, *fileSize) @@ -42,64 +34,7 @@ func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int) return buildSection(funcResults, fileResults, funcThreshold, fileThreshold), nil } -func analyzeFile(repoPath string, fc diff.FileChange) ([]FunctionSize, *FileSize) { - absPath := filepath.Join(repoPath, fc.Path) - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, absPath, nil, 0) - if err != nil { - return nil, nil - } - - var fileSize *FileSize - file := fset.File(f.Pos()) - if file != nil { - fileSize = &FileSize{Path: fc.Path, Lines: file.LineCount()} - } - - return collectFunctionSizes(fset, f, fc), fileSize -} - -func collectFunctionSizes(fset *token.FileSet, f *ast.File, fc diff.FileChange) []FunctionSize { - var results []FunctionSize - ast.Inspect(f, func(n ast.Node) bool { - fn, ok := n.(*ast.FuncDecl) - if !ok { - return true - } - startLine := fset.Position(fn.Pos()).Line - endLine := fset.Position(fn.End()).Line - if !fc.OverlapsRange(startLine, endLine) { - return false - } - results = append(results, FunctionSize{ - File: fc.Path, - Line: startLine, - Name: funcName(fn), - Lines: endLine - startLine + 1, - }) - return false - }) - return results -} - -func funcName(fn *ast.FuncDecl) string { - if fn.Recv != nil && len(fn.Recv.List) > 0 { - recv := fn.Recv.List[0] - var typeName string - switch t := recv.Type.(type) { - case *ast.StarExpr: - if 
ident, ok := t.X.(*ast.Ident); ok { - typeName = ident.Name - } - case *ast.Ident: - typeName = t.Name - } - return fmt.Sprintf("(%s).%s", typeName, fn.Name.Name) - } - return fn.Name.Name -} - -func checkFuncSizes(funcs []FunctionSize, threshold int) []report.Finding { +func checkFuncSizes(funcs []lang.FunctionSize, threshold int) []report.Finding { var findings []report.Finding for _, f := range funcs { if f.Lines > threshold { @@ -117,7 +52,7 @@ func checkFuncSizes(funcs []FunctionSize, threshold int) []report.Finding { return findings } -func checkFileSizes(files []FileSize, threshold int) []report.Finding { +func checkFileSizes(files []lang.FileSize, threshold int) []report.Finding { var findings []report.Finding for _, f := range files { if f.Lines > threshold { @@ -133,7 +68,7 @@ func checkFileSizes(files []FileSize, threshold int) []report.Finding { return findings } -func buildSection(funcs []FunctionSize, files []FileSize, funcThreshold, fileThreshold int) report.Section { +func buildSection(funcs []lang.FunctionSize, files []lang.FileSize, funcThreshold, fileThreshold int) report.Section { if len(funcs) == 0 && len(files) == 0 { return report.Section{ Name: "Code Sizes", @@ -163,10 +98,10 @@ func buildSection(funcs []FunctionSize, files []FileSize, funcThreshold, fileThr Findings: findings, Stats: map[string]any{ "total_functions": len(funcs), - "total_files": len(files), - "violations": len(findings), + "total_files": len(files), + "violations": len(findings), "function_threshold": funcThreshold, - "file_threshold": fileThreshold, + "file_threshold": fileThreshold, }, } } diff --git a/internal/sizes/sizes_test.go b/internal/sizes/sizes_test.go index 1b7df27..d53f11d 100644 --- a/internal/sizes/sizes_test.go +++ b/internal/sizes/sizes_test.go @@ -6,139 +6,25 @@ import ( "testing" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" 
"github.com/0xPolygon/diffguard/internal/report" ) -func TestAnalyzeFile(t *testing.T) { - code := `package test - -func short() { - x := 1 - _ = x -} - -func longer() { - a := 1 - b := 2 - c := 3 - d := 4 - e := 5 - _ = a + b + c + d + e -} -` - dir := t.TempDir() - filePath := filepath.Join(dir, "test.go") - os.WriteFile(filePath, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - - funcs, fileSize := analyzeFile(dir, fc) - - if fileSize == nil { - t.Fatal("expected non-nil fileSize") - } - if fileSize.Lines == 0 { - t.Error("file should have non-zero lines") - } - if fileSize.Path != "test.go" { - t.Errorf("fileSize.Path = %q, want test.go", fileSize.Path) - } - - if len(funcs) != 2 { - t.Fatalf("expected 2 functions, got %d", len(funcs)) - } - if funcs[0].Name != "short" { - t.Errorf("funcs[0].Name = %q, want short", funcs[0].Name) - } - if funcs[0].Lines <= 0 { - t.Error("function lines should be > 0") - } -} - -func TestAnalyzeFile_ParseError(t *testing.T) { - dir := t.TempDir() - fc := diff.FileChange{ - Path: "nonexistent.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 10}}, - } - - funcs, fileSize := analyzeFile(dir, fc) - if funcs != nil { - t.Error("expected nil funcs for parse error") - } - if fileSize != nil { - t.Error("expected nil fileSize for parse error") - } -} - -func TestCollectFunctionSizes_OnlyInRange(t *testing.T) { - code := `package test - -func inRange() { - x := 1 - _ = x -} - -func outOfRange() { - y := 2 - _ = y -} -` - dir := t.TempDir() - filePath := filepath.Join(dir, "test.go") - os.WriteFile(filePath, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 3, EndLine: 6}}, - } - - funcs, _ := analyzeFile(dir, fc) - if len(funcs) != 1 { - t.Fatalf("expected 1 function in range, got %d", len(funcs)) - } - if funcs[0].Name != "inRange" { - t.Errorf("expected inRange, got %s", 
funcs[0].Name) - } -} - -func TestCollectFunctionSizes_LineCalc(t *testing.T) { - code := `package test - -func f() { - a := 1 - b := 2 - c := 3 - _ = a + b + c -} -` - dir := t.TempDir() - filePath := filepath.Join(dir, "test.go") - os.WriteFile(filePath, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - - funcs, _ := analyzeFile(dir, fc) - if len(funcs) != 1 { - t.Fatalf("expected 1 function, got %d", len(funcs)) - } - // func f() { starts at line 3, } at line 8 = 6 lines - if funcs[0].Lines != 6 { - t.Errorf("function lines = %d, want 6", funcs[0].Lines) +func goExtractor(t *testing.T) lang.FunctionExtractor { + t.Helper() + l, ok := lang.Get("go") + if !ok { + t.Fatal("go language not registered") } + return l.FunctionExtractor() } func TestCheckFuncSizes(t *testing.T) { - funcs := []FunctionSize{ - {File: "a.go", Line: 1, Name: "small", Lines: 10}, - {File: "b.go", Line: 1, Name: "big", Lines: 60}, - {File: "c.go", Line: 1, Name: "huge", Lines: 100}, + funcs := []lang.FunctionSize{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "small"}, Lines: 10}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "big"}, Lines: 60}, + {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "huge"}, Lines: 100}, } findings := checkFuncSizes(funcs, 50) @@ -153,9 +39,9 @@ func TestCheckFuncSizes(t *testing.T) { } func TestCheckFuncSizes_AtBoundary(t *testing.T) { - funcs := []FunctionSize{ - {File: "a.go", Line: 1, Name: "exact", Lines: 50}, - {File: "b.go", Line: 1, Name: "over", Lines: 51}, + funcs := []lang.FunctionSize{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "exact"}, Lines: 50}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "over"}, Lines: 51}, } findings := checkFuncSizes(funcs, 50) @@ -165,7 +51,7 @@ func TestCheckFuncSizes_AtBoundary(t *testing.T) { } func TestCheckFileSizes(t *testing.T) { - files := 
[]FileSize{ + files := []lang.FileSize{ {Path: "small.go", Lines: 100}, {Path: "big.go", Lines: 600}, } @@ -177,7 +63,7 @@ func TestCheckFileSizes(t *testing.T) { } func TestCheckFileSizes_AtBoundary(t *testing.T) { - files := []FileSize{ + files := []lang.FileSize{ {Path: "exact.go", Lines: 500}, {Path: "over.go", Lines: 501}, } @@ -199,7 +85,7 @@ func TestBuildSection_Empty(t *testing.T) { } func TestBuildSection_WithViolations(t *testing.T) { - funcs := []FunctionSize{{File: "a.go", Line: 1, Name: "big", Lines: 100}} + funcs := []lang.FunctionSize{{FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "big"}, Lines: 100}} s := buildSection(funcs, nil, 50, 500) if s.Severity != report.SeverityFail { t.Errorf("section severity = %v, want FAIL", s.Severity) @@ -210,8 +96,8 @@ func TestBuildSection_WithViolations(t *testing.T) { } func TestBuildSection_NoViolations(t *testing.T) { - funcs := []FunctionSize{{File: "a.go", Line: 1, Name: "small", Lines: 10}} - files := []FileSize{{Path: "a.go", Lines: 100}} + funcs := []lang.FunctionSize{{FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "small"}, Lines: 10}} + files := []lang.FileSize{{Path: "a.go", Lines: 100}} s := buildSection(funcs, files, 50, 500) if s.Severity != report.SeverityPass { t.Errorf("severity = %v, want PASS", s.Severity) @@ -219,10 +105,10 @@ func TestBuildSection_NoViolations(t *testing.T) { } func TestBuildSection_SortedByValue(t *testing.T) { - funcs := []FunctionSize{ - {File: "a.go", Line: 1, Name: "medium", Lines: 60}, - {File: "b.go", Line: 1, Name: "huge", Lines: 200}, - {File: "c.go", Line: 1, Name: "big", Lines: 80}, + funcs := []lang.FunctionSize{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "medium"}, Lines: 60}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "huge"}, Lines: 200}, + {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "big"}, Lines: 80}, } s := buildSection(funcs, nil, 50, 500) if len(s.Findings) != 3 { @@ 
-233,41 +119,10 @@ func TestBuildSection_SortedByValue(t *testing.T) { } } -func TestFuncName(t *testing.T) { - tests := []struct { - code string - expected string - }{ - {`package p; func Foo() {}`, "Foo"}, - {`package p; type T struct{}; func (t T) Bar() {}`, "(T).Bar"}, - {`package p; type T struct{}; func (t *T) Baz() {}`, "(T).Baz"}, - } - - for _, tt := range tests { - t.Run(tt.expected, func(t *testing.T) { - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(tt.code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - funcs, _ := analyzeFile(dir, fc) - found := false - for _, f := range funcs { - if f.Name == tt.expected { - found = true - } - } - if !found { - t.Errorf("funcName not found: want %q, got %v", tt.expected, funcs) - } - }) - } -} - -func TestAnalyze(t *testing.T) { +// TestAnalyze_WithGoExtractor is the integration replacement for the old +// analyzeFile-based unit tests. The AST walk logic now lives in goanalyzer +// and has its own tests; here we only verify the orchestration wiring. +func TestAnalyze_WithGoExtractor(t *testing.T) { code := `package test func small() { @@ -277,7 +132,9 @@ func small() { ` dir := t.TempDir() fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) + if err := os.WriteFile(fp, []byte(code), 0644); err != nil { + t.Fatal(err) + } d := &diff.Result{ Files: []diff.FileChange{ @@ -285,7 +142,7 @@ func small() { }, } - section, err := Analyze(dir, d, 50, 500) + section, err := Analyze(dir, d, 50, 500, goExtractor(t)) if err != nil { t.Fatalf("Analyze error: %v", err) }