diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8bdace3..2a1e9df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,6 +21,20 @@ jobs: go-version: '1.26.1' cache: true + # Node is required by the TS mutation evals (npm test -> vitest / + # node). Minimum 22.6 so `--experimental-strip-types` is available. + - uses: actions/setup-node@v4 + with: + node-version: '22' + + - name: Cache npm + uses: actions/cache@v4 + with: + path: ~/.npm + key: npm-${{ runner.os }}-${{ hashFiles('**/package.json', '**/package-lock.json') }} + restore-keys: | + npm-${{ runner.os }}- + - name: Build run: go build ./... @@ -30,6 +44,11 @@ jobs: - name: Vet run: go vet ./... + - name: Eval — TypeScript (EVAL-3) + env: + CI: "true" + run: make eval-ts + diffguard: # Dogfooding: run diffguard's own quality gate against this repo. # Mutation testing runs at 20% sample rate here as a fast smoke diff --git a/Makefile b/Makefile index c5e4dcf..c5bbe4f 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,12 @@ BINARY := diffguard PKG := ./cmd/diffguard PATHS := internal/,cmd/ -.PHONY: all build install test coverage check check-mutation check-fast clean help +# Shared env for evaluation suites. CI=true nudges sub-commands (cargo, +# npm) into non-interactive modes; CARGO_INCREMENTAL=0 keeps the +# mutation runs deterministic and avoids a multi-GB incremental cache. +EVAL_ENV := CI=true CARGO_INCREMENTAL=0 + +.PHONY: all build install test coverage check check-mutation check-fast eval-ts clean help all: build @@ -28,6 +33,9 @@ check: build ## Run the full quality gate including 100% mutation testing (slow) check-mutation: build ## Only the mutation section, full codebase ./$(BINARY) --paths $(PATHS) --fail-on warn . +eval-ts: ## Run the TypeScript correctness eval (EVAL-3). Requires node+npm for mutation tests. + $(EVAL_ENV) go test ./internal/lang/tsanalyzer/... 
-run TestEval -count=1 -v + clean: ## Remove build artifacts rm -f $(BINARY) coverage.out diff --git a/README.md b/README.md index d4d9e4f..80c2716 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # diffguard -A targeted code quality gate for Go repositories. Analyzes either the changed regions of a git diff (CI mode) or specified files/directories (refactoring mode), and reports on complexity, size, dependency structure, churn risk, and mutation test coverage. +A targeted code quality gate for Go and TypeScript repositories. Analyzes either the changed regions of a git diff (CI mode) or specified files/directories (refactoring mode), and reports on complexity, size, dependency structure, churn risk, and mutation test coverage. ## Why @@ -42,6 +42,10 @@ diffguard --base main /path/to/repo diffguard --paths internal/foo/bar.go /path/to/repo diffguard --paths internal/foo/,internal/bar/ /path/to/repo +# TypeScript project (vitest/jest auto-detected from package.json) +diffguard --base main /path/to/ts-repo +diffguard --paths src/auth/,src/billing/ /path/to/ts-repo + # Skip mutation testing (fastest) diffguard --skip-mutation /path/to/repo @@ -70,6 +74,40 @@ diffguard \ **Generated-file skipping (`--skip-generated`):** Enabled by default. Files marked with a standard generated-code banner such as `Code generated ... DO NOT EDIT` are excluded before they reach any analyzer. Pass `--skip-generated=false` to include them. +## Languages + +Diffguard auto-detects supported languages from the files it sees. No flag selects the language — analyzers activate on their own file types. + +| Language | Files | Test runner | +|------------|----------------|--------------------------------------------------------------------------------| +| Go | `*.go` | `go test` | +| TypeScript | `*.ts`, `*.tsx`| Auto-detected from `package.json`: `npx vitest run` → `npx jest` → `npm test` | + +**TypeScript prerequisites.** `node` and `npm` (or `npx`) must be on `PATH` for mutation testing. 
The TypeScript analyzer only activates when the repo has a `package.json` AND at least one `.ts` / `.tsx` file, so pure-JS projects are left alone. Test files (`*.test.ts`, `*.spec.ts`, `*.test.tsx`, `*.spec.tsx`, or anything under a `__tests__` / `__mocks__` segment) are excluded from mutation. Mutation testing spawns the detected runner once per mutant, so expect TS runs to take longer than Go runs (node startup + TS compile per mutant) — use `--mutation-sample-rate` for fast PR feedback. + +### TypeScript example + +```bash +# Go install once +go install github.com/0xPolygon/diffguard/cmd/diffguard@latest + +# From your TypeScript repo, PR-style diff mode with a 20% mutation sample +cd /path/to/ts-repo +diffguard --mutation-sample-rate 20 --base origin/main . + +# Or scope to specific subdirectories in refactoring mode +diffguard --paths src/billing/,src/auth/ . +``` + +In GitHub Actions, add `actions/setup-node` and an `npm ci` step to the [Per-PR gate workflow](#github-actions) below so `npx vitest` / `npx jest` are available when diffguard spawns mutant runs. 
The extra steps, inserted after `actions/setup-go`: + +```yaml +- uses: actions/setup-node@v4 + with: + node-version: '20' +- run: npm ci # installs vitest / jest so diffguard can invoke them per mutant +``` + ## What It Measures ### Cognitive Complexity diff --git a/cmd/diffguard/main.go b/cmd/diffguard/main.go index a6b319c..7526957 100644 --- a/cmd/diffguard/main.go +++ b/cmd/diffguard/main.go @@ -16,6 +16,7 @@ import ( "github.com/0xPolygon/diffguard/internal/diff" "github.com/0xPolygon/diffguard/internal/lang" _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" + _ "github.com/0xPolygon/diffguard/internal/lang/tsanalyzer" "github.com/0xPolygon/diffguard/internal/mutation" "github.com/0xPolygon/diffguard/internal/report" "github.com/0xPolygon/diffguard/internal/sizes" diff --git a/go.mod b/go.mod index ee2a376..bd17a28 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module github.com/0xPolygon/diffguard go 1.26.1 + +require github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..702e57b --- /dev/null +++ b/go.sum @@ -0,0 +1,10 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 h1:6C8qej6f1bStuePVkLSFxoU22XBS165D3klxlzRg8F4= +github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82/go.mod h1:xe4pgH49k4SsmkQq5OT8abwhWmnzkhpgnXeekbx2efw= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 
v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/lang/tsanalyzer/complexity.go b/internal/lang/tsanalyzer/complexity.go new file mode 100644 index 0000000..0f71d6b --- /dev/null +++ b/internal/lang/tsanalyzer/complexity.go @@ -0,0 +1,324 @@ +package tsanalyzer + +import ( + "sort" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// complexityImpl implements both lang.ComplexityCalculator and +// lang.ComplexityScorer for TypeScript via tree-sitter. Same reuse +// strategy as the Go and Rust analyzers: the per-file walk is fast enough +// that the churn analyzer shares the full algorithm instead of a lighter +// approximation. +type complexityImpl struct{} + +// AnalyzeFile returns per-function cognitive complexity for every function +// overlapping the diff's changed regions. +func (complexityImpl) AnalyzeFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + return scoreFile(absPath, fc) +} + +// ScoreFile is the ComplexityScorer entry point used by the churn +// analyzer. 
+func (complexityImpl) ScoreFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + return scoreFile(absPath, fc) +} + +func scoreFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, nil + } + defer tree.Close() + + fns := collectFunctions(tree.RootNode(), src) + + var results []lang.FunctionComplexity + for _, fn := range fns { + if !fc.OverlapsRange(fn.startLine, fn.endLine) { + continue + } + results = append(results, lang.FunctionComplexity{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: fn.startLine, + EndLine: fn.endLine, + Name: fn.name, + }, + Complexity: cognitiveComplexity(fn.body, src), + }) + } + + sort.SliceStable(results, func(i, j int) bool { + if results[i].Line != results[j].Line { + return results[i].Line < results[j].Line + } + return results[i].Name < results[j].Name + }) + return results, nil +} + +// cognitiveComplexity computes the TypeScript cognitive-complexity score +// for the body of a function. Per the design doc: +// +// - Base +1 on: if_statement, for_statement, for_in_statement, +// for_of_statement, while_statement, switch_statement, try_statement, +// ternary_expression. +// - +1 per catch_clause. +// - +1 per else branch. +// - +1 per case clause with content (empty fall-through cases don't count). +// - +1 per `.catch(` promise-chain call (string-match on the method name). +// - +1 per operator-sequence switch inside &&/|| chains. +// - Do NOT count: optional chaining `?.`, nullish coalescing `??`, +// `await` alone, `async` keyword, stream method calls. +// - Nesting penalty: +1 per nesting level when descending into bodies of +// scope-introducing constructs. +// +// A nil body (abstract method, overload signature) has complexity 0. 
+func cognitiveComplexity(body *sitter.Node, src []byte) int { + if body == nil { + return 0 + } + return walkComplexity(body, src, 0) +} + +// walkComplexity is the recursive heart of the algorithm. `nesting` is the +// depth penalty applied when an increment fires. +func walkComplexity(n *sitter.Node, src []byte, nesting int) int { + if n == nil { + return 0 + } + if isFunctionLikeNode(n.Type()) { + // A nested function has its own complexity tracked separately (as a + // distinct entry from collectFunctions). Don't add the inner + // complexity to the outer function. + return 0 + } + switch n.Type() { + case "if_statement": + return ifComplexity(n, src, nesting) + case "for_statement", "for_in_statement", "for_of_statement", "while_statement": + return loopComplexity(n, src, nesting) + case "switch_statement": + return 1 + nesting + countNonEmptyCases(n) + walkChildrenWithNesting(n, src, nesting) + case "try_statement", "ternary_expression": + return 1 + nesting + walkChildrenWithNesting(n, src, nesting) + case "catch_clause": + return 1 + walkChildrenWithNesting(n, src, nesting) + case "call_expression": + return callComplexity(n, src, nesting) + } + return walkAllChildren(n, src, nesting) +} + +// ifComplexity scores an if_statement, charging +1+nesting for the branch, +// +1 per logical operator switch in the condition, and +1 if an `else` +// alternative is attached. +func ifComplexity(n *sitter.Node, src []byte, nesting int) int { + total := 1 + nesting + conditionLogicalOps(n.ChildByFieldName("condition")) + if n.ChildByFieldName("alternative") != nil { + total += 1 + } + return total + walkChildrenWithNesting(n, src, nesting) +} + +// loopComplexity scores any for_*/while_ loop: +1+nesting for the loop plus +// logical-operator switches inside its condition. 
+func loopComplexity(n *sitter.Node, src []byte, nesting int) int { + total := 1 + nesting + conditionLogicalOps(n.ChildByFieldName("condition")) + return total + walkChildrenWithNesting(n, src, nesting) +} + +// callComplexity charges +1 for promise-chain `.catch(` calls and keeps +// descending into the call's arguments. +func callComplexity(n *sitter.Node, src []byte, nesting int) int { + total := 0 + if isDotCatchCall(n, src) { + total += 1 + } + return total + walkAllChildren(n, src, nesting) +} + +// walkAllChildren descends into every child at the current nesting level. +// Used for pass-through node types that don't introduce a nesting penalty. +func walkAllChildren(n *sitter.Node, src []byte, nesting int) int { + total := 0 + for i := 0; i < int(n.ChildCount()); i++ { + total += walkComplexity(n.Child(i), src, nesting) + } + return total +} + +// isFunctionLikeNode reports whether a tree-sitter node type declares a +// fresh function scope whose body should be scored independently. +func isFunctionLikeNode(kind string) bool { + switch kind { + case "arrow_function", "function_expression", "function_declaration", + "method_definition", "generator_function", "generator_function_declaration": + return true + } + return false +} + +// walkChildrenWithNesting recurses into the sub-trees that belong to the +// construct at `n`, bumping nesting only for body-like children. This +// mirrors the Rust analyzer's behavior. +func walkChildrenWithNesting(n *sitter.Node, src []byte, nesting int) int { + total := 0 + for i := 0; i < int(n.ChildCount()); i++ { + c := n.Child(i) + if c == nil { + continue + } + fieldName := n.FieldNameForChild(i) + switch fieldName { + case "condition", "value", "left", "right": + // Conditions / operands stay at the current nesting. Their + // sub-expressions (nested ternaries, logical chains) are + // counted via their own node-type cases. 
+ total += walkComplexity(c, src, nesting) + case "body", "consequence", "alternative": + total += walkComplexity(c, src, nesting+1) + default: + total += walkComplexity(c, src, nesting) + } + } + return total +} + +// countNonEmptyCases walks a switch_statement's body and returns the +// number of case clauses that contain at least one statement. Empty +// fall-through cases (`case 1:` with no body that falls into the next +// arm) don't count, matching the design doc. +// +// In tree-sitter-typescript, a switch body has `switch_case` children with +// a value field and a body field (the body field's NamedChildCount tells +// us whether the case has content). `default` clauses are modeled as +// `switch_default`. +func countNonEmptyCases(switchNode *sitter.Node) int { + body := switchNode.ChildByFieldName("body") + if body == nil { + return 0 + } + count := 0 + for i := 0; i < int(body.NamedChildCount()); i++ { + c := body.NamedChild(i) + if c == nil { + continue + } + if c.Type() != "switch_case" && c.Type() != "switch_default" { + continue + } + if caseHasContent(c) { + count++ + } + } + return count +} + +// caseHasContent returns true when a switch_case/switch_default has at +// least one statement-like named child beyond the value expression. +func caseHasContent(c *sitter.Node) bool { + for i := 0; i < int(c.NamedChildCount()); i++ { + child := c.NamedChild(i) + if child == nil { + continue + } + // Skip the case's value expression — grammars expose it as the + // first named child for switch_case. We count anything else. + fname := c.FieldNameForChild(i) + if fname == "value" { + continue + } + return true + } + return false +} + +// isDotCatchCall reports whether a call_expression is `something.catch(...)` +// — i.e. a promise-chain `.catch` invocation. We match on the member +// expression's property name being literally `catch`. The spec calls out +// string-matching on the identifier explicitly to avoid CST depth tuning. 
+func isDotCatchCall(call *sitter.Node, src []byte) bool { + fn := call.ChildByFieldName("function") + if fn == nil || fn.Type() != "member_expression" { + return false + } + prop := fn.ChildByFieldName("property") + if prop == nil { + return false + } + return nodeText(prop, src) == "catch" +} + +// conditionLogicalOps returns the operator-switch count for the chain of +// `&&` / `||` operators inside a condition. Matches the Rust algorithm: a +// run of the same operator counts as 1, each switch to the other adds 1. +func conditionLogicalOps(cond *sitter.Node) int { + if cond == nil { + return 0 + } + ops := flattenLogicalOps(cond) + if len(ops) == 0 { + return 0 + } + count := 1 + for i := 1; i < len(ops); i++ { + if ops[i] != ops[i-1] { + count++ + } + } + return count +} + +// flattenLogicalOps collects the `&&` / `||` sequence from a +// binary_expression tree, left-to-right. Non-logical binary ops stop the +// recursion. +// +// Tree-sitter TypeScript wraps `if (cond)` conditions in a +// `parenthesized_expression` (`( binary_expression )`). We strip that +// wrapper when we first see it so a condition chain like +// `if (a && b || c)` is traversed as the inner binary tree. +// +// Tree-sitter TypeScript models `a && b` as +// +// (binary_expression left: ... operator: "&&" right: ...) +// +// — the operator is an anonymous child whose Type() is the operator token. +func flattenLogicalOps(n *sitter.Node) []string { + n = unwrapParens(n) + if n == nil || n.Type() != "binary_expression" { + return nil + } + op := n.ChildByFieldName("operator") + if op == nil { + return nil + } + opText := op.Type() + if opText != "&&" && opText != "||" { + return nil + } + var out []string + out = append(out, flattenLogicalOps(n.ChildByFieldName("left"))...) + out = append(out, opText) + out = append(out, flattenLogicalOps(n.ChildByFieldName("right"))...) 
+ return out +} + +// unwrapParens strips a leading parenthesized_expression wrapper so +// condition handling doesn't have to special-case the if/while grammar +// shape. Returns n unchanged when no wrapping is present. +func unwrapParens(n *sitter.Node) *sitter.Node { + for n != nil && n.Type() == "parenthesized_expression" { + // The inner expression is the first (and only) named child. + if n.NamedChildCount() == 0 { + return n + } + n = n.NamedChild(0) + } + return n +} diff --git a/internal/lang/tsanalyzer/complexity_test.go b/internal/lang/tsanalyzer/complexity_test.go new file mode 100644 index 0000000..52a1e76 --- /dev/null +++ b/internal/lang/tsanalyzer/complexity_test.go @@ -0,0 +1,143 @@ +package tsanalyzer + +import ( + "path/filepath" + "testing" + + sitter "github.com/smacker/go-tree-sitter" +) + +// TestCognitiveComplexity_ByFixture asserts per-function scores on +// testdata/complexity.ts. The fixture documents each function's expected +// score inline; this test locks them in. 
+func TestCognitiveComplexity_ByFixture(t *testing.T) { + absPath, _ := filepath.Abs("testdata/complexity.ts") + scores, err := complexityImpl{}.AnalyzeFile(absPath, fullRegion("testdata/complexity.ts")) + if err != nil { + t.Fatal(err) + } + byName := map[string]int{} + for _, s := range scores { + byName[s.Name] = s.Complexity + } + + cases := []struct { + name string + want int + }{ + {"empty", 0}, + {"oneIf", 1}, + {"ifElse", 2}, + {"sw", 5}, + {"tryCatch", 2}, + {"ternary", 3}, + {"logical", 3}, + {"notCounted", 1}, // `?.`, `??`, `await`, `async` don't count + {"promiseCatch", 1}, + } + for _, tc := range cases { + got, ok := byName[tc.name] + if !ok { + t.Errorf("missing score for %q (have %v)", tc.name, byName) + continue + } + if got != tc.want { + t.Errorf("complexity(%s) = %d, want %d", tc.name, got, tc.want) + } + } +} + +// TestComplexityScorer_ReusesCalculator asserts the Scorer returns the +// same values as the Calculator — matches the design note's reuse policy. +func TestComplexityScorer_ReusesCalculator(t *testing.T) { + absPath, _ := filepath.Abs("testdata/complexity.ts") + calc, err := complexityImpl{}.AnalyzeFile(absPath, fullRegion("testdata/complexity.ts")) + if err != nil { + t.Fatal(err) + } + score, err := complexityImpl{}.ScoreFile(absPath, fullRegion("testdata/complexity.ts")) + if err != nil { + t.Fatal(err) + } + if len(calc) != len(score) { + t.Fatalf("counts differ: calc=%d score=%d", len(calc), len(score)) + } + for i := range calc { + if calc[i].Name != score[i].Name || calc[i].Complexity != score[i].Complexity { + t.Errorf("row %d differs: calc=%+v score=%+v", i, calc[i], score[i]) + } + } +} + +// TestLogicalOpChain directly asserts the operator-switch counter. 
+func TestLogicalOpChain(t *testing.T) { + cases := []struct { + src string + want int + }{ + {"const f = (a: boolean, b: boolean) => a && b", 1}, + {"const f = (a: boolean, b: boolean, c: boolean) => a && b && c", 1}, + {"const f = (a: boolean, b: boolean, c: boolean) => a && b || c", 2}, + {"const f = (a: boolean, b: boolean, c: boolean, d: boolean) => a || b && c || d", 3}, + {"const f = (a: number) => a === 1", 0}, + } + for _, tc := range cases { + tree, err := parseBytes([]byte(tc.src)) + if err != nil { + t.Fatalf("parseBytes(%q): %v", tc.src, err) + } + target := findFirstLogical(tree.RootNode()) + got := conditionLogicalOps(target) + if got != tc.want { + t.Errorf("conditionLogicalOps(%q) = %d, want %d", tc.src, got, tc.want) + } + tree.Close() + } +} + +func findFirstLogical(root *sitter.Node) *sitter.Node { + var hit *sitter.Node + walk(root, func(n *sitter.Node) bool { + if hit != nil { + return false + } + if n.Type() != "binary_expression" { + return true + } + op := n.ChildByFieldName("operator") + if op == nil { + return true + } + if op.Type() == "&&" || op.Type() == "||" { + hit = n + return false + } + return true + }) + return hit +} + +// TestComplexity_OptionalChainingNotCounted is a regression guard that +// optional chaining `?.` and nullish coalescing `??` do NOT increment the +// score. A function containing only these constructs must score 0. +func TestComplexity_OptionalChainingNotCounted(t *testing.T) { + src := `function f(x: { a?: { b?: number } } | null): number { + const v = x?.a?.b ?? 
0; + return v; +} +` + dir := t.TempDir() + path := filepath.Join(dir, "a.ts") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + scores, err := complexityImpl{}.AnalyzeFile(path, fullRegion("a.ts")) + if err != nil { + t.Fatal(err) + } + for _, s := range scores { + if s.Name == "f" && s.Complexity != 0 { + t.Errorf("optional chain + nullish should score 0, got %d", s.Complexity) + } + } +} diff --git a/internal/lang/tsanalyzer/deps.go b/internal/lang/tsanalyzer/deps.go new file mode 100644 index 0000000..f18ce8b --- /dev/null +++ b/internal/lang/tsanalyzer/deps.go @@ -0,0 +1,210 @@ +package tsanalyzer + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + sitter "github.com/smacker/go-tree-sitter" +) + +// depsImpl implements lang.ImportResolver for TypeScript via tree-sitter. +// package.json gives us the project name; import and require() statements +// in source files provide the internal dependency edges. +// +// The returned graph uses directory-level node keys (paths relative to the +// repo root) so it matches the Go and Rust analyzers' shape: every edge +// says "this package directory depends on that package directory". +type depsImpl struct{} + +// DetectModulePath returns the `name` from package.json. Missing / unnamed +// package.json returns an error — same contract as the Rust analyzer's +// Cargo.toml handler. 
+func (depsImpl) DetectModulePath(repoPath string) (string, error) { + manifestPath := filepath.Join(repoPath, "package.json") + content, err := os.ReadFile(manifestPath) + if err != nil { + return "", fmt.Errorf("reading package.json: %w", err) + } + var pkg struct { + Name string `json:"name"` + } + if err := json.Unmarshal(content, &pkg); err != nil { + return "", fmt.Errorf("parsing package.json: %w", err) + } + if pkg.Name == "" { + return "", fmt.Errorf("no name field in package.json") + } + return pkg.Name, nil +} + +// ScanPackageImports returns a single-entry adjacency map: +// +// { : { : true, : true, ... } } +// +// where keys are directories relative to repoPath. An import specifier is +// "internal" when it begins with `.` (relative import) or a registered +// project alias (`@/`, `~/`). External packages (bare specifiers) are +// filtered out. +func (depsImpl) ScanPackageImports(repoPath, pkgDir, _ string) map[string]map[string]bool { + absDir := filepath.Join(repoPath, pkgDir) + entries, err := os.ReadDir(absDir) + if err != nil { + return nil + } + + deps := map[string]bool{} + for _, e := range entries { + if e.IsDir() { + continue + } + name := e.Name() + if !strings.HasSuffix(name, ".ts") && !strings.HasSuffix(name, ".tsx") { + continue + } + absFile := filepath.Join(absDir, name) + if isTSTestFile(absFile) { + continue + } + collectImports(absFile, repoPath, pkgDir, deps) + } + if len(deps) == 0 { + return nil + } + return map[string]map[string]bool{pkgDir: deps} +} + +// collectImports parses one .ts/.tsx file and adds each internal import / +// require to `deps`. Parse errors are silently ignored to match the other +// analyzers' "skip broken files" behavior. 
+func collectImports(absFile, repoPath, pkgDir string, deps map[string]bool) { + tree, src, err := parseFile(absFile) + if err != nil { + return + } + defer tree.Close() + + walk(tree.RootNode(), func(n *sitter.Node) bool { + switch n.Type() { + case "import_statement": + addImportEdge(n, src, repoPath, pkgDir, deps) + case "call_expression": + // require('./foo') style. + addRequireEdge(n, src, repoPath, pkgDir, deps) + } + return true + }) +} + +// addImportEdge reads the source specifier of an import_statement and, if +// it resolves to an internal module, records an edge. +func addImportEdge(n *sitter.Node, src []byte, repoPath, pkgDir string, deps map[string]bool) { + // The `source` field is a string node. + source := n.ChildByFieldName("source") + if source == nil { + return + } + spec := unquote(nodeText(source, src)) + target := resolveInternal(spec, repoPath, pkgDir) + if target == "" { + return + } + deps[target] = true +} + +// addRequireEdge matches `require('...')` calls and records an edge when +// the specifier is internal. +func addRequireEdge(n *sitter.Node, src []byte, repoPath, pkgDir string, deps map[string]bool) { + fn := n.ChildByFieldName("function") + if fn == nil { + return + } + if nodeText(fn, src) != "require" { + return + } + args := n.ChildByFieldName("arguments") + if args == nil || args.NamedChildCount() == 0 { + return + } + arg := args.NamedChild(0) + if arg == nil { + return + } + if arg.Type() != "string" { + return + } + spec := unquote(nodeText(arg, src)) + target := resolveInternal(spec, repoPath, pkgDir) + if target == "" { + return + } + deps[target] = true +} + +// unquote strips a single pair of surrounding single, double, or backtick +// quotes from a string literal's source text. Nothing fancier — TypeScript +// string literals in import specifiers are always simple. 
+func unquote(s string) string { + if len(s) < 2 { + return s + } + first, last := s[0], s[len(s)-1] + if first != last { + return s + } + if first == '"' || first == '\'' || first == '`' { + return s[1 : len(s)-1] + } + return s +} + +// resolveInternal maps an import specifier to a repo-relative directory or +// returns "" if the import is external. +// +// ./foo -> pkgDir/foo +// ../shared/util -> /shared/util +// @/components/Card -> @/components/Card (final segment kept; only /index folds) +// @/components/index -> @/components (index fold) +// ~/lib/util -> lib/util +// lodash -> "" (external) +// +// Both `@/` and `~/` aliases treat the remaining path identically: the +// final segment is kept as-is unless it is literally "index", in which +// case it is folded to the parent directory. `@/` retains its prefix so +// alias edges are visibly tagged in the graph; `~/` strips the prefix +// because `~` conventionally points at the project root. +func resolveInternal(spec, repoPath, pkgDir string) string { + if spec == "" { + return "" + } + switch { + case strings.HasPrefix(spec, "./") || strings.HasPrefix(spec, "../") || spec == "." || spec == "..": + return resolveRelative(spec, pkgDir) + case strings.HasPrefix(spec, "@/"): + rest := filepath.ToSlash(spec[2:]) + return "@/" + foldIndex(rest) + case strings.HasPrefix(spec, "~/"): + return foldIndex(filepath.ToSlash(spec[2:])) + } + return "" +} + +// foldIndex collapses a trailing `/index` on a cleaned path to its +// parent directory so the graph uses one node for both `./foo` and +// `./foo/index`. +func foldIndex(p string) string { + if base := filepath.Base(p); base == "index" { + return filepath.ToSlash(filepath.Dir(p)) + } + return p +} + +// resolveRelative resolves a relative specifier against the importing +// file's package directory. We fold `/index` on the cleaned path because +// `./foo` and `./foo/index` point at the same module. 
+func resolveRelative(spec, pkgDir string) string { + combined := filepath.Join(pkgDir, spec) + return foldIndex(filepath.ToSlash(filepath.Clean(combined))) +} diff --git a/internal/lang/tsanalyzer/deps_test.go b/internal/lang/tsanalyzer/deps_test.go new file mode 100644 index 0000000..5352855 --- /dev/null +++ b/internal/lang/tsanalyzer/deps_test.go @@ -0,0 +1,187 @@ +package tsanalyzer + +import ( + "os" + "path/filepath" + "testing" +) + +func TestDetectModulePath(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(` +{ + "name": "my-package", + "version": "1.0.0" +} +`), 0644); err != nil { + t.Fatal(err) + } + got, err := depsImpl{}.DetectModulePath(dir) + if err != nil { + t.Fatal(err) + } + if got != "my-package" { + t.Errorf("DetectModulePath = %q, want my-package", got) + } +} + +func TestDetectModulePath_Missing(t *testing.T) { + dir := t.TempDir() + _, err := depsImpl{}.DetectModulePath(dir) + if err == nil { + t.Error("expected error for missing package.json") + } +} + +func TestDetectModulePath_NoName(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`{"version":"1.0.0"}`), 0644); err != nil { + t.Fatal(err) + } + _, err := depsImpl{}.DetectModulePath(dir) + if err == nil { + t.Error("expected error for package.json without name") + } +} + +// TestScanPackageImports_InternalVsExternal asserts that relative imports +// and project-alias imports produce internal edges while bare specifiers +// (external packages) are filtered out. 
+func TestScanPackageImports_InternalVsExternal(t *testing.T) { + root := t.TempDir() + + must := func(p, content string) { + full := filepath.Join(root, p) + if err := os.MkdirAll(filepath.Dir(full), 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(full, []byte(content), 0644); err != nil { + t.Fatal(err) + } + } + + must("package.json", `{"name":"demo"}`) + must("src/index.ts", ` +import { foo } from './foo'; +import { bar } from '../util/bar'; +import * as _React from 'react'; +import { Card } from '@/components/Card'; +import { util } from '~/lib/util'; +`) + must("src/foo.ts", `export const foo = 1;`) + must("util/bar.ts", `export const bar = 2;`) + + edges := depsImpl{}.ScanPackageImports(root, "src", "demo") + if edges == nil { + t.Fatal("expected non-nil edges for src") + } + srcEdges := edges["src"] + if srcEdges == nil { + t.Fatalf("expected edges keyed by 'src', got %v", edges) + } + + // Relative imports resolve against src/, so './foo' -> src/foo. + if !srcEdges["src/foo"] { + t.Errorf("missing internal edge src/foo in %v", srcEdges) + } + // '../util/bar' resolves against parent of src -> util/bar. + if !srcEdges["util/bar"] { + t.Errorf("missing internal edge util/bar in %v", srcEdges) + } + // Project aliases: @/components/Card -> @/components/Card (final segment + // kept; only /index folds), ~/lib/util -> lib/util. + if !srcEdges["@/components/Card"] { + t.Errorf("missing alias edge @/components/Card in %v", srcEdges) + } + if !srcEdges["lib/util"] { + t.Errorf("missing alias edge lib/util in %v", srcEdges) + } + + // External imports must not leak edges. 'react' is bare. + for k := range srcEdges { + if k == "react" { + t.Errorf("external react edge leaked: %q", k) + } + } +} + +// TestScanPackageImports_Require exercises the CommonJS path. 
+func TestScanPackageImports_Require(t *testing.T) { + root := t.TempDir() + + must := func(p, content string) { + full := filepath.Join(root, p) + if err := os.MkdirAll(filepath.Dir(full), 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(full, []byte(content), 0644); err != nil { + t.Fatal(err) + } + } + + must("package.json", `{"name":"demo"}`) + must("src/index.ts", ` +const foo = require('./foo'); +const lodash = require('lodash'); +`) + must("src/foo.ts", `module.exports = {};`) + + edges := depsImpl{}.ScanPackageImports(root, "src", "demo") + srcEdges := edges["src"] + if !srcEdges["src/foo"] { + t.Errorf("missing require internal edge src/foo, got %v", srcEdges) + } + if srcEdges["lodash"] { + t.Errorf("external require leaked: %v", srcEdges) + } +} + +// TestResolveInternal exercises the resolver directly — handy for pinning +// the alias / relative path rules. +func TestResolveInternal(t *testing.T) { + cases := []struct { + spec string + pkgDir string + want string + }{ + {"./foo", "src", "src/foo"}, + {"./foo/bar", "src", "src/foo/bar"}, + {"../util/x", "src/lib", "src/util/x"}, + // @/ aliases: final segment is kept; only /index folds. + {"@/components/Card", "src", "@/components/Card"}, + {"@/components/index", "src", "@/components"}, + {"@/components", "src", "@/components"}, + // ~/ aliases: same folding semantics, no prefix retained. + {"~/lib/util", "src", "lib/util"}, + {"lodash", "src", ""}, + {"react-dom", "src", ""}, + // index fold: `./dir/index` collapses to `./dir` + {"./comp/index", "src", "src/comp"}, + } + for _, tc := range cases { + got := resolveInternal(tc.spec, "", tc.pkgDir) + if got != tc.want { + t.Errorf("resolveInternal(%q in %q) = %q, want %q", tc.spec, tc.pkgDir, got, tc.want) + } + } +} + +// TestUnquote exercises the tiny literal-stripping helper so regressions +// (e.g. template string handling) are caught. 
+func TestUnquote(t *testing.T) {
+	cases := []struct {
+		in, want string
+	}{
+		{`'foo'`, "foo"},
+		{`"bar"`, "bar"},
+		{"`baz`", "baz"},
+		{"foo", "foo"}, // unquoted passthrough
+		{`'foo`, `'foo`},
+	}
+	for _, tc := range cases {
+		got := unquote(tc.in)
+		if got != tc.want {
+			t.Errorf("unquote(%q) = %q, want %q", tc.in, got, tc.want)
+		}
+	}
+}
diff --git a/internal/lang/tsanalyzer/eval_test.go b/internal/lang/tsanalyzer/eval_test.go
new file mode 100644
index 0000000..c3bdeb6
--- /dev/null
+++ b/internal/lang/tsanalyzer/eval_test.go
@@ -0,0 +1,137 @@
+package tsanalyzer_test
+
+import (
+	"os/exec"
+	"path/filepath"
+	"testing"
+
+	"github.com/0xPolygon/diffguard/internal/lang/evalharness"
+)
+
+// EVAL-3 — TypeScript correctness evaluation suite.
+//
+// Each test below drives the built diffguard binary against a fixture
+// under evaldata/<name>/ and compares the emitted report to
+// expected.json. Semantic matching (section + severity + finding
+// file/function) keeps the tests robust against line-number drift.
+//
+// Mutation tests run the fixture's `npm test` which shells out to
+// `node --experimental-strip-types` — that flag is enabled by default
+// in Node 22.6+, so we require a minimum Node on PATH. When `node` is
+// missing the test skips cleanly.
+//
+// Follow-up TODOs (for the verifier agent to pick up):
+//
+// - EVAL-3 sizes (file): add a >500-LOC fixture + negative control.
+// - EVAL-3 deps (internal vs external): assert directly on the graph
+//   shape (lodash excluded, ./foo included) rather than just pass/fail.
+// - EVAL-3 churn: needs seeded git history.
+// - EVAL-3 mutation (annotation respect): exercise
+//   `// mutator-disable-next-line` end-to-end; currently covered at
+//   unit level in mutation_annotate_test.go.
+ +var binBuilder evalharness.BinaryBuilder + +func fixtureDir(t *testing.T, name string) string { + t.Helper() + wd, err := filepath.Abs(filepath.Join("evaldata", name)) + if err != nil { + t.Fatal(err) + } + return wd +} + +func runEvalFixture(t *testing.T, name string, extraFlags []string) { + t.Helper() + + binary := binBuilder.GetBinary(t, evalharness.RepoRoot(t)) + repo := evalharness.CopyFixture(t, fixtureDir(t, name)) + + flags := append([]string{ + "--paths", ".", + "--language", "typescript", + }, extraFlags...) + + rpt := evalharness.RunBinary(t, binary, repo, flags) + exp, ok := evalharness.LoadExpectation(t, fixtureDir(t, name)) + if !ok { + t.Fatalf("fixture %s has no expected.json", name) + } + evalharness.AssertMatches(t, rpt, exp) +} + +func TestEval_Complexity_Positive(t *testing.T) { + runEvalFixture(t, "complexity_positive", []string{"--skip-mutation"}) +} + +func TestEval_Complexity_Negative(t *testing.T) { + runEvalFixture(t, "complexity_negative", []string{"--skip-mutation"}) +} + +func TestEval_Sizes_Function_Positive(t *testing.T) { + runEvalFixture(t, "sizes_positive", []string{"--skip-mutation"}) +} + +func TestEval_Sizes_Function_Negative(t *testing.T) { + runEvalFixture(t, "sizes_negative", []string{"--skip-mutation"}) +} + +func TestEval_Deps_Cycle_Positive(t *testing.T) { + runEvalFixture(t, "deps_cycle_positive", []string{"--skip-mutation"}) +} + +func TestEval_Deps_Cycle_Negative(t *testing.T) { + runEvalFixture(t, "deps_cycle_negative", []string{"--skip-mutation"}) +} + +func TestEval_Mutation_Kill_Positive(t *testing.T) { + requireNode(t) + if testing.Short() { + t.Skip("skipping mutation eval in -short mode") + } + runEvalFixture(t, "mutation_kill_positive", mutationFlags()) +} + +func TestEval_Mutation_Kill_Negative(t *testing.T) { + requireNode(t) + if testing.Short() { + t.Skip("skipping mutation eval in -short mode") + } + runEvalFixture(t, "mutation_kill_negative", mutationFlags()) +} + +func 
TestEval_Mutation_TSOp_Positive(t *testing.T) { + requireNode(t) + if testing.Short() { + t.Skip("skipping mutation eval in -short mode") + } + runEvalFixture(t, "mutation_tsop_positive", mutationFlags()) +} + +func TestEval_Mutation_TSOp_Negative(t *testing.T) { + requireNode(t) + if testing.Short() { + t.Skip("skipping mutation eval in -short mode") + } + runEvalFixture(t, "mutation_tsop_negative", mutationFlags()) +} + +// requireNode skips the test when `node` or `npm` isn't on $PATH. The +// fixture's package.json uses `npm test` which in turn runs node; both +// must be present. +func requireNode(t *testing.T) { + t.Helper() + for _, cmd := range []string{"node", "npm"} { + if _, err := exec.LookPath(cmd); err != nil { + t.Skipf("%s not on PATH; skipping mutation eval", cmd) + } + } +} + +func mutationFlags() []string { + return []string{ + "--mutation-sample-rate", "100", + "--mutation-workers", "2", + "--test-timeout", "60s", + } +} diff --git a/internal/lang/tsanalyzer/evaldata/complexity_negative/README.md b/internal/lang/tsanalyzer/evaldata/complexity_negative/README.md new file mode 100644 index 0000000..84b0f33 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/complexity_negative/README.md @@ -0,0 +1,7 @@ +# complexity_negative + +Negative control for complexity_positive: same overall work split into +small helpers (`sign`, `doubled`, `classify`). Each function stays well +below the cognitive threshold. + +Expected verdict: Cognitive Complexity PASS, zero findings. diff --git a/internal/lang/tsanalyzer/evaldata/complexity_negative/clean.ts b/internal/lang/tsanalyzer/evaldata/complexity_negative/clean.ts new file mode 100644 index 0000000..5801843 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/complexity_negative/clean.ts @@ -0,0 +1,16 @@ +// Negative control: same overall work split into helpers. Each stays +// well under the cognitive threshold. 
+ +export function sign(n: number): number { + if (n > 0) return 1; + if (n < 0) return -1; + return 0; +} + +export function doubled(n: number | null): number { + return (n ?? 0) * 2; +} + +export function classify(a: number): string { + return a === 1 ? "one" : "other"; +} diff --git a/internal/lang/tsanalyzer/evaldata/complexity_negative/expected.json b/internal/lang/tsanalyzer/evaldata/complexity_negative/expected.json new file mode 100644 index 0000000..9a638c5 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/complexity_negative/expected.json @@ -0,0 +1,10 @@ +{ + "worst_severity": "PASS", + "sections": [ + { + "name": "Cognitive Complexity", + "severity": "PASS", + "must_not_have_findings": true + } + ] +} diff --git a/internal/lang/tsanalyzer/evaldata/complexity_negative/package.json b/internal/lang/tsanalyzer/evaldata/complexity_negative/package.json new file mode 100644 index 0000000..affc982 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/complexity_negative/package.json @@ -0,0 +1,5 @@ +{ + "name": "complexity-negative", + "version": "0.1.0", + "private": true +} diff --git a/internal/lang/tsanalyzer/evaldata/complexity_positive/README.md b/internal/lang/tsanalyzer/evaldata/complexity_positive/README.md new file mode 100644 index 0000000..66e52c6 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/complexity_positive/README.md @@ -0,0 +1,7 @@ +# complexity_positive + +Seeded issue: `tangled` mixes nested ternaries, `try/catch`, a `switch` +with multiple cases, and long `&&`/`||` chains — cognitive complexity +well above 10. + +Expected verdict: Cognitive Complexity FAIL with a finding on `tangled`. 
diff --git a/internal/lang/tsanalyzer/evaldata/complexity_positive/expected.json b/internal/lang/tsanalyzer/evaldata/complexity_positive/expected.json new file mode 100644 index 0000000..cd7bb4c --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/complexity_positive/expected.json @@ -0,0 +1,12 @@ +{ + "worst_severity": "FAIL", + "sections": [ + { + "name": "Cognitive Complexity", + "severity": "FAIL", + "must_have_findings": [ + {"file": "tangled.ts", "function": "tangled", "severity": "FAIL"} + ] + } + ] +} diff --git a/internal/lang/tsanalyzer/evaldata/complexity_positive/package.json b/internal/lang/tsanalyzer/evaldata/complexity_positive/package.json new file mode 100644 index 0000000..9980a13 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/complexity_positive/package.json @@ -0,0 +1,5 @@ +{ + "name": "complexity-positive", + "version": "0.1.0", + "private": true +} diff --git a/internal/lang/tsanalyzer/evaldata/complexity_positive/tangled.ts b/internal/lang/tsanalyzer/evaldata/complexity_positive/tangled.ts new file mode 100644 index 0000000..1c5b4b0 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/complexity_positive/tangled.ts @@ -0,0 +1,36 @@ +// Seeded: nested ternaries + try/catch + long logical chains drive +// cognitive complexity above the default 10 threshold. + +export function tangled(a: number | null, b: number | null, flag: boolean): number { + let total = 0; + try { + if (a !== null && b !== null) { + if (a > 0 && (b > 0 || flag)) { + for (let i = 0; i < a; i++) { + if (i % 2 === 0 && flag) { + total += i > 10 ? i * 2 : i; + } else if (i % 3 === 0 || b < 0) { + total -= b > 5 ? 
b : 1; + } + } + } else { + switch (a) { + case 1: + total = 1; + break; + case 2: + total = 2; + break; + case 3: + total = 3; + break; + default: + total = -1; + } + } + } + } catch (e) { + total = -1; + } + return total; +} diff --git a/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/README.md b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/README.md new file mode 100644 index 0000000..65af003 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/README.md @@ -0,0 +1,6 @@ +# deps_cycle_negative + +Negative control: same `a` and `b` packages but both import a shared +`types` module instead of each other, breaking the cycle. + +Expected verdict: Dependency Structure PASS. diff --git a/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/expected.json b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/expected.json new file mode 100644 index 0000000..75b2069 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "PASS", + "sections": [ + { + "name": "Dependency Structure", + "severity": "PASS" + } + ] +} diff --git a/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/package.json b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/package.json new file mode 100644 index 0000000..562466d --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/package.json @@ -0,0 +1,5 @@ +{ + "name": "deps-cycle-negative", + "version": "0.1.0", + "private": true +} diff --git a/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/src/a/index.ts b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/src/a/index.ts new file mode 100644 index 0000000..160f38d --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/src/a/index.ts @@ -0,0 +1,5 @@ +import { Shared } from "../types"; + +export function aFn(x: number): Shared { + return { value: x + 1 }; +} diff --git 
a/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/src/b/index.ts b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/src/b/index.ts new file mode 100644 index 0000000..d337a7f --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/src/b/index.ts @@ -0,0 +1,5 @@ +import { Shared } from "../types"; + +export function bFn(x: number): Shared { + return { value: x + 2 }; +} diff --git a/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/src/types/index.ts b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/src/types/index.ts new file mode 100644 index 0000000..ba31fea --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_negative/src/types/index.ts @@ -0,0 +1,3 @@ +export interface Shared { + value: number; +} diff --git a/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/README.md b/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/README.md new file mode 100644 index 0000000..5bf893a --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/README.md @@ -0,0 +1,7 @@ +# deps_cycle_positive + +Seeded issue: `src/a/index.ts` imports from `../b` and `src/b/index.ts` +imports from `../a`, producing a 2-cycle in the internal dependency +graph. + +Expected verdict: Dependency Structure FAIL. 
diff --git a/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/expected.json b/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/expected.json new file mode 100644 index 0000000..5e252f8 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "FAIL", + "sections": [ + { + "name": "Dependency Structure", + "severity": "FAIL" + } + ] +} diff --git a/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/package.json b/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/package.json new file mode 100644 index 0000000..cb22cee --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/package.json @@ -0,0 +1,5 @@ +{ + "name": "deps-cycle-positive", + "version": "0.1.0", + "private": true +} diff --git a/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/src/a/index.ts b/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/src/a/index.ts new file mode 100644 index 0000000..18ca3f5 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/src/a/index.ts @@ -0,0 +1,5 @@ +import { bFn } from "../b"; + +export function aFn(x: number): number { + return bFn(x) + 1; +} diff --git a/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/src/b/index.ts b/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/src/b/index.ts new file mode 100644 index 0000000..0dc129e --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/deps_cycle_positive/src/b/index.ts @@ -0,0 +1,6 @@ +import { aFn } from "../a"; + +export function bFn(x: number): number { + if (x > 100) return x; + return aFn(x - 1); +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/README.md b/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/README.md new file mode 100644 index 0000000..8b9b873 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/README.md @@ -0,0 +1,8 @@ +# mutation_kill_negative + +Same `classify(x)` as mutation_kill_positive, but test.mjs 
only exercises +the positive branch. Most Tier-1 mutants survive. + +Expected verdict: Mutation Testing FAIL. + +Requires `node` 22.6+ on PATH. eval_test.go skips cleanly if node is absent. diff --git a/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/arith.ts b/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/arith.ts new file mode 100644 index 0000000..6c2f39a --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/arith.ts @@ -0,0 +1,9 @@ +export function classify(x: number): number { + if (x > 0) { + return 1; + } else if (x < 0) { + return -1; + } else { + return 0; + } +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/expected.json b/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/expected.json new file mode 100644 index 0000000..8d5211f --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "FAIL", + "sections": [ + { + "name": "Mutation Testing", + "severity": "FAIL" + } + ] +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/package.json b/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/package.json new file mode 100644 index 0000000..333bb2b --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/package.json @@ -0,0 +1,9 @@ +{ + "name": "mutation-kill-negative", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "test": "node --experimental-strip-types --disable-warning=ExperimentalWarning test.mjs" + } +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/test.mjs b/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/test.mjs new file mode 100644 index 0000000..180343d --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_kill_negative/test.mjs @@ -0,0 +1,11 @@ +// Loose test: only covers the positive branch. 
Most Tier-1 mutants +// (negate_conditional on x<0, boundary flips, return_value swaps) +// survive. +import { classify } from "./arith.ts"; + +const got = classify(5); +if (got !== 1) { + console.error(`classify(5) = ${got}, want 1`); + process.exit(1); +} +console.log("PASS"); diff --git a/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/README.md b/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/README.md new file mode 100644 index 0000000..cc6885b --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/README.md @@ -0,0 +1,10 @@ +# mutation_kill_positive + +Well-tested `classify(x)` with boundary + sign + zero coverage in +test.mjs. Tier-1 operators (conditional_boundary, negate_conditional, +math_operator, return_value) should be killed. + +Expected verdict: Mutation Testing PASS; Tier-1 kill rate ≥ 90%. + +Requires `node` 22.6+ on PATH (uses `--experimental-strip-types` to +import `.ts` directly). eval_test.go skips cleanly if node is absent. diff --git a/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/arith.ts b/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/arith.ts new file mode 100644 index 0000000..9dc0e6a --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/arith.ts @@ -0,0 +1,11 @@ +// Arithmetic classifier with boundary + sign coverage via test.mjs. 
+ +export function classify(x: number): number { + if (x > 0) { + return 1; + } else if (x < 0) { + return -1; + } else { + return 0; + } +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/expected.json b/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/expected.json new file mode 100644 index 0000000..ebfd556 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "PASS", + "sections": [ + { + "name": "Mutation Testing", + "severity": "PASS" + } + ] +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/package.json b/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/package.json new file mode 100644 index 0000000..af9bc1a --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/package.json @@ -0,0 +1,9 @@ +{ + "name": "mutation-kill-positive", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "test": "node --experimental-strip-types --disable-warning=ExperimentalWarning test.mjs" + } +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/test.mjs b/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/test.mjs new file mode 100644 index 0000000..0081e02 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_kill_positive/test.mjs @@ -0,0 +1,25 @@ +// Boundary + sign + zero coverage: kills Tier-1 mutants +// (conditional_boundary, negate_conditional, math_operator, return_value). 
+import { classify } from "./arith.ts"; + +const cases = [ + [5, 1], + [-5, -1], + [0, 0], + [1, 1], + [-1, -1], +]; + +let failed = 0; +for (const [input, expected] of cases) { + const got = classify(input); + if (got !== expected) { + console.error(`classify(${input}) = ${got}, want ${expected}`); + failed++; + } +} + +if (failed > 0) { + process.exit(1); +} +console.log("PASS"); diff --git a/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/README.md b/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/README.md new file mode 100644 index 0000000..1fafd37 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/README.md @@ -0,0 +1,11 @@ +# mutation_tsop_negative + +Negative control: same code as mutation_tsop_positive but tests don't +distinguish strict equality from loose or nullish coalescing from +logical-or. The operators fire, mutants survive, Tier-1 drops below +threshold. + +Expected verdict: Mutation Testing FAIL — confirms the operators +generate meaningful mutants whose signal depends on test quality. + +Requires `node` 22.6+ on PATH. diff --git a/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/expected.json b/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/expected.json new file mode 100644 index 0000000..8d5211f --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "FAIL", + "sections": [ + { + "name": "Mutation Testing", + "severity": "FAIL" + } + ] +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/ops.ts b/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/ops.ts new file mode 100644 index 0000000..e3f6819 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/ops.ts @@ -0,0 +1,7 @@ +export function isExact(a: unknown, b: unknown): boolean { + return a === b; +} + +export function pickDefault(a: number | null): number { + return a ?? 
42; +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/package.json b/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/package.json new file mode 100644 index 0000000..054fa60 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/package.json @@ -0,0 +1,9 @@ +{ + "name": "mutation-tsop-negative", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "test": "node --experimental-strip-types --disable-warning=ExperimentalWarning test.mjs" + } +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/test.mjs b/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/test.mjs new file mode 100644 index 0000000..37e2cc5 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_tsop_negative/test.mjs @@ -0,0 +1,23 @@ +// Loose tests that can't distinguish strict equality or nullish +// coalescing from their looser counterparts, so strict_equality and +// nullish_to_logical_or mutants survive. +import { isExact, pickDefault } from "./ops.ts"; + +let failed = 0; +const check = (got, want, name) => { + if (got !== want) { + console.error(`${name}: got ${got}, want ${want}`); + failed++; + } +}; + +// Equal string inputs — no strict-vs-loose distinction. +check(isExact("x", "x"), true, "strings equal"); +check(isExact("x", "y"), false, "strings unequal"); + +// Non-zero default with a null input — ?? and || behave the same. +check(pickDefault(null), 42, "null -> 42"); +check(pickDefault(100), 100, "100 passthrough"); + +if (failed > 0) process.exit(1); +console.log("PASS"); diff --git a/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/README.md b/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/README.md new file mode 100644 index 0000000..306ac61 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/README.md @@ -0,0 +1,10 @@ +# mutation_tsop_positive + +Exercises TS-specific operators `strict_equality` and +`nullish_to_logical_or`. 
test.mjs asserts inputs that distinguish +`===`/`==` (0 vs. false) and `??`/`||` (0 as a valid value), so the +mutants are killed. + +Expected verdict: Mutation Testing PASS. + +Requires `node` 22.6+ on PATH. diff --git a/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/expected.json b/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/expected.json new file mode 100644 index 0000000..ebfd556 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "PASS", + "sections": [ + { + "name": "Mutation Testing", + "severity": "PASS" + } + ] +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/ops.ts b/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/ops.ts new file mode 100644 index 0000000..f83b778 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/ops.ts @@ -0,0 +1,10 @@ +// Exercises the TS-specific operators strict_equality (===) and +// nullish_to_logical_or (??). + +export function isExact(a: unknown, b: unknown): boolean { + return a === b; +} + +export function pickDefault(a: number | null): number { + return a ?? 
42; +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/package.json b/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/package.json new file mode 100644 index 0000000..952f6b9 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/package.json @@ -0,0 +1,9 @@ +{ + "name": "mutation-tsop-positive", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "test": "node --experimental-strip-types --disable-warning=ExperimentalWarning test.mjs" + } +} diff --git a/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/test.mjs b/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/test.mjs new file mode 100644 index 0000000..6e5e4bd --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/mutation_tsop_positive/test.mjs @@ -0,0 +1,24 @@ +// Tests specifically distinguish strict-vs-loose equality and +// nullish-vs-falsy defaults, so strict_equality and +// nullish_to_logical_or mutants are killed. +import { isExact, pickDefault } from "./ops.ts"; + +let failed = 0; +const check = (got, want, name) => { + if (got !== want) { + console.error(`${name}: got ${got}, want ${want}`); + failed++; + } +}; + +// strict_equality: 0 === false is false, but 0 == false is true. +check(isExact(0, false), false, "0 !== false"); +check(isExact(null, undefined), false, "null !== undefined"); +check(isExact(1, 1), true, "1 === 1"); + +// nullish_to_logical_or: 0 ?? 42 is 0, but 0 || 42 is 42. +check(pickDefault(0), 0, "0 ?? 42"); +check(pickDefault(null), 42, "null ?? 42"); + +if (failed > 0) process.exit(1); +console.log("PASS"); diff --git a/internal/lang/tsanalyzer/evaldata/sizes_negative/README.md b/internal/lang/tsanalyzer/evaldata/sizes_negative/README.md new file mode 100644 index 0000000..f65518f --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/sizes_negative/README.md @@ -0,0 +1,6 @@ +# sizes_negative + +Negative control: same overall work split into short helpers. 
No +function approaches the 50-line threshold. + +Expected verdict: Code Sizes PASS, zero findings. diff --git a/internal/lang/tsanalyzer/evaldata/sizes_negative/expected.json b/internal/lang/tsanalyzer/evaldata/sizes_negative/expected.json new file mode 100644 index 0000000..3ac1812 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/sizes_negative/expected.json @@ -0,0 +1,10 @@ +{ + "worst_severity": "PASS", + "sections": [ + { + "name": "Code Sizes", + "severity": "PASS", + "must_not_have_findings": true + } + ] +} diff --git a/internal/lang/tsanalyzer/evaldata/sizes_negative/package.json b/internal/lang/tsanalyzer/evaldata/sizes_negative/package.json new file mode 100644 index 0000000..75fd614 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/sizes_negative/package.json @@ -0,0 +1,5 @@ +{ + "name": "sizes-negative", + "version": "0.1.0", + "private": true +} diff --git a/internal/lang/tsanalyzer/evaldata/sizes_negative/short.ts b/internal/lang/tsanalyzer/evaldata/sizes_negative/short.ts new file mode 100644 index 0000000..8b47ece --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/sizes_negative/short.ts @@ -0,0 +1,13 @@ +// Negative: same behavior refactored across named exports. Nothing +// approaches the 50-line threshold. 
+ +export function stepOne(x: number): number { return x + 1; } +export function stepTwo(x: number): number { return stepOne(x) + 1; } +export function stepThree(x: number): number { return stepTwo(x) + 1; } + +export const shortFunc = (input: number): number => { + const a = stepOne(input); + const b = stepTwo(a); + const c = stepThree(b); + return c; +}; diff --git a/internal/lang/tsanalyzer/evaldata/sizes_positive/README.md b/internal/lang/tsanalyzer/evaldata/sizes_positive/README.md new file mode 100644 index 0000000..bdb3045 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/sizes_positive/README.md @@ -0,0 +1,7 @@ +# sizes_positive + +Seeded issue: `longFunc` is an arrow function assigned to a `const` with +~60 lines of body, exceeding the 50-line function threshold. Complexity +stays flat (no branches). + +Expected verdict: Code Sizes FAIL with a finding on `longFunc`. diff --git a/internal/lang/tsanalyzer/evaldata/sizes_positive/expected.json b/internal/lang/tsanalyzer/evaldata/sizes_positive/expected.json new file mode 100644 index 0000000..53e3d82 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/sizes_positive/expected.json @@ -0,0 +1,12 @@ +{ + "worst_severity": "FAIL", + "sections": [ + { + "name": "Code Sizes", + "severity": "FAIL", + "must_have_findings": [ + {"file": "long.ts", "function": "longFunc", "severity": "FAIL"} + ] + } + ] +} diff --git a/internal/lang/tsanalyzer/evaldata/sizes_positive/long.ts b/internal/lang/tsanalyzer/evaldata/sizes_positive/long.ts new file mode 100644 index 0000000..58c4f01 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/sizes_positive/long.ts @@ -0,0 +1,64 @@ +// Seeded: ~60-line arrow function assigned to const — exceeds default +// 50-line function threshold without tripping complexity. 
+ +export const longFunc = (input: number): number => { + const a = input + 1; + const b = a + 1; + const c = b + 1; + const d = c + 1; + const e = d + 1; + const f = e + 1; + const g = f + 1; + const h = g + 1; + const i = h + 1; + const j = i + 1; + const k = j + 1; + const l = k + 1; + const m = l + 1; + const n = m + 1; + const o = n + 1; + const p = o + 1; + const q = p + 1; + const r = q + 1; + const s = r + 1; + const t = s + 1; + const u = t + 1; + const v = u + 1; + const w = v + 1; + const x = w + 1; + const y = x + 1; + const z = y + 1; + const aa = z + 1; + const bb = aa + 1; + const cc = bb + 1; + const dd = cc + 1; + const ee = dd + 1; + const ff = ee + 1; + const gg = ff + 1; + const hh = gg + 1; + const ii = hh + 1; + const jj = ii + 1; + const kk = jj + 1; + const ll = kk + 1; + const mm = ll + 1; + const nn = mm + 1; + const oo = nn + 1; + const pp = oo + 1; + const qq = pp + 1; + const rr = qq + 1; + const ss = rr + 1; + const tt = ss + 1; + const uu = tt + 1; + const vv = uu + 1; + const ww = vv + 1; + const xx = ww + 1; + const yy = xx + 1; + const zz = yy + 1; + const aaa = zz + 1; + const bbb = aaa + 1; + const ccc = bbb + 1; + const ddd = ccc + 1; + const eee = ddd + 1; + const fff = eee + 1; + return fff; +}; diff --git a/internal/lang/tsanalyzer/evaldata/sizes_positive/package.json b/internal/lang/tsanalyzer/evaldata/sizes_positive/package.json new file mode 100644 index 0000000..be97862 --- /dev/null +++ b/internal/lang/tsanalyzer/evaldata/sizes_positive/package.json @@ -0,0 +1,5 @@ +{ + "name": "sizes-positive", + "version": "0.1.0", + "private": true +} diff --git a/internal/lang/tsanalyzer/helpers_test.go b/internal/lang/tsanalyzer/helpers_test.go new file mode 100644 index 0000000..9156890 --- /dev/null +++ b/internal/lang/tsanalyzer/helpers_test.go @@ -0,0 +1,21 @@ +package tsanalyzer + +import ( + "os" + "path/filepath" +) + +// writeFile is the shared test helper used across the tsanalyzer test +// files. 
Mirrors the rustanalyzer's helper (rustanalyzer/helpers_test.go) +// — defined once here rather than via a testutil package so each _test.go +// file stays self-contained in what it inspects. +// +// Returns an error (rather than swallowing silently) so tests that care +// about directory/write failures can t.Fatal on them — matching the +// rustanalyzer pattern. +func writeFile(path string, data []byte) error { + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return err + } + return os.WriteFile(path, data, 0644) +} diff --git a/internal/lang/tsanalyzer/mutation_annotate.go b/internal/lang/tsanalyzer/mutation_annotate.go new file mode 100644 index 0000000..a801d92 --- /dev/null +++ b/internal/lang/tsanalyzer/mutation_annotate.go @@ -0,0 +1,110 @@ +package tsanalyzer + +import ( + "strings" + + sitter "github.com/smacker/go-tree-sitter" +) + +// annotationScannerImpl implements lang.AnnotationScanner for TypeScript. +// Disable annotations use the JS/TS comment prefix: +// +// // mutator-disable-next-line +// // mutator-disable-func +// +// Block comments (`/* ... */`) are accepted for parity with the other +// analyzers; tree-sitter models them as `comment` or `block_comment` +// depending on grammar version, so we check both. +type annotationScannerImpl struct{} + +// ScanAnnotations returns the set of 1-based source lines on which +// mutation generation should be suppressed. 
+func (annotationScannerImpl) ScanAnnotations(absPath string) (map[int]bool, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, err + } + defer tree.Close() + + disabled := map[int]bool{} + funcRanges := collectFuncRanges(tree.RootNode()) + + walk(tree.RootNode(), func(n *sitter.Node) bool { + switch n.Type() { + case "comment", "line_comment", "block_comment": + applyAnnotation(n, src, funcRanges, disabled) + } + return true + }) + return disabled, nil +} + +// applyAnnotation consumes a single comment node and, if it carries a +// known annotation, disables the appropriate line(s) in `disabled`. +func applyAnnotation(comment *sitter.Node, src []byte, funcs []funcRange, disabled map[int]bool) { + text := stripCommentMarkers(nodeText(comment, src)) + line := nodeLine(comment) + switch { + case strings.HasPrefix(text, "mutator-disable-next-line"): + disabled[line+1] = true + case strings.HasPrefix(text, "mutator-disable-func"): + disableEnclosingFunc(line, funcs, disabled) + } +} + +// stripCommentMarkers strips `//`, `/*`, `*/` and surrounding whitespace. +// Matches the Rust/Go analyzer helpers. +func stripCommentMarkers(raw string) string { + s := strings.TrimSpace(raw) + s = strings.TrimPrefix(s, "//") + s = strings.TrimPrefix(s, "/*") + s = strings.TrimSuffix(s, "*/") + return strings.TrimSpace(s) +} + +// disableEnclosingFunc marks every line of the function the comment +// belongs to as disabled. A comment belongs to a function when it sits +// inside the function's range, or when it directly precedes the function +// (at most one blank line in between). 
+func disableEnclosingFunc(commentLine int, funcs []funcRange, disabled map[int]bool) { + for _, r := range funcs { + if isCommentForFunc(commentLine, r) { + for i := r.start; i <= r.end; i++ { + disabled[i] = true + } + return + } + } +} + +func isCommentForFunc(commentLine int, r funcRange) bool { + if commentLine >= r.start && commentLine <= r.end { + return true + } + return r.start > commentLine && r.start-commentLine <= 2 +} + +// funcRange is the 1-based inclusive line span of a function declaration. +// Same shape used by the annotation scanner and the mutant generator. +type funcRange struct{ start, end int } + +// collectFuncRanges returns one funcRange per function declaration in the +// file — all the forms collectFunctions picks up (function_declaration, +// method_definition, arrow functions/function expressions assigned to a +// variable_declarator, generator functions). +func collectFuncRanges(root *sitter.Node) []funcRange { + var ranges []funcRange + walk(root, func(n *sitter.Node) bool { + switch n.Type() { + case "function_declaration", "method_definition", + "generator_function_declaration", + "arrow_function", "function_expression", "generator_function": + ranges = append(ranges, funcRange{ + start: nodeLine(n), + end: nodeEndLine(n), + }) + } + return true + }) + return ranges +} diff --git a/internal/lang/tsanalyzer/mutation_annotate_test.go b/internal/lang/tsanalyzer/mutation_annotate_test.go new file mode 100644 index 0000000..e57ee8f --- /dev/null +++ b/internal/lang/tsanalyzer/mutation_annotate_test.go @@ -0,0 +1,113 @@ +package tsanalyzer + +import ( + "path/filepath" + "testing" +) + +// TestScanAnnotations_NextLine writes a fixture with a mutator-disable- +// next-line comment and confirms the following source line is disabled. 
+func TestScanAnnotations_NextLine(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.ts") + src := []byte(`function f(x: number): number { + // mutator-disable-next-line + if (x > 0) { return 1; } else { return 0; } +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + if !disabled[3] { + t.Errorf("expected line 3 disabled, got %v", disabled) + } + if disabled[4] { + t.Errorf("line 4 should not be disabled (unrelated), got %v", disabled) + } +} + +// TestScanAnnotations_FuncWide asserts `mutator-disable-func` marks every +// line of the enclosing function — including the signature line. +func TestScanAnnotations_FuncWide(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.ts") + src := []byte(`// mutator-disable-func +function top(x: number): number { + return x + 1; +} + +function other(x: number): number { + return x * 2; +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + // top spans lines 2-4. + for _, line := range []int{2, 3, 4} { + if !disabled[line] { + t.Errorf("expected line %d disabled in top, got %v", line, disabled) + } + } + // other (lines 6-8) must not be touched. + for _, line := range []int{6, 7, 8} { + if disabled[line] { + t.Errorf("line %d in other should not be disabled, got %v", line, disabled) + } + } +} + +// TestScanAnnotations_UnrelatedComments: ordinary comments must not +// toggle anything. 
+func TestScanAnnotations_UnrelatedComments(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.ts") + src := []byte(`// just a regular comment +function f(x: number): number { + // another regular comment + return x; +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + if len(disabled) != 0 { + t.Errorf("expected empty disabled map, got %v", disabled) + } +} + +// TestScanAnnotations_FuncInsideComment: comment INSIDE the function body +// still applies to the enclosing function. +func TestScanAnnotations_FuncInsideComment(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.ts") + src := []byte(`function only(x: number): number { + // mutator-disable-func + return x + 1; +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + for _, line := range []int{1, 2, 3, 4} { + if !disabled[line] { + t.Errorf("expected line %d disabled, got %v", line, disabled) + } + } +} diff --git a/internal/lang/tsanalyzer/mutation_apply.go b/internal/lang/tsanalyzer/mutation_apply.go new file mode 100644 index 0000000..ed81c25 --- /dev/null +++ b/internal/lang/tsanalyzer/mutation_apply.go @@ -0,0 +1,278 @@ +package tsanalyzer + +import ( + "path/filepath" + "slices" + "strings" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// mutantApplierImpl implements lang.MutantApplier for TypeScript. We +// operate on source bytes directly — same strategy as the Rust analyzer — +// because tree-sitter gives us exact byte offsets for every node and +// text-level edits preserve formatting without a dedicated TS formatter. +// +// After every mutation we re-parse with the correct grammar (.ts vs .tsx +// based on the file's extension) and check for parse errors. 
If the +// mutated source fails to parse we return nil so the orchestrator treats +// the mutant as skipped rather than running invalid code. +type mutantApplierImpl struct{} + +// ApplyMutation returns the mutated file bytes, or (nil, nil) if the +// mutation can't be applied cleanly. +func (mutantApplierImpl) ApplyMutation(absPath string, site lang.MutantSite) ([]byte, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, nil + } + defer tree.Close() + + mutated := applyBySite(tree.RootNode(), src, site) + if mutated == nil { + return nil, nil + } + if !isValidTS(mutated, absPath) { + return nil, nil + } + return mutated, nil +} + +// applyBySite dispatches to the operator-specific helper. +func applyBySite(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + switch site.Operator { + case "conditional_boundary", "negate_conditional", "math_operator", + "strict_equality", "nullish_to_logical_or": + return applyBinary(root, src, site) + case "boolean_substitution": + return applyBool(root, src, site) + case "incdec": + return applyIncDec(root, src, site) + case "return_value": + return applyReturnValue(root, src, site) + case "branch_removal": + return applyBranchRemoval(root, src, site) + case "statement_deletion": + return applyStatementDeletion(root, src, site) + case "optional_chain_removal": + return applyOptionalChainRemoval(root, src, site) + } + return nil +} + +// findOnLine returns the first node matching `pred` whose start line +// equals `line`. +func findOnLine(root *sitter.Node, line int, pred func(*sitter.Node) bool) *sitter.Node { + var hit *sitter.Node + walk(root, func(n *sitter.Node) bool { + if hit != nil { + return false + } + if nodeLine(n) != line { + if int(n.StartPoint().Row)+1 > line || int(n.EndPoint().Row)+1 < line { + return false + } + return true + } + if pred(n) { + hit = n + return false + } + return true + }) + return hit +} + +// replaceRange returns src with [start, end) replaced by `with`. 
+func replaceRange(src []byte, start, end uint32, with []byte) []byte { + return slices.Concat(src[:start], with, src[end:]) +} + +// applyBinary swaps the operator of a binary_expression on the target +// line, honoring the description ("X -> Y") so overlapping binaries on +// the same line mutate the exact one the generator emitted. +func applyBinary(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + fromOp, toOp := parseBinaryDesc(site.Description) + if fromOp == "" { + return nil + } + var target *sitter.Node + walk(root, func(n *sitter.Node) bool { + if target != nil { + return false + } + if n.Type() != "binary_expression" || nodeLine(n) != site.Line { + return true + } + op := n.ChildByFieldName("operator") + if op != nil && op.Type() == fromOp { + target = n + return false + } + return true + }) + if target == nil { + return nil + } + op := target.ChildByFieldName("operator") + return replaceRange(src, op.StartByte(), op.EndByte(), []byte(toOp)) +} + +func parseBinaryDesc(desc string) (string, string) { + parts := strings.SplitN(desc, " -> ", 2) + if len(parts) != 2 { + return "", "" + } + return parts[0], parts[1] +} + +// applyBool flips a boolean literal on the target line. +func applyBool(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + n := findOnLine(root, site.Line, func(n *sitter.Node) bool { + return n.Type() == "true" || n.Type() == "false" + }) + if n == nil { + return nil + } + flipped := "false" + if n.Type() == "false" { + flipped = "true" + } + return replaceRange(src, n.StartByte(), n.EndByte(), []byte(flipped)) +} + +// applyIncDec swaps ++ and -- on an update_expression on the target line. +// We rewrite just the operator token, keeping pre/postfix position intact. 
+func applyIncDec(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + n := findOnLine(root, site.Line, func(n *sitter.Node) bool { + if n.Type() != "update_expression" { + return false + } + op := n.ChildByFieldName("operator") + return op != nil && (op.Type() == "++" || op.Type() == "--") + }) + if n == nil { + return nil + } + op := n.ChildByFieldName("operator") + flipped := "--" + if op.Type() == "--" { + flipped = "++" + } + return replaceRange(src, op.StartByte(), op.EndByte(), []byte(flipped)) +} + +// applyReturnValue replaces the returned expression with `null` or +// `undefined` based on the description. We read the target from the +// description so the applier and generator agree on which value to write. +func applyReturnValue(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + ret := findOnLine(root, site.Line, func(n *sitter.Node) bool { + return n.Type() == "return_statement" + }) + if ret == nil { + return nil + } + if ret.NamedChildCount() == 0 { + return nil + } + value := ret.NamedChild(0) + if value == nil { + return nil + } + target := "null" + if strings.Contains(site.Description, "undefined") { + target = "undefined" + } + return replaceRange(src, value.StartByte(), value.EndByte(), []byte(target)) +} + +// applyBranchRemoval empties the consequence block of an if_statement. +// We preserve the outer braces; remove only the inner bytes so the +// resulting source still parses. +func applyBranchRemoval(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + ifNode := findOnLine(root, site.Line, func(n *sitter.Node) bool { + return n.Type() == "if_statement" + }) + if ifNode == nil { + return nil + } + body := ifNode.ChildByFieldName("consequence") + if body == nil { + return nil + } + inner := bodyInnerRange(body, src) + if inner == nil { + // If the consequence is a single statement (no braces), replace + // the whole statement with an empty block `{}` so the `if` + // structure stays intact and parseable. 
+ return replaceRange(src, body.StartByte(), body.EndByte(), []byte("{}")) + } + return replaceRange(src, inner[0], inner[1], []byte{}) +} + +// bodyInnerRange returns [openBracePlusOne, closeBrace) for a block node, +// or nil if the node doesn't look like a braced block. +func bodyInnerRange(block *sitter.Node, src []byte) []uint32 { + start := block.StartByte() + end := block.EndByte() + if start >= end { + return nil + } + if src[start] != '{' || src[end-1] != '}' { + return nil + } + return []uint32{start + 1, end - 1} +} + +// applyStatementDeletion replaces a bare call statement with an empty +// statement (`;`). Keeps the source parseable and kills any side effect. +func applyStatementDeletion(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + stmt := findOnLine(root, site.Line, func(n *sitter.Node) bool { + return n.Type() == "expression_statement" + }) + if stmt == nil { + return nil + } + return replaceRange(src, stmt.StartByte(), stmt.EndByte(), []byte(";")) +} + +// applyOptionalChainRemoval replaces a `?.` token between the object and +// property of a member_expression on the target line with a plain `.`. +// Token scanning is delegated to optionalChainTokenOffset so detection and +// application share one implementation. +func applyOptionalChainRemoval(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + n := findOnLine(root, site.Line, func(n *sitter.Node) bool { + return n.Type() == "member_expression" && hasOptionalChainToken(n, src) + }) + if n == nil { + return nil + } + tokenStart, ok := optionalChainTokenOffset(n, src) + if !ok { + return nil + } + return replaceRange(src, tokenStart, tokenStart+2, []byte(".")) +} + +// isValidTS re-parses the mutated source with the grammar matching the +// original file extension and reports whether tree-sitter encountered any +// syntax errors. We pick the grammar from the ABSOLUTE path so `.tsx` +// files are validated with the tsx grammar. 
+func isValidTS(src []byte, absPath string) bool { + grammar := typescriptLanguage() + if strings.ToLower(filepath.Ext(absPath)) == ".tsx" { + grammar = tsxLanguage() + } + tree, err := parseBytesAs(src, grammar) + if err != nil || tree == nil { + return false + } + defer tree.Close() + root := tree.RootNode() + if root == nil { + return false + } + return !root.HasError() +} diff --git a/internal/lang/tsanalyzer/mutation_apply_test.go b/internal/lang/tsanalyzer/mutation_apply_test.go new file mode 100644 index 0000000..02b12dc --- /dev/null +++ b/internal/lang/tsanalyzer/mutation_apply_test.go @@ -0,0 +1,348 @@ +package tsanalyzer + +import ( + "path/filepath" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// applyAt writes src to a temp file and invokes the applier for `site`. +// Returns the mutated bytes (or nil if the applier skipped). +func applyAt(t *testing.T, src string, site lang.MutantSite) []byte { + t.Helper() + return applyAtExt(t, src, ".ts", site) +} + +func applyAtExt(t *testing.T, src string, ext string, site lang.MutantSite) []byte { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "a"+ext) + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + out, err := mutantApplierImpl{}.ApplyMutation(path, site) + if err != nil { + t.Fatal(err) + } + return out +} + +func TestApply_BinaryOperator(t *testing.T) { + src := `function f(x: number): boolean { + return x > 0; +} +` + site := lang.MutantSite{ + File: "a.ts", + Line: 2, + Operator: "conditional_boundary", + Description: "> -> >=", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "x >= 0") { + t.Errorf("expected 'x >= 0', got:\n%s", out) + } +} + +func TestApply_StrictEquality(t *testing.T) { + src := `function f(a: number, b: number): boolean { + return a === b; +} +` + site := lang.MutantSite{ + File: "a.ts", + Line: 2, + Operator: "strict_equality", + 
Description: "=== -> ==", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + s := string(out) + if !strings.Contains(s, " == ") { + t.Errorf("expected ' == ', got:\n%s", s) + } + if strings.Contains(s, "===") { + t.Errorf("=== not replaced, got:\n%s", s) + } +} + +func TestApply_NullishToLogicalOr(t *testing.T) { + src := `function f(a: number | null, b: number): number { + return a ?? b; +} +` + site := lang.MutantSite{ + File: "a.ts", + Line: 2, + Operator: "nullish_to_logical_or", + Description: "?? -> ||", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "a || b") { + t.Errorf("expected 'a || b', got:\n%s", out) + } +} + +func TestApply_BooleanFlip(t *testing.T) { + src := `function f(): boolean { return true; } +` + site := lang.MutantSite{ + File: "a.ts", + Line: 1, + Operator: "boolean_substitution", + Description: "true -> false", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "false") { + t.Errorf("expected 'false', got:\n%s", out) + } + if strings.Contains(string(out), "true") { + t.Errorf("'true' should have been replaced, got:\n%s", out) + } +} + +func TestApply_IncDec(t *testing.T) { + src := `function f(): void { + let x = 0; + x++; +} +` + site := lang.MutantSite{ + File: "a.ts", + Line: 3, + Operator: "incdec", + Description: "++ -> --", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "x--") { + t.Errorf("expected 'x--', got:\n%s", out) + } +} + +func TestApply_ReturnValueToNull(t *testing.T) { + src := `function f(): number { + return 42; +} +` + site := lang.MutantSite{ + File: "a.ts", + Line: 2, + Operator: "return_value", + Description: "replace return value with null", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if 
!strings.Contains(string(out), "return null") { + t.Errorf("expected 'return null', got:\n%s", out) + } +} + +func TestApply_ReturnValueToUndefined(t *testing.T) { + src := `function f(): null { + return null; +} +` + site := lang.MutantSite{ + File: "a.ts", + Line: 2, + Operator: "return_value", + Description: "replace return value with undefined", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "return undefined") { + t.Errorf("expected 'return undefined', got:\n%s", out) + } +} + +func TestApply_BranchRemoval(t *testing.T) { + src := `function side(): void {} +function f(x: number): void { + if (x > 0) { + side(); + } +} +` + site := lang.MutantSite{ + File: "a.ts", + Line: 3, + Operator: "branch_removal", + Description: "remove if body", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + // side() inside the if body must be gone. + if strings.Contains(string(out), "if (x > 0) {\n side();") { + t.Errorf("if body not emptied, got:\n%s", out) + } +} + +func TestApply_StatementDeletion(t *testing.T) { + src := `function side(): void {} +function f(): void { + side(); +} +` + site := lang.MutantSite{ + File: "a.ts", + Line: 3, + Operator: "statement_deletion", + Description: "remove call statement", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + // Should retain the function shell + semicolon marker. + if strings.Contains(string(out), "side();\n}\n") && !strings.Contains(string(out), "function f(): void {\n ;\n}") { + t.Errorf("expected statement replaced with ';' or similar, got:\n%s", out) + } +} + +func TestApply_OptionalChainRemoval(t *testing.T) { + src := `function f(x: { a?: number } | null): number | undefined { + return x?.a; +} +` + site := lang.MutantSite{ + File: "a.ts", + Line: 2, + Operator: "optional_chain_removal", + Description: "?. 
-> .", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "return x.a") { + t.Errorf("expected 'return x.a', got:\n%s", out) + } + if strings.Contains(string(out), "?.a") { + t.Errorf("?. not stripped, got:\n%s", out) + } +} + +// TestApply_BranchRemoval_BareForm ensures the applier handles an if-statement +// whose consequence is a single statement without braces (bare form). The +// mutated output must replace that statement with an empty block `{}` so the +// result still parses cleanly as valid TypeScript. +func TestApply_BranchRemoval_BareForm(t *testing.T) { + src := `function doThing(): void {} +function f(x: boolean): void { + if (x) doThing(); +} +` + site := lang.MutantSite{ + File: "a.ts", + Line: 3, + Operator: "branch_removal", + Description: "remove if body", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil for bare-form if body") + } + s := string(out) + // The bare `doThing();` must be gone — replaced by `{}`. + if strings.Contains(s, "doThing();") { + t.Errorf("bare if body not removed, got:\n%s", s) + } + // The if structure itself must remain parseable — confirmed by the + // isValidTS gate inside ApplyMutation, but we also sanity-check the + // output contains `if (x)`. 
+ if !strings.Contains(s, "if (x)") { + t.Errorf("if condition not preserved, got:\n%s", s) + } +} + +func TestApply_UnknownOperatorReturnsNil(t *testing.T) { + src := `function f(): void {} +` + site := lang.MutantSite{Line: 1, Operator: "nonexistent_op"} + out := applyAt(t, src, site) + if out != nil { + t.Errorf("expected nil for unknown operator, got:\n%s", out) + } +} + +func TestApply_SiteMismatchReturnsNil(t *testing.T) { + src := `function f(): number { return 42; } +` + site := lang.MutantSite{Line: 1, Operator: "boolean_substitution", Description: "true -> false"} + out := applyAt(t, src, site) + if out != nil { + t.Errorf("expected nil for site with no matching node, got:\n%s", out) + } +} + +// TestIsValidTS exercises the re-parse gate directly for both grammars. +func TestIsValidTS(t *testing.T) { + goodTS := []byte(`function f(): number { return 42; }`) + badTS := []byte(`function f(): number { return 42 ;;;; ; return;;; ;`) // malformed braces + if !isValidTS(goodTS, "a.ts") { + t.Error("well-formed TS reported invalid") + } + if isValidTS(badTS, "a.ts") { + t.Error("malformed TS reported valid") + } + + // TSX grammar accepts JSX; the plain TS grammar does not. + jsxSrc := []byte(`function F() { return
hi
; }`) + if !isValidTS(jsxSrc, "a.tsx") { + t.Error("JSX reported invalid under tsx grammar") + } + if isValidTS(jsxSrc, "a.ts") { + // The plain typescript grammar rejects `
` at expression + // position (it's parsed as a generic type). HasError will be + // true — which is what we want the caller to observe. + t.Log("JSX under .ts grammar correctly reported invalid") + } +} + +// TestApply_TSXFile exercises the applier end-to-end on a .tsx source — +// proves the re-parse uses the correct grammar after mutation. +func TestApply_TSXFile(t *testing.T) { + src := `import * as React from "react"; +export function F(n: number) { + if (n > 0) { + return
{n}
; + } + return null; +} +` + site := lang.MutantSite{ + File: "a.tsx", + Line: 3, + Operator: "conditional_boundary", + Description: "> -> >=", + } + out := applyAtExt(t, src, ".tsx", site) + if out == nil { + t.Fatal("applier returned nil for .tsx file") + } + if !strings.Contains(string(out), "n >= 0") { + t.Errorf("expected 'n >= 0' in .tsx output, got:\n%s", out) + } +} diff --git a/internal/lang/tsanalyzer/mutation_generate.go b/internal/lang/tsanalyzer/mutation_generate.go new file mode 100644 index 0000000..663712b --- /dev/null +++ b/internal/lang/tsanalyzer/mutation_generate.go @@ -0,0 +1,354 @@ +package tsanalyzer + +import ( + "fmt" + "sort" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// mutantGeneratorImpl implements lang.MutantGenerator for TypeScript. It +// emits canonical operators (conditional_boundary, negate_conditional, +// math_operator, return_value, boolean_substitution, incdec, branch_removal, +// statement_deletion) plus the TS-specific operators defined in the design +// doc: strict_equality, nullish_to_logical_or, optional_chain_removal. +// +// Unlike Rust, TypeScript has `++`/`--`, so incdec IS emitted. +type mutantGeneratorImpl struct{} + +// GenerateMutants walks the CST and emits a MutantSite for each qualifying +// node on a changed, non-disabled line. Output is deterministic. +func (mutantGeneratorImpl) GenerateMutants(absPath string, fc diff.FileChange, disabled map[int]bool) ([]lang.MutantSite, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, err + } + defer tree.Close() + + var out []lang.MutantSite + walk(tree.RootNode(), func(n *sitter.Node) bool { + line := nodeLine(n) + if !fc.ContainsLine(line) || disabled[line] { + return true + } + out = append(out, mutantsFor(fc.Path, line, n, src)...) 
+ return true + }) + sort.SliceStable(out, func(i, j int) bool { + if out[i].Line != out[j].Line { + return out[i].Line < out[j].Line + } + if out[i].Operator != out[j].Operator { + return out[i].Operator < out[j].Operator + } + return out[i].Description < out[j].Description + }) + return out, nil +} + +// mutantsFor dispatches on the node kind. Nodes that don't match any +// operator return nil. +func mutantsFor(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + switch n.Type() { + case "binary_expression": + return binaryMutants(file, line, n, src) + case "true", "false": + return boolLiteralMutants(file, line, n, src) + case "update_expression": + return updateMutants(file, line, n, src) + case "return_statement": + return returnMutants(file, line, n, src) + case "if_statement": + return ifMutants(file, line, n, src) + case "expression_statement": + return exprStmtMutants(file, line, n, src) + case "member_expression": + return optionalChainMutants(file, line, n, src) + } + return nil +} + +// binaryFlips covers conditional_boundary + negate_conditional + +// math_operator: same flip table as the Rust analyzer, extended with the +// TS-strict variants. +var binaryFlips = map[string]string{ + ">": ">=", + "<": "<=", + ">=": ">", + "<=": "<", + "==": "!=", + "!=": "==", + "===": "!==", + "!==": "===", + "+": "-", + "-": "+", + "*": "/", + "/": "*", +} + +// strictEqualityFlips toggle strictness independently of inversion. +var strictEqualityFlips = map[string]string{ + "===": "==", + "==": "===", + "!==": "!=", + "!=": "!==", +} + +// binaryMutants covers conditional_boundary, negate_conditional, +// math_operator, strict_equality, and nullish_to_logical_or. 
+// +// Rules (per design doc): +// - `>` / `<` / `>=` / `<=` swaps → conditional_boundary +// - `==` / `!=` / `===` / `!==` flips → negate_conditional +// - `===` ↔ `==`, `!==` ↔ `!=` → strict_equality (Tier 1) +// - `+` / `-`, `*` / `/` swaps → math_operator +// - `??` → `||` → nullish_to_logical_or (Tier 2) +// +// negate_conditional covers both loose (==/!=) and strict (===/!==) +// comparison flips, while strict_equality specifically toggles the +// strictness (===/==). Both can apply to the same source expression; we +// emit both so tests gain independent signal. +func binaryMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + opNode := n.ChildByFieldName("operator") + if opNode == nil { + return nil + } + op := opNode.Type() + var out []lang.MutantSite + if newOp, ok := binaryFlips[op]; ok { + out = append(out, newMutantSite(file, line, op, newOp, binaryOperatorName(op, newOp))) + } + if newOp, ok := strictEqualityFlips[op]; ok { + out = append(out, newMutantSite(file, line, op, newOp, "strict_equality")) + } + // nullish_to_logical_or: `??` -> `||`. We don't emit the reverse + // because `||` doesn't distinguish null/undefined from falsy, so + // flipping `||` -> `??` would produce a tautological mutant on + // non-nullable code. + if op == "??" { + out = append(out, newMutantSite(file, line, "??", "||", "nullish_to_logical_or")) + } + return out +} + +// newMutantSite builds a MutantSite for a "from -> to" operator swap. +func newMutantSite(file string, line int, from, to, operator string) lang.MutantSite { + return lang.MutantSite{ + File: file, + Line: line, + Description: fmt.Sprintf("%s -> %s", from, to), + Operator: operator, + } +} + +// binaryOperatorName classifies a source/target operator pair into the +// canonical tier-1 operator name. The strict (===/!==) equality operators +// fold into negate_conditional for this classifier; the strict_equality +// operator is emitted as a SEPARATE mutant by binaryMutants. 
+func binaryOperatorName(from, to string) string { + if isBoundary(from) || isBoundary(to) { + return "conditional_boundary" + } + if isComparison(from) || isComparison(to) { + return "negate_conditional" + } + if isMath(from) || isMath(to) { + return "math_operator" + } + return "unknown" +} + +func isBoundary(op string) bool { + return op == ">" || op == ">=" || op == "<" || op == "<=" +} + +func isComparison(op string) bool { + return op == "==" || op == "!=" || op == "===" || op == "!==" +} + +func isMath(op string) bool { + return op == "+" || op == "-" || op == "*" || op == "/" +} + +// boolLiteralMutants flips true <-> false. tree-sitter-typescript exposes +// boolean literals as nodes of type "true" and "false" (whose Type() is +// literally that token). +func boolLiteralMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + var flipped string + switch n.Type() { + case "true": + flipped = "false" + case "false": + flipped = "true" + default: + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("%s -> %s", n.Type(), flipped), + Operator: "boolean_substitution", + }} +} + +// updateMutants emits the incdec operator for `++` and `--` expressions. +// Tree-sitter models `x++` / `++x` / `x--` / `--x` as update_expression. +func updateMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + opNode := n.ChildByFieldName("operator") + if opNode == nil { + return nil + } + op := opNode.Type() + flipped := "" + switch op { + case "++": + flipped = "--" + case "--": + flipped = "++" + default: + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("%s -> %s", op, flipped), + Operator: "incdec", + }} +} + +// returnMutants emits the return_value operator. TypeScript has both +// `null` and `undefined` as zero values; we use `null` when the return +// has a non-undefined expression, and `undefined` otherwise. 
An empty +// `return;` already returns undefined so there's nothing to mutate. +func returnMutants(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + // return_statement has at most one named child — the returned value. + if n.NamedChildCount() == 0 { + return nil + } + value := n.NamedChild(0) + if value == nil { + return nil + } + // Choose the target zero value. If the current expression is literally + // `null`, swap to `undefined` so the mutant is non-equivalent. + target := "null" + if nodeText(value, src) == "null" { + target = "undefined" + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("replace return value with %s", target), + Operator: "return_value", + }} +} + +// ifMutants empties an if_statement's consequence (branch_removal). +func ifMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + body := n.ChildByFieldName("consequence") + if body == nil { + return nil + } + // Only emit when the consequence actually has content (otherwise + // there's nothing to remove and the mutant is trivially equivalent). + if body.NamedChildCount() == 0 { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "remove if body", + Operator: "branch_removal", + }} +} + +// exprStmtMutants removes a bare call statement (statement_deletion). Only +// expression_statements whose payload is a call_expression qualify — bare +// assignments, let bindings, etc. are left alone because deleting them +// tends to produce un-killable dead-code mutants. 
+func exprStmtMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + if n.NamedChildCount() == 0 { + return nil + } + payload := n.NamedChild(0) + if payload == nil || payload.Type() != "call_expression" { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "remove call statement", + Operator: "statement_deletion", + }} +} + +// optionalChainMutants emits the optional_chain_removal operator for +// `foo?.bar`. Tree-sitter models optional chains as member_expression +// nodes with an optional_chain child token (a literal `?.`). We detect +// the presence of that child and emit the mutant. +func optionalChainMutants(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + if !hasOptionalChainToken(n, src) { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "?. -> .", + Operator: "optional_chain_removal", + }} +} + +// hasOptionalChainToken reports whether a member_expression carries the +// `?.` token between its object and property. Different grammar versions +// model this differently (anonymous child vs named `optional_chain`), so +// we look at the literal source text between the object and the property. +func hasOptionalChainToken(n *sitter.Node, src []byte) bool { + if hasOptionalChainChild(n) { + return true + } + _, ok := optionalChainTokenOffset(n, src) + return ok +} + +// hasOptionalChainChild checks the fast path: some grammar versions expose +// an explicit child whose Type() is literally "optional_chain" or "?.". +func hasOptionalChainChild(n *sitter.Node) bool { + for i := 0; i < int(n.ChildCount()); i++ { + c := n.Child(i) + if c == nil { + continue + } + if c.Type() == "optional_chain" || c.Type() == "?." { + return true + } + } + return false +} + +// optionalChainTokenOffset scans the raw bytes between a member_expression's +// object and property for the literal `?.` token and returns its absolute +// start offset. 
Used both for detection (hasOptionalChainToken) and for +// applying the mutation (applyOptionalChainRemoval) so the two stay in sync +// across grammar versions. +func optionalChainTokenOffset(n *sitter.Node, src []byte) (uint32, bool) { + obj := n.ChildByFieldName("object") + prop := n.ChildByFieldName("property") + if obj == nil || prop == nil { + return 0, false + } + start := obj.EndByte() + end := prop.StartByte() + if end <= start || int(end) > len(src) { + return 0, false + } + between := src[start:end] + for i := 0; i+1 < len(between); i++ { + if between[i] == '?' && between[i+1] == '.' { + return start + uint32(i), true + } + } + return 0, false +} diff --git a/internal/lang/tsanalyzer/mutation_generate_test.go b/internal/lang/tsanalyzer/mutation_generate_test.go new file mode 100644 index 0000000..036f9d8 --- /dev/null +++ b/internal/lang/tsanalyzer/mutation_generate_test.go @@ -0,0 +1,249 @@ +package tsanalyzer + +import ( + "math" + "path/filepath" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// writeAndGenerate is a small harness: write `src` to a temp .ts file, +// generate mutants over the entire file, and return them. +func writeAndGenerate(t *testing.T, src string, disabled map[int]bool) []lang.MutantSite { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "a.ts") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + fc := diff.FileChange{ + Path: "a.ts", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: math.MaxInt32}}, + } + mutants, err := mutantGeneratorImpl{}.GenerateMutants(path, fc, disabled) + if err != nil { + t.Fatal(err) + } + return mutants +} + +// collectOps returns the counts of operator names from a mutant list. 
+func collectOps(mutants []lang.MutantSite) map[string]int { + m := map[string]int{} + for _, x := range mutants { + m[x.Operator]++ + } + return m +} + +func TestGenerate_BinaryOps(t *testing.T) { + src := `function f(x: number): boolean { + return x > 0; +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["conditional_boundary"] == 0 { + t.Errorf("expected conditional_boundary, got %v", ops) + } +} + +func TestGenerate_EqualityAndStrict(t *testing.T) { + src := `function f(a: number, b: number): boolean { + return a === b; +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + // === gets TWO mutants: negate_conditional (flip to !==) and + // strict_equality (flip to ==). + if ops["negate_conditional"] == 0 { + t.Errorf("expected negate_conditional for ===, got %v", ops) + } + if ops["strict_equality"] == 0 { + t.Errorf("expected strict_equality for ===, got %v", ops) + } +} + +func TestGenerate_LooseEquality(t *testing.T) { + src := `function f(a: any, b: any): boolean { + return a == b; +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["negate_conditional"] == 0 { + t.Errorf("expected negate_conditional for ==, got %v", ops) + } + if ops["strict_equality"] == 0 { + t.Errorf("expected strict_equality for ==, got %v", ops) + } +} + +func TestGenerate_Math(t *testing.T) { + src := `function g(a: number, b: number): number { + return a + b; +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["math_operator"] == 0 { + t.Errorf("expected math_operator for +, got %v", collectOps(m)) + } +} + +func TestGenerate_BooleanLiteral(t *testing.T) { + src := `function g(): boolean { return true; } +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["boolean_substitution"] == 0 { + t.Errorf("expected boolean_substitution, got %v", collectOps(m)) + } +} + +func TestGenerate_IncDec(t *testing.T) { + src := `function g(): void { + let x = 0; + x++; +} +` + m := writeAndGenerate(t, src, nil) + if 
collectOps(m)["incdec"] == 0 { + t.Errorf("expected incdec, got %v", collectOps(m)) + } +} + +func TestGenerate_ReturnValue(t *testing.T) { + src := `function g(): number { + return 42; +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["return_value"] == 0 { + t.Errorf("expected return_value mutant, got %v", collectOps(m)) + } +} + +func TestGenerate_NullishToLogicalOr(t *testing.T) { + src := `function g(a: number | null, b: number): number { + return a ?? b; +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["nullish_to_logical_or"] == 0 { + t.Errorf("expected nullish_to_logical_or, got %v", collectOps(m)) + } +} + +func TestGenerate_OptionalChainRemoval(t *testing.T) { + src := `function g(x: { a?: number } | null): number | undefined { + return x?.a; +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["optional_chain_removal"] == 0 { + t.Errorf("expected optional_chain_removal, got %v", collectOps(m)) + } +} + +func TestGenerate_BranchRemovalAndStatementDeletion(t *testing.T) { + src := `function side(): void {} + +function g(x: number): void { + if (x > 0) { + side(); + } + side(); +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["branch_removal"] == 0 { + t.Errorf("expected branch_removal, got %v", ops) + } + if ops["statement_deletion"] == 0 { + t.Errorf("expected statement_deletion for bare call, got %v", ops) + } +} + +func TestGenerate_RespectsChangedRegion(t *testing.T) { + src := `function inRegion(x: number): boolean { return x > 0; } +function outOfRegion(x: number): boolean { return x > 0; } +` + dir := t.TempDir() + path := filepath.Join(dir, "a.ts") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + fc := diff.FileChange{ + Path: "a.ts", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 1}}, + } + mutants, err := mutantGeneratorImpl{}.GenerateMutants(path, fc, nil) + if err != nil { + t.Fatal(err) + } + for _, m := range mutants { + if m.Line != 1 { + 
t.Errorf("out-of-region mutant at line %d: %+v", m.Line, m) + } + } +} + +func TestGenerate_RespectsDisabledLines(t *testing.T) { + src := `function g(a: number, b: number): boolean { + return a > b; +} +` + disabled := map[int]bool{2: true} + m := writeAndGenerate(t, src, disabled) + for _, x := range m { + if x.Line == 2 { + t.Errorf("mutant on disabled line 2: %+v", x) + } + } +} + +func TestGenerate_Deterministic(t *testing.T) { + src := `function g(a: number, b: number): boolean { + return a > b && b < 10; +} +` + dir := t.TempDir() + path := filepath.Join(dir, "a.ts") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + fc := diff.FileChange{Path: "a.ts", Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}} + first, _ := mutantGeneratorImpl{}.GenerateMutants(path, fc, nil) + second, _ := mutantGeneratorImpl{}.GenerateMutants(path, fc, nil) + if len(first) != len(second) { + t.Fatalf("lengths differ: %d vs %d", len(first), len(second)) + } + for i := range first { + if first[i] != second[i] { + t.Errorf("row %d differs: %+v vs %+v", i, first[i], second[i]) + } + } +} + +// TestGenerate_TSXFileProducesMutants smoke-tests that the generator +// works on a .tsx file (the parser picks up the tsx grammar). +func TestGenerate_TSXFileProducesMutants(t *testing.T) { + absPath, _ := filepath.Abs("testdata/component.tsx") + fc := diff.FileChange{ + Path: "testdata/component.tsx", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: math.MaxInt32}}, + } + mutants, err := mutantGeneratorImpl{}.GenerateMutants(absPath, fc, nil) + if err != nil { + t.Fatal(err) + } + // The fixture's `if (props.name.length > 0)` produces at least one + // binary-comparison mutant (conditional_boundary or negate_conditional). 
+ ops := collectOps(mutants) + if ops["conditional_boundary"] == 0 { + t.Errorf("expected conditional_boundary mutant on .tsx, got %v", ops) + } +} diff --git a/internal/lang/tsanalyzer/parse.go b/internal/lang/tsanalyzer/parse.go new file mode 100644 index 0000000..46e738a --- /dev/null +++ b/internal/lang/tsanalyzer/parse.go @@ -0,0 +1,157 @@ +// Package tsanalyzer implements the lang.Language interface for TypeScript +// (and .tsx). It is blank-imported from cmd/diffguard/main.go so TypeScript +// gets registered at process start. +// +// One file per concern, mirroring the Go and Rust analyzers: +// - tsanalyzer.go -- Language + init()/Register + detector +// - parse.go -- tree-sitter setup, CST helpers, grammar pick +// - sizes.go -- FunctionExtractor +// - complexity.go -- ComplexityCalculator + ComplexityScorer +// - deps.go -- ImportResolver +// - mutation_generate.go-- MutantGenerator +// - mutation_apply.go -- MutantApplier +// - mutation_annotate.go-- AnnotationScanner +// - testrunner.go -- TestRunner (wraps vitest/jest/npm test) +// +// Unlike the Go analyzer, TypeScript requires two tree-sitter grammars: +// one for `.ts` (typescript) and one for `.tsx` (tsx). The two grammars +// are nearly identical for our purposes (node kinds like `if_statement`, +// `function_declaration`, etc. are shared) but the parser input has to +// match the extension — the tsx grammar accepts JSX syntax, the plain +// typescript grammar rejects it. +package tsanalyzer + +import ( + "context" + "os" + "path/filepath" + "strings" + "sync" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/typescript/tsx" + "github.com/smacker/go-tree-sitter/typescript/typescript" +) + +// tsLang / tsxLang are the cached tree-sitter grammar handles. Building +// them crosses cgo so we only do it once each. 
+var ( + tsLangOnce sync.Once + tsLang *sitter.Language + tsxLangOnce sync.Once + tsxLangLang *sitter.Language +) + +// typescriptLanguage returns the tree-sitter grammar for `.ts` source. +func typescriptLanguage() *sitter.Language { + tsLangOnce.Do(func() { + tsLang = typescript.GetLanguage() + }) + return tsLang +} + +// tsxLanguage returns the tree-sitter grammar for `.tsx` source. +func tsxLanguage() *sitter.Language { + tsxLangOnce.Do(func() { + tsxLangLang = tsx.GetLanguage() + }) + return tsxLangLang +} + +// grammarFor returns the grammar that matches the given file's extension. +// `.tsx` uses the tsx grammar (accepts JSX); everything else (including +// `.ts`, or when the extension isn't obvious) uses the plain typescript +// grammar. Callers are expected to have already filtered to TypeScript +// extensions upstream so the default branch is rare. +func grammarFor(path string) *sitter.Language { + ext := strings.ToLower(filepath.Ext(path)) + if ext == ".tsx" { + return tsxLanguage() + } + return typescriptLanguage() +} + +// parseFile reads absPath from disk and returns the parsed tree plus the +// source bytes, picking the grammar by file extension. Callers get back +// (nil, nil, err) on read error. +func parseFile(absPath string) (*sitter.Tree, []byte, error) { + src, err := os.ReadFile(absPath) + if err != nil { + return nil, nil, err + } + tree, err := parseBytesAs(src, grammarFor(absPath)) + if err != nil { + return nil, nil, err + } + return tree, src, nil +} + +// parseBytes parses src with the plain TypeScript grammar. Convenience +// wrapper used by tests that don't care about JSX. +func parseBytes(src []byte) (*sitter.Tree, error) { + return parseBytesAs(src, typescriptLanguage()) +} + +// parseBytesAs parses src with the given grammar. The returned *sitter.Tree +// must have its Close() called to release the underlying C allocation. 
+func parseBytesAs(src []byte, grammar *sitter.Language) (*sitter.Tree, error) { + parser := sitter.NewParser() + parser.SetLanguage(grammar) + return parser.ParseCtx(context.Background(), nil, src) +} + +// walk invokes fn on every node in the subtree rooted at n. Plain +// depth-first pre-order, identical to the rust analyzer's walk. +func walk(n *sitter.Node, fn func(*sitter.Node) bool) { + if n == nil { + return + } + if !fn(n) { + return + } + count := int(n.ChildCount()) + for i := 0; i < count; i++ { + walk(n.Child(i), fn) + } +} + +// nodeLine returns the 1-based start line of n. +func nodeLine(n *sitter.Node) int { + return int(n.StartPoint().Row) + 1 +} + +// nodeEndLine returns the 1-based end line of n. Tree-sitter reports +// EndPoint at the position one past the last byte, so a function whose +// closing brace is the last char on line 10 has EndPoint at (11, 0) and we +// subtract 1 in that case to match diffguard's inclusive convention. +func nodeEndLine(n *sitter.Node) int { + end := n.EndPoint() + if end.Column == 0 && end.Row > 0 { + return int(end.Row) + } + return int(end.Row) + 1 +} + +// nodeText returns the byte slice of src covering n. +func nodeText(n *sitter.Node, src []byte) string { + return string(src[n.StartByte():n.EndByte()]) +} + +// countLines returns the number of source lines in src. Same rules as the +// other analyzers: empty file is 0, a file without a trailing newline still +// counts its final line. 
+func countLines(src []byte) int { + if len(src) == 0 { + return 0 + } + count := 0 + for _, b := range src { + if b == '\n' { + count++ + } + } + if src[len(src)-1] != '\n' { + count++ + } + return count +} diff --git a/internal/lang/tsanalyzer/sizes.go b/internal/lang/tsanalyzer/sizes.go new file mode 100644 index 0000000..ecea9b6 --- /dev/null +++ b/internal/lang/tsanalyzer/sizes.go @@ -0,0 +1,195 @@ +package tsanalyzer + +import ( + "sort" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// sizesImpl implements lang.FunctionExtractor for TypeScript via +// tree-sitter. Both the `.ts` and `.tsx` grammars share the function node +// kinds we care about (function_declaration, method_definition, +// arrow_function, generator_function), so the walk is grammar-agnostic — +// we only switch grammars at parse time based on the file extension. +type sizesImpl struct{} + +// ExtractFunctions parses absPath and returns functions overlapping the +// diff's changed regions plus the overall file size. A parse failure is +// treated as "skip this file" to match the Go and Rust analyzers' +// (nil, nil, nil) convention. 
+func (sizesImpl) ExtractFunctions(absPath string, fc diff.FileChange) ([]lang.FunctionSize, *lang.FileSize, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, nil, nil + } + defer tree.Close() + + fns := collectFunctions(tree.RootNode(), src) + fileSize := &lang.FileSize{Path: fc.Path, Lines: countLines(src)} + + var results []lang.FunctionSize + for _, fn := range fns { + if !fc.OverlapsRange(fn.startLine, fn.endLine) { + continue + } + results = append(results, lang.FunctionSize{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: fn.startLine, + EndLine: fn.endLine, + Name: fn.name, + }, + Lines: fn.endLine - fn.startLine + 1, + }) + } + + sort.SliceStable(results, func(i, j int) bool { + if results[i].Line != results[j].Line { + return results[i].Line < results[j].Line + } + return results[i].Name < results[j].Name + }) + return results, fileSize, nil +} + +// tsFunction is the internal record produced by the extractor. Wider than +// FunctionSize so the complexity analyzer can walk the body without +// re-parsing. +type tsFunction struct { + name string + startLine int + endLine int + body *sitter.Node // the body/statement_block + node *sitter.Node // the outer function-ish node +} + +// collectFunctions walks the CST and returns every declared function form +// the spec cares about: +// +// - function_declaration: classic `function foo() {}` or `function* gen() {}` +// - generator_function_declaration: `function* gen() {}` (some grammars) +// - method_definition: `class X { foo() {} }` — named after its class +// - variable_declarator with an arrow_function or function expression +// initializer: `const foo = () => ...` or `const foo = function() {}` +// +// Nested functions are separate entries (matching Rust/Go). 
+func collectFunctions(root *sitter.Node, src []byte) []tsFunction { + var fns []tsFunction + + walk(root, func(n *sitter.Node) bool { + switch n.Type() { + case "function_declaration", "generator_function_declaration": + fns = appendFunction(fns, n, src, standaloneName(n, src)) + case "method_definition": + fns = appendFunction(fns, n, src, methodName(n, src)) + case "variable_declarator": + // const/let/var NAME = (arrow|function) + if fn := variableInitializedFn(n, src); fn != nil { + fns = append(fns, *fn) + } + } + return true + }) + return fns +} + +// appendFunction pushes a function record with startLine/endLine/body +// resolved. Returns nil if the node has no resolvable body so callers +// don't end up with partial records. +func appendFunction(acc []tsFunction, n *sitter.Node, src []byte, name string) []tsFunction { + if name == "" { + return acc + } + body := n.ChildByFieldName("body") + return append(acc, tsFunction{ + name: name, + startLine: nodeLine(n), + endLine: nodeEndLine(n), + body: body, + node: n, + }) +} + +// standaloneName returns the function's name for a function_declaration or +// generator_function_declaration. tree-sitter exposes the name via a +// "name" field. +func standaloneName(n *sitter.Node, src []byte) string { + if name := n.ChildByFieldName("name"); name != nil { + return nodeText(name, src) + } + return "" +} + +// methodName returns `ClassName.method` for a method_definition. The +// grammar puts the enclosing class_declaration/class a few levels up; we +// walk ancestors until we find one and take its name field. If there's no +// class (rare — e.g. an object literal method), we fall back to the bare +// method name so the function is still tracked. +func methodName(n *sitter.Node, src []byte) string { + name := n.ChildByFieldName("name") + if name == nil { + return "" + } + methodBase := nodeText(name, src) + + // Walk up for the enclosing class name. Stop if we hit a function + // boundary first (e.g. 
a method defined inside a nested function — we + // don't prefix it with the outer class then). + for parent := n.Parent(); parent != nil; parent = parent.Parent() { + switch parent.Type() { + case "class_declaration", "class", "abstract_class_declaration": + if cn := parent.ChildByFieldName("name"); cn != nil { + return nodeText(cn, src) + "." + methodBase + } + return methodBase + case "function_declaration", "arrow_function", "function_expression", + "generator_function", "generator_function_declaration", + "method_definition": + // Crossed a function boundary with no class — surface just the + // method base name. + if parent == n { + continue + } + return methodBase + } + } + return methodBase +} + +// variableInitializedFn returns a tsFunction if the variable_declarator's +// value is a function-like initializer (arrow or function expression / +// generator). Name is taken from the declarator's "name" field, which is +// an identifier for the common `const x = () => {}` pattern. +// +// Destructuring patterns (`const {a} = ...`) don't count — the "name" field +// of the declarator is a pattern rather than an identifier. We only emit +// when the name resolves to a plain identifier. +func variableInitializedFn(n *sitter.Node, src []byte) *tsFunction { + nameNode := n.ChildByFieldName("name") + if nameNode == nil || nameNode.Type() != "identifier" { + return nil + } + value := n.ChildByFieldName("value") + if value == nil { + return nil + } + // Grammars differ slightly on the node kind for function expressions; + // we accept the canonical set covering `() => {}`, `function() {}`, + // `function* () {}`, and async variants. 
+ switch value.Type() { + case "arrow_function", "function_expression", "function", + "generator_function": + body := value.ChildByFieldName("body") + return &tsFunction{ + name: nodeText(nameNode, src), + startLine: nodeLine(value), + endLine: nodeEndLine(value), + body: body, + node: value, + } + } + return nil +} diff --git a/internal/lang/tsanalyzer/sizes_test.go b/internal/lang/tsanalyzer/sizes_test.go new file mode 100644 index 0000000..2c1123d --- /dev/null +++ b/internal/lang/tsanalyzer/sizes_test.go @@ -0,0 +1,183 @@ +package tsanalyzer + +import ( + "math" + "path/filepath" + "sort" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +// fullRegion returns a FileChange covering every line so tests can assert +// against every function in a fixture without threading line numbers. +func fullRegion(path string) diff.FileChange { + return diff.FileChange{ + Path: path, + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: math.MaxInt32}}, + } +} + +func TestExtractFunctions_AllForms(t *testing.T) { + absPath, err := filepath.Abs("testdata/functions.ts") + if err != nil { + t.Fatal(err) + } + s := sizesImpl{} + fns, fsize, err := s.ExtractFunctions(absPath, fullRegion("testdata/functions.ts")) + if err != nil { + t.Fatalf("ExtractFunctions: %v", err) + } + if fsize == nil { + t.Fatal("expected non-nil file size") + } + if fsize.Lines == 0 { + t.Error("file size reports zero lines") + } + + names := make([]string, 0, len(fns)) + for _, fn := range fns { + names = append(names, fn.Name) + } + sort.Strings(names) + + // The fixture declares: standalone, arrowConst (arrow assigned to + // const), fnExpr (function expression assigned to const), the Counter + // class (constructor + increment + make + reset), the nested + // arrow inside increment (as its own bare name since it's an + // arrow assigned to const), and the gen generator. 
+ mustHave := []string{ + "standalone", + "arrowConst", + "fnExpr", + "Counter.constructor", + "Counter.increment", + "Counter.make", + "Counter.reset", + "nestedHelper", + "gen", + } + set := map[string]bool{} + for _, n := range names { + set[n] = true + } + for _, want := range mustHave { + if !set[want] { + t.Errorf("missing expected function %q (got %v)", want, names) + } + } +} + +func TestExtractFunctions_LineRanges(t *testing.T) { + absPath, _ := filepath.Abs("testdata/functions.ts") + fns, _, err := sizesImpl{}.ExtractFunctions(absPath, fullRegion("testdata/functions.ts")) + if err != nil { + t.Fatal(err) + } + for _, fn := range fns { + if fn.Line <= 0 { + t.Errorf("%s: Line = %d, want > 0 (1-based)", fn.Name, fn.Line) + } + if fn.EndLine < fn.Line { + t.Errorf("%s: EndLine %d < Line %d", fn.Name, fn.EndLine, fn.Line) + } + if fn.Lines != fn.EndLine-fn.Line+1 { + t.Errorf("%s: Lines = %d, want %d", fn.Name, fn.Lines, fn.EndLine-fn.Line+1) + } + } +} + +func TestExtractFunctions_FilterToChangedRegion(t *testing.T) { + absPath, _ := filepath.Abs("testdata/functions.ts") + + // Narrow region that only covers the first function (standalone) + // — around lines 6-8 in the fixture. 
+ fc := diff.FileChange{ + Path: "testdata/functions.ts", + Regions: []diff.ChangedRegion{{StartLine: 6, EndLine: 8}}, + } + fns, _, err := sizesImpl{}.ExtractFunctions(absPath, fc) + if err != nil { + t.Fatal(err) + } + foundStandalone := false + for _, fn := range fns { + if fn.Name == "standalone" { + foundStandalone = true + } + if fn.Name == "Counter.reset" || fn.Name == "gen" { + t.Errorf("unexpected function %q in narrow region", fn.Name) + } + } + if !foundStandalone { + t.Errorf("expected standalone in narrow region") + } +} + +func TestExtractFunctions_EmptyFile(t *testing.T) { + dir := t.TempDir() + empty := filepath.Join(dir, "empty.ts") + if err := writeFile(empty, []byte("")); err != nil { + t.Fatal(err) + } + fns, fsize, err := sizesImpl{}.ExtractFunctions(empty, fullRegion("empty.ts")) + if err != nil { + t.Fatalf("ExtractFunctions: %v", err) + } + if len(fns) != 0 { + t.Errorf("empty file: got %d fns, want 0", len(fns)) + } + if fsize == nil { + t.Fatal("expected non-nil file size for empty file") + } + if fsize.Lines != 0 { + t.Errorf("empty file: Lines = %d, want 0", fsize.Lines) + } +} + +// TestExtractFunctions_TSXGrammar exercises the .tsx grammar path. The +// fixture contains JSX that the plain typescript grammar would reject; +// a successful extraction here proves parse.go routes .tsx to the tsx +// grammar. 
+func TestExtractFunctions_TSXGrammar(t *testing.T) { + absPath, err := filepath.Abs("testdata/component.tsx") + if err != nil { + t.Fatal(err) + } + fns, fsize, err := sizesImpl{}.ExtractFunctions(absPath, fullRegion("testdata/component.tsx")) + if err != nil { + t.Fatalf("ExtractFunctions on .tsx: %v", err) + } + if fsize == nil || fsize.Lines == 0 { + t.Error("expected non-empty file size for .tsx fixture") + } + names := map[string]bool{} + for _, fn := range fns { + names[fn.Name] = true + } + for _, want := range []string{"Hello", "Count"} { + if !names[want] { + t.Errorf("missing %q in .tsx extraction, got %v", want, names) + } + } +} + +func TestCountLines(t *testing.T) { + cases := []struct { + in string + want int + }{ + {"", 0}, + {"x", 1}, + {"x\n", 1}, + {"x\ny", 2}, + {"x\ny\n", 2}, + {"\n", 1}, + } + for _, tc := range cases { + got := countLines([]byte(tc.in)) + if got != tc.want { + t.Errorf("countLines(%q) = %d, want %d", tc.in, got, tc.want) + } + } +} diff --git a/internal/lang/tsanalyzer/testdata/complexity.ts b/internal/lang/tsanalyzer/testdata/complexity.ts new file mode 100644 index 0000000..17df814 --- /dev/null +++ b/internal/lang/tsanalyzer/testdata/complexity.ts @@ -0,0 +1,94 @@ +// Fixture for the TypeScript cognitive-complexity scorer. Each function +// below has a documented expected score so the test can assert precise +// numbers. + +// Empty function: no control flow, score 0. +export function empty(): void {} + +// Single if: +1 base, 0 nesting, 0 logical. +export function oneIf(x: number): number { + if (x > 0) { + return 1; + } + return 0; +} + +// if/else: +1 for the if, +1 for the else branch = 2. +export function ifElse(x: number): number { + if (x > 0) { + return 1; + } else { + return 0; + } +} + +// switch with 3 cases (all with content) + default: +1 for the switch, +// +3 for non-empty cases. Default counts only if it has content — it does +// here, so +1. Total = 1 + 3 + 1 = 5. 
+export function sw(x: number): number {
+  switch (x) {
+    case 1: {
+      return 10;
+    }
+    case 2: {
+      return 20;
+    }
+    case 3: {
+      return 30;
+    }
+    default: {
+      return 0;
+    }
+  }
+}
+
+// try/catch: +1 for the try, +1 for the catch = 2.
+export function tryCatch(): void {
+  try {
+    doSomething();
+  } catch (e) {
+    handle(e);
+  }
+}
+
+// Ternary: +1. Nested ternary: +1 base + 1 nesting = 2 for the inner.
+// Total = 1 + 2 = 3.
+export function ternary(x: number): number {
+  return x > 0 ? (x > 10 ? 100 : 50) : 0;
+}
+
+// Logical chain: if +1, && run = +1, then switch to || = +1. Total = 3.
+export function logical(a: boolean, b: boolean, c: boolean): boolean {
+  if (a && b || c) {
+    return true;
+  }
+  return false;
+}
+
+// Optional chaining + nullish coalescing — MUST NOT count toward
+// complexity per the spec. `await`/`async` alone also must not count.
+// The only control flow is the `if`, so score = 1.
+export async function notCounted(x: { v?: number } | null): Promise<number> {
+  const val = x?.v ?? 0;
+  if (val > 0) {
+    await someAsync();
+    return 1;
+  }
+  return 0;
+}
+
+// Promise-chain .catch: +1 per `.catch(...)` promise-chain call. No other
+// control flow in the body, so score = 1.
+export function promiseCatch(): Promise<number> {
+  return someAsync().catch((e) => 0);
+}
+
+// Stub helpers so the fixture type-checks in users' IDEs. Tree-sitter
+// doesn't care but keeping them real functions lets us verify they're
+// treated as separate entries (complexity 0 for the declaration itself,
+// but the calculator ignores them — their bodies contain no control flow).
+function doSomething(): void {}
+function handle(_e: unknown): void {}
+function someAsync(): Promise<number> {
+  return Promise.resolve(1);
+}
diff --git a/internal/lang/tsanalyzer/testdata/component.tsx b/internal/lang/tsanalyzer/testdata/component.tsx
new file mode 100644
index 0000000..04c645d
--- /dev/null
+++ b/internal/lang/tsanalyzer/testdata/component.tsx
@@ -0,0 +1,21 @@
+// Minimal .tsx fixture to exercise the tsx grammar path. The analyzer
+// picks the tsx grammar based on the extension; this file uses JSX that
+// the plain typescript grammar rejects, so a successful parse here proves
+// the grammar routing works.
+
+import * as React from "react";
+
+export interface HelloProps {
+  name: string;
+}
+
+export function Hello(props: HelloProps): JSX.Element {
+  if (props.name.length > 0) {
+    return <div>Hello, {props.name}!</div>;
+  }
+  return <div>No name.</div>;
+}
+
+export const Count = (props: { n: number }) => {
+  return <span>{props.n}</span>;
+};
diff --git a/internal/lang/tsanalyzer/testdata/functions.ts b/internal/lang/tsanalyzer/testdata/functions.ts
new file mode 100644
index 0000000..a1e9ea5
--- /dev/null
+++ b/internal/lang/tsanalyzer/testdata/functions.ts
@@ -0,0 +1,43 @@
+// Fixture: a small TypeScript file covering every function form the
+// extractor should handle: standalone `function`, class methods (including
+// static + private), arrow functions assigned to const, function
+// expressions assigned to const, and a nested arrow.
+
+export function standalone(x: number): number {
+  return x + 1;
+}
+
+export const arrowConst = (x: number): number => {
+  return x * 2;
+};
+
+export const fnExpr = function (x: number): number {
+  return x - 1;
+};
+
+export class Counter {
+  private n: number;
+
+  constructor() {
+    this.n = 0;
+  }
+
+  public increment(): number {
+    const nestedHelper = (x: number) => x + 1;
+    this.n = nestedHelper(this.n);
+    return this.n;
+  }
+
+  static make(): Counter {
+    return new Counter();
+  }
+
+  private reset(): void {
+    this.n = 0;
+  }
+}
+
+export function* gen(): Generator<number> {
+  yield 1;
+  yield 2;
+}
diff --git a/internal/lang/tsanalyzer/testrunner.go b/internal/lang/tsanalyzer/testrunner.go
new file mode 100644
index 0000000..9b3b7d5
--- /dev/null
+++ b/internal/lang/tsanalyzer/testrunner.go
@@ -0,0 +1,195 @@
+package tsanalyzer
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sync"
+	"time"
+
+	"github.com/0xPolygon/diffguard/internal/lang"
+)
+
+// cancelWaitDelay bounds how long cmd.Wait() can block after the context
+// is canceled. Process-group kill (see configureProcessGroup) should
+// reap descendants immediately on unix; this is a cross-platform safety
+// net so pipes inherited by any orphan are force-closed.
+const cancelWaitDelay = 2 * time.Second
+
+// testRunnerImpl implements lang.TestRunner for TypeScript using the
+// project's configured test runner (vitest, jest, or `npm test`). Same
+// temp-copy isolation as the Rust analyzer:
+//
+// 1. Per-file mutex so concurrent mutants on the same file serialize.
+// 2. Back up original bytes, swap mutant in place, run tests, restore.
+// 3. Timeout via context.WithTimeout.
+//
+// Test command selection is driven by package.json devDependencies. If
+// neither vitest nor jest appears, we fall back to `npm test`.
+type testRunnerImpl struct {
+	// cmd / extraArgs override hooks used by tests. Normal production runs
+	// leave them empty and buildCommand derives argv from package.json.
+	cmd       string
+	extraArgs []string
+
+	// mu guards the locks map itself; the per-path mutexes it holds are
+	// what serialize mutant runs targeting the same original file.
+	mu    sync.Mutex
+	locks map[string]*sync.Mutex
+}
+
+// newTestRunner builds a fresh runner. Fields are filled at Run time from
+// the repo under test; tests construct their own via the fakeRunner helper
+// in testrunner_test.go.
+func newTestRunner() *testRunnerImpl {
+	return &testRunnerImpl{}
+}
+
+// fileLock returns the per-file mutex for the given path, lazily creating
+// the entry on first access.
+func (r *testRunnerImpl) fileLock(path string) *sync.Mutex {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	// Lazy map init keeps the zero-value testRunnerImpl usable, matching
+	// how newTestRunner constructs it with no fields set.
+	if r.locks == nil {
+		r.locks = map[string]*sync.Mutex{}
+	}
+	m, ok := r.locks[path]
+	if !ok {
+		m = &sync.Mutex{}
+		r.locks[path] = m
+	}
+	return m
+}
+
+// RunTest implements the lang.TestRunner contract. Returning (true, ...,
+// nil) signals killed; (false, ..., nil) signals survived; (false, "", err)
+// signals a runner failure.
+func (r *testRunnerImpl) RunTest(cfg lang.TestRunConfig) (bool, string, error) { + lock := r.fileLock(cfg.OriginalFile) + lock.Lock() + defer lock.Unlock() + + mutantBytes, err := os.ReadFile(cfg.MutantFile) + if err != nil { + return false, "", fmt.Errorf("reading mutant file: %w", err) + } + originalBytes, err := os.ReadFile(cfg.OriginalFile) + if err != nil { + return false, "", fmt.Errorf("reading original file: %w", err) + } + + // Defer restore BEFORE writing so a panic between write and run can't + // leave corrupt source behind. + defer func() { _ = os.WriteFile(cfg.OriginalFile, originalBytes, 0644) }() + + if err := os.WriteFile(cfg.OriginalFile, mutantBytes, 0644); err != nil { + return false, "", fmt.Errorf("writing mutant over original: %w", err) + } + + timeout := cfg.Timeout + if timeout <= 0 { + timeout = defaultTSTestTimeout + } + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + killed, output := r.runCommand(ctx, cfg) + return killed, output, nil +} + +// runCommand spawns the test runner under ctx, captures combined stdout + +// stderr, and reports whether the mutant was killed. A non-zero exit or a +// context-deadline cancellation both count as killed; only a clean exit +// means the mutant survived. +func (r *testRunnerImpl) runCommand(ctx context.Context, cfg lang.TestRunConfig) (bool, string) { + cmdName, args := r.buildCommand(cfg) + cmd := exec.CommandContext(ctx, cmdName, args...) + cmd.Dir = cfg.RepoPath + // CI=true suppresses interactive prompts from jest/vitest. + cmd.Env = append(os.Environ(), "CI=true") + configureProcessGroup(cmd) + cmd.WaitDelay = cancelWaitDelay + var combined bytes.Buffer + cmd.Stdout = &combined + cmd.Stderr = &combined + + runErr := cmd.Run() + output := combined.String() + if ctx.Err() == context.DeadlineExceeded || runErr != nil { + return true, output + } + return false, output +} + +// buildCommand returns the argv to execute for this RunTest call. 
+//
+// Precedence:
+//
+// 1. If the runner has a hard-coded cmd/extraArgs (tests), use them.
+// 2. Detect the configured runner by reading package.json:
+//    vitest > jest > `npm test`.
+// 3. Honor TestPattern by appending the runner's pattern flag.
+func (r *testRunnerImpl) buildCommand(cfg lang.TestRunConfig) (string, []string) {
+	if r.cmd != "" {
+		// Copy extraArgs so the caller can't mutate the runner's slice.
+		return r.cmd, append([]string(nil), r.extraArgs...)
+	}
+	runner := detectTSRunner(cfg.RepoPath)
+	switch runner {
+	case "vitest":
+		args := []string{"vitest", "run"}
+		if cfg.TestPattern != "" {
+			args = append(args, "-t", cfg.TestPattern)
+		}
+		return "npx", args
+	case "jest":
+		args := []string{"jest"}
+		if cfg.TestPattern != "" {
+			args = append(args, "--testNamePattern", cfg.TestPattern)
+		}
+		return "npx", args
+	}
+	// Fall back: plain `npm test`. Pattern handling isn't portable here,
+	// so we just skip it and hope the suite is fast enough.
+	return "npm", []string{"test"}
+}
+
+// detectTSRunner reads package.json's devDependencies / dependencies and
+// returns "vitest", "jest", or "" for fall-back to npm test. The choice
+// prefers vitest over jest per the design doc.
+func detectTSRunner(repoPath string) string {
+	data, err := os.ReadFile(filepath.Join(repoPath, "package.json"))
+	if err != nil {
+		// No package.json (or unreadable): signal the npm-test fallback.
+		return ""
+	}
+	var pkg struct {
+		DevDependencies map[string]string `json:"devDependencies"`
+		Dependencies    map[string]string `json:"dependencies"`
+	}
+	if err := json.Unmarshal(data, &pkg); err != nil {
+		// Malformed package.json gets the same fallback rather than an error.
+		return ""
+	}
+	has := func(name string) bool {
+		if _, ok := pkg.DevDependencies[name]; ok {
+			return true
+		}
+		_, ok := pkg.Dependencies[name]
+		return ok
+	}
+	if has("vitest") {
+		return "vitest"
+	}
+	if has("jest") || has("@jest/core") {
+		return "jest"
+	}
+	return ""
+}
+
+// tsTestArgs is exposed to tests so they can assert the argv shape that
+// would be sent to the detected runner when no overrides are in play.
+func tsTestArgs(repoPath string, cfg lang.TestRunConfig) (string, []string) {
+	r := &testRunnerImpl{}
+	// cfg is received by value, so overriding RepoPath here cannot affect
+	// the caller's copy.
+	cfg.RepoPath = repoPath
+	return r.buildCommand(cfg)
+}
diff --git a/internal/lang/tsanalyzer/testrunner_other.go b/internal/lang/tsanalyzer/testrunner_other.go
new file mode 100644
index 0000000..6c963f3
--- /dev/null
+++ b/internal/lang/tsanalyzer/testrunner_other.go
@@ -0,0 +1,10 @@
+//go:build !unix
+
+package tsanalyzer
+
+import "os/exec"
+
+// configureProcessGroup is a no-op on non-unix platforms. WaitDelay in
+// testrunner.go still provides an upper bound on how long cmd.Wait() can
+// block after context cancellation.
+func configureProcessGroup(cmd *exec.Cmd) {}
diff --git a/internal/lang/tsanalyzer/testrunner_test.go b/internal/lang/tsanalyzer/testrunner_test.go
new file mode 100644
index 0000000..896aa8f
--- /dev/null
+++ b/internal/lang/tsanalyzer/testrunner_test.go
@@ -0,0 +1,298 @@
+package tsanalyzer
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/0xPolygon/diffguard/internal/lang"
+)
+
+// fakeRunner returns a runner that invokes `/bin/sh -c