diff --git a/Makefile b/Makefile index db3bcf9..6260419 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,7 @@ .PHONY: aarch64 aarch64-debug aarch64-samples aarch64-tests test-aarch64 test-aarch64-vm .PHONY: vm-x86 vm-aarch64 vm-riscv64 vm-test-all .PHONY: deb deb-x86 +.PHONY: analysis analysis-quick analysis-standard analysis-deep .PHONY: dev shell check eval # Default target @@ -75,6 +76,12 @@ help: @echo " make vm-riscv64 Build RISC-V MicroVM -> result-vm-riscv64/" @echo " make vm-test-all Run full VM lifecycle tests (all architectures)" @echo "" + @echo "=== Static Analysis ===" + @echo " make analysis Run quick static analysis (alias for analysis-quick)" + @echo " make analysis-quick Run clang-tidy + cppcheck" + @echo " make analysis-standard Run + flawfinder, clang-analyzer, gcc-warnings" + @echo " make analysis-deep Run all 8 tools including gcc-analyzer, semgrep, sanitizers" + @echo "" @echo "=== Packaging ===" @echo " make deb Build Debian package -> result-deb/" @echo "" @@ -280,6 +287,33 @@ deb: deb-x86: deb +# ============================================================================= +# Static Analysis +# ============================================================================= + +# Quick analysis: clang-tidy + cppcheck +analysis: analysis-quick + +analysis-quick: + @echo "Running quick static analysis (clang-tidy + cppcheck)..." + nix build .#analysis-quick -o result-analysis-quick + @echo "" + @cat result-analysis-quick/summary.txt + +# Standard analysis: + flawfinder, clang-analyzer, gcc-warnings +analysis-standard: + @echo "Running standard static analysis..." + nix build .#analysis-standard -o result-analysis-standard + @echo "" + @cat result-analysis-standard/summary.txt + +# Deep analysis: all 8 tools +analysis-deep: + @echo "Running deep static analysis (all tools)..." 
+ nix build .#analysis-deep -o result-analysis-deep + @echo "" + @cat result-analysis-deep/summary.txt + # ============================================================================= # Development # ============================================================================= @@ -313,6 +347,10 @@ eval: nix eval .#xdp2-debug-aarch64 --apply 'x: x.name' 2>/dev/null && echo " xdp2-debug-aarch64: OK" || echo " xdp2-debug-aarch64: FAIL" nix eval .#prebuilt-samples-aarch64 --apply 'x: x.name' 2>/dev/null && echo " prebuilt-samples-aarch64: OK" || echo " prebuilt-samples-aarch64: FAIL" nix eval .#aarch64-tests.all --apply 'x: x.name' 2>/dev/null && echo " aarch64-tests.all: OK" || echo " aarch64-tests.all: FAIL" + @echo "Analysis:" + nix eval .#analysis-quick --apply 'x: x.name' 2>/dev/null && echo " analysis-quick: OK" || echo " analysis-quick: FAIL" + nix eval .#analysis-standard --apply 'x: x.name' 2>/dev/null && echo " analysis-standard: OK" || echo " analysis-standard: FAIL" + nix eval .#analysis-deep --apply 'x: x.name' 2>/dev/null && echo " analysis-deep: OK" || echo " analysis-deep: FAIL" @echo "" @echo "All evaluations completed." 
diff --git a/flake.nix b/flake.nix index b459f85..9203270 100644 --- a/flake.nix +++ b/flake.nix @@ -111,6 +111,15 @@ xdp2 = xdp2-debug; # Tests use debug build with assertions }; + # ===================================================================== + # Static Analysis Infrastructure + # Ported from reference implementation, adapted for C/Make build system + # ===================================================================== + analysis = import ./nix/analysis { + inherit pkgs lib llvmConfig packagesModule; + src = ./.; + }; + # ===================================================================== # Phase 1: Packaging (x86_64 .deb only) # See: documentation/nix/microvm-implementation-phase1.md @@ -172,6 +181,24 @@ # Usage: nix run .#run-sample-tests inherit run-sample-tests; + # =================================================================== + # Static Analysis + # Usage: nix build .#analysis-quick + # nix build .#analysis-standard + # nix build .#analysis-deep + # =================================================================== + analysis-quick = analysis.quick; + analysis-standard = analysis.standard; + analysis-deep = analysis.deep; + analysis-clang-tidy = analysis.clang-tidy; + analysis-cppcheck = analysis.cppcheck; + analysis-flawfinder = analysis.flawfinder; + analysis-clang-analyzer = analysis.clang-analyzer; + analysis-gcc-warnings = analysis.gcc-warnings; + analysis-gcc-analyzer = analysis.gcc-analyzer; + analysis-semgrep = analysis.semgrep; + analysis-sanitizers = analysis.sanitizers; + # =================================================================== # Phase 1: Packaging outputs (x86_64 .deb only) # See: documentation/nix/microvm-implementation-phase1.md diff --git a/nix/analysis/clang-analyzer.nix b/nix/analysis/clang-analyzer.nix new file mode 100644 index 0000000..19b59f3 --- /dev/null +++ b/nix/analysis/clang-analyzer.nix @@ -0,0 +1,197 @@ +# nix/analysis/clang-analyzer.nix +# +# Clang Static Analyzer (scan-build) for XDP2's C 
codebase. +# +# Adapted from the reference C++ implementation: +# - Uses C-specific checkers (core.*, security.*, unix.*, alpha.security.*) +# - No C++ checkers (cplusplus.*, alpha.cplusplus.*) +# - Builds via Make instead of Meson+Ninja +# + +{ + lib, + pkgs, + src, + llvmConfig, + nativeBuildInputs, + buildInputs, +}: + +let + llvmPackages = llvmConfig.llvmPackages; + hostPkgs = pkgs.buildPackages; + hostCC = hostPkgs.stdenv.cc; + hostPython = hostPkgs.python3.withPackages (p: [ p.scapy ]); + + host-gcc = hostPkgs.writeShellScript "host-gcc" '' + exec ${hostCC}/bin/gcc \ + -I${hostPkgs.boost.dev}/include \ + -I${hostPkgs.libpcap}/include \ + -L${hostPkgs.boost}/lib \ + -L${hostPkgs.libpcap.lib}/lib \ + "$@" + ''; + + host-gxx = hostPkgs.writeShellScript "host-g++" '' + exec ${hostCC}/bin/g++ \ + -I${hostPkgs.boost.dev}/include \ + -I${hostPkgs.libpcap}/include \ + -I${hostPython}/include/python3.13 \ + -L${hostPkgs.boost}/lib \ + -L${hostPkgs.libpcap.lib}/lib \ + -L${hostPython}/lib \ + -Wl,-rpath,${hostPython}/lib \ + "$@" + ''; + + scanBuildCheckers = lib.concatStringsSep " " [ + "-enable-checker core.NullDereference" + "-enable-checker core.DivideZero" + "-enable-checker core.UndefinedBinaryOperatorResult" + "-enable-checker core.uninitialized.Assign" + "-enable-checker security.FloatLoopCounter" + "-enable-checker security.insecureAPI.getpw" + "-enable-checker security.insecureAPI.gets" + "-enable-checker security.insecureAPI.vfork" + "-enable-checker unix.Malloc" + "-enable-checker unix.MallocSizeof" + "-enable-checker unix.MismatchedDeallocator" + "-enable-checker alpha.security.ArrayBoundV2" + "-enable-checker alpha.unix.SimpleStream" + ]; + +in +pkgs.stdenv.mkDerivation { + pname = "xdp2-analysis-clang-analyzer"; + version = "0.1.0"; + inherit src; + + nativeBuildInputs = nativeBuildInputs ++ [ + pkgs.clang-analyzer + ]; + inherit buildInputs; + + hardeningDisable = [ "all" ]; + dontFixup = true; + doCheck = false; + + HOST_CC = "${hostCC}/bin/gcc"; + 
HOST_CXX = "${hostCC}/bin/g++"; + HOST_LLVM_CONFIG = "${llvmConfig.llvm-config-wrapped}/bin/llvm-config"; + XDP2_CLANG_VERSION = llvmConfig.version; + XDP2_CLANG_RESOURCE_PATH = llvmConfig.paths.clangResourceDir; + + LD_LIBRARY_PATH = lib.makeLibraryPath [ + llvmPackages.llvm + llvmPackages.libclang.lib + hostPkgs.boost + ]; + + postPatch = '' + substituteInPlace thirdparty/cppfront/Makefile \ + --replace-fail 'include ../../src/config.mk' '# config.mk not needed for standalone build' + + sed -i '1i#include <cstdint>\n#include <cstdio>\n' thirdparty/cppfront/include/cpp2util.h + + substituteInPlace src/configure.sh \ + --replace-fail 'CC_GCC="gcc"' 'CC_GCC="''${CC_GCC:-gcc}"' \ + --replace-fail 'CC_CXX="g++"' 'CC_CXX="''${CC_CXX:-g++}"' + ''; + + configurePhase = '' + runHook preConfigure + + cd src + + export PATH="${hostCC}/bin:${hostPython}/bin:$PATH" + export CC_GCC="${host-gcc}" + export CC_CXX="${host-gxx}" + export CC="${host-gcc}" + export CXX="${host-gxx}" + export PKG_CONFIG_PATH="${hostPython}/lib/pkgconfig:$PKG_CONFIG_PATH" + export HOST_CC="$CC" + export HOST_CXX="$CXX" + export HOST_LLVM_CONFIG="${llvmConfig.llvm-config-wrapped}/bin/llvm-config" + export XDP2_CLANG_VERSION="${llvmConfig.version}" + export XDP2_CLANG_RESOURCE_PATH="${llvmConfig.paths.clangResourceDir}" + export XDP2_C_INCLUDE_PATH="${llvmConfig.paths.clangResourceDir}/include" + export CONFIGURE_DEBUG_LEVEL=7 + + bash configure.sh --build-opt-parser + + if grep -q 'PATH_ARG="--with-path=' config.mk; then + sed -i 's|PATH_ARG="--with-path=.*"|PATH_ARG=""|' config.mk + fi + + sed -i 's|^HOST_CC := gcc$|HOST_CC := ${host-gcc}|' config.mk + sed -i 's|^HOST_CXX := g++$|HOST_CXX := ${host-gxx}|' config.mk + echo "HOST_LDFLAGS := -L${hostPkgs.boost}/lib -Wl,-rpath,${hostPkgs.boost}/lib" >> config.mk + + cd ..
+ + runHook postConfigure + ''; + + buildPhase = '' + runHook preBuild + + export HOST_CC="${hostCC}/bin/gcc" + export HOST_CXX="${hostCC}/bin/g++" + export HOST_LLVM_CONFIG="${llvmConfig.llvm-config-wrapped}/bin/llvm-config" + export XDP2_CLANG_VERSION="${llvmConfig.version}" + export XDP2_CLANG_RESOURCE_PATH="${llvmConfig.paths.clangResourceDir}" + export XDP2_C_INCLUDE_PATH="${llvmConfig.paths.clangResourceDir}/include" + export XDP2_GLIBC_INCLUDE_PATH="${hostPkgs.stdenv.cc.libc.dev}/include" + export XDP2_LINUX_HEADERS_PATH="${hostPkgs.linuxHeaders}/include" + + # Build cppfront first + echo "Building cppfront..." + cd thirdparty/cppfront + $HOST_CXX -std=c++20 source/cppfront.cpp -o cppfront-compiler + cd ../.. + + # Build xdp2-compiler + echo "Building xdp2-compiler..." + cd src/tools/compiler + make -j''${NIX_BUILD_CORES:-1} + cd ../../.. + + # Build xdp2 libraries wrapped with scan-build. + # Use full path to clang-analyzer's scan-build (properly wrapped with Nix shebang). + # The one from llvmPackages.clang has a broken /usr/bin/env shebang. + echo "Running scan-build on xdp2..." + cd src + ${pkgs.clang-analyzer}/bin/scan-build \ + --use-analyzer=${llvmPackages.clang}/bin/clang \ + ${scanBuildCheckers} \ + -o "$NIX_BUILD_TOP/scan-results" \ + make -j''${NIX_BUILD_CORES:-1} \ + 2>&1 | tee "$NIX_BUILD_TOP/scan-build.log" || true + cd .. + + runHook postBuild + ''; + + installPhase = '' + mkdir -p $out + + # Copy HTML reports if produced + if [ -d "$NIX_BUILD_TOP/scan-results" ] && [ "$(ls -A "$NIX_BUILD_TOP/scan-results" 2>/dev/null)" ]; then + mkdir -p $out/html-report + cp -r "$NIX_BUILD_TOP/scan-results"/* $out/html-report/ 2>/dev/null || true + fi + + # Extract finding count from scan-build output + findings=$(grep -oP '\d+ bugs? 
found' "$NIX_BUILD_TOP/scan-build.log" | grep -oP '^\d+' || echo "0") + echo "$findings" > $out/count.txt + + cp "$NIX_BUILD_TOP/scan-build.log" $out/report.txt + + { + echo "=== Clang Static Analyzer (C) ===" + echo "" + echo "Path-sensitive analysis with C-specific checkers." + echo "Findings: $findings" + } > $out/summary.txt + ''; +} diff --git a/nix/analysis/clang-tidy.nix b/nix/analysis/clang-tidy.nix new file mode 100644 index 0000000..aa30b28 --- /dev/null +++ b/nix/analysis/clang-tidy.nix @@ -0,0 +1,48 @@ +# nix/analysis/clang-tidy.nix +# +# clang-tidy runner for XDP2's C codebase. +# +# Adapted from the reference C++ implementation: +# - Finds .c and .h files instead of .cc +# - Uses C-appropriate checks (no cppcoreguidelines, modernize) +# - No custom plugin (nixTidyChecks not applicable to XDP2) +# + +{ + pkgs, + mkCompileDbReport, +}: + +let + runner = pkgs.writeShellApplication { + name = "run-clang-tidy-analysis"; + runtimeInputs = with pkgs; [ + clang-tools + coreutils + findutils + gnugrep + ]; + text = '' + compile_db="$1" + source_dir="$2" + output_dir="$3" + + echo "=== clang-tidy Analysis (C) ===" + echo "Using compilation database: $compile_db" + + # Find all .c source files in library and tool directories + find "$source_dir/src" -name '*.c' -not -path '*/test*' -print0 | \ + xargs -0 -P "$(nproc)" -I{} \ + clang-tidy \ + -p "$compile_db" \ + --header-filter='src/.*' \ + --checks='-*,bugprone-*,cert-*,clang-analyzer-*,misc-*,readability-*' \ + {} \ + > "$output_dir/report.txt" 2>&1 || true + + findings=$(grep -c ': warning:\|: error:' "$output_dir/report.txt" || true) + echo "''${findings:-0}" > "$output_dir/count.txt" + ''; + }; +in +mkCompileDbReport "clang-tidy" runner diff --git a/nix/analysis/compile-db.nix b/nix/analysis/compile-db.nix new file mode 100644 index 0000000..7454983 --- /dev/null +++ b/nix/analysis/compile-db.nix @@ -0,0 +1,259 @@ +# nix/analysis/compile-db.nix +# +# Generate compile_commands.json for XDP2.
+# +# Unlike the reference Nix project (which uses Meson's built-in compile DB +# generation), XDP2 uses Make. We parse `make V=1 VERBOSE=1` output directly +# because bear's LD_PRELOAD fails in the Nix sandbox, and compiledb doesn't +# recognize Nix wrapper paths as compilers. +# + +{ + pkgs, + lib, + llvmConfig, + nativeBuildInputs, + buildInputs, +}: + +let + llvmPackages = llvmConfig.llvmPackages; + hostPkgs = pkgs.buildPackages; + hostCC = hostPkgs.stdenv.cc; + hostPython = hostPkgs.python3.withPackages (p: [ p.scapy ]); + + host-gcc = hostPkgs.writeShellScript "host-gcc" '' + exec ${hostCC}/bin/gcc \ + -I${hostPkgs.boost.dev}/include \ + -I${hostPkgs.libpcap}/include \ + -L${hostPkgs.boost}/lib \ + -L${hostPkgs.libpcap.lib}/lib \ + "$@" + ''; + + host-gxx = hostPkgs.writeShellScript "host-g++" '' + exec ${hostCC}/bin/g++ \ + -I${hostPkgs.boost.dev}/include \ + -I${hostPkgs.libpcap}/include \ + -I${hostPython}/include/python3.13 \ + -L${hostPkgs.boost}/lib \ + -L${hostPkgs.libpcap.lib}/lib \ + -L${hostPython}/lib \ + -Wl,-rpath,${hostPython}/lib \ + "$@" + ''; + + # Python script to generate compile_commands.json from make build output. 
+ genCompileDbScript = pkgs.writeText "gen-compile-db.py" '' + import json, os, re, sys + + make_output = sys.argv[1] + output_file = sys.argv[2] + store_src = sys.argv[3] + source_root = sys.argv[4] + + build_prefix = "/build/" + source_root + + entries = [] + current_dir = None + + with open(make_output) as f: + raw_lines = f.readlines() + + print(f"Raw lines read: {len(raw_lines)}", file=sys.stderr) + + # Join backslash-continued lines, stripping continuation indentation + lines = [] + buf = "" + for raw in raw_lines: + stripped = raw.rstrip('\n').rstrip('\r') + if stripped.rstrip().endswith('\\'): + s = stripped.rstrip() + buf += s[:-1].rstrip() + " " + else: + if buf: + # This is a continuation line - strip leading whitespace + buf += stripped.lstrip() + else: + buf = stripped + lines.append(buf) + buf = "" + if buf: + lines.append(buf) + + print(f"Joined lines: {len(lines)}", file=sys.stderr) + + c_lines = [l for l in lines if ' -c ' in l] + print(f"Compilation lines found: {len(c_lines)}", file=sys.stderr) + + for line in lines: + # Track directory changes from make -w + m = re.match(r"make\[\d+\]: Entering directory '(.+)'", line) + if m: + current_dir = m.group(1) + continue + + # Match C/C++ compilation commands: must contain -c flag + if ' -c ' not in line: + continue + + # Find source file: last token matching *.c, *.cc, *.cpp, *.cxx + tokens = line.split() + src_file = None + for token in reversed(tokens): + if re.match(r'.*\.(?:c|cc|cpp|cxx|C)$', token): + src_file = token + break + if not src_file: + continue + + directory = current_dir or os.getcwd() + + # Normalize paths + abs_file = src_file + if not os.path.isabs(src_file): + abs_file = os.path.normpath(os.path.join(directory, src_file)) + + # Fix sandbox paths to store paths + abs_file = abs_file.replace(build_prefix, store_src) + directory = directory.replace(build_prefix, store_src) + cmd = line.strip().replace(build_prefix, store_src) + + entries.append({ + "directory": directory, + 
"command": cmd, + "file": abs_file, + }) + + with open(output_file, "w") as f: + json.dump(entries, f, indent=2) + + print(f"Generated {len(entries)} compile commands", file=sys.stderr) + ''; + +in +pkgs.stdenv.mkDerivation { + pname = "xdp2-compilation-db"; + version = "0.1.0"; + + src = ../..; + + nativeBuildInputs = nativeBuildInputs ++ [ + pkgs.buildPackages.python3 + ]; + inherit buildInputs; + + hardeningDisable = [ "all" ]; + + HOST_CC = "${hostCC}/bin/gcc"; + HOST_CXX = "${hostCC}/bin/g++"; + HOST_LLVM_CONFIG = "${llvmConfig.llvm-config-wrapped}/bin/llvm-config"; + XDP2_CLANG_VERSION = llvmConfig.version; + XDP2_CLANG_RESOURCE_PATH = llvmConfig.paths.clangResourceDir; + + LD_LIBRARY_PATH = lib.makeLibraryPath [ + llvmPackages.llvm + llvmPackages.libclang.lib + hostPkgs.boost + ]; + + dontFixup = true; + doCheck = false; + + # Replicate derivation.nix's postPatch + postPatch = '' + substituteInPlace thirdparty/cppfront/Makefile \ + --replace-fail 'include ../../src/config.mk' '# config.mk not needed for standalone build' + + sed -i '1i#include <cstdint>\n#include <cstdio>\n' thirdparty/cppfront/include/cpp2util.h + + substituteInPlace src/configure.sh \ + --replace-fail 'CC_GCC="gcc"' 'CC_GCC="''${CC_GCC:-gcc}"' \ + --replace-fail 'CC_CXX="g++"' 'CC_CXX="''${CC_CXX:-g++}"' + ''; + + # Replicate derivation.nix's configurePhase + configurePhase = '' + runHook preConfigure + + cd src + + export PATH="${hostCC}/bin:${hostPython}/bin:$PATH" + export CC_GCC="${host-gcc}" + export CC_CXX="${host-gxx}" + export CC="${host-gcc}" + export CXX="${host-gxx}" + export PKG_CONFIG_PATH="${hostPython}/lib/pkgconfig:$PKG_CONFIG_PATH" + export HOST_CC="$CC" + export HOST_CXX="$CXX" + export HOST_LLVM_CONFIG="${llvmConfig.llvm-config-wrapped}/bin/llvm-config" + export XDP2_CLANG_VERSION="${llvmConfig.version}" + export XDP2_CLANG_RESOURCE_PATH="${llvmConfig.paths.clangResourceDir}" + export XDP2_C_INCLUDE_PATH="${llvmConfig.paths.clangResourceDir}/include" + export CONFIGURE_DEBUG_LEVEL=7 + + 
bash configure.sh --build-opt-parser + + if grep -q 'PATH_ARG="--with-path=' config.mk; then + sed -i 's|PATH_ARG="--with-path=.*"|PATH_ARG=""|' config.mk + fi + + sed -i 's|^HOST_CC := gcc$|HOST_CC := ${host-gcc}|' config.mk + sed -i 's|^HOST_CXX := g++$|HOST_CXX := ${host-gxx}|' config.mk + echo "HOST_LDFLAGS := -L${hostPkgs.boost}/lib -Wl,-rpath,${hostPkgs.boost}/lib" >> config.mk + + cd .. + + runHook postConfigure + ''; + + # Build prerequisites, then use compiledb to capture compile commands + buildPhase = '' + runHook preBuild + + export HOST_CC="${hostCC}/bin/gcc" + export HOST_CXX="${hostCC}/bin/g++" + export HOST_LLVM_CONFIG="${llvmConfig.llvm-config-wrapped}/bin/llvm-config" + export XDP2_CLANG_VERSION="${llvmConfig.version}" + export XDP2_CLANG_RESOURCE_PATH="${llvmConfig.paths.clangResourceDir}" + export XDP2_C_INCLUDE_PATH="${llvmConfig.paths.clangResourceDir}/include" + export XDP2_GLIBC_INCLUDE_PATH="${hostPkgs.stdenv.cc.libc.dev}/include" + export XDP2_LINUX_HEADERS_PATH="${hostPkgs.linuxHeaders}/include" + + # Build cppfront first (needed by xdp2-compiler) + echo "Building cppfront..." + cd thirdparty/cppfront + $HOST_CXX -std=c++20 source/cppfront.cpp -o cppfront-compiler + cd ../.. + + # Build xdp2-compiler (needed for source generation) + echo "Building xdp2-compiler..." + cd src/tools/compiler + make -j''${NIX_BUILD_CORES:-1} + cd ../../.. + + # Build xdp2 with verbose output and capture all compiler invocations. + # We parse the real build output because: + # - bear's LD_PRELOAD doesn't work in Nix sandbox + # - compiledb doesn't recognize Nix wrapper paths as compilers + # Use -j1 to prevent interleaved output that breaks line continuation parsing. + # Use both V=1 and VERBOSE=1 for full command echoing. + echo "Building xdp2 libraries (capturing compile commands)..." + cd src + make V=1 VERBOSE=1 -j1 -wk 2>&1 | tee "$NIX_BUILD_TOP/make-build.log" || true + cd .. 
 + + runHook postBuild + ''; + + installPhase = '' + mkdir -p $out + + ${pkgs.buildPackages.python3}/bin/python3 \ + ${genCompileDbScript} \ + "$NIX_BUILD_TOP/make-build.log" \ + "$out/compile_commands.json" \ + "${../..}" \ + "$sourceRoot" + ''; +} diff --git a/nix/analysis/cppcheck.nix b/nix/analysis/cppcheck.nix new file mode 100644 index 0000000..04e8d69 --- /dev/null +++ b/nix/analysis/cppcheck.nix @@ -0,0 +1,55 @@ +# nix/analysis/cppcheck.nix +# +# cppcheck runner for XDP2's C codebase. +# +# Adapted from reference: uses --std=c11 instead of --std=c++20. +# + +{ + pkgs, + mkCompileDbReport, +}: + +let + runner = pkgs.writeShellApplication { + name = "run-cppcheck-analysis"; + runtimeInputs = with pkgs; [ + cppcheck + coreutils + gnugrep + ]; + text = '' + compile_db="$1" + # shellcheck disable=SC2034 + source_dir="$2" + output_dir="$3" + + echo "=== cppcheck Analysis (C) ===" + + # Use --project for compilation database (cannot combine with source args) + cppcheck \ + --project="$compile_db/compile_commands.json" \ + --enable=all \ + --std=c11 \ + --suppress=missingInclude \ + --suppress=unusedFunction \ + --suppress=unmatchedSuppression \ + --xml \ + 2> "$output_dir/report.xml" || true + + # Also produce a human-readable text report + cppcheck \ + --project="$compile_db/compile_commands.json" \ + --enable=all \ + --std=c11 \ + --suppress=missingInclude \ + --suppress=unusedFunction \ + --suppress=unmatchedSuppression \ + 2> "$output_dir/report.txt" || true + + findings=$(grep -c '\(error\|warning\|style\|performance\|portability\)' "$output_dir/report.txt" || true) + echo "''${findings:-0}" > "$output_dir/count.txt" + ''; + }; +in +mkCompileDbReport "cppcheck" runner diff --git a/nix/analysis/default.nix b/nix/analysis/default.nix new file mode 100644 index 0000000..23bd313 --- /dev/null +++ b/nix/analysis/default.nix @@ -0,0 +1,182 @@ +# nix/analysis/default.nix +# +# Static analysis infrastructure entry point for XDP2.
+# +# Ported from the reference Nix project's analysis framework, +# adapted for XDP2's C codebase and Make-based build system. +# +# Provides 8 analysis tools at 3 levels: +# quick: clang-tidy + cppcheck +# standard: + flawfinder, clang-analyzer, gcc-warnings +# deep: + gcc-analyzer, semgrep, sanitizers +# +# Usage: +# nix build .#analysis-quick +# nix build .#analysis-standard +# nix build .#analysis-deep +# + +{ + pkgs, + lib, + llvmConfig, + packagesModule, + src, +}: + +let + # ── Compilation database ──────────────────────────────────────── + + compilationDb = import ./compile-db.nix { + inherit lib pkgs llvmConfig; + inherit (packagesModule) nativeBuildInputs buildInputs; + }; + + # ── Helper for tools that need compilation database ───────────── + + mkCompileDbReport = name: script: + pkgs.runCommand "xdp2-analysis-${name}" + { + nativeBuildInputs = [ script ]; + } + '' + mkdir -p $out + ${lib.getExe script} ${compilationDb} ${src} $out + ''; + + # ── Helper for tools that work on raw source ──────────────────── + + mkSourceReport = name: script: + pkgs.runCommand "xdp2-analysis-${name}" + { + nativeBuildInputs = [ script ]; + } + '' + mkdir -p $out + ${lib.getExe script} ${src} $out + ''; + + # ── Individual tool targets ──────────────────────────────────── + + clang-tidy = import ./clang-tidy.nix { + inherit pkgs mkCompileDbReport; + }; + + cppcheck = import ./cppcheck.nix { + inherit pkgs mkCompileDbReport; + }; + + flawfinder = import ./flawfinder.nix { + inherit pkgs mkSourceReport; + }; + + semgrep = import ./semgrep.nix { + inherit pkgs mkSourceReport; + }; + + gccTargets = import ./gcc.nix { + inherit lib pkgs src llvmConfig; + inherit (packagesModule) nativeBuildInputs buildInputs; + }; + + clang-analyzer = import ./clang-analyzer.nix { + inherit lib pkgs src llvmConfig; + inherit (packagesModule) nativeBuildInputs buildInputs; + }; + + sanitizers = import ./sanitizers.nix { + inherit lib pkgs src llvmConfig; + inherit (packagesModule) 
nativeBuildInputs buildInputs; + }; + + # ── Triage system path ────────────────────────────────────────── + + triagePath = "${src}/nix/analysis/triage"; + + # ── Combined targets ─────────────────────────────────────────── + + quick = pkgs.runCommand "xdp2-analysis-quick" { nativeBuildInputs = [ pkgs.python3 ]; } '' + mkdir -p $out + ln -s ${clang-tidy} $out/clang-tidy + ln -s ${cppcheck} $out/cppcheck + python3 ${triagePath} $out --output-dir $out/triage + { + echo "=== Analysis Summary (quick) ===" + echo "" + echo "clang-tidy: $(cat ${clang-tidy}/count.txt) findings" + echo "cppcheck: $(cat ${cppcheck}/count.txt) findings" + echo "triage: $(cat $out/triage/count.txt) high-confidence findings" + echo "" + echo "Run 'nix build .#analysis-standard' for more thorough analysis." + } > $out/summary.txt + cat $out/summary.txt + ''; + + standard = pkgs.runCommand "xdp2-analysis-standard" { nativeBuildInputs = [ pkgs.python3 ]; } '' + mkdir -p $out + ln -s ${clang-tidy} $out/clang-tidy + ln -s ${cppcheck} $out/cppcheck + ln -s ${flawfinder} $out/flawfinder + ln -s ${clang-analyzer} $out/clang-analyzer + ln -s ${gccTargets.gcc-warnings} $out/gcc-warnings + python3 ${triagePath} $out --output-dir $out/triage + { + echo "=== Analysis Summary (standard) ===" + echo "" + echo "clang-tidy: $(cat ${clang-tidy}/count.txt) findings" + echo "cppcheck: $(cat ${cppcheck}/count.txt) findings" + echo "flawfinder: $(cat ${flawfinder}/count.txt) findings" + echo "clang-analyzer: $(cat ${clang-analyzer}/count.txt) findings" + echo "gcc-warnings: $(cat ${gccTargets.gcc-warnings}/count.txt) findings" + echo "triage: $(cat $out/triage/count.txt) high-confidence findings" + echo "" + echo "Run 'nix build .#analysis-deep' for full analysis including" + echo "GCC -fanalyzer, semgrep, and sanitizer builds." 
+ } > $out/summary.txt + cat $out/summary.txt + ''; + + deep = pkgs.runCommand "xdp2-analysis-deep" { nativeBuildInputs = [ pkgs.python3 ]; } '' + mkdir -p $out + ln -s ${clang-tidy} $out/clang-tidy + ln -s ${cppcheck} $out/cppcheck + ln -s ${flawfinder} $out/flawfinder + ln -s ${clang-analyzer} $out/clang-analyzer + ln -s ${gccTargets.gcc-warnings} $out/gcc-warnings + ln -s ${gccTargets.gcc-analyzer} $out/gcc-analyzer + ln -s ${semgrep} $out/semgrep + ln -s ${sanitizers} $out/sanitizers + python3 ${triagePath} $out --output-dir $out/triage + { + echo "=== Analysis Summary (deep) ===" + echo "" + echo "clang-tidy: $(cat ${clang-tidy}/count.txt) findings" + echo "cppcheck: $(cat ${cppcheck}/count.txt) findings" + echo "flawfinder: $(cat ${flawfinder}/count.txt) findings" + echo "clang-analyzer: $(cat ${clang-analyzer}/count.txt) findings" + echo "gcc-warnings: $(cat ${gccTargets.gcc-warnings}/count.txt) findings" + echo "gcc-analyzer: $(cat ${gccTargets.gcc-analyzer}/count.txt) findings" + echo "semgrep: $(cat ${semgrep}/count.txt) findings" + echo "sanitizers: $(cat ${sanitizers}/count.txt) findings" + echo "triage: $(cat $out/triage/count.txt) high-confidence findings" + echo "" + echo "All analysis tools completed." + } > $out/summary.txt + cat $out/summary.txt + ''; + +in +{ + inherit + clang-tidy + cppcheck + flawfinder + clang-analyzer + semgrep + sanitizers + quick + standard + deep + ; + inherit (gccTargets) gcc-warnings gcc-analyzer; +} diff --git a/nix/analysis/flawfinder.nix b/nix/analysis/flawfinder.nix new file mode 100644 index 0000000..c867e0d --- /dev/null +++ b/nix/analysis/flawfinder.nix @@ -0,0 +1,40 @@ +# nix/analysis/flawfinder.nix +# +# flawfinder source scanner — works equally on C and C++. +# Identical to the reference implementation. 
+# + +{ + pkgs, + mkSourceReport, +}: + +let + runner = pkgs.writeShellApplication { + name = "run-flawfinder-analysis"; + runtimeInputs = with pkgs; [ + flawfinder + coreutils + gnugrep + ]; + text = '' + source_dir="$1" + output_dir="$2" + + echo "=== flawfinder Analysis ===" + + flawfinder \ + --minlevel=1 \ + --columns \ + --context \ + --singleline \ + "$source_dir/src" \ + > "$output_dir/report.txt" 2>&1 || true + + # Extract hit count from flawfinder's summary line: "Hits = N" + findings=$(grep -oP 'Hits = \K[0-9]+' "$output_dir/report.txt" || echo "0") + echo "$findings" > "$output_dir/count.txt" + ''; + }; +in +mkSourceReport "flawfinder" runner diff --git a/nix/analysis/gcc.nix b/nix/analysis/gcc.nix new file mode 100644 index 0000000..5e68edc --- /dev/null +++ b/nix/analysis/gcc.nix @@ -0,0 +1,215 @@ +# nix/analysis/gcc.nix +# +# GCC-based analysis: gcc-warnings and gcc-analyzer. +# +# Adapted from the reference C++ implementation: +# - Uses NIX_CFLAGS_COMPILE instead of NIX_CXXFLAGS_COMPILE +# - Adds C-specific flags: -Wstrict-prototypes, -Wold-style-definition, +# -Wmissing-prototypes, -Wbad-function-cast +# - Builds via Make instead of Meson+Ninja +# + +{ + lib, + pkgs, + src, + llvmConfig, + nativeBuildInputs, + buildInputs, +}: + +let + llvmPackages = llvmConfig.llvmPackages; + hostPkgs = pkgs.buildPackages; + hostCC = hostPkgs.stdenv.cc; + hostPython = hostPkgs.python3.withPackages (p: [ p.scapy ]); + + host-gcc = hostPkgs.writeShellScript "host-gcc" '' + exec ${hostCC}/bin/gcc \ + -I${hostPkgs.boost.dev}/include \ + -I${hostPkgs.libpcap}/include \ + -L${hostPkgs.boost}/lib \ + -L${hostPkgs.libpcap.lib}/lib \ + "$@" + ''; + + host-gxx = hostPkgs.writeShellScript "host-g++" '' + exec ${hostCC}/bin/g++ \ + -I${hostPkgs.boost.dev}/include \ + -I${hostPkgs.libpcap}/include \ + -I${hostPython}/include/python3.13 \ + -L${hostPkgs.boost}/lib \ + -L${hostPkgs.libpcap.lib}/lib \ + -L${hostPython}/lib \ + -Wl,-rpath,${hostPython}/lib \ + "$@" + ''; + + 
 gccWarningFlags = [ + "-Wall" + "-Wextra" + "-Wpedantic" + "-Wformat=2" + "-Wformat-security" + "-Wshadow" + "-Wcast-qual" + "-Wcast-align" + "-Wwrite-strings" + "-Wpointer-arith" + "-Wconversion" + "-Wsign-conversion" + "-Wduplicated-cond" + "-Wduplicated-branches" + "-Wlogical-op" + "-Wnull-dereference" + "-Wdouble-promotion" + "-Wfloat-equal" + "-Walloca" + "-Wvla" + "-Werror=return-type" + "-Werror=format-security" + # C-specific warnings + "-Wstrict-prototypes" + "-Wold-style-definition" + "-Wmissing-prototypes" + "-Wbad-function-cast" + ]; + + mkGccAnalysisBuild = name: extraFlags: + pkgs.stdenv.mkDerivation { + pname = "xdp2-analysis-${name}"; + version = "0.1.0"; + inherit src; + + inherit nativeBuildInputs buildInputs; + + hardeningDisable = [ "all" ]; + + env.NIX_CFLAGS_COMPILE = lib.concatStringsSep " " extraFlags; + + HOST_CC = "${hostCC}/bin/gcc"; + HOST_CXX = "${hostCC}/bin/g++"; + HOST_LLVM_CONFIG = "${llvmConfig.llvm-config-wrapped}/bin/llvm-config"; + XDP2_CLANG_VERSION = llvmConfig.version; + XDP2_CLANG_RESOURCE_PATH = llvmConfig.paths.clangResourceDir; + + LD_LIBRARY_PATH = lib.makeLibraryPath [ + llvmPackages.llvm + llvmPackages.libclang.lib + hostPkgs.boost + ]; + + dontFixup = true; + doCheck = false; + + postPatch = '' + substituteInPlace thirdparty/cppfront/Makefile \ + --replace-fail 'include ../../src/config.mk' '# config.mk not needed for standalone build' + + sed -i '1i#include <cstdint>\n#include <cstdio>\n' thirdparty/cppfront/include/cpp2util.h + + substituteInPlace src/configure.sh \ + --replace-fail 'CC_GCC="gcc"' 'CC_GCC="''${CC_GCC:-gcc}"' \ + --replace-fail 'CC_CXX="g++"' 'CC_CXX="''${CC_CXX:-g++}"' + ''; + + configurePhase = '' + runHook preConfigure + + cd src + + export PATH="${hostCC}/bin:${hostPython}/bin:$PATH" + export CC_GCC="${host-gcc}" + export CC_CXX="${host-gxx}" + export CC="${host-gcc}" + export CXX="${host-gxx}" + export PKG_CONFIG_PATH="${hostPython}/lib/pkgconfig:$PKG_CONFIG_PATH" + export HOST_CC="$CC" + export HOST_CXX="$CXX"
+ export HOST_LLVM_CONFIG="${llvmConfig.llvm-config-wrapped}/bin/llvm-config" + export XDP2_CLANG_VERSION="${llvmConfig.version}" + export XDP2_CLANG_RESOURCE_PATH="${llvmConfig.paths.clangResourceDir}" + export XDP2_C_INCLUDE_PATH="${llvmConfig.paths.clangResourceDir}/include" + export CONFIGURE_DEBUG_LEVEL=7 + + bash configure.sh --build-opt-parser + + if grep -q 'PATH_ARG="--with-path=' config.mk; then + sed -i 's|PATH_ARG="--with-path=.*"|PATH_ARG=""|' config.mk + fi + + sed -i 's|^HOST_CC := gcc$|HOST_CC := ${host-gcc}|' config.mk + sed -i 's|^HOST_CXX := g++$|HOST_CXX := ${host-gxx}|' config.mk + echo "HOST_LDFLAGS := -L${hostPkgs.boost}/lib -Wl,-rpath,${hostPkgs.boost}/lib" >> config.mk + + cd .. + + runHook postConfigure + ''; + + buildPhase = '' + runHook preBuild + + export HOST_CC="${hostCC}/bin/gcc" + export HOST_CXX="${hostCC}/bin/g++" + export HOST_LLVM_CONFIG="${llvmConfig.llvm-config-wrapped}/bin/llvm-config" + export XDP2_CLANG_VERSION="${llvmConfig.version}" + export XDP2_CLANG_RESOURCE_PATH="${llvmConfig.paths.clangResourceDir}" + export XDP2_C_INCLUDE_PATH="${llvmConfig.paths.clangResourceDir}/include" + export XDP2_GLIBC_INCLUDE_PATH="${hostPkgs.stdenv.cc.libc.dev}/include" + export XDP2_LINUX_HEADERS_PATH="${hostPkgs.linuxHeaders}/include" + + # Build cppfront first + echo "Building cppfront..." + cd thirdparty/cppfront + $HOST_CXX -std=c++20 source/cppfront.cpp -o cppfront-compiler + cd ../.. + + # Build xdp2-compiler + echo "Building xdp2-compiler..." + cd src/tools/compiler + make -j''${NIX_BUILD_CORES:-1} + cd ../../.. + + # Build xdp2 libraries and capture all compiler output + echo "Building xdp2 with ${name} flags..." + cd src + make -j''${NIX_BUILD_CORES:-1} 2>&1 | tee "$NIX_BUILD_TOP/build-output.log" || true + cd .. 
+ + runHook postBuild + ''; + + installPhase = '' + mkdir -p $out + # Extract warning/error lines from the build output + grep -E ': warning:|: error:' "$NIX_BUILD_TOP/build-output.log" > $out/report.txt || true + findings=$(wc -l < $out/report.txt) + echo "$findings" > $out/count.txt + + # Include full build log for reference + cp "$NIX_BUILD_TOP/build-output.log" $out/full-build.log + + { + echo "=== ${name} Analysis ===" + echo "" + echo "Flags: ${lib.concatStringsSep " " extraFlags}" + echo "Findings: $findings warnings/errors" + if [ "$findings" -gt 0 ]; then + echo "" + echo "=== Warnings ===" + cat $out/report.txt + fi + } > $out/summary.txt + ''; + }; + +in +{ + gcc-warnings = mkGccAnalysisBuild "gcc-warnings" gccWarningFlags; + + gcc-analyzer = mkGccAnalysisBuild "gcc-analyzer" [ + "-fanalyzer" + "-fdiagnostics-plain-output" + ]; +} diff --git a/nix/analysis/sanitizers.nix b/nix/analysis/sanitizers.nix new file mode 100644 index 0000000..1fd101e --- /dev/null +++ b/nix/analysis/sanitizers.nix @@ -0,0 +1,207 @@ +# nix/analysis/sanitizers.nix +# +# ASan + UBSan instrumented build and test execution. +# +# Unlike the reference (which uses nixComponents.overrideScope), XDP2 +# builds with Make. We build with sanitizer flags and run sample tests +# to detect runtime violations. 
{
  lib,
  pkgs,
  src,
  llvmConfig,
  nativeBuildInputs,
  buildInputs,
}:

let
  llvmPackages = llvmConfig.llvmPackages;
  hostPkgs = pkgs.buildPackages;
  hostCC = hostPkgs.stdenv.cc;
  hostPython = hostPkgs.python3.withPackages (p: [ p.scapy ]);

  # Wrapper compilers that pin boost/libpcap include+library paths for
  # host-side tools built during the xdp2 bootstrap.
  host-gcc = hostPkgs.writeShellScript "host-gcc" ''
    exec ${hostCC}/bin/gcc \
      -I${hostPkgs.boost.dev}/include \
      -I${hostPkgs.libpcap}/include \
      -L${hostPkgs.boost}/lib \
      -L${hostPkgs.libpcap.lib}/lib \
      "$@"
  '';

  host-gxx = hostPkgs.writeShellScript "host-g++" ''
    exec ${hostCC}/bin/g++ \
      -I${hostPkgs.boost.dev}/include \
      -I${hostPkgs.libpcap}/include \
      -I${hostPython}/include/python3.13 \
      -L${hostPkgs.boost}/lib \
      -L${hostPkgs.libpcap.lib}/lib \
      -L${hostPython}/lib \
      -Wl,-rpath,${hostPython}/lib \
      "$@"
  '';

in
pkgs.stdenv.mkDerivation {
  pname = "xdp2-analysis-sanitizers";
  version = "0.1.0";
  inherit src;

  inherit nativeBuildInputs buildInputs;

  # Sanitizers conflict with fortify/stack-protector hardening.
  hardeningDisable = [ "all" ];

  HOST_CC = "${hostCC}/bin/gcc";
  HOST_CXX = "${hostCC}/bin/g++";
  HOST_LLVM_CONFIG = "${llvmConfig.llvm-config-wrapped}/bin/llvm-config";
  XDP2_CLANG_VERSION = llvmConfig.version;
  XDP2_CLANG_RESOURCE_PATH = llvmConfig.paths.clangResourceDir;

  LD_LIBRARY_PATH = lib.makeLibraryPath [
    llvmPackages.llvm
    llvmPackages.libclang.lib
    hostPkgs.boost
  ];

  # NOTE: Sanitizer flags are NOT applied via NIX_CFLAGS_COMPILE because
  # that would break configure.sh's link tests. Instead, we inject them
  # into config.mk CFLAGS/LDFLAGS after configure completes.

  dontFixup = true;

  postPatch = ''
    substituteInPlace thirdparty/cppfront/Makefile \
      --replace-fail 'include ../../src/config.mk' '# config.mk not needed for standalone build'

    # NOTE(review): the inserted #include lines below look truncated in the
    # original (no header names after '#include') — confirm intended headers.
    sed -i '1i#include \n#include \n' thirdparty/cppfront/include/cpp2util.h

    substituteInPlace src/configure.sh \
      --replace-fail 'CC_GCC="gcc"' 'CC_GCC="''${CC_GCC:-gcc}"' \
      --replace-fail 'CC_CXX="g++"' 'CC_CXX="''${CC_CXX:-g++}"'
  '';

  configurePhase = ''
    runHook preConfigure

    cd src

    export PATH="${hostCC}/bin:${hostPython}/bin:$PATH"
    export CC_GCC="${host-gcc}"
    export CC_CXX="${host-gxx}"
    export CC="${host-gcc}"
    export CXX="${host-gxx}"
    export PKG_CONFIG_PATH="${hostPython}/lib/pkgconfig:$PKG_CONFIG_PATH"
    export HOST_CC="$CC"
    export HOST_CXX="$CXX"
    export HOST_LLVM_CONFIG="${llvmConfig.llvm-config-wrapped}/bin/llvm-config"
    export XDP2_CLANG_VERSION="${llvmConfig.version}"
    export XDP2_CLANG_RESOURCE_PATH="${llvmConfig.paths.clangResourceDir}"
    export XDP2_C_INCLUDE_PATH="${llvmConfig.paths.clangResourceDir}/include"
    export CONFIGURE_DEBUG_LEVEL=7

    bash configure.sh --build-opt-parser

    if grep -q 'PATH_ARG="--with-path=' config.mk; then
      sed -i 's|PATH_ARG="--with-path=.*"|PATH_ARG=""|' config.mk
    fi

    sed -i 's|^HOST_CC := gcc$|HOST_CC := ${host-gcc}|' config.mk
    sed -i 's|^HOST_CXX := g++$|HOST_CXX := ${host-gxx}|' config.mk
    echo "HOST_LDFLAGS := -L${hostPkgs.boost}/lib -Wl,-rpath,${hostPkgs.boost}/lib" >> config.mk

    # Inject sanitizer flags into config.mk AFTER configure completes
    # (presumably the Makefiles honor EXTRA_CFLAGS — verify against src/config.mk)
    echo "EXTRA_CFLAGS += -fsanitize=address,undefined -fno-omit-frame-pointer" >> config.mk
    echo "LDFLAGS += -fsanitize=address,undefined" >> config.mk

    cd ..

    runHook postConfigure
  '';

  buildPhase = ''
    runHook preBuild

    export HOST_CC="${hostCC}/bin/gcc"
    export HOST_CXX="${hostCC}/bin/g++"
    export HOST_LLVM_CONFIG="${llvmConfig.llvm-config-wrapped}/bin/llvm-config"
    export XDP2_CLANG_VERSION="${llvmConfig.version}"
    export XDP2_CLANG_RESOURCE_PATH="${llvmConfig.paths.clangResourceDir}"
    export XDP2_C_INCLUDE_PATH="${llvmConfig.paths.clangResourceDir}/include"
    export XDP2_GLIBC_INCLUDE_PATH="${hostPkgs.stdenv.cc.libc.dev}/include"
    export XDP2_LINUX_HEADERS_PATH="${hostPkgs.linuxHeaders}/include"

    # Build cppfront (without sanitizers — host tool)
    echo "Building cppfront..."
    cd thirdparty/cppfront
    $HOST_CXX -std=c++20 source/cppfront.cpp -o cppfront-compiler
    cd ../..

    # Build xdp2-compiler (host tool)
    echo "Building xdp2-compiler..."
    cd src/tools/compiler
    make -j''${NIX_BUILD_CORES:-1}
    cd ../../..

    # Build xdp2 libraries with sanitizer instrumentation
    echo "Building xdp2 with ASan+UBSan..."
    cd src
    make -j''${NIX_BUILD_CORES:-1} 2>&1 | tee "$NIX_BUILD_TOP/build-output.log" || true
    cd ..

    runHook postBuild
  '';

  # Run sample tests — sanitizer violations cause non-zero exit
  checkPhase = ''
    echo "Running tests with sanitizer instrumentation..."
    sanitizer_violations=0

    # Run any built sample parsers against test pcaps
    for test_bin in src/test/*/test_*; do
      if [ -x "$test_bin" ]; then
        echo "  Running: $test_bin"
        # BUGFIX: `if ! cmd | tee …` tests tee's exit status (the last
        # command in the pipeline), so a crashing test binary was never
        # counted. Check the binary's own status via PIPESTATUS instead.
        "$test_bin" 2>&1 | tee -a "$NIX_BUILD_TOP/sanitizer-output.log"
        if [ "''${PIPESTATUS[0]}" -ne 0 ]; then
          echo "  FAIL: $test_bin"
          sanitizer_violations=$((sanitizer_violations + 1))
        fi
      fi
    done

    echo "$sanitizer_violations" > "$NIX_BUILD_TOP/sanitizer-violations.txt"
  '';

  doCheck = true;

  installPhase = ''
    mkdir -p $out

    violations=0
    if [ -f "$NIX_BUILD_TOP/sanitizer-violations.txt" ]; then
      violations=$(cat "$NIX_BUILD_TOP/sanitizer-violations.txt")
    fi

    {
      echo "=== ASan + UBSan Analysis ==="
      echo ""
      echo "Built with AddressSanitizer + UndefinedBehaviorSanitizer."
      echo "Sample tests executed with sanitizer instrumentation."
      echo ""
      if [ "$violations" -gt 0 ]; then
        echo "Result: $violations sanitizer violations detected."
      else
        echo "Result: All tests passed — no sanitizer violations detected."
      fi
    } > $out/report.txt

    echo "$violations" > $out/count.txt

    if [ -f "$NIX_BUILD_TOP/sanitizer-output.log" ]; then
      cp "$NIX_BUILD_TOP/sanitizer-output.log" $out/sanitizer-output.log
    fi
    if [ -f "$NIX_BUILD_TOP/build-output.log" ]; then
      cp "$NIX_BUILD_TOP/build-output.log" $out/build-output.log
    fi
  '';
}
+ message: sprintf() has no bounds checking — use snprintf() instead + languages: [c, cpp] + severity: WARNING + - id: unsafe-strcpy + pattern: strcpy($DST, $SRC) + message: strcpy() has no bounds checking — use strncpy() or strlcpy() + languages: [c, cpp] + severity: WARNING + - id: unsafe-strcat + pattern: strcat($DST, $SRC) + message: strcat() has no bounds checking — use strncat() or strlcat() + languages: [c, cpp] + severity: WARNING + - id: potential-format-string + patterns: + - pattern: printf($FMT) + - pattern-not: printf("...") + message: Potential format string vulnerability — ensure format string is a literal + languages: [c, cpp] + severity: WARNING + - id: unsafe-vsprintf + pattern: vsprintf($BUF, ...) + message: vsprintf() has no bounds checking — use vsnprintf() instead + languages: [c, cpp] + severity: WARNING + - id: unsafe-gets + pattern: gets($BUF) + message: gets() is always unsafe (unbounded read) — use fgets() instead + languages: [c, cpp] + severity: ERROR + - id: unsafe-strncpy-strlen + pattern: strncpy($D, $S, strlen($S)) + message: strncpy with strlen(src) as length defeats the purpose of bounds checking + languages: [c, cpp] + severity: WARNING + + # ── Category 2: Memory Management (C-specific) ───────────────── + - id: memset-zero-length + pattern: memset($B, $V, 0) + message: memset with length 0 is a no-op — check arguments + languages: [c, cpp] + severity: WARNING + - id: memcpy-sizeof-pointer + pattern: memcpy($D, $S, sizeof($PTR)) + message: memcpy with sizeof(pointer) likely copies only pointer size, not data + languages: [c, cpp] + severity: WARNING + - id: malloc-no-null-check + patterns: + - pattern: | + $PTR = malloc(...); + ... + *$PTR + - pattern-not: | + $PTR = malloc(...); + ... + if ($PTR == NULL) { ... } + ... 
+ *$PTR + message: malloc() result used without NULL check + languages: [c] + severity: WARNING + - id: realloc-self-assign + pattern: $PTR = realloc($PTR, $SIZE) + message: realloc self-assignment leaks memory on failure — use a temporary pointer + languages: [c, cpp] + severity: WARNING + + # ── Category 3: Race Conditions / TOCTOU ───────────────────── + - id: toctou-access + pattern: access($PATH, ...) + message: access() is prone to TOCTOU races — use faccessat() or open-then-check + languages: [c, cpp] + severity: WARNING + - id: chmod-on-pathname + pattern: chmod($PATH, $MODE) + message: chmod on pathname is TOCTOU-prone — prefer fchmod() on an open fd + languages: [c, cpp] + severity: INFO + - id: chown-on-pathname + patterns: + - pattern-either: + - pattern: chown($PATH, $UID, $GID) + - pattern: lchown($PATH, $UID, $GID) + message: chown/lchown on pathname is TOCTOU-prone — prefer fchown() on an open fd + languages: [c, cpp] + severity: INFO + - id: insecure-rand + patterns: + - pattern-either: + - pattern: rand() + - pattern: srand(...) 
+ message: rand()/srand() are not cryptographically secure — use getrandom() + languages: [c, cpp] + severity: WARNING + - id: toctou-stat + patterns: + - pattern-either: + - pattern: stat($PATH, $BUF) + - pattern: lstat($PATH, $BUF) + message: stat/lstat on pathname is TOCTOU-prone — prefer fstat() on an open fd + languages: [c, cpp] + severity: INFO + + # ── Category 5: Error Handling (C-specific) ──────────────────── + - id: strerror-thread-unsafe + pattern: strerror($E) + message: strerror() is not thread-safe — use strerror_r() + languages: [c, cpp] + severity: INFO + + # ── Category 6: Resource Management (C-specific) ────────────── + - id: fopen-no-close-check + pattern: fopen($P, $M) + message: Raw FILE* from fopen — ensure fclose() is called on all paths + languages: [c] + severity: INFO + - id: signal-not-sigaction + pattern: signal($SIG, $H) + message: signal() has portability issues — prefer sigaction() + languages: [c, cpp] + severity: WARNING + - id: vfork-usage + pattern: vfork() + message: vfork() shares address space with parent — prefer posix_spawn() or fork() + languages: [c, cpp] + severity: WARNING + - id: popen-usage + pattern: popen($CMD, $MODE) + message: popen() invokes shell — risk of command injection, prefer posix_spawn() + languages: [c, cpp] + severity: WARNING + + # ── Category 7: Privilege and Command Execution ─────────────── + - id: setuid-setgid + patterns: + - pattern-either: + - pattern: setuid(...) + - pattern: setgid(...) 
+ message: setuid/setgid changes process privileges — ensure proper error checking + languages: [c, cpp] + severity: WARNING + - id: chroot-usage + pattern: chroot($PATH) + message: chroot alone is not a security boundary — ensure chdir+drop privileges + languages: [c, cpp] + severity: WARNING + - id: getenv-unchecked + pattern: getenv($VAR) + message: getenv() returns nullable pointer — check for NULL before use + languages: [c, cpp] + severity: INFO + - id: exec-family + patterns: + - pattern-either: + - pattern: execvp(...) + - pattern: execv(...) + - pattern: execve(...) + message: exec-family call — ensure arguments are validated and paths are absolute + languages: [c, cpp] + severity: INFO + + # ── Category 9: Code Quality / Defensive Programming ───────── + - id: goto-usage + pattern: goto $LABEL + message: goto usage — consider structured control flow alternatives + languages: [c, cpp] + severity: INFO + - id: assert-side-effect + patterns: + - pattern-either: + - pattern: assert($X = $Y) + - pattern: assert($X++) + message: Side effect in assert() — expression is removed in release builds (NDEBUG) + languages: [c, cpp] + severity: ERROR + - id: fprintf-stderr + pattern: fprintf(stderr, ...) + message: Direct stderr output — consider using the project logging infrastructure + languages: [c, cpp] + severity: INFO + - id: atoi-atol-usage + patterns: + - pattern-either: + - pattern: atoi(...) + - pattern: atol(...) + - pattern: atof(...) 
# nix/analysis/semgrep.nix
#
# semgrep pattern-based code search with custom rules.
# Same structure as reference, uses C-filtered semgrep-rules.yaml.
#

{
  pkgs,
  mkSourceReport,
}:

let
  rulesFile = ./semgrep-rules.yaml;

  runner = pkgs.writeShellApplication {
    name = "run-semgrep-analysis";
    runtimeInputs = with pkgs; [
      semgrep
      coreutils
      gnugrep
      cacert
    ];
    text = ''
      source_dir="$1"
      output_dir="$2"

      echo "=== semgrep Analysis ==="

      export SEMGREP_ENABLE_VERSION_CHECK=0
      export SEMGREP_SEND_METRICS=off
      export SSL_CERT_FILE="${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt"
      export OTEL_TRACES_EXPORTER=none
      # semgrep needs a writable HOME for its config/cache
      HOME="$(mktemp -d)"
      export HOME

      # BUGFIX: stderr was previously merged into report.json (2>&1), which
      # interleaved diagnostics into the JSON stream and corrupted the
      # machine-readable report. Keep stderr in its own log instead.
      semgrep \
        --config ${rulesFile} \
        --json \
        --metrics=off \
        --no-git-ignore \
        "$source_dir/src" \
        > "$output_dir/report.json" 2> "$output_dir/semgrep-stderr.log" || true

      # Also produce a text report
      semgrep \
        --config ${rulesFile} \
        --metrics=off \
        --no-git-ignore \
        "$source_dir/src" \
        > "$output_dir/report.txt" 2>> "$output_dir/semgrep-stderr.log" || true

      # Count results from JSON output.
      # BUGFIX: writeShellApplication runs with `set -o pipefail`, so the
      # old `grep … | wc -l || echo "0"` emitted BOTH wc's "0" and echo's
      # "0" when grep matched nothing, writing two lines into count.txt.
      findings=$(grep -o '"check_id"' "$output_dir/report.json" | wc -l) || findings=0
      echo "$findings" > "$output_dir/count.txt"
    '';
  };
in
mkSourceReport "semgrep" runner
describes each exemption in the triage system and the +rationale for excluding it from high-confidence findings. + +## Excluded Check IDs (`filters.py`) + +These check IDs are removed entirely during filtering — their findings +never appear in triage output. + +### `syntaxError` +- **Tool:** cppcheck +- **Count:** 14 (all test code) +- **Reason:** cppcheck's parser cannot handle XDP2's complex macro + constructs (variadic macros, token pasting, nested expansion). These + are parser failures in the tool, not syntax errors in the code. The + code compiles successfully with GCC and Clang. + +### `preprocessorErrorDirective` +- **Tool:** cppcheck +- **Count:** 8 +- **Reason:** Two categories: + 1. **Intentional `#error` platform guards** — e.g., + `#error "Unsupported long size"` in `bitmap.h`, + `#error "Endianness not identified"` in `proto_geneve.h`. These are + compile-time assertions that fire only on unsupported platforms. + 2. **cppcheck macro expansion failures** — e.g., `pmacro.h` lines + where cppcheck fails to expand `XDP2_SELECT_START` / + `XDP2_VSTRUCT_VSCONST` due to complex `##` token pasting. The + macros work correctly with real compilers. + +### `unknownMacro` +- **Tool:** cppcheck +- **Count:** 2 +- **Reason:** cppcheck doesn't recognize project-specific macros: + - `LIST_FOREACH` (`dtable.c:789`) — standard BSD-style list traversal + macro, defined in project headers. + - `__XDP2_PMACRO_APPLYXDP2_PMACRO_NARGS` (`bitmap_word.h:544`) — + internal macro helper from the pmacro system. + These would require `--suppress=` or `--library=` configuration for + cppcheck, which is not worth the maintenance burden. + +### `arithOperationsOnVoidPointer` +- **Tool:** cppcheck +- **Count:** 25 +- **Reason:** Void pointer arithmetic (`void *p; p += n`) is a GNU C + extension where `sizeof(void) == 1`. 
This is used intentionally + throughout the codebase: + - `siphash.c` — hash function byte-level pointer walks + - `obj_allocator.c` — memory pool object addressing + - `parser.c` — packet header pointer advancement + All three GCC, Clang, and the Linux kernel rely on this extension. + The code is correct and compiles without warnings under `-std=gnu11`. + +### `subtractPointers` +- **Tool:** cppcheck +- **Count:** 3 +- **Reason:** Pointer subtraction in `cli.h:88,107` and `dtable.h:85` + implements `container_of`-style macros — computing the offset of a + member within a struct to recover the containing struct pointer. This + is a standard C idiom used throughout Linux kernel code and system + libraries. cppcheck flags it because the two pointers technically + point to "different objects" (member vs. container), but the operation + is well-defined in practice on all target platforms. + +## Generated File Patterns (`filters.py`) + +### `*.template.c` +- **Reason:** Template files under `src/templates/xdp2/` are input to + `xdp2-compiler`, which processes them into final C source. They + contain placeholder identifiers and incomplete type references that + are resolved during code generation. Findings like + `clang-diagnostic-implicit-int` and + `clang-diagnostic-implicit-function-declaration` in these files are + expected and not actionable. + +## Scoring Adjustments (`scoring.py`) + +These checks still appear in the full triage summary but are excluded +from the high-confidence list. + +### `bugprone-narrowing-conversions` → `STYLE_ONLY_CHECKS` +- **Tool:** clang-tidy +- **Count:** 56 (was the single largest category in high-confidence) +- **Reason:** The vast majority are `size_t` → `ssize_t` and + `unsigned int` → `int` conversions in packet parsing code where sizes + and offsets are bounded by protocol constraints (e.g., packet length + fits in `int`). These narrowing conversions are intentional and + ubiquitous in C networking code. 
Previously listed in + `BUG_CLASS_CHECKS`, which incorrectly elevated all 56 to + high-confidence. Moved to `STYLE_ONLY_CHECKS` so they remain visible + in the full report but don't overwhelm the actionable findings list. + +### `variableScope` → `STYLE_ONLY_CHECKS` +- **Tool:** cppcheck +- **Count:** 30 +- **Reason:** Suggestions to move variable declarations closer to first + use. This is a style preference — the existing code follows C89-style + declarations-at-top-of-block, which is a valid convention. Not a bug. + +### `constParameter`, `constParameterCallback` → `STYLE_ONLY_CHECKS` +- **Tool:** cppcheck +- **Count:** 14 +- **Reason:** Suggestions to add `const` to parameters that are not + modified. Valid style improvement but not a correctness issue, and + changing function signatures affects the public API. + +### Excluded from High-Confidence via `_HIGH_CONF_EXCLUDED_PREFIXES` + +#### `bugprone-reserved-identifier` +- **Count:** 642 (combined with `cert-dcl37-c,cert-dcl51-cpp`) +- **Reason:** XDP2 uses double-underscore prefixed identifiers + (`__XDP2_PMACRO_*`, `___XDP2_BITMAP_WORD_*`) as internal macro + helpers. This is the project's deliberate convention for namespace + separation. While technically reserved by the C standard, these + identifiers do not conflict with any compiler or library names. + +#### `bugprone-easily-swappable-parameters` +- **Count:** 201 +- **Reason:** Functions with multiple parameters of the same type (e.g., + `int offset, int length`). This is inherent to packet parsing APIs + where multiple integer parameters represent distinct protocol fields. + Cannot be changed without breaking the API. + +#### `bugprone-assignment-in-if-condition` +- **Count:** 79 +- **Reason:** `if ((x = func()))` is an intentional C idiom used + throughout the codebase for error-checked assignment. This is standard + practice in C systems code (Linux kernel, glibc, etc.). 
"""Command-line entry point for static analysis triage.

Usage:
    python -m triage <result_dir>                    # Full prioritized report
    python -m triage <result_dir> --summary          # Category summary
    python -m triage <result_dir> --high-confidence  # Likely real bugs only
    python -m triage <result_dir> --cross-ref        # Multi-tool correlations
    python -m triage <result_dir> --category <id>    # Drill into one category
"""

import argparse
import os
import sys

from parsers import load_all_findings
from filters import filter_findings, deduplicate, is_test_code
from reports import (
    print_summary, print_high_confidence, print_cross_ref,
    print_category, print_full_report, write_all_reports,
)


def _build_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser (kept separate for readability)."""
    ap = argparse.ArgumentParser(
        description='Triage static analysis findings across multiple tools.'
    )
    ap.add_argument('result_dir', help='Path to analysis results directory')
    ap.add_argument('--summary', action='store_true',
                    help='Show category summary')
    ap.add_argument('--high-confidence', action='store_true',
                    help='Show only likely-real-bug findings')
    ap.add_argument('--cross-ref', action='store_true',
                    help='Show multi-tool correlations')
    ap.add_argument('--category', type=str,
                    help='Drill into a specific check category')
    ap.add_argument('--output-dir', type=str,
                    help='Write all reports as files to this directory')
    ap.add_argument('--include-test', action='store_true',
                    help='Include test code findings (excluded by default in high-confidence)')
    return ap


def main():
    """Load findings, filter noise, deduplicate, and render the chosen report."""
    args = _build_parser().parse_args()

    if not os.path.isdir(args.result_dir):
        print(f'Error: {args.result_dir} is not a directory', file=sys.stderr)
        sys.exit(1)

    # Pipeline: raw findings -> noise-filtered -> deduplicated.
    raw = load_all_findings(args.result_dir)
    filtered = filter_findings(raw)
    findings = deduplicate(filtered)

    print(f'Loaded {len(raw)} raw -> {len(filtered)} filtered -> {len(findings)} dedup')

    # Exactly one output mode runs; --output-dir takes precedence.
    if args.output_dir:
        write_all_reports(findings, args.output_dir)
    elif args.summary:
        print_summary(findings)
    elif args.high_confidence:
        if not args.include_test:
            # Test-code findings are dropped from high-confidence by default.
            findings = [f for f in findings if not is_test_code(f.file)]
        print_high_confidence(findings)
    elif args.cross_ref:
        print_cross_ref(findings)
    elif args.category:
        print_category(findings, args.category)
    else:
        print_full_report(findings)


if __name__ == '__main__':
    main()
+""" + +from finding import Finding + + +# Third-party code — findings are not actionable +# Note: /nix/store/ prefix is stripped by normalize_path before filtering +THIRD_PARTY_PATTERNS = [ + 'thirdparty/', 'cppfront/', +] + +# Generated files — findings are not actionable +GENERATED_FILE_PATTERNS = [ + 'parser_*.p.c', # xdp2-compiler generated parser code + '*.template.c', # Template files before xdp2-compiler processing + '_pmacro_gen.h', # Packet macro generator output + '_dtable.h', # Decision table output + '_stable.h', # State table output +] + +EXCLUDED_CHECK_IDS = { + # Known false positive categories + 'normalCheckLevelMaxBranches', + # Cppcheck noise — tool limitations, not code bugs + 'missingIncludeSystem', + 'missingInclude', + 'unmatchedSuppression', + 'checkersReport', + 'syntaxError', # Can't parse complex macro constructs + 'preprocessorErrorDirective', # Intentional #error guards / macro expansion failures + 'unknownMacro', # Doesn't understand project macros (LIST_FOREACH, pmacro) + # Cppcheck false positives in idiomatic C + 'arithOperationsOnVoidPointer', # GNU C extension (sizeof(void)==1), intentional in networking code + 'subtractPointers', # container_of style pointer arithmetic + # Clang-tidy build errors (not real findings) + 'clang-diagnostic-error', + # _FORTIFY_SOURCE warnings (build config, not code bugs) + '-W#warnings', + '-Wcpp', +} + +EXCLUDED_MESSAGE_PATTERNS = [ + '_FORTIFY_SOURCE', +] + +TEST_PATH_PATTERNS = ['src/test/', '/test/'] + +SECURITY_PATHS = ['src/lib/', 'src/include/xdp2/'] + + +def _match_generated(path: str) -> bool: + """Check if file matches a generated file pattern (supports * glob).""" + import fnmatch + name = path.rsplit('/', 1)[-1] if '/' in path else path + return any(fnmatch.fnmatch(name, pat) for pat in GENERATED_FILE_PATTERNS) + + +def is_generated(path: str) -> bool: + return _match_generated(path) + + +def is_third_party(path: str) -> bool: + for pat in THIRD_PARTY_PATTERNS: + if pat in path: + return 
True + # Files not under src/ are likely third-party or generated + return not path.startswith('src/') + + +def is_test_code(path: str) -> bool: + return any(pat in path for pat in TEST_PATH_PATTERNS) + + +def is_security_sensitive(path: str) -> bool: + return any(pat in path for pat in SECURITY_PATHS) + + +def filter_findings(findings: list) -> list: + """Remove third-party code and known false positive categories.""" + return [ + f for f in findings + if not is_third_party(f.file) + and not is_generated(f.file) + and f.check_id not in EXCLUDED_CHECK_IDS + and f.line > 0 + and not any(pat in f.message for pat in EXCLUDED_MESSAGE_PATTERNS) + ] + + +def deduplicate(findings: list) -> list: + """Deduplicate findings by (file, line, check_id). + + clang-tidy reports the same header finding once per translation unit. + Keep first occurrence only. + """ + seen = set() + result = [] + for f in findings: + key = f.dedup_key() + if key not in seen: + seen.add(key) + result.append(f) + return result diff --git a/nix/analysis/triage/finding.py b/nix/analysis/triage/finding.py new file mode 100644 index 0000000..fc0f7fb --- /dev/null +++ b/nix/analysis/triage/finding.py @@ -0,0 +1,44 @@ +"""Finding dataclass and path/severity normalization.""" + +import re +from dataclasses import dataclass + + +@dataclass +class Finding: + tool: str + check_id: str + severity: str # "error", "warning", "style", "info" + file: str # normalized relative path + line: int + message: str + + def location_key(self): + return (self.file, self.line) + + def dedup_key(self): + return (self.file, self.line, self.check_id) + + +_NIX_STORE_RE = re.compile(r'/nix/store/[a-z0-9]{32}-[^/]*/') + + +def normalize_path(path: str) -> str: + """Strip Nix store prefix to get relative source path.""" + path = _NIX_STORE_RE.sub('', path) + # Clean up double slashes (from Makefile $(CURRDIR)//include patterns) + while '//' in path: + path = path.replace('//', '/') + return path + + +def normalize_severity(sev: str) 
-> str: + """Map tool-specific severities to unified levels.""" + sev = sev.lower().strip() + if sev in ('error', 'high', '5', '4'): + return 'error' + if sev in ('warning', 'medium', '3', 'portability', 'performance'): + return 'warning' + if sev in ('style', 'low', '2', '1', '0', 'information', 'info'): + return 'style' + return 'warning' diff --git a/nix/analysis/triage/parsers/__init__.py b/nix/analysis/triage/parsers/__init__.py new file mode 100644 index 0000000..3498744 --- /dev/null +++ b/nix/analysis/triage/parsers/__init__.py @@ -0,0 +1,49 @@ +"""Unified finding loader across all tool parsers.""" + +from pathlib import Path + +from finding import Finding +from parsers import cppcheck, semgrep, clang, flawfinder + + +def load_all_findings(result_dir: str) -> list: + """Load findings from all available tool reports.""" + findings = [] + rd = Path(result_dir) + + # cppcheck XML + p = rd / 'cppcheck' / 'report.xml' + if p.exists(): + findings.extend(cppcheck.parse(str(p))) + + # semgrep JSON + p = rd / 'semgrep' / 'report.json' + if p.exists(): + findings.extend(semgrep.parse(str(p))) + + # clang-tidy + p = rd / 'clang-tidy' / 'report.txt' + if p.exists(): + findings.extend(clang.parse(str(p), 'clang-tidy')) + + # gcc-warnings + p = rd / 'gcc-warnings' / 'report.txt' + if p.exists(): + findings.extend(clang.parse(str(p), 'gcc-warnings')) + + # flawfinder + p = rd / 'flawfinder' / 'report.txt' + if p.exists(): + findings.extend(flawfinder.parse(str(p))) + + # clang-analyzer + p = rd / 'clang-analyzer' / 'report.txt' + if p.exists(): + findings.extend(clang.parse(str(p), 'clang-analyzer')) + + # gcc-analyzer + p = rd / 'gcc-analyzer' / 'report.txt' + if p.exists(): + findings.extend(clang.parse(str(p), 'gcc-analyzer')) + + return findings diff --git a/nix/analysis/triage/parsers/clang.py b/nix/analysis/triage/parsers/clang.py new file mode 100644 index 0000000..65dbf0f --- /dev/null +++ b/nix/analysis/triage/parsers/clang.py @@ -0,0 +1,36 @@ +"""Parse 
# --- parsers/clang.py ------------------------------------------------------
# Shared diagnostic line format for clang-tidy, clang-analyzer,
# gcc-warnings and gcc-analyzer reports:
#   /path/to/file.c:123:45: warning: message [check-name]
# Finding, normalize_path and normalize_severity are provided by the
# sibling `finding` module (resolved when the triage package is imported).

import re
import xml.etree.ElementTree as ET

_DIAG_RE = re.compile(
    r'^(.+?):(\d+):\d+:\s+(warning|error):\s+(.+?)\s+\[([^\]]+)\]$'
)


def parse(path: str, tool_name: str) -> list:
    """Parse a clang-style diagnostic text report into Finding objects.

    A missing report file simply means the tool did not run; an empty
    list is returned in that case.
    """
    results = []
    try:
        fh = open(path)
    except FileNotFoundError:
        return results
    with fh:
        for raw in fh:
            m = _DIAG_RE.match(raw.strip())
            if m is None:
                continue
            fname, lineno, sev, msg, check = m.groups()
            results.append(Finding(
                tool=tool_name,
                check_id=check,
                severity=normalize_severity(sev),
                file=normalize_path(fname),
                line=int(lineno),
                message=msg,
            ))
    return results


# --- parsers/cppcheck.py ---------------------------------------------------
# (lives in its own module in the package; the duplicate `parse` name is
# an artifact of presenting both parsers side by side here)

def parse(path: str) -> list:  # noqa: F811
    """Parse a cppcheck XML report into Finding objects.

    Missing or unparsable reports yield an empty list.
    """
    results = []
    try:
        tree = ET.parse(path)
    except (ET.ParseError, FileNotFoundError):
        return results

    for err in tree.iter('error'):
        rule = err.get('id', '')
        raw_sev = err.get('severity', 'warning')
        text = err.get('msg', '')

        for loc in err.iter('location'):
            rel = normalize_path(loc.get('file', ''))
            lineno = int(loc.get('line', 0))
            if rel and lineno > 0:
                results.append(Finding(
                    tool='cppcheck',
                    check_id=rule,
                    severity=normalize_severity(raw_sev),
                    file=rel,
                    line=lineno,
                    message=text,
                ))
                break  # Only take first location per error

    return results
100644 index 0000000..834cda1 --- /dev/null +++ b/nix/analysis/triage/parsers/flawfinder.py @@ -0,0 +1,37 @@ +"""Parse flawfinder text reports. + +Flawfinder line format: + /path/to/file.c:123:45: [5] (race) chmod:message +""" + +import re + +from finding import Finding, normalize_path, normalize_severity + + +_LINE_RE = re.compile( + r'^(.+?):(\d+):\d+:\s+\[(\d+)\]\s+\((\w+)\)\s+(\w+):(.+)$' +) + + +def parse(path: str) -> list: + """Parse flawfinder text report.""" + findings = [] + try: + with open(path) as f: + for line in f: + m = _LINE_RE.match(line.strip()) + if m: + category = m.group(4) + func_name = m.group(5) + findings.append(Finding( + tool='flawfinder', + check_id=f'{category}.{func_name}', + severity=normalize_severity(m.group(3)), + file=normalize_path(m.group(1)), + line=int(m.group(2)), + message=m.group(6).strip(), + )) + except FileNotFoundError: + pass + return findings diff --git a/nix/analysis/triage/parsers/semgrep.py b/nix/analysis/triage/parsers/semgrep.py new file mode 100644 index 0000000..a5e452b --- /dev/null +++ b/nix/analysis/triage/parsers/semgrep.py @@ -0,0 +1,38 @@ +"""Parse semgrep JSON reports.""" + +import json + +from finding import Finding, normalize_path, normalize_severity + + +def parse(path: str) -> list: + """Parse semgrep JSON report (may contain multiple JSON objects).""" + findings = [] + try: + with open(path) as f: + content = f.read() + except FileNotFoundError: + return findings + + # Parse all JSON objects in the file (semgrep may output multiple) + decoder = json.JSONDecoder() + pos = 0 + while pos < len(content): + try: + idx = content.index('{', pos) + data, end = decoder.raw_decode(content, idx) + pos = end + except (ValueError, json.JSONDecodeError): + break + + for result in data.get('results', []): + findings.append(Finding( + tool='semgrep', + check_id=result.get('check_id', ''), + severity=normalize_severity(result.get('extra', {}).get('severity', 'warning')), + file=normalize_path(result.get('path', 
def format_finding(f, score=None) -> str:
    """Render one finding as a single human-readable line.

    When *score* is given it is prepended as ``[score=N] ``; note that a
    score of 0 is still shown — only ``None`` suppresses the prefix.
    """
    pieces = []
    if score is not None:
        pieces.append(f'[score={score}] ')
    pieces.append(f'{f.tool}: {f.file}:{f.line}: ')
    pieces.append(f'[{f.severity}] {f.check_id} -- {f.message}')
    return ''.join(pieces)
def print_high_confidence(findings: list):
    """Print only the likely-real-bug findings, highest score first.

    Relies on get_high_confidence() for selection and ordering; each
    entry shows score, location class (security/test/prod) and whether
    multiple tools flagged the same spot.
    """
    entries = get_high_confidence(findings)
    if not entries:
        print('No high-confidence findings.')
        return

    print(f'\n=== High-Confidence Findings ({len(entries)}) ===\n')
    for finding, score, cross_tool in entries:
        # Security classification wins over test classification.
        if is_security_sensitive(finding.file):
            location = 'security'
        elif is_test_code(finding.file):
            location = 'test'
        else:
            location = 'prod'
        marker = ' [CROSS-TOOL]' if cross_tool else ''
        print(f' [{score:>3}] [{location:<8}]{marker}')
        print(f' {finding.tool}: {finding.file}:{finding.line}')
        print(f' {finding.check_id} [{finding.severity}]')
        print(f' {finding.message}')
        print()


def print_cross_ref(findings: list):
    """Print clusters of findings flagged by 2+ tools at nearby lines."""
    clusters = find_cross_tool_hits(findings)
    if not clusters:
        print('No cross-tool correlations found.')
        return

    print(f'\n=== Cross-Tool Correlations ({len(clusters)} clusters) ===\n')
    for index, cluster in enumerate(clusters, 1):
        tool_names = sorted({member.tool for member in cluster})
        head = cluster[0]
        print(f' Cluster #{index} -- {head.file}:{head.line} ({", ".join(tool_names)})')
        for member in cluster:
            print(f' {member.tool}: [{member.severity}] {member.check_id} -- {member.message}')
        print()
def print_full_report(findings: list):
    """Print every finding, highest priority first, capped at 200 lines."""
    cat_counts = defaultdict(int)
    for finding in findings:
        cat_counts[finding.check_id] += 1

    # Sort by descending score, then file and line for a stable layout.
    ranked = sorted(
        ((finding, priority_score(finding, cat_counts)) for finding in findings),
        key=lambda pair: (-pair[1], pair[0].file, pair[0].line),
    )

    print(f'\n=== All Findings ({len(ranked)}) ===\n')
    for finding, score in ranked[:200]:
        print(format_finding(finding, score))

    overflow = len(ranked) - 200
    if overflow > 0:
        print(f'\n... and {overflow} more (use --summary or --category to drill in)')


def capture_output(func, *args, **kwargs) -> str:
    """Run *func* and return everything it wrote to stdout as a string."""
    sink = io.StringIO()
    with redirect_stdout(sink):
        func(*args, **kwargs)
    return sink.getvalue()


def write_all_reports(findings: list, output_dir: str):
    """Write every report mode as a file under *output_dir*.

    Produces summary.txt, high-confidence.txt, count.txt, cross-ref.txt
    and full-report.txt.  High-confidence output (and its count) is
    computed on production findings only; the other reports cover all
    findings including test code.
    """
    os.makedirs(output_dir, exist_ok=True)

    def _write(name: str, text: str):
        # Small helper so each report is one line below.
        with open(os.path.join(output_dir, name), 'w') as fh:
            fh.write(text)

    _write('summary.txt', capture_output(print_summary, findings))

    prod_findings = [f for f in findings if not is_test_code(f.file)]
    _write('high-confidence.txt', capture_output(print_high_confidence, prod_findings))
    _write('count.txt', str(len(get_high_confidence(prod_findings))))
    _write('cross-ref.txt', capture_output(print_cross_ref, findings))
    _write('full-report.txt', capture_output(print_full_report, findings))
# flawfinder check_ids that are intentional patterns in system software;
# findings in these categories never reach the high-confidence report.
FLAWFINDER_NOISE = {
    # TOCTOU / process-control patterns used deliberately
    'race.chmod', 'race.chown', 'race.access', 'race.vfork',
    'shell.system', 'shell.execl', 'shell.execlp',
    'shell.execv', 'shell.execvp',
    # Fixed-size-buffer idioms pervasive in C networking code
    'buffer.read', 'buffer.char', 'buffer.equal', 'buffer.memcpy',
    'buffer.strlen', 'buffer.getenv', 'buffer.wchar_t',
    # Misc environment/filesystem patterns
    'misc.open', 'random.random', 'tmpfile.mkstemp', 'access.umask',
    'format.snprintf', 'format.vsnprintf', 'misc.chroot',
}

# Check IDs that represent real correctness issues (not style); these
# are promoted to high-confidence regardless of category volume.
BUG_CLASS_CHECKS = {
    # cppcheck / clang-analyzer correctness classes
    'uninitMemberVar', 'unsignedLessThanZero',
    'core.UndefinedBinaryOperatorResult', 'core.NullDereference',
    'core.DivideZero', 'core.uninitialized',
    'core.uninitialized.Assign',
    # clang-tidy bugprone checks with low false-positive rates
    'bugprone-use-after-move',
    'bugprone-sizeof-expression',
    'bugprone-integer-division',
    # Memory-safety analyzer checks
    'unix.Malloc', 'unix.MismatchedDeallocator',
    'alpha.security.ArrayBoundV2',
}

# Pure style checks — excluded from high-confidence output even when
# they fire in security-sensitive code.
STYLE_ONLY_CHECKS = {
    'constParameterPointer', 'constVariablePointer',
    'constParameter', 'constParameterCallback',
    'shadowVariable', 'shadowArgument', 'shadowFunction',
    'knownConditionTrueFalse', 'unusedStructMember',
    'variableScope',  # Moving declarations is style, not bugs
    'bugprone-narrowing-conversions',  # size_t→ssize_t, uint→int: intentional in C networking code
}
high-confidence output +_HIGH_CONF_EXCLUDED_PREFIXES = ( + 'readability-', + 'misc-include-cleaner', 'misc-use-internal-linkage', + 'misc-use-anonymous-namespace', 'misc-unused-parameters', + 'misc-const-correctness', 'misc-header-include-cycle', + 'misc-no-recursion', + 'cert-', + # High-volume bugprone checks that are style/convention, not bugs + 'bugprone-reserved-identifier', # __XDP2_PMACRO_* is project convention + 'bugprone-easily-swappable-parameters', # Style — can't change existing API + 'bugprone-assignment-in-if-condition', # Intentional C pattern: if ((x = func())) + 'bugprone-macro-parentheses', # Style — many macros are correct without extra parens + 'bugprone-implicit-widening-of-multiplication-result', # False positives in packet offset math +) + + +def priority_score(f, category_counts: dict) -> int: + """Higher score = higher priority. Range roughly 0-100.""" + score = 0 + + # Severity + if f.severity == 'error': + score += 40 + elif f.severity == 'warning': + score += 20 + + # Security-sensitive location + if is_security_sensitive(f.file): + score += 20 + + # Test code is lower priority + if is_test_code(f.file): + score -= 15 + + # Small-count categories are anomalies (more likely real bugs) + count = category_counts.get(f.check_id, 999) + if count <= 3: + score += 30 + elif count <= 10: + score += 20 + elif count <= 30: + score += 10 + + return score + + +def find_cross_tool_hits(findings: list, tolerance: int = 3) -> list: + """Find file:line pairs flagged by 2+ independent tools. + + Uses a tolerance of +/-N lines to account for minor line number differences. 
+ """ + # Group findings by file + by_file = defaultdict(list) + for f in findings: + by_file[f.file].append(f) + + clusters = [] + for filepath, file_findings in by_file.items(): + # Sort by line + file_findings.sort(key=lambda f: f.line) + + # For each pair of findings from different tools, check proximity + for i, f1 in enumerate(file_findings): + cluster = [f1] + for f2 in file_findings[i + 1:]: + if f2.line > f1.line + tolerance: + break + if f2.tool != f1.tool: + cluster.append(f2) + if len(set(f.tool for f in cluster)) >= 2: + clusters.append(cluster) + + # Deduplicate overlapping clusters + seen_keys = set() + unique_clusters = [] + for cluster in clusters: + key = frozenset((f.tool, f.file, f.line) for f in cluster) + if key not in seen_keys: + seen_keys.add(key) + unique_clusters.append(cluster) + + return unique_clusters + + +def get_high_confidence(findings: list) -> list: + """Return high-confidence findings as (Finding, score, is_cross) tuples, sorted by score.""" + # Compute category counts + cat_counts = defaultdict(int) + for f in findings: + cat_counts[f.check_id] += 1 + + # Find cross-tool correlations (excluding flawfinder noise) + non_noise = [f for f in findings if f.check_id not in FLAWFINDER_NOISE] + cross_hits = find_cross_tool_hits(non_noise) + cross_locations = set() + for cluster in cross_hits: + for f in cluster: + cross_locations.add((f.file, f.line)) + + high_conf = [] + for f in findings: + # Skip noise categories entirely from high-confidence + if f.check_id in FLAWFINDER_NOISE or f.check_id in STYLE_ONLY_CHECKS: + continue + # Skip excluded prefixes (style, not bugs) + if any(f.check_id.startswith(p) for p in _HIGH_CONF_EXCLUDED_PREFIXES): + continue + + score = priority_score(f, cat_counts) + is_cross = (f.file, f.line) in cross_locations + is_bug_class = f.check_id in BUG_CLASS_CHECKS + is_small_cat = cat_counts[f.check_id] <= 3 + # Only cppcheck/clang-analyzer error-severity in security code + is_security_bug = 
(is_security_sensitive(f.file) and f.severity == 'error' + and f.tool in ('cppcheck', 'clang-analyzer', 'gcc-analyzer')) + + if is_cross or is_bug_class or (is_small_cat and score >= 60) or is_security_bug: + high_conf.append((f, score, is_cross)) + + # Sort by score descending + high_conf.sort(key=lambda x: -x[1]) + return high_conf diff --git a/nix/packages.nix b/nix/packages.nix index fa84064..07a2ed0 100644 --- a/nix/packages.nix +++ b/nix/packages.nix @@ -85,6 +85,12 @@ in pkgs.shellcheck llvmPackages.clang-tools # clang-tidy, clang-format, etc. + # Static analysis tools + pkgs.compiledb # Compile command capture for static analysis (make dry-run) + pkgs.cppcheck # Static analysis + pkgs.flawfinder # C/C++ security scanner + pkgs.clang-analyzer # Clang static analyzer (scan-build) + # Utilities pkgs.jp2a # ASCII art for logo pkgs.glibcLocales # Locale support