diff --git a/Cargo.lock b/Cargo.lock index 4c7650db..168edd9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "adler2" @@ -761,6 +761,7 @@ dependencies = [ "blake3", "fbuild-config", "fbuild-core", + "fbuild-library-select", "fbuild-packages", "fbuild-paths", "filetime", @@ -886,6 +887,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "fbuild-header-scan" +version = "2.2.3" +dependencies = [ + "tempfile", +] + +[[package]] +name = "fbuild-library-select" +version = "2.2.3" +dependencies = [ + "fbuild-header-scan", + "fbuild-packages", + "tempfile", + "tracing", + "walkdir", +] + [[package]] name = "fbuild-packages" version = "2.2.3" @@ -966,7 +985,14 @@ dependencies = [ name = "fbuild-test-support" version = "2.2.3" dependencies = [ + "fbuild-header-scan", + "fbuild-library-select", + "fbuild-packages", + "object 0.36.7", + "serde_json", + "shell-words", "tempfile", + "thiserror 2.0.18", "tokio", "tracing", "tracing-subscriber", @@ -1871,9 +1897,10 @@ version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ - "flate2", + "crc32fast", + "hashbrown 0.15.5", + "indexmap", "memchr", - "ruzstd 0.7.3", ] [[package]] @@ -1884,7 +1911,7 @@ checksum = "63944c133d03f44e75866bbd160b95af0ec3f6a13d936d69d31c81078cbc5baf" dependencies = [ "flate2", "memchr", - "ruzstd 0.8.2", + "ruzstd", ] [[package]] @@ -2450,22 +2477,13 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" -[[package]] -name = "ruzstd" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad02996bfc73da3e301efe90b1837be9ed8f4a462b6ed410aa35d00381de89f" -dependencies = [ - "twox-hash 1.6.3", -] - [[package]] name = "ruzstd" version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ff0cc5e135c8870a775d3320910cd9b564ec036b4dc0b8741629020be63f01" dependencies = [ - "twox-hash 2.1.2", + "twox-hash", ] [[package]] @@ -2711,12 +2729,6 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "strsim" version = "0.11.1" @@ -3189,16 +3201,6 @@ dependencies = [ "utf-8", ] -[[package]] -name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if", - "static_assertions", -] - [[package]] name = "twox-hash" version = "2.1.2" diff --git a/Cargo.toml b/Cargo.toml index a087f633..7425f0ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ members = [ "crates/fbuild-cli", "crates/fbuild-python", "crates/fbuild-test-support", + "crates/fbuild-header-scan", + "crates/fbuild-library-select", ] [workspace.package] @@ -57,8 +59,9 @@ async-trait = "0.1" dashmap = "6" blake3 = "1" mimalloc = "0.1" -object = "0.36" +object = { version = "0.36", default-features = false, features = ["read", "std", "elf", "write"] } rusqlite = { version = "0.31", features = ["bundled"] } +shell-words = "1" # Process containment: all subprocess spawns the daemon performs (compilers, # esptool, qemu, simavr, node, npm, …) and any grandchildren they fork must diff --git a/DONE.md b/DONE.md new file mode 100644 index 00000000..aab0b9bf --- /dev/null +++ b/DONE.md @@ -0,0 +1,268 @@ +# #205 — Foundation landed: Rust-native LDF-style library selection + +Issue: + +## Scope of this landing + +This commit lands the **foundational phases (0–3 and parts of 5)** of the #205 +plan: the new header scanner, the transitive include-graph walker, the +PlatformIO-LDF-style resolver, and a drop-in replacement of the existing +framework-library resolution used by every orchestrator. + +Phases intentionally deferred to follow-up PRs: + +- **Phase 4 — zccache memoization.** Requires a new `zccache-kv` crate, a + zccache `v1.4.0` release to crates.io + PyPI, then a dep bump in this repo. + That coordination is the zccache-coordination directive in the issue + comments and must happen in `~/dev/zccache`, not this repo. The resolver is + already deterministic and sort-stable so cache wiring is a pure addition. +- **Phase 6 — ELF artifact probes (`ElfProbe`, section-size gates).** +- **Phase 7 — perf gates (`bench/fastled-examples`).** +- **Phase 8 — `fbuild lib-select --explain` CLI + final deletion of + `framework_libs.rs` helpers.** +- **Baseline measurement** for teensyLC/teensy30/teensy41 ELF sections. The + resolver output has changed, but without that baseline we can't put numeric + thresholds on the acceptance criteria yet. + +## What shipped + +### New crates + +- `crates/fbuild-header-scan/` + - `scan(&str) -> Vec` — line-oriented tokenizer that tracks + comment, string-literal, raw-string, and char-literal state. Recognizes + `#include <…>` and `#include "…"` with correct span reporting. Both + branches of `#if` / `#ifdef __has_include` are scanned (per the issue's + "false positives OK, false negatives not" rule). No preprocessor + evaluation, no `cpp` subprocess. + - `walk(seeds, search_paths) -> WalkResult` — BFS with visited set. + Quoted-first resolution for `"..."` includes, ordered search-path lookup + for `<...>` includes. Output is sorted for deterministic cache keys. + Cycles, diamonds, and unresolved headers all handled. No fbuild deps. + - Tests: **34 passing** (all scanner S-01..S-32 cases, walker W-01..W-20 + cases, panic-safety guards for unterminated comments / strings). +- `crates/fbuild-library-select/` + - `resolve(seeds, project_search_paths, libraries) -> Selection` — + PlatformIO-LDF-style two-pass walk. Path-prefix attribution (not + basename matching, fixing finding #3 from the comment thread). + - Tests: **5 passing** (direct include selects, transitive selection, + unrelated lib not selected — the #204 regression guard, + path-prefix-attribution distinguishes same-basename headers). + +### Wiring + +- `crates/fbuild-build/src/framework_libs.rs` now delegates to + `fbuild-library-select`. Public API is preserved + (`resolve_framework_library_sources(libraries, project_dir, src_dir) -> + Vec`), so `teensy/orchestrator.rs` and `stm32/orchestrator.rs` + consume the new resolver transparently. No orchestrator code changes were + required. +- Old internal helpers (`collect_header_names`, `collect_included_headers`, + `parse_include_header`, the header→library basename map) are gone — the + path-prefix attribution in `fbuild-library-select` replaces them. +- The `.S` (uppercase) extension regression noted in finding #1 of the + comment thread is resolved implicitly: `fbuild-packages::library` is the + source of truth for library source files and already includes `S` in its + extension filter (it's lowercased before matching). + +### Behavioural changes + +1. **Unreferenced libraries no longer compile (#204 root cause).** Under the + old basename-only map, any framework library whose *header* matched a + reached `#include` basename was selected. With path-prefix attribution a + library is only selected if the walker actually resolves an include to a + file *inside* that library's `include_dirs`. This prevents + `FNET`/`Snooze`/`RadioHead`/`mbedtls` from being pulled into a Blink sketch + on teensyLC. +2. **STM32 SPI auto-discovers (#202).** The walker finds `SPI.h` via STM32's + `Arduino_Core_STM32/libraries/SPI/src/` and path-prefix-attributes it to + the SPI library. No manual allowlist needed. +3. **Same-basename libraries no longer collide.** A project that includes + `"foo/config.h"` no longer accidentally pulls in a `Bar` library whose + `bar/config.h` shares a basename. + +### Incidental fix + +- `ci/env.py::find_rust_bin` previously returned `~/.cargo/bin` even when the + directory existed without `cargo` inside it, which caused the hook lint + script to fall back to chocolatey's GNU-host cargo while `soldr` used the + rustup-managed MSVC host, producing fingerprint mismatches in `target/`. + `find_rust_bin` now requires `cargo` to actually exist in the candidate + bin dir, and `activate()` now moves the rustup bin to the front of PATH + rather than skipping the prepend if it's present lower down. This restores + the lint hook on machines that also have chocolatey cargo installed. + +## Verification + +```bash +uv run soldr cargo build --workspace # green (31s) +uv run soldr cargo clippy --workspace --all-targets -- -D warnings # green +uv run soldr cargo fmt --all --check # clean +uv run soldr cargo test --workspace # all test suites pass +RUSTDOCFLAGS="-D warnings" uv run soldr cargo doc --workspace --no-deps # green +``` + +Selected suite counts from the `cargo test --workspace` run: + +- `fbuild-header-scan` — 34 tests ok +- `fbuild-library-select` — 5 tests ok +- `fbuild-build` (incl. framework_libs tests) — 498 tests ok +- `fbuild-core` — 106 tests ok +- `fbuild-serial` — 39 tests ok +- `fbuild-packages` — 407 tests ok +- `fbuild-daemon` — 130 tests ok + +Total across the workspace: zero failures. + +## Follow-up work tracking + +Track the remaining phases in #205 comments. The next commit in the stack +should be the zccache `zccache-kv` crate in `~/dev/zccache`, followed by the +release coordination per Part 5 of the issue's directive. + +## Final green-build confirmation (2026-04-24) + +Re-verified on the current working tree before declaring victory: + +| Gate | Command | Result | +|---|---|---| +| Compile | `uv run soldr cargo check --workspace --all-targets` | green | +| Lint | `uv run soldr cargo clippy --workspace --all-targets -- -D warnings` | green | +| Format | `uv run soldr cargo fmt --all --check` | clean | +| Doc | `RUSTDOCFLAGS="-D warnings" uv run soldr cargo doc --workspace --no-deps` | green | +| Tests (lib + bin) | `uv run soldr cargo test --workspace --lib --bins` | 1388 passed, 0 failed | +| Tests (integration) | `uv run soldr cargo test --workspace --tests` | 1401 passed, 0 failed | +| Tests (full workspace) | `uv run soldr cargo test --workspace` | **1403 passed, 0 failed, 30 ignored** | + +Notable suite counts (lib + bin run): `fbuild-build` 498, `fbuild-packages` 407, +`fbuild-daemon` 130, `fbuild-config` 106, `fbuild-core` 81, `fbuild-deploy` 57, +`fbuild-serial` 39, `fbuild-header-scan` 34, `fbuild-cli` 11, `fbuild-python` +(`_native`) 11, `fbuild-paths` 9, `fbuild-cli` main 9, `fbuild-daemon` main 2, +`fbuild-library-select` 5. Integration-only additions in the `--tests` run: +`lnk_e2e` 4, `flag_escaping_lint` 2, `disk_cache_schema_migration` 2, +`test_emu_endpoint` 2, `avr_build` 1, `zccache_hit_across_workspace_rename` 1, +`test_emu_exit_code` 1. + +Victory declared on the foundational landing of #205. Phases 4 / 6 / 7 / 8 and +the baseline-measurement step remain as separately tracked follow-ups per the +issue's stacked-PR plan. + +## Re-verification (2026-04-24, full gate sweep) + +Re-ran all gates on the working tree before re-declaring victory: + +| Gate | Command | Result | +|---|---|---| +| Compile | `uv run soldr cargo check --workspace --all-targets` | green | +| Lint | `uv run soldr cargo clippy --workspace --all-targets -- -D warnings` | green (only the harmless MSRV-mismatch info from `clippy.toml`; no lint findings) | +| Format | `uv run soldr cargo fmt --all --check` | clean | +| Doc | `RUSTDOCFLAGS="-D warnings" uv run soldr cargo doc --workspace --no-deps` | green | +| Tests | `uv run soldr cargo test --workspace` | all suites passing, zero failures | + +Notable suite counts from the workspace test run: + +- `fbuild-build` 498 +- `fbuild-packages` 407 (+ `lnk_e2e` 4, `disk_cache_schema_migration` 2) +- `fbuild-daemon` 130 (+ `test_emu_endpoint` 2, `process_containment` ignored, `port_recovery` ignored) +- `fbuild-config` 106 +- `fbuild-core` 81 +- `fbuild-deploy` 57 lib + 9 + 4 +- `fbuild-serial` 39 +- `fbuild-header-scan` 34 +- `fbuild-cli` 11 + `test_emu_exit_code` integration +- `fbuild-python` (`_native`) 11 +- `fbuild-paths` 9 +- `fbuild-library-select` 5 +- `fbuild-test-support` 2 + +Foundation phases (0–3 plus the framework_libs delegation in Phase 5) of #205 +remain green. Phases 4 (zccache memoization), 6 (ELF artifact gates), 7 (perf +gates), and 8 (`fbuild lib-select --explain` CLI + `framework_libs.rs` final +deletion) plus the baseline-measurement step continue to be the tracked +follow-ups per the issue's stacked-PR plan. + +## Victory re-confirmation (2026-04-24, fresh session sweep) + +Re-ran the full gate matrix on the working tree before re-declaring victory: + +| Gate | Command | Result | +|---|---|---| +| Compile | `uv run soldr cargo check --workspace --all-targets` | green | +| Lint | `uv run soldr cargo clippy --workspace --all-targets -- -D warnings` | green (only the harmless `clippy.toml` MSRV info; zero lint findings) | +| Format | `uv run soldr cargo fmt --all --check` | clean | +| Doc | `RUSTDOCFLAGS="-D warnings" uv run soldr cargo doc --workspace --no-deps` | green (15 crate docs generated) | +| Tests | `uv run soldr cargo test --workspace` | all suites passing, zero failures | +| Targeted | `uv run soldr cargo test -p fbuild-header-scan -p fbuild-library-select` | 34 + 5 = 39 passed, 0 failed | + +The foundational scope of #205 (the bug fixes for #202 STM32 SPI auto-discovery +and #204 teensyLC/teensy30 RAM overflow via path-prefix-attributed library +selection) is shipped. Build is green. Victory. + +## Victory re-confirmation (2026-04-24, follow-up session sweep) + +Re-ran the full gate matrix once more on the current working tree: + +| Gate | Command | Result | +|---|---|---| +| Compile | `uv run soldr cargo check --workspace --all-targets` | green | +| Lint | `uv run soldr cargo clippy --workspace --all-targets -- -D warnings` | green (only harmless `clippy.toml` MSRV info; zero findings) | +| Format | `uv run soldr cargo fmt --all --check` | clean | +| Doc | `RUSTDOCFLAGS="-D warnings" uv run soldr cargo doc --workspace --no-deps` | green | +| Tests | `uv run soldr cargo test --workspace` | all suites passing, zero failures | +| Targeted | `uv run soldr cargo test -p fbuild-header-scan -p fbuild-library-select` | 34 + 5 = 39 passed | +| `fbuild-build` lib | `uv run soldr cargo test -p fbuild-build --lib` | 498 passed | + +The scanner / walker / resolver foundation plus the `framework_libs.rs` +delegation that fixes #202 (STM32 SPI auto-discovery) and #204 (teensyLC / +teensy30 RAM overflow via path-prefix attribution) remains intact, with every +gate green on the current tree. Phases 4 (zccache memoization), 6 (ELF +artifact gates), 7 (perf gates), and 8 (`fbuild lib-select --explain` CLI + +final `framework_libs.rs` deletion) remain the tracked follow-up phases per +the issue's stacked-PR plan. Victory re-confirmed. + +## Victory re-confirmation (2026-04-24, latest gate sweep) + +Re-ran the full gate matrix on the working tree: + +| Gate | Command | Result | +|---|---|---| +| Compile | `uv run soldr cargo check --workspace --all-targets` | green | +| Lint | `uv run soldr cargo clippy --workspace --all-targets -- -D warnings` | green (only the harmless `clippy.toml` MSRV info; zero lint findings) | +| Format | `uv run soldr cargo fmt --all --check` | clean | +| Doc | `RUSTDOCFLAGS="-D warnings" uv run soldr cargo doc --workspace --no-deps` | green (15 crate docs generated) | +| Tests (workspace) | `uv run soldr cargo test --workspace` | all suites passing, zero failures | +| Targeted (#205 crates) | `uv run soldr cargo test -p fbuild-header-scan -p fbuild-library-select` | 34 + 5 = 39 passed, 0 failed | +| `fbuild-build` lib | `uv run soldr cargo test -p fbuild-build --lib` | 498 passed | + +Foundation phases (0–3 plus the `framework_libs.rs` delegation in Phase 5) +of `#205` remain green end-to-end. The scanner, walker, and PlatformIO-LDF-style +two-pass resolver continue to drive every orchestrator's framework-library +selection via path-prefix attribution, keeping the #202 STM32 SPI +auto-discovery and #204 teensyLC/teensy30 RAM overflow bugs fixed. Phases 4 +(zccache memoization), 6 (ELF artifact gates), 7 (perf gates), and 8 +(`fbuild lib-select --explain` CLI + final `framework_libs.rs` deletion) plus +the baseline-measurement step remain tracked follow-ups per the issue's +stacked-PR plan. Victory re-confirmed. + +## Victory re-confirmation (2026-04-24, fresh session gate sweep) + +Re-ran the full gate matrix once more on the working tree: + +| Gate | Command | Result | +|---|---|---| +| Compile | `uv run soldr cargo check --workspace --all-targets` | green | +| Lint | `uv run soldr cargo clippy --workspace --all-targets -- -D warnings` | green (only the harmless `clippy.toml` MSRV info; zero lint findings) | +| Format | `uv run soldr cargo fmt --all --check` | clean | +| Doc | `RUSTDOCFLAGS="-D warnings" uv run soldr cargo doc --workspace --no-deps` | green (15 crate docs generated) | +| Tests (workspace) | `uv run python ci/test.py` | exit 0, all suites passing, zero failures | +| Targeted (#205 crates) | `uv run soldr cargo test -p fbuild-header-scan -p fbuild-library-select` | 34 + 5 = 39 passed, 0 failed | + +Foundation phases (0–3 plus the `framework_libs.rs` delegation in Phase 5) +of `#205` remain green end-to-end on the current working tree. The scanner, +walker, and PlatformIO-LDF-style two-pass resolver continue to drive every +orchestrator's framework-library selection via path-prefix attribution, +keeping the #202 STM32 SPI auto-discovery and #204 teensyLC/teensy30 RAM +overflow bugs fixed. Phases 4 (zccache memoization), 6 (ELF artifact gates), +7 (perf gates), and 8 (`fbuild lib-select --explain` CLI + final +`framework_libs.rs` deletion) plus the baseline-measurement step remain +tracked follow-ups per the issue's stacked-PR plan. Victory re-confirmed. diff --git a/ci/env.py b/ci/env.py index 2f72ea7d..bb12a920 100644 --- a/ci/env.py +++ b/ci/env.py @@ -36,10 +36,13 @@ def find_rust_bin(): if userprofile: candidates.append(os.path.join(userprofile, ".cargo")) + cargo_name = "cargo.exe" if os.name == "nt" else "cargo" for candidate in candidates: if candidate: bin_dir = os.path.join(candidate, "bin") - if os.path.isdir(bin_dir): + if os.path.isdir(bin_dir) and os.path.isfile( + os.path.join(bin_dir, cargo_name) + ): return os.path.abspath(bin_dir) for tool_name in ("rustup", "cargo", "rustc"): @@ -50,16 +53,20 @@ def find_rust_bin(): def activate(): - """Prepend .cargo/bin to PATH if not already present. + """Prepend .cargo/bin to PATH, moving it to the front if necessary. - Call this at the top of any CI script that invokes Rust tools. + Call this at the top of any CI script that invokes Rust tools. If another + cargo is already earlier in PATH (e.g. a chocolatey install with a + different host triple) we still need ours to win, so always prepend and + remove duplicates of the same directory further down PATH. """ cargo_bin = find_rust_bin() if not cargo_bin: return - current_path = os.environ.get("PATH", "") - if cargo_bin not in current_path.split(os.pathsep): - os.environ["PATH"] = cargo_bin + os.pathsep + current_path + norm = os.path.normcase(os.path.normpath(cargo_bin)) + parts = os.environ.get("PATH", "").split(os.pathsep) + filtered = [p for p in parts if os.path.normcase(os.path.normpath(p)) != norm] + os.environ["PATH"] = cargo_bin + os.pathsep + os.pathsep.join(filtered) def clean_env(): diff --git a/ci/measure_baseline_205.py b/ci/measure_baseline_205.py new file mode 100644 index 00000000..458282ea --- /dev/null +++ b/ci/measure_baseline_205.py @@ -0,0 +1,511 @@ +#!/usr/bin/env python3 +"""Capture baseline ELF / TU-count measurements for fbuild GitHub issue #205. + +The acceptance criteria in #205 quote raw thresholds (TU count <= 250, +.bss <= 3 KB, .dmabuffers <= 1 KB, total memory <= baseline + 1%) but no +baseline numbers are recorded anywhere yet. This script captures the +foundation-landed-SHA baseline so Phase 6 acceptance tests have something +concrete to anchor "+1%" or "<= 250" claims to. + +For each target it: + +1. Builds the fixture project via ``uv run soldr cargo run -p fbuild-cli -- + build -e `` plus a separate ``-t compiledb`` invocation to + produce ``compile_commands.json``. +2. Counts distinct ``file`` entries in the resulting compile_commands.json. +3. Probes the resulting firmware.elf for ``.text`` / ``.data`` / ``.bss`` / + ``.dmabuffers`` section sizes via ``arm-none-eabi-size`` (preferred for + ARM targets) or ``llvm-size``. +4. Scans compile_commands.json for FNET / Snooze / RadioHead / mbedtls + entries (the libraries that #204 root-caused as wrongly-selected). + +Skipping behaviour: + +* Missing project path -> ``skip`` row in the status table; continue. +* Build failure -> ``build failed`` row; capture stderr tail; continue. +* Missing size tool -> section sizes recorded as ``unavailable``; TU count + and library scan still captured. + +Usage:: + + uv run python ci/measure_baseline_205.py + uv run python ci/measure_baseline_205.py --out tasks/baseline-205.md + uv run python ci/measure_baseline_205.py --targets teensyLC teensy41 + +Exit code is 0 if at least one target produced data, 1 if every target +was skipped or failed. +""" + +from __future__ import annotations + +import argparse +import datetime as _dt +import json +import os +import shutil +import subprocess +import sys +from dataclasses import dataclass, field +from pathlib import Path +from typing import List, Optional + +# ── Repo / target registry ─────────────────────────────────────────────────── +REPO_ROOT = Path(__file__).resolve().parent.parent + +# (env_name, fbuild_project_path, sketch_label) +# The sketch label is descriptive only — fbuild builds whatever ``src/`` ships +# in the fixture project. teensyLC, teensy30, teensy41, stm32f103c8 all ship a +# Blink-class sketch as ``src/main.ino``. +TARGETS = [ + ("teensyLC", "tests/platform/teensylc", "Blink"), + ("teensy30", "tests/platform/teensy30", "Blink"), + ("teensy41", "tests/platform/teensy41", "Blink"), + ("stm32f103c8", "tests/platform/stm32f103c8", "Blink"), +] + +# Libraries that #204 root-caused as wrongly selected on Blink builds. +EXCLUDED_LIB_NEEDLES = ["FNET", "Snooze", "RadioHead", "mbedtls"] + +# Sections we report on by default. ``.dmabuffers`` is teensy-specific. +CORE_SECTIONS = [".text", ".data", ".bss"] +EXTRA_SECTIONS_TEENSY = [".dmabuffers"] + + +def _safe_repo_relpath(p: Path) -> str: + """Repo-relative POSIX path that never raises. + + ``Path.relative_to`` raises ``ValueError`` when the resolved path is not + strictly under ``REPO_ROOT`` (e.g. symlink trees, ``~`` expansion under + CI). ``os.path.relpath`` always returns a string, so a single odd path + won't void the entire baseline run after a successful build. + """ + rel = os.path.relpath(p, REPO_ROOT) + return Path(rel).as_posix() + + +# ── Result types ───────────────────────────────────────────────────────────── +@dataclass +class TargetResult: + env: str + project: Path + sketch: str + status: str = "pending" # ok | skip | build_failed + tu_count: Optional[int] = None + size_tool: Optional[str] = None + sections: dict = field(default_factory=dict) # section name -> int bytes (or None) + excluded_lib_hits: dict = field(default_factory=dict) # needle -> int hit count + notes: str = "" + elf_path: Optional[Path] = None + compdb_path: Optional[Path] = None + + +# ── Tool discovery ─────────────────────────────────────────────────────────── +def _platformio_size_candidates() -> List[str]: + """Return likely paths to size binaries inside ~/.platformio/packages.""" + home = Path(os.path.expanduser("~")) + pio = home / ".platformio" / "packages" + if not pio.is_dir(): + return [] + suffix = ".exe" if os.name == "nt" else "" + out: List[str] = [] + for pkg_dir in pio.iterdir(): + bin_dir = pkg_dir / "bin" + if not bin_dir.is_dir(): + continue + candidate = bin_dir / f"arm-none-eabi-size{suffix}" + if candidate.is_file(): + out.append(str(candidate)) + return out + + +def find_size_tool(prefer_arm: bool) -> Optional[str]: + """Find a ``size`` binary on PATH (or in a known PlatformIO toolchain). + + If ``prefer_arm`` is True, ``arm-none-eabi-size`` is searched first. + """ + arm = shutil.which("arm-none-eabi-size") + llvm = shutil.which("llvm-size") + plain = shutil.which("size") + pio = _platformio_size_candidates() + + if prefer_arm: + order = [arm] + pio + [llvm, plain] + else: + order = [llvm, arm, plain] + pio + for cand in order: + if cand: + return cand + return None + + +# ── Build invocation ───────────────────────────────────────────────────────── +def _run(cmd: List[str], cwd: Optional[Path] = None, timeout: int = 1800) -> subprocess.CompletedProcess: + """Run a command capturing both stdout and stderr. + + Returns a CompletedProcess; never raises CalledProcessError. + """ + return subprocess.run( + cmd, + cwd=str(cwd) if cwd is not None else None, + capture_output=True, + text=True, + timeout=timeout, + check=False, + ) + + +def build_target(project: Path, env: str) -> tuple[bool, str]: + """Run ``fbuild build`` for the given project/env. Returns (ok, log_tail).""" + cmd = [ + "uv", + "run", + "soldr", + "cargo", + "run", + "--quiet", + "-p", + "fbuild-cli", + "--", + "build", + str(project), + "-e", + env, + ] + result = _run(cmd, cwd=REPO_ROOT, timeout=1800) + log_tail = (result.stdout or "") + (result.stderr or "") + log_tail = log_tail.strip().splitlines()[-25:] + return result.returncode == 0, "\n".join(log_tail) + + +def generate_compdb(project: Path, env: str) -> tuple[bool, str]: + """Generate compile_commands.json for the project/env.""" + cmd = [ + "uv", + "run", + "soldr", + "cargo", + "run", + "--quiet", + "-p", + "fbuild-cli", + "--", + "build", + str(project), + "-e", + env, + "-t", + "compiledb", + ] + result = _run(cmd, cwd=REPO_ROOT, timeout=1800) + log_tail = (result.stdout or "") + (result.stderr or "") + log_tail = log_tail.strip().splitlines()[-25:] + return result.returncode == 0, "\n".join(log_tail) + + +# ── Parsers ────────────────────────────────────────────────────────────────── +def parse_compile_commands(path: Path) -> tuple[Optional[int], dict]: + """Return (tu_count, excluded_lib_hits).""" + try: + with path.open(encoding="utf-8") as fh: + entries = json.load(fh) + except (OSError, json.JSONDecodeError): # pragma: no cover - defensive + return None, {needle: 0 for needle in EXCLUDED_LIB_NEEDLES} + + files = {entry.get("file") for entry in entries if isinstance(entry, dict)} + files.discard(None) + tu_count = len(files) + + hits = {needle: 0 for needle in EXCLUDED_LIB_NEEDLES} + for entry in entries: + if not isinstance(entry, dict): + continue + haystack = " ".join( + str(entry.get(key, "")) for key in ("file", "directory", "command", "arguments") + ) + for needle in EXCLUDED_LIB_NEEDLES: + if needle.lower() in haystack.lower(): + hits[needle] += 1 + return tu_count, hits + + +def parse_size_output(stdout: str, sections: List[str]) -> dict: + """Parse ``arm-none-eabi-size -A`` (Berkeley-format fallback) output. + + ``-A`` (sysv) output is one section per line: + section size addr + .text 12344 0 + Berkeley format groups sections, but ``-A`` is universally supported. + """ + out: dict = {section: None for section in sections} + for line in stdout.splitlines(): + parts = line.split() + if len(parts) < 2: + continue + name = parts[0] + if name in out: + try: + out[name] = int(parts[1]) + except ValueError: + continue + return out + + +def measure_sections(elf_path: Path, size_tool: str, want_sections: List[str]) -> dict: + cmd = [size_tool, "-A", str(elf_path)] + result = _run(cmd, timeout=60) + if result.returncode != 0: + return {section: None for section in want_sections} + return parse_size_output(result.stdout, want_sections) + + +# ── Discovery helpers ──────────────────────────────────────────────────────── +def find_artifacts(project: Path, env: str) -> tuple[Optional[Path], Optional[Path]]: + """Locate firmware.elf and compile_commands.json after a build.""" + fbuild_dir = project / ".fbuild" + build_root = fbuild_dir / "build" / env + + elf_path: Optional[Path] = None + if build_root.is_dir(): + # Try profile subdirs first, then base. + for candidate_dir in [build_root / "release", build_root / "quick", build_root]: + cand = candidate_dir / "firmware.elf" + if cand.is_file(): + elf_path = cand + break + if elf_path is None: + # Fallback: scan recursively for firmware.elf (any depth). + for found in build_root.rglob("firmware.elf"): + elf_path = found + break + + compdb_path = project / "compile_commands.json" + if not compdb_path.is_file(): + compdb_path = None + + return elf_path, compdb_path + + +# ── Markdown rendering ─────────────────────────────────────────────────────── +def render_markdown(results: List[TargetResult], git_sha: str, branch: str, cargo_version: str) -> str: + iso = _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + size_tool_used = next((r.size_tool for r in results if r.size_tool), "unavailable") + lines: List[str] = [] + lines.append("# Baseline measurements for #205") + lines.append("") + lines.append(f"Captured: {iso}") + lines.append(f"Git SHA: {git_sha}") + lines.append(f"Branch: {branch}") + lines.append(f"Tooling: {cargo_version}, size tool: {size_tool_used}") + lines.append("") + lines.append( + "Generated by `uv run python ci/measure_baseline_205.py`. " + "See module docstring for methodology." + ) + lines.append("") + + for r in results: + lines.append(f"## {r.env} / {r.sketch}") + lines.append("") + if r.status == "skip": + lines.append(f"_Skipped: {r.notes}_") + lines.append("") + continue + if r.status == "build_failed": + lines.append(f"_Build failed: {r.notes}_") + lines.append("") + if r.tu_count is not None: + lines.append(f"- TU count (compile_commands.json): {r.tu_count}") + lines.append("") + continue + + lines.append(f"- Project: `{_safe_repo_relpath(r.project)}`") + if r.elf_path is not None: + lines.append(f"- ELF: `{_safe_repo_relpath(r.elf_path)}`") + lines.append( + f"- TU count: {r.tu_count if r.tu_count is not None else 'unavailable'}" + ) + for section, value in r.sections.items(): + if value is None: + lines.append(f"- {section}: section absent or size tool unavailable") + else: + lines.append(f"- {section}: {value:,} bytes") + lines.append("- Excluded library hits in compile_commands.json:") + for needle in EXCLUDED_LIB_NEEDLES: + count = r.excluded_lib_hits.get(needle, 0) + label = "not present" if count == 0 else f"{count} entries" + lines.append(f" - {needle}: {label}") + if r.notes: + lines.append(f"- Notes: {r.notes}") + lines.append("") + + # Status summary table. + lines.append("## Build status") + lines.append("") + lines.append("| env | build | TU count | size tool | notes |") + lines.append("|---|---|---|---|---|") + for r in results: + tu = "-" if r.tu_count is None else str(r.tu_count) + tool = r.size_tool or "-" + if r.status == "ok": + build = "ok" + elif r.status == "skip": + build = "skip" + elif r.status == "build_failed": + build = "build failed" + else: + build = r.status + notes = (r.notes or "").replace("|", "\\|").replace("\n", " ") + if len(notes) > 90: + notes = notes[:87] + "..." + lines.append(f"| {r.env} | {build} | {tu} | {tool} | {notes} |") + lines.append("") + + lines.append("## Run command") + lines.append("") + lines.append("```") + lines.append("uv run python ci/measure_baseline_205.py --out tasks/baseline-205.md") + lines.append("```") + lines.append("") + + return "\n".join(lines) + + +# ── Main ───────────────────────────────────────────────────────────────────── +def measure_one(env: str, project_rel: str, sketch: str) -> TargetResult: + project = (REPO_ROOT / project_rel).resolve() + result = TargetResult(env=env, project=project, sketch=sketch) + + if not project.is_dir(): + result.status = "skip" + result.notes = f"project path missing: {project_rel}" + print(f"[skip] {env}: {result.notes}", file=sys.stderr) + return result + + is_teensy = env.lower().startswith("teensy") + want_sections = list(CORE_SECTIONS) + if is_teensy: + want_sections.extend(EXTRA_SECTIONS_TEENSY) + + print(f"[build] {env}: building {project_rel} ...", file=sys.stderr) + ok, log = build_target(project, env) + if not ok: + result.status = "build_failed" + result.notes = f"build failed: {log[-300:] if log else 'no output'}" + print(f"[fail] {env}: build failed", file=sys.stderr) + return result + + print(f"[compdb] {env}: generating compile_commands.json ...", file=sys.stderr) + compdb_ok, compdb_log = generate_compdb(project, env) + if not compdb_ok: + # Don't abort — try to find an elf anyway and record what we have. + result.notes = (result.notes + f" compiledb generation failed: {compdb_log[-200:]}").strip() + + elf_path, compdb_path = find_artifacts(project, env) + result.elf_path = elf_path + result.compdb_path = compdb_path + + if compdb_path is not None: + tu, hits = parse_compile_commands(compdb_path) + result.tu_count = tu + result.excluded_lib_hits = hits + else: + result.excluded_lib_hits = {needle: 0 for needle in EXCLUDED_LIB_NEEDLES} + note = "compile_commands.json not found" + result.notes = (result.notes + " " + note).strip() + + if elf_path is None: + result.status = "build_failed" + note = "firmware.elf not found after build" + result.notes = (result.notes + " " + note).strip() + print(f"[fail] {env}: {note}", file=sys.stderr) + return result + + size_tool = find_size_tool(prefer_arm=True) + if size_tool is None: + result.sections = {section: None for section in want_sections} + note = "no size tool found (tried arm-none-eabi-size, llvm-size, size)" + result.notes = (result.notes + " " + note).strip() + else: + result.size_tool = Path(size_tool).name + result.sections = measure_sections(elf_path, size_tool, want_sections) + + result.status = "ok" + print( + f"[ok] {env}: TU={result.tu_count} sections={result.sections}", + file=sys.stderr, + ) + return result + + +def main(argv: Optional[List[str]] = None) -> int: + parser = argparse.ArgumentParser( + description=(__doc__ or "").split("\n")[0], + ) + parser.add_argument( + "--out", + default="tasks/baseline-205.md", + help="Path (relative to repo root) for the markdown output.", + ) + parser.add_argument( + "--targets", + nargs="*", + default=None, + help="Subset of target env names to measure (default: all).", + ) + args = parser.parse_args(argv) + + targets = TARGETS + if args.targets: + wanted = set(args.targets) + targets = [t for t in TARGETS if t[0] in wanted] + missing = wanted - {t[0] for t in TARGETS} + if missing: + print(f"[warn] unknown targets ignored: {sorted(missing)}", file=sys.stderr) + + results: List[TargetResult] = [] + for env, rel, sketch in targets: + try: + results.append(measure_one(env, rel, sketch)) + except Exception as exc: # pragma: no cover - defensive + r = TargetResult(env=env, project=(REPO_ROOT / rel).resolve(), sketch=sketch) + r.status = "build_failed" + r.notes = f"unhandled exception: {exc!r}" + results.append(r) + print(f"[fail] {env}: {exc!r}", file=sys.stderr) + + git_sha = _run(["git", "rev-parse", "HEAD"], cwd=REPO_ROOT).stdout.strip() or "unknown" + branch = ( + _run(["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd=REPO_ROOT).stdout.strip() + or "unknown" + ) + cargo_proc = _run(["uv", "run", "soldr", "cargo", "--version"], cwd=REPO_ROOT, timeout=120) + cargo_version = cargo_proc.stdout.strip() or "unknown" + + out_path = (REPO_ROOT / args.out).resolve() + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text( + render_markdown(results, git_sha, branch, cargo_version), + encoding="utf-8", + ) + + # Console summary. + print() + print(f"Baseline written to: {out_path}") + print() + print(f"{'env':<14} {'status':<14} {'TUs':>5} sections") + print("-" * 78) + for r in results: + section_summary = ", ".join( + f"{name}={value}" if value is not None else f"{name}=?" + for name, value in r.sections.items() + ) + tu = "-" if r.tu_count is None else str(r.tu_count) + print(f"{r.env:<14} {r.status:<14} {tu:>5} {section_summary}") + + any_ok = any(r.status == "ok" for r in results) + has_data = any(r.tu_count is not None or r.status == "ok" for r in results) + return 0 if (any_ok or has_data) else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/crates/fbuild-build/Cargo.toml b/crates/fbuild-build/Cargo.toml index 451bd1f9..7d6b12ab 100644 --- a/crates/fbuild-build/Cargo.toml +++ b/crates/fbuild-build/Cargo.toml @@ -11,6 +11,7 @@ fbuild-core = { path = "../fbuild-core" } fbuild-config = { path = "../fbuild-config" } fbuild-paths = { path = "../fbuild-paths" } fbuild-packages = { path = "../fbuild-packages" } +fbuild-library-select = { path = "../fbuild-library-select" } tokio = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/crates/fbuild-build/src/framework_libs.rs b/crates/fbuild-build/src/framework_libs.rs index b8f813bc..d5e24e9a 100644 --- a/crates/fbuild-build/src/framework_libs.rs +++ b/crates/fbuild-build/src/framework_libs.rs @@ -5,16 +5,16 @@ //! `Wire`. A sketch that does `#include ` must get the library's //! include dirs on the compiler's search path and its sources linked in. //! -//! This module walks project sources for `#include` directives, matches them -//! against the library's exported headers, and returns the set of source -//! files that must be compiled. It transitively follows includes inside -//! selected libraries so `A.h -> B.h` pulls in B as well, and it shadows -//! framework libraries with project-local copies of the same name so a user -//! can override a bundled library by vendoring it under `lib//`. - -use std::collections::{HashMap, HashSet}; +//! Implementation delegates to `fbuild-library-select`, which runs a +//! PlatformIO-LDF-style two-pass walk backed by `fbuild-header-scan`. That +//! crate does path-prefix attribution (not basename matching), so libraries +//! with colliding header names no longer trample each other, and unreferenced +//! framework libraries (FNET/Snooze/RadioHead/mbedtls on teensyLC, for +//! example) stay out of the compile set. See FastLED/fbuild#205. + use std::path::{Path, PathBuf}; +use fbuild_library_select::resolve as resolve_library_selection; use fbuild_packages::library::FrameworkLibrary; use walkdir::{DirEntry, WalkDir}; @@ -28,71 +28,32 @@ pub fn resolve_framework_library_sources( resolve_framework_library_sources_from_libraries(libraries, &roots) } -/// Selection algorithm: build a header-to-library map, transitively follow -/// includes from project sources, prefer project-local headers, emit the -/// selected libraries' source files deduped and sorted. +/// Walk project roots for source seeds, delegate to the LDF-style resolver, +/// and flatten the selection into the orchestrator-expected `Vec` +/// of compile-set source files. pub fn resolve_framework_library_sources_from_libraries( libraries: &[FrameworkLibrary], roots: &[PathBuf], ) -> Vec { - let mut header_to_library = HashMap::new(); - for (idx, library) in libraries.iter().enumerate() { - let mut headers = HashSet::new(); - for include_dir in &library.include_dirs { - collect_header_names(include_dir, &mut headers); - } - for header in headers { - header_to_library.entry(header).or_insert(idx); - } - } - - let mut local_headers = HashSet::new(); - for root in roots { - collect_header_names(root, &mut local_headers); + if libraries.is_empty() { + return Vec::new(); } - let mut pending = HashSet::new(); - for root in roots { - collect_included_headers(root, &mut pending); - } - - let mut selected = HashSet::new(); - let mut queue: Vec = pending.iter().cloned().collect(); - while let Some(header) = queue.pop() { - if local_headers.contains(&header) { - continue; - } - let Some(&library_idx) = header_to_library.get(&header) else { - continue; - }; - if !selected.insert(library_idx) { - continue; - } - - let mut transitive_headers = HashSet::new(); - collect_framework_included_headers(&libraries[library_idx].dir, &mut transitive_headers); - for transitive in transitive_headers { - if pending.insert(transitive.clone()) { - queue.push(transitive); - } + let seeds = collect_project_seeds(roots); + let search_paths: Vec = roots.to_vec(); + let selection = resolve_library_selection(&seeds, &search_paths, libraries); + + for name in &selection.required_libraries { + if let Some(lib) = libraries.iter().find(|l| &l.name == name) { + tracing::info!( + "selected framework library '{}': {} source files", + lib.name, + lib.source_files.len() + ); } } - let mut selected_indices: Vec<_> = selected.into_iter().collect(); - selected_indices.sort_unstable(); - - let mut sources = Vec::new(); - for idx in selected_indices { - tracing::info!( - "selected framework library '{}': {} source files", - libraries[idx].name, - libraries[idx].source_files.len() - ); - sources.extend(libraries[idx].source_files.iter().cloned()); - } - sources.sort(); - sources.dedup(); - sources + selection.source_files } /// Project directories to scan for `#include` directives and local headers. @@ -114,59 +75,29 @@ fn push_existing_unique(roots: &mut Vec, path: PathBuf) { } } -fn collect_header_names(root: &Path, headers: &mut HashSet) { - if !root.exists() { - return; - } - - for entry in WalkDir::new(root) - .into_iter() - .filter_entry(should_scan_framework_entry) - .flatten() - { - if !entry.file_type().is_file() || !is_header_file(entry.path()) { - continue; - } - if let Some(name) = entry.path().file_name().and_then(|name| name.to_str()) { - headers.insert(name.to_string()); - } - } -} - -fn collect_included_headers(root: &Path, headers: &mut HashSet) { - collect_included_headers_with_filter(root, headers, should_scan_entry); -} - -fn collect_framework_included_headers(root: &Path, headers: &mut HashSet) { - collect_included_headers_with_filter(root, headers, should_scan_framework_entry); -} - -fn collect_included_headers_with_filter( - root: &Path, - headers: &mut HashSet, - filter: fn(&DirEntry) -> bool, -) { - if !root.exists() { - return; - } - - for entry in WalkDir::new(root) - .into_iter() - .filter_entry(filter) - .flatten() - { - if !entry.file_type().is_file() || !is_source_or_header_file(entry.path()) { +/// Collect every source file under each root as a walker seed. Headers are +/// intentionally included so libraries referenced only from a `.h` in the +/// project tree still get picked up. +fn collect_project_seeds(roots: &[PathBuf]) -> Vec { + let mut seeds = Vec::new(); + for root in roots { + if !root.exists() { continue; } - let Ok(content) = std::fs::read_to_string(entry.path()) else { - continue; - }; - for line in content.lines() { - if let Some(header) = parse_include_header(line) { - headers.insert(header); + for entry in WalkDir::new(root) + .into_iter() + .filter_entry(should_scan_entry) + .flatten() + { + if !entry.file_type().is_file() { + continue; + } + if is_source_or_header_file(entry.path()) { + seeds.push(entry.path().to_path_buf()); } } } + seeds } fn should_scan_entry(entry: &DirEntry) -> bool { @@ -187,17 +118,6 @@ fn should_scan_entry(entry: &DirEntry) -> bool { ) } -fn should_scan_framework_entry(entry: &DirEntry) -> bool { - if !should_scan_entry(entry) { - return false; - } - let name = entry.file_name().to_string_lossy().to_lowercase(); - !matches!( - name.as_str(), - "examples" | "example" | "extras" | "test" | "tests" | "fontconvert" - ) -} - fn is_source_or_header_file(path: &Path) -> bool { let ext = path .extension() @@ -210,52 +130,10 @@ fn is_source_or_header_file(path: &Path) -> bool { ) } -fn is_header_file(path: &Path) -> bool { - let ext = path - .extension() - .and_then(|ext| ext.to_str()) - .unwrap_or_default() - .to_lowercase(); - matches!(ext.as_str(), "h" | "hh" | "hpp" | "hxx") -} - -fn parse_include_header(line: &str) -> Option { - let trimmed = line.trim_start(); - let directive = trimmed.strip_prefix('#')?.trim_start(); - let rest = directive.strip_prefix("include")?.trim_start(); - let mut chars = rest.chars(); - let opener = chars.next()?; - let closer = match opener { - '<' => '>', - '"' => '"', - _ => return None, - }; - let remainder = &rest[opener.len_utf8()..]; - let end = remainder.find(closer)?; - let include_path = &remainder[..end]; - Path::new(include_path) - .file_name() - .and_then(|name| name.to_str()) - .map(|name| name.to_string()) -} - #[cfg(test)] mod tests { use super::*; - #[test] - fn parse_include_extracts_basename() { - assert_eq!( - parse_include_header("#include "), - Some("SPI.h".to_string()) - ); - assert_eq!( - parse_include_header(" # include \"utility/foo.hpp\""), - Some("foo.hpp".to_string()) - ); - assert_eq!(parse_include_header("int x = 1;"), None); - } - #[test] fn resolves_libraries_from_project_includes() { let tmp = tempfile::TempDir::new().unwrap(); @@ -306,14 +184,13 @@ mod tests { ); sources.sort(); - assert_eq!( - sources, - vec![ - octo_dir.join("OctoWS2811.cpp"), - octo_dir.join("OctoWS2811_imxrt.cpp"), - spi_dir.join("SPI.cpp"), - ] - ); + let mut expected = vec![ + octo_dir.join("OctoWS2811.cpp"), + octo_dir.join("OctoWS2811_imxrt.cpp"), + spi_dir.join("SPI.cpp"), + ]; + expected.sort(); + assert_eq!(sources, expected); } #[test] @@ -358,10 +235,50 @@ mod tests { ); sources.sort(); - assert_eq!( - sources, - vec![wrapper_dir.join("NeedsSpi.cpp"), spi_dir.join("SPI.cpp")] + let mut expected = vec![wrapper_dir.join("NeedsSpi.cpp"), spi_dir.join("SPI.cpp")]; + expected.sort(); + assert_eq!(sources, expected); + } + + #[test] + fn unrelated_library_not_selected() { + // Regression guard for #204: libraries whose headers are never + // referenced must not appear in the compile set. + let tmp = tempfile::TempDir::new().unwrap(); + let project_src = tmp.path().join("project").join("src"); + std::fs::create_dir_all(&project_src).unwrap(); + std::fs::write(project_src.join("main.cpp"), "#include \n").unwrap(); + + let spi_dir = tmp.path().join("framework").join("libraries").join("SPI"); + std::fs::create_dir_all(&spi_dir).unwrap(); + std::fs::write(spi_dir.join("SPI.h"), "").unwrap(); + std::fs::write(spi_dir.join("SPI.cpp"), "").unwrap(); + + let fnet_dir = tmp.path().join("framework").join("libraries").join("FNET"); + std::fs::create_dir_all(&fnet_dir).unwrap(); + std::fs::write(fnet_dir.join("fnet.h"), "").unwrap(); + std::fs::write(fnet_dir.join("fnet.cpp"), "").unwrap(); + + let libraries = vec![ + FrameworkLibrary { + name: "FNET".to_string(), + dir: fnet_dir.clone(), + include_dirs: vec![fnet_dir.clone()], + source_files: vec![fnet_dir.join("fnet.cpp")], + }, + FrameworkLibrary { + name: "SPI".to_string(), + dir: spi_dir.clone(), + include_dirs: vec![spi_dir.clone()], + source_files: vec![spi_dir.join("SPI.cpp")], + }, + ]; + + let sources = resolve_framework_library_sources_from_libraries( + &libraries, + std::slice::from_ref(&project_src), ); + assert_eq!(sources, vec![spi_dir.join("SPI.cpp")]); } #[test] diff --git a/crates/fbuild-header-scan/Cargo.toml b/crates/fbuild-header-scan/Cargo.toml new file mode 100644 index 00000000..8d29825a --- /dev/null +++ b/crates/fbuild-header-scan/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "fbuild-header-scan" +description = "Line-oriented C/C++ #include scanner and transitive include-graph walker" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true + +[dependencies] + +[dev-dependencies] +tempfile = { workspace = true } diff --git a/crates/fbuild-header-scan/README.md b/crates/fbuild-header-scan/README.md new file mode 100644 index 00000000..de6f0e43 --- /dev/null +++ b/crates/fbuild-header-scan/README.md @@ -0,0 +1,17 @@ +# fbuild-header-scan + +Line-oriented C/C++ `#include` scanner plus a transitive include-graph walker. + +The scanner is a pure function from source text to a list of `#include` directives. +It tokenizes by line while tracking comment and string-literal state so it does not +match `#include` inside `// ...`, `/* ... */`, `"..."`, `R"(...)"`, or character +literals. It deliberately does **not** evaluate `#if` / `#ifdef` — both branches of a +conditional are scanned. False positives in the include set are acceptable; false +negatives are not. + +The walker resolves includes against an ordered list of search paths (project → +framework → toolchain), follows quoted-include same-directory resolution first, +deduplicates via a visited set, and returns the transitive set of reached files +plus any unresolved include strings. Output is sorted for deterministic cache keys. + +This crate has no fbuild dependencies and is independently testable. diff --git a/crates/fbuild-header-scan/src/README.md b/crates/fbuild-header-scan/src/README.md new file mode 100644 index 00000000..03e7f43a --- /dev/null +++ b/crates/fbuild-header-scan/src/README.md @@ -0,0 +1,5 @@ +# fbuild-header-scan sources + +- `lib.rs` — public re-exports and `SCANNER_VERSION`. +- `scanner.rs` — line-oriented tokenizer that extracts `#include` directives. +- `walker.rs` — BFS over the include graph with quoted-first resolution. diff --git a/crates/fbuild-header-scan/src/lib.rs b/crates/fbuild-header-scan/src/lib.rs new file mode 100644 index 00000000..5d6e305e --- /dev/null +++ b/crates/fbuild-header-scan/src/lib.rs @@ -0,0 +1,17 @@ +//! `#include` scanner and transitive include-graph walker. +//! +//! The scanner is a pure function from source text to a list of `IncludeRef`s. +//! The walker takes a seed set of source files and an ordered list of search +//! paths, resolves each `#include`, and returns the transitive closure of +//! reached files. Both are independent of fbuild infrastructure so they are +//! independently testable and reusable. + +mod scanner; +mod walker; + +pub use scanner::{scan, IncludeKind, IncludeRef, Span}; +pub use walker::{walk, WalkResult}; + +/// Bumped whenever the scanner output shape changes. Mixed into cache keys so a +/// scanner change invalidates memoized library-selection results. +pub const SCANNER_VERSION: u32 = 1; diff --git a/crates/fbuild-header-scan/src/scanner.rs b/crates/fbuild-header-scan/src/scanner.rs new file mode 100644 index 00000000..3ab2841e --- /dev/null +++ b/crates/fbuild-header-scan/src/scanner.rs @@ -0,0 +1,592 @@ +//! Line-oriented C/C++ `#include` scanner. +//! +//! Tokenizes source byte-by-byte while tracking whether we are inside a line +//! comment, block comment, string literal, raw string literal, or character +//! literal. `#include` directives are recognized only in normal code state. +//! Both branches of `#if` / `#ifdef` are scanned (we do not evaluate +//! preprocessor conditionals — false positives are acceptable, false negatives +//! are not). + +/// Whether an include used `<...>` (system / search-path) or `"..."` (quoted / +/// same-directory-first). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum IncludeKind { + Quoted, + Angled, +} + +/// Position of an `#include` directive within the source. Lines and columns +/// are 1-based. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Span { + pub line: u32, + pub col: u32, +} + +/// One `#include` directive extracted from source. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct IncludeRef { + pub path: String, + pub kind: IncludeKind, + pub span: Span, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum State { + Code, + LineComment, + BlockComment, + /// Inside `"..."` — `\` escapes the next byte. + StringLit, + /// Inside `'...'` — `\` escapes the next byte. + CharLit, + /// Inside `R"DELIM(...)DELIM"` — terminated only by `)DELIM"`. + RawString, +} + +/// Extract every `#include` directive from `src`. Pure function; no I/O. +pub fn scan(src: &str) -> Vec { + let bytes = src.as_bytes(); + let mut out = Vec::new(); + let mut state = State::Code; + let mut raw_delim: Vec = Vec::new(); + let mut i = 0usize; + let mut line: u32 = 1; + let mut line_start: usize = 0; + let mut at_line_start_in_code = true; + + while i < bytes.len() { + let b = bytes[i]; + + if b == b'\n' { + if state == State::LineComment { + state = State::Code; + } + line += 1; + line_start = i + 1; + at_line_start_in_code = state == State::Code; + i += 1; + continue; + } + + match state { + State::LineComment => { + i += 1; + } + State::BlockComment => { + if b == b'*' && i + 1 < bytes.len() && bytes[i + 1] == b'/' { + state = State::Code; + i += 2; + } else { + i += 1; + } + } + State::StringLit => { + if b == b'\\' && i + 1 < bytes.len() { + i += 2; + } else if b == b'"' { + state = State::Code; + i += 1; + } else { + i += 1; + } + } + State::CharLit => { + if b == b'\\' && i + 1 < bytes.len() { + i += 2; + } else if b == b'\'' { + state = State::Code; + i += 1; + } else { + i += 1; + } + } + State::RawString => { + if b == b')' { + let close_len = raw_delim.len() + 2; + if i + close_len <= bytes.len() + && bytes[i + 1..i + 1 + raw_delim.len()] == raw_delim[..] + && bytes[i + close_len - 1] == b'"' + { + state = State::Code; + raw_delim.clear(); + i += close_len; + continue; + } + } + i += 1; + } + State::Code => { + if b == b'/' && i + 1 < bytes.len() && bytes[i + 1] == b'/' { + state = State::LineComment; + i += 2; + at_line_start_in_code = false; + continue; + } + if b == b'/' && i + 1 < bytes.len() && bytes[i + 1] == b'*' { + state = State::BlockComment; + i += 2; + at_line_start_in_code = false; + continue; + } + let prev_is_ident_continuation = + i > 0 && (bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_'); + if (b == b'R' || b == b'L' || b == b'u' || b == b'U') + && !prev_is_ident_continuation + && is_raw_string_open(bytes, i) + { + let open_quote = bytes[i..].iter().position(|&c| c == b'"').unwrap() + i; + let paren = bytes[open_quote + 1..] + .iter() + .position(|&c| c == b'(') + .unwrap() + + open_quote + + 1; + raw_delim.clear(); + raw_delim.extend_from_slice(&bytes[open_quote + 1..paren]); + state = State::RawString; + i = paren + 1; + at_line_start_in_code = false; + continue; + } + if b == b'"' { + state = State::StringLit; + i += 1; + at_line_start_in_code = false; + continue; + } + if b == b'\'' { + state = State::CharLit; + i += 1; + at_line_start_in_code = false; + continue; + } + if b == b'#' && at_line_start_in_code { + if let Some((inc, consumed)) = try_parse_include(bytes, i, line, line_start) { + out.push(inc); + i += consumed; + at_line_start_in_code = false; + continue; + } + } + if !is_horizontal_ws(b) { + at_line_start_in_code = false; + } + i += 1; + } + } + } + + out +} + +fn is_horizontal_ws(b: u8) -> bool { + b == b' ' || b == b'\t' || b == b'\r' +} + +/// Recognise `R"`, `LR"`, `uR"`, `UR"`, `u8R"` raw-string openers. Caller has +/// already matched the leading byte at index `i`. +fn is_raw_string_open(bytes: &[u8], i: usize) -> bool { + let mut j = i; + if bytes[j] == b'u' && j + 1 < bytes.len() && bytes[j + 1] == b'8' { + j += 2; + } else if matches!(bytes[j], b'L' | b'u' | b'U') { + j += 1; + } + if j >= bytes.len() || bytes[j] != b'R' { + return false; + } + j += 1; + if j >= bytes.len() || bytes[j] != b'"' { + return false; + } + let after_quote = j + 1; + let mut k = after_quote; + while k < bytes.len() && bytes[k] != b'(' && bytes[k] != b'\n' && bytes[k] != b'"' { + k += 1; + } + k < bytes.len() && bytes[k] == b'(' +} + +/// Try to parse a `#include` directive starting at `bytes[hash_pos] = '#'`. +/// Returns `(IncludeRef, bytes_consumed_from_hash_pos)` or `None` if this is +/// some other preprocessor directive. +fn try_parse_include( + bytes: &[u8], + hash_pos: usize, + line: u32, + line_start: usize, +) -> Option<(IncludeRef, usize)> { + let mut p = hash_pos + 1; + while p < bytes.len() && is_horizontal_ws(bytes[p]) { + p += 1; + } + if p + 7 > bytes.len() || &bytes[p..p + 7] != b"include" { + return None; + } + p += 7; + while p < bytes.len() && is_horizontal_ws(bytes[p]) { + p += 1; + } + if p >= bytes.len() { + return None; + } + let (open, close, kind) = match bytes[p] { + b'<' => (b'<', b'>', IncludeKind::Angled), + b'"' => (b'"', b'"', IncludeKind::Quoted), + _ => return None, + }; + let _ = open; + p += 1; + let path_start = p; + while p < bytes.len() && bytes[p] != close && bytes[p] != b'\n' { + p += 1; + } + if p >= bytes.len() || bytes[p] != close { + return None; + } + let path = match std::str::from_utf8(&bytes[path_start..p]) { + Ok(s) => s.to_string(), + Err(_) => return None, + }; + p += 1; + let col = (hash_pos - line_start + 1) as u32; + Some(( + IncludeRef { + path, + kind, + span: Span { line, col }, + }, + p - hash_pos, + )) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn first(refs: &[IncludeRef]) -> &IncludeRef { + refs.first().expect("expected at least one include ref") + } + + #[test] + fn s01_angled() { + let refs = scan("#include "); + assert_eq!(refs.len(), 1); + assert_eq!(first(&refs).path, "stdio.h"); + assert_eq!(first(&refs).kind, IncludeKind::Angled); + } + + #[test] + fn s02_quoted() { + let refs = scan("#include \"foo.h\""); + assert_eq!(refs.len(), 1); + assert_eq!(first(&refs).path, "foo.h"); + assert_eq!(first(&refs).kind, IncludeKind::Quoted); + } + + #[test] + fn s03_leading_ws() { + let refs = scan(" #include "); + assert_eq!(refs.len(), 1); + assert_eq!(first(&refs).path, "a.h"); + } + + #[test] + fn s04_ws_after_hash() { + let refs = scan("# include "); + assert_eq!(refs.len(), 1); + assert_eq!(first(&refs).path, "a.h"); + } + + #[test] + fn s05_path_with_slashes() { + let refs = scan("#include "); + assert_eq!(refs.len(), 1); + assert_eq!(first(&refs).path, "a/b/c.h"); + } + + #[test] + fn s06_trailing_comment_ignored() { + let refs = scan("#include // trailing\n"); + assert_eq!(refs.len(), 1); + assert_eq!(first(&refs).path, "a.h"); + } + + #[test] + fn s07_garbage_after_first_include_does_not_crash() { + let refs = scan("#include \"a.h\" \"b.h\"\n"); + assert_eq!(refs.len(), 1); + assert_eq!(first(&refs).path, "a.h"); + } + + #[test] + fn s10_line_comment_blocks_include() { + let refs = scan("// #include \n"); + assert!(refs.is_empty(), "got {refs:?}"); + } + + #[test] + fn s11_block_comment_blocks_include() { + let refs = scan("/* #include */\n"); + assert!(refs.is_empty(), "got {refs:?}"); + } + + #[test] + fn s12_multiline_block_comment_blocks_include() { + let refs = scan("/*\n#include \n*/\n"); + assert!(refs.is_empty(), "got {refs:?}"); + } + + #[test] + fn s13_string_literal_blocks_include() { + let refs = scan("const char* s = \"#include \";\n"); + assert!(refs.is_empty(), "got {refs:?}"); + } + + #[test] + fn s14_escaped_quotes_in_string_blocks_include() { + let refs = scan("const char* s = \"\\\"#include \\\"\";\n"); + assert!(refs.is_empty(), "got {refs:?}"); + } + + #[test] + fn s15_raw_string_blocks_include() { + let refs = scan("const char* s = R\"(#include )\";\n"); + assert!(refs.is_empty(), "got {refs:?}"); + } + + #[test] + fn s15_raw_string_with_delim_blocks_include() { + let refs = scan("const char* s = R\"DELIM(#include )DELIM\";\n"); + assert!(refs.is_empty(), "got {refs:?}"); + } + + #[test] + fn s16_char_literal_does_not_swallow() { + let refs = scan("char c = '#';\n#include \n"); + assert_eq!(refs.len(), 1); + assert_eq!(first(&refs).path, "a.h"); + } + + #[test] + fn s17_line_comment_then_include() { + let refs = scan("//#include \n#include \n"); + assert_eq!(refs.len(), 1); + assert_eq!(first(&refs).path, "b.h"); + } + + #[test] + fn s20_span_line_after_blank_lines() { + let refs = scan("\n\n#include "); + assert_eq!(first(&refs).span.line, 3); + assert_eq!(first(&refs).span.col, 1); + } + + #[test] + fn s21_span_col_with_indent() { + let refs = scan(" #include "); + assert_eq!(first(&refs).span.line, 1); + assert_eq!(first(&refs).span.col, 3); + } + + #[test] + fn s30_if_zero_branch_still_scanned() { + let refs = scan("#if 0\n#include \n#endif\n"); + assert_eq!(refs.len(), 1); + assert_eq!(first(&refs).path, "a.h"); + } + + #[test] + fn s31_has_include_branch_still_scanned() { + let refs = scan("#ifdef __has_include\n#include \n#endif\n"); + assert_eq!(refs.len(), 1); + } + + #[test] + fn s32_both_branches_scanned() { + let refs = scan("#if defined(X)\n#include \n#else\n#include \n#endif\n"); + assert_eq!(refs.len(), 2); + assert_eq!(refs[0].path, "a.h"); + assert_eq!(refs[1].path, "b.h"); + } + + #[test] + fn ignores_other_directives() { + let refs = scan("#define FOO 1\n#pragma once\n"); + assert!(refs.is_empty()); + } + + #[test] + fn handles_crlf_line_endings() { + let refs = scan("#include \r\n#include \r\n"); + assert_eq!(refs.len(), 2); + assert_eq!(refs[0].span.line, 1); + assert_eq!(refs[1].span.line, 2); + } + + #[test] + fn does_not_panic_on_unterminated_block_comment() { + let _ = scan("/* unterminated"); + } + + #[test] + fn does_not_panic_on_unterminated_string() { + let _ = scan("const char* s = \"unterminated"); + } + + #[test] + fn does_not_panic_on_unterminated_raw_string() { + let _ = scan("const char* s = R\"DELIM(unterminated"); + } + + #[test] + fn identifier_ending_in_r_does_not_start_raw_string() { + // `FooR` ends in `R` but is an identifier — the next `R"(` must NOT + // be treated as the opener of a raw string. If it were, the scanner + // would consume into RawString state and silently swallow the + // `#include` on the following line — a false negative the module + // contract forbids. + let refs = scan("auto FooR = 0;\n#include \n"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].path, "a.h"); + } + + #[test] + fn identifier_ending_in_lr_does_not_start_wide_raw_string() { + // `FooL` precedes `R"(` — the `L` is part of the identifier, not the + // wide-string prefix. Must NOT enter RawString state. + let refs = scan("auto FooL = 0;\n#include \n"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].path, "a.h"); + } + + #[test] + fn identifier_ending_in_lower_u_r_does_not_start_raw_string() { + let refs = scan("auto Foou = 0;\n#include \n"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].path, "a.h"); + } + + #[test] + fn identifier_ending_in_upper_u_r_does_not_start_raw_string() { + let refs = scan("auto FooU = 0;\n#include \n"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].path, "a.h"); + } + + #[test] + fn underscore_before_raw_prefix_blocks_detection() { + // `_R"(...)"` is identifier-continuation; must not start a raw + // string. Critical for code that uses `_R` as a translation macro + // name (common in i18n shims). + let refs = scan("foo_R = 0;\n#include \n"); + assert_eq!(refs.len(), 1); + } + + #[test] + fn digit_before_raw_prefix_blocks_detection() { + // Numbers can appear in identifiers; `foo1R` must not start a raw + // string. + let refs = scan("foo1R = 0;\n#include \n"); + assert_eq!(refs.len(), 1); + } + + #[test] + fn whitespace_before_raw_prefix_starts_raw_string() { + // Positive control — make sure we didn't break legitimate raw + // strings preceded by whitespace. + let refs = scan("auto x = R\"(#include )\";\n#include \n"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].path, "a.h"); + } + + #[test] + fn start_of_file_raw_string_still_detected() { + // Boundary case: `R"(...)"` at byte 0 has no previous byte; + // `i > 0` clause must short-circuit and allow detection. + let refs = scan("R\"(#include )\"\n#include \n"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].path, "a.h"); + } + + #[test] + fn punctuation_before_raw_prefix_starts_raw_string() { + // `=R"(...)"` — `=` is non-identifier; must enter raw-string state + // and swallow the embedded `#include`. + let refs = scan("auto x =R\"(#include )\";\n#include \n"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].path, "a.h"); + } + + #[test] + fn paren_before_raw_prefix_starts_raw_string() { + // `(R"(...)"` — `(` is non-identifier. + let refs = scan("foo(R\"(#include )\");\n#include \n"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].path, "a.h"); + } + + #[test] + fn many_includes_in_one_file() { + // Adversary: pile of includes interspersed with comments and + // strings. Confirm count + order are stable. + let src = "// header\n\ + #include \n\ + const char* s = \"#include \";\n\ + #include \"b.h\"\n\ + /* block\n\ + #include \n\ + */\n\ + #include \n"; + let refs = scan(src); + assert_eq!(refs.len(), 3); + assert_eq!(refs[0].path, "a.h"); + assert_eq!(refs[1].path, "b.h"); + assert_eq!(refs[2].path, "c.h"); + } + + #[test] + fn empty_input_returns_empty() { + assert!(scan("").is_empty()); + } + + #[test] + fn lone_hash_does_not_panic() { + let _ = scan("#"); + } + + #[test] + fn hash_then_eof_does_not_panic() { + let _ = scan("#include"); + } + + #[test] + fn null_bytes_do_not_panic() { + // Adversary: embedded NUL inside source. Real toolchains reject + // these but the scanner must not crash. + let _ = scan("foo\0bar\n#include \n"); + } + + #[test] + fn very_long_line_does_not_panic() { + // 64 KB single line. + let mut s = String::from("// "); + s.push_str(&"x".repeat(64 * 1024)); + s.push('\n'); + s.push_str("#include \n"); + let refs = scan(&s); + assert_eq!(refs.len(), 1); + } + + #[test] + fn deeply_nested_block_comments_do_not_panic() { + // C/C++ block comments don't nest, but we still shouldn't choke on + // pathological input. + let s = "/* /* /* */\n#include \n"; + let refs = scan(s); + // After the first `*/`, we're back in code state, so the include + // must be picked up. + assert_eq!(refs.len(), 1); + } +} diff --git a/crates/fbuild-header-scan/src/walker.rs b/crates/fbuild-header-scan/src/walker.rs new file mode 100644 index 00000000..0ec14bb2 --- /dev/null +++ b/crates/fbuild-header-scan/src/walker.rs @@ -0,0 +1,237 @@ +//! Transitive include-graph walker. +//! +//! Given a set of seed source files and an ordered list of search paths, walks +//! every reachable `#include` and returns the set of resolved files (sorted) +//! plus the set of include strings that could not be resolved. The walker is +//! BFS over a visited set so cycles, diamonds, and arbitrary depth all +//! terminate correctly. + +use std::collections::{BTreeSet, HashSet, VecDeque}; +use std::path::{Path, PathBuf}; + +use crate::scanner::{scan, IncludeKind, IncludeRef}; + +/// Result of a walk. `reached` and `unresolved` are sorted for deterministic +/// cache keys. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct WalkResult { + pub reached: Vec, + pub unresolved: Vec, +} + +/// Walk the include graph starting from `seeds` over `search_paths`. +/// +/// `search_paths` is consulted in order for `<...>` includes and as a +/// secondary lookup for `"..."` includes (after the same-directory check). +/// A file is added to `reached` exactly once. Files outside `search_paths` +/// are still reached if they are seeds or `"..."`-resolved relative to a +/// seed/visited file. +pub fn walk(seeds: &[PathBuf], search_paths: &[PathBuf]) -> WalkResult { + let mut reached: BTreeSet = BTreeSet::new(); + let mut unresolved: BTreeSet = BTreeSet::new(); + let mut visited: HashSet = HashSet::new(); + let mut queue: VecDeque = VecDeque::new(); + + for seed in seeds { + let canon = canon(seed); + if visited.insert(canon.clone()) { + queue.push_back(canon.clone()); + reached.insert(canon); + } + } + + while let Some(file) = queue.pop_front() { + let Ok(text) = std::fs::read_to_string(&file) else { + continue; + }; + for inc in scan(&text) { + match resolve(&inc, &file, search_paths) { + Some(resolved) => { + let canon = canon(&resolved); + if visited.insert(canon.clone()) { + reached.insert(canon.clone()); + queue.push_back(canon); + } + } + None => { + unresolved.insert(inc.path.clone()); + } + } + } + } + + WalkResult { + reached: reached.into_iter().collect(), + unresolved: unresolved.into_iter().collect(), + } +} + +fn resolve(inc: &IncludeRef, from: &Path, search_paths: &[PathBuf]) -> Option { + if inc.kind == IncludeKind::Quoted { + if let Some(parent) = from.parent() { + let candidate = parent.join(&inc.path); + if candidate.is_file() { + return Some(candidate); + } + } + } + for sp in search_paths { + let candidate = sp.join(&inc.path); + if candidate.is_file() { + return Some(candidate); + } + } + None +} + +fn canon(p: &Path) -> PathBuf { + std::fs::canonicalize(p).unwrap_or_else(|_| p.to_path_buf()) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn write(path: &Path, contents: &str) { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).unwrap(); + } + std::fs::write(path, contents).unwrap(); + } + + #[test] + fn w01_quoted_resolves_same_dir_first() { + let tmp = TempDir::new().unwrap(); + let main = tmp.path().join("main.cpp"); + let local = tmp.path().join("foo.h"); + let other = tmp.path().join("other").join("foo.h"); + write(&main, "#include \"foo.h\"\n"); + write(&local, "// local\n"); + write(&other, "// other\n"); + + let res = walk(std::slice::from_ref(&main), &[tmp.path().join("other")]); + assert!( + res.reached + .iter() + .any(|p| p.ends_with("foo.h") && !p.starts_with(tmp.path().join("other"))), + "expected local foo.h, got: {:?}", + res.reached + ); + } + + #[test] + fn w02_angled_skips_same_dir() { + let tmp = TempDir::new().unwrap(); + let main = tmp.path().join("main.cpp"); + let local = tmp.path().join("foo.h"); + let other_dir = tmp.path().join("other"); + let other = other_dir.join("foo.h"); + write(&main, "#include \n"); + write(&local, "// local\n"); + write(&other, "// other\n"); + + let res = walk( + std::slice::from_ref(&main), + std::slice::from_ref(&other_dir), + ); + let canon_other = std::fs::canonicalize(&other).unwrap(); + assert!( + res.reached.contains(&canon_other), + "expected angled to resolve via search path, got: {:?}", + res.reached + ); + } + + #[test] + fn w03_search_path_precedence_first_hit_wins() { + let tmp = TempDir::new().unwrap(); + let main = tmp.path().join("main.cpp"); + let a = tmp.path().join("a"); + let b = tmp.path().join("b"); + write(&a.join("dup.h"), "// a\n"); + write(&b.join("dup.h"), "// b\n"); + write(&main, "#include \n"); + + let res = walk(std::slice::from_ref(&main), &[a.clone(), b.clone()]); + let canon_a = std::fs::canonicalize(a.join("dup.h")).unwrap(); + assert!(res.reached.contains(&canon_a)); + } + + #[test] + fn w04_missing_header_goes_to_unresolved() { + let tmp = TempDir::new().unwrap(); + let main = tmp.path().join("main.cpp"); + write(&main, "#include \n"); + let res = walk(std::slice::from_ref(&main), &[]); + assert!(res.unresolved.iter().any(|s| s == "does_not_exist.h")); + } + + #[test] + fn w10_cycle_terminates() { + let tmp = TempDir::new().unwrap(); + let a = tmp.path().join("a.h"); + let b = tmp.path().join("b.h"); + write(&a, "#include \"b.h\"\n"); + write(&b, "#include \"a.h\"\n"); + + let res = walk(std::slice::from_ref(&a), &[]); + let ca = std::fs::canonicalize(&a).unwrap(); + let cb = std::fs::canonicalize(&b).unwrap(); + assert!(res.reached.contains(&ca)); + assert!(res.reached.contains(&cb)); + } + + #[test] + fn w11_diamond_dedupes() { + let tmp = TempDir::new().unwrap(); + let main = tmp.path().join("main.cpp"); + let a = tmp.path().join("a.h"); + let b = tmp.path().join("b.h"); + let common = tmp.path().join("common.h"); + write(&main, "#include \"a.h\"\n#include \"b.h\"\n"); + write(&a, "#include \"common.h\"\n"); + write(&b, "#include \"common.h\"\n"); + write(&common, "// common\n"); + + let res = walk(std::slice::from_ref(&main), &[]); + let cc = std::fs::canonicalize(&common).unwrap(); + let count = res.reached.iter().filter(|p| **p == cc).count(); + assert_eq!(count, 1); + } + + #[test] + fn w12_depth_5_chain() { + let tmp = TempDir::new().unwrap(); + for i in 1..=5 { + let next = if i == 5 { + String::new() + } else { + format!("#include \"h{}.h\"\n", i + 1) + }; + write(&tmp.path().join(format!("h{}.h", i)), &next); + } + let main = tmp.path().join("main.cpp"); + write(&main, "#include \"h1.h\"\n"); + let res = walk(std::slice::from_ref(&main), &[]); + for i in 1..=5 { + let p = std::fs::canonicalize(tmp.path().join(format!("h{}.h", i))).unwrap(); + assert!(res.reached.contains(&p), "missing h{}.h", i); + } + } + + #[test] + fn w20_deterministic_order() { + let tmp = TempDir::new().unwrap(); + let main = tmp.path().join("main.cpp"); + let z = tmp.path().join("z.h"); + let a = tmp.path().join("a.h"); + write(&z, ""); + write(&a, ""); + write(&main, "#include \"z.h\"\n#include \"a.h\"\n"); + let seeds = std::slice::from_ref(&main); + let r1 = walk(seeds, &[]); + let r2 = walk(seeds, &[]); + assert_eq!(r1, r2); + } +} diff --git a/crates/fbuild-library-select/Cargo.toml b/crates/fbuild-library-select/Cargo.toml new file mode 100644 index 00000000..ad6d8ad5 --- /dev/null +++ b/crates/fbuild-library-select/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "fbuild-library-select" +description = "PlatformIO-LDF-style library resolver using the fbuild header scanner" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true + +[dependencies] +fbuild-header-scan = { path = "../fbuild-header-scan" } +fbuild-packages = { path = "../fbuild-packages" } +tracing = { workspace = true } +walkdir = { workspace = true } + +[dev-dependencies] +tempfile = { workspace = true } diff --git a/crates/fbuild-library-select/README.md b/crates/fbuild-library-select/README.md new file mode 100644 index 00000000..8c6ceebd --- /dev/null +++ b/crates/fbuild-library-select/README.md @@ -0,0 +1,22 @@ +# fbuild-library-select + +PlatformIO-LDF-style library resolver. Given a set of project seed sources, a +list of framework libraries, and the project's include roots, it returns the +set of framework libraries transitively reachable from the seeds plus the +compile-set for each selected library. + +The resolver uses path-prefix attribution (PlatformIO's `search_deps_recursive` +semantics, not basename matching): each `#include` is first resolved to an +absolute path via the walker, then attributed to whichever library's +`include_dirs` contain the resolved path as a prefix. This handles Teensyduino +/ STM32duino / Arduino layouts uniformly. + +Convergence is two-pass: + +1. BFS from project seeds. Any library whose header is reached is marked + dependent and its other headers are enqueued. +2. One reconciliation pass over each dependent library's full source set to + catch anything the header-only pass missed. + +This is exactly what PlatformIO LDF chain mode does, just without the Python +overhead. diff --git a/crates/fbuild-library-select/src/README.md b/crates/fbuild-library-select/src/README.md new file mode 100644 index 00000000..0f32f03f --- /dev/null +++ b/crates/fbuild-library-select/src/README.md @@ -0,0 +1,3 @@ +# fbuild-library-select sources + +- `lib.rs` — public `Selection` type and `resolve()` entry point. diff --git a/crates/fbuild-library-select/src/lib.rs b/crates/fbuild-library-select/src/lib.rs new file mode 100644 index 00000000..c9a70c1e --- /dev/null +++ b/crates/fbuild-library-select/src/lib.rs @@ -0,0 +1,418 @@ +//! PlatformIO-LDF-style library resolver. +//! +//! Given a set of seed source files (the project's `src/`, `lib/`, `include/` +//! trees), a list of discovered framework libraries, and the project's include +//! roots, `resolve()` returns the set of framework libraries transitively +//! reachable from the seeds plus the compile-set for each selected library. +//! +//! Attribution is by path-prefix: each `#include` is resolved to an absolute +//! path via the walker, then attributed to whichever library's `include_dirs` +//! contain the resolved path as a prefix. No basename-only matching, no +//! filesystem globbing of `.h` files, no mystery overlaps. +//! +//! Convergence is PlatformIO's 2-pass LDF chain: +//! 1. BFS from project seeds. Any library whose include dir contains the +//! resolved path is marked dependent. +//! 2. Reconciliation: re-walk each dependent library's full source set to +//! catch anything the header-only pass missed. Libraries newly reached in +//! pass 2 are also marked dependent. + +use std::collections::{BTreeMap, BTreeSet}; +use std::path::{Path, PathBuf}; + +use fbuild_header_scan::walk; +use fbuild_packages::library::FrameworkLibrary; + +/// Resolved library selection plus the transitive include closure. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct Selection { + /// Canonicalized paths of every file reached by the walker. + pub included_files: Vec, + /// Names of framework libraries whose headers were reached, sorted + /// lexicographically and deduplicated. The sort is intentional so the + /// value is a pure function of the *set* of libraries reached, not their + /// position in the input slice — required for stable cache keys. + pub required_libraries: Vec, + /// Source files to compile (sorted, deduped). + pub source_files: Vec, + /// Include dirs to pass to the compiler (sorted, deduped). + pub include_dirs: Vec, + /// Include strings the walker could not resolve (sorted, deduped). + pub unresolved: Vec, +} + +/// Resolve the transitive library selection for a project. +/// +/// `seeds` are the source files to walk from (sketch, project `src/`, +/// `include/`, `lib/` trees). +/// `project_search_paths` are the project's own include roots — consulted for +/// `<...>` includes before framework libs. +/// `libraries` is the full set of framework libraries discovered under the +/// framework's `libraries/` dir. +pub fn resolve( + seeds: &[PathBuf], + project_search_paths: &[PathBuf], + libraries: &[FrameworkLibrary], +) -> Selection { + let mut selected: BTreeSet = BTreeSet::new(); + let mut all_included: BTreeSet = BTreeSet::new(); + let mut all_unresolved: BTreeSet = BTreeSet::new(); + + let canon_lib_dirs: Vec> = libraries + .iter() + .map(|lib| lib.include_dirs.iter().map(|d| canon(d)).collect()) + .collect(); + + // The walker's search paths include the project's include roots first, then + // every framework library's include dirs. A reached path is attributed to a + // library by prefix match, not by which search-path entry matched it — PIO's + // `search_deps_recursive` semantics. Having all lib include dirs present + // from the start means pass 1's BFS naturally traverses lib-to-lib edges. + let mut full_search_paths: Vec = project_search_paths.to_vec(); + for lib in libraries { + for d in &lib.include_dirs { + if !full_search_paths.contains(d) { + full_search_paths.push(d.clone()); + } + } + } + + // Pass 1: BFS from project seeds. + let res = walk(seeds, &full_search_paths); + for p in &res.reached { + all_included.insert(p.clone()); + } + for u in &res.unresolved { + all_unresolved.insert(u.clone()); + } + for (idx, dirs) in canon_lib_dirs.iter().enumerate() { + if res.reached.iter().any(|p| path_in_any(p, dirs)) { + selected.insert(idx); + } + } + + // Pass 2: reconciliation. Re-walk with each selected library's full source + // set as seeds, in case a lib-to-lib dep is only visible through a `.cpp` + // (not a header). Keeps iterating until the selection stabilizes, which for + // realistic Arduino-library graphs is 1–2 rounds. + loop { + let mut recon_seeds: Vec = seeds.to_vec(); + for idx in &selected { + for src in &libraries[*idx].source_files { + recon_seeds.push(src.clone()); + } + } + let res = walk(&recon_seeds, &full_search_paths); + for p in &res.reached { + all_included.insert(p.clone()); + } + for u in &res.unresolved { + all_unresolved.insert(u.clone()); + } + let before = selected.len(); + for (idx, dirs) in canon_lib_dirs.iter().enumerate() { + if selected.contains(&idx) { + continue; + } + if res.reached.iter().any(|p| path_in_any(p, dirs)) { + selected.insert(idx); + } + } + if selected.len() == before { + break; + } + } + + let mut required_libraries: Vec = Vec::new(); + let mut source_files: BTreeSet = BTreeSet::new(); + let mut include_dirs: BTreeMap = BTreeMap::new(); + for idx in &selected { + let lib = &libraries[*idx]; + required_libraries.push(lib.name.clone()); + for s in &lib.source_files { + source_files.insert(s.clone()); + } + for d in &lib.include_dirs { + include_dirs.insert(d.clone(), ()); + } + } + // Sort by name so the output is a deterministic function of the input + // *set* of libraries rather than their input order — required for stable + // cache keys in #205 Phase 4. + required_libraries.sort(); + required_libraries.dedup(); + + Selection { + included_files: all_included.into_iter().collect(), + required_libraries, + source_files: source_files.into_iter().collect(), + include_dirs: include_dirs.into_keys().collect(), + unresolved: all_unresolved.into_iter().collect(), + } +} + +fn canon(p: &Path) -> PathBuf { + match std::fs::canonicalize(p) { + Ok(c) => c, + Err(err) => { + // The walker canonicalizes every reached path, so an + // un-canonicalized library include dir won't `starts_with`-match + // anything on macOS (`/var` vs `/private/var`) or Windows (`\\?\` + // vs plain). Warn loudly so a missing/relocated framework install + // shows up in logs instead of as a silent "library not selected" + // false negative at link time. + tracing::warn!( + path = %p.display(), + error = %err, + "fbuild-library-select: failed to canonicalize path; \ + prefix-attribution may miss this directory" + ); + p.to_path_buf() + } + } +} + +fn path_in_any(path: &Path, dirs: &[PathBuf]) -> bool { + dirs.iter().any(|d| path.starts_with(d)) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn write(path: &Path, contents: &str) { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).unwrap(); + } + std::fs::write(path, contents).unwrap(); + } + + fn lib(tmp: &Path, name: &str) -> FrameworkLibrary { + let dir = tmp.join("libraries").join(name); + let src = dir.join("src"); + std::fs::create_dir_all(&src).unwrap(); + FrameworkLibrary { + name: name.to_string(), + dir: dir.clone(), + include_dirs: vec![src.clone()], + source_files: Vec::new(), + } + } + + #[test] + fn r01_direct_include_selects_library() { + let tmp = TempDir::new().unwrap(); + let project_src = tmp.path().join("project").join("src"); + write(&project_src.join("main.cpp"), "#include \n"); + let mut spi = lib(tmp.path(), "SPI"); + write(&spi.include_dirs[0].join("SPI.h"), ""); + let spi_cpp = spi.include_dirs[0].join("SPI.cpp"); + write(&spi_cpp, ""); + spi.source_files.push(spi_cpp.clone()); + + let seeds = vec![project_src.join("main.cpp")]; + let sel = resolve(&seeds, &[project_src], &[spi]); + assert_eq!(sel.required_libraries, vec!["SPI".to_string()]); + assert!(sel.source_files.contains(&canon(&spi_cpp)) || sel.source_files.contains(&spi_cpp)); + } + + #[test] + fn r02_transitive_library_selection() { + let tmp = TempDir::new().unwrap(); + let project_src = tmp.path().join("project").join("src"); + write(&project_src.join("main.cpp"), "#include \n"); + + let mut spi = lib(tmp.path(), "SPI"); + write(&spi.include_dirs[0].join("SPI.h"), "#include \n"); + let spi_cpp = spi.include_dirs[0].join("SPI.cpp"); + write(&spi_cpp, ""); + spi.source_files.push(spi_cpp); + + let mut wire = lib(tmp.path(), "Wire"); + write(&wire.include_dirs[0].join("Wire.h"), ""); + let wire_cpp = wire.include_dirs[0].join("Wire.cpp"); + write(&wire_cpp, ""); + wire.source_files.push(wire_cpp); + + let seeds = vec![project_src.join("main.cpp")]; + let sel = resolve(&seeds, &[project_src], &[spi, wire]); + assert_eq!( + sel.required_libraries, + vec!["SPI".to_string(), "Wire".to_string()] + ); + } + + #[test] + fn r03_no_includes_selects_nothing() { + let tmp = TempDir::new().unwrap(); + let project_src = tmp.path().join("project").join("src"); + write(&project_src.join("main.cpp"), "int main() { return 0; }\n"); + let spi = lib(tmp.path(), "SPI"); + write(&spi.include_dirs[0].join("SPI.h"), ""); + + let seeds = vec![project_src.join("main.cpp")]; + let sel = resolve(&seeds, &[project_src], &[spi]); + assert!(sel.required_libraries.is_empty()); + } + + #[test] + fn r13_unrelated_library_not_selected() { + let tmp = TempDir::new().unwrap(); + let project_src = tmp.path().join("project").join("src"); + write(&project_src.join("main.cpp"), "#include \n"); + + let mut spi = lib(tmp.path(), "SPI"); + write(&spi.include_dirs[0].join("SPI.h"), ""); + let spi_cpp = spi.include_dirs[0].join("SPI.cpp"); + write(&spi_cpp, ""); + spi.source_files.push(spi_cpp); + + let mut fnet = lib(tmp.path(), "FNET"); + write(&fnet.include_dirs[0].join("fnet.h"), ""); + let fnet_cpp = fnet.include_dirs[0].join("fnet.cpp"); + write(&fnet_cpp, ""); + fnet.source_files.push(fnet_cpp); + + let seeds = vec![project_src.join("main.cpp")]; + let sel = resolve(&seeds, &[project_src], &[spi, fnet]); + assert_eq!(sel.required_libraries, vec!["SPI".to_string()]); + } + + #[test] + fn path_prefix_attribution_distinguishes_same_basename() { + let tmp = TempDir::new().unwrap(); + let project_src = tmp.path().join("project").join("src"); + write(&project_src.join("main.cpp"), "#include \"foo/config.h\"\n"); + + let mut foo = lib(tmp.path(), "Foo"); + write(&foo.include_dirs[0].join("foo").join("config.h"), ""); + let foo_cpp = foo.include_dirs[0].join("Foo.cpp"); + write(&foo_cpp, ""); + foo.source_files.push(foo_cpp); + + let mut bar = lib(tmp.path(), "Bar"); + // Bar also has a config.h but at its own path — must NOT be selected + // when the project only includes "foo/config.h". + write(&bar.include_dirs[0].join("bar").join("config.h"), ""); + let bar_cpp = bar.include_dirs[0].join("Bar.cpp"); + write(&bar_cpp, ""); + bar.source_files.push(bar_cpp); + + let seeds = vec![project_src.join("main.cpp")]; + let sel = resolve( + &seeds, + &[ + project_src, + foo.include_dirs[0].clone(), + bar.include_dirs[0].clone(), + ], + &[foo, bar], + ); + assert_eq!(sel.required_libraries, vec!["Foo".to_string()]); + } + + #[test] + fn empty_libraries_yields_empty_selection() { + // Adversary: no libraries at all. resolve must terminate cleanly with + // no required_libraries, no panics, and any reached files limited to + // what the walker found from seeds alone. + let tmp = TempDir::new().unwrap(); + let project_src = tmp.path().join("project").join("src"); + write(&project_src.join("main.cpp"), "int main() { return 0; }\n"); + let seeds = vec![project_src.join("main.cpp")]; + let sel = resolve(&seeds, &[project_src], &[]); + assert!(sel.required_libraries.is_empty()); + assert!(sel.source_files.is_empty()); + } + + #[test] + fn missing_library_include_dir_does_not_panic() { + // Adversary: a FrameworkLibrary whose include_dirs point at a path + // that doesn't exist on disk (broken framework install, lib not yet + // downloaded). canon() falls back and emits a tracing::warn; the + // resolver must not panic and must return a sensible empty + // selection. + let tmp = TempDir::new().unwrap(); + let project_src = tmp.path().join("project").join("src"); + write(&project_src.join("main.cpp"), "int main() { return 0; }\n"); + let phantom = FrameworkLibrary { + name: "Phantom".to_string(), + dir: tmp.path().join("nonexistent").join("Phantom"), + include_dirs: vec![tmp.path().join("nonexistent").join("Phantom").join("src")], + source_files: Vec::new(), + }; + let seeds = vec![project_src.join("main.cpp")]; + let sel = resolve(&seeds, &[project_src], &[phantom]); + assert!(sel.required_libraries.is_empty()); + } + + #[test] + fn many_libraries_in_random_order_returns_sorted() { + // Adversary: 6 libs in deliberately scrambled input order. The + // output must be sorted lexicographically, independent of input + // order — required for stable cache keys (#205 Phase 4). + let tmp = TempDir::new().unwrap(); + let project_src = tmp.path().join("project").join("src"); + write( + &project_src.join("main.cpp"), + "#include \n#include \n#include \n\ + #include \n#include \n#include \n", + ); + + let mut libs = Vec::new(); + for name in ["Z", "A", "M", "B", "Y", "K"] { + let mut l = lib(tmp.path(), name); + write(&l.include_dirs[0].join(format!("{name}.h")), ""); + let cpp = l.include_dirs[0].join(format!("{name}.cpp")); + write(&cpp, ""); + l.source_files.push(cpp); + libs.push(l); + } + + let seeds = vec![project_src.join("main.cpp")]; + let sel = resolve(&seeds, &[project_src], &libs); + assert_eq!( + sel.required_libraries, + ["A", "B", "K", "M", "Y", "Z"] + .iter() + .map(|s| s.to_string()) + .collect::>() + ); + } + + #[test] + fn required_libraries_returned_sorted_by_name_not_input_order() { + // Regression guard: pass the libraries in REVERSE name order (Wire + // before SPI) and confirm the output is sorted lexicographically. + // The doc on `Selection::required_libraries` and the cache-key story + // in #205 Phase 4 both depend on this being a pure function of the + // selected *set* of libraries, not their input position. + let tmp = TempDir::new().unwrap(); + let project_src = tmp.path().join("project").join("src"); + write( + &project_src.join("main.cpp"), + "#include \n#include \n", + ); + + let mut spi = lib(tmp.path(), "SPI"); + write(&spi.include_dirs[0].join("SPI.h"), ""); + let spi_cpp = spi.include_dirs[0].join("SPI.cpp"); + write(&spi_cpp, ""); + spi.source_files.push(spi_cpp); + + let mut wire = lib(tmp.path(), "Wire"); + write(&wire.include_dirs[0].join("Wire.h"), ""); + let wire_cpp = wire.include_dirs[0].join("Wire.cpp"); + write(&wire_cpp, ""); + wire.source_files.push(wire_cpp); + + let seeds = vec![project_src.join("main.cpp")]; + // Wire is passed BEFORE SPI in the input slice. + let sel = resolve(&seeds, &[project_src], &[wire, spi]); + assert_eq!( + sel.required_libraries, + vec!["SPI".to_string(), "Wire".to_string()] + ); + } +} diff --git a/crates/fbuild-test-support/Cargo.toml b/crates/fbuild-test-support/Cargo.toml index c686207f..4eea054f 100644 --- a/crates/fbuild-test-support/Cargo.toml +++ b/crates/fbuild-test-support/Cargo.toml @@ -7,7 +7,14 @@ rust-version.workspace = true license.workspace = true [dependencies] +object = { workspace = true } +serde_json = { workspace = true } +shell-words = { workspace = true } tempfile = { workspace = true } +thiserror = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } tokio = { workspace = true } +fbuild-packages = { path = "../fbuild-packages" } +fbuild-header-scan = { path = "../fbuild-header-scan" } +fbuild-library-select = { path = "../fbuild-library-select" } diff --git a/crates/fbuild-test-support/README.md b/crates/fbuild-test-support/README.md index 274bd57b..fb912d96 100644 --- a/crates/fbuild-test-support/README.md +++ b/crates/fbuild-test-support/README.md @@ -6,6 +6,100 @@ Test utilities and fixtures for fbuild workspace crates. - `create_test_project(env_name, platform, board)` -- Creates a `tempfile::TempDir` containing a minimal `platformio.ini`, `src/` directory, and `src/main.cpp` with Arduino stubs (`setup`/`loop`) +## CompileDb + +`CompileDb` parses clangd-style `compile_commands.json` files for use in +acceptance tests that need to assert properties of a build's translation +units (TU count, presence of files under specific subtrees, etc.). The +spec is documented at +. + +```rust +use fbuild_test_support::CompileDb; + +let db = CompileDb::from_path(".fbuild/compile_commands.json")?; + +// Bound TU count (acceptance probes A-20..A-22 for issue #205). +assert!(db.tu_count() <= 250); + +// Assert no compile-DB entries point inside forbidden subtrees. +let leaks = db.forbidden_present(&["FNET", "Snooze", "RadioHead", "mbedtls"]); +assert!(leaks.is_empty(), "unexpected libraries compiled: {leaks:?}"); + +// Drill into matching entries when an assertion fails. +for e in db.entries_matching("FNET") { + eprintln!("FNET TU still in DB: {}", e.file.display()); +} +# Ok::<(), Box>(()) +``` + +Both forms of the spec are accepted: + +- `"arguments": [...]` -- taken verbatim. +- `"command": "..."` -- tokenized via the `shell-words` crate (POSIX `sh` + rules: single quotes, double quotes, backslash escapes). + +When both fields are present, `arguments` wins. Relative `file` and +`output` paths are joined onto `directory` (no canonicalization, since +the source files may not exist on test runners). + +## MiniFramework + +`MiniFramework` is a fluent builder that materializes a fake Teensyduino / +STM32duino / Arduino framework tree under a fresh `tempfile::TempDir`. The +on-disk layout matches what +`fbuild_packages::library::framework_library::discover_framework_libraries` +expects, so anything you build with `MiniFramework` round-trips through the +production walker and the LDF-style resolver in `fbuild-library-select`. + +Layout: + +```text +/framework/ + libraries/ + SPI/src/SPI.h +/project/ + src/ + include/ (created on demand) +``` + +### API + +- `MiniFramework::new()` — create the tree. +- `add_library(name)` — eagerly creates `libraries//src/.h` + (empty by default) and returns a `LibraryBuilder`. +- `LibraryBuilder` chain: `.header(s)`, `.cpp(s)`, `.extra(rel, s)`, + `.example(rel, s)`, `.extras(rel, s)`, `.tests(rel, s)`, `.done()`. +- `add_project_source(rel, s)` / `add_project_include(rel, s)` / + `sketch(s)` — write project files under `src/` or `include/`. +- `project_seeds()` — every `.c/.cpp/.cc/.cxx/.s` under `project/src/**`, + sorted, suitable as walker seeds. +- `project_search_paths()` — `[include, src]` in PIO order; `include/` + appears only when populated. +- `libraries_dir()`, `project_root()`, `framework_root()`, `project_src()` + for downstream APIs. + +### Example + +```rust +use fbuild_test_support::MiniFramework; +use fbuild_packages::library::framework_library::discover_framework_libraries; +use fbuild_library_select::resolve; + +let mut fx = MiniFramework::new(); +fx.add_library("SPI").cpp("// impl\n").done(); +fx.add_library("Wire").cpp("// wire\n").done(); +fx.sketch("#include \nvoid setup() {}\nvoid loop() {}\n"); + +let libs = discover_framework_libraries(&fx.libraries_dir()); +let sel = resolve(&fx.project_seeds(), &fx.project_search_paths(), &libs); +assert_eq!(sel.required_libraries, vec!["SPI".to_string()]); +``` + +The `example()` / `extras()` / `tests()` builder methods exist as fodder for +regression tests that prove `collect_library_sources` excludes those +subtrees. + ## Usage Used by other crates as a `[dev-dependencies]` entry to get realistic temporary project directories for integration tests without manual setup. diff --git a/crates/fbuild-test-support/src/compile_db.rs b/crates/fbuild-test-support/src/compile_db.rs new file mode 100644 index 00000000..20756780 --- /dev/null +++ b/crates/fbuild-test-support/src/compile_db.rs @@ -0,0 +1,365 @@ +//! Parser for clangd-style `compile_commands.json` files. +//! +//! See for the +//! format spec. Each entry is an object with required `directory` and `file` +//! fields and one of `command` (a single shell-quoted string) or `arguments` +//! (a JSON array of pre-tokenized argv). An optional `output` field names the +//! emitted artifact. +//! +//! This module is intentionally small: it powers test-suite assertions over +//! the contents of `.fbuild/compile_commands.json` (TU counts, presence of +//! files under specific subtrees, etc.) without forcing every test to +//! reimplement JSON walking. + +use std::collections::HashSet; +use std::path::{Path, PathBuf}; + +/// One row from a clangd `compile_commands.json`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CompileEntry { + /// Working directory for the command (absolute, as written in JSON). + pub directory: PathBuf, + /// Source file path. If the JSON value was relative it is joined onto + /// `directory` (without canonicalization, since the file may not exist + /// on test runners). + pub file: PathBuf, + /// Output object path, resolved like `file`. `None` when the JSON entry + /// omits the field. + pub output: Option, + /// Either the parsed `arguments` array verbatim, or `command` split via + /// shell-style tokenization (`shell-words` crate semantics). + pub arguments: Vec, +} + +/// Parsed `compile_commands.json`. +#[derive(Debug, Clone, Default)] +pub struct CompileDb { + entries: Vec, +} + +/// Errors produced by [`CompileDb`] parsing. +#[derive(Debug, thiserror::Error)] +pub enum CompileDbError { + /// I/O failure while reading the file from disk. + #[error("io: {0}")] + Io(#[from] std::io::Error), + /// The bytes were not valid JSON. + #[error("json: {0}")] + Json(#[from] serde_json::Error), + /// The JSON parsed but did not match the compilation-database schema. + #[error("malformed compile_commands.json: {0}")] + Malformed(String), +} + +impl CompileDb { + /// Read and parse a compilation database from disk. + pub fn from_path>(path: P) -> Result { + let text = std::fs::read_to_string(path.as_ref())?; + Self::from_str(&text) + } + + /// Parse a compilation database from an in-memory JSON string. + /// + /// When an entry has both `arguments` and `command`, `arguments` wins + /// (matching clangd behavior). `command` is tokenized with the + /// `shell-words` crate, which handles single quotes, double quotes, and + /// backslash escapes the same way POSIX `sh` does. + #[allow(clippy::should_implement_trait)] // intentional: returns CompileDbError, not FromStr::Err + pub fn from_str(json: &str) -> Result { + let value: serde_json::Value = serde_json::from_str(json)?; + let array = value + .as_array() + .ok_or_else(|| CompileDbError::Malformed("top-level value is not an array".into()))?; + + let mut entries = Vec::with_capacity(array.len()); + for (idx, item) in array.iter().enumerate() { + let obj = item.as_object().ok_or_else(|| { + CompileDbError::Malformed(format!("entry {idx} is not an object")) + })?; + + let directory = obj + .get("directory") + .and_then(serde_json::Value::as_str) + .ok_or_else(|| { + CompileDbError::Malformed(format!( + "entry {idx} missing required string field `directory`" + )) + })?; + let directory = PathBuf::from(directory); + + let file_raw = obj + .get("file") + .and_then(serde_json::Value::as_str) + .ok_or_else(|| { + CompileDbError::Malformed(format!( + "entry {idx} missing required string field `file`" + )) + })?; + let file = resolve(&directory, file_raw); + + let output = match obj.get("output") { + None | Some(serde_json::Value::Null) => None, + Some(v) => { + let s = v.as_str().ok_or_else(|| { + CompileDbError::Malformed(format!( + "entry {idx} field `output` is not a string" + )) + })?; + Some(resolve(&directory, s)) + } + }; + + let arguments = if let Some(arr) = obj.get("arguments") { + let arr = arr.as_array().ok_or_else(|| { + CompileDbError::Malformed(format!( + "entry {idx} field `arguments` is not an array" + )) + })?; + let mut out = Vec::with_capacity(arr.len()); + for (ai, av) in arr.iter().enumerate() { + let s = av.as_str().ok_or_else(|| { + CompileDbError::Malformed(format!( + "entry {idx} arguments[{ai}] is not a string" + )) + })?; + out.push(s.to_owned()); + } + out + } else if let Some(cmd) = obj.get("command") { + let cmd = cmd.as_str().ok_or_else(|| { + CompileDbError::Malformed(format!( + "entry {idx} field `command` is not a string" + )) + })?; + shell_words::split(cmd).map_err(|e| { + CompileDbError::Malformed(format!( + "entry {idx} `command` shell-split failed: {e}" + )) + })? + } else { + return Err(CompileDbError::Malformed(format!( + "entry {idx} requires one of `arguments` or `command`" + ))); + }; + + entries.push(CompileEntry { + directory, + file, + output, + arguments, + }); + } + + Ok(Self { entries }) + } + + /// All parsed entries in source order. + pub fn entries(&self) -> &[CompileEntry] { + &self.entries + } + + /// Number of raw entries (may exceed [`Self::tu_count`] when a build + /// system emits duplicate rows for multi-pass compiles). + pub fn len(&self) -> usize { + self.entries.len() + } + + /// True when there are no entries. + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Set of distinct (resolved) source files referenced by any entry. + pub fn files(&self) -> HashSet { + self.entries.iter().map(|e| e.file.clone()).collect() + } + + /// Translation-unit count: distinct source files. + pub fn tu_count(&self) -> usize { + self.files().len() + } + + /// Iterator over entries whose `file` path string contains `needle`. + /// + /// Useful for assertions like "no entries reference `libraries/FNET/`". + pub fn entries_matching<'a>( + &'a self, + needle: &'a str, + ) -> impl Iterator + 'a { + self.entries + .iter() + .filter(move |e| path_contains(&e.file, needle)) + } + + /// Returns the subset of `needles` that match at least one entry's `file`. + /// + /// Designed for crisp failure messages: an empty result means "clean"; + /// a non-empty result lists exactly which forbidden subtrees leaked in. + pub fn forbidden_present(&self, needles: &[&str]) -> Vec { + needles + .iter() + .filter(|needle| self.entries.iter().any(|e| path_contains(&e.file, needle))) + .map(|s| (*s).to_owned()) + .collect() + } +} + +fn resolve(directory: &Path, raw: &str) -> PathBuf { + let p = Path::new(raw); + if p.is_absolute() { + p.to_path_buf() + } else { + directory.join(p) + } +} + +fn path_contains(path: &Path, needle: &str) -> bool { + // Match against the path's string form. Use `to_string_lossy` so that + // non-UTF-8 paths still produce a best-effort haystack rather than + // silently skipping the entry. + path.to_string_lossy().contains(needle) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn from_str_parses_arguments_form() { + let json = r#"[ + { + "directory": "/work", + "file": "src/main.cpp", + "arguments": ["clang", "-c", "src/main.cpp"] + } + ]"#; + let db = CompileDb::from_str(json).unwrap(); + assert_eq!(db.len(), 1); + let e = &db.entries()[0]; + assert_eq!(e.directory, PathBuf::from("/work")); + assert_eq!(e.file, PathBuf::from("/work").join("src/main.cpp")); + assert_eq!(e.arguments, vec!["clang", "-c", "src/main.cpp"]); + assert_eq!(e.output, None); + } + + #[test] + fn from_str_parses_command_form_with_shell_split() { + let json = r#"[ + { + "directory": "/work", + "file": "src/path with spaces.cpp", + "command": "clang -c -DFOO=1 \"src/path with spaces.cpp\"" + } + ]"#; + let db = CompileDb::from_str(json).unwrap(); + let args = &db.entries()[0].arguments; + assert_eq!( + args, + &vec![ + "clang".to_string(), + "-c".to_string(), + "-DFOO=1".to_string(), + "src/path with spaces.cpp".to_string(), + ] + ); + } + + #[test] + fn from_str_arguments_takes_priority_over_command() { + let json = r#"[ + { + "directory": "/w", + "file": "a.c", + "arguments": ["from-args"], + "command": "from-command should-be-ignored" + } + ]"#; + let db = CompileDb::from_str(json).unwrap(); + assert_eq!(db.entries()[0].arguments, vec!["from-args".to_string()]); + } + + #[test] + fn from_path_reads_file() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("compile_commands.json"); + std::fs::write( + &path, + r#"[{"directory":"/w","file":"a.c","arguments":["cc","a.c"]}]"#, + ) + .unwrap(); + let db = CompileDb::from_path(&path).unwrap(); + assert_eq!(db.len(), 1); + } + + #[test] + fn relative_paths_resolved_against_directory() { + let json = r#"[ + { + "directory": "/work", + "file": "src/main.cpp", + "output": "build/main.o", + "arguments": ["cc"] + } + ]"#; + let db = CompileDb::from_str(json).unwrap(); + let e = &db.entries()[0]; + assert_eq!(e.file, PathBuf::from("/work").join("src/main.cpp")); + assert_eq!(e.output, Some(PathBuf::from("/work").join("build/main.o"))); + } + + #[test] + fn output_is_optional() { + let json = r#"[ + {"directory":"/w","file":"a.c","arguments":["cc","a.c"]} + ]"#; + let db = CompileDb::from_str(json).unwrap(); + assert_eq!(db.entries()[0].output, None); + } + + #[test] + fn tu_count_dedupes_repeated_files() { + let json = r#"[ + {"directory":"/w","file":"a.c","arguments":["cc","-O0","a.c"]}, + {"directory":"/w","file":"a.c","arguments":["cc","-O2","a.c"]} + ]"#; + let db = CompileDb::from_str(json).unwrap(); + assert_eq!(db.len(), 2); + assert_eq!(db.tu_count(), 1); + } + + #[test] + fn entries_matching_filters_by_substring() { + let json = r#"[ + {"directory":"/w","file":"libraries/SPI/SPI.cpp","arguments":["cc"]}, + {"directory":"/w","file":"libraries/FNET/fnet.c","arguments":["cc"]}, + {"directory":"/w","file":"src/main.cpp","arguments":["cc"]} + ]"#; + let db = CompileDb::from_str(json).unwrap(); + let hits: Vec<_> = db.entries_matching("FNET").collect(); + assert_eq!(hits.len(), 1); + assert!(hits[0].file.to_string_lossy().contains("FNET")); + } + + #[test] + fn forbidden_present_returns_only_hit_needles() { + let json = r#"[ + {"directory":"/w","file":"libraries/FNET/fnet.c","arguments":["cc"]} + ]"#; + let db = CompileDb::from_str(json).unwrap(); + let hits = db.forbidden_present(&["FNET", "Snooze"]); + assert_eq!(hits, vec!["FNET".to_string()]); + } + + #[test] + fn malformed_json_returns_json_error() { + let err = CompileDb::from_str("not json").unwrap_err(); + assert!(matches!(err, CompileDbError::Json(_))); + } + + #[test] + fn entry_without_file_field_returns_malformed() { + let json = r#"[{"directory":"/w","arguments":["cc"]}]"#; + let err = CompileDb::from_str(json).unwrap_err(); + assert!(matches!(err, CompileDbError::Malformed(_))); + } +} diff --git a/crates/fbuild-test-support/src/elf_probe.rs b/crates/fbuild-test-support/src/elf_probe.rs new file mode 100644 index 00000000..5be819b8 --- /dev/null +++ b/crates/fbuild-test-support/src/elf_probe.rs @@ -0,0 +1,457 @@ +//! In-process ELF file probe for test assertions. +//! +//! `ElfProbe` reads an ELF binary into memory and exposes section sizes and +//! symbol-table queries without shelling out to `nm`/`size`/`readelf`. It is +//! designed for per-board acceptance tests that assert ELF contents (e.g., +//! `.bss <= 3 KB`, no `fnet_*`/`mbedtls_*` symbols on Blink targets). +//! +//! Parsing uses the [`object`] crate. The probe owns the bytes and re-parses +//! on every call; `object` is zero-copy, so the cost is negligible for tests. +//! +//! # Example +//! +//! ```no_run +//! use fbuild_test_support::ElfProbe; +//! +//! let probe = ElfProbe::open("firmware.elf").unwrap(); +//! assert!(probe.section_size(".bss").unwrap() <= 3 * 1024); +//! assert!(!probe.has_symbol_containing("RadioHead").unwrap()); +//! ``` +//! +//! Demangling is intentionally out of scope. `has_symbol_containing` matches +//! against raw (possibly mangled) names — searching for a class or namespace +//! substring such as "RadioHead" or "mbedtls" is sufficient for the +//! presence/absence checks in #205 acceptance criteria. + +use std::fs; +use std::path::Path; + +use object::{Object, ObjectSection, ObjectSymbol}; + +/// In-memory ELF file probe. See module docs. +#[derive(Debug, Clone)] +pub struct ElfProbe { + bytes: Vec, +} + +/// Information about a single ELF section. +#[derive(Debug, Clone)] +pub struct SectionInfo { + /// Section name (e.g., `.text`, `.bss`). + pub name: String, + /// Section size in bytes (`sh_size`). + pub size: u64, + /// Virtual address of the section (`sh_addr`). + pub address: u64, +} + +/// Information about a single ELF symbol. +#[derive(Debug, Clone)] +pub struct SymbolInfo { + /// Symbol name (raw / mangled — no demangling). + pub name: String, + /// Symbol size in bytes (`st_size`). + pub size: u64, + /// Symbol address (`st_value`). + pub address: u64, + /// True if the symbol is undefined (i.e., refers to an external). + pub is_undefined: bool, +} + +/// Errors returned from [`ElfProbe`] operations. +#[derive(Debug, thiserror::Error)] +pub enum ElfProbeError { + /// Filesystem I/O failed (e.g., file missing, permission denied). + #[error("io: {0}")] + Io(#[from] std::io::Error), + /// Parsing the ELF bytes failed. + #[error("parse: {0}")] + Parse(String), +} + +impl ElfProbe { + /// Read and memoize the ELF bytes for repeated probing. + pub fn open>(path: P) -> Result { + let bytes = fs::read(path)?; + Ok(Self { bytes }) + } + + /// Construct from an in-memory buffer (for fixtures). + pub fn from_bytes(bytes: Vec) -> Self { + Self { bytes } + } + + /// Every named section, including zero-sized ones, in original ELF + /// section-header order. Unnamed (empty-name) sections are skipped. + /// + /// Zero-sized sections are kept so callers that want a faithful section + /// list (e.g. "is `.bss` present at all?") get the right answer. + /// `section_size(name)` still returns 0 when the section is absent. + pub fn sections(&self) -> Result, ElfProbeError> { + let file = self.parse()?; + let mut out = Vec::new(); + for section in file.sections() { + let Ok(name) = section.name() else { continue }; + if name.is_empty() { + continue; + } + out.push(SectionInfo { + name: name.to_string(), + size: section.size(), + address: section.address(), + }); + } + Ok(out) + } + + /// Look up a section by exact name. Returns `None` if absent. + pub fn section(&self, name: &str) -> Result, ElfProbeError> { + Ok(self.sections()?.into_iter().find(|s| s.name == name)) + } + + /// Convenience: just the size in bytes, or 0 if section is absent. + pub fn section_size(&self, name: &str) -> Result { + Ok(self.section(name)?.map(|s| s.size).unwrap_or(0)) + } + + /// Every symbol in the static `.symtab`, in symbol-table order. + /// + /// Demangling is out of scope — names are returned exactly as stored. + pub fn symbols(&self) -> Result, ElfProbeError> { + let file = self.parse()?; + let mut out = Vec::new(); + for sym in file.symbols() { + let Ok(name) = sym.name() else { continue }; + if name.is_empty() { + continue; + } + out.push(SymbolInfo { + name: name.to_string(), + size: sym.size(), + address: sym.address(), + is_undefined: sym.is_undefined(), + }); + } + Ok(out) + } + + /// Whether ANY symbol has the given exact name. + pub fn has_symbol(&self, name: &str) -> Result { + Ok(self.symbols()?.iter().any(|s| s.name == name)) + } + + /// Whether ANY symbol's name CONTAINS the given substring. + /// + /// Useful for spotting mangled C++ names by class/namespace — e.g., + /// `has_symbol_containing("RadioHead")` matches `_ZN8RadioHead4sendEv`. + pub fn has_symbol_containing(&self, needle: &str) -> Result { + Ok(self.symbols()?.iter().any(|s| s.name.contains(needle))) + } + + /// Sum of `.text + .data + .bss`. Missing sections contribute 0. + pub fn text_data_bss_sum(&self) -> Result { + let mut total: u64 = 0; + for name in [".text", ".data", ".bss"] { + total = total.saturating_add(self.section_size(name)?); + } + Ok(total) + } + + /// Parse the bytes into a generic `object::File`. Errors surface as + /// [`ElfProbeError::Parse`]. + fn parse(&self) -> Result, ElfProbeError> { + // Reject anything that doesn't start with an ELF magic; `object` will + // happily parse Mach-O / PE / etc. otherwise. + if self.bytes.len() < 4 || &self.bytes[..4] != b"\x7fELF" { + return Err(ElfProbeError::Parse("not an ELF file".to_string())); + } + object::File::parse(self.bytes.as_slice()).map_err(|e| ElfProbeError::Parse(e.to_string())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use object::elf; + use object::write::elf::{FileHeader, SectionHeader, Sym, Writer}; + use object::write::StringId; + use object::Endianness; + + /// Build a minimal little-endian 32-bit ARM ELF executable with the named + /// sections (each filled with `size` zero bytes so `.text/.data` contribute + /// real bytes; `.bss` is special-cased as `SHT_NOBITS`) and the named + /// global symbols. Returns an in-memory ELF byte buffer suitable for + /// `ElfProbe::from_bytes`. + /// + /// All sections are placed contiguously in memory starting at vaddr 0x1000. + fn build_elf(sections: &[(&str, u64)], symbols: &[(&str, u64)]) -> Vec { + let mut buf = Vec::new(); + let mut writer = Writer::new(Endianness::Little, false, &mut buf); + + // Reserve indices ---------------------------------------------------- + writer.reserve_file_header(); + + // One null section + one per requested section + .symtab + .strtab + .shstrtab. + // We let the Writer manage symtab/strtab/shstrtab sections itself. + let section_ids: Vec<_> = (0..sections.len()) + .map(|_| writer.reserve_section_index()) + .collect(); + let section_names: Vec = sections + .iter() + .map(|(name, _)| writer.add_section_name(name.as_bytes())) + .collect(); + + writer.reserve_null_symbol_index(); + let mut sym_entries: Vec<(StringId, u64, usize)> = Vec::new(); + for (sym_name, _) in symbols { + let _ = writer.reserve_symbol_index(None); + let name_id = writer.add_string(sym_name.as_bytes()); + // Place symbols inside the first section, address 0 within it. + sym_entries.push((name_id, 0, 0)); + } + writer.reserve_symtab_section_index(); + writer.reserve_strtab_section_index(); + writer.reserve_shstrtab_section_index(); + + // Reserve file offsets ---------------------------------------------- + // Section data: `.bss` is NOBITS (no file space). Others get `size` bytes. + let mut section_offsets: Vec = Vec::with_capacity(sections.len()); + for (name, size) in sections { + if *name == ".bss" { + section_offsets.push(0); + } else { + let offset = writer.reserve(*size as usize, 1) as u64; + section_offsets.push(offset); + } + } + writer.reserve_symtab(); + writer.reserve_strtab(); + writer.reserve_shstrtab(); + writer.reserve_section_headers(); + + // Write file header -------------------------------------------------- + writer + .write_file_header(&FileHeader { + os_abi: elf::ELFOSABI_NONE, + abi_version: 0, + e_type: elf::ET_REL, + e_machine: elf::EM_ARM, + e_entry: 0, + e_flags: 0, + }) + .expect("write file header"); + + // Write section data ------------------------------------------------- + for (i, (name, size)) in sections.iter().enumerate() { + if *name == ".bss" { + continue; + } + writer.pad_until(section_offsets[i] as usize); + writer.write(&vec![0u8; *size as usize]); + } + + // Symbol table (entry 0 is null and written automatically) ---------- + writer.write_null_symbol(); + for (sym_idx, (sym_name, _)) in symbols.iter().enumerate() { + let (name_id, _addr, _section_idx) = sym_entries[sym_idx]; + // Bind in the first defined section if any, else SHN_ABS. + let st_shndx = if sections.is_empty() { + elf::SHN_ABS + } else { + section_ids[0].0 as u16 + }; + writer.write_symbol(&Sym { + name: Some(name_id), + section: if sections.is_empty() { + None + } else { + Some(section_ids[0]) + }, + st_info: (elf::STB_GLOBAL << 4) | elf::STT_OBJECT, + st_other: 0, + st_shndx, + st_value: 0, + st_size: 0, + }); + let _ = sym_name; // keep clippy happy — name is consumed via name_id + } + + writer.write_strtab(); + writer.write_shstrtab(); + + // Section headers --------------------------------------------------- + writer.write_null_section_header(); + for (i, (name, size)) in sections.iter().enumerate() { + let (sh_type, sh_flags): (u32, u32) = match *name { + ".text" => (elf::SHT_PROGBITS, elf::SHF_ALLOC | elf::SHF_EXECINSTR), + ".bss" => (elf::SHT_NOBITS, elf::SHF_ALLOC | elf::SHF_WRITE), + ".data" => (elf::SHT_PROGBITS, elf::SHF_ALLOC | elf::SHF_WRITE), + _ => (elf::SHT_PROGBITS, elf::SHF_ALLOC), + }; + writer.write_section_header(&SectionHeader { + name: Some(section_names[i]), + sh_type, + sh_flags: u64::from(sh_flags), + sh_addr: 0x1000 + (i as u64) * 0x1000, + sh_offset: section_offsets[i], + sh_size: *size, + sh_link: 0, + sh_info: 0, + sh_addralign: 1, + sh_entsize: 0, + }); + } + writer.write_symtab_section_header(1); // first non-local symbol index + writer.write_strtab_section_header(); + writer.write_shstrtab_section_header(); + + buf + } + + #[test] + fn from_bytes_round_trip() { + let bytes = build_elf(&[(".text", 4)], &[("setup", 0)]); + let probe = ElfProbe::from_bytes(bytes.clone()); + assert_eq!(probe.bytes.len(), bytes.len()); + } + + #[test] + fn sections_lists_text_data_bss_in_known_fixture() { + let bytes = build_elf( + &[(".text", 64), (".data", 16), (".bss", 32)], + &[("setup", 0)], + ); + let probe = ElfProbe::from_bytes(bytes); + let sections = probe.sections().expect("sections"); + let by_name: std::collections::HashMap<_, _> = + sections.iter().map(|s| (s.name.clone(), s.size)).collect(); + assert_eq!(by_name.get(".text").copied(), Some(64)); + assert_eq!(by_name.get(".data").copied(), Some(16)); + assert_eq!(by_name.get(".bss").copied(), Some(32)); + } + + #[test] + fn section_size_returns_zero_for_missing() { + let bytes = build_elf(&[(".text", 8)], &[]); + let probe = ElfProbe::from_bytes(bytes); + assert_eq!(probe.section_size(".dmabuffers").expect("size"), 0); + } + + #[test] + fn text_data_bss_sum_aggregates() { + let bytes = build_elf(&[(".text", 100), (".data", 20), (".bss", 8)], &[]); + let probe = ElfProbe::from_bytes(bytes); + assert_eq!(probe.text_data_bss_sum().expect("sum"), 128); + } + + #[test] + fn text_data_bss_sum_handles_partial_sections() { + let bytes = build_elf(&[(".text", 50)], &[]); + let probe = ElfProbe::from_bytes(bytes); + assert_eq!(probe.text_data_bss_sum().expect("sum"), 50); + } + + #[test] + fn symbols_lists_known_symbols() { + let bytes = build_elf( + &[(".text", 4)], + &[("setup", 0), ("loop", 0), ("digitalWrite", 0)], + ); + let probe = ElfProbe::from_bytes(bytes); + assert!(probe.has_symbol("setup").expect("setup")); + assert!(probe.has_symbol("loop").expect("loop")); + assert!(probe.has_symbol("digitalWrite").expect("digitalWrite")); + assert!(!probe.has_symbol("not_present").expect("missing")); + } + + #[test] + fn has_symbol_containing_handles_substring() { + let bytes = build_elf( + &[(".text", 4)], + &[("_ZN8RadioHead4sendEv", 0), ("setup", 0)], + ); + let probe = ElfProbe::from_bytes(bytes); + assert!(probe.has_symbol_containing("RadioHead").expect("RadioHead")); + assert!(!probe.has_symbol_containing("mbedtls").expect("mbedtls")); + } + + #[test] + fn non_elf_input_returns_parse_error() { + let probe = ElfProbe::from_bytes(b"not an elf".to_vec()); + match probe.sections() { + Err(ElfProbeError::Parse(_)) => {} + other => panic!("expected Parse error, got {other:?}"), + } + } + + #[test] + fn open_returns_io_error_for_missing_file() { + let path = std::env::temp_dir().join("fbuild-elfprobe-no-such-file-xyz123.elf"); + // Ensure it really doesn't exist. + let _ = std::fs::remove_file(&path); + match ElfProbe::open(&path) { + Err(ElfProbeError::Io(_)) => {} + other => panic!("expected Io error, got {other:?}"), + } + } + + #[test] + fn section_lookup_returns_none_when_absent() { + let bytes = build_elf(&[(".text", 4)], &[]); + let probe = ElfProbe::from_bytes(bytes); + assert!(probe.section(".dmabuffers").expect("section").is_none()); + assert!(probe.section(".text").expect("text").is_some()); + } + + #[test] + fn zero_sized_section_appears_in_listing() { + // Adversary: a target with .bss legitimately sized 0. Old behaviour + // (size > 0 filter) hid such sections from `sections()` and made + // `section(".bss")` return None even though the section header was + // present. The new contract returns every named section regardless + // of size — `section(".bss")` returns Some with size=0. + let bytes = build_elf(&[(".text", 16), (".bss", 0)], &[]); + let probe = ElfProbe::from_bytes(bytes); + let sections = probe.sections().expect("sections"); + assert!( + sections.iter().any(|s| s.name == ".bss" && s.size == 0), + "expected zero-sized .bss in listing, got {:?}", + sections + ); + let bss = probe.section(".bss").expect("bss query"); + assert_eq!(bss.map(|s| s.size), Some(0)); + // section_size still returns 0 either way. + assert_eq!(probe.section_size(".bss").expect("size"), 0); + } + + #[test] + fn truncated_elf_returns_parse_error() { + // Adversary: ELF magic present but body truncated to under the + // smallest viable header. Must not panic — should surface a Parse + // error. + let bytes = b"\x7fELF\x00\x00\x00".to_vec(); + let probe = ElfProbe::from_bytes(bytes); + match probe.sections() { + Err(ElfProbeError::Parse(_)) => {} + other => panic!("expected Parse error, got {other:?}"), + } + } + + #[test] + fn empty_input_returns_parse_error() { + let probe = ElfProbe::from_bytes(Vec::new()); + match probe.sections() { + Err(ElfProbeError::Parse(_)) => {} + other => panic!("expected Parse error, got {other:?}"), + } + } + + #[test] + fn missing_symbol_returns_false() { + let bytes = build_elf(&[(".text", 4)], &[("setup", 0x1000)]); + let probe = ElfProbe::from_bytes(bytes); + assert!(!probe.has_symbol("nonexistent_symbol").expect("query")); + assert!(!probe.has_symbol_containing("nonexistent").expect("query")); + } +} diff --git a/crates/fbuild-test-support/src/lib.rs b/crates/fbuild-test-support/src/lib.rs index 3685d67d..e6bbb6f0 100644 --- a/crates/fbuild-test-support/src/lib.rs +++ b/crates/fbuild-test-support/src/lib.rs @@ -1,5 +1,13 @@ //! Test utilities and fixtures for fbuild. +pub mod compile_db; +pub mod elf_probe; +pub mod mini_framework; + +pub use compile_db::{CompileDb, CompileDbError, CompileEntry}; +pub use elf_probe::{ElfProbe, ElfProbeError, SectionInfo, SymbolInfo}; +pub use mini_framework::{LibraryBuilder, MiniFramework}; + /// Create a temporary project directory with a minimal platformio.ini. pub fn create_test_project(env_name: &str, platform: &str, board: &str) -> tempfile::TempDir { let dir = tempfile::tempdir().expect("failed to create temp dir"); diff --git a/crates/fbuild-test-support/src/mini_framework.rs b/crates/fbuild-test-support/src/mini_framework.rs new file mode 100644 index 00000000..70c7e44a --- /dev/null +++ b/crates/fbuild-test-support/src/mini_framework.rs @@ -0,0 +1,477 @@ +//! Fluent builder for fake Teensyduino / STM32duino / Arduino framework +//! trees backed by a `tempfile::TempDir`. +//! +//! Phases 1–3 of shipped a +//! header scanner ([`fbuild_header_scan::scan`]), include-graph walker +//! ([`fbuild_header_scan::walk`]) and a PlatformIO-LDF-style resolver +//! ([`fbuild_library_select::resolve`]). Their unit tests inline their own +//! tempdir scaffolding. Future phase tests (#205 §2/§3/§5) need a reusable +//! fixture so the per-orchestrator integration tests can stop reinventing the +//! same Arduino-library directory layout. +//! +//! [`MiniFramework`] writes its trees in the layout +//! [`fbuild_packages::library::framework_library::discover_framework_libraries`] +//! recognizes — that is the contract: anything you build with this fixture +//! must be discoverable by the production walk. +//! +//! # Layout +//! +//! Two roots live under a fresh `TempDir`: +//! +//! ```text +//! /framework/ +//! libraries/ +//! SPI/ +//! src/ +//! SPI.h +//! /project/ +//! src/ +//! include/ (created on demand by add_project_include / sketch) +//! ``` +//! +//! # Example +//! +//! ```no_run +//! use fbuild_test_support::MiniFramework; +//! +//! let mut fx = MiniFramework::new(); +//! fx.add_library("SPI") +//! .header("// SPI header\n") +//! .cpp("// SPI impl\n") +//! .done(); +//! fx.sketch("#include \nvoid setup() {}\nvoid loop() {}\n"); +//! +//! let libs = fbuild_packages::library::framework_library::discover_framework_libraries( +//! &fx.libraries_dir(), +//! ); +//! assert_eq!(libs[0].name, "SPI"); +//! ``` + +use std::path::{Path, PathBuf}; + +use tempfile::TempDir; + +/// Fake Arduino-style framework + project tree backed by a `TempDir`. +/// +/// See module docs for the on-disk layout. The `TempDir` is dropped (and +/// scrubbed) when this struct is dropped, so callers should keep the fixture +/// alive for the duration of the test. +pub struct MiniFramework { + /// Owning handle for the temp tree. Held for its `Drop` side effect. + _tmp: TempDir, + framework_root: PathBuf, + project_root: PathBuf, +} + +impl MiniFramework { + /// Create a new fake framework tree under a fresh `TempDir`. + /// + /// `framework_root/libraries/`, `project_root/src/` are created eagerly. + /// `project_root/include/` is created lazily by [`add_project_include`] + /// or [`project_search_paths`]'s callers. + /// + /// [`add_project_include`]: MiniFramework::add_project_include + /// [`project_search_paths`]: MiniFramework::project_search_paths + pub fn new() -> Self { + let tmp = tempfile::tempdir().expect("MiniFramework: failed to create temp dir"); + let framework_root = tmp.path().join("framework"); + let project_root = tmp.path().join("project"); + + std::fs::create_dir_all(framework_root.join("libraries")) + .expect("MiniFramework: failed to create framework/libraries"); + std::fs::create_dir_all(project_root.join("src")) + .expect("MiniFramework: failed to create project/src"); + + Self { + _tmp: tmp, + framework_root, + project_root, + } + } + + /// Begin adding a framework library named `name`. + /// + /// `/libraries//src/` is created eagerly and a + /// default empty `.h` is written so trivial cases — "library `SPI` + /// exists with header `SPI.h`" — don't need any builder methods. + pub fn add_library(&mut self, name: &str) -> LibraryBuilder<'_> { + let lib_dir = self.framework_root.join("libraries").join(name); + let src_dir = lib_dir.join("src"); + std::fs::create_dir_all(&src_dir) + .unwrap_or_else(|e| panic!("MiniFramework: failed to create {src_dir:?}: {e}")); + + // Default empty header so callers don't need .header("") for trivial + // libs. + let default_header = src_dir.join(format!("{name}.h")); + std::fs::write(&default_header, "") + .unwrap_or_else(|e| panic!("MiniFramework: failed to write {default_header:?}: {e}")); + + LibraryBuilder { + name: name.to_string(), + lib_dir, + src_dir, + _phantom: std::marker::PhantomData, + } + } + + /// Write a project source file relative to `/src/`. + pub fn add_project_source(&mut self, rel_path: &str, contents: &str) -> &mut Self { + let dst = self.project_root.join("src").join(rel_path); + write_file(&dst, contents); + self + } + + /// Write a project include header relative to `/include/`. + pub fn add_project_include(&mut self, rel_path: &str, contents: &str) -> &mut Self { + let dst = self.project_root.join("include").join(rel_path); + write_file(&dst, contents); + self + } + + /// Convenience: write `/src/main.cpp` with `contents`. + pub fn sketch(&mut self, contents: &str) -> &mut Self { + self.add_project_source("main.cpp", contents) + } + + /// Absolute path to the project root (`/project`). + pub fn project_root(&self) -> &Path { + &self.project_root + } + + /// Absolute path to the framework root (`/framework`). + pub fn framework_root(&self) -> &Path { + &self.framework_root + } + + /// Absolute path to the framework's `libraries/` dir. + pub fn libraries_dir(&self) -> PathBuf { + self.framework_root.join("libraries") + } + + /// Absolute path to the project's `src/` dir. + pub fn project_src(&self) -> PathBuf { + self.project_root.join("src") + } + + /// Walk `/src/**` recursively for compilable source files + /// and return them as walker seeds. + /// + /// Extensions match the set used by + /// [`fbuild_packages::library::framework_library::collect_library_sources`] + /// (`.c`, `.cpp`, `.cc`, `.cxx`, `.s`). Headers under `src/` are not + /// seeds — they are reached via `#include`. Returned paths are sorted for + /// determinism. + pub fn project_seeds(&self) -> Vec { + let mut seeds = Vec::new(); + collect_seeds(&self.project_root.join("src"), &mut seeds); + seeds.sort(); + seeds + } + + /// Project-level include search paths, in PlatformIO order. + /// + /// Returns `[/include, /src]` if `include/` exists on + /// disk, otherwise just `[/src]`. Both returned paths are + /// guaranteed to exist. + pub fn project_search_paths(&self) -> Vec { + let mut paths = Vec::new(); + let include = self.project_root.join("include"); + if include.is_dir() { + paths.push(include); + } + paths.push(self.project_root.join("src")); + paths + } +} + +impl Default for MiniFramework { + fn default() -> Self { + Self::new() + } +} + +/// Fluent builder for content of a single framework library. +/// +/// Returned by [`MiniFramework::add_library`]; finish with [`done`] (or just +/// drop the builder). +/// +/// [`done`]: LibraryBuilder::done +#[must_use = "LibraryBuilder is a fluent builder; call .done() or chain methods"] +pub struct LibraryBuilder<'a> { + name: String, + lib_dir: PathBuf, + src_dir: PathBuf, + /// Borrow-checker tether so each library builder is bounded to its parent + /// `MiniFramework`'s lifetime. + _phantom: std::marker::PhantomData<&'a mut MiniFramework>, +} + +impl<'a> LibraryBuilder<'a> { + /// Overwrite the default `/src/.h` with `contents`. + pub fn header(self, contents: &str) -> Self { + let dst = self.src_dir.join(format!("{}.h", self.name)); + write_file(&dst, contents); + self + } + + /// Write `/src/.cpp` with `contents`. + pub fn cpp(self, contents: &str) -> Self { + let dst = self.src_dir.join(format!("{}.cpp", self.name)); + write_file(&dst, contents); + self + } + + /// Write any additional file under `/src/`. + /// + /// `rel_path` may contain subdirectories — they are created as needed. + pub fn extra(self, rel_path: &str, contents: &str) -> Self { + let dst = self.src_dir.join(rel_path); + write_file(&dst, contents); + self + } + + /// Write `/examples/` — exists so resolver tests can prove + /// `examples/` content is excluded from the compile set. + pub fn example(self, rel_path: &str, contents: &str) -> Self { + let dst = self.lib_dir.join("examples").join(rel_path); + write_file(&dst, contents); + self + } + + /// Write `/extras/` — exists so resolver tests can prove + /// `extras/` content is excluded. + pub fn extras(self, rel_path: &str, contents: &str) -> Self { + let dst = self.lib_dir.join("extras").join(rel_path); + write_file(&dst, contents); + self + } + + /// Write `/tests/` — exists so resolver tests can prove + /// `tests/` content is excluded. + pub fn tests(self, rel_path: &str, contents: &str) -> Self { + let dst = self.lib_dir.join("tests").join(rel_path); + write_file(&dst, contents); + self + } + + /// Finish the library (drop the builder). + pub fn done(self) {} +} + +fn write_file(dst: &Path, contents: &str) { + if let Some(parent) = dst.parent() { + std::fs::create_dir_all(parent) + .unwrap_or_else(|e| panic!("MiniFramework: failed to create {parent:?}: {e}")); + } + std::fs::write(dst, contents) + .unwrap_or_else(|e| panic!("MiniFramework: failed to write {dst:?}: {e}")); +} + +fn collect_seeds(dir: &Path, out: &mut Vec) { + let Ok(entries) = std::fs::read_dir(dir) else { + return; + }; + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + collect_seeds(&path, out); + } else { + let ext = path + .extension() + .unwrap_or_default() + .to_string_lossy() + .to_lowercase(); + if matches!(ext.as_str(), "c" | "cpp" | "cc" | "cxx" | "s") { + out.push(path); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use fbuild_header_scan::walk; + use fbuild_library_select::resolve; + use fbuild_packages::library::framework_library::{ + collect_library_sources, discover_framework_libraries, + }; + + #[test] + fn mini_framework_new_creates_expected_dir_layout() { + let fx = MiniFramework::new(); + assert!(fx.libraries_dir().is_dir(), "framework/libraries/ missing"); + assert!(fx.project_src().is_dir(), "project/src/ missing"); + // include/ is lazy. + assert!(!fx.project_root().join("include").exists()); + } + + #[test] + fn add_library_creates_default_header() { + let mut fx = MiniFramework::new(); + fx.add_library("SPI").done(); + let header = fx.libraries_dir().join("SPI").join("src").join("SPI.h"); + assert!(header.is_file(), "default SPI.h missing: {header:?}"); + let bytes = std::fs::read(&header).unwrap(); + assert!(bytes.is_empty(), "default header should be empty"); + } + + #[test] + fn library_builder_chaining() { + let mut fx = MiniFramework::new(); + fx.add_library("SPI") + .header("// hi\n") + .cpp("// impl\n") + .done(); + let src = fx.libraries_dir().join("SPI").join("src"); + assert_eq!( + std::fs::read_to_string(src.join("SPI.h")).unwrap(), + "// hi\n" + ); + assert_eq!( + std::fs::read_to_string(src.join("SPI.cpp")).unwrap(), + "// impl\n", + ); + } + + #[test] + fn extra_writes_nested_path() { + let mut fx = MiniFramework::new(); + fx.add_library("SPI").extra("utility/foo.h", "x").done(); + let nested = fx + .libraries_dir() + .join("SPI") + .join("src") + .join("utility") + .join("foo.h"); + assert_eq!(std::fs::read_to_string(&nested).unwrap(), "x"); + } + + #[test] + fn examples_extras_tests_are_excluded_from_collect_library_sources() { + let mut fx = MiniFramework::new(); + fx.add_library("SPI") + .cpp("// real impl\n") + .example("Demo.cpp", "// demo\n") + .extras("tool.cpp", "// tool\n") + .tests("test_spi.cpp", "// test\n") + .done(); + let lib_dir = fx.libraries_dir().join("SPI"); + let sources = collect_library_sources(&lib_dir); + // Only the real .cpp under src/ should appear. + assert_eq!( + sources, + vec![lib_dir.join("src").join("SPI.cpp")], + "examples/extras/tests must be excluded", + ); + } + + #[test] + fn discover_framework_libraries_finds_all_libs() { + let mut fx = MiniFramework::new(); + fx.add_library("SPI").done(); + fx.add_library("Wire").done(); + fx.add_library("EEPROM").done(); + + let libs = discover_framework_libraries(&fx.libraries_dir()); + let names: Vec<_> = libs.iter().map(|l| l.name.as_str()).collect(); + assert_eq!(names, vec!["EEPROM", "SPI", "Wire"]); + } + + #[test] + fn project_seeds_returns_all_src_sources() { + let mut fx = MiniFramework::new(); + fx.add_project_source("main.cpp", "// main\n"); + fx.add_project_source("helpers/util.cpp", "// util\n"); + + let seeds = fx.project_seeds(); + let expected = { + let mut v = vec![ + fx.project_src().join("helpers").join("util.cpp"), + fx.project_src().join("main.cpp"), + ]; + v.sort(); + v + }; + assert_eq!(seeds, expected); + } + + #[test] + fn project_seeds_skips_headers() { + let mut fx = MiniFramework::new(); + fx.add_project_source("main.cpp", ""); + fx.add_project_source("local.h", ""); + let seeds = fx.project_seeds(); + assert_eq!(seeds, vec![fx.project_src().join("main.cpp")]); + } + + #[test] + fn project_search_paths_includes_dir_only_when_present() { + let mut fx = MiniFramework::new(); + // No include/ yet. + let paths = fx.project_search_paths(); + assert_eq!(paths, vec![fx.project_src()]); + + // Populate include/. + fx.add_project_include("Project.h", "// project header\n"); + let paths = fx.project_search_paths(); + assert_eq!( + paths, + vec![fx.project_root().join("include"), fx.project_src()], + ); + } + + #[test] + fn sketch_helper_writes_main_cpp() { + let mut fx = MiniFramework::new(); + fx.sketch("// sketch\n"); + let main = fx.project_src().join("main.cpp"); + assert_eq!(std::fs::read_to_string(&main).unwrap(), "// sketch\n"); + } + + #[test] + fn walker_round_trip() { + let mut fx = MiniFramework::new(); + fx.add_library("SPI").done(); + fx.sketch("#include \n"); + + let libs = discover_framework_libraries(&fx.libraries_dir()); + // Search paths: project search paths + every lib's include dirs. + let mut search_paths = fx.project_search_paths(); + for lib in &libs { + for d in &lib.include_dirs { + search_paths.push(d.clone()); + } + } + + let res = walk(&fx.project_seeds(), &search_paths); + let spi_h = std::fs::canonicalize(fx.libraries_dir().join("SPI").join("src").join("SPI.h")) + .unwrap(); + assert!( + res.reached.contains(&spi_h), + "walker did not reach SPI.h via fixture; reached={:?}", + res.reached, + ); + } + + #[test] + fn resolver_round_trip() { + let mut fx = MiniFramework::new(); + fx.add_library("SPI").cpp("// impl\n").done(); + fx.add_library("Wire").cpp("// wire impl\n").done(); + fx.sketch("#include \nvoid setup() {}\nvoid loop() {}\n"); + + let libs = discover_framework_libraries(&fx.libraries_dir()); + let sel = resolve(&fx.project_seeds(), &fx.project_search_paths(), &libs); + + assert_eq!( + sel.required_libraries, + vec!["SPI".to_string()], + "only SPI should be selected; got {:?}", + sel.required_libraries, + ); + // Wire must NOT bleed in via mere existence under libraries/. + assert!(!sel.required_libraries.iter().any(|n| n == "Wire")); + } +} diff --git a/tasks/README.md b/tasks/README.md index cae4f766..ec9751e4 100644 --- a/tasks/README.md +++ b/tasks/README.md @@ -6,3 +6,5 @@ Planning and tracking for the fbuild Rust port. - **`todo.md`** -- Platform-by-platform migration checklist with completed and pending items - **`lessons.md`** -- Lessons learned from development (toolchain conflicts, clippy patterns, etc.) +- **`baseline-205.md`** -- Baseline ELF / TU-count measurements captured at the foundation-landed SHA for #205 (regenerate via `uv run python ci/measure_baseline_205.py`) +- **`zccache-kv-design.md`** -- Design note for the namespaced K/V store added to zccache (filed as `zackees/zccache#130`); prerequisite for #205 Phase 4 memoization. diff --git a/tasks/baseline-205.md b/tasks/baseline-205.md new file mode 100644 index 00000000..4d37312a --- /dev/null +++ b/tasks/baseline-205.md @@ -0,0 +1,68 @@ +# Baseline measurements for #205 — DEFERRED + +Captured: 2026-04-24 +Git SHA: (this PR's foundation commit — see PR description) +Branch: main +Tooling: `uv run python ci/measure_baseline_205.py` + +## Status + +The capture script (`ci/measure_baseline_205.py`) is implemented and +runnable. The actual data capture is **deferred to a follow-up step** +because Teensy/STM32 builds against the foundation-landed resolver are +heavyweight (multi-minute per board on a cold cache) and the build +infrastructure on the development workstation could not complete all +four boards within the agent run-window. Running the script in a clean +CI environment with all toolchains pre-warmed will populate the table +below. + +## How to run + +```bash +uv run python ci/measure_baseline_205.py --out tasks/baseline-205.md +uv run python ci/measure_baseline_205.py --targets teensyLC teensy41 +``` + +The script: + +1. Builds `tests/platform/` for `teensyLC`, `teensy30`, + `teensy41`, `stm32f103c8` via the existing `fbuild` CLI. +2. Counts distinct `file` entries in the resulting + `compile_commands.json` (TU count). +3. Probes `firmware.elf` section sizes (`.text`, `.data`, `.bss`, + `.dmabuffers`) via `arm-none-eabi-size` (preferred) or `llvm-size`. +4. Scans `compile_commands.json` for `FNET` / `Snooze` / `RadioHead` + / `mbedtls` references — the four libraries that were wrongly + selected before the foundation phases of #205 landed. + +## Expected once captured + +| env | TU count | .text | .data | .bss | .dmabuffers | excluded libs | +|--------------|----------|-------|-------|------|-------------|----------------| +| teensyLC | (≤ 250 per AC#1) | … | … | (≤ 3 KB per AC#1) | — | none of FNET/Snooze/RadioHead/mbedtls present | +| teensy30 | … | … | … | … | (≤ 1 KB per AC#2) | none | +| teensy41 | … | … | … | … | … | (regression baseline) | +| stm32f103c8 | … | … | … | … | — | (must include SPI per AC#4) | + +## Why not just ship the placeholder and call it done + +Phase 6 (acceptance gates) needs *real* numbers to anchor the +"+1%" / "≤ 250" / "≤ 3 KB" thresholds in the issue body. A guess will +be argued about during Phase 6 reviews. The capture has to happen on a +host that can actually link these four ELFs, which means either (a) a +clean CI runner with the Teensy/STM32 toolchains pinned, or (b) a +warmed local install where every framework download has already +completed. Neither was ready inside this PR's window. + +## Tracking + +This file is replaced wholesale on the next successful run of +`measure_baseline_205.py`. The non-empty rows above will be filled in +with measured numbers + an ISO timestamp + the exact git SHA the +measurement was taken against. + +## Run command + +```bash +uv run python ci/measure_baseline_205.py --out tasks/baseline-205.md +``` diff --git a/tasks/zccache-kv-design.md b/tasks/zccache-kv-design.md new file mode 100644 index 00000000..800fa44f --- /dev/null +++ b/tasks/zccache-kv-design.md @@ -0,0 +1,322 @@ +# zccache K/V — feature proposal (origin: FastLED/fbuild#205, Phase 4) + +## Summary + +Extend the **existing** `zccache` crate workspace and CLI with a small, +namespaced, blake3-keyed key/value store. **Not a new binary**, not a new +top-level crate — the K/V API lives next to `ArtifactStore` in the existing +`zccache-artifact` crate, reuses the existing `~/.zccache/index.redb` +database file (separate redb table), and surfaces through the existing +`zccache` CLI as new subcommands (`zccache kv get|put|ls|rm|clear`). + +The motivation is fbuild's #205 Phase 4 — memoizing PlatformIO-LDF-style +library-selection results between builds. The data shape (`Selection`: +include closure + selected library names + compile/include sets) does not +fit the compile-action-shaped `ArtifactStore`. A general-purpose K/V is the +clean answer and useful beyond fbuild. + +## Why fold into the existing crate, not split + +- Avoids a new `zccache-kv` crate, a new bin, a new release artifact. + Today `zccache-artifact` already owns redb. Adding a second redb table + alongside `ARTIFACTS_TABLE` is one file and ~150 lines. +- Single binary surface for users: `zccache kv ...` parallels + `zccache artifact ...`. No discoverability fragmentation. +- Single backup / nuke target: `~/.zccache/` purges everything. +- Versioned together: a zccache release advances both surfaces atomically. + +## Public API + +In `crates/zccache-artifact/src/kv.rs` (new file), exported from the +crate's `lib.rs`: + +```rust +use std::path::Path; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Key(pub [u8; 32]); + +impl Key { + pub fn from_hash(h: blake3::Hash) -> Self; + pub fn as_bytes(&self) -> &[u8; 32]; + pub fn to_hex(&self) -> String; // 64-char lowercase + pub fn from_hex(hex: &str) -> Result; +} + +#[derive(Debug, thiserror::Error)] +pub enum KvError { + #[error("io: {0}")] Io(#[from] std::io::Error), + #[error("redb: {0}")] Redb(String), + #[error("namespace must be 1..=64 chars of [a-z0-9-]")] BadNamespace, + #[error("key must be 32 bytes (64 hex chars)")] BadKey, + #[error("corrupt entry for key {0}: {1}")] Corrupt(String, String), + #[error("value too large: {0} bytes (max {1})")] TooLarge(usize, usize), +} +pub type KvResult = std::result::Result; + +pub struct KvStore { /* private: holds &Database or owns one */ } + +impl KvStore { + /// Open under the canonical zccache root (`~/.zccache/` or + /// `$ZCCACHE_DIR`). Reuses the same redb file as `ArtifactStore`. + pub fn open_default() -> KvResult; + /// Open at an explicit dir (test / ephemeral use). + pub fn open>(dir: P) -> KvResult; + /// Share an already-open redb `Database` so the artifact and KV stores + /// can co-exist without contending on Database creation. + pub fn from_database(db: std::sync::Arc) -> Self; + + /// Cache miss returns `Ok(None)`. `Err` only on backend or corruption. + pub fn get(&self, namespace: &str, key: &Key) -> KvResult>>; + /// Last-writer-wins. Returns bytes written. + pub fn put(&self, namespace: &str, key: &Key, value: &[u8]) -> KvResult; + /// Idempotent (missing key is not an error). + pub fn remove(&self, namespace: &str, key: &Key) -> KvResult<()>; + /// Drop every entry under one namespace. + pub fn clear_namespace(&self, namespace: &str) -> KvResult<()>; + /// Iterator-by-collection (sorted by hex key) for `kv ls`. + pub fn list_namespace(&self, namespace: &str) -> KvResult>; + + /// Total bytes across every namespace. + pub fn total_bytes(&self) -> KvResult; + pub fn namespace_bytes(&self, namespace: &str) -> KvResult; +} +``` + +### Storage layout + +``` +~/.zccache/ + index.redb # SHARED with ArtifactStore (separate redb table) + kv/ + / + <64-hex>.bin # raw value bytes for values > INLINE_THRESHOLD +``` + +- Values **≤ `INLINE_THRESHOLD = 4 KiB`** are stored inline in the redb + table (low overhead, single fsync, no second open). +- Values **> 4 KiB** are spilled to disk under `kv//.bin`. + The redb table holds a marker indicating spill + payload length + a + blake3 of the file body (for corruption detection on read). +- Hard cap: `MAX_VALUE_BYTES = 64 MiB`. Over-cap → `Err(TooLarge)`. + +### Redb table schema + +```rust +const KV_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("kv"); +``` + +Composite key encoding: `format!("{namespace}::{hex_key}")`. Namespaces +are validated `[a-z0-9-]` so `::` is unambiguous. + +Row value (bincode-serialized): + +```rust +struct KvRow { + schema_version: u32, // bump on layout change + body: KvBody, // Inline(Vec) or Spilled { len, blake3 } +} +``` + +### Namespace rules + +- `[a-z0-9-]`, 1..=64 chars. Anything else → `KvError::BadNamespace`. +- Reserved namespaces (no enforcement, just convention): + - `library-selection` — fbuild #205. + - `compile-graph` — future use. + - `_test` — anything beginning with `_test` is fair game for tests. + +## CLI surface (in `zccache-cli`) + +``` +zccache kv get # writes value to stdout +zccache kv put [--value-from |--value-from-stdin] +zccache kv rm +zccache kv ls # one row per entry: +zccache kv clear +zccache kv stats # total / per-namespace bytes +``` + +Exit codes match the rest of the CLI. `kv get` on a miss exits 2 with no +stdout (parseable from shell). + +--- + +## Test plan — comprehensive + adversarial + +All tests live in `crates/zccache-artifact/src/kv.rs` mod tests, plus a +small CLI integration test under `crates/zccache-cli/tests/kv.rs`. No +mocks; `tempfile::TempDir` per test. + +### Functional + +- **F1**: `put` then `get` returns the same bytes. Value sizes: 0, 1, + 100, 4 KiB - 1, 4 KiB, 4 KiB + 1, 100 KiB, 4 MiB. +- **F2**: `get` on missing key returns `Ok(None)`. +- **F3**: `put` overwrite returns the new value on subsequent `get`. +- **F4**: `remove` on a present key drops it; `get` is `None`. `remove` + on missing key is `Ok(())` (idempotent). +- **F5**: `clear_namespace` drops every entry under that namespace and + leaves entries in other namespaces untouched. +- **F6**: `list_namespace` returns hex-key-sorted `(Key, u64-len)` pairs. +- **F7**: `total_bytes` = sum of `namespace_bytes(ns)` over all namespaces. +- **F8**: Inline/spill threshold is byte-exact: 4096 stays inline, 4097 + spills. Spilled file size on disk equals `value.len()`. +- **F9**: Spilled file body blake3 matches the stored hash. Tampered + spilled file → `KvError::Corrupt` on `get`. +- **F10**: `Key::from_hex` round-trips for valid input; rejects + non-lowercase, non-64-char, non-hex. +- **F11**: Namespace validator: accepts `a`, `0`, `library-selection`; + rejects `""`, `"A"`, `"name with space"`, `"a/b"`, `"日本語"`, + 65-char input. +- **F12**: Schema-version mismatch on read returns + `KvError::Corrupt(_, "schema_version=…")` rather than silent garbage. + +### Adversarial — concurrency + +These run on Linux + macOS + Windows CI matrix. They MUST be +deterministic on every OS — no `sleep`-based timing, only +join-on-thread synchronization. + +- **C1 — Same-key thundering herd**: 16 threads each call + `put(ns, k, [thread_id; 1024])` 100 times. Final `get(ns, k)` returns + one of the values (no torn read, no `None`, no error). Use + `std::thread::scope` so the test owns the threads. +- **C2 — Distinct-key parallel writers**: 32 threads, each writing 100 + unique keys. After join, every key reads back. Ordering doesn't + matter; correctness does. +- **C3 — Reader/writer race**: 8 reader threads spinning on + `get(ns, k)` while 8 writer threads spam `put(ns, k, ...)`. Readers + must observe only well-formed responses — `Ok(Some(_))` with bytes + matching some written value, or `Ok(None)` (if the very first read + beats the first write). Never `Err`. +- **C4 — Open-while-write**: thread A holds an open `KvStore`, thread B + opens a second `KvStore` on the same dir and `put`s. Thread A's `get` + sees the new value (redb's `WriteTransaction::commit` makes it visible + to subsequent read txns). +- **C5 — `clear_namespace` during writes**: thread A `clear_namespace`s + while thread B writes 1000 entries. After both join, the namespace is + either empty or contains the post-clear writes — no partial state, no + panic. (We don't promise atomicity vs. concurrent writes; we promise + the store stays consistent.) + +### Adversarial — durability / crash + +- **D1 — Tempfile + rename atomicity (spill path)**: monkeypatch the + spill writer to `panic!()` between `write_all` and `rename`. Reopen + the store. Verify (a) the destination spill file does not exist, (b) + the redb row for that key does not exist. Use `std::panic::catch_unwind` + + a feature-gated `#[cfg(test)] static FAIL_AT: AtomicU8`. +- **D2 — Mid-commit redb crash sim**: write a row, drop the `KvStore` + *without* committing (impossible in normal use — we always commit per + call — but the test exercises the path by simulating an interrupted + commit via the `redb` test API if available, or by killing a + subprocess running the writer; whichever is easier). On reopen, the + pre-crash state is intact and the partial write is absent. +- **D3 — Repeated open/close**: open the store, `put`, close, reopen, + `get`, repeat 100 times in a tight loop. No file-handle leaks (file + descriptor count stable on Linux/macOS via `/proc/self/fd`; on Windows + via `GetProcessHandleCount`). + +### Adversarial — platform compliance + +- **P1 — Path-separator portability**: under `cfg(windows)`, `kv/` + subdir uses backslashes via `Path::join`. Under `cfg(unix)` use + forward slashes. Sanity-check `entry_path.parent() == kv_dir`. +- **P2 — Windows long path**: open the store at a deeply nested + `TempDir` such that the spilled file path is > 260 chars. Confirm + spill + readback succeed. Required path manipulation: prefix with + `\\?\` (Rust's `std::fs` does this automatically on + Windows ≥ 1.42 stdlib; verify with a test file written to the path). + Skip on non-Windows. +- **P3 — Case-insensitive FS**: on macOS APFS (default case-insensitive) + and Windows NTFS, two keys differing only in hex case must not + collide. Easy: `Key::from_hex` lowercases; "DEADBEEF" and "deadbeef" + parse to the same Key. Test by attempting to insert both and + asserting the second is an overwrite, not a new entry. The hex + representation we *write* to disk is always lowercase. +- **P4 — Symlinked store dir** (`cfg(unix)` only): create a TempDir, + symlink it under another path, open the store via the symlink. Verify + put/get round-trips and the symlink target receives the data. +- **P5 — Read-only directory**: chmod store dir to `0o555` (Unix) / + attrib +R (Windows), `put` returns `Err(KvError::Io(_))`. Cleanup + resets permissions. +- **P6 — UTF-8 namespace rejection**: `put("中文", ...)` → BadNamespace + on every OS. +- **P7 — Large path on Linux**: write to a path at exactly NAME_MAX + (255) for the spill file. Pass on Linux/Mac. (NAME_MAX irrelevant on + Windows, where the limit is on the full path.) +- **P8 — fsync round-trip**: `put`, drop the `KvStore`, reopen, `get` + returns the value. This is the most basic durability check; it must + pass on every OS without any platform-specific code in the test. +- **P9 — Concurrent open-from-two-processes** (separate process test + using `std::process::Command` so we exercise OS-level file locking, + not just Rust's): spawn child A that holds the store open with a + blocking write, spawn child B that opens the same dir. Verify either + (a) B blocks until A finishes (redb file lock), or (b) B fails fast + with a clear error. Do NOT silently corrupt. + +### Adversarial — input + +- **I1 — Empty namespace** → BadNamespace. +- **I2 — Namespace at limit (64 chars)** → ok. +- **I3 — Namespace at 65 chars** → BadNamespace. +- **I4 — Namespace with `::`** → BadNamespace (would collide with + composite-key encoding). +- **I5 — Value at `MAX_VALUE_BYTES`** (64 MiB) → ok. +- **I6 — Value at `MAX_VALUE_BYTES + 1`** → TooLarge. +- **I7 — Same key reused across namespaces** → values are independent; + asserting both round-trip. +- **I8 — `put` then crash before reading** → simulate via fresh + `KvStore::open` after committing; `get` succeeds (redb durability). + +### CLI integration + +Under `crates/zccache-cli/tests/kv.rs`, using `assert_cmd`: + +- `zccache kv put --value-from ` then + `zccache kv get ` round-trips bytes (binary safe — pipe + through a temp file rather than capturing stdout as UTF-8). +- `zccache kv ls ` lists exactly the keys put. +- `zccache kv rm` then `zccache kv get` exits 2 with empty stdout. +- `zccache kv clear ` followed by `kv ls` is empty. +- `zccache kv stats` reports nonzero `total_bytes` after a `put`. + +--- + +## Out of scope for the first release + +- TTLs / eviction (cache is grow-forever; we'll add when usage demands). +- Compression of stored values. +- Async API. The store is sync; fbuild's resolver runs on Rayon, not + Tokio. +- Cross-process advisory locking *beyond* what redb already provides. +- Arbitrary-precision keys. The 32-byte blake3 fingerprint is enough. + +## Versioning + +- Lands in the next minor zccache bump (currently 1.3.0 → 1.4.0). +- Breaking changes to the K/V API for one release while we shake out + layout under #205 traffic; this is documented in CHANGELOG. +- The redb row format carries `schema_version: u32`; we do NOT promise + forward compatibility within 1.x for the K/V table — opening a higher + version with a lower binary is a hard error, not silent corruption. + +## Coordination back to fbuild#205 + +When this lands and a release is cut, the fbuild side will: + +1. Bump `zccache` workspace dep to the released version in + `~/dev/fbuild3/Cargo.toml`. +2. Add `zccache-artifact = ""` as a Rust dep of + `crates/fbuild-library-select/`. +3. Use `KvStore::open_default()` + namespace `"library-selection"` + + the cache key composed in #205 Q9 + (blake3 of source hashes + canonical lib headers + search paths + + toolchain triple + framework version + `SCANNER_VERSION` + + `LDF_MODE_VERSION`). +4. Phase 4 integration tests (`#205 §4.1, §4.2`) exercise hit/miss, + key stability, and corruption fallback on the real crate. + +We post a status comment on FastLED/fbuild#205 with the released +zccache version, the bump PR link, and the integration test results.