From 6bff9d099c22758b6578a19c06503a9dd1d0f6bd Mon Sep 17 00:00:00 2001 From: Sam-Si <13261099+Sam-Si@users.noreply.github.com> Date: Sat, 16 May 2026 19:45:27 +0530 Subject: [PATCH 1/7] fix: resolve Bazel sandbox file system and race condition errors by configuring writable /dev/null and disabling directory reuse for tests --- .bazelrc | 16 ++++++++++++++-- dcodex-setup.sh | 31 +++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/.bazelrc b/.bazelrc index b776f81..340b30a 100755 --- a/.bazelrc +++ b/.bazelrc @@ -45,14 +45,18 @@ build --local_resources=cpu=16 build --local_resources=memory=57344 # Linux Sandbox Hardening -# --sandbox_tmpfs_path=/tmp : clean /tmp per action (isolation) -# --sandbox_add_mount_pair : ensures /dev/null is available +# --sandbox_tmpfs_path=/tmp : clean /tmp per action (isolation) +# --sandbox_add_mount_pair : bind-mounts /dev/null into sandbox +# --sandbox_writable_path : makes /dev/null writable (Bazel's +# test-setup.sh redirects to /dev/null; without this the bind-mount +# is read-only and tests fail with "Read-only file system") # NOTE: Do NOT use --sandbox_base=/dev/shm — Docker containers default # to 64MB /dev/shm which is too small for linking large binaries. # Stale sandbox state ("File exists" errors) is handled by cleaning # .bazel/output_base/sandbox before builds (see dcodex-setup.sh). build:linux --sandbox_tmpfs_path=/tmp build:linux --sandbox_add_mount_pair=/dev/null +build:linux --sandbox_writable_path=/dev/null build:linux --dynamic_mode=off build:linux --linkopt=-Wl,--threads=16 @@ -68,6 +72,14 @@ build --strategy=CppCompile=sandboxed,standalone build --strategy=CppLink=sandboxed,standalone build --genrule_strategy=sandboxed,standalone +# Disable sandbox directory reuse for tests. Bazel 7.x defaults +# --experimental_reuse_sandbox_directories=true which recycles sandbox +# dirs across actions. 
With high --runs_per_test (TSan uses 20) and +# concurrent --local_test_jobs, the async cleanup races against new +# sandbox setup, producing "Could not copy inputs into sandbox (File +# exists)" failures. Builds still benefit from reuse; only tests opt out. +test --noexperimental_reuse_sandbox_directories + # ------------------------------------------------------------ # 6. CACHING # ------------------------------------------------------------ diff --git a/dcodex-setup.sh b/dcodex-setup.sh index fc60b5a..6ffa095 100755 --- a/dcodex-setup.sh +++ b/dcodex-setup.sh @@ -113,6 +113,20 @@ version_gte() { [[ "$(printf '%s\n%s' "$2" "$1" | sort -V | head -n1)" == "$2" ]] } +# Remove stale sandbox working directories. Leftover sandbox state from +# a previous build/test (Ctrl+C, OOM kill, crash, or Bazel's own async +# cleanup not finishing in time) causes "Could not copy inputs into +# sandbox … (File exists)" on the next run. This is cheap (~instant) +# and only removes sandbox working dirs — disk cache & repo cache are +# untouched. Called once at startup AND before every `bazel test` +# invocation so no stale state ever leaks across sanitizer suites. +purge_sandbox_dirs() { + local sandbox_dir="${REPO_DIR}/.bazel/output_base/sandbox" + if [[ -d "$sandbox_dir" ]]; then + rm -rf "$sandbox_dir" + fi +} + # ───────────────────────────────────────────────────────────────────────────── # STEP 1 — Pre-flight checks # ───────────────────────────────────────────────────────────────────────────── @@ -309,16 +323,8 @@ else ok "Skipping bazel clean (incremental build — disk cache preserved)" fi -# Always purge stale sandbox directories. If a previous build was interrupted -# (Ctrl+C, OOM kill, crash), leftover files cause "File exists" errors on the -# next run. This is cheap (~instant) and only removes sandbox working dirs — -# the disk cache and repo cache are untouched. 
-if [[ -d "${REPO_DIR}/.bazel/output_base/sandbox" ]]; then - rm -rf "${REPO_DIR}/.bazel/output_base/sandbox" - ok "Purged stale sandbox directories" -else - ok "No stale sandbox directories to clean" -fi +purge_sandbox_dirs +ok "Sandbox directories clean" timer @@ -439,6 +445,10 @@ run_sanitizer_suite() { info "Running ${config_name} tests: ${targets[*]}" + # Purge sandbox dirs from previous suite so no stale state leaks across + # sanitizer configurations (asan → tsan → msan). + purge_sandbox_dirs + # Timestamp file for dump_test_logs() to find only fresh logs. touch /tmp/dcodex-test-ts-"$config_name" @@ -508,6 +518,7 @@ if [[ "$MODE" == "test" ]]; then # The sandbox_test forks clang++ which has high memory overhead under TSan. if [[ $TEST_STATUS_TSAN -eq 0 ]]; then info "Running sandbox_test under TSan (constrained: 1 job, 1 run)..." + purge_sandbox_dirs touch /tmp/dcodex-test-ts-tsan-sandbox set +e bazel "${BAZEL_JVM_FLAGS[@]}" test \ From 7a1e4a3daa4e8222578a20d2528aeebce1b3ac57 Mon Sep 17 00:00:00 2001 From: Sam-Si <13261099+Sam-Si@users.noreply.github.com> Date: Sat, 16 May 2026 20:00:31 +0530 Subject: [PATCH 2/7] refactor: implement collision-free test sandboxing with isolated roots and synchronous cleanup --- .bazelrc | 34 +++++++++++++++++++++++----------- dcodex-setup.sh | 16 +++++++++++++--- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/.bazelrc b/.bazelrc index 340b30a..c6a16f4 100755 --- a/.bazelrc +++ b/.bazelrc @@ -50,10 +50,10 @@ build --local_resources=memory=57344 # --sandbox_writable_path : makes /dev/null writable (Bazel's # test-setup.sh redirects to /dev/null; without this the bind-mount # is read-only and tests fail with "Read-only file system") -# NOTE: Do NOT use --sandbox_base=/dev/shm — Docker containers default -# to 64MB /dev/shm which is too small for linking large binaries. -# Stale sandbox state ("File exists" errors) is handled by cleaning -# .bazel/output_base/sandbox before builds (see dcodex-setup.sh). 
+# NOTE: Build sandboxes stay under output_base (not /dev/shm — Docker +# defaults to 64MB which is too small for linking large binaries). +# Test sandboxes are redirected to /tmp/bazel-sandbox (see §5 below). +# Stale sandbox state is purged by dcodex-setup.sh between suites. build:linux --sandbox_tmpfs_path=/tmp build:linux --sandbox_add_mount_pair=/dev/null build:linux --sandbox_writable_path=/dev/null @@ -72,13 +72,25 @@ build --strategy=CppCompile=sandboxed,standalone build --strategy=CppLink=sandboxed,standalone build --genrule_strategy=sandboxed,standalone -# Disable sandbox directory reuse for tests. Bazel 7.x defaults -# --experimental_reuse_sandbox_directories=true which recycles sandbox -# dirs across actions. With high --runs_per_test (TSan uses 20) and -# concurrent --local_test_jobs, the async cleanup races against new -# sandbox setup, producing "Could not copy inputs into sandbox (File -# exists)" failures. Builds still benefit from reuse; only tests opt out. -test --noexperimental_reuse_sandbox_directories +# Collision-free test sandboxing. Three independent safeguards: +# +# 1. Separate sandbox root — test sandboxes live under /tmp/bazel-sandbox, +# completely isolated from build sandboxes (which stay under output_base). +# Eliminates cross-action path contention in Docker's overlay filesystem. +# +# 2. No directory reuse — Bazel 7.x defaults --reuse_sandbox_directories=true +# which recycles sandbox dirs across actions. With high --runs_per_test +# (TSan uses 20) and concurrent --local_test_jobs, the async cleanup +# races against new sandbox setup → "Could not copy inputs (File exists)". +# Builds still benefit from reuse; only tests opt out. +# +# 3. Synchronous cleanup — the default async mode (4 threads) can fall behind +# during bursts of short-lived test actions, leaving stale inodes that trip +# up the next sandbox setup. Synchronous deletion guarantees a clean slate +# after every action. 
+test:linux --sandbox_base=/tmp/bazel-sandbox +test --noreuse_sandbox_directories +test --experimental_sandbox_async_tree_delete_idle_threads=0 # ------------------------------------------------------------ # 6. CACHING diff --git a/dcodex-setup.sh b/dcodex-setup.sh index 6ffa095..f24ae7d 100755 --- a/dcodex-setup.sh +++ b/dcodex-setup.sh @@ -120,10 +120,20 @@ version_gte() { # and only removes sandbox working dirs — disk cache & repo cache are # untouched. Called once at startup AND before every `bazel test` # invocation so no stale state ever leaks across sanitizer suites. +# +# Two locations are cleaned: +# 1. output_base/sandbox — default sandbox root (used by build actions) +# 2. /tmp/bazel-sandbox — dedicated test sandbox root (set via +# --sandbox_base in .bazelrc) to keep test sandboxes isolated from +# build sandboxes. purge_sandbox_dirs() { - local sandbox_dir="${REPO_DIR}/.bazel/output_base/sandbox" - if [[ -d "$sandbox_dir" ]]; then - rm -rf "$sandbox_dir" + local build_sandbox="${REPO_DIR}/.bazel/output_base/sandbox" + local test_sandbox="/tmp/bazel-sandbox" + if [[ -d "$build_sandbox" ]]; then + rm -rf "$build_sandbox" + fi + if [[ -d "$test_sandbox" ]]; then + rm -rf "$test_sandbox" fi } From c96ec05540ed1688bb8ae653fb7a54be5a1fe25a Mon Sep 17 00:00:00 2001 From: Sam-Si <13261099+Sam-Si@users.noreply.github.com> Date: Sat, 16 May 2026 20:14:53 +0530 Subject: [PATCH 3/7] fix: create sandbox_base directory after purge to avoid 'No such file or directory' --- dcodex-setup.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/dcodex-setup.sh b/dcodex-setup.sh index f24ae7d..141eacc 100755 --- a/dcodex-setup.sh +++ b/dcodex-setup.sh @@ -135,6 +135,7 @@ purge_sandbox_dirs() { if [[ -d "$test_sandbox" ]]; then rm -rf "$test_sandbox" fi + mkdir -p "$test_sandbox" } # ───────────────────────────────────────────────────────────────────────────── From 23b14ba8af576cc2f0fb2c998730a84f3c5130ae Mon Sep 17 00:00:00 2001 From: Sam-Si 
<13261099+Sam-Si@users.noreply.github.com> Date: Sat, 16 May 2026 21:24:18 +0530 Subject: [PATCH 4/7] fix: remove sandbox_base under /tmp (conflicts with sandbox_tmpfs_path) and fix /dev writable scope - Remove test:linux --sandbox_base=/tmp/bazel-sandbox: Bazel's linux-sandbox refuses to run when the sandbox working directory is below a tmpfs mount point (--sandbox_tmpfs_path=/tmp mounts tmpfs at /tmp, wiping sandbox dirs). - Change --sandbox_writable_path=/dev/null to --sandbox_writable_path=/dev: writable_path only works on directories, not individual device nodes. - Simplify purge_sandbox_dirs() back to single output_base/sandbox location. --- .bazelrc | 33 ++++++++++++++++----------------- dcodex-setup.sh | 17 +++-------------- 2 files changed, 19 insertions(+), 31 deletions(-) diff --git a/.bazelrc b/.bazelrc index c6a16f4..8ce614d 100755 --- a/.bazelrc +++ b/.bazelrc @@ -45,18 +45,18 @@ build --local_resources=cpu=16 build --local_resources=memory=57344 # Linux Sandbox Hardening -# --sandbox_tmpfs_path=/tmp : clean /tmp per action (isolation) -# --sandbox_add_mount_pair : bind-mounts /dev/null into sandbox -# --sandbox_writable_path : makes /dev/null writable (Bazel's -# test-setup.sh redirects to /dev/null; without this the bind-mount -# is read-only and tests fail with "Read-only file system") -# NOTE: Build sandboxes stay under output_base (not /dev/shm — Docker -# defaults to 64MB which is too small for linking large binaries). -# Test sandboxes are redirected to /tmp/bazel-sandbox (see §5 below). +# --sandbox_tmpfs_path=/tmp : clean /tmp per action (isolation) +# --sandbox_add_mount_pair : bind-mounts /dev/null into sandbox +# --sandbox_writable_path : makes /dev writable so /dev/null is +# usable by Bazel's test-setup.sh (which redirects to /dev/null). +# NOTE: writable_path only works on directories, not device nodes, +# so we mark /dev rather than /dev/null. 
+# NOTE: --sandbox_base CANNOT be under /tmp when --sandbox_tmpfs_path=/tmp +# is set — the tmpfs mount would wipe out the sandbox working directory. # Stale sandbox state is purged by dcodex-setup.sh between suites. build:linux --sandbox_tmpfs_path=/tmp build:linux --sandbox_add_mount_pair=/dev/null -build:linux --sandbox_writable_path=/dev/null +build:linux --sandbox_writable_path=/dev build:linux --dynamic_mode=off build:linux --linkopt=-Wl,--threads=16 @@ -72,23 +72,22 @@ build --strategy=CppCompile=sandboxed,standalone build --strategy=CppLink=sandboxed,standalone build --genrule_strategy=sandboxed,standalone -# Collision-free test sandboxing. Three independent safeguards: +# Collision-free test sandboxing. Two independent safeguards: # -# 1. Separate sandbox root — test sandboxes live under /tmp/bazel-sandbox, -# completely isolated from build sandboxes (which stay under output_base). -# Eliminates cross-action path contention in Docker's overlay filesystem. -# -# 2. No directory reuse — Bazel 7.x defaults --reuse_sandbox_directories=true +# 1. No directory reuse — Bazel 7.x defaults --reuse_sandbox_directories=true # which recycles sandbox dirs across actions. With high --runs_per_test # (TSan uses 20) and concurrent --local_test_jobs, the async cleanup # races against new sandbox setup → "Could not copy inputs (File exists)". # Builds still benefit from reuse; only tests opt out. # -# 3. Synchronous cleanup — the default async mode (4 threads) can fall behind +# 2. Synchronous cleanup — the default async mode (4 threads) can fall behind # during bursts of short-lived test actions, leaving stale inodes that trip # up the next sandbox setup. Synchronous deletion guarantees a clean slate # after every action. -test:linux --sandbox_base=/tmp/bazel-sandbox +# +# NOTE: Do NOT set --sandbox_base under /tmp here — it conflicts with +# --sandbox_tmpfs_path=/tmp (Bazel refuses to run when the sandbox working +# directory is below a tmpfs mount point). 
test --noreuse_sandbox_directories test --experimental_sandbox_async_tree_delete_idle_threads=0 diff --git a/dcodex-setup.sh b/dcodex-setup.sh index 141eacc..6ffa095 100755 --- a/dcodex-setup.sh +++ b/dcodex-setup.sh @@ -120,22 +120,11 @@ version_gte() { # and only removes sandbox working dirs — disk cache & repo cache are # untouched. Called once at startup AND before every `bazel test` # invocation so no stale state ever leaks across sanitizer suites. -# -# Two locations are cleaned: -# 1. output_base/sandbox — default sandbox root (used by build actions) -# 2. /tmp/bazel-sandbox — dedicated test sandbox root (set via -# --sandbox_base in .bazelrc) to keep test sandboxes isolated from -# build sandboxes. purge_sandbox_dirs() { - local build_sandbox="${REPO_DIR}/.bazel/output_base/sandbox" - local test_sandbox="/tmp/bazel-sandbox" - if [[ -d "$build_sandbox" ]]; then - rm -rf "$build_sandbox" - fi - if [[ -d "$test_sandbox" ]]; then - rm -rf "$test_sandbox" + local sandbox_dir="${REPO_DIR}/.bazel/output_base/sandbox" + if [[ -d "$sandbox_dir" ]]; then + rm -rf "$sandbox_dir" fi - mkdir -p "$test_sandbox" } # ───────────────────────────────────────────────────────────────────────────── From fc4557997c9f4d30e7b43acff631aa3ceea76c40 Mon Sep 17 00:00:00 2001 From: Sam-Si <13261099+Sam-Si@users.noreply.github.com> Date: Sun, 17 May 2026 00:44:50 +0530 Subject: [PATCH 5/7] feat: enable MSan test suite with libc++ instrumentation support - Install libc++-dev and libc++abi-dev alongside clang/lld in setup - Configure MSan to use -stdlib=libc++ (MSan runtime has interceptors for libc++ but not libstdc++, eliminating false positives) - Enable MSan tests by default (no longer gated behind RUN_MSAN=1) - MSan targets: warm_worker_pool_test, dynamic_worker_coordinator_test (sandbox_test excluded: spawns uninstrumented subprocesses; tsan_checker excluded: TSan-specific canary) - Update test summary to include MSan in pass/fail reporting --- .bazelrc | 11 +++++++---- 
dcodex-setup.sh | 36 ++++++++++++++---------------------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/.bazelrc b/.bazelrc index 8ce614d..c87ab62 100755 --- a/.bazelrc +++ b/.bazelrc @@ -157,15 +157,18 @@ build:coverage --combined_report=lcov # -- MSan: MemorySanitizer -- # Usage: bazel test --config=msan //... # Detects: reads of uninitialized memory. -# WARNING: MSan requires ALL linked libraries (including libc++) to be -# compiled with -fsanitize=memory. If you see false positives from -# std::string / std::vector, you must provide an MSan-instrumented -# libc++ via: build:msan --linkopt=-stdlib=libc++ (custom build). +# MSan requires -stdlib=libc++ because the MSan runtime has interceptors +# for libc++ but NOT for libstdc++. Without libc++, googletest itself +# triggers false positives before any DCodeX code runs. +# Requires: libc++-dev and libc++abi-dev (installed by dcodex-setup.sh). build:msan --config=sanitizer_common build:msan --copt=-fsanitize=memory build:msan --copt=-fsanitize-memory-track-origins=2 build:msan --linkopt=-fsanitize=memory build:msan --copt=-DMEMORY_SANITIZER +build:msan --copt=-stdlib=libc++ +build:msan --linkopt=-stdlib=libc++ +build:msan --linkopt=-lc++abi test:msan --action_env=MSAN_OPTIONS="halt_on_error=1:exitcode=77" # -- ASan + UBSan: AddressSanitizer + UndefinedBehaviorSanitizer -- diff --git a/dcodex-setup.sh b/dcodex-setup.sh index 6ffa095..8a447e7 100755 --- a/dcodex-setup.sh +++ b/dcodex-setup.sh @@ -205,8 +205,10 @@ http://apt.llvm.org/${UBUNTU_CODENAME}/ llvm-toolchain-${UBUNTU_CODENAME}-${LLVM DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ "clang-${LLVM_VERSION}" \ "lld-${LLVM_VERSION}" \ + "libc++-${LLVM_VERSION}-dev" \ + "libc++abi-${LLVM_VERSION}-dev" \ 2>/dev/null - ok "LLVM ${LLVM_VERSION} installed" + ok "LLVM ${LLVM_VERSION} installed (includes libc++ for MSan)" fi # ── Sanitizer runtime headers (needed for --config=asan/msan/tsan) ─────── @@ -484,24 +486,14 @@ if 
[[ "$MODE" == "test" ]]; then run_sanitizer_suite asan "${ENGINE_TESTS[@]}" || TEST_STATUS_ASAN=$? # ── MSan ────────────────────────────────────────────────────────────── - step "6b/7 MSan Tests (SKIPPED)" - warn "MSan requires ALL linked libraries (including system libstdc++) to be" - warn " compiled with -fsanitize=memory. The system libstdc++ is NOT instrumented," - warn " causing false positives in googletest before any DCodeX code runs." - warn " → Matching CI decision: MSan is excluded from automated testing." - warn " → To run MSan, build a custom toolchain with instrumented libc++." - warn " → See: https://clang.llvm.org/docs/MemorySanitizer.html#handling-external-code" - info "MSan skipped (set RUN_MSAN=1 to force-run with suppressions)" - - if [[ "${RUN_MSAN:-0}" == "1" ]]; then - warn "RUN_MSAN=1 — running MSan anyway (expect false positives)..." - MSAN_TARGETS=( - "//src/engine:warm_worker_pool_test" - "//src/engine:dynamic_worker_coordinator_test" - "//src/engine:tsan_checker" - ) - run_sanitizer_suite msan "${MSAN_TARGETS[@]}" || TEST_STATUS_MSAN=$? - fi + step "6b/7 MSan Tests" + # MSan targets exclude sandbox_test (spawns uninstrumented clang++/python3 + # subprocesses which produce false positives) and tsan_checker (TSan-specific). + MSAN_TARGETS=( + "//src/engine:warm_worker_pool_test" + "//src/engine:dynamic_worker_coordinator_test" + ) + run_sanitizer_suite msan "${MSAN_TARGETS[@]}" || TEST_STATUS_MSAN=$? 
# ── TSan ────────────────────────────────────────────────────────────── step "6c/7 TSan Tests" @@ -548,13 +540,13 @@ if [[ "$MODE" == "test" ]]; then echo "" echo -e "${BOLD}${CYAN}━━━ Test Summary ━━━${NC}" echo -e " ASan + UBSan: $(if [[ $TEST_STATUS_ASAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_ASAN)${NC}"; fi)" - echo -e " MSan: $(if [[ "${RUN_MSAN:-0}" == "1" ]]; then if [[ $TEST_STATUS_MSAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_MSAN)${NC}"; fi; else echo -e "${YELLOW}SKIPPED${NC}"; fi)" + echo -e " MSan: $(if [[ $TEST_STATUS_MSAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_MSAN)${NC}"; fi)" echo -e " TSan: $(if [[ $TEST_STATUS_TSAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_TSAN)${NC}"; fi)" echo -e " Duration: $(( TEST_END - TEST_START ))s" - echo -e " Logs: /tmp/dcodex-test-{asan,tsan}.log" + echo -e " Logs: /tmp/dcodex-test-{asan,msan,tsan}.log" echo "" - if [[ $TEST_STATUS_ASAN -eq 0 && $TEST_STATUS_TSAN -eq 0 ]]; then + if [[ $TEST_STATUS_ASAN -eq 0 && $TEST_STATUS_MSAN -eq 0 && $TEST_STATUS_TSAN -eq 0 ]]; then ok "All active test suites passed in $(( TEST_END - TEST_START ))s" else FAILED_SUITES="" From c0f60b966f4d50b4f92db722166f497f795ce62d Mon Sep 17 00:00:00 2001 From: Sam-Si <13261099+Sam-Si@users.noreply.github.com> Date: Sun, 17 May 2026 00:51:43 +0530 Subject: [PATCH 6/7] fix: install libc++ unconditionally (not gated behind clang-exists check) The LLVM install block is skipped when clang is already present, so libc++ was never installed on subsequent runs. Add a dedicated install step (like the sanitizer runtime headers) that always runs. 
--- dcodex-setup.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dcodex-setup.sh b/dcodex-setup.sh index 8a447e7..9556ebb 100755 --- a/dcodex-setup.sh +++ b/dcodex-setup.sh @@ -222,6 +222,18 @@ http://apt.llvm.org/${UBUNTU_CODENAME}/ llvm-toolchain-${UBUNTU_CODENAME}-${LLVM || warn "libclang-rt-${LLVM_VERSION}-dev not available — sanitizer builds may fail" ok "Sanitizer runtime headers installed" + # ── libc++ (required for MSan — see .bazelrc msan config) ──────────── + # MSan needs -stdlib=libc++ because its runtime has interceptors for + # libc++ but not libstdc++. Without the libc++ dev packages, the C++ + # standard headers are missing and compilation fails. + info "Ensuring libc++ is installed (required for MSan)..." + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + "libc++-${LLVM_VERSION}-dev" \ + "libc++abi-${LLVM_VERSION}-dev" \ + 2>/dev/null \ + || warn "libc++ packages not available — MSan builds will fail" + ok "libc++ installed" + # ── Symlinks ───────────────────────────────────────────────────────────── info "Creating LLVM symlinks..." ln -sf "/usr/bin/clang-${LLVM_VERSION}" /usr/bin/clang From 8526e52463fab8167f14515724bbd882700e68c0 Mon Sep 17 00:00:00 2001 From: Sam-Si <13261099+Sam-Si@users.noreply.github.com> Date: Wed, 27 May 2026 07:57:35 +0530 Subject: [PATCH 7/7] fix: resolve sandbox mount, MSan build, and TSan collision failures Three infrastructure fixes for dcodex-setup.sh --test: 1. /dev/null read-only: Remove --sandbox_add_mount_pair=/dev/null which created a separate read-only bind mount overriding the writable /dev directory. --sandbox_writable_path=/dev alone provides /dev/null as a read-write bind mount. 2. MSan 'absolute path inclusion' build error: Add --spawn_strategy=standalone to MSan config. -stdlib=libc++ pulls headers from /usr/lib/llvm-19/include/c++/v1/ which are not in Bazel's auto-detected cxx_builtin_include_directories (detected with libstdc++). 
Standalone bypasses the sandbox include validator, which is appropriate since MSan inherently depends on system-installed libc++. Also gate MSan tests behind RUN_MSAN=1 in dcodex-setup.sh since system libc++ is not MSan-instrumented (false positives in googletest/abseil internals). 3. TSan sandbox collisions: Lower --local_test_jobs from 10 to 4. With --runs_per_test=20, sandbox cleanup cannot keep pace at higher concurrency, causing 'Could not copy inputs into sandbox (File exists)' errors. --- .bazelrc | 26 ++++++++++++++++++-------- dcodex-setup.sh | 33 +++++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/.bazelrc b/.bazelrc index c87ab62..fcea679 100755 --- a/.bazelrc +++ b/.bazelrc @@ -46,16 +46,15 @@ build --local_resources=memory=57344 # Linux Sandbox Hardening # --sandbox_tmpfs_path=/tmp : clean /tmp per action (isolation) -# --sandbox_add_mount_pair : bind-mounts /dev/null into sandbox -# --sandbox_writable_path : makes /dev writable so /dev/null is -# usable by Bazel's test-setup.sh (which redirects to /dev/null). -# NOTE: writable_path only works on directories, not device nodes, -# so we mark /dev rather than /dev/null. +# --sandbox_writable_path=/dev: makes /dev (including /dev/null) available +# as a read-write bind mount inside the sandbox. Bazel's test-setup.sh +# redirects to /dev/null and fails if it's read-only. +# NOTE: Do NOT also add --sandbox_add_mount_pair=/dev/null — it creates +# a separate read-only bind mount that overrides the writable /dev mount. # NOTE: --sandbox_base CANNOT be under /tmp when --sandbox_tmpfs_path=/tmp # is set — the tmpfs mount would wipe out the sandbox working directory. # Stale sandbox state is purged by dcodex-setup.sh between suites. 
build:linux --sandbox_tmpfs_path=/tmp -build:linux --sandbox_add_mount_pair=/dev/null build:linux --sandbox_writable_path=/dev build:linux --dynamic_mode=off build:linux --linkopt=-Wl,--threads=16 @@ -147,7 +146,11 @@ build:tsan --linkopt=-fsanitize=thread build:tsan --copt=-DTHREAD_SANITIZER test:tsan --action_env=TSAN_OPTIONS="history_size=7:halt_on_error=1:detect_deadlocks=1:second_deadlock_stack=1:strict_memcmp=1:report_atomic_races=1:force_seq_cst_atomics=1:exitcode=66:symbolize=1:print_suppressions=1" test:tsan --runs_per_test=20 -test:tsan --local_test_jobs=10 +# Keep local_test_jobs low to prevent sandbox directory collisions. +# With --runs_per_test=20, Bazel creates 20 sandbox instances per target. +# At concurrency >4, sandbox cleanup can't keep up, causing +# "Could not copy inputs into sandbox (File exists)" errors. +test:tsan --local_test_jobs=4 # -- Code Coverage -- build:coverage --collect_code_coverage @@ -161,7 +164,14 @@ build:coverage --combined_report=lcov # for libc++ but NOT for libstdc++. Without libc++, googletest itself # triggers false positives before any DCodeX code runs. # Requires: libc++-dev and libc++abi-dev (installed by dcodex-setup.sh). +# +# --spawn_strategy=standalone: libc++ headers (e.g. /usr/lib/llvm-19/include/c++/v1/) +# are not in Bazel's auto-detected cxx_builtin_include_directories (which are +# detected using libstdc++). The sandbox include validator rejects these as +# "absolute path inclusion" errors. Standalone bypasses the sandbox, which is +# appropriate since MSan inherently depends on system-installed libc++. 
build:msan --config=sanitizer_common +build:msan --spawn_strategy=standalone build:msan --copt=-fsanitize=memory build:msan --copt=-fsanitize-memory-track-origins=2 build:msan --linkopt=-fsanitize=memory @@ -169,7 +179,7 @@ build:msan --copt=-DMEMORY_SANITIZER build:msan --copt=-stdlib=libc++ build:msan --linkopt=-stdlib=libc++ build:msan --linkopt=-lc++abi -test:msan --action_env=MSAN_OPTIONS="halt_on_error=1:exitcode=77" +test:msan --action_env=MSAN_OPTIONS="halt_on_error=1:exitcode=77:suppressions=.github/workflows/msan_suppressions.txt" # -- ASan + UBSan: AddressSanitizer + UndefinedBehaviorSanitizer -- # Usage: bazel test --config=asan //... diff --git a/dcodex-setup.sh b/dcodex-setup.sh index 9556ebb..d48ae63 100755 --- a/dcodex-setup.sh +++ b/dcodex-setup.sh @@ -498,14 +498,27 @@ if [[ "$MODE" == "test" ]]; then run_sanitizer_suite asan "${ENGINE_TESTS[@]}" || TEST_STATUS_ASAN=$? # ── MSan ────────────────────────────────────────────────────────────── + # MSan is gated behind RUN_MSAN=1 because it requires ALL linked libraries + # (including libc++) to be compiled with -fsanitize=memory. The system + # libc++ installed via apt is NOT instrumented, producing false positives + # in googletest and abseil internals. To run MSan properly, build a custom + # LLVM toolchain with an MSan-instrumented libc++ (see: + # https://clang.llvm.org/docs/MemorySanitizer.html#handling-external-code). + RUN_MSAN="${RUN_MSAN:-0}" + MSAN_SKIPPED=0 step "6b/7 MSan Tests" - # MSan targets exclude sandbox_test (spawns uninstrumented clang++/python3 - # subprocesses which produce false positives) and tsan_checker (TSan-specific). - MSAN_TARGETS=( - "//src/engine:warm_worker_pool_test" - "//src/engine:dynamic_worker_coordinator_test" - ) - run_sanitizer_suite msan "${MSAN_TARGETS[@]}" || TEST_STATUS_MSAN=$? 
+ if [[ "$RUN_MSAN" == "1" ]]; then + # MSan targets exclude sandbox_test (spawns uninstrumented clang++/python3 + # subprocesses which produce false positives) and tsan_checker (TSan-specific). + MSAN_TARGETS=( + "//src/engine:warm_worker_pool_test" + "//src/engine:dynamic_worker_coordinator_test" + ) + run_sanitizer_suite msan "${MSAN_TARGETS[@]}" || TEST_STATUS_MSAN=$? + else + MSAN_SKIPPED=1 + warn "MSan tests SKIPPED (set RUN_MSAN=1 to enable — requires instrumented libc++)" + fi # ── TSan ────────────────────────────────────────────────────────────── step "6c/7 TSan Tests" @@ -552,18 +565,18 @@ if [[ "$MODE" == "test" ]]; then echo "" echo -e "${BOLD}${CYAN}━━━ Test Summary ━━━${NC}" echo -e " ASan + UBSan: $(if [[ $TEST_STATUS_ASAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_ASAN)${NC}"; fi)" - echo -e " MSan: $(if [[ $TEST_STATUS_MSAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_MSAN)${NC}"; fi)" + echo -e " MSan: $(if [[ $MSAN_SKIPPED -eq 1 ]]; then echo -e "${YELLOW}SKIP${NC}"; elif [[ $TEST_STATUS_MSAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_MSAN)${NC}"; fi)" echo -e " TSan: $(if [[ $TEST_STATUS_TSAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_TSAN)${NC}"; fi)" echo -e " Duration: $(( TEST_END - TEST_START ))s" echo -e " Logs: /tmp/dcodex-test-{asan,msan,tsan}.log" echo "" - if [[ $TEST_STATUS_ASAN -eq 0 && $TEST_STATUS_MSAN -eq 0 && $TEST_STATUS_TSAN -eq 0 ]]; then + if [[ $TEST_STATUS_ASAN -eq 0 && ($TEST_STATUS_MSAN -eq 0 || $MSAN_SKIPPED -eq 1) && $TEST_STATUS_TSAN -eq 0 ]]; then ok "All active test suites passed in $(( TEST_END - TEST_START ))s" else FAILED_SUITES="" [[ $TEST_STATUS_ASAN -ne 0 ]] && FAILED_SUITES+="asan " - [[ $TEST_STATUS_MSAN -ne 0 ]] && FAILED_SUITES+="msan " + [[ $TEST_STATUS_MSAN -ne 0 && $MSAN_SKIPPED -eq 0 ]] && FAILED_SUITES+="msan " [[ 
$TEST_STATUS_TSAN -ne 0 ]] && FAILED_SUITES+="tsan " die "Tests FAILED: ${FAILED_SUITES}— see diagnostic output above and /tmp/dcodex-test-*.log" fi