Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 48 additions & 12 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,16 @@ build --local_resources=memory=57344

# Linux Sandbox Hardening
# --sandbox_tmpfs_path=/tmp : clean /tmp per action (isolation)
# --sandbox_add_mount_pair : ensures /dev/null is available
# NOTE: Do NOT use --sandbox_base=/dev/shm — Docker containers default
# to 64MB /dev/shm which is too small for linking large binaries.
# Stale sandbox state ("File exists" errors) is handled by cleaning
# .bazel/output_base/sandbox before builds (see dcodex-setup.sh).
# --sandbox_writable_path=/dev: makes /dev (including /dev/null) available
# as a read-write bind mount inside the sandbox. Bazel's test-setup.sh
# redirects to /dev/null and fails if it's read-only.
# NOTE: Do NOT also add --sandbox_add_mount_pair=/dev/null — it creates
# a separate read-only bind mount that overrides the writable /dev mount.
# NOTE: --sandbox_base CANNOT be under /tmp when --sandbox_tmpfs_path=/tmp
# is set — the tmpfs mount would wipe out the sandbox working directory.
# Stale sandbox state is purged by dcodex-setup.sh between suites.
build:linux --sandbox_tmpfs_path=/tmp
build:linux --sandbox_add_mount_pair=/dev/null
build:linux --sandbox_writable_path=/dev
build:linux --dynamic_mode=off
build:linux --linkopt=-Wl,--threads=16

Expand All @@ -68,6 +71,25 @@ build --strategy=CppCompile=sandboxed,standalone
build --strategy=CppLink=sandboxed,standalone
build --genrule_strategy=sandboxed,standalone

# Collision-free test sandboxing. Two independent safeguards:
#
# 1. No directory reuse — Bazel 7.x defaults to --reuse_sandbox_directories=true,
# which recycles sandbox dirs across actions. With a high --runs_per_test
# (TSan uses 20) and concurrent --local_test_jobs, the async cleanup
# races against new sandbox setup → "Could not copy inputs (File exists)".
# Builds still benefit from reuse; only tests opt out.
#
# 2. Synchronous cleanup — the default async mode (4 threads) can fall behind
# during bursts of short-lived test actions, leaving stale inodes that trip
# up the next sandbox setup. Synchronous deletion guarantees a clean slate
# after every action.
#
# NOTE: Do NOT set --sandbox_base under /tmp here — it conflicts with
# --sandbox_tmpfs_path=/tmp (Bazel refuses to run when the sandbox working
# directory is below a tmpfs mount point).
test --noreuse_sandbox_directories
test --experimental_sandbox_async_tree_delete_idle_threads=0

# ------------------------------------------------------------
# 6. CACHING
# ------------------------------------------------------------
Expand Down Expand Up @@ -124,7 +146,11 @@ build:tsan --linkopt=-fsanitize=thread
build:tsan --copt=-DTHREAD_SANITIZER
test:tsan --action_env=TSAN_OPTIONS="history_size=7:halt_on_error=1:detect_deadlocks=1:second_deadlock_stack=1:strict_memcmp=1:report_atomic_races=1:force_seq_cst_atomics=1:exitcode=66:symbolize=1:print_suppressions=1"
test:tsan --runs_per_test=20
test:tsan --local_test_jobs=10
# Keep local_test_jobs low to prevent sandbox directory collisions.
# With --runs_per_test=20, Bazel creates 20 sandbox instances per target.
# At concurrency >4, sandbox cleanup can't keep up, causing
# "Could not copy inputs into sandbox (File exists)" errors.
test:tsan --local_test_jobs=4

# -- Code Coverage --
build:coverage --collect_code_coverage
Expand All @@ -134,16 +160,26 @@ build:coverage --combined_report=lcov
# -- MSan: MemorySanitizer --
# Usage: bazel test --config=msan //...
# Detects: reads of uninitialized memory.
# WARNING: MSan requires ALL linked libraries (including libc++) to be
# compiled with -fsanitize=memory. If you see false positives from
# std::string / std::vector, you must provide an MSan-instrumented
# libc++ via: build:msan --linkopt=-stdlib=libc++ (custom build).
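# MSan requires every linked library, including the C++ standard library,
# to be compiled with -fsanitize=memory. The system libstdc++ is not
# instrumented, so googletest itself triggers false positives before any
# DCodeX code runs; this config therefore builds against libc++
# (-stdlib=libc++) instead.
# NOTE: the apt-packaged libc++ is not MSan-instrumented either, so some
# false positives can remain (hence the suppressions file passed via
# MSAN_OPTIONS in this config).
# Requires: libc++-dev and libc++abi-dev (installed by dcodex-setup.sh).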
#
# --spawn_strategy=standalone: libc++ headers (e.g. /usr/lib/llvm-19/include/c++/v1/)
# are not in Bazel's auto-detected cxx_builtin_include_directories (which are
# detected using libstdc++). The sandbox include validator rejects these as
# "absolute path inclusion" errors. Standalone bypasses the sandbox, which is
# appropriate since MSan inherently depends on system-installed libc++.
build:msan --config=sanitizer_common
build:msan --spawn_strategy=standalone
build:msan --copt=-fsanitize=memory
build:msan --copt=-fsanitize-memory-track-origins=2
build:msan --linkopt=-fsanitize=memory
build:msan --copt=-DMEMORY_SANITIZER
test:msan --action_env=MSAN_OPTIONS="halt_on_error=1:exitcode=77"
build:msan --copt=-stdlib=libc++
build:msan --linkopt=-stdlib=libc++
build:msan --linkopt=-lc++abi
test:msan --action_env=MSAN_OPTIONS="halt_on_error=1:exitcode=77:suppressions=.github/workflows/msan_suppressions.txt"

# -- ASan + UBSan: AddressSanitizer + UndefinedBehaviorSanitizer --
# Usage: bazel test --config=asan //...
Expand Down
82 changes: 55 additions & 27 deletions dcodex-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,20 @@ version_gte() {
[[ "$(printf '%s\n%s' "$2" "$1" | sort -V | head -n1)" == "$2" ]]
}

#######################################
# Remove stale Bazel sandbox working directories.
#
# Leftover sandbox state from a previous build/test (Ctrl+C, OOM kill,
# crash, or Bazel's own async cleanup not finishing in time) causes
# "Could not copy inputs into sandbox … (File exists)" on the next run.
# This is cheap (~instant) and only removes sandbox working dirs — disk
# cache & repo cache are untouched. Called once at startup AND before
# every `bazel test` invocation so no stale state ever leaks across
# sanitizer suites.
#
# Globals:   REPO_DIR (read) — repository root; must be set and non-empty.
# Arguments: none
# Outputs:   an error message on stderr when REPO_DIR is unset or empty.
# Returns:   0 if the sandbox directory was absent or has been removed,
#            1 if REPO_DIR is unset or empty (nothing is deleted),
#            otherwise the exit status of rm.
#######################################
purge_sandbox_dirs() {
  # Never build the path from an empty root: it would resolve to
  # /.bazel/output_base/sandbox and aim `rm -rf` at the filesystem root.
  if [[ -z "${REPO_DIR:-}" ]]; then
    printf 'purge_sandbox_dirs: REPO_DIR is unset or empty; nothing removed\n' >&2
    return 1
  fi

  local sandbox_dir="${REPO_DIR}/.bazel/output_base/sandbox"
  if [[ -d "$sandbox_dir" ]]; then
    # `--` ends option parsing, so a path beginning with '-' is still
    # treated as a path rather than as flags to rm.
    rm -rf -- "$sandbox_dir"
  fi
}

# ─────────────────────────────────────────────────────────────────────────────
# STEP 1 — Pre-flight checks
# ─────────────────────────────────────────────────────────────────────────────
Expand Down Expand Up @@ -191,8 +205,10 @@ http://apt.llvm.org/${UBUNTU_CODENAME}/ llvm-toolchain-${UBUNTU_CODENAME}-${LLVM
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
"clang-${LLVM_VERSION}" \
"lld-${LLVM_VERSION}" \
"libc++-${LLVM_VERSION}-dev" \
"libc++abi-${LLVM_VERSION}-dev" \
2>/dev/null
ok "LLVM ${LLVM_VERSION} installed"
ok "LLVM ${LLVM_VERSION} installed (includes libc++ for MSan)"
fi

# ── Sanitizer runtime headers (needed for --config=asan/msan/tsan) ───────
Expand All @@ -206,6 +222,18 @@ http://apt.llvm.org/${UBUNTU_CODENAME}/ llvm-toolchain-${UBUNTU_CODENAME}-${LLVM
|| warn "libclang-rt-${LLVM_VERSION}-dev not available — sanitizer builds may fail"
ok "Sanitizer runtime headers installed"

# ── libc++ (required for MSan — see .bazelrc msan config) ────────────
# MSan needs every linked library to be instrumented, which the system
# libstdc++ is not, so the msan config in .bazelrc compiles and links with
# -stdlib=libc++. Without the libc++ packages, <cstdint> and other
# standard headers are missing and compilation fails.
info "Ensuring libc++ is installed (required for MSan)..."
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
"libc++-${LLVM_VERSION}-dev" \
"libc++abi-${LLVM_VERSION}-dev" \
2>/dev/null \
|| warn "libc++ packages not available — MSan builds will fail"
ok "libc++ installed"

# ── Symlinks ─────────────────────────────────────────────────────────────
info "Creating LLVM symlinks..."
ln -sf "/usr/bin/clang-${LLVM_VERSION}" /usr/bin/clang
Expand Down Expand Up @@ -309,16 +337,8 @@ else
ok "Skipping bazel clean (incremental build — disk cache preserved)"
fi

# Always purge stale sandbox directories. If a previous build was interrupted
# (Ctrl+C, OOM kill, crash), leftover files cause "File exists" errors on the
# next run. This is cheap (~instant) and only removes sandbox working dirs —
# the disk cache and repo cache are untouched.
if [[ -d "${REPO_DIR}/.bazel/output_base/sandbox" ]]; then
rm -rf "${REPO_DIR}/.bazel/output_base/sandbox"
ok "Purged stale sandbox directories"
else
ok "No stale sandbox directories to clean"
fi
purge_sandbox_dirs
ok "Sandbox directories clean"

timer

Expand Down Expand Up @@ -439,6 +459,10 @@ run_sanitizer_suite() {

info "Running ${config_name} tests: ${targets[*]}"

# Purge sandbox dirs from previous suite so no stale state leaks across
# sanitizer configurations (asan → tsan → msan).
purge_sandbox_dirs

# Timestamp file for dump_test_logs() to find only fresh logs.
touch /tmp/dcodex-test-ts-"$config_name"

Expand Down Expand Up @@ -474,23 +498,26 @@ if [[ "$MODE" == "test" ]]; then
run_sanitizer_suite asan "${ENGINE_TESTS[@]}" || TEST_STATUS_ASAN=$?

# ── MSan ──────────────────────────────────────────────────────────────
step "6b/7 MSan Tests (SKIPPED)"
warn "MSan requires ALL linked libraries (including system libstdc++) to be"
warn " compiled with -fsanitize=memory. The system libstdc++ is NOT instrumented,"
warn " causing false positives in googletest before any DCodeX code runs."
warn " → Matching CI decision: MSan is excluded from automated testing."
warn " → To run MSan, build a custom toolchain with instrumented libc++."
warn " → See: https://clang.llvm.org/docs/MemorySanitizer.html#handling-external-code"
info "MSan skipped (set RUN_MSAN=1 to force-run with suppressions)"

if [[ "${RUN_MSAN:-0}" == "1" ]]; then
warn "RUN_MSAN=1 — running MSan anyway (expect false positives)..."
# MSan is gated behind RUN_MSAN=1 because it requires ALL linked libraries
# (including libc++) to be compiled with -fsanitize=memory. The system
# libc++ installed via apt is NOT instrumented, producing false positives
# in googletest and abseil internals. To run MSan properly, build a custom
# LLVM toolchain with an MSan-instrumented libc++ (see:
# https://clang.llvm.org/docs/MemorySanitizer.html#handling-external-code).
RUN_MSAN="${RUN_MSAN:-0}"
MSAN_SKIPPED=0
step "6b/7 MSan Tests"
if [[ "$RUN_MSAN" == "1" ]]; then
# MSan targets exclude sandbox_test (spawns uninstrumented clang++/python3
# subprocesses which produce false positives) and tsan_checker (TSan-specific).
MSAN_TARGETS=(
"//src/engine:warm_worker_pool_test"
"//src/engine:dynamic_worker_coordinator_test"
"//src/engine:tsan_checker"
)
run_sanitizer_suite msan "${MSAN_TARGETS[@]}" || TEST_STATUS_MSAN=$?
else
MSAN_SKIPPED=1
warn "MSan tests SKIPPED (set RUN_MSAN=1 to enable — requires instrumented libc++)"
fi

# ── TSan ──────────────────────────────────────────────────────────────
Expand All @@ -508,6 +535,7 @@ if [[ "$MODE" == "test" ]]; then
# The sandbox_test forks clang++ which has high memory overhead under TSan.
if [[ $TEST_STATUS_TSAN -eq 0 ]]; then
info "Running sandbox_test under TSan (constrained: 1 job, 1 run)..."
purge_sandbox_dirs
touch /tmp/dcodex-test-ts-tsan-sandbox
set +e
bazel "${BAZEL_JVM_FLAGS[@]}" test \
Expand Down Expand Up @@ -537,18 +565,18 @@ if [[ "$MODE" == "test" ]]; then
echo ""
echo -e "${BOLD}${CYAN}━━━ Test Summary ━━━${NC}"
echo -e " ASan + UBSan: $(if [[ $TEST_STATUS_ASAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_ASAN)${NC}"; fi)"
echo -e " MSan: $(if [[ "${RUN_MSAN:-0}" == "1" ]]; then if [[ $TEST_STATUS_MSAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_MSAN)${NC}"; fi; else echo -e "${YELLOW}SKIPPED${NC}"; fi)"
echo -e " MSan: $(if [[ $MSAN_SKIPPED -eq 1 ]]; then echo -e "${YELLOW}SKIP${NC}"; elif [[ $TEST_STATUS_MSAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_MSAN)${NC}"; fi)"
echo -e " TSan: $(if [[ $TEST_STATUS_TSAN -eq 0 ]]; then echo -e "${GREEN}PASS${NC}"; else echo -e "${RED}FAIL (exit $TEST_STATUS_TSAN)${NC}"; fi)"
echo -e " Duration: $(( TEST_END - TEST_START ))s"
echo -e " Logs: /tmp/dcodex-test-{asan,tsan}.log"
echo -e " Logs: /tmp/dcodex-test-{asan,msan,tsan}.log"
echo ""

if [[ $TEST_STATUS_ASAN -eq 0 && $TEST_STATUS_TSAN -eq 0 ]]; then
if [[ $TEST_STATUS_ASAN -eq 0 && ($TEST_STATUS_MSAN -eq 0 || $MSAN_SKIPPED -eq 1) && $TEST_STATUS_TSAN -eq 0 ]]; then
ok "All active test suites passed in $(( TEST_END - TEST_START ))s"
else
FAILED_SUITES=""
[[ $TEST_STATUS_ASAN -ne 0 ]] && FAILED_SUITES+="asan "
[[ $TEST_STATUS_MSAN -ne 0 ]] && FAILED_SUITES+="msan "
[[ $TEST_STATUS_MSAN -ne 0 && $MSAN_SKIPPED -eq 0 ]] && FAILED_SUITES+="msan "
[[ $TEST_STATUS_TSAN -ne 0 ]] && FAILED_SUITES+="tsan "
die "Tests FAILED: ${FAILED_SUITES}— see diagnostic output above and /tmp/dcodex-test-*.log"
fi
Expand Down
Loading