From b4c649cf6efe8d57debe8c124de3ac86a3e39b8c Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Tue, 23 Dec 2025 08:58:04 -0500 Subject: [PATCH 01/23] Proper handling for windows mingw flags --- rust/platform/triple_mappings.bzl | 14 +- rust/private/repository_utils.bzl | 2 +- rust/private/rustc.bzl | 51 +++-- test/unit/windows_lib_name/BUILD.bazel | 3 + .../windows_lib_name_test.bzl | 185 ++++++++++++++++++ test/unit/windows_stdlib/BUILD.bazel | 3 + .../windows_stdlib/windows_stdlib_test.bzl | 122 ++++++++++++ 7 files changed, 350 insertions(+), 30 deletions(-) create mode 100644 test/unit/windows_lib_name/BUILD.bazel create mode 100644 test/unit/windows_lib_name/windows_lib_name_test.bzl create mode 100644 test/unit/windows_stdlib/BUILD.bazel create mode 100644 test/unit/windows_stdlib/windows_stdlib_test.bzl diff --git a/rust/platform/triple_mappings.bzl b/rust/platform/triple_mappings.bzl index e213cb1f82..46c928a429 100644 --- a/rust/platform/triple_mappings.bzl +++ b/rust/platform/triple_mappings.bzl @@ -284,7 +284,12 @@ _SYSTEM_TO_STDLIB_LINKFLAGS = { "wasi": [], "wasip1": [], "wasip2": [], - "windows": ["advapi32.lib", "ws2_32.lib", "userenv.lib", "Bcrypt.lib"], + "windows": { + # see https://github.com/rust-lang/rust/blob/c4aa646f15e40bd3e64ddb5017b7b89b3646ac99/src/tools/run-make-support/src/external_deps/c_cxx_compiler/extras.rs#L14-L23 + "gnu": ["-lws2_32", "-luserenv", "-lbcrypt", "-lntdll", "-lsynchronization"], + "gnullvm": ["-lws2_32", "-luserenv", "-lbcrypt", "-lntdll", "-lsynchronization"], + "msvc": ["advapi32.lib", "ws2_32.lib", "userenv.lib", "Bcrypt.lib"], + }, } def cpu_arch_to_constraints(cpu_arch, *, system = None, abi = None): @@ -410,8 +415,11 @@ def system_to_staticlib_ext(system): def system_to_binary_ext(system): return _SYSTEM_TO_BINARY_EXT[system] -def system_to_stdlib_linkflags(system): - return _SYSTEM_TO_STDLIB_LINKFLAGS[system] +def system_to_stdlib_linkflags(target_triple): + val = 
_SYSTEM_TO_STDLIB_LINKFLAGS[target_triple.system] + if type(val) == "list": + return val + return val[target_triple.abi] def triple_to_constraint_set(target_triple): """Returns a set of constraints for a given platform triple diff --git a/rust/private/repository_utils.bzl b/rust/private/repository_utils.bzl index 67ce739e6a..4f3b89377a 100644 --- a/rust/private/repository_utils.bzl +++ b/rust/private/repository_utils.bzl @@ -417,7 +417,7 @@ def BUILD_for_rust_toolchain( str: A rendered template of a `rust_toolchain` declaration """ if stdlib_linkflags == None: - stdlib_linkflags = ", ".join(['"%s"' % x for x in system_to_stdlib_linkflags(target_triple.system)]) + stdlib_linkflags = ", ".join(['"%s"' % x for x in system_to_stdlib_linkflags(target_triple)]) rustfmt_label = None if include_rustfmt: diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index 776dce73b4..cf88ec8147 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -560,9 +560,9 @@ def _symlink_for_ambiguous_lib(actions, toolchain, crate_info, lib): # Take the absolute value of hash() since it could be negative. 
path_hash = abs(hash(lib.path)) - lib_name = get_lib_name_for_windows(lib) if toolchain.target_os.startswith("windows") else get_lib_name_default(lib) + lib_name = get_lib_name_for_windows(lib) if toolchain.target_abi == "msvc" else get_lib_name_default(lib) - if toolchain.target_os.startswith("windows"): + if toolchain.target_abi == "msvc": prefix = "" extension = ".lib" elif lib_name.endswith(".pic"): @@ -1496,7 +1496,7 @@ def rustc_compile_action( pdb_file = None dsym_folder = None if crate_info.type in ("cdylib", "bin") and not experimental_use_cc_common_link: - if toolchain.target_os == "windows" and compilation_mode.strip_level == "none": + if toolchain.target_abi == "msvc" and compilation_mode.strip_level == "none": pdb_file = ctx.actions.declare_file(crate_info.output.basename[:-len(crate_info.output.extension)] + "pdb", sibling = crate_info.output) action_outputs.append(pdb_file) elif toolchain.target_os in ["macos", "darwin"]: @@ -2195,7 +2195,7 @@ def _get_crate_dirname(crate): """ return crate.output.dirname -def _portable_link_flags(lib, use_pic, ambiguous_libs, get_lib_name, for_windows = False, for_darwin = False, flavor_msvc = False): +def _portable_link_flags(lib, use_pic, ambiguous_libs, get_lib_name, for_darwin = False, flavor_msvc = False): artifact = get_preferred_artifact(lib, use_pic) if ambiguous_libs and artifact.path in ambiguous_libs: artifact = ambiguous_libs[artifact.path] @@ -2235,17 +2235,11 @@ def _portable_link_flags(lib, use_pic, ambiguous_libs, get_lib_name, for_windows ): return [] if for_darwin else ["-lstatic=%s" % get_lib_name(artifact)] - if for_windows: - if flavor_msvc: - return [ - "-lstatic=%s" % get_lib_name(artifact), - "-Clink-arg={}".format(artifact.basename), - ] - else: - return [ - "-lstatic=%s" % get_lib_name(artifact), - "-Clink-arg=-l{}".format(artifact.basename), - ] + if flavor_msvc: + return [ + "-lstatic=%s" % get_lib_name(artifact), + "-Clink-arg={}".format(artifact.basename), + ] else: return [ 
"-lstatic=%s" % get_lib_name(artifact), @@ -2276,7 +2270,8 @@ def _make_link_flags_windows(make_link_flags_args, flavor_msvc, use_direct_drive ("-Clink-arg=%s--no-whole-archive" % prefix), ]) elif include_link_flags: - ret.extend(_portable_link_flags(lib, use_pic, ambiguous_libs, get_lib_name_for_windows, for_windows = True, flavor_msvc = flavor_msvc)) + get_lib_name = get_lib_name_for_windows if flavor_msvc else get_lib_name_default + ret.extend(_portable_link_flags(lib, use_pic, ambiguous_libs, get_lib_name, flavor_msvc = flavor_msvc)) _add_user_link_flags(ret, linker_input) return ret @@ -2356,19 +2351,19 @@ def _get_make_link_flag_funcs(target_os, target_abi, use_direct_link_driver): - callable: The function for producing link args. - callable: The function for formatting link library names. """ + + get_lib_name = get_lib_name_default + if target_os == "windows": - make_link_flags_windows_msvc = _make_link_flags_windows_msvc_direct if use_direct_link_driver else _make_link_flags_windows_msvc_indirect - make_link_flags_windows_gnu = _make_link_flags_windows_gnu_direct if use_direct_link_driver else _make_link_flags_windows_gnu_indirect - make_link_flags = make_link_flags_windows_msvc if target_abi == "msvc" else make_link_flags_windows_gnu - get_lib_name = get_lib_name_for_windows + if target_abi == "msvc": + make_link_flags = _make_link_flags_windows_msvc_direct if use_direct_link_driver else _make_link_flags_windows_msvc_indirect + get_lib_name = get_lib_name_for_windows + else: + make_link_flags = _make_link_flags_windows_gnu_direct if use_direct_link_driver else _make_link_flags_windows_gnu_indirect elif target_os.startswith(("mac", "darwin", "ios")): - make_link_flags_darwin = _make_link_flags_darwin_direct if use_direct_link_driver else _make_link_flags_darwin_indirect - make_link_flags = make_link_flags_darwin - get_lib_name = get_lib_name_default + make_link_flags = _make_link_flags_darwin_direct if use_direct_link_driver else 
_make_link_flags_darwin_indirect else: - make_link_flags_default = _make_link_flags_default_direct if use_direct_link_driver else _make_link_flags_default_indirect - make_link_flags = make_link_flags_default - get_lib_name = get_lib_name_default + make_link_flags = _make_link_flags_default_direct if use_direct_link_driver else _make_link_flags_default_indirect return (make_link_flags, get_lib_name) @@ -2701,3 +2696,7 @@ no_std = rule( }, implementation = _no_std_impl, ) + +# Test-only exports for private helpers. +portable_link_flags_for_testing = _portable_link_flags +symlink_for_ambiguous_lib_for_testing = _symlink_for_ambiguous_lib diff --git a/test/unit/windows_lib_name/BUILD.bazel b/test/unit/windows_lib_name/BUILD.bazel new file mode 100644 index 0000000000..e2a5113ec1 --- /dev/null +++ b/test/unit/windows_lib_name/BUILD.bazel @@ -0,0 +1,3 @@ +load(":windows_lib_name_test.bzl", "windows_lib_name_test_suite") + +windows_lib_name_test_suite(name = "windows_lib_name_test_suite") diff --git a/test/unit/windows_lib_name/windows_lib_name_test.bzl b/test/unit/windows_lib_name/windows_lib_name_test.bzl new file mode 100644 index 0000000000..1749e16700 --- /dev/null +++ b/test/unit/windows_lib_name/windows_lib_name_test.bzl @@ -0,0 +1,185 @@ +"""Analysistests for Windows-specific library naming and link flags.""" + +load("@bazel_skylib//lib:unittest.bzl", "analysistest", "asserts") + +# buildifier: disable=bzl-visibility +load("//rust/private:rustc.bzl", "portable_link_flags_for_testing", "symlink_for_ambiguous_lib_for_testing") + +# buildifier: disable=bzl-visibility +load("//rust/private:utils.bzl", "get_lib_name_default", "get_lib_name_for_windows") + +# buildifier: disable=provider-params +LinkFlagsInfo = provider(fields = {"flags": "List[str]"}) + +# buildifier: disable=provider-params +SymlinkInfo = provider(fields = {"symlink": "File"}) + +def _portable_link_flags_probe_impl(ctx): + lib_artifact = ctx.actions.declare_file(ctx.attr.lib_basename) + 
ctx.actions.write(lib_artifact, "", is_executable = False) + library_to_link = struct( + static_library = lib_artifact, + pic_static_library = None, + dynamic_library = None, + interface_library = None, + alwayslink = False, + ) + + get_lib_name = get_lib_name_for_windows if ctx.attr.flavor_msvc else get_lib_name_default + flags = portable_link_flags_for_testing( + lib = library_to_link, + use_pic = False, + ambiguous_libs = {}, + get_lib_name = get_lib_name, + for_windows = True, + flavor_msvc = ctx.attr.flavor_msvc, + ) + + return [ + DefaultInfo(files = depset([])), + LinkFlagsInfo(flags = flags), + ] + +portable_link_flags_probe = rule( + implementation = _portable_link_flags_probe_impl, + attrs = { + "flavor_msvc": attr.bool(default = False), + "lib_basename": attr.string(mandatory = True), + }, +) + +def _symlink_probe_impl(ctx): + lib_artifact = ctx.actions.declare_file(ctx.attr.lib_basename) + ctx.actions.write(lib_artifact, "", is_executable = False) + crate_output = ctx.actions.declare_file("crate.rlib") + ctx.actions.write(crate_output, "", is_executable = False) + symlink = symlink_for_ambiguous_lib_for_testing( + ctx.actions, + toolchain = struct(target_abi = ctx.attr.target_abi), + crate_info = struct(output = crate_output), + lib = lib_artifact, + ) + + return [ + SymlinkInfo(symlink = symlink), + DefaultInfo(files = depset([symlink])), + ] + +symlink_probe = rule( + implementation = _symlink_probe_impl, + attrs = { + "lib_basename": attr.string(mandatory = True), + "target_abi": attr.string(mandatory = True), + }, +) + +def _portable_link_flags_windows_gnu_test_impl(ctx): + env = analysistest.begin(ctx) + flags = analysistest.target_under_test(env)[LinkFlagsInfo].flags + + asserts.equals( + env, + ["-lstatic=foo.dll", "-Clink-arg=-lfoo.dll"], + flags, + ) + return analysistest.end(env) + +portable_link_flags_windows_gnu_test = analysistest.make( + _portable_link_flags_windows_gnu_test_impl, +) + +def _portable_link_flags_windows_msvc_test_impl(ctx): 
+ env = analysistest.begin(ctx) + flags = analysistest.target_under_test(env)[LinkFlagsInfo].flags + + asserts.equals( + env, + ["-lstatic=libfoo.dll", "-Clink-arg=libfoo.dll.lib"], + flags, + ) + return analysistest.end(env) + +portable_link_flags_windows_msvc_test = analysistest.make( + _portable_link_flags_windows_msvc_test_impl, +) + +def _symlink_name_windows_gnu_test_impl(ctx): + env = analysistest.begin(ctx) + symlink = analysistest.target_under_test(env)[SymlinkInfo].symlink + + asserts.true(env, symlink.basename.startswith("libfoo.dll-")) + asserts.true(env, symlink.basename.endswith(".a")) + asserts.false(env, symlink.basename.startswith("liblib")) + + return analysistest.end(env) + +symlink_name_windows_gnu_test = analysistest.make(_symlink_name_windows_gnu_test_impl) + +def _symlink_name_windows_msvc_test_impl(ctx): + env = analysistest.begin(ctx) + symlink = analysistest.target_under_test(env)[SymlinkInfo].symlink + + asserts.true(env, symlink.basename.startswith("native_dep-")) + asserts.true(env, symlink.basename.endswith(".lib")) + + return analysistest.end(env) + +symlink_name_windows_msvc_test = analysistest.make(_symlink_name_windows_msvc_test_impl) + +def _define_targets(): + portable_link_flags_probe( + name = "portable_link_flags_windows_gnu_probe", + flavor_msvc = False, + lib_basename = "libfoo.dll.a", + ) + portable_link_flags_probe( + name = "portable_link_flags_windows_msvc_probe", + flavor_msvc = True, + lib_basename = "libfoo.dll.lib", + ) + + symlink_probe( + name = "symlink_windows_gnu_probe", + lib_basename = "libfoo.dll.a", + target_abi = "gnu", + ) + symlink_probe( + name = "symlink_windows_msvc_probe", + lib_basename = "native_dep.lib", + target_abi = "msvc", + ) + +def windows_lib_name_test_suite(name): + """Entry-point macro for Windows library naming tests. 
+ + Args: + name: test suite name + """ + _define_targets() + + portable_link_flags_windows_gnu_test( + name = "portable_link_flags_windows_gnu_test", + target_under_test = ":portable_link_flags_windows_gnu_probe", + ) + portable_link_flags_windows_msvc_test( + name = "portable_link_flags_windows_msvc_test", + target_under_test = ":portable_link_flags_windows_msvc_probe", + ) + symlink_name_windows_gnu_test( + name = "symlink_name_windows_gnu_test", + target_under_test = ":symlink_windows_gnu_probe", + ) + symlink_name_windows_msvc_test( + name = "symlink_name_windows_msvc_test", + target_under_test = ":symlink_windows_msvc_probe", + ) + + native.test_suite( + name = name, + tests = [ + ":portable_link_flags_windows_gnu_test", + ":portable_link_flags_windows_msvc_test", + ":symlink_name_windows_gnu_test", + ":symlink_name_windows_msvc_test", + ], + ) diff --git a/test/unit/windows_stdlib/BUILD.bazel b/test/unit/windows_stdlib/BUILD.bazel new file mode 100644 index 0000000000..91b803e6a7 --- /dev/null +++ b/test/unit/windows_stdlib/BUILD.bazel @@ -0,0 +1,3 @@ +load(":windows_stdlib_test.bzl", "windows_stdlib_test_suite") + +windows_stdlib_test_suite(name = "windows_stdlib_test_suite") diff --git a/test/unit/windows_stdlib/windows_stdlib_test.bzl b/test/unit/windows_stdlib/windows_stdlib_test.bzl new file mode 100644 index 0000000000..7008a86aa6 --- /dev/null +++ b/test/unit/windows_stdlib/windows_stdlib_test.bzl @@ -0,0 +1,122 @@ +"""Analysistests covering Windows-specific stdlib link flags.""" + +load("@bazel_skylib//lib:unittest.bzl", "analysistest", "asserts") +load("//rust/platform:triple.bzl", "triple") +load("//rust/platform:triple_mappings.bzl", "system_to_stdlib_linkflags") + +# buildifier: disable=bzl-visibility +load("//rust/private:repository_utils.bzl", "BUILD_for_rust_toolchain") + +def _stdlib_linkflags_windows_test_impl(ctx): + env = analysistest.begin(ctx) + analysistest.target_under_test(env) # Ensure target is configured. 
+ + msvc_flags = system_to_stdlib_linkflags(triple("x86_64-pc-windows-msvc")) + gnu_flags = system_to_stdlib_linkflags(triple("x86_64-pc-windows-gnu")) + gnullvm_flags = system_to_stdlib_linkflags(triple("aarch64-pc-windows-gnullvm")) + + asserts.equals( + env, + ["advapi32.lib", "ws2_32.lib", "userenv.lib", "Bcrypt.lib"], + msvc_flags, + ) + asserts.equals( + env, + ["-ladvapi32", "-lws2_32", "-luserenv"], + gnu_flags, + ) + asserts.equals(env, gnu_flags, gnullvm_flags) + + return analysistest.end(env) + +stdlib_linkflags_windows_test = analysistest.make(_stdlib_linkflags_windows_test_impl) + +def _build_for_rust_toolchain_windows_flags_test_impl(ctx): + env = analysistest.begin(ctx) + analysistest.target_under_test(env) + + msvc_triple = triple("x86_64-pc-windows-msvc") + gnu_triple = triple("x86_64-pc-windows-gnu") + + rendered_msvc = BUILD_for_rust_toolchain( + name = "tc_msvc", + exec_triple = msvc_triple, + target_triple = msvc_triple, + version = "1.75.0", + allocator_library = None, + global_allocator_library = None, + default_edition = "2021", + include_rustfmt = False, + include_llvm_tools = False, + include_linker = False, + stdlib_linkflags = None, + extra_rustc_flags = None, + extra_exec_rustc_flags = None, + opt_level = None, + strip_level = None, + ) + rendered_gnu = BUILD_for_rust_toolchain( + name = "tc_gnu", + exec_triple = gnu_triple, + target_triple = gnu_triple, + version = "1.75.0", + allocator_library = None, + global_allocator_library = None, + default_edition = "2021", + include_rustfmt = False, + include_llvm_tools = False, + include_linker = False, + stdlib_linkflags = None, + extra_rustc_flags = None, + extra_exec_rustc_flags = None, + opt_level = None, + strip_level = None, + ) + + asserts.true( + env, + 'stdlib_linkflags = ["advapi32.lib", "ws2_32.lib", "userenv.lib", "Bcrypt.lib"],' in rendered_msvc, + "MSVC toolchain should render .lib stdlib linkflags:\n%s" % rendered_msvc, + ) + asserts.true( + env, + 'stdlib_linkflags = 
["-ladvapi32", "-lws2_32", "-luserenv"],' in rendered_gnu, + "GNU toolchain should render -l stdlib linkflags:\n%s" % rendered_gnu, + ) + + return analysistest.end(env) + +build_for_rust_toolchain_windows_flags_test = analysistest.make( + _build_for_rust_toolchain_windows_flags_test_impl, +) + +def _define_targets(): + # Target under test is unused beyond satisfying analysistest requirements. + native.filegroup( + name = "dummy_target", + srcs = [], + ) + +def windows_stdlib_test_suite(name): + """Entry-point macro for Windows stdlib linkflag tests. + + Args: + name: test suite name""" + _define_targets() + + stdlib_linkflags_windows_test( + name = "stdlib_linkflags_windows_test", + target_under_test = ":dummy_target", + ) + build_for_rust_toolchain_windows_flags_test( + name = "build_for_rust_toolchain_windows_flags_test", + target_under_test = ":dummy_target", + ) + + native.test_suite( + name = name, + tests = [ + ":build_for_rust_toolchain_windows_flags_test", + ":stdlib_linkflags_windows_test", + ], + ) From 5403348e0dbf5d7e96f680aca9a801df621aed58 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Thu, 18 Dec 2025 14:04:34 -0500 Subject: [PATCH 02/23] Properly handle artifact_name_patterns --- rust/private/rustc.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index cf88ec8147..8c2bc5bac0 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -1639,6 +1639,7 @@ def rustc_compile_action( compilation_outputs = compilation_outputs, name = output_relative_to_package, stamp = ctx.attr.stamp, + main_output = crate_info.output, output_type = "executable" if crate_info.type == "bin" else "dynamic_library", additional_outputs = additional_linker_outputs, ) From 4dc9ac6c43232d1bc62d7c404a9b6928d9583cab Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Tue, 20 Jan 2026 10:31:15 -0500 Subject: [PATCH 03/23] Rearrange link paths on Windows to reduce size overruns and fix errors --- 
util/process_wrapper/main.rs | 186 ++++++++++++++++++++++++++++++++++- 1 file changed, 184 insertions(+), 2 deletions(-) diff --git a/util/process_wrapper/main.rs b/util/process_wrapper/main.rs index 39a6d6db16..7ffd66a8c1 100644 --- a/util/process_wrapper/main.rs +++ b/util/process_wrapper/main.rs @@ -19,10 +19,15 @@ mod rustc; mod util; use std::collections::HashMap; +#[cfg(windows)] +use std::collections::HashSet; use std::fmt; -use std::fs::{copy, OpenOptions}; +use std::fs::{self, copy, OpenOptions}; use std::io; +use std::path::PathBuf; use std::process::{exit, Command, ExitStatus, Stdio}; +#[cfg(windows)] +use std::time::{SystemTime, UNIX_EPOCH}; use tinyjson::JsonValue; @@ -73,6 +78,175 @@ macro_rules! debug_log { }; } +#[cfg(windows)] +struct TemporaryDirectoryGuard { + path: Option, +} + +#[cfg(windows)] +impl TemporaryDirectoryGuard { + fn new(path: Option) -> Self { + Self { path } + } + + fn take(&mut self) -> Option { + self.path.take() + } +} + +#[cfg(windows)] +impl Drop for TemporaryDirectoryGuard { + fn drop(&mut self) { + if let Some(path) = self.path.take() { + let _ = fs::remove_dir_all(path); + } + } +} + +#[cfg(not(windows))] +struct TemporaryDirectoryGuard; + +#[cfg(not(windows))] +impl TemporaryDirectoryGuard { + fn new(_: Option) -> Self { + TemporaryDirectoryGuard + } + + fn take(&mut self) -> Option { + None + } +} + +#[cfg(windows)] +fn consolidate_dependency_search_paths( + args: &[String], +) -> Result<(Vec, Option), ProcessWrapperError> { + let mut dependency_paths = Vec::new(); + let mut filtered_args = Vec::with_capacity(args.len()); + + let mut i = 0; + while i < args.len() { + let arg = &args[i]; + if arg == "-L" { + if let Some(next) = args.get(i + 1) { + if let Some(path) = next.strip_prefix("dependency=") { + dependency_paths.push(PathBuf::from(path)); + i += 2; + continue; + } + } + } + + if let Some(path) = arg.strip_prefix("-Ldependency=") { + dependency_paths.push(PathBuf::from(path)); + i += 1; + continue; + } + + 
filtered_args.push(arg.clone()); + i += 1; + } + + if dependency_paths.is_empty() { + return Ok((filtered_args, None)); + } + + let unique_suffix = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis(); + let dir_name = format!( + "rules_rust_process_wrapper_deps_{}_{}", + std::process::id(), + unique_suffix + ); + + let base_dir = std::env::current_dir().map_err(|e| { + ProcessWrapperError(format!("unable to read current working directory: {}", e)) + })?; + let unified_dir = base_dir.join(&dir_name); + fs::create_dir_all(&unified_dir).map_err(|e| { + ProcessWrapperError(format!( + "unable to create unified dependency directory {}: {}", + unified_dir.display(), + e + )) + })?; + + let mut seen = HashSet::new(); + for path in dependency_paths { + let entries = fs::read_dir(&path).map_err(|e| { + ProcessWrapperError(format!( + "unable to read dependency search path {}: {}", + path.display(), + e + )) + })?; + + for entry in entries { + let entry = entry.map_err(|e| { + ProcessWrapperError(format!( + "unable to iterate dependency search path {}: {}", + path.display(), + e + )) + })?; + let file_type = entry.file_type().map_err(|e| { + ProcessWrapperError(format!( + "unable to inspect dependency search path {}: {}", + path.display(), + e + )) + })?; + if !(file_type.is_file() || file_type.is_symlink()) { + continue; + } + + let file_name = entry.file_name(); + let file_name_lower = file_name + .to_string_lossy() + .to_ascii_lowercase(); + if !seen.insert(file_name_lower) { + continue; + } + + let dest = unified_dir.join(&file_name); + let src = entry.path(); + match fs::hard_link(&src, &dest) { + Ok(_) => {} + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {} + Err(err) => { + debug_log!( + "failed to hardlink {} to {} ({}), falling back to copy", + src.display(), + dest.display(), + err + ); + fs::copy(&src, &dest).map_err(|copy_err| { + ProcessWrapperError(format!( + "unable to copy {} into unified dependency dir {}: 
{}", + src.display(), + dest.display(), + copy_err + )) + })?; + } + } + } + } + + filtered_args.push(format!("-Ldependency={}", unified_dir.display())); + + Ok((filtered_args, Some(unified_dir))) +} + +#[cfg(not(windows))] +fn consolidate_dependency_search_paths( + args: &[String], +) -> Result<(Vec, Option), ProcessWrapperError> { + Ok((args.to_vec(), None)) +} + fn json_warning(line: &str) -> JsonValue { JsonValue::Object(HashMap::from([ ( @@ -120,9 +294,13 @@ fn process_line( fn main() -> Result<(), ProcessWrapperError> { let opts = options().map_err(|e| ProcessWrapperError(e.to_string()))?; + let (child_arguments, dep_dir_cleanup) = + consolidate_dependency_search_paths(&opts.child_arguments)?; + let mut temp_dir_guard = TemporaryDirectoryGuard::new(dep_dir_cleanup); + let mut command = Command::new(opts.executable); command - .args(opts.child_arguments) + .args(child_arguments) .env_clear() .envs(opts.child_environment) .stdout(if let Some(stdout_file) = opts.stdout_file { @@ -228,6 +406,10 @@ fn main() -> Result<(), ProcessWrapperError> { } } + if let Some(path) = temp_dir_guard.take() { + let _ = fs::remove_dir_all(path); + } + exit(code) } From 09cc47726b9da0be9bb7822e8de3d3d8d2d8a1a9 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Sat, 31 Jan 2026 18:34:15 -0500 Subject: [PATCH 04/23] Fix some triple-mapping errors --- rust/platform/triple_mappings.bzl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rust/platform/triple_mappings.bzl b/rust/platform/triple_mappings.bzl index 46c928a429..d6265f0a51 100644 --- a/rust/platform/triple_mappings.bzl +++ b/rust/platform/triple_mappings.bzl @@ -121,8 +121,8 @@ _CPU_ARCH_TO_BUILTIN_PLAT_SUFFIX = { "le32": None, "mips": None, "mipsel": None, - "powerpc": "ppc", - "powerpc64": None, + "powerpc": "ppc32", + "powerpc64": "ppc", "powerpc64le": "ppc64le", "riscv32": "riscv32", "riscv32imc": "riscv32", @@ -154,7 +154,7 @@ _SYSTEM_TO_BUILTIN_SYS_SUFFIX = { "linux": "linux", "macos": "osx", 
"nacl": None, - "netbsd": None, + "netbsd": "netbsd", "nixos": "nixos", "none": "none", "nto": "qnx", @@ -162,9 +162,9 @@ _SYSTEM_TO_BUILTIN_SYS_SUFFIX = { "solaris": None, "uefi": "uefi", "unknown": None, - "wasi": None, - "wasip1": None, - "wasip2": None, + "wasi": "wasi", + "wasip1": "wasi", + "wasip2": "wasi", "windows": "windows", } From 93206e6d9e02cd0018c9a0f960fbba9ab520ce82 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Fri, 6 Feb 2026 19:30:18 -0500 Subject: [PATCH 05/23] Strip -pthread for Windows link args --- rust/private/rustc.bzl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index 8c2bc5bac0..905ca7cadc 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -2273,7 +2273,13 @@ def _make_link_flags_windows(make_link_flags_args, flavor_msvc, use_direct_drive elif include_link_flags: get_lib_name = get_lib_name_for_windows if flavor_msvc else get_lib_name_default ret.extend(_portable_link_flags(lib, use_pic, ambiguous_libs, get_lib_name, flavor_msvc = flavor_msvc)) - _add_user_link_flags(ret, linker_input) + + # Windows toolchains can inherit POSIX defaults like -pthread from C deps, + # which fails to link with the MinGW/LLD toolchain. Drop them here. + for flag in linker_input.user_link_flags: + if flag in ("-pthread", "-lpthread"): + continue + ret.append("--codegen=link-arg={}".format(flag)) return ret def _make_link_flags_windows_msvc(make_link_flags_args, use_direct_driver): From e0f1c1375ffc04a324d64b006e586f2888f17af7 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Fri, 6 Feb 2026 20:00:57 -0500 Subject: [PATCH 06/23] Revert "Fix stamping for rules that don't have a stamp attribute (#3829)" This reverts commit f198ddee7f49ac351d27204b1488df0af2512fac. 
--- rust/private/stamp.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/private/stamp.bzl b/rust/private/stamp.bzl index bff7cbadf3..a05c255c9a 100644 --- a/rust/private/stamp.bzl +++ b/rust/private/stamp.bzl @@ -10,7 +10,7 @@ def is_stamping_enabled(ctx, attr): Returns: bool: The stamp value """ - stamp_num = getattr(attr, "stamp", 0) + stamp_num = getattr(attr, "stamp", -1) if stamp_num == 1: return True elif stamp_num == 0: From f8d867c3a2cc8d58db044149490be0bab6917e0c Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Fri, 6 Feb 2026 20:01:07 -0500 Subject: [PATCH 07/23] Revert "Switch stamping detection to ctx.configuration.stamp_binaries() (#3816)" This reverts commit 9586468d1eaa8d22f527966cb5f43a4870463649. --- rust/private/BUILD.bazel | 3 ++ rust/private/rust.bzl | 4 +++ rust/private/rustc.bzl | 2 +- rust/private/stamp.bzl | 62 +++++++++++++++++++++++++++++++++++++--- 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/rust/private/BUILD.bazel b/rust/private/BUILD.bazel index 89444bb9bc..d18e895493 100644 --- a/rust/private/BUILD.bazel +++ b/rust/private/BUILD.bazel @@ -1,6 +1,7 @@ load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("//rust/private:rust_analyzer.bzl", "rust_analyzer_detect_sysroot") load("//rust/private:rustc.bzl", "is_proc_macro_dep", "is_proc_macro_dep_enabled") +load("//rust/private:stamp.bzl", "stamp_build_setting") # Exported for docs exports_files(["providers.bzl"]) @@ -32,6 +33,8 @@ bzl_library( ], ) +stamp_build_setting(name = "stamp") + # This setting may be used to identify dependencies of proc-macro-s. # This feature is only enabled if `is_proc_macro_dep_enabled` is true. 
# Its value controls the BAZEL_RULES_RUST_IS_PROC_MACRO_DEP environment variable diff --git a/rust/private/rust.bzl b/rust/private/rust.bzl index d06f5fa71f..dcef44424f 100644 --- a/rust/private/rust.bzl +++ b/rust/private/rust.bzl @@ -826,6 +826,10 @@ _common_attrs = { doc = "Enable collection of cfg flags with results stored in CrateInfo.cfgs.", default = Label("//rust/settings:collect_cfgs"), ), + "_stamp_flag": attr.label( + doc = "A setting used to determine whether or not the `--stamp` flag is enabled", + default = Label("//rust/private:stamp"), + ), } | RUSTC_ATTRS | RUSTC_ALLOCATOR_LIBRARIES_ATTRS _coverage_attrs = { diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index 905ca7cadc..dd63dc1a02 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -1349,7 +1349,7 @@ def rustc_compile_action( linkstamps = depset([]) # Determine if the build is currently running with --stamp - stamp = is_stamping_enabled(ctx, attr) + stamp = is_stamping_enabled(attr) # Add flags for any 'rustc' lints that are specified. 
# diff --git a/rust/private/stamp.bzl b/rust/private/stamp.bzl index a05c255c9a..1a7cab65cc 100644 --- a/rust/private/stamp.bzl +++ b/rust/private/stamp.bzl @@ -1,10 +1,63 @@ -"""A small utility module dedicated to detecting whether or not the `--stamp` flag is enabled""" +"""A small utility module dedicated to detecting whether or not the `--stamp` flag is enabled -def is_stamping_enabled(ctx, attr): +This module can be removed likely after the following PRs ar addressed: +- https://github.com/bazelbuild/bazel/issues/11164 +""" + +load("//rust/private:utils.bzl", "dedent") + +StampSettingInfo = provider( + doc = "Information about the `--stamp` command line flag", + fields = { + "value": "bool: Whether or not the `--stamp` flag was enabled", + }, +) + +def _stamp_build_setting_impl(ctx): + return StampSettingInfo(value = ctx.attr.value) + +_stamp_build_setting = rule( + doc = dedent("""\ + Whether to encode build information into the binary. Possible values: + + - stamp = 1: Always stamp the build information into the binary, even in [--nostamp][stamp] builds. \ + This setting should be avoided, since it potentially kills remote caching for the binary and \ + any downstream actions that depend on it. + - stamp = 0: Always replace build information by constant values. This gives good build result caching. + - stamp = -1: Embedding of build information is controlled by the [--[no]stamp][stamp] flag. + + Stamped binaries are not rebuilt unless their dependencies change. 
+ [stamp]: https://docs.bazel.build/versions/main/user-manual.html#flag--stamp + """), + implementation = _stamp_build_setting_impl, + attrs = { + "value": attr.bool( + doc = "The default value of the stamp build flag", + mandatory = True, + ), + }, +) + +def stamp_build_setting(name, visibility = ["//visibility:public"]): + native.config_setting( + name = "stamp_detect", + values = {"stamp": "1"}, + visibility = visibility, + ) + + _stamp_build_setting( + name = name, + value = select({ + ":stamp_detect": True, + "//conditions:default": False, + }), + visibility = visibility, + ) + +def is_stamping_enabled(attr): """Determine whether or not build stamping is enabled Args: - ctx (ctx): The rule's context object attr (struct): A rule's struct of attributes (`ctx.attr`) Returns: @@ -16,6 +69,7 @@ def is_stamping_enabled(ctx, attr): elif stamp_num == 0: return False elif stamp_num == -1: - return ctx.configuration.stamp_binaries() + stamp_flag = getattr(attr, "_stamp_flag", None) + return stamp_flag[StampSettingInfo].value if stamp_flag else False else: fail("Unexpected `stamp` value: {}".format(stamp_num)) From 8efa3fa589901394909f3a2bc410bb682e565bf2 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Mon, 16 Feb 2026 15:50:51 -0500 Subject: [PATCH 08/23] Fix process-wrapper link lib handling when using argfiles --- util/process_wrapper/main.rs | 73 +++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/util/process_wrapper/main.rs b/util/process_wrapper/main.rs index 7ffd66a8c1..5d057b08cf 100644 --- a/util/process_wrapper/main.rs +++ b/util/process_wrapper/main.rs @@ -20,7 +20,7 @@ mod util; use std::collections::HashMap; #[cfg(windows)] -use std::collections::HashSet; +use std::collections::{HashSet, VecDeque}; use std::fmt; use std::fs::{self, copy, OpenOptions}; use std::io; @@ -34,6 +34,8 @@ use tinyjson::JsonValue; use crate::options::options; use crate::output::{process_output, LineOutput}; use 
crate::rustc::ErrorFormat; +#[cfg(windows)] +use crate::util::read_file_to_array; #[cfg(windows)] fn status_code(status: ExitStatus, was_killed: bool) -> i32 { @@ -118,35 +120,64 @@ impl TemporaryDirectoryGuard { } #[cfg(windows)] -fn consolidate_dependency_search_paths( - args: &[String], -) -> Result<(Vec, Option), ProcessWrapperError> { +fn get_dependency_search_paths_from_args( + initial_args: &[String], +) -> Result<(Vec, Vec), ProcessWrapperError> { let mut dependency_paths = Vec::new(); - let mut filtered_args = Vec::with_capacity(args.len()); + let mut filtered_args = Vec::new(); + let mut argfile_contents: HashMap> = HashMap::new(); + + let mut queue: VecDeque<(String, Option)> = initial_args + .iter() + .map(|arg| (arg.clone(), None)) + .collect(); + + while let Some((arg, parent_argfile)) = queue.pop_front() { + let target = match &parent_argfile { + Some(p) => argfile_contents.entry(format!("{}.filtered", p)).or_default(), + None => &mut filtered_args, + }; - let mut i = 0; - while i < args.len() { - let arg = &args[i]; if arg == "-L" { - if let Some(next) = args.get(i + 1) { - if let Some(path) = next.strip_prefix("dependency=") { - dependency_paths.push(PathBuf::from(path)); - i += 2; - continue; - } + let next_arg = queue.front().map(|(a, _)| a.as_str()); + if let Some(path) = next_arg.and_then(|n| n.strip_prefix("dependency=")) { + dependency_paths.push(PathBuf::from(path)); + queue.pop_front(); + } else { + target.push(arg); } - } - - if let Some(path) = arg.strip_prefix("-Ldependency=") { + } else if let Some(path) = arg.strip_prefix("-Ldependency=") { dependency_paths.push(PathBuf::from(path)); - i += 1; - continue; + } else if let Some(argfile_path) = arg.strip_prefix('@') { + let lines = read_file_to_array(argfile_path).map_err(|e| { + ProcessWrapperError(format!("unable to read argfile {}: {}", argfile_path, e)) + })?; + + for line in lines { + queue.push_back((line, Some(argfile_path.to_string()))); + } + + target.push(format!("@{}.filtered", 
argfile_path)); + } else { + target.push(arg); } + } - filtered_args.push(arg.clone()); - i += 1; + for (path, content) in argfile_contents { + fs::write(&path, content.join("\n")).map_err(|e| { + ProcessWrapperError(format!("unable to write filtered argfile {}: {}", path, e)) + })?; } + Ok((dependency_paths, filtered_args)) +} + +#[cfg(windows)] +fn consolidate_dependency_search_paths( + args: &[String], +) -> Result<(Vec, Option), ProcessWrapperError> { + let (dependency_paths, mut filtered_args) = get_dependency_search_paths_from_args(args)?; + if dependency_paths.is_empty() { return Ok((filtered_args, None)); } From 70777e4b79cddc063a7f7c0b5b16599e490a1786 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Wed, 18 Feb 2026 05:12:26 -0500 Subject: [PATCH 09/23] Rewrite process_wrapper_bootstrap to cc --- rust/settings/BUILD.bazel | 3 - rust/settings/settings.bzl | 9 -- test/process_wrapper_bootstrap/BUILD.bazel | 20 +++-- .../bootstrap_process_wrapper_probe.rs | 10 +++ .../bootstrap_process_wrapper_test.rs | 60 +++++++++---- .../process_wrapper_bootstrap_test.bzl | 78 ---------------- util/process_wrapper/BUILD.bazel | 8 +- util/process_wrapper/private/BUILD.bazel | 11 ++- .../private/bootstrap_process_wrapper.bzl | 73 --------------- .../private/bootstrap_process_wrapper.cc | 90 +++++++++++++++++++ .../private/process_wrapper.bat | 31 ------- .../private/process_wrapper.sh | 18 ---- 12 files changed, 169 insertions(+), 242 deletions(-) create mode 100644 test/process_wrapper_bootstrap/bootstrap_process_wrapper_probe.rs delete mode 100644 test/process_wrapper_bootstrap/process_wrapper_bootstrap_test.bzl delete mode 100644 util/process_wrapper/private/bootstrap_process_wrapper.bzl create mode 100644 util/process_wrapper/private/bootstrap_process_wrapper.cc delete mode 100755 util/process_wrapper/private/process_wrapper.bat delete mode 100755 util/process_wrapper/private/process_wrapper.sh diff --git a/rust/settings/BUILD.bazel b/rust/settings/BUILD.bazel index 
06cfb39e4c..e57f5ec5d3 100644 --- a/rust/settings/BUILD.bazel +++ b/rust/settings/BUILD.bazel @@ -18,7 +18,6 @@ load( "experimental_use_cc_common_link", "experimental_use_coverage_metadata_files", "experimental_use_global_allocator", - "experimental_use_sh_toolchain_for_bootstrap_process_wrapper", "extra_exec_rustc_env", "extra_exec_rustc_flag", "extra_exec_rustc_flags", @@ -94,8 +93,6 @@ experimental_use_global_allocator() experimental_use_allocator_libraries_with_mangled_symbols() -experimental_use_sh_toolchain_for_bootstrap_process_wrapper() - extra_exec_rustc_env() extra_exec_rustc_flag() diff --git a/rust/settings/settings.bzl b/rust/settings/settings.bzl index aaed525204..7a114e6e3c 100644 --- a/rust/settings/settings.bzl +++ b/rust/settings/settings.bzl @@ -275,15 +275,6 @@ def experimental_link_std_dylib(): build_setting_default = False, ) -def experimental_use_sh_toolchain_for_bootstrap_process_wrapper(): - """A flag to control whether the shell path from a shell toolchain (`@bazel_tools//tools/sh:toolchain_type`) \ - is embedded into the bootstrap process wrapper for the `.sh` file. - """ - bool_flag( - name = "experimental_use_sh_toolchain_for_bootstrap_process_wrapper", - build_setting_default = False, - ) - def toolchain_linker_preference(): """A flag to control which linker is preferred for linking Rust binaries. 
diff --git a/test/process_wrapper_bootstrap/BUILD.bazel b/test/process_wrapper_bootstrap/BUILD.bazel index d2c8419b81..6a0c167adc 100644 --- a/test/process_wrapper_bootstrap/BUILD.bazel +++ b/test/process_wrapper_bootstrap/BUILD.bazel @@ -1,12 +1,22 @@ -load("//rust:defs.bzl", "rust_test") -load(":process_wrapper_bootstrap_test.bzl", "process_wrapper_bootstrap_test_suite") +load("//rust:defs.bzl", "rust_binary", "rust_test") + +rust_binary( + name = "bootstrap_process_wrapper_probe", + srcs = ["bootstrap_process_wrapper_probe.rs"], + edition = "2021", +) rust_test( name = "bootstrap_process_wrapper_test", srcs = ["bootstrap_process_wrapper_test.rs"], - data = ["//util/process_wrapper/private:process_wrapper.sh"], + data = [ + ":bootstrap_process_wrapper_probe", + "//util/process_wrapper:bootstrap_process_wrapper", + ], edition = "2021", + env = { + "BOOTSTRAP_PROCESS_WRAPPER_PROBE_RLOCATIONPATH": "$(rlocationpath :bootstrap_process_wrapper_probe)", + "BOOTSTRAP_PROCESS_WRAPPER_RLOCATIONPATH": "$(rlocationpath //util/process_wrapper:bootstrap_process_wrapper)", + }, deps = ["//rust/runfiles"], ) - -process_wrapper_bootstrap_test_suite(name = "process_wrapper_bootstrap_test_suite") diff --git a/test/process_wrapper_bootstrap/bootstrap_process_wrapper_probe.rs b/test/process_wrapper_bootstrap/bootstrap_process_wrapper_probe.rs new file mode 100644 index 0000000000..35ecc496cf --- /dev/null +++ b/test/process_wrapper_bootstrap/bootstrap_process_wrapper_probe.rs @@ -0,0 +1,10 @@ +fn main() { + let arg = std::env::args().nth(1).unwrap_or_default(); + println!("{arg}"); + + let exit_code = std::env::var("BOOTSTRAP_PROCESS_WRAPPER_PROBE_EXIT_CODE") + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(0); + std::process::exit(exit_code); +} diff --git a/test/process_wrapper_bootstrap/bootstrap_process_wrapper_test.rs b/test/process_wrapper_bootstrap/bootstrap_process_wrapper_test.rs index 3fbf45484c..43935b1b6e 100644 --- 
a/test/process_wrapper_bootstrap/bootstrap_process_wrapper_test.rs +++ b/test/process_wrapper_bootstrap/bootstrap_process_wrapper_test.rs @@ -1,24 +1,54 @@ -//! Tests for the bootstrap process wrapper +//! Tests for the bootstrap process wrapper. -use std::fs::read_to_string; +use std::env; +use std::process::Command; use runfiles::Runfiles; -/// Test that the shell process wrapper starts with the expected shebang to -/// avoid breaking the contract with the `bootstrap_process_wrapper` rule. -#[test] -fn test_shebang() { +fn resolve_runfile(env_var: &str) -> String { let rfiles = Runfiles::create().unwrap(); + let rlocationpath = env::var(env_var).unwrap(); + runfiles::rlocation!(rfiles, rlocationpath.as_str()) + .unwrap() + .display() + .to_string() +} - let script = runfiles::rlocation!( - rfiles, - "rules_rust/util/process_wrapper/private/process_wrapper.sh" - ) - .unwrap(); +#[test] +fn test_substitutes_pwd() { + let wrapper = resolve_runfile("BOOTSTRAP_PROCESS_WRAPPER_RLOCATIONPATH"); + let probe = resolve_runfile("BOOTSTRAP_PROCESS_WRAPPER_PROBE_RLOCATIONPATH"); + let pwd = env::current_dir().unwrap().display().to_string(); + + let output = Command::new(wrapper) + .arg("--") + .arg(probe) + .arg("${pwd}/suffix") + .output() + .unwrap(); - let content = read_to_string(script).unwrap(); assert!( - content.starts_with("#!/usr/bin/env bash"), - "The shell script does not start with the expected shebang." 
- ) + output.status.success(), + "wrapper failed: status={:?}, stderr={}", + output.status, + String::from_utf8_lossy(&output.stderr), + ); + + let stdout = String::from_utf8(output.stdout).unwrap(); + assert_eq!(stdout.trim_end(), format!("{}/suffix", pwd)); +} + +#[test] +fn test_propagates_exit_code() { + let wrapper = resolve_runfile("BOOTSTRAP_PROCESS_WRAPPER_RLOCATIONPATH"); + let probe = resolve_runfile("BOOTSTRAP_PROCESS_WRAPPER_PROBE_RLOCATIONPATH"); + + let status = Command::new(wrapper) + .arg("--") + .arg(probe) + .env("BOOTSTRAP_PROCESS_WRAPPER_PROBE_EXIT_CODE", "23") + .status() + .unwrap(); + + assert_eq!(status.code(), Some(23)); } diff --git a/test/process_wrapper_bootstrap/process_wrapper_bootstrap_test.bzl b/test/process_wrapper_bootstrap/process_wrapper_bootstrap_test.bzl deleted file mode 100644 index 7e0e4ea571..0000000000 --- a/test/process_wrapper_bootstrap/process_wrapper_bootstrap_test.bzl +++ /dev/null @@ -1,78 +0,0 @@ -"""Starlark unit tests for the bootstrap process wrapper""" - -load("@bazel_skylib//lib:unittest.bzl", "analysistest") -load("//test/unit:common.bzl", "assert_action_mnemonic") - -def _enable_sh_toolchain_test_impl(ctx): - env = analysistest.begin(ctx) - target = analysistest.target_under_test(env) - - if ctx.attr.expected_ext == ".bat": - assert_action_mnemonic(env, target.actions[0], "ExecutableSymlink") - else: - assert_action_mnemonic(env, target.actions[0], "TemplateExpand") - - return analysistest.end(env) - -_enable_sh_toolchain_test = analysistest.make( - _enable_sh_toolchain_test_impl, - config_settings = { - str(Label("//rust/settings:experimental_use_sh_toolchain_for_bootstrap_process_wrapper")): True, - }, - attrs = { - "expected_ext": attr.string( - doc = "The expected extension for the bootstrap script.", - mandatory = True, - values = [ - ".bat", - ".sh", - ], - ), - }, -) - -def _disable_sh_toolchain_test_impl(ctx): - env = analysistest.begin(ctx) - target = analysistest.target_under_test(env) - - 
assert_action_mnemonic(env, target.actions[0], "ExecutableSymlink") - - return analysistest.end(env) - -_disable_sh_toolchain_test = analysistest.make( - _disable_sh_toolchain_test_impl, - config_settings = { - str(Label("//rust/settings:experimental_use_sh_toolchain_for_bootstrap_process_wrapper")): False, - }, -) - -def process_wrapper_bootstrap_test_suite(name, **kwargs): - """Entry-point macro called from the BUILD file. - - Args: - name (str): Name of the macro. - **kwargs (dict): Additional keyword arguments. - """ - - _enable_sh_toolchain_test( - name = "enable_sh_toolchain_test", - target_under_test = Label("//util/process_wrapper:bootstrap_process_wrapper"), - expected_ext = select({ - "@platforms//os:windows": ".bat", - "//conditions:default": ".sh", - }), - ) - - _disable_sh_toolchain_test( - name = "disable_sh_toolchain_test", - target_under_test = Label("//util/process_wrapper:bootstrap_process_wrapper"), - ) - - native.test_suite( - name = name, - tests = [ - ":disable_sh_toolchain_test", - ":enable_sh_toolchain_test", - ], - **kwargs - ) diff --git a/util/process_wrapper/BUILD.bazel b/util/process_wrapper/BUILD.bazel index af56264e4c..08dd7ecadf 100644 --- a/util/process_wrapper/BUILD.bazel +++ b/util/process_wrapper/BUILD.bazel @@ -2,7 +2,6 @@ load("@bazel_skylib//lib:selects.bzl", "selects") # buildifier: disable=bzl-visibility load("//rust/private:rust.bzl", "rust_binary_without_process_wrapper", "rust_test_without_process_wrapper_test") -load("//util/process_wrapper/private:bootstrap_process_wrapper.bzl", "bootstrap_process_wrapper") config_setting( name = "compilation_mode_opt", @@ -55,11 +54,8 @@ rust_test_without_process_wrapper_test( edition = "2018", ) -bootstrap_process_wrapper( +alias( name = "bootstrap_process_wrapper", - is_windows = select({ - "@platforms//os:windows": True, - "//conditions:default": False, - }), + actual = "//util/process_wrapper/private:bootstrap_process_wrapper", visibility = ["//visibility:public"], ) diff --git 
a/util/process_wrapper/private/BUILD.bazel b/util/process_wrapper/private/BUILD.bazel index badd4a695d..6cbbbc07da 100644 --- a/util/process_wrapper/private/BUILD.bazel +++ b/util/process_wrapper/private/BUILD.bazel @@ -1,4 +1,7 @@ -exports_files([ - "process_wrapper.sh", - "process_wrapper.bat", -]) +load("@rules_cc//cc:cc_binary.bzl", "cc_binary") + +cc_binary( + name = "bootstrap_process_wrapper", + srcs = ["bootstrap_process_wrapper.cc"], + visibility = ["//util/process_wrapper:__pkg__"], +) diff --git a/util/process_wrapper/private/bootstrap_process_wrapper.bzl b/util/process_wrapper/private/bootstrap_process_wrapper.bzl deleted file mode 100644 index c47a12fd7b..0000000000 --- a/util/process_wrapper/private/bootstrap_process_wrapper.bzl +++ /dev/null @@ -1,73 +0,0 @@ -"""Bootstrap rustc process wrapper""" - -load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") - -def _bootstrap_process_wrapper_impl_unix(ctx): - output = ctx.actions.declare_file("{}.sh".format(ctx.label.name)) - - setting = ctx.attr._use_sh_toolchain_for_bootstrap_process_wrapper[BuildSettingInfo].value - sh_toolchain = ctx.toolchains["@bazel_tools//tools/sh:toolchain_type"] - if setting and sh_toolchain: - shebang = "#!{}".format(sh_toolchain.path) - ctx.actions.expand_template( - output = output, - template = ctx.file._bash, - substitutions = { - # Replace the shebang with one constructed from the configured - # shell toolchain. 
- "#!/usr/bin/env bash": shebang, - }, - ) - else: - ctx.actions.symlink( - output = output, - target_file = ctx.file._bash, - is_executable = True, - ) - - return [DefaultInfo( - files = depset([output]), - executable = output, - )] - -def _bootstrap_process_wrapper_impl_windows(ctx): - output = ctx.actions.declare_file("{}.bat".format(ctx.label.name)) - ctx.actions.symlink( - output = output, - target_file = ctx.file._batch, - is_executable = True, - ) - - return [DefaultInfo( - files = depset([output]), - executable = output, - )] - -def _bootstrap_process_wrapper_impl(ctx): - if ctx.attr.is_windows: - return _bootstrap_process_wrapper_impl_windows(ctx) - return _bootstrap_process_wrapper_impl_unix(ctx) - -bootstrap_process_wrapper = rule( - doc = "A rule which produces a bootstrapping script for the rustc process wrapper.", - implementation = _bootstrap_process_wrapper_impl, - attrs = { - "is_windows": attr.bool( - doc = "Indicate whether or not the target platform is windows.", - mandatory = True, - ), - "_bash": attr.label( - allow_single_file = True, - default = Label("//util/process_wrapper/private:process_wrapper.sh"), - ), - "_batch": attr.label( - allow_single_file = True, - default = Label("//util/process_wrapper/private:process_wrapper.bat"), - ), - "_use_sh_toolchain_for_bootstrap_process_wrapper": attr.label( - default = Label("//rust/settings:experimental_use_sh_toolchain_for_bootstrap_process_wrapper"), - ), - }, - toolchains = [config_common.toolchain_type("@bazel_tools//tools/sh:toolchain_type", mandatory = False)], - executable = True, -) diff --git a/util/process_wrapper/private/bootstrap_process_wrapper.cc b/util/process_wrapper/private/bootstrap_process_wrapper.cc new file mode 100644 index 0000000000..e6e58513e3 --- /dev/null +++ b/util/process_wrapper/private/bootstrap_process_wrapper.cc @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) +#include +#include +#define getcwd _getcwd +#else 
+#include +#endif + +namespace { + +constexpr const char* kPwdPlaceholder = "${pwd}"; + +std::string replace_pwd_placeholder(const std::string& arg, + const std::string& pwd) { + std::string out = arg; + std::string::size_type pos = 0; + while ((pos = out.find(kPwdPlaceholder, pos)) != std::string::npos) { + out.replace(pos, std::strlen(kPwdPlaceholder), pwd); + pos += pwd.size(); + } + return out; +} + +std::vector build_exec_argv(const std::vector& args) { + std::vector exec_argv; + exec_argv.reserve(args.size() + 1); + for (const std::string& arg : args) { + exec_argv.push_back(const_cast(arg.c_str())); + } + exec_argv.push_back(nullptr); + return exec_argv; +} + +} // namespace + +int main(int argc, char** argv) { + int first_arg_index = 1; + if (argc > 1 && std::strcmp(argv[1], "--") == 0) { + first_arg_index = 2; + } + + if (first_arg_index >= argc) { + std::fprintf(stderr, "bootstrap_process_wrapper: missing command\n"); + return 1; + } + + char* pwd_raw = getcwd(nullptr, 0); + if (pwd_raw == nullptr) { + std::perror("bootstrap_process_wrapper: getcwd"); + return 1; + } + std::string pwd = pwd_raw; + std::free(pwd_raw); + + std::vector command_args; + command_args.reserve(static_cast(argc - first_arg_index)); + for (int i = first_arg_index; i < argc; ++i) { + command_args.push_back(replace_pwd_placeholder(argv[i], pwd)); + } + +#if defined(_WIN32) + for (char& c : command_args[0]) { + if (c == '/') { + c = '\\'; + } + } +#endif + + std::vector exec_argv = build_exec_argv(command_args); + +#if defined(_WIN32) + int exit_code = _spawnvp(_P_WAIT, exec_argv[0], exec_argv.data()); + if (exit_code == -1) { + std::perror("bootstrap_process_wrapper: _spawnvp"); + return 1; + } + return exit_code; +#else + execvp(exec_argv[0], exec_argv.data()); + std::perror("bootstrap_process_wrapper: execvp"); + return 1; +#endif +} diff --git a/util/process_wrapper/private/process_wrapper.bat b/util/process_wrapper/private/process_wrapper.bat deleted file mode 100755 index 
36fff8699a..0000000000 --- a/util/process_wrapper/private/process_wrapper.bat +++ /dev/null @@ -1,31 +0,0 @@ -@ECHO OFF -SETLOCAL enabledelayedexpansion - -SET command=%* - -:: Resolve the `${pwd}` placeholders -SET command=!command:${pwd}=%CD%! - -:: Strip out the leading `--` argument. -SET command=!command:~3! - -:: Find the rustc.exe argument and sanitize it's path -for %%A in (%*) do ( - SET arg=%%~A - if "!arg:~-9!"=="rustc.exe" ( - SET sanitized=!arg:/=\! - - SET command=!sanitized! !command:%%~A=! - goto :break - ) -) - -:break - -%command% - -:: Capture the exit code of rustc.exe -SET exit_code=!errorlevel! - -:: Exit with the same exit code -EXIT /b %exit_code% diff --git a/util/process_wrapper/private/process_wrapper.sh b/util/process_wrapper/private/process_wrapper.sh deleted file mode 100755 index 97b3478c9f..0000000000 --- a/util/process_wrapper/private/process_wrapper.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail - -# Skip the first argument which is expected to be `--` -shift - -args=() - -for arg in "$@"; do - # Check if the argument contains "${PWD}" and replace it with the actual value of PWD - if [[ "${arg}" == *'${pwd}'* ]]; then - arg="${arg//\$\{pwd\}/$PWD}" - fi - args+=("${arg}") -done - -exec "${args[@]}" From c24d82ee9fa9b86bc18be0d8688bbff33e905bde Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Wed, 18 Feb 2026 09:09:52 -0500 Subject: [PATCH 10/23] Attempt to fix CopyFile for windows --- MODULE.bazel | 1 + cargo/private/BUILD.bazel | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/MODULE.bazel b/MODULE.bazel index 9876238c48..5fce02f767 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -9,6 +9,7 @@ module( ## Core ############################################################################### +bazel_dep(name = "bazel_lib", version = "3.0.0") bazel_dep(name = "bazel_features", version = "1.32.0") bazel_dep(name = "bazel_skylib", version = "1.8.2") bazel_dep(name = "platforms", version = 
"1.0.0") diff --git a/cargo/private/BUILD.bazel b/cargo/private/BUILD.bazel index fd60c4ff62..536490441d 100644 --- a/cargo/private/BUILD.bazel +++ b/cargo/private/BUILD.bazel @@ -1,5 +1,5 @@ +load("@bazel_lib//lib:copy_file.bzl", "copy_file") load("@bazel_skylib//:bzl_library.bzl", "bzl_library") -load("@bazel_skylib//rules:copy_file.bzl", "copy_file") load("//rust:defs.bzl", "rust_binary") rust_binary( From 53fb4fa8710c471f23b370e28d60447224c70814 Mon Sep 17 00:00:00 2001 From: isaacparker0 <128327439+isaacparker0@users.noreply.github.com> Date: Fri, 20 Feb 2026 09:19:32 -0500 Subject: [PATCH 11/23] Apply lint config in exec configuration (#2) --- rust/private/rustc.bzl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index dd63dc1a02..88fd36c248 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -1352,11 +1352,8 @@ def rustc_compile_action( stamp = is_stamping_enabled(attr) # Add flags for any 'rustc' lints that are specified. - # - # Exclude lints if we're building in the exec configuration to prevent crates - # used in build scripts from generating warnings. 
lint_files = [] - if hasattr(ctx.attr, "lint_config") and ctx.attr.lint_config and not is_exec_configuration(ctx): + if hasattr(ctx.attr, "lint_config") and ctx.attr.lint_config: rust_flags = rust_flags + ctx.attr.lint_config[LintsInfo].rustc_lint_flags lint_files = lint_files + ctx.attr.lint_config[LintsInfo].rustc_lint_files From 775d23ae5b522fc23543c6b793fd721481519a54 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Fri, 20 Feb 2026 09:43:40 -0500 Subject: [PATCH 12/23] Fix up rules_rust bzl_library targets --- cargo/private/BUILD.bazel | 5 ++++- rust/platform/BUILD.bazel | 1 - rust/private/BUILD.bazel | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cargo/private/BUILD.bazel b/cargo/private/BUILD.bazel index 536490441d..db04afa7b1 100644 --- a/cargo/private/BUILD.bazel +++ b/cargo/private/BUILD.bazel @@ -39,6 +39,9 @@ copy_file( bzl_library( name = "bzl_lib", + deps = [ + "//rust:bzl_lib", + ], srcs = glob(["**/*.bzl"]), - visibility = ["//:__subpackages__"], + visibility = ["//visibility:public"], ) diff --git a/rust/platform/BUILD.bazel b/rust/platform/BUILD.bazel index d5513b1e79..91b095ebed 100644 --- a/rust/platform/BUILD.bazel +++ b/rust/platform/BUILD.bazel @@ -31,5 +31,4 @@ package_group( bzl_library( name = "bzl_lib", srcs = glob(["**/*.bzl"]), - visibility = ["//rust:__subpackages__"], ) diff --git a/rust/private/BUILD.bazel b/rust/private/BUILD.bazel index d18e895493..d3cea4650d 100644 --- a/rust/private/BUILD.bazel +++ b/rust/private/BUILD.bazel @@ -22,7 +22,7 @@ bzl_library( bzl_library( name = "bzl_lib", srcs = glob(["**/*.bzl"]), - visibility = ["//rust:__subpackages__"], + visibility = ["//visibility:public"], deps = [ ":bazel_tools_bzl_lib", ":rules_cc_bzl_lib", From 81cbd5d1b2e827ea0ccea3207bf92ac1695c0beb Mon Sep 17 00:00:00 2001 From: isaacparker0 <128327439+isaacparker0@users.noreply.github.com> Date: Fri, 20 Feb 2026 13:32:15 -0500 Subject: [PATCH 13/23] rust-analyzer: include Bazel package dir in crate source 
include_dirs (#3) * 0 * Add rust analyzer test coverage --- rust/private/rust_analyzer.bzl | 9 +++ .../rust_project_json_test.rs | 32 ++++---- .../BUILD.bazel | 29 +++++++ .../subdir_test_crates_same_package/lib.rs | 1 + .../rust_project_json_test.rs | 76 +++++++++++++++++++ .../subdir/subdir_test.rs | 5 ++ 6 files changed, 138 insertions(+), 14 deletions(-) create mode 100644 test/rust_analyzer/subdir_test_crates_same_package/BUILD.bazel create mode 100644 test/rust_analyzer/subdir_test_crates_same_package/lib.rs create mode 100644 test/rust_analyzer/subdir_test_crates_same_package/rust_project_json_test.rs create mode 100644 test/rust_analyzer/subdir_test_crates_same_package/subdir/subdir_test.rs diff --git a/rust/private/rust_analyzer.bzl b/rust/private/rust_analyzer.bzl index da41867985..1dd8244c98 100644 --- a/rust/private/rust_analyzer.bzl +++ b/rust/private/rust_analyzer.bzl @@ -257,6 +257,15 @@ def _create_single_crate(ctx, attrs, info): if info.crate.root.short_path in src_map: crate["root_module"] = _WORKSPACE_TEMPLATE + src_map[info.crate.root.short_path].path crate["source"]["include_dirs"].append(path_prefix + info.crate.root.dirname) + + # Ensure workspace crates in the same Bazel package share one source root. + # + # rust-analyzer picks candidate crates by source root (`relevant_crates`). + # Widening include_dirs at the package level keeps related crates in a + # shared candidate set; final membership is still resolved by each crate's + # module tree. 
+ if not is_external: + crate["source"]["include_dirs"].append(_WORKSPACE_TEMPLATE + ctx.label.package) if info.build_info != None and info.build_info.out_dir != None: out_dir_path = info.build_info.out_dir.path diff --git a/test/rust_analyzer/generated_srcs_test/rust_project_json_test.rs b/test/rust_analyzer/generated_srcs_test/rust_project_json_test.rs index d9ce7579c5..2138c62e19 100644 --- a/test/rust_analyzer/generated_srcs_test/rust_project_json_test.rs +++ b/test/rust_analyzer/generated_srcs_test/rust_project_json_test.rs @@ -2,11 +2,12 @@ mod tests { use serde::Deserialize; use std::env; + use std::fs; + use std::path::Path; use std::path::PathBuf; #[derive(Deserialize)] struct Project { - sysroot_src: String, crates: Vec, } @@ -25,22 +26,12 @@ mod tests { #[test] fn test_generated_srcs() { let rust_project_path = PathBuf::from(env::var("RUST_PROJECT_JSON").unwrap()); + let rust_project_path = fs::canonicalize(&rust_project_path).unwrap(); let content = std::fs::read_to_string(&rust_project_path) .unwrap_or_else(|_| panic!("couldn't open {:?}", &rust_project_path)); let project: Project = serde_json::from_str(&content).expect("Failed to deserialize project JSON"); - // /tmp/_bazel/12345678/external/tools/rustlib/library => /tmp/_bazel - let output_base = project - .sysroot_src - .rsplitn(2, "/external/") - .last() - .unwrap() - .rsplitn(2, '/') - .last() - .unwrap(); - println!("output_base: {output_base}"); - let with_gen = project .crates .iter() @@ -50,7 +41,20 @@ mod tests { assert!(with_gen.root_module.ends_with("/lib.rs")); let include_dirs = &with_gen.source.as_ref().unwrap().include_dirs; - assert!(include_dirs.len() == 1); - assert!(include_dirs[0].starts_with(output_base)); + assert_eq!(include_dirs.len(), 2); + + let root_module_parent = Path::new(&with_gen.root_module).parent().unwrap(); + let workspace_dir = rust_project_path.parent().unwrap(); + + assert!( + include_dirs.iter().any(|p| Path::new(p) == root_module_parent), + "expected 
include_dirs to contain root_module parent, got include_dirs={include_dirs:?}, root_module={}", + with_gen.root_module, + ); + assert!( + include_dirs.iter().any(|p| Path::new(p) == workspace_dir), + "expected include_dirs to contain workspace dir, got include_dirs={include_dirs:?}, workspace_dir={}", + workspace_dir.display(), + ); } } diff --git a/test/rust_analyzer/subdir_test_crates_same_package/BUILD.bazel b/test/rust_analyzer/subdir_test_crates_same_package/BUILD.bazel new file mode 100644 index 0000000000..eac2fe6e4e --- /dev/null +++ b/test/rust_analyzer/subdir_test_crates_same_package/BUILD.bazel @@ -0,0 +1,29 @@ +load("@rules_rust//rust:defs.bzl", "rust_library", "rust_test") + +rust_library( + name = "mylib", + srcs = ["lib.rs"], + edition = "2018", +) + +rust_test( + name = "mylib_test", + srcs = ["subdir/subdir_test.rs"], + edition = "2018", +) + +rust_test( + name = "rust_project_json_test", + srcs = ["rust_project_json_test.rs"], + data = [":rust-project.json"], + edition = "2018", + env = {"RUST_PROJECT_JSON": "$(rootpath :rust-project.json)"}, + # This target is tagged as manual since it's not expected to pass in + # contexts outside of `//test/rust_analyzer:rust_analyzer_test`. Run + # that target to execute this test. 
+ tags = ["manual"], + deps = [ + "//test/rust_analyzer/3rdparty/crates:serde", + "//test/rust_analyzer/3rdparty/crates:serde_json", + ], +) diff --git a/test/rust_analyzer/subdir_test_crates_same_package/lib.rs b/test/rust_analyzer/subdir_test_crates_same_package/lib.rs new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/test/rust_analyzer/subdir_test_crates_same_package/lib.rs @@ -0,0 +1 @@ + diff --git a/test/rust_analyzer/subdir_test_crates_same_package/rust_project_json_test.rs b/test/rust_analyzer/subdir_test_crates_same_package/rust_project_json_test.rs new file mode 100644 index 0000000000..77a99d75ed --- /dev/null +++ b/test/rust_analyzer/subdir_test_crates_same_package/rust_project_json_test.rs @@ -0,0 +1,76 @@ +#[cfg(test)] +mod tests { + use serde::Deserialize; + use std::collections::BTreeSet; + use std::env; + use std::path::PathBuf; + + #[derive(Deserialize)] + struct Project { + crates: Vec, + } + + #[derive(Deserialize)] + struct Crate { + root_module: String, + is_workspace_member: Option, + source: Option, + } + + #[derive(Deserialize)] + struct Source { + include_dirs: Vec, + } + + fn normalize(path: &str) -> String { + path.trim_end_matches('/').to_owned() + } + + #[test] + fn test_same_package_crates_share_include_dir() { + let rust_project_path = PathBuf::from(env::var("RUST_PROJECT_JSON").unwrap()); + let content = std::fs::read_to_string(&rust_project_path) + .unwrap_or_else(|_| panic!("couldn't open {:?}", &rust_project_path)); + let project: Project = + serde_json::from_str(&content).expect("Failed to deserialize project JSON"); + + let lib = project + .crates + .iter() + .find(|c| c.is_workspace_member == Some(true) && c.root_module.ends_with("/lib.rs")) + .expect("missing library crate"); + let test = project + .crates + .iter() + .find(|c| { + c.is_workspace_member == Some(true) + && c.root_module.ends_with("/subdir/subdir_test.rs") + }) + .expect("missing subdir test crate"); + + let lib_include_dirs: BTreeSet<_> = 
lib + .source + .as_ref() + .expect("lib crate missing source field") + .include_dirs + .iter() + .map(|p| normalize(p)) + .collect(); + let test_include_dirs: BTreeSet<_> = test + .source + .as_ref() + .expect("test crate missing source field") + .include_dirs + .iter() + .map(|p| normalize(p)) + .collect(); + + let shared_dir = lib_include_dirs + .intersection(&test_include_dirs) + .next() + .expect("expected crates in same package to share an include_dir"); + + assert!(lib.root_module.starts_with(&format!("{}/", shared_dir))); + assert!(test.root_module.starts_with(&format!("{}/", shared_dir))); + } +} diff --git a/test/rust_analyzer/subdir_test_crates_same_package/subdir/subdir_test.rs b/test/rust_analyzer/subdir_test_crates_same_package/subdir/subdir_test.rs new file mode 100644 index 0000000000..915d7c6130 --- /dev/null +++ b/test/rust_analyzer/subdir_test_crates_same_package/subdir/subdir_test.rs @@ -0,0 +1,5 @@ +#[test] +fn test_subdir_fixture() { + let marker = String::from("ok"); + assert_eq!(marker.len(), 2); +} From fa7a428858fc289fcd486e3eece529af98413182 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Tue, 9 Sep 2025 09:19:46 -0400 Subject: [PATCH 14/23] Improve proc_macro_deps ergonomics --- .../private/wasm_bindgen_test.bzl | 7 +- rust/defs.bzl | 26 ++++-- rust/private/rust.bzl | 81 +++++++------------ rust/private/rustc.bzl | 2 +- rust/private/rustdoc/BUILD.bazel | 2 +- rust/private/rustdoc_test.bzl | 8 +- rust/private/utils.bzl | 28 ++++++- 7 files changed, 85 insertions(+), 69 deletions(-) diff --git a/extensions/wasm_bindgen/private/wasm_bindgen_test.bzl b/extensions/wasm_bindgen/private/wasm_bindgen_test.bzl index 8a2dcb5738..ee8591d71d 100644 --- a/extensions/wasm_bindgen/private/wasm_bindgen_test.bzl +++ b/extensions/wasm_bindgen/private/wasm_bindgen_test.bzl @@ -13,6 +13,7 @@ load( "@rules_rust//rust/private:utils.bzl", "determine_output_hash", "expand_dict_value_locations", + "filter_deps", "find_toolchain", 
"generate_output_diagnostics", "get_import_macro_deps", @@ -64,8 +65,10 @@ def _rust_wasm_bindgen_test_impl(ctx): toolchain = find_toolchain(ctx) crate_type = "bin" - deps = transform_deps(ctx.attr.deps + [wb_toolchain.wasm_bindgen_test]) - proc_macro_deps = transform_deps(ctx.attr.proc_macro_deps + get_import_macro_deps(ctx)) + + deps, proc_macro_deps = filter_deps(ctx) + deps = transform_deps(deps + [wb_toolchain.wasm_bindgen_test]) + proc_macro_deps = transform_deps(proc_macro_deps + get_import_macro_deps(ctx)) # Target is building the crate in `test` config if WasmBindgenTestCrateInfo in ctx.attr.wasm: diff --git a/rust/defs.bzl b/rust/defs.bzl index 4f2ef72582..09be66d253 100644 --- a/rust/defs.bzl +++ b/rust/defs.bzl @@ -78,25 +78,37 @@ load( _rust_unpretty_aspect = "rust_unpretty_aspect", ) -rust_library = _rust_library +def _rule_wrapper(rule): + def _wrapped(name, deps = [], proc_macro_deps = [], **kwargs): + rule( + name = name, + deps = deps + proc_macro_deps, + # TODO(zbarsky): This attribute would ideally be called `exec_configured_deps` or similar. + proc_macro_deps = deps + proc_macro_deps, + **kwargs + ) + + return _wrapped + +rust_library = _rule_wrapper(_rust_library) # See @rules_rust//rust/private:rust.bzl for a complete description. -rust_static_library = _rust_static_library +rust_static_library = _rule_wrapper(_rust_static_library) # See @rules_rust//rust/private:rust.bzl for a complete description. -rust_shared_library = _rust_shared_library +rust_shared_library = _rule_wrapper(_rust_shared_library) # See @rules_rust//rust/private:rust.bzl for a complete description. -rust_proc_macro = _rust_proc_macro +rust_proc_macro = _rule_wrapper(_rust_proc_macro) # See @rules_rust//rust/private:rust.bzl for a complete description. -rust_binary = _rust_binary +rust_binary = _rule_wrapper(_rust_binary) # See @rules_rust//rust/private:rust.bzl for a complete description. 
rust_library_group = _rust_library_group # See @rules_rust//rust/private:rust.bzl for a complete description. -rust_test = _rust_test +rust_test = _rule_wrapper(_rust_test) # See @rules_rust//rust/private:rust.bzl for a complete description. rust_test_suite = _rust_test_suite @@ -105,7 +117,7 @@ rust_test_suite = _rust_test_suite rust_doc = _rust_doc # See @rules_rust//rust/private:rustdoc.bzl for a complete description. -rust_doc_test = _rust_doc_test +rust_doc_test = _rule_wrapper(_rust_doc_test) # See @rules_rust//rust/private:rustdoc_test.bzl for a complete description. clippy_flag = _clippy_flag diff --git a/rust/private/rust.bzl b/rust/private/rust.bzl index dcef44424f..0307f47098 100644 --- a/rust/private/rust.bzl +++ b/rust/private/rust.bzl @@ -23,6 +23,7 @@ load( "BuildInfo", "CrateGroupInfo", "CrateInfo", + "DepInfo", "LintsInfo", ) load( @@ -46,6 +47,7 @@ load( "determine_lib_name", "determine_output_hash", "expand_dict_value_locations", + "filter_deps", "find_toolchain", "generate_output_diagnostics", "get_edition", @@ -64,41 +66,6 @@ def _assert_no_deprecated_attributes(_ctx): """ pass -def _assert_correct_dep_mapping(ctx): - """Forces a failure if proc_macro_deps and deps are mixed inappropriately - - Args: - ctx (ctx): The current rule's context object - """ - for dep in ctx.attr.deps: - if rust_common.crate_info in dep: - if dep[rust_common.crate_info].type == "proc-macro": - fail( - "{} listed {} in its deps, but it is a proc-macro. It should instead be in the bazel property proc_macro_deps.".format( - ctx.label, - dep.label, - ), - ) - for dep in ctx.attr.proc_macro_deps: - if CrateInfo in dep: - types = [dep[CrateInfo].type] - else: - types = [ - dep_variant_info.crate_info.type - for dep_variant_info in dep[CrateGroupInfo].dep_variant_infos.to_list() - if dep_variant_info.crate_info - ] - - for type in types: - if type != "proc-macro": - fail( - "{} listed {} in its proc_macro_deps, but it is not proc-macro, it is a {}. 
It should probably instead be listed in deps.".format( - ctx.label, - dep.label, - type, - ), - ) - def _rust_library_impl(ctx): """The implementation of the `rust_library` rule. @@ -168,7 +135,7 @@ def _rust_library_common(ctx, crate_type): list: A list of providers. See `rustc_compile_action` """ _assert_no_deprecated_attributes(ctx) - _assert_correct_dep_mapping(ctx) + deps, proc_macro_deps = filter_deps(ctx) toolchain = find_toolchain(ctx) @@ -215,8 +182,8 @@ def _rust_library_common(ctx, crate_type): not ctx.attr.disable_pipelining ) - deps = transform_deps(ctx.attr.deps) - proc_macro_deps = transform_deps(ctx.attr.proc_macro_deps + get_import_macro_deps(ctx)) + deps = transform_deps(deps) + proc_macro_deps = transform_deps(proc_macro_deps + get_import_macro_deps(ctx)) return rustc_compile_action( ctx = ctx, @@ -259,7 +226,7 @@ def _rust_binary_impl(ctx): """ toolchain = find_toolchain(ctx) crate_name = compute_crate_name(ctx.workspace_name, ctx.label, toolchain, ctx.attr.crate_name) - _assert_correct_dep_mapping(ctx) + deps, proc_macro_deps = filter_deps(ctx) if ctx.attr.binary_name: output_filename = ctx.attr.binary_name @@ -267,8 +234,8 @@ def _rust_binary_impl(ctx): output_filename = ctx.label.name output = ctx.actions.declare_file(output_filename + toolchain.binary_ext) - deps = transform_deps(ctx.attr.deps) - proc_macro_deps = transform_deps(ctx.attr.proc_macro_deps + get_import_macro_deps(ctx)) + deps = transform_deps(deps) + proc_macro_deps = transform_deps(proc_macro_deps + get_import_macro_deps(ctx)) crate_root = getattr(ctx.file, "crate_root", None) if not crate_root: @@ -349,13 +316,13 @@ def _rust_test_impl(ctx): list: The list of providers. 
See `rustc_compile_action` """ _assert_no_deprecated_attributes(ctx) - _assert_correct_dep_mapping(ctx) + deps, proc_macro_deps = filter_deps(ctx) toolchain = find_toolchain(ctx) crate_type = "bin" - deps = transform_deps(ctx.attr.deps) - proc_macro_deps = transform_deps(ctx.attr.proc_macro_deps + get_import_macro_deps(ctx)) + deps = transform_deps(deps) + proc_macro_deps = transform_deps(proc_macro_deps + get_import_macro_deps(ctx)) if ctx.attr.crate and ctx.attr.srcs: fail("rust_test.crate and rust_test.srcs are mutually exclusive. Update {} to use only one of these attributes".format( @@ -557,16 +524,16 @@ def _rust_library_group_impl(ctx): runfiles = [] for dep in ctx.attr.deps: - if rust_common.crate_info in dep: + if CrateInfo in dep: dep_variant_infos.append(rust_common.dep_variant_info( - crate_info = dep[rust_common.crate_info] if rust_common.crate_info in dep else None, - dep_info = dep[rust_common.dep_info] if rust_common.crate_info in dep else None, + crate_info = dep[CrateInfo] if CrateInfo in dep else None, + dep_info = dep[DepInfo] if DepInfo in dep else None, build_info = dep[BuildInfo] if BuildInfo in dep else None, cc_info = dep[CcInfo] if CcInfo in dep else None, crate_group_info = None, )) - elif rust_common.crate_group_info in dep: - dep_variant_transitive_infos.append(dep[rust_common.crate_group_info].dep_variant_infos) + elif CrateGroupInfo in dep: + dep_variant_transitive_infos.append(dep[CrateGroupInfo].dep_variant_infos) else: fail("crate_group_info targets can only depend on rust_library or rust_library_group targets.") @@ -742,10 +709,12 @@ _common_attrs = { # `@local_config_platform//:exec` exposed. "proc_macro_deps": attr.label_list( doc = dedent("""\ - List of `rust_proc_macro` targets used to help build this library target. + Copy of deps in exec configuration. This should really be called `exec_configured_deps`. + + Rule implementations use this to select exec-configured `rust_proc_macro` targets. 
+ User code should pass all deps to `deps` for the macros loaded from `defs.bzl`. """), cfg = "exec", - providers = [[CrateInfo], [CrateGroupInfo]], ), "require_explicit_unstable_features": attr.int( doc = ( @@ -1353,7 +1322,9 @@ rust_binary_without_process_wrapper = rule( implementation = _rust_binary_without_process_wrapper_impl, doc = "A variant of `rust_binary` that uses a minimal process wrapper for `Rustc` actions.", provides = COMMON_PROVIDERS + [_RustBuiltWithoutProcessWrapperInfo], - attrs = _common_attrs_for_binary_without_process_wrapper(_common_attrs | _rust_binary_attrs), + attrs = _common_attrs_for_binary_without_process_wrapper(_common_attrs | _rust_binary_attrs | { + "_skip_deps_verification": attr.bool(default = True), + }), executable = True, fragments = ["cpp"], toolchains = [ @@ -1597,7 +1568,7 @@ rust_test = rule( """), ) -def rust_test_suite(name, srcs, shared_srcs = [], **kwargs): +def rust_test_suite(name, srcs, shared_srcs = [], deps = [], proc_macro_deps = [], **kwargs): """A rule for creating a test suite for a set of `rust_test` targets. This rule can be used for setting up typical rust [integration tests][it]. Given the following @@ -1650,6 +1621,8 @@ def rust_test_suite(name, srcs, shared_srcs = [], **kwargs): name (str): The name of the `test_suite`. srcs (list): All test sources, typically `glob(["tests/**/*.rs"])`. shared_srcs (list): Optional argument for sources shared among tests, typically helper functions. + deps (list): Deps and proc_macro_deps for underlying test. + proc_macro_deps (list): Deprecated; do not use. **kwargs (dict): Additional keyword arguments for the underlying [rust_test](#rust_test) targets. The `tags` argument is also passed to the generated `test_suite` target. 
""" @@ -1680,6 +1653,8 @@ def rust_test_suite(name, srcs, shared_srcs = [], **kwargs): srcs = [src] + shared_srcs, tags = tags, crate_name = crate_name, + deps = deps + proc_macro_deps, + proc_macro_deps = deps + proc_macro_deps, **kwargs ) tests.append(test_name) diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index 88fd36c248..cff7567c58 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -226,7 +226,7 @@ def collect_deps( Args: deps (list): The deps from ctx.attr.deps. - proc_macro_deps (list): The proc_macro deps from ctx.attr.proc_macro_deps. + proc_macro_deps (list): The proc_macro deps from `filter_deps(ctx)`. aliases (dict): A dict mapping aliased targets to their actual Crate information. Returns: diff --git a/rust/private/rustdoc/BUILD.bazel b/rust/private/rustdoc/BUILD.bazel index ee2067a87d..85cda1cd7e 100644 --- a/rust/private/rustdoc/BUILD.bazel +++ b/rust/private/rustdoc/BUILD.bazel @@ -1,4 +1,4 @@ -load("//rust/private:rust.bzl", "rust_binary") +load("//rust:defs.bzl", "rust_binary") package(default_visibility = ["//visibility:public"]) diff --git a/rust/private/rustdoc_test.bzl b/rust/private/rustdoc_test.bzl index 522c048cd1..dcf847f10a 100644 --- a/rust/private/rustdoc_test.bzl +++ b/rust/private/rustdoc_test.bzl @@ -18,7 +18,7 @@ load("@rules_cc//cc/common:cc_info.bzl", "CcInfo") load("//rust/private:common.bzl", "rust_common") load("//rust/private:providers.bzl", "CrateInfo") load("//rust/private:rustdoc.bzl", "rustdoc_compile_action") -load("//rust/private:utils.bzl", "dedent", "find_toolchain", "transform_deps") +load("//rust/private:utils.bzl", "dedent", "filter_deps", "find_toolchain", "transform_deps") def _construct_writer_arguments(ctx, test_runner, opt_test_params, action, crate_info): """Construct arguments and environment variables specific to `rustdoc_test_writer`. 
@@ -110,8 +110,10 @@ def _rust_doc_test_impl(ctx): toolchain = find_toolchain(ctx) crate = ctx.attr.crate[rust_common.crate_info] - deps = transform_deps(ctx.attr.deps) - proc_macro_deps = transform_deps(ctx.attr.proc_macro_deps) + + deps, proc_macro_deps = filter_deps(ctx) + deps = transform_deps(deps) + proc_macro_deps = transform_deps(proc_macro_deps) crate_info = rust_common.create_crate_info( name = crate.name, diff --git a/rust/private/utils.bzl b/rust/private/utils.bzl index b3f5b8437c..4d710b7fa9 100644 --- a/rust/private/utils.bzl +++ b/rust/private/utils.bzl @@ -511,14 +511,38 @@ def is_exec_configuration(ctx): # TODO(djmarcin): Is there any better way to determine cfg=exec? return ctx.genfiles_dir.path.find("-exec") != -1 +def filter_deps(ctx): + """Filters the provided (combined) deps into normal deps and proc_macro deps. + + Args: + ctx (ctx): The current rule's context object + + Returns: + deps and proc_macro_deps + """ + if len(ctx.attr.deps) != len(ctx.attr.proc_macro_deps) and not getattr(ctx.attr, "_skip_deps_verification", False): + fail("All deps should be passed to both `deps` and `proc_macro_deps`; please use the macros in //rust:defs.bzl") + + deps = [] + for dep in ctx.attr.deps: + if CrateInfo not in dep or dep[CrateInfo].type != "proc-macro": + deps.append(dep) + + proc_macro_deps = [] + for dep in ctx.attr.proc_macro_deps: + if CrateInfo in dep and dep[CrateInfo].type == "proc-macro": + proc_macro_deps.append(dep) + + return deps, proc_macro_deps + def transform_deps(deps): """Transforms a [Target] into [DepVariantInfo]. - This helper function is used to transform ctx.attr.deps and ctx.attr.proc_macro_deps into + This helper function is used to transform deps and .proc_macro_deps coming from `filter_deps` into [DepVariantInfo]. Args: - deps (list of Targets): Dependencies coming from ctx.attr.deps or ctx.attr.proc_macro_deps + deps (list of Targets): Dependencies coming from `filter_deps` Returns: list of DepVariantInfos. 
From e1d254a3aa9650bf457b7f03999b49ffb8aa67e4 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Wed, 25 Feb 2026 04:15:34 -0500 Subject: [PATCH 15/23] Always use param file for process wrapper --- rust/private/rustc.bzl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index cff7567c58..87c6cdd5c5 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -911,6 +911,7 @@ def construct_arguments( force_depend_on_objects = False, skip_expanding_rustc_env = False, require_explicit_unstable_features = False, + always_use_param_file = False, error_format = None): """Builds an Args object containing common rustc flags @@ -1016,7 +1017,7 @@ def construct_arguments( # Rustc arguments rustc_flags = ctx.actions.args() rustc_flags.set_param_file_format("multiline") - rustc_flags.use_param_file("@%s", use_always = False) + rustc_flags.use_param_file("@%s", use_always = always_use_param_file) rustc_flags.add(crate_info.root) rustc_flags.add(crate_info.name, format = "--crate-name=%s") rustc_flags.add(crate_info.type, format = "--crate-type=%s") @@ -1420,6 +1421,7 @@ def rustc_compile_action( use_json_output = bool(build_metadata) or bool(rustc_output) or bool(rustc_rmeta_output), skip_expanding_rustc_env = skip_expanding_rustc_env, require_explicit_unstable_features = require_explicit_unstable_features, + always_use_param_file = not ctx.executable._process_wrapper, ) args_metadata = None From c69567dd6ba28a99df1b9f6304bc9f603594e85b Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Thu, 26 Feb 2026 10:49:57 -0500 Subject: [PATCH 16/23] Avoid hashing RustAnalyzerInfo in rust_analyzer alias mapping --- rust/private/rust_analyzer.bzl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/rust/private/rust_analyzer.bzl b/rust/private/rust_analyzer.bzl index 1dd8244c98..12bc6fa5ab 100644 --- a/rust/private/rust_analyzer.bzl +++ b/rust/private/rust_analyzer.bzl @@ -132,10 +132,13 @@ 
def _rust_analyzer_aspect_impl(target, ctx): else: fail("Unexpected target type: {}".format(target)) - aliases = {} + # Keep aliases as a list of (RustAnalyzerInfo, alias_name) tuples. + # Using RustAnalyzerInfo as a dict key can trigger expensive recursive hashing. + aliases = [] for aliased_target, aliased_name in getattr(ctx.rule.attr, "aliases", {}).items(): - if aliased_target.label in labels_to_rais: - aliases[labels_to_rais[aliased_target.label]] = aliased_name + dep_info = labels_to_rais.get(aliased_target.label) + if dep_info: + aliases.append((dep_info, aliased_name)) proc_macro_dylib = find_proc_macro_dylib(toolchain, target) proc_macro_dylibs = [proc_macro_dylib] if proc_macro_dylib else None @@ -294,7 +297,7 @@ def _create_single_crate(ctx, attrs, info): # the crate being processed, we don't add it as a dependency to itself. This is # common and expected - `rust_test.crate` pointing to the `rust_library`. crate["deps"] = [_crate_id(dep.crate) for dep in info.deps if _crate_id(dep.crate) != crate_id] - crate["aliases"] = {_crate_id(alias_target.crate): alias_name for alias_target, alias_name in info.aliases.items()} + crate["aliases"] = {_crate_id(alias_target.crate): alias_name for alias_target, alias_name in info.aliases} crate["cfg"] = info.cfgs toolchain = find_toolchain(ctx) crate["target"] = (_EXEC_ROOT_TEMPLATE + toolchain.target_json.path) if toolchain.target_json else toolchain.target_flag_value From 1272c031f3f48beec843ec27cc49d2db852b3ff3 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Thu, 26 Feb 2026 11:33:43 -0500 Subject: [PATCH 17/23] Convert wrappers to symbolic macros --- rust/defs.bzl | 27 ++++++++++++++++++++++++--- rust/rust_binary.bzl | 2 +- rust/rust_library.bzl | 2 +- rust/rust_test.bzl | 2 +- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/rust/defs.bzl b/rust/defs.bzl index 09be66d253..8c6c14813d 100644 --- a/rust/defs.bzl +++ b/rust/defs.bzl @@ -14,6 +14,7 @@ """Public entry point to all Rust rules and 
supported APIs.""" +load("@bazel_features//:features.bzl", "bazel_features") load( "//rust:toolchain.bzl", _rust_stdlib_filegroup = "rust_stdlib_filegroup", @@ -90,7 +91,27 @@ def _rule_wrapper(rule): return _wrapped -rust_library = _rule_wrapper(_rust_library) +def _symbolic_rule_wrapper(rule, macro_fn): + def _wrapped(name, visibility, deps, proc_macro_deps, **kwargs): + rule( + name = name, + visibility = visibility, + deps = deps + proc_macro_deps, + # TODO(zbarsky): This attribute would ideally be called `exec_configured_deps` or similar. + proc_macro_deps = deps + proc_macro_deps, + **kwargs + ) + + return macro_fn( + implementation = _wrapped, + inherit_attrs = rule, + attrs = { + "deps": attr.label_list(default = []), + "proc_macro_deps": attr.label_list(default = []), + }, + ) + +rust_library = _symbolic_rule_wrapper(_rust_library, bazel_features.globals.macro) if bazel_features.globals.macro else _rule_wrapper(_rust_library) # See @rules_rust//rust/private:rust.bzl for a complete description. rust_static_library = _rule_wrapper(_rust_static_library) @@ -102,13 +123,13 @@ rust_shared_library = _rule_wrapper(_rust_shared_library) rust_proc_macro = _rule_wrapper(_rust_proc_macro) # See @rules_rust//rust/private:rust.bzl for a complete description. -rust_binary = _rule_wrapper(_rust_binary) +rust_binary = _symbolic_rule_wrapper(_rust_binary, bazel_features.globals.macro) if bazel_features.globals.macro else _rule_wrapper(_rust_binary) # See @rules_rust//rust/private:rust.bzl for a complete description. rust_library_group = _rust_library_group # See @rules_rust//rust/private:rust.bzl for a complete description. -rust_test = _rule_wrapper(_rust_test) +rust_test = _symbolic_rule_wrapper(_rust_test, bazel_features.globals.macro) if bazel_features.globals.macro else _rule_wrapper(_rust_test) # See @rules_rust//rust/private:rust.bzl for a complete description. 
rust_test_suite = _rust_test_suite diff --git a/rust/rust_binary.bzl b/rust/rust_binary.bzl index dbeba03911..518c7c3f01 100644 --- a/rust/rust_binary.bzl +++ b/rust/rust_binary.bzl @@ -1,7 +1,7 @@ """rust_binary""" load( - "//rust/private:rust.bzl", + "//rust:defs.bzl", _rust_binary = "rust_binary", ) diff --git a/rust/rust_library.bzl b/rust/rust_library.bzl index b1e63494a6..a694ed970c 100644 --- a/rust/rust_library.bzl +++ b/rust/rust_library.bzl @@ -1,7 +1,7 @@ """rust_library""" load( - "//rust/private:rust.bzl", + "//rust:defs.bzl", _rust_library = "rust_library", ) diff --git a/rust/rust_test.bzl b/rust/rust_test.bzl index 001963fccc..abd61ba8c5 100644 --- a/rust/rust_test.bzl +++ b/rust/rust_test.bzl @@ -1,7 +1,7 @@ """rust_test""" load( - "//rust/private:rust.bzl", + "//rust:defs.bzl", _rust_test = "rust_test", ) From 8e568ba389c0e1115134f80ea75938c652822095 Mon Sep 17 00:00:00 2001 From: Titouan BION Date: Sat, 28 Feb 2026 08:47:56 +0100 Subject: [PATCH 18/23] Add missing system keys to `triple_mappings.bzl` (#5) --- rust/platform/triple_mappings.bzl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rust/platform/triple_mappings.bzl b/rust/platform/triple_mappings.bzl index d6265f0a51..72dd64225c 100644 --- a/rust/platform/triple_mappings.bzl +++ b/rust/platform/triple_mappings.bzl @@ -179,6 +179,7 @@ _SYSTEM_TO_BINARY_EXT = { "ios": "", "linux": "", "macos": "", + "netbsd": "", "nixos": "", "none": "", "nto": "", @@ -187,6 +188,7 @@ _SYSTEM_TO_BINARY_EXT = { # generated extension for the wasm target, similarly to the # windows target "unknown": ".wasm", + "threads": ".wasm", "wasi": ".wasm", "wasip1": ".wasm", "wasip2": ".wasm", @@ -204,11 +206,13 @@ _SYSTEM_TO_STATICLIB_EXT = { "ios": ".a", "linux": ".a", "macos": ".a", + "netbsd": ".a", "nixos": ".a", "none": ".a", "nto": ".a", "uefi": ".lib", "unknown": "", + "threads": "", "wasi": "", "wasip1": "", "wasip2": "", @@ -226,11 +230,13 @@ _SYSTEM_TO_DYLIB_EXT = { "ios": ".dylib", "linux": ".so", 
"macos": ".dylib", + "netbsd": ".so", "nixos": ".so", "none": ".so", "nto": ".a", "uefi": "", # UEFI doesn't have dynamic linking "unknown": ".wasm", + "threads": ".wasm", "wasi": ".wasm", "wasip1": ".wasm", "wasip2": ".wasm", From 333382b874f6c5ebfda36f89c147a0d1b1071737 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Sat, 28 Feb 2026 12:52:38 -0500 Subject: [PATCH 19/23] Handle toolchain registration when not registered as a bazel_dep --- rust/extensions.bzl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/extensions.bzl b/rust/extensions.bzl index 749740032e..b059d6e02e 100644 --- a/rust/extensions.bzl +++ b/rust/extensions.bzl @@ -26,8 +26,6 @@ def _find_modules(module_ctx): our_module = mod if root == None: root = our_module - if our_module == None: - fail("Unable to find rules_rust module") return root, our_module @@ -93,7 +91,9 @@ def _rust_impl(module_ctx): if toolchain_triples.get(repository_set["exec_triple"]) == repository_set["name"]: toolchain_triples.pop(repository_set["exec_triple"], None) - toolchains = root.tags.toolchain or rules_rust.tags.toolchain + toolchains = root.tags.toolchain + if not toolchains and rules_rust: + toolchains = rules_rust.tags.toolchain for toolchain in toolchains: if toolchain.extra_rustc_flags and toolchain.extra_rustc_flags_triples: From 627b4245f4fa260ec0ff1579558b496c85f99f27 Mon Sep 17 00:00:00 2001 From: Walter Gray Date: Wed, 25 Feb 2026 18:32:17 -0800 Subject: [PATCH 20/23] Switch pipelining metadata action to hollow rlib (-Zno-codegen) Replace the --rustc-quit-on-rmeta / .rmeta approach with Buck2-style hollow rlibs: the RustcMetadata action runs rustc to completion with -Zno-codegen, emitting a .rlib archive (named -hollow.rlib) that contains only metadata and optimized MIR. Key fixes: - Main Rustc action uses full rlib --extern deps so the SVH embedded in the full rlib matches what downstream binaries expect (avoiding E0460 with non-deterministic proc macros). 
- RUSTC_BOOTSTRAP=1 is set on both actions: it changes the crate hash, so inconsistent use would cause SVH mismatch even for deterministic crates. - -Ldependency= points to the _hollow/ subdirectory so the full rlib and hollow rlib never appear in the same search path (avoids E0463). - transitive_metadata_outputs are always included in the sandbox so rustc can resolve transitive -Ldependency= references. Adds an SVH mismatch test that demonstrates the problem with non-deterministic proc macros (uses a HashMap-based derive macro whose output varies by OS-seeded random seed). --- rust/private/rust.bzl | 8 +- rust/private/rustc.bzl | 151 ++++++++++++-- rust/settings/settings.bzl | 19 +- .../pipelined_compilation_test.bzl | 194 ++++++++++++++---- .../svh_mismatch/svh_mismatch_consumer.rs | 6 + .../svh_mismatch/svh_mismatch_lib.rs | 8 + .../svh_mismatch_nondeterministic_macro.rs | 37 ++++ .../svh_mismatch/svh_mismatch_test.rs | 28 +++ test/unit/pipelined_compilation/wrap.bzl | 26 ++- 9 files changed, 409 insertions(+), 68 deletions(-) create mode 100644 test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_consumer.rs create mode 100644 test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_lib.rs create mode 100644 test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_nondeterministic_macro.rs create mode 100644 test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_test.rs diff --git a/rust/private/rust.bzl b/rust/private/rust.bzl index 0307f47098..c7a04a2515 100644 --- a/rust/private/rust.bzl +++ b/rust/private/rust.bzl @@ -172,11 +172,15 @@ def _rust_library_common(ctx, crate_type): crate_type, disable_pipelining = getattr(ctx.attr, "disable_pipelining", False), ): + # The hollow rlib uses .rlib extension (not .rmeta) so rustc reads it as an + # rlib archive containing lib.rmeta with optimized MIR. 
It is placed in a + # "_hollow/" subdirectory so the full rlib and hollow rlib never appear in the + # same -Ldependency= search directory (which would cause E0463). rust_metadata = ctx.actions.declare_file( - paths.replace_extension(rust_lib_name, ".rmeta"), - sibling = rust_lib, + "_hollow/" + rust_lib_name[:-len(".rlib")] + "-hollow.rlib", ) rustc_rmeta_output = generate_output_diagnostics(ctx, rust_metadata) + metadata_supports_pipelining = ( can_use_metadata_for_pipelining(toolchain, crate_type) and not ctx.attr.disable_pipelining diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index 87c6cdd5c5..c1f58c9716 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -663,7 +663,7 @@ def _disambiguate_libs(actions, toolchain, crate_info, dep_info, use_pic): visited_libs[name] = artifact return ambiguous_libs -def _depend_on_metadata(crate_info, force_depend_on_objects): +def _depend_on_metadata(crate_info, force_depend_on_objects, experimental_use_cc_common_link = False): """Determines if we can depend on metadata for this crate. By default (when pipelining is disabled or when the crate type needs to link against @@ -673,9 +673,22 @@ def _depend_on_metadata(crate_info, force_depend_on_objects): In some rare cases, even if both of those conditions are true, we still want to depend on objects. This is what force_depend_on_objects is. + When experimental_use_cc_common_link is True, bin/cdylib crates also use hollow + rlib deps. The rustc step only emits .o files (no rustc linking), so SVH chain + consistency is sufficient; the actual linking is done by cc_common.link, which + does not check SVH. + + Callers are responsible for zeroing out experimental_use_cc_common_link for + exec-platform builds before calling this function (see rustc_compile_action). + Exec-platform binaries (build scripts) must use full rlib deps because their + CcInfo linking contexts may lack a CC toolchain. + Args: crate_info (CrateInfo): The Crate to determine this for. 
force_depend_on_objects (bool): if set we will not depend on metadata. + experimental_use_cc_common_link (bool): if set, bin/cdylib crates also use + hollow rlib deps for SVH consistency. Must already be False for + exec-platform builds when this function is called. Returns: Whether we can depend on metadata for this crate. @@ -683,6 +696,11 @@ def _depend_on_metadata(crate_info, force_depend_on_objects): if force_depend_on_objects: return False + if experimental_use_cc_common_link and crate_info.type in ("bin", "cdylib"): + # cc_common.link: rustc only emits .o files, so hollow rlib deps are safe and + # keep the SVH chain consistent (avoiding E0460 from nondeterministic proc macros). + return True + return crate_info.type in ("rlib", "lib") def collect_inputs( @@ -770,7 +788,7 @@ def collect_inputs( linkstamp_outs = [] transitive_crate_outputs = dep_info.transitive_crate_outputs - if _depend_on_metadata(crate_info, force_depend_on_objects): + if _depend_on_metadata(crate_info, force_depend_on_objects, experimental_use_cc_common_link): transitive_crate_outputs = dep_info.transitive_metadata_outputs nolinkstamp_compile_direct_inputs = [] @@ -806,6 +824,12 @@ def collect_inputs( transitive = [ crate_info.srcs, transitive_crate_outputs, + # Always include hollow rlibs so they are present in the sandbox for + # -Ldependency= resolution. Binaries and proc-macros compile against full + # rlib --extern deps but need hollow rlibs available for transitive + # dependency resolution when those rlibs were themselves compiled against + # hollow deps. For rlib/lib crates this is a no-op (already included above). 
+ dep_info.transitive_metadata_outputs, crate_info.compile_data, dep_info.transitive_proc_macro_data, toolchain.all_files, @@ -909,6 +933,7 @@ def construct_arguments( use_json_output = False, build_metadata = False, force_depend_on_objects = False, + experimental_use_cc_common_link = False, skip_expanding_rustc_env = False, require_explicit_unstable_features = False, always_use_param_file = False, @@ -1045,8 +1070,14 @@ def construct_arguments( error_format = "json" if build_metadata: - # Configure process_wrapper to terminate rustc when metadata are emitted - process_wrapper_flags.add("--rustc-quit-on-rmeta", "true") + if crate_info.type in ("rlib", "lib"): + # Hollow rlib approach (Buck2-style): rustc runs to completion with -Zno-codegen, + # producing a hollow .rlib (metadata only, no object code) via --emit=link=. + # No need to kill rustc — -Zno-codegen skips codegen entirely and exits quickly. + rustc_flags.add("-Zno-codegen") + + # else: IDE-only metadata for non-rlib types (bin, proc-macro, etc.): rustc exits + # naturally after writing .rmeta via --emit=dep-info,metadata (no kill needed). if crate_info.rustc_rmeta_output: process_wrapper_flags.add("--output-file", crate_info.rustc_rmeta_output.path) elif crate_info.rustc_output: @@ -1079,7 +1110,14 @@ def construct_arguments( emit_without_paths = [] for kind in emit: - if kind == "link" and crate_info.type == "bin" and crate_info.output != None: + if kind == "link" and build_metadata and crate_info.type in ("rlib", "lib") and crate_info.metadata: + # Hollow rlib: direct rustc's link output to the -hollow.rlib path. + # The file has .rlib extension so rustc reads it as an rlib archive + # (with optimized MIR in lib.rmeta). Using a .rmeta path would cause + # E0786 "found invalid metadata files" because rustc parses .rmeta files + # as raw metadata blobs, not rlib archives. 
+ rustc_flags.add(crate_info.metadata, format = "--emit=link=%s") + elif kind == "link" and crate_info.type == "bin" and crate_info.output != None: rustc_flags.add(crate_info.output, format = "--emit=link=%s") else: emit_without_paths.append(kind) @@ -1154,7 +1192,7 @@ def construct_arguments( include_link_flags = include_link_flags, ) - use_metadata = _depend_on_metadata(crate_info, force_depend_on_objects) + use_metadata = _depend_on_metadata(crate_info, force_depend_on_objects, experimental_use_cc_common_link) # These always need to be added, even if not linking this crate. add_crate_link_flags(rustc_flags, dep_info, force_all_deps_direct, use_metadata) @@ -1323,6 +1361,13 @@ def rustc_compile_action( rustc_output = crate_info.rustc_output rustc_rmeta_output = crate_info.rustc_rmeta_output + # Use the hollow rlib approach (Buck2-style) for rlib/lib crate types when a metadata + # action is being created. This always applies for rlib/lib regardless of whether + # pipelining is globally enabled — the hollow rlib is simpler than killing rustc. + # Non-rlib types (bin, proc-macro, etc.) use --emit=dep-info,metadata instead + # (rustc exits naturally after writing .rmeta, no process-wrapper kill needed). + use_hollow_rlib = bool(build_metadata) and crate_info.type in ("rlib", "lib") + # Determine whether to use cc_common.link: # * either if experimental_use_cc_common_link is 1, # * or if experimental_use_cc_common_link is -1 and @@ -1336,6 +1381,12 @@ def rustc_compile_action( elif ctx.attr.experimental_use_cc_common_link == -1: experimental_use_cc_common_link = toolchain._experimental_use_cc_common_link + # Exec-platform binaries (build scripts) skip cc_common.link: exec-configuration + # rlib deps may lack a CC toolchain, causing empty CcInfo linking contexts. They + # use standard rustc linking with full rlib deps instead. 
+ if experimental_use_cc_common_link and is_exec_configuration(ctx): + experimental_use_cc_common_link = False + dep_info, build_info, linkstamps = collect_deps( deps = deps, proc_macro_deps = proc_macro_deps, @@ -1374,17 +1425,34 @@ def rustc_compile_action( experimental_use_cc_common_link = experimental_use_cc_common_link, ) - # The types of rustc outputs to emit. - # If we build metadata, we need to keep the command line of the two invocations - # (rlib and rmeta) as similar as possible, otherwise rustc rejects the rmeta as - # a candidate. - # Because of that we need to add emit=metadata to both the rlib and rmeta invocation. - # - # When cc_common linking is enabled, emit a `.o` file, which is later - # passed to the cc_common.link action. + # The main Rustc action uses FULL rlib deps so the full rlib it produces records + # full-rlib SVHs. A downstream binary links against full rlibs; if the Rustc action + # had used hollow rlib deps instead, nondeterministic proc macros could produce + # different SVHs for the hollow vs full rlib, causing E0460 in the binary build. + # The RustcMetadata action still uses hollow rlibs (compile_inputs_for_metadata) + # so it can start before full codegen of its deps completes. + compile_inputs_for_metadata = compile_inputs + if use_hollow_rlib: + compile_inputs, _, _, _, _, _ = collect_inputs( + ctx = ctx, + file = ctx.file, + files = ctx.files, + linkstamps = linkstamps, + toolchain = toolchain, + cc_toolchain = cc_toolchain, + feature_configuration = feature_configuration, + crate_info = crate_info, + dep_info = dep_info, + build_info = build_info, + lint_files = lint_files, + stamp = stamp, + force_depend_on_objects = True, + experimental_use_cc_common_link = experimental_use_cc_common_link, + ) + + # The main Rustc action emits dep-info and link (the full rlib/binary/cdylib). + # When cc_common linking is enabled, emit a `.o` file instead. 
emit = ["dep-info", "link"] - if build_metadata: - emit.append("metadata") if experimental_use_cc_common_link: emit = ["obj"] @@ -1419,6 +1487,9 @@ def rustc_compile_action( force_all_deps_direct = force_all_deps_direct, stamp = stamp, use_json_output = bool(build_metadata) or bool(rustc_output) or bool(rustc_rmeta_output), + # Force full rlib --extern deps so the full rlib records full-rlib SVHs. + force_depend_on_objects = use_hollow_rlib, + experimental_use_cc_common_link = experimental_use_cc_common_link, skip_expanding_rustc_env = skip_expanding_rustc_env, require_explicit_unstable_features = require_explicit_unstable_features, always_use_param_file = not ctx.executable._process_wrapper, @@ -1426,6 +1497,15 @@ def rustc_compile_action( args_metadata = None if build_metadata: + if use_hollow_rlib: + # Hollow rlib: emit dep-info and link (directed to the -hollow.rlib path via + # -Zno-codegen). dep-info must be included: it affects the SVH stored in the + # rlib, so both actions must include it to keep SVHs consistent. + metadata_emit = ["dep-info", "link"] + else: + # IDE-only metadata for non-rlib types (bin, proc-macro, etc.): rustc exits + # naturally after writing .rmeta with --emit=dep-info,metadata. 
+ metadata_emit = ["dep-info", "metadata"] args_metadata, _ = construct_arguments( ctx = ctx, attr = attr, @@ -1433,7 +1513,7 @@ def rustc_compile_action( toolchain = toolchain, tool_path = toolchain.rustc.path, cc_toolchain = cc_toolchain, - emit = emit, + emit = metadata_emit, feature_configuration = feature_configuration, crate_info = crate_info, dep_info = dep_info, @@ -1448,6 +1528,7 @@ def rustc_compile_action( stamp = stamp, use_json_output = True, build_metadata = True, + experimental_use_cc_common_link = experimental_use_cc_common_link, require_explicit_unstable_features = require_explicit_unstable_features, ) @@ -1456,6 +1537,13 @@ def rustc_compile_action( # this is the final list of env vars env.update(env_from_args) + if use_hollow_rlib: + # Both the metadata action and the full Rustc action must have RUSTC_BOOTSTRAP=1 + # for SVH compatibility. RUSTC_BOOTSTRAP=1 changes the crate hash — setting it + # on only one action would cause SVH mismatch even for deterministic crates. + # This enables -Zno-codegen on stable Rust compilers for the metadata action. + env["RUSTC_BOOTSTRAP"] = "1" + if hasattr(attr, "version") and attr.version != "0.0.0": formatted_version = " v{}".format(attr.version) else: @@ -1524,7 +1612,7 @@ def rustc_compile_action( if args_metadata: ctx.actions.run( executable = ctx.executable._process_wrapper, - inputs = compile_inputs, + inputs = compile_inputs_for_metadata, outputs = [build_metadata] + [x for x in [rustc_rmeta_output] if x], env = env, arguments = args_metadata.all, @@ -2135,9 +2223,14 @@ def add_crate_link_flags(args, dep_info, force_all_deps_direct = False, use_meta crate_to_link_flags = _crate_to_link_flag_metadata if use_metadata else _crate_to_link_flag args.add_all(direct_crates, uniquify = True, map_each = crate_to_link_flags) + # Use hollow rlib directories for -Ldependency= when use_metadata=True (rlib/lib) + # so that both --extern= and -Ldependency= point to the same hollow rlib file. 
+ # When use_metadata=False (bins, proc-macros), use full rlib directories; pointing + # to hollow dirs alongside full --extern= args would cause E0463 (ambiguous crate). + get_dirname = _get_crate_dirname_pipelined if use_metadata else _get_crate_dirname args.add_all( dep_info.transitive_crates, - map_each = _get_crate_dirname, + map_each = get_dirname, uniquify = True, format_each = "-Ldependency=%s", ) @@ -2195,7 +2288,25 @@ def _get_crate_dirname(crate): """ return crate.output.dirname -def _portable_link_flags(lib, use_pic, ambiguous_libs, get_lib_name, for_darwin = False, flavor_msvc = False): +def _get_crate_dirname_pipelined(crate): + """For pipelined compilation: returns the _hollow/ directory for pipelined crates + + When a crate supports pipelining and has a hollow rlib in its _hollow/ subdirectory, + pointing -Ldependency= to that subdirectory lets rustc find the hollow rlib (which has + the correct SVH matching downstream metadata). Pointing to the parent directory instead + would expose the full rlib (compiled separately, with a different SVH), causing E0460. + + Args: + crate (CrateInfo): A CrateInfo provider from the current rule + + Returns: + str: The directory to use for -Ldependency= search. + """ + if crate.metadata and crate.metadata_supports_pipelining: + return crate.metadata.dirname + return crate.output.dirname + +def _portable_link_flags(lib, use_pic, ambiguous_libs, get_lib_name, for_windows = False, for_darwin = False, flavor_msvc = False): artifact = get_preferred_artifact(lib, use_pic) if ambiguous_libs and artifact.path in ambiguous_libs: artifact = ambiguous_libs[artifact.path] diff --git a/rust/settings/settings.bzl b/rust/settings/settings.bzl index 7a114e6e3c..b155eb172a 100644 --- a/rust/settings/settings.bzl +++ b/rust/settings/settings.bzl @@ -112,10 +112,18 @@ def use_real_import_macro(): ) def pipelined_compilation(): - """When set, this flag causes rustc to emit `*.rmeta` files and use them for `rlib -> rlib` dependencies. 
+ """When set, this flag enables pipelined compilation for rlib/lib crates. - While this involves one extra (short) rustc invocation to build the rmeta file, - it allows library dependencies to be unlocked much sooner, increasing parallelism during compilation. + For each rlib/lib, a separate RustcMetadata action produces a hollow rlib + (via `-Zno-codegen`) containing only metadata. Downstream rlib/lib crates + can begin compiling against the hollow rlib before the upstream full codegen + action completes, increasing build parallelism. + + Pipelining applies to rlib→rlib dependencies by default. To also pipeline + bin/cdylib crates (starting their compile step before upstream full codegen + finishes), enable `experimental_use_cc_common_link` alongside this flag. + With cc_common.link, rustc only emits `.o` files for binaries (linking is + handled separately), so hollow rlib deps are safe for bins too. """ bool_flag( name = "pipelined_compilation", @@ -126,6 +134,11 @@ def pipelined_compilation(): def experimental_use_cc_common_link(): """A flag to control whether to link rust_binary and rust_test targets using \ cc_common.link instead of rustc. + + When combined with `pipelined_compilation`, bin/cdylib crates also participate + in the hollow-rlib dependency chain: rustc only emits `.o` files (linking is + done by cc_common.link and does not check SVH), so bin compile steps can start + as soon as upstream hollow rlibs are ready rather than waiting for full codegen. 
""" bool_flag( name = "experimental_use_cc_common_link", diff --git a/test/unit/pipelined_compilation/pipelined_compilation_test.bzl b/test/unit/pipelined_compilation/pipelined_compilation_test.bzl index 36a3de891b..0f638c3ee6 100644 --- a/test/unit/pipelined_compilation/pipelined_compilation_test.bzl +++ b/test/unit/pipelined_compilation/pipelined_compilation_test.bzl @@ -1,8 +1,8 @@ """Unittests for rust rules.""" load("@bazel_skylib//lib:unittest.bzl", "analysistest", "asserts") -load("//rust:defs.bzl", "rust_binary", "rust_library", "rust_proc_macro") -load("//test/unit:common.bzl", "assert_argv_contains", "assert_list_contains_adjacent_elements", "assert_list_contains_adjacent_elements_not") +load("//rust:defs.bzl", "rust_binary", "rust_library", "rust_proc_macro", "rust_test") +load("//test/unit:common.bzl", "assert_argv_contains", "assert_list_contains_adjacent_elements_not") load(":wrap.bzl", "wrap") ENABLE_PIPELINING = { @@ -22,49 +22,77 @@ def _second_lib_test_impl(ctx): rlib_action = [act for act in tut.actions if act.mnemonic == "Rustc"][0] metadata_action = [act for act in tut.actions if act.mnemonic == "RustcMetadata"][0] - # Both actions should use the same --emit= - assert_argv_contains(env, rlib_action, "--emit=dep-info,link,metadata") - assert_argv_contains(env, metadata_action, "--emit=dep-info,link,metadata") + # Hollow rlib approach: Rustc action uses --emit=dep-info,link (no metadata). + assert_argv_contains(env, rlib_action, "--emit=dep-info,link") - # The metadata action should have a .rmeta as output and the rlib action a .rlib + # Metadata action uses --emit=link=-hollow.rlib (hollow rlib, .rlib extension). + # The .rlib extension is required so rustc reads it as an rlib archive (extracting + # lib.rmeta with optimized MIR). Using .rmeta extension causes E0786, and using + # --emit=metadata produces raw .rmeta without optimized MIR (causes "missing + # optimized MIR" errors on Rust 1.85+). 
+ metadata_emit_link = [arg for arg in metadata_action.argv if arg.startswith("--emit=link=") and arg.endswith("-hollow.rlib")] + asserts.true( + env, + len(metadata_emit_link) == 1, + "expected --emit=link=*-hollow.rlib for hollow rlib, got: " + str([arg for arg in metadata_action.argv if arg.startswith("--emit=")]), + ) + + # The rlib action produces a .rlib; the metadata action produces a -hollow.rlib. path = rlib_action.outputs.to_list()[0].path asserts.true( env, - path.endswith(".rlib"), - "expected Rustc to output .rlib, got " + path, + path.endswith(".rlib") and not path.endswith("-hollow.rlib"), + "expected Rustc to output .rlib (not hollow), got " + path, ) path = metadata_action.outputs.to_list()[0].path asserts.true( env, - path.endswith(".rmeta"), - "expected RustcMetadata to output .rmeta, got " + path, + path.endswith("-hollow.rlib"), + "expected RustcMetadata to output -hollow.rlib, got " + path, ) - # Only the action building metadata should contain --rustc-quit-on-rmeta + # Neither action should use --rustc-quit-on-rmeta (hollow rlib exits naturally). assert_list_contains_adjacent_elements_not(env, rlib_action.argv, ["--rustc-quit-on-rmeta", "true"]) - assert_list_contains_adjacent_elements(env, metadata_action.argv, ["--rustc-quit-on-rmeta", "true"]) - - # Check that both actions refer to the metadata of :first, not the rlib - extern_metadata = [arg for arg in metadata_action.argv if arg.startswith("--extern=first=") and "libfirst" in arg and arg.endswith(".rmeta")] + assert_list_contains_adjacent_elements_not(env, metadata_action.argv, ["--rustc-quit-on-rmeta", "true"]) + + # The metadata action should use -Zno-codegen for the hollow rlib approach. + assert_argv_contains(env, metadata_action, "-Zno-codegen") + + # The Rustc action should NOT use -Zno-codegen. 
+ no_codegen_in_rlib = [arg for arg in rlib_action.argv if arg == "-Zno-codegen"] + asserts.true(env, len(no_codegen_in_rlib) == 0, "Rustc action should not have -Zno-codegen") + + # The metadata action references first's hollow rlib for --extern (pipelining: starts + # before first's full codegen finishes). The Rustc action uses the full rlib for + # --extern so the full rlib's embedded SVH matches the full rlib that downstream + # binaries (without cc_common.link) see in their -Ldependency path. If both actions + # used the hollow rlib, nondeterministic proc macros could produce different SVHs + # for the hollow vs full rlib, causing E0460 in downstream binary builds. + extern_metadata = [arg for arg in metadata_action.argv if arg.startswith("--extern=first=") and "libfirst" in arg and arg.endswith("-hollow.rlib")] asserts.true( env, len(extern_metadata) == 1, - "did not find a --extern=first=*.rmeta but expected one", + "did not find --extern=first=*-hollow.rlib for metadata action, got: " + str([arg for arg in metadata_action.argv if arg.startswith("--extern=first=")]), ) - extern_rlib = [arg for arg in rlib_action.argv if arg.startswith("--extern=first=") and "libfirst" in arg and arg.endswith(".rmeta")] + extern_rlib_full = [arg for arg in rlib_action.argv if arg.startswith("--extern=first=") and "libfirst" in arg and not arg.endswith("-hollow.rlib")] asserts.true( env, - len(extern_rlib) == 1, - "did not find a --extern=first=*.rlib but expected one", + len(extern_rlib_full) == 1, + "expected --extern=first=libfirst*.rlib (full rlib) for rlib action, got: " + str([arg for arg in rlib_action.argv if arg.startswith("--extern=first=")]), ) - # Check that the input to both actions is the metadata of :first + # The metadata action's input is first's hollow rlib only (no full rlib needed). 
input_metadata = [i for i in metadata_action.inputs.to_list() if i.basename.startswith("libfirst")] - asserts.true(env, len(input_metadata) == 1, "expected only one libfirst input, found " + str([i.path for i in input_metadata])) - asserts.true(env, input_metadata[0].extension == "rmeta", "expected libfirst dependency to be rmeta, found " + input_metadata[0].path) - input_rlib = [i for i in rlib_action.inputs.to_list() if i.basename.startswith("libfirst")] - asserts.true(env, len(input_rlib) == 1, "expected only one libfirst input, found " + str([i.path for i in input_rlib])) - asserts.true(env, input_rlib[0].extension == "rmeta", "expected libfirst dependency to be rmeta, found " + input_rlib[0].path) + asserts.true(env, len(input_metadata) == 1, "expected only one libfirst input for metadata, found " + str([i.path for i in input_metadata])) + asserts.true(env, input_metadata[0].basename.endswith("-hollow.rlib"), "expected hollow rlib for metadata action, found " + input_metadata[0].path) + + # The Rustc action's inputs contain the full rlib (referenced by --extern) and the + # hollow rlib (present in the sandbox for -Ldependency=<_hollow_dir> resolution of + # transitive deps that were compiled against hollow rlibs). 
+ input_rlib_full = [i for i in rlib_action.inputs.to_list() if i.basename.startswith("libfirst") and not i.basename.endswith("-hollow.rlib")] + input_rlib_hollow = [i for i in rlib_action.inputs.to_list() if i.basename.startswith("libfirst") and i.basename.endswith("-hollow.rlib")] + asserts.true(env, len(input_rlib_full) == 1, "expected full rlib in rlib action inputs, found " + str([i.path for i in input_rlib_full])) + asserts.true(env, len(input_rlib_hollow) == 1, "expected hollow rlib in rlib action inputs (for sandbox), found " + str([i.path for i in input_rlib_hollow])) return analysistest.end(env) @@ -124,10 +152,16 @@ def _pipelined_compilation_test(): target_under_test = ":bin", target_compatible_with = _NO_WINDOWS, ) + hollow_rlib_env_test( + name = "hollow_rlib_env_test", + target_under_test = ":second", + target_compatible_with = _NO_WINDOWS, + ) return [ ":second_lib_test", ":bin_test", + ":hollow_rlib_env_test", ] def _rmeta_is_propagated_through_custom_rule_test_impl(ctx): @@ -138,8 +172,8 @@ def _rmeta_is_propagated_through_custom_rule_test_impl(ctx): # also depend on metadata for 'wrapper'. rust_action = [act for act in tut.actions if act.mnemonic == "RustcMetadata"][0] - metadata_inputs = [i for i in rust_action.inputs.to_list() if i.path.endswith(".rmeta")] - rlib_inputs = [i for i in rust_action.inputs.to_list() if i.path.endswith(".rlib")] + metadata_inputs = [i for i in rust_action.inputs.to_list() if i.path.endswith("-hollow.rlib")] + rlib_inputs = [i for i in rust_action.inputs.to_list() if i.path.endswith(".rlib") and not i.path.endswith("-hollow.rlib")] seen_wrapper_metadata = False seen_to_wrap_metadata = False @@ -176,22 +210,30 @@ def _rmeta_is_used_when_building_custom_rule_test_impl(ctx): # This is the custom rule invocation of rustc. 
rust_action = [act for act in tut.actions if act.mnemonic == "Rustc"][0] - # We want to check that the action depends on metadata, regardless of ctx.attr.generate_metadata seen_to_wrap_rlib = False - seen_to_wrap_rmeta = False + seen_to_wrap_hollow = False for act in rust_action.inputs.to_list(): - if "libto_wrap" in act.path and act.path.endswith(".rlib"): + if "libto_wrap" in act.path and act.path.endswith("-hollow.rlib"): + seen_to_wrap_hollow = True + elif "libto_wrap" in act.path and act.path.endswith(".rlib") and not act.path.endswith("-hollow.rlib"): seen_to_wrap_rlib = True - elif "libto_wrap" in act.path and act.path.endswith(".rmeta"): - seen_to_wrap_rmeta = True - asserts.true(env, seen_to_wrap_rmeta, "expected dependency on metadata for 'to_wrap' but not found") - asserts.false(env, seen_to_wrap_rlib, "expected no dependency on object for 'to_wrap' but it was found") + if ctx.attr.generate_metadata: + # When wrapper generates its own hollow rlib, the Rustc action uses the full + # rlib of to_wrap for --extern (SVH consistency) and also has the hollow rlib + # in the sandbox for -Ldependency= resolution. + asserts.true(env, seen_to_wrap_hollow, "expected hollow rlib in inputs (for sandbox) when generate_metadata=True") + asserts.true(env, seen_to_wrap_rlib, "expected full rlib in inputs for --extern when generate_metadata=True") + else: + # When wrapper does not generate its own hollow rlib, the Rustc action uses + # hollow rlib deps via normal _depend_on_metadata logic (pipelined rlib deps). 
+ asserts.true(env, seen_to_wrap_hollow, "expected dependency on metadata for 'to_wrap' but not found") + asserts.false(env, seen_to_wrap_rlib, "expected no dependency on object for 'to_wrap' but it was found") return analysistest.end(env) rmeta_is_propagated_through_custom_rule_test = analysistest.make(_rmeta_is_propagated_through_custom_rule_test_impl, attrs = {"generate_metadata": attr.bool()}, config_settings = ENABLE_PIPELINING) -rmeta_is_used_when_building_custom_rule_test = analysistest.make(_rmeta_is_used_when_building_custom_rule_test_impl, config_settings = ENABLE_PIPELINING) +rmeta_is_used_when_building_custom_rule_test = analysistest.make(_rmeta_is_used_when_building_custom_rule_test_impl, attrs = {"generate_metadata": attr.bool()}, config_settings = ENABLE_PIPELINING) def _rmeta_not_produced_if_pipelining_disabled_test_impl(ctx): env = analysistest.begin(ctx) @@ -204,6 +246,33 @@ def _rmeta_not_produced_if_pipelining_disabled_test_impl(ctx): rmeta_not_produced_if_pipelining_disabled_test = analysistest.make(_rmeta_not_produced_if_pipelining_disabled_test_impl, config_settings = ENABLE_PIPELINING) +def _hollow_rlib_env_test_impl(ctx): + """Verify RUSTC_BOOTSTRAP=1 is set consistently on both Rustc and RustcMetadata actions. 
+ + RUSTC_BOOTSTRAP=1 changes the crate hash (SVH), so it must be set on both actions + to keep the hollow rlib and full rlib SVHs consistent.""" + env = analysistest.begin(ctx) + tut = analysistest.target_under_test(env) + metadata_action = [act for act in tut.actions if act.mnemonic == "RustcMetadata"][0] + rlib_action = [act for act in tut.actions if act.mnemonic == "Rustc"][0] + + asserts.equals( + env, + "1", + metadata_action.env.get("RUSTC_BOOTSTRAP", ""), + "Metadata action should have RUSTC_BOOTSTRAP=1 for hollow rlib approach", + ) + asserts.equals( + env, + "1", + rlib_action.env.get("RUSTC_BOOTSTRAP", ""), + "Rustc action should have RUSTC_BOOTSTRAP=1 for SVH compatibility with hollow rlib", + ) + + return analysistest.end(env) + +hollow_rlib_env_test = analysistest.make(_hollow_rlib_env_test_impl, config_settings = ENABLE_PIPELINING) + def _disable_pipelining_test(): rust_library( name = "lib", @@ -249,6 +318,7 @@ def _custom_rule_test(generate_metadata, suffix): rmeta_is_used_when_building_custom_rule_test( name = "rmeta_is_used_when_building_custom_rule_test" + suffix, + generate_metadata = generate_metadata, target_under_test = ":wrapper" + suffix, target_compatible_with = _NO_WINDOWS, ) @@ -258,6 +328,59 @@ def _custom_rule_test(generate_metadata, suffix): ":rmeta_is_used_when_building_custom_rule_test" + suffix, ] +def _svh_mismatch_test(): + """Creates a rust_test demonstrating SVH mismatch with non-deterministic proc macros. + + Without pipelining (default): each library is compiled exactly once, SVH + is consistent across the dependency graph, and the test builds and passes. + + With pipelining (//rust/settings:pipelined_compilation=true): rules_rust + compiles svh_lib twice in separate rustc invocations — once for the hollow + metadata (.rmeta), once for the full .rlib. Because the proc macro uses + HashMap with OS-seeded randomness, these two invocations typically produce + different token streams and therefore different SVH values. 
The consumer is + compiled against the hollow .rmeta (recording SVH_1); when rustc links the + test binary against the full .rlib (SVH_2), it detects SVH_1 ≠ SVH_2 and + fails with E0460. The test is therefore expected to FAIL TO BUILD most of + the time (~99.2% with 5 HashMap entries) when pipelining is enabled. + + The test is marked flaky because the SVH mismatch is non-deterministic: + on rare occasions (~0.8%) both rustc invocations produce the same HashMap + iteration order and the build succeeds even with pipelining enabled. + """ + + rust_proc_macro( + name = "svh_nondeterministic_macro", + srcs = ["svh_mismatch/svh_mismatch_nondeterministic_macro.rs"], + crate_name = "nondeterministic_macro", + edition = "2021", + ) + + rust_library( + name = "svh_lib", + srcs = ["svh_mismatch/svh_mismatch_lib.rs"], + edition = "2021", + proc_macro_deps = [":svh_nondeterministic_macro"], + ) + + rust_library( + name = "svh_consumer", + srcs = ["svh_mismatch/svh_mismatch_consumer.rs"], + edition = "2021", + deps = [":svh_lib"], + ) + + rust_test( + name = "svh_mismatch_test", + srcs = ["svh_mismatch/svh_mismatch_test.rs"], + edition = "2021", + deps = [":svh_consumer"], + flaky = True, + target_compatible_with = _NO_WINDOWS, + ) + + return [":svh_mismatch_test"] + def pipelined_compilation_test_suite(name): """Entry-point macro called from the BUILD file. 
@@ -269,6 +392,7 @@ def pipelined_compilation_test_suite(name): tests.extend(_disable_pipelining_test()) tests.extend(_custom_rule_test(generate_metadata = True, suffix = "_with_metadata")) tests.extend(_custom_rule_test(generate_metadata = False, suffix = "_without_metadata")) + tests.extend(_svh_mismatch_test()) native.test_suite( name = name, diff --git a/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_consumer.rs b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_consumer.rs new file mode 100644 index 0000000000..99b0ea9bf4 --- /dev/null +++ b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_consumer.rs @@ -0,0 +1,6 @@ +/// A library that depends on svh_lib. When compiled against a hollow `.rmeta` +/// of svh_lib, this crate's metadata records svh_lib's SVH at that point in +/// time. If the full `.rlib` of svh_lib was produced by a separate rustc +/// invocation (with a different HashMap seed), it may have a different SVH, +/// causing a mismatch when a downstream binary tries to link against both. +pub use svh_lib::Widget; diff --git a/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_lib.rs b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_lib.rs new file mode 100644 index 0000000000..e2f3985399 --- /dev/null +++ b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_lib.rs @@ -0,0 +1,8 @@ +use nondeterministic_macro::NondeterministicHash; + +/// A struct whose derivation runs the non-deterministic proc macro. +/// The macro generates a public constant whose value depends on HashMap +/// iteration order, so this crate's SVH varies between separate rustc +/// invocations. 
+#[derive(NondeterministicHash)] +pub struct Widget; diff --git a/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_nondeterministic_macro.rs b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_nondeterministic_macro.rs new file mode 100644 index 0000000000..7ba44425b7 --- /dev/null +++ b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_nondeterministic_macro.rs @@ -0,0 +1,37 @@ +extern crate proc_macro; +use proc_macro::TokenStream; +use std::collections::HashMap; + +/// A derive macro that produces non-deterministic output due to HashMap's +/// random iteration order. Each separate process invocation initializes +/// `HashMap` with a different OS-seeded `RandomState`, so iteration order +/// varies between invocations. This makes the generated constant—and thus +/// the crate's SVH—differ when the macro is run twice (e.g., once for a +/// hollow `.rmeta` and once for a full `.rlib` in pipelined compilation). +#[proc_macro_derive(NondeterministicHash)] +pub fn nondeterministic_hash_derive(_input: TokenStream) -> TokenStream { + // HashMap::new() uses RandomState, which seeds from OS entropy. + // Each separate process invocation gets a different seed, so iteration + // order over the map is non-deterministic across invocations. + let mut map = HashMap::new(); + map.insert("alpha", 1u64); + map.insert("beta", 2u64); + map.insert("gamma", 4u64); + map.insert("delta", 8u64); + map.insert("epsilon", 16u64); + + // Position-weighted sum: not commutative, so different iteration orders + // produce different values. With 5 entries (5! = 120 orderings), the + // probability of identical output in two separate invocations is ~0.8%. + let fingerprint: u64 = map + .iter() + .enumerate() + .map(|(pos, (_, &val))| val.wrapping_mul(pos as u64 + 1)) + .fold(0u64, u64::wrapping_add); + + // Exposing this as a public constant makes it part of the crate's + // exported API, which is included in the SVH computation. 
+ format!("pub const NONDETERMINISTIC_HASH_FINGERPRINT: u64 = {};", fingerprint) + .parse() + .unwrap() +} diff --git a/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_test.rs b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_test.rs new file mode 100644 index 0000000000..6ecfe83553 --- /dev/null +++ b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_test.rs @@ -0,0 +1,28 @@ +/// Demonstrates SVH (Strict Version Hash) mismatch with pipelined compilation. +/// +/// Without pipelining this test always builds and passes: each library is +/// compiled exactly once, so the SVH embedded in every `.rmeta` and `.rlib` +/// is identical. +/// +/// With `//rust/settings:pipelined_compilation=true` rules_rust compiles +/// `svh_lib` **twice** in separate rustc processes — once to emit the hollow +/// `.rmeta` (metadata only), once to emit the full `.rlib`. Because +/// `nondeterministic_macro` uses `HashMap` with OS-seeded randomness, the two +/// rustc invocations typically produce different token streams and therefore +/// different SVH values. `svh_consumer` is compiled against the hollow `.rmeta` +/// and records SVH_1 in its own metadata; when rustc later tries to link the +/// test binary against the full `.rlib` (which carries SVH_2), it detects the +/// mismatch and fails with E0460. The test therefore **fails to build** most of +/// the time (~99.2% probability) when pipelining is enabled. +/// +/// The `flaky = True` attribute on this target acknowledges that the mismatch +/// is non-deterministic: on rare occasions (~0.8%) both rustc invocations +/// happen to produce the same HashMap iteration order, the SVHs agree, and the +/// build succeeds. +use svh_consumer::Widget; + +#[test] +fn svh_consistent() { + // If we reach here the SVH was consistent (no pipelining, or a lucky run). 
+ let _: Widget = Widget; +} diff --git a/test/unit/pipelined_compilation/wrap.bzl b/test/unit/pipelined_compilation/wrap.bzl index f24a0e421a..e3f4ac5482 100644 --- a/test/unit/pipelined_compilation/wrap.bzl +++ b/test/unit/pipelined_compilation/wrap.bzl @@ -40,12 +40,23 @@ def _wrap_impl(ctx): lib_hash = output_hash, extension = ".rlib", ) - rust_metadata_name = "{prefix}{name}-{lib_hash}{extension}".format( - prefix = "lib", - name = crate_name, - lib_hash = output_hash, - extension = ".rmeta", - ) + + # Use -hollow.rlib extension (not .rmeta) so rustc reads it as an rlib archive + # containing optimized MIR. See rust/private/rust.bzl for the same logic. + # The hollow rlib is placed in a "_hollow/" subdirectory to avoid the full rlib + # and hollow rlib appearing in the same -Ldependency= search directory, which + # would cause E0463 "can't find crate" errors due to ambiguous crate candidates. + metadata_supports_pipelining = can_use_metadata_for_pipelining(toolchain, crate_type) and ctx.attr.generate_metadata + if metadata_supports_pipelining: + rust_metadata_name = "_hollow/lib{name}-{lib_hash}-hollow.rlib".format( + name = crate_name, + lib_hash = output_hash, + ) + else: + rust_metadata_name = "lib{name}-{lib_hash}.rmeta".format( + name = crate_name, + lib_hash = output_hash, + ) tgt = ctx.attr.target deps = [DepVariantInfo( @@ -73,8 +84,7 @@ def _wrap_impl(ctx): aliases = {}, output = rust_lib, metadata = rust_metadata, - metadata_supports_pipelining = can_use_metadata_for_pipelining(toolchain, crate_type) and - ctx.attr.generate_metadata, + metadata_supports_pipelining = metadata_supports_pipelining, owner = ctx.label, edition = "2018", compile_data = depset([]), From 6f3357cd987e2218f01618942c42a63cbc7cfb0d Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Mon, 2 Mar 2026 11:32:58 -0500 Subject: [PATCH 21/23] Cleanup some process_wrapper code --- rust/private/rustc.bzl | 3 +- util/process_wrapper/main.rs | 78 ++++----------------------------- 
util/process_wrapper/options.rs | 16 +------ util/process_wrapper/output.rs | 7 +-- util/process_wrapper/rustc.rs | 60 +++---------------------- 5 files changed, 17 insertions(+), 147 deletions(-) diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index c1f58c9716..08c632e663 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -1056,8 +1056,7 @@ def construct_arguments( process_wrapper_flags.add("--rustc-output-format", "json" if error_format == "json" else "rendered") # Configure rustc json output by adding artifact notifications. - # These will always be filtered out by process_wrapper and will be use to terminate - # rustc when appropriate. + # These are filtered out by process_wrapper. json = ["artifacts"] if error_format == "short": json.append("diagnostic-short") diff --git a/util/process_wrapper/main.rs b/util/process_wrapper/main.rs index 5d057b08cf..2a7cbd8565 100644 --- a/util/process_wrapper/main.rs +++ b/util/process_wrapper/main.rs @@ -25,7 +25,7 @@ use std::fmt; use std::fs::{self, copy, OpenOptions}; use std::io; use std::path::PathBuf; -use std::process::{exit, Command, ExitStatus, Stdio}; +use std::process::{exit, Command, Stdio}; #[cfg(windows)] use std::time::{SystemTime, UNIX_EPOCH}; @@ -37,30 +37,6 @@ use crate::rustc::ErrorFormat; #[cfg(windows)] use crate::util::read_file_to_array; -#[cfg(windows)] -fn status_code(status: ExitStatus, was_killed: bool) -> i32 { - // On windows, there's no good way to know if the process was killed by a signal. - // If we killed the process, we override the code to signal success. - if was_killed { - 0 - } else { - status.code().unwrap_or(1) - } -} - -#[cfg(not(windows))] -fn status_code(status: ExitStatus, was_killed: bool) -> i32 { - // On unix, if code is None it means that the process was killed by a signal. 
- // https://doc.rust-lang.org/std/process/struct.ExitStatus.html#method.success - match status.code() { - Some(code) => code, - // If we killed the process, we expect None here - None if was_killed => 0, - // Otherwise it's some unexpected signal - None => 1, - } -} - #[derive(Debug)] struct ProcessWrapperError(String); @@ -298,9 +274,7 @@ fn json_warning(line: &str) -> JsonValue { fn process_line( mut line: String, - quit_on_rmeta: bool, format: ErrorFormat, - metadata_emitted: &mut bool, ) -> Result { // LLVM can emit lines that look like the following, and these will be interspersed // with the regular JSON output. Arguably, rustc should be fixed not to emit lines @@ -315,11 +289,7 @@ fn process_line( return Ok(LineOutput::Skip); } } - if quit_on_rmeta { - rustc::stop_on_rmeta_completion(line, format, metadata_emitted) - } else { - rustc::process_json(line, format) - } + rustc::process_json(line, format) } fn main() -> Result<(), ProcessWrapperError> { @@ -381,26 +351,13 @@ fn main() -> Result<(), ProcessWrapperError> { None }; - let mut was_killed = false; let result = if let Some(format) = opts.rustc_output_format { - let quit_on_rmeta = opts.rustc_quit_on_rmeta; - // Process json rustc output and kill the subprocess when we get a signal - // that we emitted a metadata file. - let mut me = false; - let metadata_emitted = &mut me; - let result = process_output( + process_output( &mut child_stderr, stderr.as_mut(), output_file.as_mut(), - move |line| process_line(line, quit_on_rmeta, format, metadata_emitted), - ); - if me { - // If recv returns Ok(), a signal was sent in this channel so we should terminate the child process. - // We can safely ignore the Result from kill() as we don't care if the process already terminated. 
- let _ = child.kill(); - was_killed = true; - } - result + move |line| process_line(line, format), + ) } else { // Process output normally by forwarding stderr process_output( @@ -415,10 +372,8 @@ fn main() -> Result<(), ProcessWrapperError> { let status = child .wait() .map_err(|e| ProcessWrapperError(format!("failed to wait for child process: {}", e)))?; - // If the child process is rustc and is killed after metadata generation, that's also a success. - let code = status_code(status, was_killed); - let success = code == 0; - if success { + let code = status.code().unwrap_or(1); + if code == 0 { if let Some(tf) = opts.touch_file { OpenOptions::new() .create(true) @@ -454,7 +409,6 @@ mod test { #[test] fn test_process_line_diagnostic_json() -> Result<(), String> { - let mut metadata_emitted = false; let LineOutput::Message(msg) = process_line( r#" { @@ -463,9 +417,7 @@ mod test { } "# .to_string(), - false, ErrorFormat::Json, - &mut metadata_emitted, )? else { return Err("Expected a LineOutput::Message".to_string()); @@ -486,7 +438,6 @@ mod test { #[test] fn test_process_line_diagnostic_rendered() -> Result<(), String> { - let mut metadata_emitted = false; let LineOutput::Message(msg) = process_line( r#" { @@ -495,9 +446,7 @@ mod test { } "# .to_string(), - /*quit_on_rmeta=*/ false, ErrorFormat::Rendered, - &mut metadata_emitted, )? else { return Err("Expected a LineOutput::Message".to_string()); @@ -508,16 +457,13 @@ mod test { #[test] fn test_process_line_noise() -> Result<(), String> { - let mut metadata_emitted = false; for text in [ "'+zaamo' is not a recognized feature for this target (ignoring feature)", " WARN rustc_errors::emitter Invalid span...", ] { let LineOutput::Message(msg) = process_line( text.to_string(), - /*quit_on_rmeta=*/ false, ErrorFormat::Json, - &mut metadata_emitted, )? 
else { return Err("Expected a LineOutput::Message".to_string()); @@ -543,7 +489,6 @@ mod test { #[test] fn test_process_line_emit_link() -> Result<(), String> { - let mut metadata_emitted = false; assert!(matches!( process_line( r#" @@ -553,19 +498,15 @@ mod test { } "# .to_string(), - /*quit_on_rmeta=*/ true, ErrorFormat::Rendered, - &mut metadata_emitted, )?, LineOutput::Skip )); - assert!(!metadata_emitted); Ok(()) } #[test] fn test_process_line_emit_metadata() -> Result<(), String> { - let mut metadata_emitted = false; assert!(matches!( process_line( r#" @@ -575,13 +516,10 @@ mod test { } "# .to_string(), - /*quit_on_rmeta=*/ true, ErrorFormat::Rendered, - &mut metadata_emitted, )?, - LineOutput::Terminate + LineOutput::Skip )); - assert!(metadata_emitted); Ok(()) } } diff --git a/util/process_wrapper/options.rs b/util/process_wrapper/options.rs index 2f252cadc7..6dbc898a11 100644 --- a/util/process_wrapper/options.rs +++ b/util/process_wrapper/options.rs @@ -44,9 +44,6 @@ pub(crate) struct Options { // If set, also logs all unprocessed output from the rustc output to this file. // Meant to be used to get json output out of rustc for tooling usage. pub(crate) output_file: Option, - // If set, it configures rustc to emit an rmeta file and then - // quit. - pub(crate) rustc_quit_on_rmeta: bool, // This controls the output format of rustc messages. 
pub(crate) rustc_output_format: Option, } @@ -64,7 +61,6 @@ pub(crate) fn options() -> Result { let mut stdout_file = None; let mut stderr_file = None; let mut output_file = None; - let mut rustc_quit_on_rmeta_raw = None; let mut rustc_output_format_raw = None; let mut flags = Flags::new(); let mut require_explicit_unstable_features = None; @@ -102,17 +98,9 @@ pub(crate) fn options() -> Result { "Log all unprocessed subprocess stderr in this file.", &mut output_file, ); - flags.define_flag( - "--rustc-quit-on-rmeta", - "If enabled, this wrapper will terminate rustc after rmeta has been emitted.", - &mut rustc_quit_on_rmeta_raw, - ); flags.define_flag( "--rustc-output-format", - "Controls the rustc output format if --rustc-quit-on-rmeta is set.\n\ - 'json' will cause the json output to be output, \ - 'rendered' will extract the rendered message and print that.\n\ - Default: `rendered`", + "The expected rustc output format. Valid values: json, rendered.", &mut rustc_output_format_raw, ); flags.define_flag( @@ -179,7 +167,6 @@ pub(crate) fn options() -> Result { }) .transpose()?; - let rustc_quit_on_rmeta = rustc_quit_on_rmeta_raw.is_some_and(|s| s == "true"); let rustc_output_format = rustc_output_format_raw .map(|v| match v.as_str() { "json" => Ok(rustc::ErrorFormat::Json), @@ -227,7 +214,6 @@ pub(crate) fn options() -> Result { stdout_file, stderr_file, output_file, - rustc_quit_on_rmeta, rustc_output_format, }) } diff --git a/util/process_wrapper/output.rs b/util/process_wrapper/output.rs index 4b3604b18d..5dabad8179 100644 --- a/util/process_wrapper/output.rs +++ b/util/process_wrapper/output.rs @@ -18,15 +18,11 @@ use std::io::{self, prelude::*}; /// LineOutput tells process_output what to do when a line is processed. /// If a Message is returned, it will be written to write_end, if -/// Skip is returned nothing will be printed and execution continues, -/// if Terminate is returned, process_output returns immediately. 
-/// Terminate is used to stop processing when we see an emit metadata -/// message. +/// Skip is returned nothing will be printed and execution continues. #[derive(Debug)] pub(crate) enum LineOutput { Message(String), Skip, - Terminate, } #[derive(Debug)] @@ -95,7 +91,6 @@ where match process_line(line.clone()) { Ok(LineOutput::Message(to_write)) => output_writer.write_all(to_write.as_bytes())?, Ok(LineOutput::Skip) => {} - Ok(LineOutput::Terminate) => return Ok(()), Err(msg) => { failed_on = Some((line, msg)); break; diff --git a/util/process_wrapper/rustc.rs b/util/process_wrapper/rustc.rs index 97ee466337..3bb4a8c2d9 100644 --- a/util/process_wrapper/rustc.rs +++ b/util/process_wrapper/rustc.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::convert::{TryFrom, TryInto}; - use tinyjson::JsonValue; use crate::output::{LineOutput, LineResult}; @@ -37,66 +35,20 @@ fn get_key(value: &JsonValue, key: &str) -> Option { } } -#[derive(Debug)] -enum RustcMessage { - Emit(String), - Message(String), -} - -impl TryFrom for RustcMessage { - type Error = (); - fn try_from(val: JsonValue) -> Result { - if let Some(emit) = get_key(&val, "emit") { - return Ok(Self::Emit(emit)); - } - if let Some(rendered) = get_key(&val, "rendered") { - return Ok(Self::Message(rendered)); - } - Err(()) - } -} - /// process_rustc_json takes an output line from rustc configured with /// --error-format=json, parses the json and returns the appropriate output /// according to the original --error-format supplied. -/// Only messages are returned, emits are ignored. +/// Only diagnostics with a rendered message are returned. /// Returns an errors if parsing json fails. 
pub(crate) fn process_json(line: String, error_format: ErrorFormat) -> LineResult { let parsed: JsonValue = line .parse() .map_err(|_| "error parsing rustc output as json".to_owned())?; - Ok(match parsed.try_into() { - Ok(RustcMessage::Message(rendered)) => { - output_based_on_error_format(line, rendered, error_format) - } - _ => LineOutput::Skip, - }) -} - -/// stop_on_rmeta_completion parses the json output of rustc in the same way -/// process_rustc_json does. In addition, it will signal to stop when metadata -/// is emitted so the compiler can be terminated. -/// This is used to implement pipelining in rules_rust, please see -/// https://internals.rust-lang.org/t/evaluating-pipelined-rustc-compilation/10199 -/// Returns an error if parsing json fails. -/// TODO: pass a function to handle the emit event and merge with process_json -pub(crate) fn stop_on_rmeta_completion( - line: String, - error_format: ErrorFormat, - kill: &mut bool, -) -> LineResult { - let parsed: JsonValue = line - .parse() - .map_err(|_| "error parsing rustc output as json".to_owned())?; - Ok(match parsed.try_into() { - Ok(RustcMessage::Emit(emit)) if emit == "metadata" => { - *kill = true; - LineOutput::Terminate - } - Ok(RustcMessage::Message(rendered)) => { - output_based_on_error_format(line, rendered, error_format) - } - _ => LineOutput::Skip, + Ok(if let Some(rendered) = get_key(&parsed, "rendered") { + output_based_on_error_format(line, rendered, error_format) + } else { + // Ignore non-diagnostic messages such as artifact notifications. 
+ LineOutput::Skip }) } From 7ed8a24a37be47378b8a266ae3016148b9cb5c49 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Fri, 13 Mar 2026 14:19:28 -0400 Subject: [PATCH 22/23] Fix prost to be compatible with multiplatform --- extensions/prost/private/BUILD.bazel | 2 ++ extensions/prost/private/prost.bzl | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/extensions/prost/private/BUILD.bazel b/extensions/prost/private/BUILD.bazel index ed8e508537..aaf6c22d8d 100644 --- a/extensions/prost/private/BUILD.bazel +++ b/extensions/prost/private/BUILD.bazel @@ -5,6 +5,8 @@ load("//:defs.bzl", "rust_prost_toolchain") load(":legacy_proto_toolchain.bzl", "legacy_proto_toolchain") load(":prost.bzl", "RUST_EDITION", "current_prost_runtime") +exports_files(["protoc_wrapper.rs"]) + current_prost_runtime( name = "current_prost_runtime", ) diff --git a/extensions/prost/private/prost.bzl b/extensions/prost/private/prost.bzl index 4eecf85a06..386f0d7d72 100644 --- a/extensions/prost/private/prost.bzl +++ b/extensions/prost/private/prost.bzl @@ -31,14 +31,14 @@ RUST_EDITION = "2021" TOOLCHAIN_TYPE = "@rules_rust_prost//:toolchain_type" -def _create_proto_lang_toolchain(ctx, prost_toolchain): +def _create_proto_lang_toolchain(prost_toolchain): proto_lang_toolchain = proto_common.ProtoLangToolchainInfo( out_replacement_format_flag = "--prost_out=%s", plugin_format_flag = prost_toolchain.prost_plugin_flag, plugin = prost_toolchain.prost_plugin[DefaultInfo].files_to_run, runtime = prost_toolchain.prost_runtime, provided_proto_sources = depset(), - proto_compiler = ctx.attr._prost_process_wrapper[DefaultInfo].files_to_run, + proto_compiler = prost_toolchain.prost_process_wrapper[DefaultInfo].files_to_run, protoc_opts = prost_toolchain.protoc_opts, progress_message = "ProstGenProto %{label}", mnemonic = "ProstGenProto", @@ -118,7 +118,7 @@ def _compile_proto( additional_inputs = additional_inputs, additional_args = additional_args, generated_files = 
[lib_rs, package_info_file], - proto_lang_toolchain_info = _create_proto_lang_toolchain(ctx, prost_toolchain), + proto_lang_toolchain_info = _create_proto_lang_toolchain(prost_toolchain), plugin_output = ctx.bin_dir.path, ) @@ -377,12 +377,6 @@ rust_prost_aspect = aspect( default = Label("@bazel_tools//tools/cpp:grep-includes"), cfg = "exec", ), - "_prost_process_wrapper": attr.label( - doc = "The wrapper script for the Prost protoc plugin.", - cfg = "exec", - executable = True, - default = Label("//private:protoc_wrapper"), - ), } | RUSTC_ATTRS | { # Need to override this attribute to explicitly set the workspace. "_always_enable_metadata_output_groups": attr.label( @@ -473,6 +467,7 @@ def _rust_prost_toolchain_impl(ctx): prost_plugin = ctx.attr.prost_plugin, prost_plugin_flag = ctx.attr.prost_plugin_flag, prost_runtime = ctx.attr.prost_runtime, + prost_process_wrapper = ctx.attr._prost_process_wrapper, prost_types = ctx.attr.prost_types, proto_compiler = proto_compiler, protoc_opts = ctx.fragments.proto.experimental_protoc_opts, @@ -516,6 +511,12 @@ rust_prost_toolchain = rule( mandatory = True, aspects = [rust_analyzer_aspect], ), + "_prost_process_wrapper": attr.label( + doc = "The wrapper script for the Prost protoc plugin.", + cfg = "exec", + executable = True, + default = Label("@rules_rust_prost//private:protoc_wrapper"), + ), "prost_types": attr.label( doc = "The Prost types crates to use.", providers = [[rust_common.crate_info], [rust_common.crate_group_info]], From 3bba328a1459ad948d7651f2aa0b62a7c76d4e1a Mon Sep 17 00:00:00 2001 From: Walter Gray Date: Thu, 26 Mar 2026 10:44:37 -0700 Subject: [PATCH 23/23] Add worker-managed pipelined compilation and incremental build support Co-Authored-By: Claude Opus 4.6 (1M context) --- cargo/private/BUILD.bazel | 4 +- cargo/private/cargo_build_script.bzl | 6 + .../private/cargo_build_script_runner/bin.rs | 105 +- .../cargo_manifest_dir.rs | 323 ++- cargo/settings/BUILD.bazel | 3 + cargo/settings/settings.bzl | 23 + 
extensions/prost/private/prost.bzl | 12 +- rust/platform/triple_mappings.bzl | 6 +- rust/private/clippy.bzl | 2 +- rust/private/incremental.bzl | 76 + rust/private/rust.bzl | 43 +- rust/private/rust_analyzer.bzl | 2 +- rust/private/rustc.bzl | 365 +++- rust/private/rustdoc.bzl | 2 +- rust/private/unpretty.bzl | 2 +- rust/private/utils.bzl | 2 +- rust/settings/BUILD.bazel | 6 + rust/settings/settings.bzl | 87 + rust/toolchain.bzl | 6 + test/chained_direct_deps/mod1.rs | 9 + test/unit/incremental/BUILD.bazel | 5 + .../incremental/incremental_test_suite.bzl | 139 ++ test/unit/pipelined_compilation/BUILD.bazel | 16 + .../pipelined_compilation_test.bzl | 128 ++ .../strace_rustc_post_metadata_test.sh | 229 +++ thoughts/shared/bench_cargo_progress.log | 7 + thoughts/shared/bench_cargo_raw.csv | 17 + .../bench_multiplex_sandbox_overhead.sh | 519 +++++ thoughts/shared/bench_sdk.sh | 229 +++ thoughts/shared/bench_sdk_analysis.md | 243 +++ thoughts/shared/bench_sdk_progress.log | 153 ++ thoughts/shared/bench_sdk_raw.csv | 31 + thoughts/shared/benchmark_analysis.md | 158 ++ thoughts/shared/benchmark_raw_data.csv | 52 + ...-25-consolidated-worker-pipelining-plan.md | 441 +++++ util/process_wrapper/main.rs | 458 ++++- util/process_wrapper/options.rs | 406 +++- .../private/bootstrap_process_wrapper.cc | 98 +- util/process_wrapper/util.rs | 86 + util/process_wrapper/worker.rs | 1737 +++++++++++++++++ util/process_wrapper/worker_pipeline.rs | 1493 ++++++++++++++ util/process_wrapper/worker_protocol.rs | 223 +++ util/process_wrapper/worker_sandbox.rs | 389 ++++ 43 files changed, 7996 insertions(+), 345 deletions(-) create mode 100644 rust/private/incremental.bzl create mode 100644 test/unit/incremental/BUILD.bazel create mode 100644 test/unit/incremental/incremental_test_suite.bzl create mode 100755 test/unit/pipelined_compilation/strace_rustc_post_metadata_test.sh create mode 100644 thoughts/shared/bench_cargo_progress.log create mode 100644 thoughts/shared/bench_cargo_raw.csv 
create mode 100755 thoughts/shared/bench_multiplex_sandbox_overhead.sh create mode 100755 thoughts/shared/bench_sdk.sh create mode 100644 thoughts/shared/bench_sdk_analysis.md create mode 100644 thoughts/shared/bench_sdk_progress.log create mode 100644 thoughts/shared/bench_sdk_raw.csv create mode 100644 thoughts/shared/benchmark_analysis.md create mode 100644 thoughts/shared/benchmark_raw_data.csv create mode 100644 thoughts/shared/plans/2026-03-25-consolidated-worker-pipelining-plan.md create mode 100644 util/process_wrapper/worker.rs create mode 100644 util/process_wrapper/worker_pipeline.rs create mode 100644 util/process_wrapper/worker_protocol.rs create mode 100644 util/process_wrapper/worker_sandbox.rs diff --git a/cargo/private/BUILD.bazel b/cargo/private/BUILD.bazel index db04afa7b1..6872881cbc 100644 --- a/cargo/private/BUILD.bazel +++ b/cargo/private/BUILD.bazel @@ -39,9 +39,9 @@ copy_file( bzl_library( name = "bzl_lib", + srcs = glob(["**/*.bzl"]), + visibility = ["//visibility:public"], deps = [ "//rust:bzl_lib", ], - srcs = glob(["**/*.bzl"]), - visibility = ["//visibility:public"], ) diff --git a/cargo/private/cargo_build_script.bzl b/cargo/private/cargo_build_script.bzl index c23108851a..b68010746d 100644 --- a/cargo/private/cargo_build_script.bzl +++ b/cargo/private/cargo_build_script.bzl @@ -632,6 +632,9 @@ def _cargo_build_script_impl(ctx): if experimental_symlink_execroot: env["RULES_RUST_SYMLINK_EXEC_ROOT"] = "1" + skip_patterns = ctx.attr._symlink_exec_root_skip_patterns[BuildSettingInfo].value + if skip_patterns: + env["RULES_RUST_SYMLINK_EXEC_ROOT_SKIP_PATTERNS"] = ",".join(skip_patterns) ctx.actions.run( executable = ctx.executable._cargo_build_script_runner, @@ -787,6 +790,9 @@ cargo_build_script = rule( "_experimental_symlink_execroot": attr.label( default = Label("//cargo/settings:experimental_symlink_execroot"), ), + "_symlink_exec_root_skip_patterns": attr.label( + default = Label("//cargo/settings:symlink_exec_root_skip_patterns"), + 
), "_fallback_ar": attr.label( cfg = "exec", executable = True, diff --git a/cargo/private/cargo_build_script_runner/bin.rs b/cargo/private/cargo_build_script_runner/bin.rs index 27f5848edc..92eb66e83d 100644 --- a/cargo/private/cargo_build_script_runner/bin.rs +++ b/cargo/private/cargo_build_script_runner/bin.rs @@ -62,6 +62,7 @@ fn run_buildrs() -> Result<(), String> { let mut exec_root_links = Vec::new(); if should_symlink_exec_root() { + let exec_root_skip_patterns = symlink_exec_root_skip_patterns(); // Symlink the execroot to the manifest_dir so that we can use relative paths in the arguments. let exec_root_paths = std::fs::read_dir(&exec_root) .map_err(|err| format!("Failed while listing exec root: {err:?}"))?; @@ -75,12 +76,26 @@ fn run_buildrs() -> Result<(), String> { let file_name = path .file_name() .ok_or_else(|| "Failed while getting file name".to_string())?; + + // Skip entries matching user-configurable patterns from + // RULES_RUST_SYMLINK_EXEC_ROOT_SKIP_PATTERNS (comma-separated). + // Patterns support trailing '*' as a prefix glob. + let name = file_name.to_string_lossy(); + if exec_root_skip_patterns + .iter() + .any(|p| match_skip_pattern(p, &name)) + { + continue; + } + let link = manifest_dir.join(file_name); - symlink_if_not_exists(&path, &link) + let created = symlink_if_not_exists(&path, &link) .map_err(|err| format!("Failed to symlink {path:?} to {link:?}: {err}"))?; - exec_root_links.push(link) + if created { + exec_root_links.push(link); + } } } @@ -219,15 +234,25 @@ fn run_buildrs() -> Result<(), String> { ) }); - if !exec_root_links.is_empty() { - for link in exec_root_links { - remove_symlink(&link).map_err(|e| { - format!( - "Failed to remove exec_root link '{}' with {:?}", + for link in exec_root_links { + if let Err(e) = remove_symlink(&link) { + if cfg!(target_family = "windows") { + // On Windows, symlink removal can fail with PermissionDenied if + // another process still holds a handle to the target directory. 
+                // These are temporary symlinks in the build sandbox that Bazel
+                // will clean up, so we log and continue rather than failing.
+                eprintln!(
+                    "Warning: could not remove exec_root link '{}': {:?}",
                     link.display(),
                     e
-                )
-            })?;
+                );
+            } else {
+                return Err(format!(
+                    "Failed to remove exec_root link '{}': {:?}",
+                    link.display(),
+                    e
+                ));
+            }
         }
     }
 
@@ -246,11 +271,34 @@ fn should_symlink_exec_root() -> bool {
         .unwrap_or(false)
 }
 
+/// Parse skip patterns from `RULES_RUST_SYMLINK_EXEC_ROOT_SKIP_PATTERNS`.
+/// The env var holds a comma-separated list of patterns; each pattern is
+/// either an exact match or a prefix glob (trailing `*`).
+fn symlink_exec_root_skip_patterns() -> Vec<String> {
+    env::var("RULES_RUST_SYMLINK_EXEC_ROOT_SKIP_PATTERNS")
+        .map(|s| s.split(',').map(|p| p.to_owned()).collect())
+        .unwrap_or_default()
+}
+
+/// Match a skip pattern against a file name. Supports exact match and
+/// trailing `*` as a prefix glob (e.g. `local-spawn-runner.*` matches
+/// `local-spawn-runner.12345`).
+fn match_skip_pattern(pattern: &str, name: &str) -> bool {
+    if let Some(prefix) = pattern.strip_suffix('*') {
+        name.starts_with(prefix)
+    } else {
+        name == pattern
+    }
+}
+
 /// Create a symlink from `link` to `original` if `link` doesn't already exist.
-fn symlink_if_not_exists(original: &Path, link: &Path) -> Result<(), String> {
-    symlink(original, link)
-        .or_else(swallow_already_exists)
-        .map_err(|err| format!("Failed to create symlink: {err}"))
+/// Returns `true` if a new symlink was created, `false` if the path already existed.
+fn symlink_if_not_exists(original: &Path, link: &Path) -> Result<bool, String> {
+    match symlink(original, link) {
+        Ok(()) => Ok(true),
+        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => Ok(false),
+        Err(err) => Err(format!("Failed to create symlink: {err}")),
+    }
 }
 
 fn resolve_rundir(rundir: &str, exec_root: &Path, manifest_dir: &Path) -> Result<PathBuf, String> {
@@ -270,14 +318,6 @@ fn resolve_rundir(rundir: &str, exec_root: &Path, manifest_dir: &Path) -> Result<PathBuf, String>
     Ok(exec_root.join(rundir_path))
 }
 
-fn swallow_already_exists(err: std::io::Error) -> std::io::Result<()> {
-    if err.kind() == std::io::ErrorKind::AlreadyExists {
-        Ok(())
-    } else {
-        Err(err)
-    }
-}
-
 /// A representation of expected command line arguments.
 struct Args {
     progname: String,
@@ -470,4 +510,27 @@ windows
     assert_eq!(tree["CARGO_CFG_WINDOWS"], "");
     assert_eq!(tree["CARGO_CFG_TARGET_FAMILY"], "windows");
 }
+
+    #[test]
+    fn skip_pattern_exact_match() {
+        assert!(match_skip_pattern(".git", ".git"));
+        assert!(!match_skip_pattern(".git", ".github"));
+        assert!(!match_skip_pattern(".git", ".gi"));
+        assert!(!match_skip_pattern(".git", ""));
+    }
+
+    #[test]
+    fn skip_pattern_prefix_glob() {
+        assert!(match_skip_pattern("local-spawn-runner.*", "local-spawn-runner.12345"));
+        assert!(match_skip_pattern("local-spawn-runner.*", "local-spawn-runner."));
+        assert!(!match_skip_pattern("local-spawn-runner.*", "local-spawn-runner"));
+        assert!(!match_skip_pattern("local-spawn-runner.*", "other-thing"));
+    }
+
+    #[test]
+    fn skip_pattern_star_alone() {
+        // A bare "*" pattern matches everything.
+ assert!(match_skip_pattern("*", "anything")); + assert!(match_skip_pattern("*", "")); + } } diff --git a/cargo/private/cargo_build_script_runner/cargo_manifest_dir.rs b/cargo/private/cargo_build_script_runner/cargo_manifest_dir.rs index def343a34e..895ef6155d 100644 --- a/cargo/private/cargo_build_script_runner/cargo_manifest_dir.rs +++ b/cargo/private/cargo_build_script_runner/cargo_manifest_dir.rs @@ -27,14 +27,41 @@ pub fn remove_symlink(path: &Path) -> Result<(), std::io::Error> { std::fs::remove_file(path) } -/// Create a symlink file on windows systems +/// Remove a symlink or junction on Windows. +/// +/// Windows has three kinds of reparse points we may encounter: +/// 1. File symlinks — `remove_file` works. +/// 2. Directory symlinks — `remove_dir` removes the link itself (not the +/// target contents), but `remove_file` also works on some Windows versions. +/// 3. Junctions — similar to directory symlinks; `remove_dir` removes the +/// junction entry. +/// +/// We use `symlink_metadata` + `FileTypeExt` to classify the entry and try +/// the most appropriate removal call first, with a fallback for edge cases. #[cfg(target_family = "windows")] pub fn remove_symlink(path: &Path) -> Result<(), std::io::Error> { - if path.is_dir() { - std::fs::remove_dir(path) - } else { - std::fs::remove_file(path) + use std::os::windows::fs::FileTypeExt; + + let metadata = std::fs::symlink_metadata(path)?; + let ft = metadata.file_type(); + + if ft.is_symlink_file() { + return std::fs::remove_file(path); } + + if ft.is_symlink_dir() { + // remove_dir removes the symlink entry itself, not the target contents. + // Fall back to remove_file if remove_dir fails (some Windows versions). + return std::fs::remove_dir(path).or_else(|_| std::fs::remove_file(path)); + } + + // Junctions appear as directories but are not symlinks per FileTypeExt. + // remove_dir removes the junction entry itself. 
+ if ft.is_dir() { + return std::fs::remove_dir(path).or_else(|_| std::fs::remove_file(path)); + } + + std::fs::remove_file(path) } /// Check if the system supports symlinks by attempting to create one. @@ -227,73 +254,84 @@ impl RunfilesMaker { Ok(()) } - /// Delete runfiles from the runfiles directory that do not match user defined suffixes + /// Strip runfiles that do not match a retained suffix. /// - /// The Unix implementation assumes symlinks are supported and that the runfiles directory - /// was created using symlinks. - fn drain_runfiles_dir_unix(&self) -> Result<(), String> { + /// When `symlinks_used` is true the runfiles directory was populated with + /// symlinks: every entry is removed and only retained entries are copied + /// back as real files. When false, real file copies were used (Windows + /// without symlink support) and only retained entries are deleted so that + /// downstream steps can recreate them. + /// + /// Missing entries are tolerated in either mode — on Windows the runfiles + /// directory may be incomplete (e.g. a Cargo.lock that was never created). + fn drain_runfiles_dir_impl(&self, symlinks_used: bool) -> Result<(), String> { for (src, dest) in &self.runfiles { let abs_dest = self.output_dir.join(dest); - - remove_symlink(&abs_dest).map_err(|e| { - format!( - "Failed to delete symlink '{}' with {:?}", - abs_dest.display(), - e - ) - })?; - - if !self + let should_retain = self .filename_suffixes_to_retain .iter() - .any(|suffix| dest.ends_with(suffix)) - { - if let Some(parent) = abs_dest.parent() { - if is_dir_empty(parent).map_err(|e| { - format!("Failed to determine if directory was empty with: {:?}", e) - })? 
{ - std::fs::remove_dir(parent).map_err(|e| { - format!( - "Failed to delete directory {} with {:?}", - parent.display(), - e - ) - })?; + .any(|suffix| dest.ends_with(suffix)); + + if symlinks_used { + match remove_symlink(&abs_dest) { + Ok(()) => {} + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + if !should_retain { + continue; + } + } + Err(e) => { + return Err(format!( + "Failed to delete symlink '{}' with {:?}", + abs_dest.display(), + e + )); } } - continue; - } - std::fs::copy(src, &abs_dest).map_err(|e| { - format!( - "Failed to copy `{} -> {}` with {:?}", - src.display(), - abs_dest.display(), - e - ) - })?; - } - Ok(()) - } + if !should_retain { + if let Some(parent) = abs_dest.parent() { + if is_dir_empty(parent).map_err(|e| { + format!("Failed to determine if directory was empty with: {:?}", e) + })? { + std::fs::remove_dir(parent).map_err(|e| { + format!( + "Failed to delete directory {} with {:?}", + parent.display(), + e + ) + })?; + } + } + continue; + } - /// Delete runfiles from the runfiles directory that do not match user defined suffixes - /// - /// The Windows implementation assumes symlinks are not supported and real files will have - /// been copied into the runfiles directory. - fn drain_runfiles_dir_windows(&self) -> Result<(), String> { - for dest in self.runfiles.values() { - if !self - .filename_suffixes_to_retain - .iter() - .any(|suffix| dest.ends_with(suffix)) - { + std::fs::copy(src, &abs_dest).map_err(|e| { + format!( + "Failed to copy `{} -> {}` with {:?}", + src.display(), + abs_dest.display(), + e + ) + })?; + } else if !should_retain { + // Non-symlink mode: non-retained files are left as-is (no + // empty-directory cleanup needed since the files were never + // removed in the first place). 
continue; + } else { + match std::fs::remove_file(&abs_dest) { + Ok(()) => {} + Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} + Err(e) => { + return Err(format!( + "Failed to remove file {} with {:?}", + abs_dest.display(), + e + )); + } + } } - - let abs_dest = self.output_dir.join(dest); - std::fs::remove_file(&abs_dest).map_err(|e| { - format!("Failed to remove file {} with {:?}", abs_dest.display(), e) - })?; } Ok(()) } @@ -301,15 +339,10 @@ impl RunfilesMaker { /// Delete runfiles from the runfiles directory that do not match user defined suffixes pub fn drain_runfiles_dir(&self, out_dir: &Path) -> Result<(), String> { if cfg!(target_family = "windows") { - // If symlinks are supported then symlinks will have been used. let supports_symlinks = system_supports_symlinks(&self.output_dir)?; - if supports_symlinks { - self.drain_runfiles_dir_unix()?; - } else { - self.drain_runfiles_dir_windows()?; - } + self.drain_runfiles_dir_impl(supports_symlinks)?; } else { - self.drain_runfiles_dir_unix()?; + self.drain_runfiles_dir_impl(true)?; } // Due to the symlinks in `CARGO_MANIFEST_DIR`, some build scripts @@ -409,6 +442,160 @@ mod tests { out_dir } + /// Create a `RunfilesMaker` for testing without needing a param file. + fn make_runfiles_maker( + output_dir: PathBuf, + suffixes: &[&str], + runfiles: Vec<(PathBuf, RlocationPath)>, + ) -> RunfilesMaker { + RunfilesMaker { + output_dir, + filename_suffixes_to_retain: suffixes.iter().map(|s| s.to_string()).collect(), + runfiles: runfiles.into_iter().collect(), + } + } + + /// Helper to create a unique test directory under TEST_TMPDIR. 
+ fn test_dir(name: &str) -> PathBuf { + let test_tmp = PathBuf::from(std::env::var("TEST_TMPDIR").unwrap()); + let dir = test_tmp.join(name); + if dir.exists() { + fs::remove_dir_all(&dir).unwrap(); + } + fs::create_dir_all(&dir).unwrap(); + dir + } + + #[cfg(any(target_family = "windows", target_family = "unix"))] + #[test] + fn drain_symlinks_tolerates_missing_symlinks() { + let base = test_dir("drain_sym_missing"); + let output_dir = base.join("runfiles"); + fs::create_dir_all(&output_dir).unwrap(); + + // Two distinct source files so BTreeMap keeps both entries. + let src_real = base.join("real.txt"); + fs::write(&src_real, "content").unwrap(); + let src_lock = base.join("Cargo.lock"); + fs::write(&src_lock, "lock data").unwrap(); + + // Two runfile entries: one exists as a symlink, one does not. + let existing_dest = "pkg/real.txt"; + let missing_dest = "pkg/Cargo.lock"; + let abs_existing = output_dir.join(existing_dest); + fs::create_dir_all(abs_existing.parent().unwrap()).unwrap(); + symlink(&src_real, &abs_existing).unwrap(); + // Intentionally do NOT create a symlink for missing_dest. + + let maker = make_runfiles_maker( + output_dir.clone(), + &[], // retain nothing + vec![ + (src_real.clone(), existing_dest.to_string()), + (src_lock.clone(), missing_dest.to_string()), + ], + ); + + // Should succeed despite the missing symlink. + maker.drain_runfiles_dir_impl(true).unwrap(); + + // The existing symlink should have been removed. 
+ assert!(!abs_existing.exists()); + } + + #[cfg(any(target_family = "windows", target_family = "unix"))] + #[test] + fn drain_symlinks_retains_matching_suffixes() { + let base = test_dir("drain_sym_retain"); + let output_dir = base.join("runfiles"); + fs::create_dir_all(&output_dir).unwrap(); + + let src_file = base.join("lib.rs"); + fs::write(&src_file, "fn main() {}").unwrap(); + + let src_lock = base.join("Cargo.lock"); + fs::write(&src_lock, "lock contents").unwrap(); + + let rs_dest = "pkg/lib.rs"; + let lock_dest = "pkg/Cargo.lock"; + + // Create symlinks for both entries. + let abs_rs = output_dir.join(rs_dest); + let abs_lock = output_dir.join(lock_dest); + fs::create_dir_all(abs_rs.parent().unwrap()).unwrap(); + symlink(&src_file, &abs_rs).unwrap(); + symlink(&src_lock, &abs_lock).unwrap(); + + let maker = make_runfiles_maker( + output_dir.clone(), + &[".rs"], // only retain .rs files + vec![ + (src_file.clone(), rs_dest.to_string()), + (src_lock.clone(), lock_dest.to_string()), + ], + ); + + maker.drain_runfiles_dir_impl(true).unwrap(); + + // .rs file should be retained (copied back as a real file, not a symlink). + assert!(abs_rs.exists()); + assert!(!abs_rs.is_symlink()); + assert_eq!(fs::read_to_string(&abs_rs).unwrap(), "fn main() {}"); + + // .lock file should have been removed. + assert!(!abs_lock.exists()); + } + + #[cfg(any(target_family = "windows", target_family = "unix"))] + #[test] + fn drain_symlinks_missing_with_retained_suffix_still_copies() { + let base = test_dir("drain_sym_missing_retain"); + let output_dir = base.join("runfiles"); + fs::create_dir_all(&output_dir).unwrap(); + + let src_file = base.join("lib.rs"); + fs::write(&src_file, "fn main() {}").unwrap(); + + let dest = "pkg/lib.rs"; + // Create the parent dir but NOT the symlink. 
+ fs::create_dir_all(output_dir.join("pkg")).unwrap(); + + let maker = make_runfiles_maker( + output_dir.clone(), + &[".rs"], // retain .rs files + vec![(src_file.clone(), dest.to_string())], + ); + + // Should succeed — missing symlink is tolerated, file is still copied. + maker.drain_runfiles_dir_impl(true).unwrap(); + + let abs_dest = output_dir.join(dest); + assert!(abs_dest.exists()); + assert!(!abs_dest.is_symlink()); + assert_eq!(fs::read_to_string(&abs_dest).unwrap(), "fn main() {}"); + } + + #[cfg(any(target_family = "windows", target_family = "unix"))] + #[test] + fn drain_no_symlinks_tolerates_missing_files() { + let base = test_dir("drain_nosym_missing"); + let output_dir = base.join("runfiles"); + fs::create_dir_all(&output_dir).unwrap(); + + let src_file = base.join("real.txt"); + fs::write(&src_file, "content").unwrap(); + + // Retain .txt but the file doesn't exist in the runfiles dir. + let maker = make_runfiles_maker( + output_dir.clone(), + &[".txt"], + vec![(src_file.clone(), "pkg/real.txt".to_string())], + ); + + // Should succeed despite the missing file. 
+ maker.drain_runfiles_dir_impl(false).unwrap(); + } + #[cfg(any(target_family = "windows", target_family = "unix"))] #[test] fn replace_symlinks_in_out_dir() { diff --git a/cargo/settings/BUILD.bazel b/cargo/settings/BUILD.bazel index 21d6cb7ef4..1bd5427101 100644 --- a/cargo/settings/BUILD.bazel +++ b/cargo/settings/BUILD.bazel @@ -5,6 +5,7 @@ load( "debug_std_streams_output_group", "experimental_symlink_execroot", "incompatible_runfiles_cargo_manifest_dir", + "symlink_exec_root_skip_patterns", "use_default_shell_env", ) @@ -29,4 +30,6 @@ experimental_symlink_execroot() incompatible_runfiles_cargo_manifest_dir() +symlink_exec_root_skip_patterns() + use_default_shell_env() diff --git a/cargo/settings/settings.bzl b/cargo/settings/settings.bzl index adfcf6bf21..168ac8cea4 100644 --- a/cargo/settings/settings.bzl +++ b/cargo/settings/settings.bzl @@ -38,6 +38,29 @@ def cargo_manifest_dir_filename_suffixes_to_retain(): ], ) +def symlink_exec_root_skip_patterns(): + """A flag which specifies glob-like patterns for exec root entries to skip when symlinking + the exec root into `CARGO_MANIFEST_DIR`. + + Each pattern is matched against exec root entry names. A pattern matches if it equals the + name exactly, or if it ends with `*` and the name starts with the prefix before the `*`. + + Defaults cover Bazel worker directories (which hold locked handles on Windows and are + unnecessary on all platforms) and VCS directories (which can trick build scripts into + thinking they're in a checkout). + """ + string_list_flag( + name = "symlink_exec_root_skip_patterns", + build_setting_default = [ + # Worker temp dirs — hold locked handles on Windows; harmless to skip everywhere. + "local-spawn-runner.*", + "rules_rust_process_wrapper_deps_*", + # VCS directories — prevent build scripts from detecting a checkout. 
+ ".git", + ".github", + ], + ) + def debug_std_streams_output_group(): """A flag which adds a `streams` output group to `cargo_build_script` targets that contain \ the raw `stderr` and `stdout` streams from the build script. diff --git a/extensions/prost/private/prost.bzl b/extensions/prost/private/prost.bzl index 386f0d7d72..95b7e26c1b 100644 --- a/extensions/prost/private/prost.bzl +++ b/extensions/prost/private/prost.bzl @@ -511,12 +511,6 @@ rust_prost_toolchain = rule( mandatory = True, aspects = [rust_analyzer_aspect], ), - "_prost_process_wrapper": attr.label( - doc = "The wrapper script for the Prost protoc plugin.", - cfg = "exec", - executable = True, - default = Label("@rules_rust_prost//private:protoc_wrapper"), - ), "prost_types": attr.label( doc = "The Prost types crates to use.", providers = [[rust_common.crate_info], [rust_common.crate_group_info]], @@ -544,6 +538,12 @@ rust_prost_toolchain = rule( providers = [[rust_common.crate_info], [rust_common.crate_group_info]], aspects = [rust_analyzer_aspect], ), + "_prost_process_wrapper": attr.label( + doc = "The wrapper script for the Prost protoc plugin.", + cfg = "exec", + executable = True, + default = Label("@rules_rust_prost//private:protoc_wrapper"), + ), }, **proto_toolchains.if_legacy_toolchain({ "_legacy_proto_toolchain": attr.label( default = Label("//private:legacy_proto_toolchain"), diff --git a/rust/platform/triple_mappings.bzl b/rust/platform/triple_mappings.bzl index 72dd64225c..90f6a3bd4d 100644 --- a/rust/platform/triple_mappings.bzl +++ b/rust/platform/triple_mappings.bzl @@ -183,12 +183,12 @@ _SYSTEM_TO_BINARY_EXT = { "nixos": "", "none": "", "nto": "", + "threads": ".wasm", "uefi": ".efi", # This is currently a hack allowing us to have the proper # generated extension for the wasm target, similarly to the # windows target "unknown": ".wasm", - "threads": ".wasm", "wasi": ".wasm", "wasip1": ".wasm", "wasip2": ".wasm", @@ -210,9 +210,9 @@ _SYSTEM_TO_STATICLIB_EXT = { "nixos": ".a", 
"none": ".a", "nto": ".a", + "threads": "", "uefi": ".lib", "unknown": "", - "threads": "", "wasi": "", "wasip1": "", "wasip2": "", @@ -234,9 +234,9 @@ _SYSTEM_TO_DYLIB_EXT = { "nixos": ".so", "none": ".so", "nto": ".a", + "threads": ".wasm", "uefi": "", # UEFI doesn't have dynamic linking "unknown": ".wasm", - "threads": ".wasm", "wasi": ".wasm", "wasip1": ".wasm", "wasip2": ".wasm", diff --git a/rust/private/clippy.bzl b/rust/private/clippy.bzl index 318c05af8d..58a82b600f 100644 --- a/rust/private/clippy.bzl +++ b/rust/private/clippy.bzl @@ -165,7 +165,7 @@ def rust_clippy_action(ctx, clippy_executable, process_wrapper, crate_info, conf attr = ctx.rule.attr, file = ctx.file, toolchain = toolchain, - tool_path = clippy_executable.path, + tool_path = clippy_executable, cc_toolchain = cc_toolchain, feature_configuration = feature_configuration, crate_info = crate_info, diff --git a/rust/private/incremental.bzl b/rust/private/incremental.bzl new file mode 100644 index 0000000000..ada2fd0f6a --- /dev/null +++ b/rust/private/incremental.bzl @@ -0,0 +1,76 @@ +"""A module defining Rust incremental compilation support""" + +load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") +load("//rust/private:utils.bzl", "is_exec_configuration") + +def _is_incremental_enabled(ctx, crate_info): + """Returns True if incremental compilation is enabled for this target. + + Args: + ctx (ctx): The calling rule's context object. + crate_info (CrateInfo): The CrateInfo provider of the target crate. + + Returns: + bool: True if incremental compilation is enabled. + """ + if not hasattr(ctx.attr, "_incremental"): + return False + if is_exec_configuration(ctx): + return False + if not ctx.attr._incremental[BuildSettingInfo].value: + return False + if crate_info.type == "proc-macro": + return False + + # Don't enable incremental for external/third-party crates, mirroring cargo's + # behavior. 
External crates rarely change, so incremental saves little; more + # importantly, the disk cache hardlinks their outputs as read-only, and running + # without sandboxing (which worker/no-sandbox requires) would cause rustc to + # fail trying to overwrite those read-only hardlinks. + if ctx.label.workspace_name: + return False + return True + +def construct_incremental_arguments(ctx, crate_info, is_metadata = False): + """Returns a list of 'rustc' flags to configure incremental compilation. + + Args: + ctx (ctx): The calling rule's context object. + crate_info (CrateInfo): The CrateInfo provider of the target crate. + is_metadata (bool): True when building a RustcMetadata (--emit=metadata only) action. + + Returns: + list: A list of strings that are valid flags for 'rustc'. + """ + if not _is_incremental_enabled(ctx, crate_info): + return [] + + # Use a separate cache directory for metadata-only (RustcMetadata) actions. + # Both RustcMetadata(A) and Rustc(A) compile the same crate, so they produce + # the same SVH — but sharing the same incremental path causes a rustc ICE + # ("no entry found for key") because the metadata-only session state is + # incompatible with a full-compilation session. Using distinct paths lets + # both actions benefit from incremental caching without interfering. + suffix = "-meta" if is_metadata else "" + cache_path = "/tmp/rules_rust_incremental/{}{}".format(crate_info.name, suffix) + + # Explicitly set codegen-units=16 to match Cargo's dev profile default + # (since Cargo 1.73). Without this, rustc silently bumps CGUs from 16 to + # 256 when -Cincremental is present, adding ~37% of the cold-build overhead + # for no rebuild benefit at opt-level=0. + return ["-Cincremental={}".format(cache_path), "-Ccodegen-units=16"] + +def is_incremental_enabled(ctx, crate_info): + """Returns True if incremental compilation is enabled for this target. 
+ + This is the public API used by rustc_compile_action to determine whether + to set execution_requirements = {"no-sandbox": "1"}. + + Args: + ctx (ctx): The calling rule's context object. + crate_info (CrateInfo): The CrateInfo provider of the target crate. + + Returns: + bool: True if incremental compilation is enabled. + """ + return _is_incremental_enabled(ctx, crate_info) diff --git a/rust/private/rust.bzl b/rust/private/rust.bzl index c7a04a2515..fb8bf878ff 100644 --- a/rust/private/rust.bzl +++ b/rust/private/rust.bzl @@ -52,6 +52,7 @@ load( "generate_output_diagnostics", "get_edition", "get_import_macro_deps", + "is_exec_configuration", "transform_deps", "transform_sources", ) @@ -166,24 +167,42 @@ def _rust_library_common(ctx, crate_type): rust_metadata = None rustc_rmeta_output = None metadata_supports_pipelining = False + + # Worker pipelining uses a single rustc invocation (no SVH mismatch risk), + # so disable_pipelining (which works around SVH issues in hollow rlib mode) + # should be ignored when worker pipelining is active. + effective_disable_pipelining = getattr(ctx.attr, "disable_pipelining", False) and not toolchain._worker_pipelining if can_build_metadata( toolchain, ctx, crate_type, - disable_pipelining = getattr(ctx.attr, "disable_pipelining", False), + disable_pipelining = effective_disable_pipelining, ): - # The hollow rlib uses .rlib extension (not .rmeta) so rustc reads it as an - # rlib archive containing lib.rmeta with optimized MIR. It is placed in a - # "_hollow/" subdirectory so the full rlib and hollow rlib never appear in the - # same -Ldependency= search directory (which would cause E0463). - rust_metadata = ctx.actions.declare_file( - "_hollow/" + rust_lib_name[:-len(".rlib")] + "-hollow.rlib", - ) + if can_use_metadata_for_pipelining(toolchain, crate_type) and toolchain._worker_pipelining and not is_exec_configuration(ctx): + # Worker pipelining: single rustc invocation emitting both .rmeta and .rlib. 
+ # Use a real .rmeta file (not a hollow rlib) so downstream crates can use + # --extern name=path.rmeta. No -Zno-codegen, no RUSTC_BOOTSTRAP needed. + # The .rmeta is placed in a "_pipeline/" subdirectory so it never coexists + # with the .rlib in the same -Ldependency= search directory (which would + # cause E0463 if they're from different build runs with different SVHs). + # Exec-platform builds always use hollow rlib (with RUSTC_BOOTSTRAP=1) + # to maintain consistent SVH across all three pipelining configurations. + rust_metadata = ctx.actions.declare_file( + "_pipeline/" + rust_lib_name[:-len(".rlib")] + ".rmeta", + ) + else: + # The hollow rlib uses .rlib extension (not .rmeta) so rustc reads it as an + # rlib archive containing lib.rmeta with optimized MIR. It is placed in a + # "_hollow/" subdirectory so the full rlib and hollow rlib never appear in the + # same -Ldependency= search directory (which would cause E0463). + rust_metadata = ctx.actions.declare_file( + "_hollow/" + rust_lib_name[:-len(".rlib")] + "-hollow.rlib", + ) rustc_rmeta_output = generate_output_diagnostics(ctx, rust_metadata) metadata_supports_pipelining = ( can_use_metadata_for_pipelining(toolchain, crate_type) and - not ctx.attr.disable_pipelining + not effective_disable_pipelining ) deps = transform_deps(deps) @@ -607,6 +626,9 @@ RUSTC_ATTRS = { "_extra_rustc_flags": attr.label( default = Label("//rust/settings:extra_rustc_flags"), ), + "_incremental": attr.label( + default = Label("//rust/settings:experimental_incremental"), + ), "_is_proc_macro_dep": attr.label( default = Label("//rust/private:is_proc_macro_dep"), ), @@ -626,6 +648,9 @@ RUSTC_ATTRS = { "_rustc_output_diagnostics": attr.label( default = Label("//rust/settings:rustc_output_diagnostics"), ), + "_worker_pipelining": attr.label( + default = Label("//rust/settings:experimental_worker_pipelining"), + ), } _common_attrs = { diff --git a/rust/private/rust_analyzer.bzl b/rust/private/rust_analyzer.bzl index 
12bc6fa5ab..39d65dae75 100644 --- a/rust/private/rust_analyzer.bzl +++ b/rust/private/rust_analyzer.bzl @@ -260,7 +260,7 @@ def _create_single_crate(ctx, attrs, info): if info.crate.root.short_path in src_map: crate["root_module"] = _WORKSPACE_TEMPLATE + src_map[info.crate.root.short_path].path crate["source"]["include_dirs"].append(path_prefix + info.crate.root.dirname) - + # Ensure workspace crates in the same Bazel package share one source root. # # rust-analyzer picks candidate crates by source root (`relevant_crates`). diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index 08c632e663..5be64def31 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -26,6 +26,7 @@ load("@rules_cc//cc/common:cc_common.bzl", "cc_common") load("@rules_cc//cc/common:cc_info.bzl", "CcInfo") load(":common.bzl", "rust_common") load(":compat.bzl", "abs") +load(":incremental.bzl", "construct_incremental_arguments", "is_incremental_enabled") load(":lto.bzl", "construct_lto_arguments") load( ":providers.bzl", @@ -261,10 +262,24 @@ def collect_deps( transitive_metadata_outputs = [] crate_deps = [] - for dep in deps + proc_macro_deps: + for dep in deps: crate_group = getattr(dep, "crate_group_info", None) if crate_group: - crate_deps.extend(crate_group.dep_variant_infos.to_list()) + for dvi in crate_group.dep_variant_infos.to_list(): + # Skip proc macros from target-config deps; they are handled + # via proc_macro_deps (exec configuration) below. + if not (dvi.crate_info and _is_proc_macro(dvi.crate_info)): + crate_deps.append(dvi) + else: + crate_deps.append(dep) + + for dep in proc_macro_deps: + crate_group = getattr(dep, "crate_group_info", None) + if crate_group: + for dvi in crate_group.dep_variant_infos.to_list(): + # Only include proc macros from exec-config proc_macro_deps. 
+ if dvi.crate_info and _is_proc_macro(dvi.crate_info): + crate_deps.append(dvi) else: crate_deps.append(dep) @@ -703,6 +718,25 @@ def _depend_on_metadata(crate_info, force_depend_on_objects, experimental_use_cc return crate_info.type in ("rlib", "lib") +def _use_worker_pipelining(toolchain, crate_info): + """Returns True if worker-managed pipelining should be used for this crate. + + Worker pipelining requires pipelined_compilation AND experimental_worker_pipelining, + and only applies to rlib/lib crate types (the same as hollow rlib pipelining). + + Args: + toolchain (rust_toolchain): The current target's rust_toolchain. + crate_info (CrateInfo): The crate being compiled. + + Returns: + bool: True if worker pipelining is active for this crate. + """ + return ( + toolchain._worker_pipelining and + toolchain._pipelined_compilation and + crate_info.type in ("rlib", "lib") + ) + def collect_inputs( ctx, file, @@ -819,8 +853,8 @@ def collect_inputs( runtime_libs = cc_toolchain.static_runtime_lib(feature_configuration = feature_configuration) nolinkstamp_compile_inputs = depset( - nolinkstamp_compile_direct_inputs + - ([] if experimental_use_cc_common_link else libs_from_linker_inputs), + direct = nolinkstamp_compile_direct_inputs + + ([] if experimental_use_cc_common_link else libs_from_linker_inputs), transitive = [ crate_info.srcs, transitive_crate_outputs, @@ -906,6 +940,10 @@ def _will_emit_object_file(emit): def _remove_codegen_units(flag): return None if flag.startswith("-Ccodegen-units") else flag +def _parent_dir(file): + """Returns the parent directory of a File. 
For use as a map_each callback.""" + return file.dirname + def construct_arguments( *, ctx, @@ -937,7 +975,8 @@ def construct_arguments( skip_expanding_rustc_env = False, require_explicit_unstable_features = False, always_use_param_file = False, - error_format = None): + error_format = None, + use_worker_pipelining = False): """Builds an Args object containing common rustc flags Args: @@ -945,7 +984,7 @@ def construct_arguments( attr (struct): The attributes for the target. These may be different from ctx.attr in an aspect context. file (struct): A struct containing files defined in label type attributes marked as `allow_single_file`. toolchain (rust_toolchain): The current target's `rust_toolchain` - tool_path (str): Path to rustc + tool_path (File): The rustc executable File object (or str path for backwards compat) cc_toolchain (CcToolchain): The CcToolchain for the current target. feature_configuration (FeatureConfiguration): Class used to construct command lines from CROSSTOOL features. crate_info (CrateInfo): The CrateInfo provider of the target crate @@ -968,9 +1007,12 @@ def construct_arguments( use_json_output (bool): Have rustc emit json and process_wrapper parse json messages to output rendered output. build_metadata (bool): Generate CLI arguments for building *only* .rmeta files. This requires use_json_output. force_depend_on_objects (bool): Force using `.rlib` object files instead of metadata (`.rmeta`) files even if they are available. + experimental_use_cc_common_link (bool): Whether to use cc_common.link for the final binary link step. skip_expanding_rustc_env (bool): Whether to skip expanding CrateInfo.rustc_env_attr require_explicit_unstable_features (bool): Whether to require all unstable features to be explicitly opted in to using `-Zallow-features=...`. + always_use_param_file (bool): Whether to always use a param file for rustc arguments. error_format (str, optional): Error format to pass to the `--error-format` command line argument. 
If set to None, uses the "_error_format" entry in `attr`. + use_worker_pipelining (bool): Whether worker-managed pipelining is active. When True, per-action flags are routed to the paramfile for worker key stability. Returns: tuple: A tuple of the following items @@ -990,13 +1032,26 @@ def construct_arguments( env = _get_rustc_env(attr, toolchain, crate_info.name) + # Determine worker pipelining mode early so we can route per-action flags + # to the right Args object. When worker pipelining is active, per-action + # flags must go in the @paramfile (rustc_flags) rather than the startup + # args (process_wrapper_flags). Bazel derives the worker key from startup + # args — if per-action values like --output-file are in startup args, every + # action gets a unique worker key and thus a separate OS process, defeating + # the purpose of persistent workers. + use_worker_pipe = use_worker_pipelining + # Wrapper args first process_wrapper_flags = ctx.actions.args() - for build_env_file in build_env_files: - process_wrapper_flags.add("--env-file", build_env_file) - - process_wrapper_flags.add_all(build_flags_files, before_each = "--arg-file") + # --env-file and --arg-file are per-action (different build script deps per + # crate). For worker pipelining they must go into the paramfile so that all + # actions share a single worker key. The non-worker-pipe path adds them here; + # the worker-pipe path adds them after rustc_flags is created below. + if not use_worker_pipe: + for build_env_file in build_env_files: + process_wrapper_flags.add("--env-file", build_env_file) + process_wrapper_flags.add_all(build_flags_files, before_each = "--arg-file") if require_explicit_unstable_features: process_wrapper_flags.add("--require-explicit-unstable-features", "true") @@ -1015,11 +1070,6 @@ def construct_arguments( # use `${pwd}` which resolves the `exec_root` at action execution time. 
process_wrapper_flags.add("--subst", "pwd=${pwd}") - # If stamping is enabled, enable the functionality in the process wrapper - if stamp: - process_wrapper_flags.add("--volatile-status-file", ctx.version_file) - process_wrapper_flags.add("--stable-status-file", ctx.info_file) - # Both ctx.label.workspace_root and ctx.label.package are relative paths # and either can be empty strings. Avoid trailing/double slashes in the path. components = "${{pwd}}/{}/{}".format(ctx.label.workspace_root, ctx.label.package).split("/") @@ -1047,13 +1097,31 @@ def construct_arguments( rustc_flags.add(crate_info.name, format = "--crate-name=%s") rustc_flags.add(crate_info.type, format = "--crate-type=%s") + # Stamp files are per-action inputs. Keep them out of worker startup args + # when worker pipelining is active so stamped actions still share a WorkerKey. + if stamp: + if use_worker_pipe: + rustc_flags.add("--volatile-status-file", ctx.version_file) + rustc_flags.add("--stable-status-file", ctx.info_file) + else: + process_wrapper_flags.add("--volatile-status-file", ctx.version_file) + process_wrapper_flags.add("--stable-status-file", ctx.info_file) + if error_format == None: error_format = get_error_format(attr, "_error_format") if use_json_output: # If --error-format was set to json, we just pass the output through # Otherwise process_wrapper uses the "rendered" field. - process_wrapper_flags.add("--rustc-output-format", "json" if error_format == "json" else "rendered") + # + # For worker pipelining, put this in the @paramfile (per-request args) + # rather than startup args, so all actions share the same worker key. + # prepare_param_file strips it before rustc sees it. + output_format = "json" if error_format == "json" else "rendered" + if use_worker_pipe: + rustc_flags.add("--rustc-output-format", output_format) + else: + process_wrapper_flags.add("--rustc-output-format", output_format) # Configure rustc json output by adding artifact notifications. 
# These are filtered out by process_wrapper. @@ -1070,17 +1138,37 @@ def construct_arguments( if build_metadata: if crate_info.type in ("rlib", "lib"): - # Hollow rlib approach (Buck2-style): rustc runs to completion with -Zno-codegen, - # producing a hollow .rlib (metadata only, no object code) via --emit=link=. - # No need to kill rustc — -Zno-codegen skips codegen entirely and exits quickly. - rustc_flags.add("-Zno-codegen") + # Hollow rlib approach (Buck2-style): rustc runs with -Zno-codegen, producing + # a hollow rlib (metadata only, no object code) via --emit=link=. + # Worker pipelining uses --emit=dep-info,metadata,link instead (no -Zno-codegen): + # the background rustc produces the full rlib, so codegen must not be skipped. + # Exec-platform builds always use hollow rlib (not worker pipelining). + if not use_worker_pipe: + rustc_flags.add("-Zno-codegen") # else: IDE-only metadata for non-rlib types (bin, proc-macro, etc.): rustc exits # naturally after writing .rmeta via --emit=dep-info,metadata (no kill needed). if crate_info.rustc_rmeta_output: - process_wrapper_flags.add("--output-file", crate_info.rustc_rmeta_output.path) + # For worker pipelining, --output-file goes in the @paramfile (per-request) + # so all actions share the same worker key. prepare_param_file strips it + # before rustc sees it; the worker relocates it before --. + if use_worker_pipe: + rustc_flags.add("--output-file", crate_info.rustc_rmeta_output) + else: + process_wrapper_flags.add("--output-file", crate_info.rustc_rmeta_output) elif crate_info.rustc_output: - process_wrapper_flags.add("--output-file", crate_info.rustc_output.path) + if use_worker_pipe: + rustc_flags.add("--output-file", crate_info.rustc_output) + else: + process_wrapper_flags.add("--output-file", crate_info.rustc_output) + + # For worker pipelining, add --env-file and --arg-file to the paramfile + # (deferred from above where the non-worker-pipe path adds them to + # process_wrapper_flags). 
+ if use_worker_pipe: + for build_env_file in build_env_files: + rustc_flags.add("--env-file", build_env_file) + rustc_flags.add_all(build_flags_files, before_each = "--arg-file") rustc_flags.add(error_format, format = "--error-format=%s") @@ -1096,7 +1184,7 @@ def construct_arguments( rustc_flags.add(output_hash, format = "--codegen=extra-filename=-%s") if output_dir: - rustc_flags.add(output_dir, format = "--out-dir=%s") + rustc_flags.add_all([crate_info.output], map_each = _parent_dir, format_each = "--out-dir=%s") compilation_mode = get_compilation_mode_opts(ctx, toolchain) rustc_flags.add(compilation_mode.opt_level, format = "--codegen=opt-level=%s") @@ -1109,15 +1197,23 @@ def construct_arguments( emit_without_paths = [] for kind in emit: - if kind == "link" and build_metadata and crate_info.type in ("rlib", "lib") and crate_info.metadata: + if kind == "link" and build_metadata and crate_info.type in ("rlib", "lib") and crate_info.metadata and not use_worker_pipe: # Hollow rlib: direct rustc's link output to the -hollow.rlib path. # The file has .rlib extension so rustc reads it as an rlib archive # (with optimized MIR in lib.rmeta). Using a .rmeta path would cause # E0786 "found invalid metadata files" because rustc parses .rmeta files # as raw metadata blobs, not rlib archives. + # Worker pipelining: let link go to --out-dir normally (no redirect); + # the background rustc produces the full rlib directly. rustc_flags.add(crate_info.metadata, format = "--emit=link=%s") elif kind == "link" and crate_info.type == "bin" and crate_info.output != None: rustc_flags.add(crate_info.output, format = "--emit=link=%s") + elif kind == "metadata" and build_metadata and use_worker_pipe and crate_info.metadata: + # Worker pipelining: direct the .rmeta to the declared _pipeline/ output + # path so it's produced correctly in both worker and sandbox execution. 
+ # Without this, rustc writes .rmeta to --out-dir (the base directory), + # but the declared output is in the _pipeline/ subdirectory. + rustc_flags.add(crate_info.metadata, format = "--emit=metadata=%s") else: emit_without_paths.append(kind) @@ -1132,8 +1228,9 @@ def construct_arguments( if linker_script: rustc_flags.add(linker_script, format = "--codegen=link-arg=-T%s") - # Tell Rustc where to find the standard library (or libcore) - rustc_flags.add_all(toolchain.rust_std_paths, before_each = "-L", format_each = "%s") + # Tell Rustc where to find the standard library (or libcore). + # Use the File depset with map_each=_parent_dir so PathMapper can rewrite paths. + rustc_flags.add_all(toolchain.rust_std, map_each = _parent_dir, format_each = "-L%s", uniquify = True) rustc_flags.add_all(rust_flags, map_each = map_flag) # Gather data path from crate_info since it is inherited from real crate for rust_doc and rust_test @@ -1144,6 +1241,16 @@ def construct_arguments( _add_lto_flags(ctx, toolchain, rustc_flags, crate_info) _add_codegen_units_flags(toolchain, emit, rustc_flags) + # RustcMetadata and Rustc both use incremental compilation, but with separate + # cache directories (see construct_incremental_arguments). Using the same path + # for both causes a rustc ICE ("no entry found for key") because the metadata- + # only session state is incompatible with a full-compilation session. The + # distinct paths allow both to benefit from caching; because SVH does not + # depend on the incremental cache path, both actions produce the same SVH value + # for the same source, so Rustc(A) overwriting libA-HASH.rmeta in execroot is + # safe for concurrently running sandboxed RustcMetadata(B) actions. 
+ _add_incremental_flags(ctx, rustc_flags, crate_info, is_metadata = build_metadata) + # Use linker_type to determine whether to use direct or indirect linker invocation # If linker_type is not explicitly set, infer from which linker is actually being used ld_is_direct_driver = False @@ -1231,9 +1338,14 @@ def construct_arguments( {}, )) - # Ensure the sysroot is set for the target platform + # Ensure the sysroot is set for the target platform. + # Use the sysroot_anchor File (not the string path) so Bazel's PathMapper + # can rewrite the config segment for --experimental_output_paths=strip. if toolchain._toolchain_generated_sysroot: - rustc_flags.add(toolchain.sysroot, format = "--sysroot=%s") + if hasattr(toolchain, "sysroot_anchor"): + rustc_flags.add_all([toolchain.sysroot_anchor], map_each = _parent_dir, format_each = "--sysroot=%s") + else: + rustc_flags.add(toolchain.sysroot, format = "--sysroot=%s") if toolchain._rename_first_party_crates: env["RULES_RUST_THIRD_PARTY_DIR"] = toolchain._third_party_dir @@ -1314,6 +1426,45 @@ def collect_extra_rustc_flags(ctx, toolchain, crate_root, crate_type): return flags +def _build_worker_exec_reqs(use_worker_pipelining, is_incremental, has_out_dir = False): + """Builds execution_requirements for Rustc worker actions. + + Args: + use_worker_pipelining: Whether worker-managed pipelining is active. + is_incremental: Whether incremental compilation is enabled. + has_out_dir: Whether the crate has a build script OUT_DIR. If True, + path mapping is disabled because OUT_DIR is an env var that + PathMapper cannot rewrite. + + Returns: + A dict of execution_requirements. + """ + reqs = {} + if is_incremental or use_worker_pipelining: + reqs["requires-worker-protocol"] = "json" + if use_worker_pipelining: + reqs["supports-multiplex-workers"] = "1" + reqs["supports-multiplex-sandboxing"] = "1" + + # Cancellation is fully effective for pipelined requests (kills the + # background rustc). 
Non-pipelined requests within the same worker + # (e.g. proc-macros) acknowledge the cancel but the subprocess runs + # to completion — this is consistent with Bazel's best-effort semantics. + reqs["supports-worker-cancellation"] = "1" + else: + reqs["supports-workers"] = "1" + + # no-sandbox is no longer needed — the worker uses real execroot CWD + # (or sandbox CWD when sandboxed), so incremental cache paths are + # stable regardless of sandboxing. + + # Enable path mapping for --experimental_output_paths=strip deduplication. + # Disabled when a build script OUT_DIR is present because env vars are not + # rewritten by PathMapper, causing include!() to reference unrewritten paths. + if not has_out_dir: + reqs["supports-path-mapping"] = "1" + return reqs + def rustc_compile_action( *, ctx, @@ -1360,12 +1511,39 @@ def rustc_compile_action( rustc_output = crate_info.rustc_output rustc_rmeta_output = crate_info.rustc_rmeta_output + # Use worker pipelining (single rustc invocation, .rmeta output) when enabled. + # This takes precedence over the hollow rlib approach for rlib/lib crates. + # Exec-platform builds (build script deps) skip worker pipelining: they always + # use hollow rlib so RUSTC_BOOTSTRAP=1 is set consistently. Without this, switching + # between hollow-rlib and worker-pipe modes changes the SVH for exec-platform rlibs, + # causing E0460 when Bazel action-cache-hits some exec crates but recompiles others. + use_worker_pipelining = _use_worker_pipelining(toolchain, crate_info) and not is_exec_configuration(ctx) and bool(build_metadata) + + # Worker pipelining requires RustcMetadata and Rustc to share the same worker + # process (so they share PipelineState). Bazel worker key = startup args = + # everything before the @paramfile. The only startup-arg difference between + # RustcMetadata and Rustc is --output-file (companion .rustc-output files). 
+ # Suppress those companion files when worker pipelining is active so both + # actions have identical startup args → same worker key → same process. + if use_worker_pipelining: + rustc_output = None + rustc_rmeta_output = None + # Use the hollow rlib approach (Buck2-style) for rlib/lib crate types when a metadata - # action is being created. This always applies for rlib/lib regardless of whether - # pipelining is globally enabled — the hollow rlib is simpler than killing rustc. + # action is being created, UNLESS worker pipelining is active (which uses a single + # rustc invocation with --emit=dep-info,metadata,link and .rmeta output instead). # Non-rlib types (bin, proc-macro, etc.) use --emit=dep-info,metadata instead # (rustc exits naturally after writing .rmeta, no process-wrapper kill needed). - use_hollow_rlib = bool(build_metadata) and crate_info.type in ("rlib", "lib") + use_hollow_rlib = bool(build_metadata) and crate_info.type in ("rlib", "lib") and not use_worker_pipelining + + # Include pipelining mode in rustc flags so the action cache key differs + # between pipelining modes. Different modes produce .rlib/.rmeta with + # different SVH chains that are incompatible — sharing cached outputs + # across modes causes "can't find crate" errors (E0463). + if use_worker_pipelining: + rust_flags = rust_flags + ["--cfg=rules_rust_worker_pipelining"] + elif use_hollow_rlib: + rust_flags = rust_flags + ["--cfg=rules_rust_hollow_rlib"] # Determine whether to use cc_common.link: # * either if experimental_use_cc_common_link is 1, @@ -1465,12 +1643,18 @@ def rustc_compile_action( elif ctx.attr.require_explicit_unstable_features == -1: require_explicit_unstable_features = toolchain.require_explicit_unstable_features + # When incremental compilation or worker pipelining is enabled, force a param file + # so the worker strategy sees exactly one @flagfile in the command line (Bazel + # requirement). 
For worker pipelining, the metadata handler parses the param file + # to spawn rustc directly; it needs the args in a file to apply substitutions. + use_param_file_always = is_incremental_enabled(ctx, crate_info) or use_worker_pipelining + args, env_from_args = construct_arguments( ctx = ctx, attr = attr, file = ctx.file, toolchain = toolchain, - tool_path = toolchain.rustc.path, + tool_path = toolchain.rustc, cc_toolchain = cc_toolchain, emit = emit, feature_configuration = feature_configuration, @@ -1491,7 +1675,8 @@ def rustc_compile_action( experimental_use_cc_common_link = experimental_use_cc_common_link, skip_expanding_rustc_env = skip_expanding_rustc_env, require_explicit_unstable_features = require_explicit_unstable_features, - always_use_param_file = not ctx.executable._process_wrapper, + always_use_param_file = use_param_file_always or not ctx.executable._process_wrapper, + use_worker_pipelining = use_worker_pipelining, ) args_metadata = None @@ -1501,6 +1686,11 @@ def rustc_compile_action( # -Zno-codegen). dep-info must be included: it affects the SVH stored in the # rlib, so both actions must include it to keep SVHs consistent. metadata_emit = ["dep-info", "link"] + elif use_worker_pipelining: + # Worker pipelining: single rustc invocation emits metadata+link in one pass. + # The worker monitors stderr for the rmeta artifact JSON, returns the .rmeta + # early, and keeps rustc running in the background to finish codegen. + metadata_emit = ["dep-info", "metadata", "link"] else: # IDE-only metadata for non-rlib types (bin, proc-macro, etc.): rustc exits # naturally after writing .rmeta with --emit=dep-info,metadata. 
@@ -1510,7 +1700,7 @@ def rustc_compile_action( attr = attr, file = ctx.file, toolchain = toolchain, - tool_path = toolchain.rustc.path, + tool_path = toolchain.rustc, cc_toolchain = cc_toolchain, emit = metadata_emit, feature_configuration = feature_configuration, @@ -1529,13 +1719,67 @@ def rustc_compile_action( build_metadata = True, experimental_use_cc_common_link = experimental_use_cc_common_link, require_explicit_unstable_features = require_explicit_unstable_features, + always_use_param_file = use_param_file_always, + use_worker_pipelining = use_worker_pipelining, ) + # Worker pipelining: add pipelining mode flags to rustc_flags (the @paramfile). + # IMPORTANT: These must NOT go in process_wrapper_flags (startup args). Startup + # args determine the Bazel worker key — if RustcMetadata and Rustc have different + # startup args, Bazel routes them to different worker processes and they cannot + # share PipelineState. With these flags in rustc_flags (per-request @paramfile), + # both actions share the same startup args → same worker key → same worker. + # + # --json=artifacts is already emitted by construct_arguments via use_json_output=True. + if use_worker_pipelining and build_metadata: + # Use crate_info.output.short_path (unique per output artifact) sanitized for + # filesystem use. This is collision-free and human-readable. + pipeline_key = crate_info.output.short_path.replace("/", "_").replace(".", "_") + + # Metadata action: tell the worker to start rustc and return .rmeta early. + args_metadata.rustc_flags.add("--pipelining-metadata") + args_metadata.rustc_flags.add("--pipelining-key={}".format(pipeline_key)) + + # Full action: tell the worker to wait for the background rustc started above. + args.rustc_flags.add("--pipelining-full") + args.rustc_flags.add("--pipelining-key={}".format(pipeline_key)) + + # Pass the expected .rlib path for the local-mode no-op optimization. 
+ # When the process_wrapper runs outside a worker (local/sandboxed fallback), + # it checks whether this file already exists (produced as a side-effect by + # the metadata action's rustc). If so, it skips the redundant second rustc + # invocation, guaranteeing SVH consistency (single invocation per crate). + args.rustc_flags.add("--pipelining-rlib-path={}".format(crate_info.output.path)) + env = dict(ctx.configuration.default_shell_env) # this is the final list of env vars env.update(env_from_args) + # Worker pipelining: Bazel's worker key includes the action env. Per-crate env vars + # (CARGO_CRATE_NAME, CARGO_MANIFEST_DIR, OUT_DIR, REPOSITORY_NAME, etc.) differ per + # crate, creating a unique worker key per crate → separate OS process per action → + # metadata and full can never share PipelineState. Fix: write per-crate env vars to + # an env file passed via --env-file in the @paramfile, keeping only stable vars + # (PATH, etc.) in the action env so all actions share the same worker key. + worker_env_file = None + if use_worker_pipelining: + # Write all per-crate env vars to a file. The process_wrapper reads these + # via --env-file and sets them before running rustc. + env_content = "\n".join(["{}={}".format(k, v) for k, v in sorted(env_from_args.items())]) + worker_env_file = ctx.actions.declare_file(crate_info.output.basename + ".worker_env") + ctx.actions.write(worker_env_file, env_content) + + # Add --env-file to the @paramfile for both metadata and full actions. + # This goes in rustc_flags (the paramfile) so it doesn't affect the worker key. + # prepare_param_file / the worker handler strips it before rustc sees it. + args.rustc_flags.add("--env-file", worker_env_file) + if args_metadata: + args_metadata.rustc_flags.add("--env-file", worker_env_file) + + # Strip per-crate vars from action env — keep only default_shell_env (PATH etc.) 
+ env = dict(ctx.configuration.default_shell_env) + if use_hollow_rlib: # Both the metadata action and the full Rustc action must have RUSTC_BOOTSTRAP=1 # for SVH compatibility. RUSTC_BOOTSTRAP=1 changes the crate hash — setting it @@ -1590,10 +1834,31 @@ def rustc_compile_action( action_outputs.append(dsym_folder) if ctx.executable._process_wrapper: + # Compute execution requirements for incremental compilation. + # - "no-sandbox": ensures local fallback builds see stable source paths + # (avoids the rustc ICE that occurs when sandbox paths change between builds). + # - "supports-workers": declares that process_wrapper supports Bazel's + # persistent worker protocol. When --strategy=Rustc=worker,local is set, + # Bazel uses the worker (which runs in execroot, also avoiding the sandbox + # path problem), enabling dynamic execution strategy as well. + exec_reqs = _build_worker_exec_reqs(use_worker_pipelining, is_incremental_enabled(ctx, crate_info), has_out_dir = bool(out_dir)) + + # When incremental compilation or worker pipelining is active and pipelining is + # enabled, add build_metadata as an ordering dep so Rustc(A) starts only after + # RustcMetadata(A) completes. For worker pipelining, this ensures the metadata + # action has started rustc before the full action tries to look it up. + # For incremental, prepare_outputs() chmods rmeta writable before rustc overwrites. 
+ rustc_inputs = compile_inputs + if worker_env_file: + rustc_inputs = depset([worker_env_file], transitive = [rustc_inputs]) + compile_inputs_for_metadata = depset([worker_env_file], transitive = [compile_inputs_for_metadata]) + if build_metadata and (is_incremental_enabled(ctx, crate_info) or use_worker_pipelining): + rustc_inputs = depset([build_metadata], transitive = [rustc_inputs]) + # Run as normal ctx.actions.run( executable = ctx.executable._process_wrapper, - inputs = compile_inputs, + inputs = rustc_inputs, outputs = action_outputs, env = env, arguments = args.all, @@ -1607,15 +1872,29 @@ def rustc_compile_action( ), toolchain = "@rules_rust//rust:toolchain_type", resource_set = get_rustc_resource_set(toolchain), + execution_requirements = exec_reqs, ) if args_metadata: + # When incremental compilation is enabled, RustcMetadata also runs as a + # worker (no-sandbox) so it can read and write the -meta-suffixed + # incremental cache at /tmp/rules_rust_incremental/<crate_name>-meta. + # Without worker mode it would be sandboxed and unable to accumulate + # incremental state, making every rebuild a cold compilation. + meta_exec_reqs = _build_worker_exec_reqs(use_worker_pipelining, is_incremental_enabled(ctx, crate_info), has_out_dir = bool(out_dir)) ctx.actions.run( executable = ctx.executable._process_wrapper, inputs = compile_inputs_for_metadata, outputs = [build_metadata] + [x for x in [rustc_rmeta_output] if x], env = env, arguments = args_metadata.all, - mnemonic = "RustcMetadata", + # When worker pipelining is active, use the same mnemonic as the + # full Rustc action so both actions share the same multiplex worker + # process. This is required because Bazel's worker key is derived + # from (mnemonic + executable + startup_args), and PipelineState is + # an in-process HashMap. With different mnemonics, RustcMetadata and + # Rustc would always go to different worker processes and could never + # share pipeline state. 
+ mnemonic = "Rustc" if use_worker_pipelining else "RustcMetadata", progress_message = "Compiling Rust metadata {} {}{} ({} file{})".format( crate_info.type, ctx.label.name, @@ -1624,6 +1903,7 @@ def rustc_compile_action( "" if len(srcs) == 1 else "s", ), toolchain = "@rules_rust//rust:toolchain_type", + execution_requirements = meta_exec_reqs, ) elif hasattr(ctx.executable, "_bootstrap_process_wrapper"): # Run without process_wrapper @@ -1953,6 +2233,17 @@ def _add_codegen_units_flags(toolchain, emit, args): args.add("-Ccodegen-units={}".format(toolchain._codegen_units)) +def _add_incremental_flags(ctx, args, crate_info, is_metadata = False): + """Adds flags to an Args object to configure incremental compilation for 'rustc'. + + Args: + ctx (ctx): The calling rule's context object. + args (Args): A reference to an Args object. + crate_info (CrateInfo): The CrateInfo provider of the target crate. + is_metadata (bool): True when building a RustcMetadata action. + """ + args.add_all(construct_incremental_arguments(ctx, crate_info, is_metadata = is_metadata)) + def establish_cc_info(ctx, attr, crate_info, toolchain, cc_toolchain, feature_configuration, interface_library): """If the produced crate is suitable yield a CcInfo to allow for interop with cc rules @@ -2305,7 +2596,7 @@ def _get_crate_dirname_pipelined(crate): return crate.metadata.dirname return crate.output.dirname -def _portable_link_flags(lib, use_pic, ambiguous_libs, get_lib_name, for_windows = False, for_darwin = False, flavor_msvc = False): +def _portable_link_flags(lib, use_pic, ambiguous_libs, get_lib_name, for_darwin = False, flavor_msvc = False): artifact = get_preferred_artifact(lib, use_pic) if ambiguous_libs and artifact.path in ambiguous_libs: artifact = ambiguous_libs[artifact.path] diff --git a/rust/private/rustdoc.bzl b/rust/private/rustdoc.bzl index f302bab743..c530752cd8 100644 --- a/rust/private/rustdoc.bzl +++ b/rust/private/rustdoc.bzl @@ -131,7 +131,7 @@ def rustdoc_compile_action( 
attr = ctx.attr, file = ctx.file, toolchain = toolchain, - tool_path = toolchain.rust_doc.short_path if is_test else toolchain.rust_doc.path, + tool_path = toolchain.rust_doc.short_path if is_test else toolchain.rust_doc, cc_toolchain = cc_toolchain, feature_configuration = feature_configuration, crate_info = rustdoc_crate_info, diff --git a/rust/private/unpretty.bzl b/rust/private/unpretty.bzl index be111c83a3..66c7a4b63f 100644 --- a/rust/private/unpretty.bzl +++ b/rust/private/unpretty.bzl @@ -190,7 +190,7 @@ def _rust_unpretty_aspect_impl(target, ctx): attr = ctx.rule.attr, file = ctx.file, toolchain = toolchain, - tool_path = toolchain.rustc.path, + tool_path = toolchain.rustc, cc_toolchain = cc_toolchain, feature_configuration = feature_configuration, crate_info = crate_info, diff --git a/rust/private/utils.bzl b/rust/private/utils.bzl index 4d710b7fa9..34c5043122 100644 --- a/rust/private/utils.bzl +++ b/rust/private/utils.bzl @@ -530,7 +530,7 @@ def filter_deps(ctx): proc_macro_deps = [] for dep in ctx.attr.proc_macro_deps: - if CrateInfo in dep and dep[CrateInfo].type == "proc-macro": + if (CrateInfo in dep and dep[CrateInfo].type == "proc-macro") or CrateGroupInfo in dep: proc_macro_deps.append(dep) return deps, proc_macro_deps diff --git a/rust/settings/BUILD.bazel b/rust/settings/BUILD.bazel index e57f5ec5d3..58945ae5c9 100644 --- a/rust/settings/BUILD.bazel +++ b/rust/settings/BUILD.bazel @@ -12,12 +12,14 @@ load( "collect_cfgs", "default_allocator_library", "error_format", + "experimental_incremental", "experimental_link_std_dylib", "experimental_per_crate_rustc_flag", "experimental_use_allocator_libraries_with_mangled_symbols", "experimental_use_cc_common_link", "experimental_use_coverage_metadata_files", "experimental_use_global_allocator", + "experimental_worker_pipelining", "extra_exec_rustc_env", "extra_exec_rustc_flag", "extra_exec_rustc_flags", @@ -75,6 +77,10 @@ codegen_units() collect_cfgs() +experimental_incremental() + 
+experimental_worker_pipelining() + default_allocator_library() error_format() diff --git a/rust/settings/settings.bzl b/rust/settings/settings.bzl index b155eb172a..5f98a67ffb 100644 --- a/rust/settings/settings.bzl +++ b/rust/settings/settings.bzl @@ -562,6 +562,93 @@ def codegen_units(): build_setting_default = -1, ) +def experimental_incremental(): + """A flag to enable incremental compilation for Rust targets. + + When enabled, rustc is invoked with `-Cincremental=/tmp/rules_rust_incremental/` + and sandboxing is disabled for Rustc actions so the incremental cache persists between builds. + + This flag is intended for local development builds only. Do not use in CI or release builds + as it disables sandboxing and produces non-hermetic outputs. + + Usage in .bazelrc: + build:dev --@rules_rust//rust/settings:experimental_incremental + """ + bool_flag( + name = "experimental_incremental", + build_setting_default = False, + ) + +def experimental_worker_pipelining(): + """A flag to enable worker-managed pipelined compilation. + + When enabled (alongside pipelined_compilation), the persistent worker uses a single rustc + invocation per pipelined rlib/lib crate instead of two. The worker starts rustc with + --emit=dep-info,metadata,link, returns the .rmeta file as soon as metadata is ready, + and caches the running process so the full compile action can retrieve the .rlib without + re-invoking rustc. 
+ + Benefits over the default two-invocation (hollow rlib) approach: + - Eliminates SVH mismatch with non-deterministic proc macros (proc macro runs once) + - No -Zno-codegen / RUSTC_BOOTSTRAP=1 required + - Reduces total rustc invocations by ~50% for pipelined crates + + Requires pipelined_compilation=true and worker strategy: + build --@rules_rust//rust/settings:pipelined_compilation=true + build --@rules_rust//rust/settings:experimental_worker_pipelining=true + build --strategy=Rustc=worker + + For sandboxed worker pipelining (recommended for hermetic builds): + build --@rules_rust//rust/settings:pipelined_compilation=true + build --@rules_rust//rust/settings:experimental_worker_pipelining=true + build --experimental_worker_multiplex_sandboxing + build --strategy=Rustc=worker,sandboxed + + For dynamic execution (local worker racing against remote execution): + build --@rules_rust//rust/settings:pipelined_compilation=true + build --@rules_rust//rust/settings:experimental_worker_pipelining=true + build --experimental_worker_multiplex_sandboxing + build --internal_spawn_scheduler + build --strategy=Rustc=dynamic + build --dynamic_local_strategy=Rustc=worker,sandboxed + build --dynamic_remote_strategy=Rustc=remote + + NOTE: The remote leg MUST use actual remote execution (not --dynamic_remote_strategy= + Rustc=sandboxed). When the sandboxed leg wins, it produces .rmeta and .rlib from + separate rustc invocations, causing SVH mismatch errors in downstream binary targets. + With real remote execution, the remote leg runs the action independently and produces + consistent artifacts. 
+ + With incremental compilation (compatible with sandboxing): + build --@rules_rust//rust/settings:pipelined_compilation=true + build --@rules_rust//rust/settings:experimental_worker_pipelining=true + build --@rules_rust//rust/settings:experimental_incremental=true + build --experimental_worker_multiplex_sandboxing + build --strategy=Rustc=worker,sandboxed + + Non-worker fallback behavior: + When workers are unavailable and Bazel falls back to local or sandboxed + execution, worker pipelining actions run two separate rustc invocations + (metadata + full) instead of one. The process_wrapper mitigates this with + a no-op optimization: the metadata action's rustc produces the .rlib as a + side-effect (via --emit=link); if the .rlib persists on disk (unsandboxed + local execution), the full action detects it and skips its own rustc, + guaranteeing SVH consistency from a single invocation per crate. + + When the .rlib side-effect is NOT available (sandboxed execution discards + undeclared outputs, or the metadata action was an action-cache hit), the + full action falls through to running rustc normally. This works correctly + for deterministic proc macros (identical inputs produce identical SVH). + Nondeterministic proc macros (e.g. HashMap iteration in macro expansion) + may produce E0460 (SVH mismatch); the process_wrapper emits a diagnostic + directing the user to set experimental_worker_pipelining=false to fall + back to hollow-rlib pipelining, which is safe for all execution strategies. + """ + bool_flag( + name = "experimental_worker_pipelining", + build_setting_default = False, + ) + # buildifier: disable=unnamed-macro def collect_cfgs(): """Enable collection of cfg flags with results stored in CrateInfo.cfgs. 
diff --git a/rust/toolchain.bzl b/rust/toolchain.bzl index c2aa484ac2..8356426397 100644 --- a/rust/toolchain.bzl +++ b/rust/toolchain.bzl @@ -393,6 +393,7 @@ def _rust_toolchain_impl(ctx): rename_first_party_crates = ctx.attr._rename_first_party_crates[BuildSettingInfo].value third_party_dir = ctx.attr._third_party_dir[BuildSettingInfo].value pipelined_compilation = ctx.attr._pipelined_compilation[BuildSettingInfo].value + worker_pipelining = ctx.attr._worker_pipelining[BuildSettingInfo].value no_std = ctx.attr._no_std[BuildSettingInfo].value lto = ctx.attr.lto[RustLtoInfo] @@ -604,6 +605,7 @@ def _rust_toolchain_impl(ctx): extra_exec_rustc_flags = expanded_extra_exec_rustc_flags, per_crate_rustc_flags = ctx.attr.per_crate_rustc_flags, sysroot = sysroot_path, + sysroot_anchor = sysroot.sysroot_anchor, sysroot_short_path = sysroot_short_path, target_arch = target_arch, target_flag_value = target_json.path if target_json else target_triple.str, @@ -617,6 +619,7 @@ def _rust_toolchain_impl(ctx): _rename_first_party_crates = rename_first_party_crates, _third_party_dir = third_party_dir, _pipelined_compilation = pipelined_compilation, + _worker_pipelining = worker_pipelining, _experimental_link_std_dylib = _experimental_link_std_dylib(ctx), _experimental_use_cc_common_link = _experimental_use_cc_common_link(ctx), _experimental_use_global_allocator = experimental_use_global_allocator, @@ -892,6 +895,9 @@ rust_toolchain = rule( "This flag is only relevant when used together with --@rules_rust//rust/settings:toolchain_generated_sysroot." 
), ), + "_worker_pipelining": attr.label( + default = Label("//rust/settings:experimental_worker_pipelining"), + ), }, toolchains = [ config_common.toolchain_type("@bazel_tools//tools/cpp:toolchain_type", mandatory = False), diff --git a/test/chained_direct_deps/mod1.rs b/test/chained_direct_deps/mod1.rs index a66f490afb..5963f0522d 100644 --- a/test/chained_direct_deps/mod1.rs +++ b/test/chained_direct_deps/mod1.rs @@ -1,6 +1,9 @@ pub fn world() -> String { "world".to_owned() } +pub fn hello() -> String { + "hello".to_owned() +} #[cfg(test)] mod test { @@ -8,4 +11,10 @@ mod test { fn test_world() { assert_eq!(super::world(), "world"); } + #[test] + fn test_hello() { + assert_eq!(super::world(), "world"); + + assert_eq!(super::hello(), "hello"); + } } diff --git a/test/unit/incremental/BUILD.bazel b/test/unit/incremental/BUILD.bazel new file mode 100644 index 0000000000..c3749d7525 --- /dev/null +++ b/test/unit/incremental/BUILD.bazel @@ -0,0 +1,5 @@ +load(":incremental_test_suite.bzl", "incremental_test_suite") + +incremental_test_suite( + name = "incremental_test_suite", +) diff --git a/test/unit/incremental/incremental_test_suite.bzl b/test/unit/incremental/incremental_test_suite.bzl new file mode 100644 index 0000000000..54869e1bbf --- /dev/null +++ b/test/unit/incremental/incremental_test_suite.bzl @@ -0,0 +1,139 @@ +"""Starlark tests for `//rust/settings:experimental_incremental`""" + +load("@bazel_skylib//lib:unittest.bzl", "analysistest") +load("@bazel_skylib//rules:write_file.bzl", "write_file") +load("//rust:defs.bzl", "rust_library", "rust_proc_macro") +load( + "//test/unit:common.bzl", + "assert_action_mnemonic", + "assert_argv_contains_prefix", + "assert_argv_contains_prefix_not", +) + +# Checks that -Cincremental flag is present in Rustc action +def _incremental_enabled_test_impl(ctx): + env = analysistest.begin(ctx) + target = analysistest.target_under_test(env) + + action = target.actions[0] + assert_action_mnemonic(env, action, "Rustc") + 
assert_argv_contains_prefix(env, action, "-Cincremental=") + + return analysistest.end(env) + +_incremental_enabled_test = analysistest.make( + _incremental_enabled_test_impl, + config_settings = { + str(Label("//rust/settings:experimental_incremental")): True, + }, +) + +# Checks that -Cincremental flag is absent by default +def _incremental_disabled_test_impl(ctx): + env = analysistest.begin(ctx) + target = analysistest.target_under_test(env) + + action = target.actions[0] + assert_action_mnemonic(env, action, "Rustc") + assert_argv_contains_prefix_not(env, action, "-Cincremental") + + return analysistest.end(env) + +_incremental_disabled_test = analysistest.make( + _incremental_disabled_test_impl, + config_settings = {}, +) + +# Checks that -Cincremental flag is NOT added for proc-macros even when enabled +def _incremental_proc_macro_test_impl(ctx): + env = analysistest.begin(ctx) + target = analysistest.target_under_test(env) + + action = target.actions[0] + assert_action_mnemonic(env, action, "Rustc") + assert_argv_contains_prefix_not(env, action, "-Cincremental") + + return analysistest.end(env) + +_incremental_proc_macro_test = analysistest.make( + _incremental_proc_macro_test_impl, + config_settings = { + str(Label("//rust/settings:experimental_incremental")): True, + }, +) + +# Checks the incremental cache path contains the crate name +def _incremental_cache_path_test_impl(ctx): + env = analysistest.begin(ctx) + target = analysistest.target_under_test(env) + + action = target.actions[0] + assert_action_mnemonic(env, action, "Rustc") + assert_argv_contains_prefix(env, action, "-Cincremental=/tmp/rules_rust_incremental/") + + return analysistest.end(env) + +_incremental_cache_path_test = analysistest.make( + _incremental_cache_path_test_impl, + config_settings = { + str(Label("//rust/settings:experimental_incremental")): True, + }, +) + +def incremental_test_suite(name): + """Entry-point macro called from the BUILD file. 
+ + Args: + name (str): The name of the test suite. + """ + write_file( + name = "crate_lib", + out = "lib.rs", + content = [ + "#[allow(dead_code)]", + "fn add() {}", + "", + ], + ) + + rust_library( + name = "lib", + srcs = [":lib.rs"], + edition = "2021", + ) + + rust_proc_macro( + name = "proc_macro", + srcs = [":lib.rs"], + edition = "2021", + ) + + _incremental_enabled_test( + name = "incremental_enabled_test", + target_under_test = ":lib", + ) + + _incremental_disabled_test( + name = "incremental_disabled_test", + target_under_test = ":lib", + ) + + _incremental_proc_macro_test( + name = "incremental_proc_macro_test", + target_under_test = ":proc_macro", + ) + + _incremental_cache_path_test( + name = "incremental_cache_path_test", + target_under_test = ":lib", + ) + + native.test_suite( + name = name, + tests = [ + ":incremental_enabled_test", + ":incremental_disabled_test", + ":incremental_proc_macro_test", + ":incremental_cache_path_test", + ], + ) diff --git a/test/unit/pipelined_compilation/BUILD.bazel b/test/unit/pipelined_compilation/BUILD.bazel index 8d363e03ed..fb8d218afe 100644 --- a/test/unit/pipelined_compilation/BUILD.bazel +++ b/test/unit/pipelined_compilation/BUILD.bazel @@ -1,4 +1,20 @@ +load("@rules_shell//shell:sh_test.bzl", "sh_test") load(":pipelined_compilation_test.bzl", "pipelined_compilation_test_suite") ############################ UNIT TESTS ############################# pipelined_compilation_test_suite(name = "pipelined_compilation_test_suite") + +############################ STRACE TEST ############################# +sh_test( + name = "strace_rustc_post_metadata_test", + srcs = ["strace_rustc_post_metadata_test.sh"], + tags = [ + "local", + "manual", + "no-sandbox", + ], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), +) diff --git a/test/unit/pipelined_compilation/pipelined_compilation_test.bzl 
b/test/unit/pipelined_compilation/pipelined_compilation_test.bzl index 0f638c3ee6..d8c3354be4 100644 --- a/test/unit/pipelined_compilation/pipelined_compilation_test.bzl +++ b/test/unit/pipelined_compilation/pipelined_compilation_test.bzl @@ -9,6 +9,11 @@ ENABLE_PIPELINING = { str(Label("//rust/settings:pipelined_compilation")): True, } +ENABLE_WORKER_PIPELINING = { + str(Label("//rust/settings:pipelined_compilation")): True, + str(Label("//rust/settings:experimental_worker_pipelining")): True, +} + # TODO: Fix pipeline compilation on windows # https://github.com/bazelbuild/rules_rust/issues/3383 _NO_WINDOWS = select({ @@ -273,6 +278,128 @@ def _hollow_rlib_env_test_impl(ctx): hollow_rlib_env_test = analysistest.make(_hollow_rlib_env_test_impl, config_settings = ENABLE_PIPELINING) +def _worker_pipelining_second_lib_test_impl(ctx): + """Verify worker pipelining uses .rmeta output (not hollow rlib) for pipelined libs. + + With experimental_worker_pipelining enabled, both the metadata and full actions use + mnemonic "Rustc" (same mnemonic ensures they share the same worker process and + PipelineState). They are distinguished by their outputs: + - Metadata action: produces .rmeta file + - Full action: produces .rlib file + + The metadata action must: + - Produce a .rmeta file (not -hollow.rlib) — single rustc invocation, no -Zno-codegen + - NOT set RUSTC_BOOTSTRAP=1 (no unstable flags needed) + - Take first's .rmeta as input (not first's hollow rlib) + + The Rustc (full) action must: + - NOT set RUSTC_BOOTSTRAP=1 + - Also take first's .rmeta as input (same input set as metadata — no force_depend_on_objects) + """ + env = analysistest.begin(ctx) + tut = analysistest.target_under_test(env) + + # Both metadata and full actions share mnemonic "Rustc" with worker pipelining. + # Distinguish by output: metadata action outputs .rmeta; full action outputs .rlib. 
+ rustc_actions = [act for act in tut.actions if act.mnemonic == "Rustc"] + metadata_actions = [ + act + for act in rustc_actions + if len([o for o in act.outputs.to_list() if o.path.endswith(".rmeta")]) > 0 + ] + rlib_actions = [ + act + for act in rustc_actions + if len([ + o + for o in act.outputs.to_list() + if o.path.endswith(".rlib") and not o.path.endswith("-hollow.rlib") + ]) > 0 + ] + asserts.true( + env, + len(metadata_actions) >= 1, + "expected a Rustc action with .rmeta output for worker pipelining metadata", + ) + asserts.true( + env, + len(rlib_actions) >= 1, + "expected a Rustc action with .rlib output", + ) + metadata_action = metadata_actions[0] + rlib_action = rlib_actions[0] + + # Metadata output must be .rmeta, not -hollow.rlib. + metadata_outputs = metadata_action.outputs.to_list() + rmeta_outputs = [o for o in metadata_outputs if o.path.endswith(".rmeta")] + hollow_outputs = [o for o in metadata_outputs if o.path.endswith("-hollow.rlib")] + asserts.true( + env, + len(rmeta_outputs) >= 1, + "expected .rmeta output for worker pipelining, got: " + str([o.path for o in metadata_outputs]), + ) + asserts.true( + env, + len(hollow_outputs) == 0, + "unexpected -hollow.rlib output (hollow rlib should not be used with worker pipelining): " + str([o.path for o in hollow_outputs]), + ) + + # Neither action should set RUSTC_BOOTSTRAP=1 (no -Zno-codegen needed). + asserts.equals( + env, + "", + metadata_action.env.get("RUSTC_BOOTSTRAP", ""), + "RUSTC_BOOTSTRAP must not be set with worker pipelining (no -Zno-codegen needed)", + ) + asserts.equals( + env, + "", + rlib_action.env.get("RUSTC_BOOTSTRAP", ""), + "RUSTC_BOOTSTRAP must not be set with worker pipelining", + ) + + # Both actions take first's .rmeta as input (not hollow rlib). + # Worker pipelining does not use force_depend_on_objects, so both actions + # use the same pipelined (rmeta) input set. 
+ first_inputs_metadata = [i for i in metadata_action.inputs.to_list() if "libfirst" in i.path] + first_inputs_full = [i for i in rlib_action.inputs.to_list() if "libfirst" in i.path] + + asserts.true( + env, + len([i for i in first_inputs_metadata if i.path.endswith(".rmeta")]) >= 1, + "expected first's .rmeta in metadata action inputs, found: " + str([i.path for i in first_inputs_metadata]), + ) + asserts.true( + env, + len([i for i in first_inputs_metadata if i.path.endswith("-hollow.rlib")]) == 0, + "unexpected hollow rlib in metadata action inputs: " + str([i.path for i in first_inputs_metadata]), + ) + asserts.true( + env, + len([i for i in first_inputs_full if i.path.endswith(".rmeta")]) >= 1, + "expected first's .rmeta in full Rustc action inputs (no force_depend_on_objects), found: " + str([i.path for i in first_inputs_full]), + ) + asserts.true( + env, + len([i for i in first_inputs_full if i.path.endswith("-hollow.rlib")]) == 0, + "unexpected hollow rlib in full Rustc action inputs: " + str([i.path for i in first_inputs_full]), + ) + + return analysistest.end(env) + +worker_pipelining_second_lib_test = analysistest.make( + _worker_pipelining_second_lib_test_impl, + config_settings = ENABLE_WORKER_PIPELINING, +) + +def _worker_pipelining_test(): + worker_pipelining_second_lib_test( + name = "worker_pipelining_second_lib_test", + target_under_test = ":second", + target_compatible_with = _NO_WINDOWS, + ) + return [":worker_pipelining_second_lib_test"] + def _disable_pipelining_test(): rust_library( name = "lib", @@ -389,6 +516,7 @@ def pipelined_compilation_test_suite(name): """ tests = [] tests.extend(_pipelined_compilation_test()) + tests.extend(_worker_pipelining_test()) tests.extend(_disable_pipelining_test()) tests.extend(_custom_rule_test(generate_metadata = True, suffix = "_with_metadata")) tests.extend(_custom_rule_test(generate_metadata = False, suffix = "_without_metadata")) diff --git 
a/test/unit/pipelined_compilation/strace_rustc_post_metadata_test.sh b/test/unit/pipelined_compilation/strace_rustc_post_metadata_test.sh new file mode 100755 index 0000000000..9be1a04ad7 --- /dev/null +++ b/test/unit/pipelined_compilation/strace_rustc_post_metadata_test.sh @@ -0,0 +1,229 @@ +#!/usr/bin/env bash +# Regression test: rustc makes zero input file reads after emitting .rmeta. +# +# This is the critical invariant for worker-managed pipelining: after the +# metadata response is sent, background rustc must not read any sandbox inputs. +# Gate 0 investigation (project_gate0_strace_results.md) proved this holds on +# rustc 1.94.0. This test provides ongoing regression coverage. +# +# Tagged manual + no-sandbox + local; requires strace (Linux only). +set -euo pipefail + +RUSTC="${RUSTC:-rustc}" +STRACE="${STRACE:-strace}" + +# --------------------------------------------------------------------------- +# Locate tools +# --------------------------------------------------------------------------- +if ! command -v "$STRACE" &>/dev/null; then + echo "SKIP: strace not found (set STRACE= to override)" + exit 0 +fi +if ! command -v "$RUSTC" &>/dev/null; then + echo "SKIP: rustc not found (set RUSTC= to override)" + exit 0 +fi + +RUSTC_VERSION=$("$RUSTC" --version) +echo "Using rustc: $RUSTC_VERSION" +echo "Using strace: $("$STRACE" --version 2>&1 | head -1)" + +# --------------------------------------------------------------------------- +# Temp workspace +# --------------------------------------------------------------------------- +WORKDIR=$(mktemp -d) +trap 'rm -rf "$WORKDIR"' EXIT + +# dep crate +cat > "$WORKDIR/dep.rs" <<'EOF' +pub fn dep_fn() -> i32 { 42 } +EOF + +# included.txt for include_str! +cat > "$WORKDIR/included.txt" <<'EOF' +hello from include_str +EOF + +# main lib crate: depends on dep and uses include_str! 
+cat > "$WORKDIR/lib.rs" <<'EOF' +extern crate dep; + +const INCLUDED: &str = include_str!("included.txt"); + +pub fn answer() -> i32 { + let _ = INCLUDED; + dep::dep_fn() +} +EOF + +OUTDIR="$WORKDIR/out" +mkdir -p "$OUTDIR" + +# --------------------------------------------------------------------------- +# Step 1: compile dep.rs to get dep.rmeta (no strace needed) +# --------------------------------------------------------------------------- +"$RUSTC" \ + --edition 2021 \ + --crate-type lib \ + --crate-name dep \ + --emit=metadata,link \ + --out-dir "$OUTDIR" \ + "$WORKDIR/dep.rs" + +DEP_RMETA="$OUTDIR/libdep.rmeta" +if [[ ! -f "$DEP_RMETA" ]]; then + echo "FAIL: dep.rmeta not produced" + exit 1 +fi + +# --------------------------------------------------------------------------- +# Step 2: compile lib.rs under strace +# +# Rustc writes .rmeta to a temp dir (e.g. out/rmeta/full.rmeta) then +# renames it to libmylib.rmeta. We trace openat+read+close to capture all +# file I/O; the artifact JSON lines go to stderr separately. +# --------------------------------------------------------------------------- +STRACE_LOG="$WORKDIR/strace.log" + +"$STRACE" \ + -f \ + -e trace=openat,read,close \ + -o "$STRACE_LOG" \ + "$RUSTC" \ + --edition 2021 \ + --crate-type lib \ + --crate-name mylib \ + --emit=dep-info,metadata,link \ + --error-format=json \ + --json=artifacts \ + --extern "dep=$DEP_RMETA" \ + -L "$OUTDIR" \ + --out-dir "$OUTDIR" \ + "$WORKDIR/lib.rs" 2>/dev/null + +RMETA_OUT="$OUTDIR/libmylib.rmeta" +if [[ ! -f "$RMETA_OUT" ]]; then + echo "FAIL: libmylib.rmeta not produced" + exit 1 +fi + +# --------------------------------------------------------------------------- +# Step 3: find the .rmeta write boundary +# +# Rustc writes metadata to a temporary path like out/rmeta/full.rmeta +# using O_RDWR|O_CREAT before renaming it to libmylib.rmeta. This openat() +# is the earliest observable "metadata write started" event. 
+# +# We also accept the pattern of writing directly to a path ending in .rmeta +# with O_CREAT (in case rustc internals change). +# --------------------------------------------------------------------------- +# Pattern 1: temp rmeta dir (rmeta/full.rmeta or similar) with O_CREAT +BOUNDARY_LINE=$(grep -n "openat.*rmeta.*full\.rmeta.*O_.*CREAT\|openat.*full\.rmeta.*O_.*CREAT" "$STRACE_LOG" | head -1 | cut -d: -f1) + +# Pattern 2: fallback — any openat with O_WRONLY or O_CREAT for a path in OUTDIR +if [[ -z "$BOUNDARY_LINE" ]]; then + ESCAPED_OUTDIR=$(printf '%s\n' "$OUTDIR" | sed 's/[[\.*^$()+?{|]/\\&/g') + BOUNDARY_LINE=$(grep -n "openat.*${ESCAPED_OUTDIR}.*O_.*CREAT\|openat.*${ESCAPED_OUTDIR}.*O_WRONLY" "$STRACE_LOG" | head -1 | cut -d: -f1) +fi + +if [[ -z "$BOUNDARY_LINE" ]]; then + echo "FAIL: could not find .rmeta write openat() in strace log" + echo "--- strace log (openat lines) ---" + grep "openat" "$STRACE_LOG" | head -30 || true + exit 1 +fi + +echo "Boundary: strace line $BOUNDARY_LINE (first output-file write)" + +# Lines after the boundary (post-metadata I/O) +POST_LOG="$WORKDIR/post_boundary.log" +tail -n +"$((BOUNDARY_LINE + 1))" "$STRACE_LOG" > "$POST_LOG" + +# Lines before and including the boundary (pre-metadata I/O) +PRE_LOG="$WORKDIR/pre_boundary.log" +head -n "$BOUNDARY_LINE" "$STRACE_LOG" > "$PRE_LOG" + +# --------------------------------------------------------------------------- +# Step 4: assert zero input-file openat() reads after the boundary +# +# Input files to watch: lib.rs, dep.rs, included.txt, *.rmeta deps, *.rlib deps +# +# Exclusions (legitimate post-boundary opens): +# O_WRONLY / O_CREAT / O_RDWR — output writes +# ENOENT — probing for nonexistent files +# O_DIRECTORY — directory traversal +# /proc /sys /dev — kernel pseudo-files +# /home /rustup toolchain paths — rustc runtime libs (legitimate) +# --------------------------------------------------------------------------- +FAIL=0 +INPUT_PATTERNS=( + "lib\.rs" + "dep\.rs" + 
"included\.txt" + "libdep\.rmeta" + "libdep\.rlib" +) + +for pat in "${INPUT_PATTERNS[@]}"; do + BAD=$(grep -E "openat.*${pat}" "$POST_LOG" \ + | grep -vE "O_WRONLY|O_CREAT|O_RDWR|ENOENT|O_DIRECTORY" \ + | grep -vE "/proc/|/sys/|/dev/" \ + || true) + if [[ -n "$BAD" ]]; then + echo "FAIL: post-metadata read of input file matching '${pat}':" + echo "$BAD" + FAIL=1 + fi +done + +# Also flag any .so reads that look like proc-macro loads after the boundary +# (only flag .so files from OUTDIR or workdir — not system/toolchain .so) +ESCAPED_OUTDIR=$(printf '%s\n' "$OUTDIR" | sed 's/[[\.*^$()+?{|]/\\&/g') +ESCAPED_WORKDIR=$(printf '%s\n' "$WORKDIR" | sed 's/[[\.*^$()+?{|]/\\&/g') +BAD_SO=$(grep -E "openat.*(${ESCAPED_OUTDIR}|${ESCAPED_WORKDIR}).*\.so" "$POST_LOG" \ + | grep -vE "O_WRONLY|O_CREAT|O_RDWR|ENOENT|O_DIRECTORY" \ + || true) +if [[ -n "$BAD_SO" ]]; then + echo "FAIL: post-metadata openat() of .so in workdir/outdir (proc macro?) after boundary:" + echo "$BAD_SO" + FAIL=1 +fi + +# --------------------------------------------------------------------------- +# Step 5: assert all input FDs are closed before the boundary +# +# For each input file opened read-only before the boundary, find its FD +# (the return value of openat) and verify close($fd) appears before the +# boundary line. +# --------------------------------------------------------------------------- +while IFS= read -r line; do + # Extract the FD: last token after "= " on the line + fd=$(printf '%s' "$line" | grep -oE '= [0-9]+$' | grep -oE '[0-9]+' || true) + [[ -z "$fd" ]] && continue + + if ! 
grep -qE "close\($fd\)[[:space:]]*= 0" "$PRE_LOG"; then + echo "FAIL: FD $fd (opened for input) not closed before .rmeta write boundary" + echo " Opened by: $line" + FAIL=1 + fi +done < <(grep -E "openat.*(lib\.rs|dep\.rs|included\.txt|libdep\.rmeta|libdep\.rlib)" "$PRE_LOG" \ + | grep -vE "O_WRONLY|O_CREAT|O_RDWR|ENOENT|O_DIRECTORY" \ + | grep -E '= [0-9]+$' \ + || true) + +# --------------------------------------------------------------------------- +# Result +# --------------------------------------------------------------------------- +echo "" +echo "--- Summary ---" +echo "rustc version: $RUSTC_VERSION" +echo "strace boundary line: $BOUNDARY_LINE / $(wc -l < "$STRACE_LOG") total" +echo "post-boundary strace lines: $(wc -l < "$POST_LOG")" + +if [[ $FAIL -ne 0 ]]; then + echo "RESULT: FAIL" + exit 1 +fi + +echo "RESULT: PASS — zero input file reads after .rmeta emission, all input FDs closed before boundary" +exit 0 diff --git a/thoughts/shared/bench_cargo_progress.log b/thoughts/shared/bench_cargo_progress.log new file mode 100644 index 0000000000..157a1ef2ab --- /dev/null +++ b/thoughts/shared/bench_cargo_progress.log @@ -0,0 +1,7 @@ +[cargo-bench] === Iteration 1 / 3 === +[cargo-bench] cargo cold (incremental=on)... +[cargo-bench] cargo rebuild (incremental=on)... +[cargo-bench] cargo cold (incremental=off)... +[cargo-bench] cargo rebuild (incremental=off)... +[cargo-bench] === Iteration 2 / 3 === +[cargo-bench] cargo cold (incremental=on)... 
diff --git a/thoughts/shared/bench_cargo_raw.csv b/thoughts/shared/bench_cargo_raw.csv new file mode 100644 index 0000000000..a122afa4ec --- /dev/null +++ b/thoughts/shared/bench_cargo_raw.csv @@ -0,0 +1,17 @@ +iter,config,wall_ms,wall_s + Compiling asset_manager v0.1.0 (/var/mnt/dev/reactor-repo-2/helium/asset_manager) + Compiling zm_cli v0.1.0 (/var/mnt/dev/reactor-repo-2/zm_cli) + Finished `dev` profile [unoptimized + debuginfo] target(s) in 1m 21s +1,cargo-incr-cold,81229,81.2 + Compiling scenario_generator v0.1.0 (/var/mnt/dev/reactor-repo-2/lib/scenario_generator) + Compiling zm_cli v0.1.0 (/var/mnt/dev/reactor-repo-2/zm_cli) + Finished `dev` profile [unoptimized + debuginfo] target(s) in 22.18s +1,cargo-incr-rb,22424,22.4 + Compiling asset_manager v0.1.0 (/var/mnt/dev/reactor-repo-2/helium/asset_manager) + Compiling zm_cli v0.1.0 (/var/mnt/dev/reactor-repo-2/zm_cli) + Finished `dev` profile [unoptimized + debuginfo] target(s) in 1m 25s +1,cargo-no-incr-cold,85402,85.4 + Compiling scenario_generator v0.1.0 (/var/mnt/dev/reactor-repo-2/lib/scenario_generator) + Compiling zm_cli v0.1.0 (/var/mnt/dev/reactor-repo-2/zm_cli) + Finished `dev` profile [unoptimized + debuginfo] target(s) in 24.15s +1,cargo-no-incr-rb,24272,24.3 diff --git a/thoughts/shared/bench_multiplex_sandbox_overhead.sh b/thoughts/shared/bench_multiplex_sandbox_overhead.sh new file mode 100755 index 0000000000..d7668ae25e --- /dev/null +++ b/thoughts/shared/bench_multiplex_sandbox_overhead.sh @@ -0,0 +1,519 @@ +#!/usr/bin/env bash +# Benchmark multiplex-sandboxed worker pipelining across a worker-topology matrix. 
+# +# Default behavior follows thoughts/shared/plans/2026-03-25-consolidated-worker-pipelining-plan.md: +# - target: //sdk/sdk_builder:sdk_builder_lib +# - 1 warmup + 2 measured iterations +# - sweep worker_max_instances=1,2,4 +# - sweep worker_max_multiplex_instances=8,12,16 +# +# Outputs: +# - CSV on stdout +# - per-run Bazel logs/profiles under --output-dir +# +# Notes: +# - Successful pipelined builds delete per-pipeline pipeline.log files, so the benchmark prefers +# worker-owned _pw_state/metrics.log aggregates when available. Slot reuse metrics are derived +# from persistent stage-slot manifests. + +set -euo pipefail + +BAZEL="${BAZEL:-bazel}" +REPO="${REPO:-/var/mnt/dev/reactor-repo-2}" +TARGETS="${TARGETS:-//sdk/sdk_builder:sdk_builder_lib}" +ITERS="${ITERS:-2}" +WARMUPS="${WARMUPS:-1}" +MAX_INSTANCES="${MAX_INSTANCES:-1 2 4}" +MULTIPLEX_VALUES="${MULTIPLEX_VALUES:-8 12 16}" +OUTPUT_DIR="${OUTPUT_DIR:-/tmp/multiplex_sandbox_overhead_$(date +%Y%m%d_%H%M%S)}" +RUN_ID="$(date +%s)" + +usage() { + cat <<'EOF' +Usage: + bench_multiplex_sandbox_overhead.sh [options] + +Options: + --repo PATH Repo to benchmark (default: /var/mnt/dev/reactor-repo-2) + --targets "T1 T2" Space-separated Bazel targets + --iters N Measured iterations per config (default: 2) + --warmups N Warmup iterations per config (default: 1) + --max-instances "1 2 4" Sweep values for --worker_max_instances=Rustc= + --multiplex "8 12 16" Sweep values for --worker_max_multiplex_instances=Rustc= + --output-dir DIR Where logs/profiles/results are written + --help Show this help + +Environment variables with the same names are also honored. 
+EOF +} + +log() { + echo "[bench] $*" >&2 +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --repo) + REPO="$2" + shift 2 + ;; + --targets) + TARGETS="$2" + shift 2 + ;; + --iters) + ITERS="$2" + shift 2 + ;; + --warmups) + WARMUPS="$2" + shift 2 + ;; + --max-instances) + MAX_INSTANCES="$2" + shift 2 + ;; + --multiplex) + MULTIPLEX_VALUES="$2" + shift 2 + ;; + --output-dir) + OUTPUT_DIR="$2" + shift 2 + ;; + --help|-h) + usage + exit 0 + ;; + *) + echo "unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac +done + +mkdir -p "$OUTPUT_DIR" + +WORKER_PIPE_FLAGS=( + "--@rules_rust//rust/settings:pipelined_compilation=true" + "--@rules_rust//rust/settings:experimental_worker_pipelining=true" + "--experimental_worker_multiplex_sandboxing" + "--strategy=Rustc=worker,sandboxed" + "--strategy=RustcMetadata=worker,sandboxed" +) + +run_label() { + local target="$1" max_instances="$2" multiplex="$3" phase="$4" iter="$5" + local target_id + target_id=$(echo "$target" | tr '/:+' '_' | tr -s '_' | sed 's/^_//; s/_$//') + echo "${target_id}_mi${max_instances}_mx${multiplex}_${phase}${iter}" +} + +prepare_repo_state() { + local target="$1" + cd "$REPO" + "$BAZEL" shutdown >/dev/null 2>&1 || true + "$BAZEL" clean --expunge >/dev/null 2>&1 || true + # Recreate the Bazel server and discover fresh paths for this run. + EXECROOT=$("$BAZEL" info execution_root 2>/dev/null) + OUTPUT_BASE=$("$BAZEL" info output_base 2>/dev/null) + BAZEL_WORKERS_DIR="$OUTPUT_BASE/bazel-workers" + rm -rf "$BAZEL_WORKERS_DIR" "$EXECROOT/_pw_state" + mkdir -p "$BAZEL_WORKERS_DIR" + CURRENT_TARGET="$target" +} + +## PID sampling — runs pgrep in a background loop during builds to count +## distinct OS processes vs distinct Bazel worker directories. 
+PID_SAMPLE_FILE="" +PID_SAMPLER_PID="" + +start_pid_sampler() { + PID_SAMPLE_FILE="$OUTPUT_DIR/${1}_pids.txt" + : > "$PID_SAMPLE_FILE" + ( + while true; do + pgrep -f 'process_wrapper.*persistent_worker' >> "$PID_SAMPLE_FILE" 2>/dev/null || true + sleep 0.5 + done + ) & + PID_SAMPLER_PID=$! +} + +stop_pid_sampler() { + if [[ -n "$PID_SAMPLER_PID" ]]; then + kill "$PID_SAMPLER_PID" 2>/dev/null || true + wait "$PID_SAMPLER_PID" 2>/dev/null || true + PID_SAMPLER_PID="" + fi + if [[ -n "$PID_SAMPLE_FILE" && -f "$PID_SAMPLE_FILE" ]]; then + local distinct + distinct=$(sort -u "$PID_SAMPLE_FILE" | grep -c . || echo 0) + LAST_DISTINCT_PIDS="$distinct" + log "distinct_pids=$distinct (from pgrep sampling)" + else + LAST_DISTINCT_PIDS="" + fi +} + +build_with_profile() { + local target="$1" max_instances="$2" multiplex="$3" phase="$4" iter="$5" + local label log_file profile_file rust_cfg + label=$(run_label "$target" "$max_instances" "$multiplex" "$phase" "$iter") + log_file="$OUTPUT_DIR/${label}.log" + profile_file="$OUTPUT_DIR/${label}.profile.gz" + rust_cfg="bench_multiplex_${RUN_ID}_${label}" + + LAST_LABEL="$label" + LAST_LOG_FILE="$log_file" + LAST_PROFILE_FILE="$profile_file" + LAST_DISTINCT_PIDS="" + + log "run=$label target=$target" + start_pid_sampler "$label" + local start_ns end_ns + start_ns=$(date +%s%N) + ( + cd "$REPO" + "$BAZEL" build "$target" \ + "${WORKER_PIPE_FLAGS[@]}" \ + "--@rules_rust//rust/settings:extra_rustc_flag=--cfg=${rust_cfg}" \ + "--worker_max_instances=Rustc=${max_instances}" \ + "--worker_max_multiplex_instances=Rustc=${multiplex}" \ + "--profile=${profile_file}" + ) 2>&1 | tee "$log_file" >/dev/null + end_ns=$(date +%s%N) + stop_pid_sampler + LAST_WALL_MS=$(( (end_ns - start_ns) / 1000000 )) +} + +extract_basic_metrics() { + local log_file="$1" + python3 - "$log_file" "$LAST_WALL_MS" <<'PY' +import pathlib +import re +import sys + +log_path = pathlib.Path(sys.argv[1]) +wall_ms = sys.argv[2] +text = log_path.read_text(errors="replace") 
if log_path.exists() else "" + +def pick(pattern, default=""): + matches = re.findall(pattern, text, flags=re.MULTILINE) + return matches[-1] if matches else default + +crit_s = pick(r"Critical Path:\s*([0-9.]+)") +total_actions = pick(r"([0-9]+)\s+total actions") +worker_count = pick(r"([0-9]+)\s+worker\b") +sandbox_count = pick(r"([0-9]+)\s+linux-sandbox\b") +print("|".join([wall_ms, crit_s, total_actions, worker_count, sandbox_count])) +PY +} + +extract_profile_metrics() { + local profile_file="$1" + local raw summary + raw=$(cd /tmp && "$BAZEL" analyze-profile --dump=raw "$profile_file" 2>/dev/null || true) + summary=$(cd /tmp && "$BAZEL" analyze-profile "$profile_file" 2>/dev/null || true) + python3 - <<'PY' "$raw" "$summary" +import re +import sys + +raw = sys.argv[1] +summary = sys.argv[2] + +def parse_summary(text, key): + for line in text.splitlines(): + lower = line.lower() + if key not in lower: + continue + total = "" + count = "" + avg = "" + m_total = re.search(r"([0-9]+(?:\.[0-9]+)?)\s*s", line) + if m_total: + total = m_total.group(1) + m_count = re.search(r"([0-9]+)\s+(?:events|actions|spawns)", line, flags=re.I) + if m_count: + count = m_count.group(1) + m_avg = re.search(r"([0-9]+(?:\.[0-9]+)?)\s*ms", line, flags=re.I) + if m_avg: + avg = m_avg.group(1) + if total or count or avg: + return total, count, avg + return "", "", "" + +def parse_raw(text, key): + total_ms = 0.0 + count = 0 + for line in text.splitlines(): + lower = line.lower() + if key not in lower: + continue + count += 1 + m_ms = re.search(r"\b([0-9]+(?:\.[0-9]+)?)ms\b", lower) + if m_ms: + total_ms += float(m_ms.group(1)) + continue + m_us = re.search(r"\b([0-9]+(?:\.[0-9]+)?)us\b", lower) + if m_us: + total_ms += float(m_us.group(1)) / 1000.0 + continue + m_ns = re.search(r"\b([0-9]+(?:\.[0-9]+)?)ns\b", lower) + if m_ns: + total_ms += float(m_ns.group(1)) / 1_000_000.0 + continue + m_s = re.search(r"\b([0-9]+(?:\.[0-9]+)?)s\b", lower) + if m_s: + total_ms += float(m_s.group(1)) * 
1000.0 + if count and total_ms: + return f"{total_ms / 1000.0:.3f}", str(count), f"{total_ms / count:.1f}" + return "", "", "" + +prep = parse_summary(summary, "worker_preparing") +work = parse_summary(summary, "worker_working") +if not any(prep): + prep = parse_raw(raw, "worker_preparing") +if not any(work): + work = parse_raw(raw, "worker_working") +if not any(prep): + prep = parse_summary(summary, "worker setup") +if not any(work): + work = parse_summary(summary, "worker working") + +print("|".join([ + prep[0], prep[1], prep[2], + work[0], work[1], work[2], +])) +PY +} + +extract_worker_fs_metrics() { + local workers_dir="$1" + python3 - "$workers_dir" <<'PY' +import json +import pathlib +import statistics +import sys +from collections import Counter + +workers_dir = pathlib.Path(sys.argv[1]) +distinct_workers = set() +reuse_values = [] +pipeline_logs = [] +metrics_logs = [] + +if workers_dir.exists(): + for path in workers_dir.rglob("*"): + if path.is_dir() and path.name.endswith("-workdir") and "Rustc" in path.name: + distinct_workers.add(str(path)) + for manifest in workers_dir.rglob("manifest.json"): + if "_pw_state/stage_pool/" not in manifest.as_posix(): + continue + try: + data = json.loads(manifest.read_text()) + except Exception: + continue + reuse_values.append(int(data.get("reuse_count", 0))) + for log in workers_dir.rglob("pipeline.log"): + if "/_pw_state/pipeline/" in log.as_posix(): + pipeline_logs.append(log) + for log in workers_dir.rglob("metrics.log"): + if "/_pw_state/metrics.log" in log.as_posix(): + metrics_logs.append(log) + +reuse_gt1 = sum(1 for value in reuse_values if value > 1) +max_reuse = max(reuse_values) if reuse_values else 0 +hist = Counter(reuse_values) +reuse_hist = ";".join(f"{k}:{hist[k]}" for k in sorted(hist)) if hist else "" + +setup_values = [] +stage_values = [] +stage_io_values = [] +declared_input_values = [] +metadata_actions = 0 +source_logs = metrics_logs if metrics_logs else pipeline_logs +for log in source_logs: + 
try: + lines = log.read_text(errors="replace").splitlines() + except Exception: + continue + for line in lines: + if not line.startswith("staging "): + continue + metadata_actions += 1 + setup = "" + stage = "" + stage_io = "" + declared_inputs = "" + for token in line.split(): + if token.startswith("total_setup_ms="): + setup = token.split("=", 1)[1] + elif token.startswith("diff_ms="): + stage = token.split("=", 1)[1] + elif token.startswith("inputs_ms="): + stage = token.split("=", 1)[1] + elif token.startswith("stage_io_ms="): + stage_io = token.split("=", 1)[1] + elif token.startswith("declared_inputs="): + declared_inputs = token.split("=", 1)[1] + if setup: + try: + setup_values.append(float(setup)) + except ValueError: + pass + if stage: + try: + stage_values.append(float(stage)) + except ValueError: + pass + if stage_io: + try: + stage_io_values.append(float(stage_io)) + except ValueError: + pass + if declared_inputs: + try: + declared_input_values.append(float(declared_inputs)) + except ValueError: + pass + +def fmt_avg(values): + if not values: + return "" + return f"{statistics.fmean(values):.1f}" + +def fmt_p90(values): + if not values: + return "" + ordered = sorted(values) + idx = max(0, min(len(ordered) - 1, int((len(ordered) - 1) * 0.9))) + return f"{ordered[idx]:.1f}" + +print("|".join([ + str(len(distinct_workers)), + str(metadata_actions if metadata_actions else ""), + fmt_avg(setup_values), + fmt_p90(setup_values), + fmt_avg(stage_values), + fmt_avg(stage_io_values), + fmt_avg(declared_input_values), + (f"{statistics.fmean(setup_values) / statistics.fmean(declared_input_values):.3f}" + if setup_values and declared_input_values and statistics.fmean(declared_input_values) > 0 + else ""), + str(reuse_gt1 if reuse_values else ""), + str(max_reuse if reuse_values else ""), + reuse_hist, + "metrics" if metrics_logs else ("pipeline" if pipeline_logs else "no"), +])) +PY +} + +emit_csv_row() { + local target="$1" max_instances="$2" multiplex="$3" 
iter="$4" phase="$5" + local basic profile fs + basic=$(extract_basic_metrics "$LAST_LOG_FILE") + IFS='|' read -r wall_ms crit_s total_actions worker_count sandbox_count <<<"$basic" + + profile=$(extract_profile_metrics "$LAST_PROFILE_FILE") + IFS='|' read -r worker_preparing_s worker_preparing_events worker_preparing_avg_ms \ + worker_working_s worker_working_events worker_working_avg_ms <<<"$profile" + + fs=$(extract_worker_fs_metrics "$BAZEL_WORKERS_DIR") + IFS='|' read -r distinct_workers metadata_actions avg_setup_ms p90_setup_ms avg_stage_ms \ + avg_stage_io_ms avg_declared_inputs avg_setup_per_input_ms slot_reuse_gt1 \ + max_reuse_count reuse_hist pipeline_logs_present <<<"$fs" + + local notes="" + if [[ "$pipeline_logs_present" == "no" ]]; then + notes="worker_metrics_unavailable" + elif [[ "$pipeline_logs_present" == "pipeline" ]]; then + notes="worker_metrics_fell_back_to_pipeline_logs" + fi + if [[ -n "$reuse_hist" ]]; then + if [[ -n "$notes" ]]; then + notes="${notes};" + fi + notes="${notes}reuse_hist=${reuse_hist}" + fi + if [[ -n "$worker_preparing_events" ]]; then + if [[ -n "$notes" ]]; then + notes="${notes};" + fi + notes="${notes}worker_preparing_events=${worker_preparing_events}" + fi + if [[ -n "$worker_working_events" ]]; then + if [[ -n "$notes" ]]; then + notes="${notes};" + fi + notes="${notes}worker_working_events=${worker_working_events}" + fi + + printf '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' \ + "$target" \ + "$max_instances" \ + "$multiplex" \ + "$iter" \ + "$phase" \ + "$wall_ms" \ + "$crit_s" \ + "$worker_preparing_s" \ + "$worker_working_s" \ + "$distinct_workers" \ + "$LAST_DISTINCT_PIDS" \ + "$metadata_actions" \ + "$avg_setup_ms" \ + "$p90_setup_ms" \ + "$avg_stage_ms" \ + "$avg_stage_io_ms" \ + "$avg_declared_inputs" \ + "$avg_setup_per_input_ms" \ + "$slot_reuse_gt1" \ + "$max_reuse_count" \ + "$total_actions" \ + "$worker_count" \ + "$sandbox_count" \ + "$notes" +} + 
+RESULTS_CSV="$OUTPUT_DIR/results.csv" +CSV_HEADER="target,max_instances,multiplex,iter,phase,wall_ms,crit_s,worker_preparing_s,worker_working_s,distinct_workers,distinct_pids,metadata_actions,avg_setup_ms,p90_setup_ms,avg_stage_ms,avg_stage_io_ms,avg_declared_inputs,avg_setup_per_input_ms,slot_reuse_gt1,max_reuse_count,total_actions,worker_count,sandbox_count,notes" +echo "$CSV_HEADER" | tee "$RESULTS_CSV" + +for target in $TARGETS; do + for max_instances in $MAX_INSTANCES; do + for multiplex in $MULTIPLEX_VALUES; do + log "=== target=$target max_instances=$max_instances multiplex=$multiplex ===" + + for warmup_iter in $(seq 1 "$WARMUPS"); do + prepare_repo_state "$target" + build_with_profile "$target" "$max_instances" "$multiplex" "warmup" "$warmup_iter" + log "warmup completed: $LAST_LABEL" + done + + for iter in $(seq 1 "$ITERS"); do + prepare_repo_state "$target" + build_with_profile "$target" "$max_instances" "$multiplex" "measured" "$iter" + emit_csv_row "$target" "$max_instances" "$multiplex" "$iter" "measured" \ + | tee -a "$RESULTS_CSV" + done + done + done +done + +# Aggregate all PID sample files into a single distinct_pids.txt summary +DISTINCT_PIDS_FILE="$OUTPUT_DIR/distinct_pids.txt" +{ + echo "# Distinct PIDs observed across all benchmark runs" + echo "# Generated: $(date -Iseconds)" + for pid_file in "$OUTPUT_DIR"/*_pids.txt; do + [[ -f "$pid_file" ]] || continue + label=$(basename "$pid_file" _pids.txt) + distinct=$(sort -u "$pid_file" | grep -c . 
|| echo 0) + echo "$label: $distinct distinct PIDs" + done +} > "$DISTINCT_PIDS_FILE" +log "distinct PID summary written to $DISTINCT_PIDS_FILE" + +log "results written to $RESULTS_CSV" diff --git a/thoughts/shared/bench_sdk.sh b/thoughts/shared/bench_sdk.sh new file mode 100755 index 0000000000..cdcf821fe3 --- /dev/null +++ b/thoughts/shared/bench_sdk.sh @@ -0,0 +1,229 @@ +#!/usr/bin/env bash +# Benchmark pipelining configurations against reactor-repo-2 //sdk +# +# Usage: +# ./bench_sdk.sh [ITERATIONS] +# +# Configs measured (cold builds — all Rust actions forced to cache-miss via --cfg): +# no-pipeline pipelined_compilation=false (baseline) +# hollow-rlib pipelined_compilation=true, no worker pipelining +# worker-pipe-nosand worker pipelining, no multiplex sandboxing +# worker-pipe worker pipelining, multiplex sandboxing +# worker-pipe+incr worker pipelining, multiplex sandboxing + incremental +# +# Configs measured (warm rebuilds — prime build then append a comment to lib/hash): +# *-rb variants of each cold config +# +# No separate warmup needed: the disk cache at ../bazel-disk-cache is assumed +# to be warm from prior development builds. C/build-script/proc-macro actions +# are exec-configuration and unaffected by --extra_rustc_flag (target-config +# only), so they stay cached across all benchmark runs. +# +# Output: CSV to stdout, progress to stderr. + +set -euo pipefail + +REPO="/var/mnt/dev/reactor-repo-2" +TARGET="//sdk" +ITERS="${1:-5}" +RUN_ID=$(date +%s) + +BAZEL="bazel" + +# First-party crate to touch for rebuild tests. +# lib/hash has ~27 first-party rdeps in the //sdk dependency graph. 
+TOUCH_FILE="$REPO/lib/hash/src/lib.rs" + +INCR_CACHE="/tmp/rules_rust_incremental" + +# ── Config flag arrays ──────────────────────────────────────────────────────── + +NO_PIPE_FLAGS=( + "--@rules_rust//rust/settings:pipelined_compilation=false" + "--@rules_rust//rust/settings:experimental_worker_pipelining=false" +) + +HOLLOW_RLIB_FLAGS=( + "--@rules_rust//rust/settings:pipelined_compilation=true" + "--@rules_rust//rust/settings:experimental_worker_pipelining=false" +) + +WORKER_PIPE_NOSAND_FLAGS=( + "--@rules_rust//rust/settings:pipelined_compilation=true" + "--@rules_rust//rust/settings:experimental_worker_pipelining=true" +) + +WORKER_PIPE_FLAGS=( + "--@rules_rust//rust/settings:pipelined_compilation=true" + "--@rules_rust//rust/settings:experimental_worker_pipelining=true" + "--experimental_worker_multiplex_sandboxing" + "--strategy=Rustc=worker,sandboxed" + "--strategy=RustcMetadata=worker,sandboxed" +) + +WORKER_PIPE_INCR_FLAGS=( + "--@rules_rust//rust/settings:pipelined_compilation=true" + "--@rules_rust//rust/settings:experimental_worker_pipelining=true" + "--@rules_rust//rust/settings:experimental_incremental=true" + "--experimental_worker_multiplex_sandboxing" + "--strategy=Rustc=worker,sandboxed" + "--strategy=RustcMetadata=worker,sandboxed" +) + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +log() { echo "[bench] $*" >&2; } + +cfg_flags() { + case "$1" in + no-pipeline) printf '%s\n' "${NO_PIPE_FLAGS[@]}" ;; + hollow-rlib) printf '%s\n' "${HOLLOW_RLIB_FLAGS[@]}" ;; + worker-pipe-nosand) printf '%s\n' "${WORKER_PIPE_NOSAND_FLAGS[@]}" ;; + worker-pipe) printf '%s\n' "${WORKER_PIPE_FLAGS[@]}" ;; + worker-pipe+incr) printf '%s\n' "${WORKER_PIPE_INCR_FLAGS[@]}" ;; + *) echo "unknown config: $1" >&2; exit 1 ;; + esac +} + +# cfg_to_id CFG → safe Rust identifier (no hyphens or plus signs) +cfg_to_id() { + local s="${1//-/_}" # hyphens → underscores + s="${s//+/p}" # plus → p + echo "$s" +} + +# timed_build LABEL 
CFG_RUSTFLAG [extra_flags...] +# Runs `bazel build $TARGET` and emits one CSV data row (no newline prefix): +# wall_ms,wall_s,crit_s,total_actions,worker_count,sandbox_count +# Returns 1 on build failure. +timed_build() { + local label="$1" cfg_flag="$2" + shift 2 + local extra_flags=("$@") + local outfile + outfile=$(mktemp /tmp/bazel_bench_XXXXXX.txt) + + local start_ns end_ns + start_ns=$(date +%s%N) + + cd "$REPO" + if $BAZEL build "$TARGET" \ + --@rules_rust//rust/settings:extra_rustc_flag="--cfg=${cfg_flag}" \ + "${extra_flags[@]}" \ + 2>&1 | tee "$outfile" >/dev/null; then + : + else + log "ERROR: build failed (label=$label cfg=$cfg_flag)" + cat "$outfile" | grep -E 'ERROR:|FAILED' | head -5 >&2 || true + rm -f "$outfile" + return 1 + fi + + end_ns=$(date +%s%N) + local wall_ms=$(( (end_ns - start_ns) / 1000000 )) + + local crit_s total_actions workers sandboxes + crit_s=$(grep -oP 'Critical Path: \K[\d.]+' "$outfile" | tail -1 || echo "0") + total_actions=$(grep -oP '\K\d+(?= total actions)' "$outfile" | tail -1 || echo "0") + workers=$(grep -oP '\K\d+(?= worker)' "$outfile" | head -1 || echo "0") + sandboxes=$(grep -oP '\K\d+(?= linux-sandbox)' "$outfile" | head -1 || echo "0") + rm -f "$outfile" + + local wall_s + wall_s=$(awk "BEGIN{printf \"%.1f\", $wall_ms/1000}") + echo "$wall_ms,$wall_s,$crit_s,$total_actions,$workers,$sandboxes" +} + +# ── CSV header ──────────────────────────────────────────────────────────────── + +echo "iter,config,wall_ms,wall_s,crit_s,total_actions,worker_count,sandbox_count" + +# ── Main loop ───────────────────────────────────────────────────────────────── + +for iter in $(seq 1 "$ITERS"); do + log "=== Iteration $iter / $ITERS ===" + + # ── Cold builds ────────────────────────────────────────────────────────── + # Each cold build: + # - Shuts down the Bazel server (clears in-memory action cache, stops workers) + # - Clears the incremental rustc cache so no prior state exists + # - Uses a unique --cfg key (iter + run_id) to 
force all target-config Rust + # actions to be disk-cache misses; exec-config actions (C, build scripts, + # proc-macros) are unaffected and stay cached + + rm -rf "$INCR_CACHE" + + for cfg in no-pipeline hollow-rlib worker-pipe-nosand worker-pipe worker-pipe+incr; do + log " [cold] $cfg" + $BAZEL shutdown >/dev/null 2>&1 || true + + mapfile -t flags < <(cfg_flags "$cfg") + id=$(cfg_to_id "$cfg") + cfg_flag="bench_cold_${id}_i${iter}_r${RUN_ID}" + + if row=$(timed_build "cold/$cfg" "$cfg_flag" "${flags[@]}"); then + echo "$iter,$cfg,$row" + else + echo "$iter,$cfg,FAILED,,,," + fi + done + + # ── Warm rebuilds ──────────────────────────────────────────────────────── + # For each config: + # 1. Shutdown (do NOT clear incremental cache — the stable prime key's + # incremental state written in iter 1 must persist for iter 2+, because + # in iter 2+ the prime hits the Bazel disk cache and rustc doesn't run) + # 2. Prime build with a stable --cfg (iter 1: full Rust build; iter 2+: + # all disk-cache hits, incremental state from iter 1 persists on disk) + # 3. Append a comment to lib/hash/src/lib.rs to change its content digest + # 4. Rebuild immediately (no server shutdown — Bazel in-memory cache intact, + # only lib/hash and its ~27 rdeps are re-run; incremental state valid + # because the stable key hasn't changed between prime runs) + # 5. Revert file via git + + for cfg in no-pipeline hollow-rlib worker-pipe-nosand worker-pipe worker-pipe+incr; do + rb_cfg="${cfg}-rb" + log " [rebuild] $rb_cfg" + + $BAZEL shutdown >/dev/null 2>&1 || true + # Do NOT rm -rf $INCR_CACHE here: incremental state for the stable prime + # key must survive across iterations. The cold builds above use unique + # --cfg keys and write to different incremental session subdirs, so they + # don't interfere with the stable prime key's incremental state. 
+ + mapfile -t flags < <(cfg_flags "$cfg") + id=$(cfg_to_id "$cfg") + # stable key: same across iterations so the prime hits disk cache on iter 2+ + prime_flag="bench_prime_${id}" + + log " priming..." + cd "$REPO" + if ! $BAZEL build "$TARGET" \ + --@rules_rust//rust/settings:extra_rustc_flag="--cfg=${prime_flag}" \ + "${flags[@]}" \ + 2>&1 | tail -2 >&2; then + log " prime FAILED, skipping rebuild" + echo "$iter,$rb_cfg,FAILED,,,," + continue + fi + + # Modify lib/hash to change its content digest + echo "// bench-rebuild-${RUN_ID}-i${iter}" >> "$TOUCH_FILE" + log " modified $TOUCH_FILE" + + # Rebuild (no shutdown — in-memory cache preserved) + if row=$(timed_build "rebuild/$cfg" "$prime_flag" "${flags[@]}"); then + echo "$iter,$rb_cfg,$row" + else + echo "$iter,$rb_cfg,FAILED,,,," + fi + + # Revert + cd "$REPO" && git checkout -- "$TOUCH_FILE" + log " reverted $TOUCH_FILE" + done + + log " iteration $iter done." +done + +log "Benchmark complete." diff --git a/thoughts/shared/bench_sdk_analysis.md b/thoughts/shared/bench_sdk_analysis.md new file mode 100644 index 0000000000..9b3a972f2b --- /dev/null +++ b/thoughts/shared/bench_sdk_analysis.md @@ -0,0 +1,243 @@ +# SDK Benchmark Analysis: Worker Pipelining with Multiplex Sandboxing + +**Date:** 2026-03-09 +**Target:** `reactor-repo-2 //sdk` (73 first-party Rust libraries, ~165 total) +**Machine:** 16 jobs, Linux 6.17.7, x86_64 +**Bazel:** 9.0.0 +**Script:** `thoughts/shared/bench_sdk.sh`, 5 iterations +**Previous benchmark:** 2026-03-06 (Bazel 8.4.2, no multiplex sandboxing) + +## Methodology + +Three cold-build configs and three warm-rebuild configs were measured: + +| Config | Flags | +|---|---| +| `no-pipeline` | `pipelined_compilation=false` (baseline) | +| `worker-pipe` | `experimental_worker_pipelining=true`, `--experimental_worker_multiplex_sandboxing`, `--strategy=Rustc=worker,sandboxed` | +| `worker-pipe+incr` | same as worker-pipe + `experimental_incremental=true` | +| `*-rb` | corresponding rebuild: 
prime build → append comment to `lib/hash/src/lib.rs` → rebuild | +**Key change from previous benchmark:** This run uses `--experimental_worker_multiplex_sandboxing` +(per-request sandbox isolation within the multiplex worker) and `worker,sandboxed` fallback +strategy. The previous benchmark used unsandboxed multiplex workers with `worker,local` fallback. + +**Forcing Rust cache misses:** each cold build uses a unique `--extra_rustc_flag=--cfg=bench_cold_<id>_i<iter>_r<run_id>`. This is a target-config flag only; exec-config actions (C/CC, build scripts, proc-macros) stay disk-cached across all runs. + +**Note on iteration 1:** Iteration 1 cold builds show higher variance (157.7s no-pipeline, 126.9s worker-pipe) because some disk-cache entries were not yet populated. By iteration 2 all non-Rust actions are cached. Stable means below use iterations 2–5. + +**Note on incremental rebuild validity:** Only iteration 1's `worker-pipe+incr-rb` (4.3s) reflects true warm-incremental performance. In iterations 2+, the rebuild prime hits the Bazel disk cache (rustc doesn't run), so no incremental state gets written. The iters 2–5 `worker-pipe+incr-rb` results (~21.4s) are effectively cold-incremental rebuilds.
+ +--- + +## Raw Data + +``` +iter,config,wall_ms,wall_s,crit_s,total_actions,worker_count,sandbox_count +1,no-pipeline,157741,157.7,106.63,1086,0,1043 +1,worker-pipe,126874,126.9,72.77,1661,1150,15 +1,worker-pipe+incr,97381,97.4,78.77,1167,1165,0 +1,no-pipeline-rb,26438,26.4,25.87,106,0,105 +1,worker-pipe-rb,27624,27.6,26.95,174,106,67 +1,worker-pipe+incr-rb,4310,4.3,3.84,64,7,0 +2,no-pipeline,86856,86.9,71.85,1087,0,590 +2,worker-pipe,79841,79.8,52.59,1676,1150,15 +2,worker-pipe+incr,109799,109.8,82.11,1167,1165,0 +2,no-pipeline-rb,28022,28.0,27.49,106,0,105 +2,worker-pipe-rb,29418,29.4,28.83,174,106,67 +2,worker-pipe+incr-rb,21176,21.2,20.59,174,117,0 +3,no-pipeline,86055,86.1,72.46,1087,0,590 +3,worker-pipe,87662,87.7,53.41,1676,1150,15 +3,worker-pipe+incr,109580,109.6,82.51,1167,1165,0 +3,no-pipeline-rb,28596,28.6,28.05,106,0,105 +3,worker-pipe-rb,29962,30.0,29.38,174,106,67 +3,worker-pipe+incr-rb,21503,21.5,21.01,174,117,0 +4,no-pipeline,87759,87.8,71.19,1087,0,590 +4,worker-pipe,85072,85.1,55.23,1676,1150,15 +4,worker-pipe+incr,110241,110.2,82.63,1167,1165,0 +4,no-pipeline-rb,28258,28.3,27.69,106,0,105 +4,worker-pipe-rb,28717,28.7,28.20,174,106,67 +4,worker-pipe+incr-rb,21360,21.4,20.85,174,117,0 +5,no-pipeline,86292,86.3,70.73,1087,0,590 +5,worker-pipe,86607,86.6,53.24,1676,1150,15 +5,worker-pipe+incr,106365,106.4,80.83,1167,1165,0 +5,no-pipeline-rb,28515,28.5,27.96,106,0,105 +5,worker-pipe-rb,28888,28.9,28.05,174,106,67 +5,worker-pipe+incr-rb,21467,21.5,20.93,174,117,0 +``` + +--- + +## Cold Build Summary (iters 2–5, stable) + +| Config | Mean wall (s) | Mean crit path (s) | Overhead (wall - crit) | vs no-pipeline | Actions | Workers | Sandbox | +|---|---|---|---|---|---|---|---| +| `no-pipeline` | 86.8 | 71.6 | 15.2s | — | ~1087 | 0 | 590 | +| `worker-pipe` | 84.8 | 53.6 | 31.2s | **1.02× faster** | ~1676 | 1150 | 15 | +| `worker-pipe+incr` | 109.0 | 82.0 | 27.0s | 0.80× (26% slower) | ~1167 | 1165 | 0 | + +## Warm Rebuild Summary + +| Config | Iter 1 (s) 
| Mean iters 2–5 (s) | Actions | Workers |
+|---|---|---|---|---|
+| `no-pipeline-rb` | 26.4 | 28.4 | 106 | 0 |
+| `worker-pipe-rb` | 27.6 | 29.2 | 174 | 106 |
+| `worker-pipe+incr-rb` | **4.3** | 21.4 | 64/174 | 7/117 |
+
+---
+
+## Analysis
+
+### Multiplex sandboxing eliminates the wall-time benefit of worker pipelining
+
+The headline finding: **worker-pipe is only 2.3% faster than no-pipeline** (84.8s vs 86.8s). The
+critical path is 25% shorter (53.6s vs 71.6s), confirming that pipelining works — downstream crates
+start earlier. But the Bazel overhead doubles: 31.2s for worker-pipe vs 15.2s for no-pipeline.
+
+The overhead comes from `--experimental_worker_multiplex_sandboxing`, which creates a per-request
+sandbox directory, stages inputs via hardlinks/symlinks into a worker-owned execroot
+(`_pw_state/pipeline/<id>/`), and copies outputs back after completion. With 1150 worker requests,
+this I/O adds up.
+
+### Comparison with previous unsandboxed benchmark (2026-03-06, Bazel 8.4.2)
+
+| Config | Old wall (s) | New wall (s) | Old overhead | New overhead |
+|---|---|---|---|---|
+| `no-pipeline` | 102.7 | 86.8 | 20.5s | 15.2s |
+| `worker-pipe` | 63.5 | 84.8 | 20.9s | 31.2s |
+| `worker-pipe+incr` | 100.8 | 109.0 | 19.0s | 27.0s |
+
+The no-pipeline baseline improved 15.5% (Bazel 9 vs 8.4.2). Worker-pipe regressed 33.5% due to
+sandboxing overhead. The old unsandboxed worker-pipe was **1.62× faster** than its baseline;
+the new sandboxed version is only **1.02× faster**.
+
+### Incremental rebuild remains the star
+
+`worker-pipe+incr-rb` iteration 1 at **4.3s** (vs 13.8s in the old benchmark) is a 3.2× improvement.
+This is the CGU fix (`-Ccodegen-units=16`) from the previous analysis working as expected: incremental
+codegen with 16 CGUs instead of 256 eliminates the overhead that was masking the incremental benefit.
+
+### Rebuild pipelining shows no benefit with sandboxing
+
+`worker-pipe-rb` (29.2s) is slightly slower than `no-pipeline-rb` (28.4s). 
For small rebuilds
+(~27 crates), the sandboxing overhead per-action dominates any pipelining benefit. Without
+sandboxing, the old benchmark showed worker-pipe-rb at 27.9s vs no-pipeline-rb at 30.4s (8% faster).
+
+---
+
+## Recommendations
+
+### Do NOT enable `--experimental_worker_multiplex_sandboxing` for performance
+
+The per-request sandboxing overhead (input staging, output copying) negates the pipelining speedup.
+Worker pipelining's critical-path reduction only translates to wall-time improvement when the
+per-action overhead is low — which unsandboxed multiplex workers achieve but sandboxed ones do not.
+
+### Updated strategy recommendation
+
+| Use case | Recommended config |
+|---|---|
+| CI / performance-sensitive builds | `worker-pipe` **without** `--experimental_worker_multiplex_sandboxing` |
+| Hermetic / security-sensitive builds | `worker-pipe` **with** `--experimental_worker_multiplex_sandboxing` (accept ~30% overhead) |
+| Local development (frequent rebuilds) | `worker-pipe` + `experimental_incremental=true` (without multiplex sandboxing) |
+
+On Bazel 9+, no `--strategy` flags are needed — Bazel auto-selects the multiplex worker from
+`supports-multiplex-workers` in exec_reqs. Use `--strategy=Rustc=worker,sandboxed` on Bazel 8
+as fallback (the `sandboxed` fallback is only used when the worker strategy is unavailable, which
+is not the sandboxing-overhead case measured here).
+
+### Future optimization opportunities
+
+The sandboxing overhead is dominated by input staging (hardlinking/symlinking ~1000+ files per
+request into `_pw_state/pipeline/<id>/execroot/`). Potential improvements:
+1. **Shared read-only layer:** Instead of staging inputs per-request, use a single symlink tree
+   updated incrementally, with per-request output directories only.
+2. **Lazy input resolution:** Only stage inputs that rustc actually reads (use strace/seccomp to
+   detect), rather than all declared inputs.
+3. 
**Skip staging for non-pipelined requests:** Only pipeline requests (metadata+full pairs) need + persistent execroots. Non-pipelined requests could use `current_dir(sandbox_dir)` directly. + +--- + +## Benchmark Improvements Applied + +Compared to the 2026-03-06 benchmark: +1. **Bazel 9.0.0** (was Bazel 8.4.2) +2. **Multiplex sandboxing enabled** (`--experimental_worker_multiplex_sandboxing`) +3. **Sandboxed fallback** (`worker,sandboxed` instead of `worker,local`) +4. **RustcMetadata strategy** added (`--strategy=RustcMetadata=worker,sandboxed`) +5. **CGU fix** (`-Ccodegen-units=16` with incremental) from prior analysis + +## Remaining improvements needed + +The `worker-pipe+incr-rb` result is only valid for iteration 1 (same issue as previous benchmark). +Fix the benchmark script to avoid clearing incremental cache between rebuild primes. + +--- + +## 2026-03-10 Focused Follow-up: `//zm_cli:zm_cli_lib` + +To separate SDK-level cache effects from actual worker sandbox cost, a colder Rust-heavy target +in the `//sdk` graph was measured with remote cache hits disabled: + +| Config | Wall (s) | Crit path (s) | Actions | Workers | Sandbox | +|---|---|---|---|---|---| +| `worker-pipe` | 69.6 | 43.34 | 746 | 736 | 9 | +| `no-pipeline` | 115.0 | 75.91 | 927 | 0 | 912 | + +For the `worker-pipe` run, pipeline logs from 280 metadata actions show: + +- `stage_ms`: 55.4s total, 197.9ms average per action, p90 392ms, p99 1083ms +- `setup_ms`: 56.1s total, effectively identical to `stage_ms` +- metadata output materialization: 29ms total, all via hardlinks, zero copies +- staged inputs: 295,571 declared inputs, all preserved as symlinks, zero file copies + +This narrows the remaining problem: + +- On a real cold Rust-heavy target, sandboxed worker pipelining still clearly helps. +- The dominant residual overhead is staged-input setup, even when it is entirely symlink-based. +- Output copying is not the bottleneck for the safe path. 
+ +So the next safe optimization direction is staged-execroot reuse or narrower input staging, not +further output materialization work. + +--- + +## 2026-03-10 Profile Follow-up: `//sdk/sdk_builder:sdk_builder_lib` + +To check whether the `//sdk` slowdown was mostly top-level packaging or a broader graph issue, I +profiled `//sdk/sdk_builder:sdk_builder_lib` with Bazel profiles in both modes. + +Profiled comparison: + +| Config | Wall (s) | Crit path (s) | Processes | +|---|---|---|---| +| `no-pipeline` | 19.4 | 12.25 | 231 `linux-sandbox` | +| `worker-pipe` | 23.0 | 14.03 | 228 `worker`, 4 `linux-sandbox` | + +Key findings: + +- The slowdown is **not** primarily a top-level packaging/linking issue. In both profiles, the + critical path ends in Rust compilation work, not a final binary/packaging action. +- The worker profile shows a large new bucket in worker setup: + - `worker_preparing`: 91.8s total across 199 events, 461ms average + - `worker_working`: 71.6s total across 126 events, 568ms average +- `ACTION_EXECUTION` grows from 116.1s to 191.2s summed across threads, while analysis/load phases + only increase modestly. +- The process_wrapper pipeline logs for the same run show setup time is almost entirely input + staging: + - 226 metadata actions + - `stage_ms`: 29.9s total, 132ms average, p90 238ms + - `setup_ms`: 30.4s total, 134ms average + - sandbox symlink seeding: 4ms total + - worker entry seeding: 35ms total + - output materialization remains negligible + +Interpretation: + +- `sdk_builder_lib` is a genuine mixed-graph regression case for sandboxed `worker-pipe`, not a + misleading artifact of the final `//sdk` action. +- The dominant loss is still pre-execution setup in the pipelined worker path, especially staged + input construction. +- On this target, pipelining does not recover that cost through a shorter critical path; the + critical path is slightly worse under sandboxed `worker-pipe`. 
diff --git a/thoughts/shared/bench_sdk_progress.log b/thoughts/shared/bench_sdk_progress.log new file mode 100644 index 0000000000..3e4cc31eaa --- /dev/null +++ b/thoughts/shared/bench_sdk_progress.log @@ -0,0 +1,153 @@ +[bench] === Iteration 1 / 5 === +[bench] [cold] no-pipeline +[bench] [cold] worker-pipe +[bench] [cold] worker-pipe+incr +[bench] [rebuild] no-pipeline-rb +[bench] priming... +INFO: 1087 processes: 3691 action cache hit, 453 disk cache hit, 43 internal, 590 linux-sandbox, 1 local. +INFO: Build completed successfully, 1087 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] [rebuild] worker-pipe-rb +[bench] priming... +INFO: 1661 processes: 4024 action cache hit, 453 disk cache hit, 43 internal, 15 linux-sandbox, 1150 worker. +INFO: Build completed successfully, 1661 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] [rebuild] worker-pipe+incr-rb +[bench] priming... +INFO: 1223 processes: 4462 action cache hit, 56 disk cache hit, 1 internal, 1 local, 1165 worker. +INFO: Build completed successfully, 1223 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] iteration 1 done. +[bench] === Iteration 2 / 5 === +[bench] [cold] no-pipeline +[bench] [cold] worker-pipe +[bench] [cold] worker-pipe+incr +[bench] [rebuild] no-pipeline-rb +[bench] priming... +INFO: 1087 processes: 3691 action cache hit, 1043 disk cache hit, 43 internal, 1 local. +INFO: Build completed successfully, 1087 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] [rebuild] worker-pipe-rb +[bench] priming... 
+INFO: 1661 processes: 4024 action cache hit, 1618 disk cache hit, 43 internal. +INFO: Build completed successfully, 1661 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] [rebuild] worker-pipe+incr-rb +[bench] priming... +INFO: 1223 processes: 4462 action cache hit, 1221 disk cache hit, 1 internal, 1 local. +INFO: Build completed successfully, 1223 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] iteration 2 done. +[bench] === Iteration 3 / 5 === +[bench] [cold] no-pipeline +[bench] [cold] worker-pipe +[bench] [cold] worker-pipe+incr +[bench] [rebuild] no-pipeline-rb +[bench] priming... +INFO: 1087 processes: 3691 action cache hit, 1043 disk cache hit, 43 internal, 1 local. +INFO: Build completed successfully, 1087 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] [rebuild] worker-pipe-rb +[bench] priming... +INFO: 1661 processes: 4024 action cache hit, 1618 disk cache hit, 43 internal. +INFO: Build completed successfully, 1661 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] [rebuild] worker-pipe+incr-rb +[bench] priming... +INFO: 1223 processes: 4462 action cache hit, 1221 disk cache hit, 1 internal, 1 local. +INFO: Build completed successfully, 1223 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] iteration 3 done. +[bench] === Iteration 4 / 5 === +[bench] [cold] no-pipeline +[bench] [cold] worker-pipe +[bench] [cold] worker-pipe+incr +[bench] [rebuild] no-pipeline-rb +[bench] priming... 
+INFO: 1087 processes: 3691 action cache hit, 1043 disk cache hit, 43 internal, 1 local. +INFO: Build completed successfully, 1087 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] [rebuild] worker-pipe-rb +[bench] priming... +INFO: 1661 processes: 4024 action cache hit, 1618 disk cache hit, 43 internal. +INFO: Build completed successfully, 1661 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] [rebuild] worker-pipe+incr-rb +[bench] priming... +INFO: 1223 processes: 4462 action cache hit, 1221 disk cache hit, 1 internal, 1 local. +INFO: Build completed successfully, 1223 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] iteration 4 done. +[bench] === Iteration 5 / 5 === +[bench] [cold] no-pipeline +[bench] [cold] worker-pipe +[bench] [cold] worker-pipe+incr +[bench] [rebuild] no-pipeline-rb +[bench] priming... +INFO: 1087 processes: 3691 action cache hit, 1043 disk cache hit, 43 internal, 1 local. +INFO: Build completed successfully, 1087 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] [rebuild] worker-pipe-rb +[bench] priming... +INFO: 1661 processes: 4024 action cache hit, 1618 disk cache hit, 43 internal. +INFO: Build completed successfully, 1661 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] [rebuild] worker-pipe+incr-rb +[bench] priming... +INFO: 1223 processes: 4462 action cache hit, 1221 disk cache hit, 1 internal, 1 local. 
+INFO: Build completed successfully, 1223 total actions +[bench] modified /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] reverted /var/mnt/dev/reactor-repo-2/lib/hash/src/lib.rs +[bench] iteration 5 done. +[bench] Benchmark complete. + +[2026-03-10] Focused cold instrumentation run on //zm_cli:zm_cli_lib with remote cache hits disabled + +- worker-pipe: + - command shape: `bazel build //zm_cli:zm_cli_lib --remote_accept_cached=false --remote_upload_local_results=false --@rules_rust//rust/settings:pipelined_compilation=true --@rules_rust//rust/settings:experimental_worker_pipelining=true --experimental_worker_multiplex_sandboxing --strategy=Rustc=worker,sandboxed --strategy=RustcMetadata=worker,sandboxed --@rules_rust//rust/settings:extra_rustc_flag=--cfg=bench_zmcli_instr_workerpipe` + - result: `wall_ms=69589`, `crit_s=43.34`, `total_actions=746`, `worker_count=736`, `sandbox_count=9` +- no-pipeline: + - command shape: `bazel build //zm_cli:zm_cli_lib --remote_accept_cached=false --remote_upload_local_results=false --@rules_rust//rust/settings:pipelined_compilation=false --@rules_rust//rust/settings:experimental_worker_pipelining=false --@rules_rust//rust/settings:extra_rustc_flag=--cfg=bench_zmcli_instr_nopipe` + - result: `wall_ms=115045`, `crit_s=75.91`, `total_actions=927`, `worker_count=0`, `sandbox_count=912` + +Pipeline log aggregation for the same worker-pipe run (280 metadata actions): + +- `stage_ms`: total `55408`, avg `197.89`, p50 `139`, p90 `392`, p99 `1083`, max `1233` +- `setup_ms`: total `56093`, avg `200.33`, p50 `142`, p90 `396`, p99 `1086`, max `1237` +- metadata materialization: total `29ms` across `280` files, all hardlinked, zero copies +- staged inputs: `295571` declared inputs, all preserved as symlinks, zero hardlinks/copies + +Interpretation: + +- On this colder Rust-heavy target, sandboxed worker pipelining is still decisively faster than no-pipeline. 
+- The remaining sandbox overhead is almost entirely staged-input setup time; output materialization is negligible. +- The next optimization target should stay on `stage_request_inputs()` and staged-execroot reuse, not output copying. + +[2026-03-10] Additional cold no-cache matrix for //sdk dependency targets + +- `//sdk/sdk_builder:sdk_builder_lib` + - `no-pipeline`: `wall_ms=56048`, `crit_s=41.46`, `total_actions=449`, `worker_count=0`, `sandbox_count=445` + - `worker-pipe`: `wall_ms=60284`, `crit_s=41.45`, `total_actions=554`, `worker_count=228`, `sandbox_count=323` +- `//helium/asset_manager:asset_manager` + - `no-pipeline`: `wall_ms=58797`, `crit_s=37.20`, `total_actions=641`, `worker_count=0`, `sandbox_count=507` + - `worker-pipe`: `wall_ms=72561`, `crit_s=40.13`, `total_actions=666`, `worker_count=442`, `sandbox_count=90` + +Takeaway: + +- `//sdk` is not an isolated anomaly. +- Sandbox `worker-pipe` wins clearly when the target is dominated by Rust metadata/full-compile worker actions (`//zm_cli:zm_cli_lib`). +- It loses on more mixed targets where a substantial share of the graph still runs outside the pipelined worker path (`sdk_builder_lib`, `asset_manager`, and the full `//sdk` target). 
diff --git a/thoughts/shared/bench_sdk_raw.csv b/thoughts/shared/bench_sdk_raw.csv new file mode 100644 index 0000000000..70e90b4468 --- /dev/null +++ b/thoughts/shared/bench_sdk_raw.csv @@ -0,0 +1,31 @@ +iter,config,wall_ms,wall_s,crit_s,total_actions,worker_count,sandbox_count +1,no-pipeline,157741,157.7,106.63,1086,0,1043 +1,worker-pipe,126874,126.9,72.77,1661,1150,15 +1,worker-pipe+incr,97381,97.4,78.77,1167,1165,0 +1,no-pipeline-rb,26438,26.4,25.87,106,0,105 +1,worker-pipe-rb,27624,27.6,26.95,174,106,67 +1,worker-pipe+incr-rb,4310,4.3,3.84,64,7,0 +2,no-pipeline,86856,86.9,71.85,1087,0,590 +2,worker-pipe,79841,79.8,52.59,1676,1150,15 +2,worker-pipe+incr,109799,109.8,82.11,1167,1165,0 +2,no-pipeline-rb,28022,28.0,27.49,106,0,105 +2,worker-pipe-rb,29418,29.4,28.83,174,106,67 +2,worker-pipe+incr-rb,21176,21.2,20.59,174,117,0 +3,no-pipeline,86055,86.1,72.46,1087,0,590 +3,worker-pipe,87662,87.7,53.41,1676,1150,15 +3,worker-pipe+incr,109580,109.6,82.51,1167,1165,0 +3,no-pipeline-rb,28596,28.6,28.05,106,0,105 +3,worker-pipe-rb,29962,30.0,29.38,174,106,67 +3,worker-pipe+incr-rb,21503,21.5,21.01,174,117,0 +4,no-pipeline,87759,87.8,71.19,1087,0,590 +4,worker-pipe,85072,85.1,55.23,1676,1150,15 +4,worker-pipe+incr,110241,110.2,82.63,1167,1165,0 +4,no-pipeline-rb,28258,28.3,27.69,106,0,105 +4,worker-pipe-rb,28717,28.7,28.20,174,106,67 +4,worker-pipe+incr-rb,21360,21.4,20.85,174,117,0 +5,no-pipeline,86292,86.3,70.73,1087,0,590 +5,worker-pipe,86607,86.6,53.24,1676,1150,15 +5,worker-pipe+incr,106365,106.4,80.83,1167,1165,0 +5,no-pipeline-rb,28515,28.5,27.96,106,0,105 +5,worker-pipe-rb,28888,28.9,28.05,174,106,67 +5,worker-pipe+incr-rb,21467,21.5,20.93,174,117,0 diff --git a/thoughts/shared/benchmark_analysis.md b/thoughts/shared/benchmark_analysis.md new file mode 100644 index 0000000000..12b61e1e63 --- /dev/null +++ b/thoughts/shared/benchmark_analysis.md @@ -0,0 +1,158 @@ +# Rust Pipelining Benchmark Analysis + +**Date:** 2026-03-05 +**Iterations:** 10 +**Machine:** Linux 
6.17.7, x86_64 +**Parallelism:** 16 jobs +**Rustc:** 1.91.1 (ed61e7d7e 2025-11-07) + +## Workload + +Five widely-used crates covering common real-world patterns (networking, serialization, +proc-macros, async runtime): + +| Target | Type | Notes | +|---|---|---| +| tokio 1.49.0 | async runtime | fs, io, macros, net, process, rt-multi-thread, signal, sync, time | +| serde_json 1.0.149 | serialization | default features | +| cookie_store 0.22.1 | HTTP cookies | transitive via reqwest | +| hyper 1.8.1 | HTTP library | full features | +| reqwest 0.12.28 | HTTP client | cookies, http2, json, rustls-tls, stream | + +## Configurations + +| Config | Description | Actions | Strategy | +|---|---|---|---| +| **no-pipeline** | `pipelined_compilation=false` | 143 | 142 linux-sandbox | +| **hollow-rlib** | `pipelined_compilation=true` (-Zno-codegen + full) | 251 | 217 linux-sandbox | +| **worker-pipe** | `pipelined_compilation=true` + `experimental_worker_pipelining=true` | 115 | 114 multiplex-worker | +| **cargo** | `cargo build` (pipelining enabled by default) | — | 116 crates | + +**Bazel methodology:** Each iteration starts with `bazel shutdown && bazel clean` and a fresh +disk cache. A warmup build populates the disk cache with C/build-script actions (ring, aws-lc-sys, +proc-macro compilation for exec-platform). The three Bazel configs then measure only Rust +compilation time; each uses a unique `--cfg` flag to force Rust re-compilation while sharing +the cached non-Rust actions. The worker-pipe config used Bazel's default +`--worker_max_multiplex_instances=8` (see note in worker-pipe analysis section). + +**Cargo methodology:** Each iteration runs `cargo clean && cargo build -j 16` from a project +with matching crate versions and features. Cargo builds include ring's build.rs (C assembly +compilation, ~0.5s) and all proc-macro compilations. Cargo uses `ring` as TLS backend; +Bazel uses `aws-lc-rs` (heavier C build, cached in warmup). 
+ +## Results Summary + +``` + WALL TIME (seconds) +Config Mean Median Min Max Stdev CV +───────────── ───── ────── ──── ──── ───── ──── +no-pipeline 20.7 20.5 19.9 21.9 0.64 3.1% +hollow-rlib 11.4 11.3 11.0 12.1 0.34 3.0% +worker-pipe 8.4 8.5 8.1 8.8 0.29 3.4% +cargo 8.2 8.1 7.6 9.1 0.50 6.1% +``` + +``` + CRITICAL PATH (seconds, Bazel only) +Config Mean Stdev Overhead (wall - crit) +───────────── ───── ───── ────────────────────── +no-pipeline 19.46 0.56 1.23s +hollow-rlib 9.91 0.27 1.50s +worker-pipe 7.99 0.29 0.45s +``` + +## Speedups + +| Comparison | Speedup | Ratio | +|---|---|---| +| hollow-rlib vs no-pipeline | 44.9% faster | 1.81x | +| worker-pipe vs no-pipeline | 59.3% faster | 2.45x | +| cargo vs no-pipeline | 60.3% faster | 2.52x | +| worker-pipe vs hollow-rlib | 26.1% faster | 1.35x | +| worker-pipe vs cargo | ~2.7% slower | 0.97x | + +## Raw Data (all 10 iterations) + +``` +Iter no-pipeline hollow-rlib worker-pipe cargo +──── ─────────── ─────────── ─────────── ───── + 1 20.4s 11.4s 8.6s 8.1s + 2 20.4s 11.5s 8.4s 8.1s + 3 20.8s 11.6s 8.6s 8.4s + 4 20.5s 11.0s 8.0s 7.6s + 5 19.9s 11.1s 8.0s 7.5s + 6 20.1s 11.1s 8.1s 7.8s + 7 20.1s 11.1s 8.0s 7.8s + 8 21.9s 11.1s 8.7s 9.1s + 9 20.8s 12.1s 8.7s 8.7s +10 21.5s 11.6s 8.6s 8.6s +``` + +## Analysis + +### Worker pipelining closes the gap with Cargo + +Worker pipelining (8.4s mean) is within 3% of Cargo's build time (8.2s mean). The +remaining gap is attributable to Bazel's per-action overhead (sandbox setup, action cache +lookups, output file hashing). Notably, the worker-pipe config has the lowest Bazel +overhead at 0.45s (wall minus critical path), vs 1.23s for no-pipeline and 1.50s for +hollow-rlib. This is because worker-pipe runs 114 actions via multiplex workers (no +sandbox overhead), while hollow-rlib runs 217 sandboxed actions. 
+ +### Why hollow-rlib has more actions but is faster than no-pipeline + +Hollow-rlib creates 251 actions (2 per pipelined crate: one metadata + one full) vs +143 for no-pipeline. Despite the higher action count, hollow-rlib is 1.81x faster +because downstream crates can begin compilation as soon as the metadata action +(~50ms with -Zno-codegen) completes, without waiting for the upstream's full codegen. +This parallelism reduces the critical path from 19.5s to 9.9s. + +### Worker-pipe achieves the best critical path + +Worker-pipe's critical path (8.0s) is 19% shorter than hollow-rlib's (9.9s). This +improvement comes from eliminating the double-compilation overhead: hollow-rlib runs +rustc twice per pipelined crate (once for metadata, once for full), while worker-pipe +runs rustc once (the worker returns .rmeta early from the same process that produces +the .rlib). The single-invocation approach saves ~0.5s of per-crate rustc startup time +across the dependency graph. + +### Action count reduction + +Worker-pipe uses only 115 total actions (114 worker + 1 internal), compared to 251 for +hollow-rlib. Each pipelined rlib crate produces just one worker action instead of two +sandboxed actions. This 54% reduction in action count also reduces Bazel's scheduling +and I/O overhead. + +### Variance + +All configurations show low variance (CV 3.0-3.4%) for the Bazel configs, indicating +stable, reproducible measurements. Cargo has slightly higher variance (CV 6.1%), +likely due to ring's build.rs C compilation being more sensitive to system load. +Iteration 8 was an outlier across all configs (hottest for no-pipeline at 21.9s and +cargo at 9.1s), suggesting transient system load. + +### Caveats + +1. **Bazel times exclude C/build-script compilation** (cached in warmup); Cargo times + include ring's build.rs (~0.5s C assembly). Subtracting ring's overhead would bring + Cargo to ~7.7s, making worker-pipe ~9% slower than Cargo's pure Rust time. + +2. 
**Different TLS backends:** Bazel builds use aws-lc-rs (heavier C build, cached); + Cargo builds use ring (lighter C build, included in timing). + +3. **Bazel uses linux-sandbox strategy** for non-worker actions, adding per-action + overhead vs Cargo's direct process spawning. + +4. **Exec-platform crates** (build scripts, proc-macros) use hollow-rlib mode in all + three Bazel configs to maintain consistent SVH (stable version hash). Worker + pipelining applies only to target-configuration rlib/lib crates. + +5. **`--worker_max_multiplex_instances` tuning:** A follow-up run with + `--worker_max_multiplex_instances=Rustc=32` (allowing up to 16 concurrent rustc + processes on this 16-CPU machine) measured **9.2s mean** — 9.6% *slower* than the + default-8 run. With 16 concurrent rustc processes competing for 16 CPUs, each process + gets ~1 CPU, whereas the default-8 limit gives each process ~2 CPUs. The reduced + per-process CPU availability hurts LLVM codegen parallelism and increases context- + switching overhead. Bazel's default of 8 coincides with CPU_count/2, which is the + empirically optimal concurrency for rustc on this machine. The settings.bzl + recommendation has been updated accordingly: do not exceed ~CPU_count/2 for this flag. 
diff --git a/thoughts/shared/benchmark_raw_data.csv b/thoughts/shared/benchmark_raw_data.csv new file mode 100644 index 0000000000..6c56dc4e35 --- /dev/null +++ b/thoughts/shared/benchmark_raw_data.csv @@ -0,0 +1,52 @@ +iteration,config,wall_ms,wall_s,total_actions,executed_processes,critical_path_s,worker_count,sandbox_count,cache_hit_count,disk_cache_hit_count,crates_compiled +1,no-pipeline,20415,20.4,143,143,19.21,0,142,218,0, +1,hollow-rlib,11485,11.4,251,251,9.94,0,217,236,33, +1,worker-pipe,8656,8.6,115,115,8.21,114,0,269,0, +1,cargo,8146,8.1,,,,,,,,116 +2,no-pipeline,20470,20.4,143,143,19.26,0,142,218,0, +2,hollow-rlib,11558,11.5,251,251,10.06,0,217,236,33, +2,worker-pipe,8474,8.4,115,115,7.86,114,0,269,0, +2,cargo,8121,8.1,,,,,,,,116 +3,no-pipeline,20886,20.8,143,143,19.65,0,142,218,0, +3,hollow-rlib,11698,11.6,251,251,10.07,0,217,236,33, +3,worker-pipe,8666,8.6,115,115,8.22,114,0,269,0, +3,cargo,8409,8.4,,,,,,,,116 +4,no-pipeline,20593,20.5,143,143,19.33,0,142,218,0, +4,hollow-rlib,11030,11.0,251,251,9.71,0,217,236,33, +4,worker-pipe,8074,8.0,115,115,7.73,114,0,269,0, +4,cargo,7637,7.6,,,,,,,,116 +5,no-pipeline,19922,19.9,143,143,18.70,0,142,218,0, +5,hollow-rlib,11122,11.1,251,251,9.82,0,217,236,33, +5,worker-pipe,8078,8.0,115,115,7.55,114,0,269,0, +5,cargo,7590,7.5,,,,,,,,116 +6,no-pipeline,20146,20.1,143,143,18.98,0,142,218,0, +6,hollow-rlib,11110,11.1,251,251,9.63,0,217,236,33, +6,worker-pipe,8197,8.1,115,115,7.77,114,0,269,0, +6,cargo,7897,7.8,,,,,,,,116 +7,no-pipeline,20184,20.1,143,143,19.07,0,142,218,0, +7,hollow-rlib,11176,11.1,251,251,9.71,0,217,236,33, +7,worker-pipe,8098,8.0,115,115,7.70,114,0,269,0, +7,cargo,7810,7.8,,,,,,,,116 +8,no-pipeline,21928,21.9,143,143,20.45,0,142,218,0, +8,hollow-rlib,11185,11.1,251,251,9.56,0,217,236,33, +8,worker-pipe,8712,8.7,115,115,8.34,114,0,269,0, +8,cargo,9130,9.1,,,,,,,,116 +9,no-pipeline,20843,20.8,143,143,19.69,0,142,218,0, +9,hollow-rlib,12100,12.1,251,251,10.43,0,217,236,33, 
+9,worker-pipe,8752,8.7,115,115,8.34,114,0,269,0, +9,cargo,8708,8.7,,,,,,,,116 +10,no-pipeline,21562,21.5,143,143,20.28,0,142,218,0, +10,hollow-rlib,11635,11.6,251,251,10.12,0,217,236,33, +10,worker-pipe,8606,8.6,115,115,8.13,114,0,269,0, +10,cargo,8625,8.6,,,,,,,,116 +# worker-pipe-32: re-run with --worker_max_multiplex_instances=Rustc=32 (instances=default 8 above) +1,worker-pipe-32,9267,9.2,115,115,8.09,114,0,269,0, +2,worker-pipe-32,8777,8.7,115,115,8.10,114,0,269,0, +3,worker-pipe-32,8794,8.7,115,115,8.14,114,0,269,0, +4,worker-pipe-32,8943,8.9,115,115,7.82,114,0,269,0, +5,worker-pipe-32,8903,8.9,115,115,8.07,114,0,269,0, +6,worker-pipe-32,9599,9.5,115,115,8.84,114,0,269,0, +7,worker-pipe-32,9266,9.2,115,115,8.10,114,0,269,0, +8,worker-pipe-32,9386,9.3,115,115,8.65,114,0,269,0, +9,worker-pipe-32,9629,9.6,115,115,8.79,114,0,269,0, +10,worker-pipe-32,9504,9.5,115,115,8.25,114,0,269,0, diff --git a/thoughts/shared/plans/2026-03-25-consolidated-worker-pipelining-plan.md b/thoughts/shared/plans/2026-03-25-consolidated-worker-pipelining-plan.md new file mode 100644 index 0000000000..e298dec7b2 --- /dev/null +++ b/thoughts/shared/plans/2026-03-25-consolidated-worker-pipelining-plan.md @@ -0,0 +1,441 @@ +# Consolidated Plan: Rust Worker Pipelining and Multiplex Sandboxing + +## Status + +Canonical reference document. + +This is now the only plan file for this design area. Earlier dated working notes were removed after +their still-useful conclusions were merged here. + +## Purpose + +The original plan stack was useful while the design was moving quickly, but it left multiple +mutually incompatible states reading as if they were all current at once. This document keeps only +what should survive that cleanup: + +- what is actually implemented on this branch, +- which approaches failed, were abandoned, or were superseded and why, +- which conclusions still hold, +- and which contract-sensitive questions remain open. 
+
+It intentionally preserves the design history without preserving the old file stack as a second
+source of truth.
+
+## Current Implementation On This Branch
+
+The current branch has the following behavior:
+
+1. Worker-managed pipelining exists for pipelined `rlib` and `lib` crates.
+2. Metadata and full actions are wired to the same worker key by:
+   - moving per-request process-wrapper flags into the param file,
+   - moving per-crate environment into an env file,
+   - suppressing companion `--output-file` artifacts that would otherwise perturb startup args,
+   - and aligning the worker-facing action shape so the metadata and full requests can share
+     in-process state.
+3. In sandboxed mode, rustc now runs with `cwd = sandbox_dir`.
+4. The worker still redirects `--out-dir` to worker-owned `_pw_state/pipeline/<id>/outputs/`
+   and copies declared outputs back into Bazel-visible output locations later.
+5. The background rustc process still spans the metadata response and the later full request.
+6. The older two-invocation hollow-rlib path still exists and remains the important fallback /
+   compatibility path.
+7. Incremental-compilation and dynamic-execution wiring both exist, but the sandboxed
+   worker-pipelining path should still be treated as contract-sensitive and experimental rather
+   than as a fully settled final architecture.
+
+The important negative statement is:
+
+- the current branch is **not** using staged execroot reuse,
+- **not** using cross-process stage pools,
+- **not** using resolve-through to the real execroot as the current sandbox story,
+- and **not** using the alias-root (`__rr`) design.
+
+## Bazel Contract Constraints That Still Matter
+
+Any future design should continue to treat Bazel's documented worker behavior as the contract:
+
+1. Multiplex sandboxing is rooted at `sandbox_dir`.
+2. The worker protocol expects per-request output to be returned through `WorkResponse`.
+3. 
Once a worker has responded to a request, any continued touching of worker-visible files is + contract-sensitive and should not be hand-waved away by older strace-based reasoning. +4. If cancellation is advertised, the worker must not rely on "best effort" semantics that leave a + request mutating outputs after the cancel response unless that behavior is intentionally + documented as a limitation. + +This consolidated plan does not try to re-litigate the Bazel documentation. It simply records that +future design work should start from the documented contract, not from superseded assumptions in the +older plan files. + +## Sandbox Contract Compliance Analysis + +This section records what is known about how the current implementation interacts with the two +primary rules of the Bazel multiplex sandbox contract. + +### The Two Rules + +From [Creating Persistent Workers](https://bazel.build/remote/creating): + +- **Rule 1**: The worker must use `sandbox_dir` as a prefix for all file reads and writes. +- **Rule 2**: "Once a response has been sent for a WorkRequest, the worker must not touch the files + in its working directory. The server is free to clean up the files, including temporary files." + +### How The Current Implementation Addresses Each Rule + +**Rule 1 (sandbox_dir for all I/O):** +Satisfied. In sandboxed mode, rustc runs with `cwd = sandbox_dir` (`worker_pipeline.rs`, +`create_pipeline_context`). All relative paths in rustc args (`--extern`, `-Ldependency`, source +files) resolve against `sandbox_dir`. Outputs are redirected to `_pw_state/pipeline//outputs/` +(a persistent worker-owned directory outside the sandbox). + +**Rule 2 (no file access after response):** +The metadata `WorkResponse` is sent as soon as `.rmeta` is emitted. The background rustc continues +doing codegen. The safety argument has three layers: + +1. 
**Rustc architecture**: metadata is encoded at the boundary between analysis and codegen
+   (`rustc_interface/src/passes.rs`, `start_codegen` → `encode_and_write_metadata` →
+   `codegen_crate`). All parsing, type checking, borrow checking, and MIR passes complete before
+   metadata. Source files are read once during parsing into `Arc<String>` in the `SourceMap` (no
+   re-reads). Dependency `.rmeta` files are memory-mapped once during name resolution into
+   `CrateMetadata` in the `CStore`. Proc macros are fully expanded during parsing.
+
+2. **Empirical verification**: strace on rustc 1.94.0 across three cases (simple deps,
+   `include_str!`, serde derive proc macro) confirmed zero input file reads after `.rmeta` emission.
+   FDs to input files are fully closed before the `.rmeta` write, not just unused.
+
+3. **Output isolation**: `--out-dir` is redirected to `_pw_state/pipeline/<key>/outputs/`, so all
+   codegen writes (`.o`, `.rlib`, `.d`) go to a persistent worker-owned directory outside
+   `sandbox_dir`.
+
+### Strength of the Evidence
+
+The practical safety story is strong: rustc's compilation pipeline architecture guarantees input I/O
+is complete before metadata emission, the strace evidence confirms it, and Linux mmap semantics
+provide an additional safety net (mmap survives `unlink`).
+
+The contractual story is weaker: we rely on undocumented rustc implementation details (the
+compilation pipeline ordering is not a stable API), and we operate outside the documented Bazel
+worker contract (no precedent for background work spanning two requests). The strace evidence covers
+sampled rustc versions and crate shapes, not all possible configurations.
+
+### Known Caveats
+
+1. **Incremental compilation**: `-C incremental=<dir>` causes reads and writes to the incremental
+   cache during codegen. The incremental directory must be outside `sandbox_dir` (currently placed
+   in `_pw_state/pipeline/<key>/`).
+
+2. **mmap page faults**: dependency metadata is mmap'd. 
On Linux, mmap holds an inode reference so + file deletion doesn't break access. Cross-platform behavior is less well characterized. + +3. **Cancellation**: the cancel handler must kill the background rustc to prevent wasted CPU and + ensure no further file mutation after a cancel response. The full-phase cancellation gap (where + the full handler has taken the `BackgroundRustc` from `PipelineState`) is addressed by the + request-ID index design in the "Cancellation Direction" section below. + +### Sources + +- [rustc compilation pipeline — passes.rs](https://github.com/rust-lang/rust/blob/master/compiler/rustc_interface/src/passes.rs) — `encode_and_write_metadata` called before `codegen_crate` +- [Libraries and metadata — Rustc Dev Guide](https://rustc-dev-guide.rust-lang.org/backend/libs-and-metadata.html) — "As early as it can, rustc will save the rmeta file to disk before it continues to the code generation phase" +- [Pipelining stabilization — rust-lang/rust#60988](https://github.com/rust-lang/rust/issues/60988) — "metadata is now generated right at the start of code generation" +- [SourceMap — rustc_span](https://github.com/rust-lang/rust/blob/main/compiler/rustc_span/src/source_map.rs) — source files read via `read_to_string` into `Arc`, no re-reads +- [Mmap for rmeta — rust-lang/rust#55556](https://github.com/rust-lang/rust/pull/55556) — dependency metadata mmap'd once +- [Creating Persistent Workers — Bazel](https://bazel.build/remote/creating) — "must not touch the files in its working directory" after response +- [Multiplex Workers — Bazel](https://bazel.build/remote/multiplex) — sandbox_dir contract +- [SandboxedWorkerProxy.java — bazelbuild/bazel](https://github.com/bazelbuild/bazel/blob/master/src/main/java/com/google/devtools/build/lib/worker/SandboxedWorkerProxy.java) — sandbox_dir lifecycle (cleaned before next request, not deleted after response) + +## Aborted, Failed, And Superseded Approaches + +| Approach | Outcome | Why It Stopped | What To 
Keep | +| --- | --- | --- | --- | +| Initial worker-managed one-rustc pipelining | Partially landed | The core model was useful, but later plan layers overstated how settled the sandboxed form was | Keep the worker-managed metadata-to-full handoff, the worker protocol handling, and the hollow-rlib fallback | +| Per-worker staged execroot reuse | Abandoned | Measured reuse was effectively nonexistent under actual multiplex-sandbox worker lifetimes, so the added slot and manifest machinery optimized the wrong boundary | Keep the evidence that worker-side restaging was real overhead and that early `.rmeta` still helped the critical path | +| Cross-process shared stage pool | Abandoned before a prototype landed | It added even more leasing and invalidation complexity, and part of the motivation was later explained by worker-key fragmentation rather than a fundamentally shared-pool-sized problem | Keep the lesson that stable worker keys matter more than elaborate pool sharing | +| Resolve-through via the real execroot | Partially landed, then superseded | It materially reduced worker-side staging cost, but it reads outside `sandbox_dir` and therefore does not match Bazel's documented multiplex-sandbox contract | Keep the performance insight that removing worker-side restaging matters; do not treat the contract story as settled | +| Broad metadata input pruning as a cheap sandbox fix | Failed investigation | A broad pruning attempt regressed real builds with `E0463` missing-crate failures | Keep the rule that any future input narrowing must be trace-driven and validated against full graphs | +| Alias-root strict-sandbox alternative | Explored, not landed | It matched the `sandbox_dir` contract better, but its viability relied on strace-based reasoning about post-`.rmeta` rustc I/O and would require a larger rewrite and validation pass than justified so far | Keep the stricter contract framing and explicit kill criteria; do not treat the provisional Gate 0 reasoning as 
final product guidance | +| Promotion of sandboxed worker pipelining to a stable, final story | Deferred | Benchmark improvements arrived before cancellation, teardown, and background-lifetime questions were settled strongly enough | Keep the reminder that good local benchmark numbers are not enough to claim the sandboxed path is fully supported | + +## Historical Evidence Worth Keeping + +These points are worth preserving even though the documents that first recorded them are gone: + +1. Stable worker keys were a prerequisite, not a detail. + Earlier measurements that looked like proof of inherently short-lived workers were partly + distorted by per-action process-wrapper flags living in startup args. Moving those request- + specific flags into per-request files was necessary for metadata and full requests to share one + worker process and one in-process pipeline state. The key offenders were per-action + `--output-file`, `--env-file`, `--arg-file`, `--rustc-output-format`, and stamped-action + status-file flags. Earlier measurements that showed roughly one worker process per action were + therefore mixing a real worker-lifetime problem with avoidable worker-key fragmentation. + +2. The staged-execroot family failed for measured reasons, not just taste. + On the representative `//sdk` benchmarks, stage-pool reuse effectively stayed at one use per + slot, so the added reuse machinery delivered only weak overall improvement. The critical-path + win was coming from early metadata availability, not from successful staged-root reuse. One + benchmark pass recorded reuse staying at `1` across all 617 used slots, only about 7% overhead + improvement versus the pre-stage-pool baseline, and an unchanged critical-path win from early + `.rmeta`. + +3. Bazel-side sandbox preparation may still dominate some runs, but that conclusion is not + universal enough to carry as a standing benchmark narrative. 
+ One investigation captured Bazel-side prep at materially higher cost than worker-side staging, + which is worth remembering as a clue. It was not stable enough across later runs to keep as a + canonical result. + +4. The alias-root strict-sandbox investigation did produce real evidence, but only sampled + evidence. + In the sampled strace runs that motivated the alias-root work, rustc did not read inputs after + `.rmeta` emission for simple dependency, include-file, and proc-macro cases. That is useful + context for why the idea was explored, but it is still not strong enough to override Bazel's + documented contract or to serve as product-level proof. + +5. Shutdown and teardown behavior was a real investigation thread, not just a generic testing gap. + Earlier debugging found reproducible multiplex-worker teardown trouble around `bazel clean`, + including `SIGTERM`-driven worker death and Bazel-side "Could not parse json work request + correctly" storms. Even though that investigation did not fully settle the root cause, it is + part of why worker shutdown and cancellation coverage remain explicit open items. + +## Surviving Conclusions + +The following conclusions still appear sound and should survive the cleanup: + +1. Worker-key stabilization matters. + Metadata and full actions only share in-process pipeline state if their worker-facing startup + shape is intentionally normalized. + +2. The staged-execroot / stage-pool family is not the preferred direction. + It was useful as a diagnostic step, but too much of its complexity was compensating for + worker-side restaging cost rather than removing the real source of overhead. + +3. Broad analysis-time metadata input pruning is still too risky to treat as a cheap fix. + Earlier iterations recorded real regressions here. Any future narrowing should be + evidence-driven. + +4. The hollow-rlib path remains strategically important. 
+ It is still the stable fallback when the single-rustc worker-managed handoff is not acceptable + for a particular execution mode. + +5. Benchmark data should live in benchmark docs and raw data, not in the plan. + The plan files became stale in part because they mixed architecture decisions with quickly + changing measurement narratives. + +## Conclusions That Should No Longer Be Treated As Current + +The cleanup is specifically intended to stop the following stale conclusions from reading as live +guidance: + +1. "Resolve-through to the real execroot is the current sandboxed design." + This is no longer true on this branch. + +2. "The stage-pool or cross-process pool work is likely the path forward." + It is not. + +3. "Alias-root is implemented or is the active next step." + It is not implemented on this branch. + +4. "Strace-based evidence settled the background-rustc lifetime question for product purposes." + It did not. At most it provided an empirical clue about sampled rustc behavior. + +5. "Sandboxed worker pipelining is already the fully supported, final hermetic story." + The current branch still has contract-sensitive behavior here and should be documented that way. + +## Current Open Questions + +The plan surface is now much smaller. The remaining questions are concrete: + +1. What support level should sandboxed worker pipelining have right now? + - keep it experimental and document the contract caveats clearly, + - or split supported unsandboxed worker-pipelining from a stricter sandbox-safe mode. + +2. If strict sandbox compliance is required, what replaces the current one-rustc / two-request + handoff in sandboxed mode? + Candidate directions are: + - fall back to the hollow-rlib / two-invocation model for sandboxed and dynamic modes, + - or develop a new strict-sandbox design without relying on post-response background work. + +3. What cancellation and shutdown coverage is still missing? 
+ Current state: + - metadata-phase pipelined cancellation exists, + - full-phase pipelined cancellation still needs an atomic ownership-transfer fix so a full + request never becomes cancel-acknowledgeable before a kill path is registered, + - and non-pipelined worker requests still use acknowledge-only cancellation semantics and must + remain documented as such. + At minimum: + - cancellation during metadata phase with a live background rustc, + - cancellation during full phase across the metadata-to-full ownership handoff, + - worker shutdown with active pipeline entries, + - explicit `bazel clean` / teardown behavior for multiplex workers, + - metadata-cache-hit / full-request-fallback paths, + - dynamic execution with a real remote executor and explicit worker cancellation behavior. + +4. Which public docs should be downgraded from recommendation to experiment? + The settings docs and code comments should reflect the actual maturity of the sandboxed path. + +## Cancellation Direction + +Cancellation should be tightened using an atomic request-ownership design rather than treated as +fully settled. + +Goal: + +- every cancellable pipelined request ID must have a kill target installed before Bazel can receive + `wasCancelled=true` for that request. + +Design invariants: + +- a pipelined request must not become cancel-acknowledgeable until its cancel target is registered, +- ownership transfer from metadata request ID to full request ID must be atomic, +- and after a cancel response is sent, the worker must have either: + - killed the background rustc, + - or proven that the request already completed and no further file mutation can occur. + +Data model: + +- keep pipeline state keyed by pipeline key, +- add a second index from active request ID to pipeline key / phase, +- and avoid a bare PID as the primary abstraction so cancellation remains tied to owned process + state rather than to a reusable kernel identifier. + +Flow: + +1. 
Metadata phase:
+   - when storing `BackgroundRustc`, register the metadata request ID in the request-ID index in
+     the same critical section.
+2. Full phase:
+   - when the full request takes ownership of the background rustc, atomically rewrite the
+     request-ID index from metadata request ID to full request ID before releasing the state lock.
+3. Cancel:
+   - resolve request ID through the request-ID index,
+   - if it maps to a live pipelined entry, kill that entry before sending `wasCancelled=true`,
+   - if no mapping exists, treat the request as already completed and ignore the cancel.
+4. Cleanup:
+   - remove the request-ID mapping when the full handler finishes or when cancellation reaps the
+     child.
+
+Why this shape:
+
+- it closes the metadata-to-full handoff race,
+- it avoids acknowledging cancellation for a full request that still has no kill path,
+- and it keeps cancellation semantics tied to worker-owned process state instead of a raw PID.
+
+## Implementation: Contract Documentation, Strace Test, and Cancellation Fix
+
+This section contains concrete implementation tasks for the three workstreams described above.
+
+### Task 1: Add design documentation to worker_pipeline.rs
+
+Replace the module-level doc comment in `worker_pipeline.rs` (line 15) with a comprehensive doc
+covering the single-rustc pipelining architecture, sandbox contract compliance rationale (Rule 1
+and Rule 2), caveats (incremental, mmap, experimental status), and cancellation design. Include
+links to the rustc dev guide, passes.rs, SourceMap, and Bazel worker docs.
+
+### Task 2: Extend PipelineState with request_index and active_pids
+
+Add two new fields to `PipelineState` in `worker_pipeline.rs`:
+
+- `request_index: HashMap<RequestId, PipelineKey>` — maps active request IDs to pipeline keys.
+- `active_pids: HashMap<PipelineKey, Pid>` — maps pipeline keys to child PIDs, retained after the full
+   handler takes ownership of `BackgroundRustc`.
+
+Change `store()` to accept `request_id` and populate all three maps atomically. 
Add +`take_and_transfer(key, full_request_id)` that removes from `active`, rewrites `request_index` +(using `bg.metadata_request_id` for O(1) removal), and keeps `active_pids`. Add `pre_register()`, +`cleanup()`, and `kill_by_request_id()`. + +`kill_by_request_id` checks `active` first (metadata phase: `child.kill()` + `child.wait()`), then +falls back to `active_pids` (full phase: `libc::kill(pid, SIGKILL)`). The PID fallback has a +theoretical PID-reuse race (documented with a SAFETY comment); the window is microseconds because +`cleanup()` removes the PID immediately after `child.wait()` returns. If this becomes a concern, +upgrade to a shared `Child` handle. + +### Task 3: Update handle_pipelining_metadata + +Pass `request.request_id` to the new `store()` signature. No other changes needed — the metadata +handler already stores `BackgroundRustc` in a single critical section. + +### Task 4: Update handle_pipelining_full + +Replace `take(&key)` with `take_and_transfer(&key, request.request_id)`. Add +`cleanup(&key, request.request_id)` to all exit paths in the `Some(bg)` arm (success, error) and +to the `None` arm (fallback one-shot compilation). The `None` arm cleanup is required because +`pre_register()` from Task 5 will have inserted a `request_index` entry even when no +`BackgroundRustc` is found. + +### Task 5: Pre-register on main thread and early-cancel check in worker thread + +In `worker.rs`, on the main thread: + +1. After the cancel handler block and before `in_flight.insert()`, call + `pipeline_state.pre_register(request.request_id, key)` for pipelined requests. This ensures the + cancel target exists before the request becomes cancel-acknowledgeable. + +In the worker thread: + +2. After `detect_pipelining_mode()` and before `match pipelining`, check + `claim_flag.load(Ordering::SeqCst)`. If the flag is already set (cancel won the race), call + `cleanup()` and return early without starting rustc. 
This prevents wasted CPU and satisfies the + invariant: "after cancel response, the worker must have killed the background rustc or proven + that no further file mutation can occur." + +### Task 6: Replace kill_pipelined_request + +Replace the standalone `kill_pipelined_request` function with a thin wrapper that delegates to +`PipelineState::kill_by_request_id()`. + +### Task 7: Unit tests for PipelineState cancel tracking + +Add tests to `worker.rs`'s test module: + +- `test_pipeline_state_store_and_kill_metadata_phase` — store + kill via metadata request ID +- `test_pipeline_state_take_and_transfer_then_kill_full_phase` — store + transfer + kill via full + request ID (PID path) +- `test_pipeline_state_kill_nonexistent_request` — returns false +- `test_pipeline_state_pre_register` — pre-register + kill returns false (no process) +- `test_pipeline_state_cleanup_removes_all_entries` — cleanup after pre-register + +### Task 8: Strace regression test + +Add `test/unit/pipelined_compilation/strace_rustc_post_metadata_test.sh` as an `sh_test` tagged +`manual` and `local`, Linux-only. The test: + +1. Compiles a small crate (with a dependency and `include_str!`) under + `strace -f -e trace=openat,read,close`. +2. Finds the `.rmeta` write boundary in the strace log. +3. Asserts zero `openat()` calls referencing input files after the boundary. +4. Asserts all input FDs are closed before the boundary. +5. Prints the rustc version for traceability. + +This test is not part of the normal CI suite. Run manually per rustc version: +``` +bazel test //test/unit/pipelined_compilation:strace_rustc_post_metadata_test --test_output=streamed +``` + +### Task 9: Full test suite verification + +Run `cargo test` in `util/process_wrapper/` and +`bazel test //test/unit/pipelined_compilation:pipelined_compilation_test_suite` to confirm no +regressions. + +## Recommended Next Steps + +1. Keep this file as the single current plan. +2. 
Do not recreate a parallel dated plan stack for the same topic unless the problem scope changes + materially. +3. Move future benchmark updates into benchmark docs or raw-data summaries rather than back into + the plan stack. +4. Implement the tasks in the "Implementation" section above. +5. Make one explicit product decision about sandboxed worker pipelining: + - either narrow the supported scope and document the current limitations, + - or start a fresh strict-sandbox design from the remaining open questions above. +6. Update code comments and user-facing settings docs so they do not overstate the sandboxed + contract story. + +## Benchmark And Artifact References + +The following files remain useful and should not be collapsed into this plan: + +- `thoughts/shared/bench_sdk_analysis.md` +- `thoughts/shared/benchmark_analysis.md` +- `thoughts/shared/bench_sdk_raw.csv` +- `thoughts/shared/bench_cargo_raw.csv` +- `thoughts/shared/benchmark_raw_data.csv` + +Those files contain raw or summarized measurements. This file is only for architecture and status. 
diff --git a/util/process_wrapper/main.rs b/util/process_wrapper/main.rs index 2a7cbd8565..b6a3f266df 100644 --- a/util/process_wrapper/main.rs +++ b/util/process_wrapper/main.rs @@ -17,10 +17,11 @@ mod options; mod output; mod rustc; mod util; +mod worker; use std::collections::HashMap; #[cfg(windows)] -use std::collections::{HashSet, VecDeque}; +use std::collections::VecDeque; use std::fmt; use std::fs::{self, copy, OpenOptions}; use std::io; @@ -31,14 +32,14 @@ use std::time::{SystemTime, UNIX_EPOCH}; use tinyjson::JsonValue; -use crate::options::options; +use crate::options::{options, SubprocessPipeliningMode}; use crate::output::{process_output, LineOutput}; use crate::rustc::ErrorFormat; #[cfg(windows)] use crate::util::read_file_to_array; #[derive(Debug)] -struct ProcessWrapperError(String); +pub(crate) struct ProcessWrapperError(String); impl fmt::Display for ProcessWrapperError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -57,12 +58,12 @@ macro_rules! debug_log { } #[cfg(windows)] -struct TemporaryDirectoryGuard { +struct TemporaryFileGuard { path: Option, } #[cfg(windows)] -impl TemporaryDirectoryGuard { +impl TemporaryFileGuard { fn new(path: Option) -> Self { Self { path } } @@ -73,21 +74,22 @@ impl TemporaryDirectoryGuard { } #[cfg(windows)] -impl Drop for TemporaryDirectoryGuard { +impl Drop for TemporaryFileGuard { fn drop(&mut self) { if let Some(path) = self.path.take() { - let _ = fs::remove_dir_all(path); + // May be a file (argfile) or directory (consolidated deps dir). 
+ let _ = fs::remove_dir_all(&path); } } } #[cfg(not(windows))] -struct TemporaryDirectoryGuard; +struct TemporaryFileGuard; #[cfg(not(windows))] -impl TemporaryDirectoryGuard { +impl TemporaryFileGuard { fn new(_: Option) -> Self { - TemporaryDirectoryGuard + TemporaryFileGuard } fn take(&mut self) -> Option { @@ -95,22 +97,28 @@ impl TemporaryDirectoryGuard { } } +#[cfg(windows)] +struct ParsedDependencyArgs { + dependency_paths: Vec, + filtered_args: Vec, +} + #[cfg(windows)] fn get_dependency_search_paths_from_args( initial_args: &[String], -) -> Result<(Vec, Vec), ProcessWrapperError> { +) -> Result { let mut dependency_paths = Vec::new(); let mut filtered_args = Vec::new(); let mut argfile_contents: HashMap> = HashMap::new(); - let mut queue: VecDeque<(String, Option)> = initial_args - .iter() - .map(|arg| (arg.clone(), None)) - .collect(); + let mut queue: VecDeque<(String, Option)> = + initial_args.iter().map(|arg| (arg.clone(), None)).collect(); while let Some((arg, parent_argfile)) = queue.pop_front() { let target = match &parent_argfile { - Some(p) => argfile_contents.entry(format!("{}.filtered", p)).or_default(), + Some(p) => argfile_contents + .entry(format!("{}.filtered", p)) + .or_default(), None => &mut filtered_args, }; @@ -145,14 +153,31 @@ fn get_dependency_search_paths_from_args( })?; } - Ok((dependency_paths, filtered_args)) + Ok(ParsedDependencyArgs { + dependency_paths, + filtered_args, + }) } +// On Windows, rustc's internal search-path buffer appears to be limited to +// ~32K characters. With many transitive dependencies (400+ `-Ldependency` +// entries), the cumulative path length exceeds this limit and rustc silently +// fails to resolve crates, reporting E0463 ("can't find crate"). This applies +// even if the -Ldependencies are passed via @argfile. +// +// Fix: hard-link all rlib/rmeta files from all `-Ldependency` directories +// into a single consolidated directory, replacing hundreds of search paths +// with one. 
Hard links share the same inode/content so rustc sees identical +// SVH values and E0460 (SVH mismatch) does not occur. #[cfg(windows)] fn consolidate_dependency_search_paths( args: &[String], ) -> Result<(Vec, Option), ProcessWrapperError> { - let (dependency_paths, mut filtered_args) = get_dependency_search_paths_from_args(args)?; + let parsed = get_dependency_search_paths_from_args(args)?; + let ParsedDependencyArgs { + dependency_paths, + mut filtered_args, + } = parsed; if dependency_paths.is_empty() { return Ok((filtered_args, None)); @@ -180,67 +205,7 @@ fn consolidate_dependency_search_paths( )) })?; - let mut seen = HashSet::new(); - for path in dependency_paths { - let entries = fs::read_dir(&path).map_err(|e| { - ProcessWrapperError(format!( - "unable to read dependency search path {}: {}", - path.display(), - e - )) - })?; - - for entry in entries { - let entry = entry.map_err(|e| { - ProcessWrapperError(format!( - "unable to iterate dependency search path {}: {}", - path.display(), - e - )) - })?; - let file_type = entry.file_type().map_err(|e| { - ProcessWrapperError(format!( - "unable to inspect dependency search path {}: {}", - path.display(), - e - )) - })?; - if !(file_type.is_file() || file_type.is_symlink()) { - continue; - } - - let file_name = entry.file_name(); - let file_name_lower = file_name - .to_string_lossy() - .to_ascii_lowercase(); - if !seen.insert(file_name_lower) { - continue; - } - - let dest = unified_dir.join(&file_name); - let src = entry.path(); - match fs::hard_link(&src, &dest) { - Ok(_) => {} - Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {} - Err(err) => { - debug_log!( - "failed to hardlink {} to {} ({}), falling back to copy", - src.display(), - dest.display(), - err - ); - fs::copy(&src, &dest).map_err(|copy_err| { - ProcessWrapperError(format!( - "unable to copy {} into unified dependency dir {}: {}", - src.display(), - dest.display(), - copy_err - )) - })?; - } - } - } - } + 
crate::util::consolidate_deps_into(&dependency_paths, &unified_dir); filtered_args.push(format!("-Ldependency={}", unified_dir.display())); @@ -254,6 +219,156 @@ fn consolidate_dependency_search_paths( Ok((args.to_vec(), None)) } +#[cfg(unix)] +fn symlink_dir(src: &std::path::Path, dest: &std::path::Path) -> Result<(), std::io::Error> { + std::os::unix::fs::symlink(src, dest) +} + +#[cfg(windows)] +fn symlink_dir(src: &std::path::Path, dest: &std::path::Path) -> Result<(), std::io::Error> { + std::os::windows::fs::symlink_dir(src, dest) +} + +enum CacheSeedOutcome { + AlreadyPresent, + Seeded { _source: PathBuf }, + NotFound, +} + +fn cache_root_from_execroot_ancestor(cwd: &std::path::Path) -> Option { + // Walk up from cwd looking for a sibling "cache" directory at each level. + // Skip directories named "execroot" — cache is never inside execroot itself, + // but its parent (e.g. ) typically has a sibling "cache" dir. + // Typical Bazel layout: /execroot/_main/ (cwd) + // /cache/ (target) + for ancestor in cwd.ancestors() { + if ancestor.file_name().is_some_and(|name| name == "execroot") { + continue; + } + + let candidate = ancestor.join("cache"); + if candidate.is_dir() { + return candidate.canonicalize().ok().or(Some(candidate)); + } + } + + None +} + +fn ensure_cache_loopback_for_path( + resolved_path: &std::path::Path, + cache_root: &std::path::Path, +) -> Result, ProcessWrapperError> { + let Ok(relative) = resolved_path.strip_prefix(cache_root) else { + return Ok(None); + }; + let mut components = relative.components(); + if components + .next() + .is_none_or(|component| component.as_os_str() != "repos") + { + return Ok(None); + } + let Some(version) = components.next() else { + return Ok(None); + }; + if components + .next() + .is_none_or(|component| component.as_os_str() != "contents") + { + return Ok(None); + } + + let version_dir = cache_root.join("repos").join(version.as_os_str()); + let loopback = version_dir.join("cache"); + if loopback.exists() { + 
return Ok(Some(loopback)); + } + + symlink_dir(cache_root, &loopback).map_err(|e| { + ProcessWrapperError(format!( + "unable to seed cache loopback {} -> {}: {}", + cache_root.display(), + loopback.display(), + e + )) + })?; + Ok(Some(loopback)) +} + +fn ensure_cache_loopback_from_args( + cwd: &std::path::Path, + child_arguments: &[String], + cache_root: &std::path::Path, +) -> Result, ProcessWrapperError> { + for arg in child_arguments { + let candidate = cwd.join(arg); + let Ok(resolved) = candidate.canonicalize() else { + continue; + }; + if let Some(loopback) = ensure_cache_loopback_for_path(&resolved, cache_root)? { + return Ok(Some(loopback)); + } + } + + Ok(None) +} + +fn seed_cache_root_for_current_dir() -> Result { + let cwd = std::env::current_dir().map_err(|e| { + ProcessWrapperError(format!("unable to read current working directory: {e}")) + })?; + let dest = cwd.join("cache"); + if dest.exists() { + return Ok(CacheSeedOutcome::AlreadyPresent); + } + + if let Some(cache_root) = cache_root_from_execroot_ancestor(&cwd) { + symlink_dir(&cache_root, &dest).map_err(|e| { + ProcessWrapperError(format!( + "unable to seed cache root {} -> {}: {}", + cache_root.display(), + dest.display(), + e + )) + })?; + return Ok(CacheSeedOutcome::Seeded { + _source: cache_root, + }); + } + + for entry in fs::read_dir(&cwd).map_err(|e| { + ProcessWrapperError(format!("unable to read current working directory: {e}")) + })? 
{ + let entry = entry.map_err(|e| { + ProcessWrapperError(format!( + "unable to enumerate current working directory: {e}" + )) + })?; + let Ok(resolved) = entry.path().canonicalize() else { + continue; + }; + + for ancestor in resolved.ancestors() { + if ancestor.file_name().is_some_and(|name| name == "cache") { + symlink_dir(ancestor, &dest).map_err(|e| { + ProcessWrapperError(format!( + "unable to seed cache root {} -> {}: {}", + ancestor.display(), + dest.display(), + e + )) + })?; + return Ok(CacheSeedOutcome::Seeded { + _source: ancestor.to_path_buf(), + }); + } + } + } + + Ok(CacheSeedOutcome::NotFound) +} + fn json_warning(line: &str) -> JsonValue { JsonValue::Object(HashMap::from([ ( @@ -272,10 +387,7 @@ fn json_warning(line: &str) -> JsonValue { ])) } -fn process_line( - mut line: String, - format: ErrorFormat, -) -> Result { +fn process_line(mut line: String, format: ErrorFormat) -> Result { // LLVM can emit lines that look like the following, and these will be interspersed // with the regular JSON output. Arguably, rustc should be fixed not to emit lines // like these (or to convert them to JSON), but for now we convert them to JSON @@ -293,11 +405,78 @@ fn process_line( } fn main() -> Result<(), ProcessWrapperError> { + // Check if Bazel is invoking us as a persistent worker. + if std::env::args().any(|a| a == "--persistent_worker") { + return worker::worker_main(); + } + let opts = options().map_err(|e| ProcessWrapperError(e.to_string()))?; - let (child_arguments, dep_dir_cleanup) = + // Worker pipelining local-mode no-op optimization. + // + // When the process_wrapper runs outside a persistent worker (local or + // sandboxed-without-sandbox fallback) and the action is --pipelining-full, + // the metadata action has already run a complete rustc invocation that + // produced both the .rmeta (declared output) and the .rlib (side-effect). + // If the .rlib exists on disk, we can skip the redundant second rustc + // invocation entirely. 
This guarantees SVH consistency because the .rmeta + // and .rlib came from the same compilation. + // + // If the .rlib does NOT exist (e.g. sandboxed execution discarded the + // side-effect, or the metadata action was an action-cache hit), we fall + // through to running rustc normally. + if opts.pipelining_mode == Some(SubprocessPipeliningMode::Full) { + if let Some(ref rlib_path) = opts.pipelining_rlib_path { + if std::path::Path::new(rlib_path).exists() { + debug_log!( + "pipelining no-op: .rlib already exists at {}, skipping rustc", + rlib_path + ); + // Handle post-success actions that the normal path would do. + if let Some(ref tf) = opts.touch_file { + OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(tf) + .map_err(|e| { + ProcessWrapperError(format!("failed to create touch file: {}", e)) + })?; + } + if let Some((ref copy_source, ref copy_dest)) = opts.copy_output { + copy(copy_source, copy_dest).map_err(|e| { + ProcessWrapperError(format!( + "failed to copy {} into {}: {}", + copy_source, copy_dest, e + )) + })?; + } + exit(0); + } + eprintln!(concat!( + "WARNING: [rules_rust] Worker pipelining full action executing outside a worker.\n", + "The metadata action's .rlib side-effect was not found, so a redundant second\n", + "rustc invocation will run. This happens when Bazel falls back from worker to\n", + "sandboxed execution (sandbox discards undeclared outputs). 
The build may still\n", + "succeed if all proc macros are deterministic, but nondeterministic proc macros\n", + "will cause E0460 (SVH mismatch).\n", + "\n", + "To fix: set --@rules_rust//rust/settings:experimental_worker_pipelining=false\n", + " to use hollow-rlib pipelining (safe for all execution strategies).\n", + )); + } + } + + let (child_arguments, dep_argfile_cleanup) = consolidate_dependency_search_paths(&opts.child_arguments)?; - let mut temp_dir_guard = TemporaryDirectoryGuard::new(dep_dir_cleanup); + let mut temp_file_guard = TemporaryFileGuard::new(dep_argfile_cleanup); + let cwd = std::env::current_dir().map_err(|e| { + ProcessWrapperError(format!("unable to read current working directory: {e}")) + })?; + let _ = seed_cache_root_for_current_dir(); + if let Some(cache_root) = cache_root_from_execroot_ancestor(&cwd) { + let _ = ensure_cache_loopback_from_args(&cwd, &child_arguments, &cache_root); + } let mut command = Command::new(opts.executable); command @@ -392,8 +571,25 @@ fn main() -> Result<(), ProcessWrapperError> { } } - if let Some(path) = temp_dir_guard.take() { - let _ = fs::remove_dir_all(path); + // When a pipelining-full action fails outside a worker (the warning above + // was already printed), repeat the fix suggestion next to the error output. + if code != 0 + && opts.pipelining_mode == Some(SubprocessPipeliningMode::Full) + && opts + .pipelining_rlib_path + .as_ref() + .is_some_and(|p| !std::path::Path::new(p).exists()) + { + eprintln!(concat!( + "\nERROR: [rules_rust] Redundant rustc invocation failed (see warning above).\n", + "If the error is E0460 (SVH mismatch), set:\n", + " --@rules_rust//rust/settings:experimental_worker_pipelining=false\n", + )); + } + + if let Some(path) = temp_file_guard.take() { + // Consolidated dependency dir: remove the whole directory tree. 
+ let _ = fs::remove_dir_all(&path); } exit(code) @@ -461,10 +657,7 @@ mod test { "'+zaamo' is not a recognized feature for this target (ignoring feature)", " WARN rustc_errors::emitter Invalid span...", ] { - let LineOutput::Message(msg) = process_line( - text.to_string(), - ErrorFormat::Json, - )? + let LineOutput::Message(msg) = process_line(text.to_string(), ErrorFormat::Json)? else { return Err("Expected a LineOutput::Message".to_string()); }; @@ -522,4 +715,87 @@ mod test { )); Ok(()) } + + #[test] + #[cfg(unix)] + fn test_seed_cache_root_for_current_dir() -> Result<(), String> { + let tmp = std::env::temp_dir().join("pw_test_seed_cache_root_for_current_dir"); + let sandbox_dir = tmp.join("sandbox"); + let cache_repo = tmp.join("cache/repos/v1/contents/hash/repo"); + fs::create_dir_all(&sandbox_dir).map_err(|e| e.to_string())?; + fs::create_dir_all(cache_repo.join("tool/src")).map_err(|e| e.to_string())?; + symlink_dir(&cache_repo, &sandbox_dir.join("external_repo")).map_err(|e| e.to_string())?; + + let old_cwd = std::env::current_dir().map_err(|e| e.to_string())?; + std::env::set_current_dir(&sandbox_dir).map_err(|e| e.to_string())?; + let result = seed_cache_root_for_current_dir().map_err(|e| e.to_string()); + let restore = std::env::set_current_dir(old_cwd).map_err(|e| e.to_string()); + let seeded_target = sandbox_dir + .join("cache") + .canonicalize() + .map_err(|e| e.to_string()); + + let _ = fs::remove_dir_all(&tmp); + + result?; + restore?; + assert_eq!(seeded_target?, tmp.join("cache")); + Ok(()) + } + + #[test] + #[cfg(unix)] + fn test_seed_cache_root_from_execroot_ancestor() -> Result<(), String> { + let tmp = std::env::temp_dir().join("pw_test_seed_cache_root_from_execroot_ancestor"); + let cwd = tmp.join("output-base/execroot/_main"); + fs::create_dir_all(tmp.join("output-base/cache/repos")).map_err(|e| e.to_string())?; + fs::create_dir_all(&cwd).map_err(|e| e.to_string())?; + + let old_cwd = std::env::current_dir().map_err(|e| e.to_string())?; + 
std::env::set_current_dir(&cwd).map_err(|e| e.to_string())?; + let result = seed_cache_root_for_current_dir().map_err(|e| e.to_string()); + let restore = std::env::set_current_dir(old_cwd).map_err(|e| e.to_string()); + let seeded_target = cwd.join("cache").canonicalize().map_err(|e| e.to_string()); + + let _ = fs::remove_dir_all(&tmp); + + result?; + restore?; + assert_eq!(seeded_target?, tmp.join("output-base/cache")); + Ok(()) + } + + #[test] + #[cfg(unix)] + fn test_ensure_cache_loopback_from_args() -> Result<(), String> { + let tmp = std::env::temp_dir().join("pw_test_ensure_cache_loopback_from_args"); + let cwd = tmp.join("output-base/execroot/_main"); + let cache_root = tmp.join("output-base/cache"); + let source = cache_root.join("repos/v1/contents/hash/repo/.tmp_git_root/tool/src/lib.rs"); + fs::create_dir_all(source.parent().unwrap()).map_err(|e| e.to_string())?; + fs::create_dir_all(&cwd).map_err(|e| e.to_string())?; + fs::write(&source, "").map_err(|e| e.to_string())?; + symlink_dir( + &cache_root.join("repos/v1/contents/hash/repo"), + &cwd.join("external_repo"), + ) + .map_err(|e| e.to_string())?; + + let loopback = ensure_cache_loopback_from_args( + &cwd, + &[String::from("external_repo/.tmp_git_root/tool/src/lib.rs")], + &cache_root, + ) + .map_err(|e| e.to_string())?; + let loopback_target = cache_root + .join("repos/v1/cache") + .canonicalize() + .map_err(|e| e.to_string())?; + + let _ = fs::remove_dir_all(&tmp); + + assert_eq!(loopback, Some(cache_root.join("repos/v1/cache"))); + assert_eq!(loopback_target, cache_root); + Ok(()) + } } diff --git a/util/process_wrapper/options.rs b/util/process_wrapper/options.rs index 6dbc898a11..ca09673875 100644 --- a/util/process_wrapper/options.rs +++ b/util/process_wrapper/options.rs @@ -24,6 +24,12 @@ impl fmt::Display for OptionError { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum SubprocessPipeliningMode { + Metadata, + Full, +} + #[derive(Debug)] pub(crate) struct Options { // Contains 
the path to the child executable @@ -46,6 +52,15 @@ pub(crate) struct Options { pub(crate) output_file: Option, // This controls the output format of rustc messages. pub(crate) rustc_output_format: Option, + // Worker pipelining mode detected from @paramfile flags. + // Set when --pipelining-metadata or --pipelining-full is found. + // None when running outside of worker pipelining. + pub(crate) pipelining_mode: Option, + // The expected .rlib output path, passed via --pipelining-rlib-path= + // in the @paramfile. Used by the local-mode no-op optimization: if this + // file already exists (produced as a side-effect by the metadata action's + // rustc invocation), the full action can skip running rustc entirely. + pub(crate) pipelining_rlib_path: Option, } pub(crate) fn options() -> Result { @@ -141,12 +156,6 @@ pub(crate) fn options() -> Result { Ok((key.to_owned(), v)) }) .collect::, OptionError>>()?; - let stable_stamp_mappings = - stable_status_file_raw.map_or_else(Vec::new, |s| read_stamp_status_to_array(s).unwrap()); - let volatile_stamp_mappings = - volatile_status_file_raw.map_or_else(Vec::new, |s| read_stamp_status_to_array(s).unwrap()); - let environment_file_block = env_from_files(env_file_raw.unwrap_or_default())?; - let mut file_arguments = args_from_file(arg_file_raw.unwrap_or_default())?; // Process --copy-output let copy_output = copy_output_raw .map(|co| { @@ -167,6 +176,51 @@ pub(crate) fn options() -> Result { }) .transpose()?; + let require_explicit_unstable_features = + require_explicit_unstable_features.is_some_and(|s| s == "true"); + + // Expand @paramfiles and collect any relocated PW flags found inside them. + // This must happen before environment_block() so that relocated --env-file + // and --stable/volatile-status-file values are incorporated. 
+ let mut file_arguments = args_from_file(arg_file_raw.unwrap_or_default())?; + child_args.append(&mut file_arguments); + let (child_args, relocated) = prepare_args( + child_args, + &subst_mappings, + require_explicit_unstable_features, + None, + None, + )?; + + // Merge relocated env-files from @paramfile with those from startup args. + let mut env_files = env_file_raw.unwrap_or_default(); + env_files.extend(relocated.env_files); + let environment_file_block = env_from_files(env_files)?; + + // Merge relocated arg-files: append their contents to child_args, + // applying ${pwd} and other substitutions to each line (matching the + // worker path which calls apply_substs on every arg-file line). + let mut child_args = child_args; + if !relocated.arg_files.is_empty() { + for arg in args_from_file(relocated.arg_files)? { + let mut arg = arg; + crate::util::apply_substitutions(&mut arg, &subst_mappings); + child_args.push(arg); + } + } + + // Merge relocated stamp files with startup stamp files. + let stable_status_file = relocated.stable_status_file.or(stable_status_file_raw); + let volatile_status_file = relocated.volatile_status_file.or(volatile_status_file_raw); + let stable_stamp_mappings = + stable_status_file.map_or_else(Vec::new, |s| read_stamp_status_to_array(s).unwrap()); + let volatile_stamp_mappings = + volatile_status_file.map_or_else(Vec::new, |s| read_stamp_status_to_array(s).unwrap()); + + // Override output_file and rustc_output_format if relocated versions found. 
+ let output_file = relocated.output_file.or(output_file); + let rustc_output_format_raw = relocated.rustc_output_format.or(rustc_output_format_raw); + let rustc_output_format = rustc_output_format_raw .map(|v| match v.as_str() { "json" => Ok(rustc::ErrorFormat::Json), @@ -186,18 +240,6 @@ pub(crate) fn options() -> Result { &subst_mappings, ); - let require_explicit_unstable_features = - require_explicit_unstable_features.is_some_and(|s| s == "true"); - - // Append all the arguments fetched from files to those provided via command line. - child_args.append(&mut file_arguments); - let child_args = prepare_args( - child_args, - &subst_mappings, - require_explicit_unstable_features, - None, - None, - )?; // Split the executable path from the rest of the arguments. let (exec_path, args) = child_args.split_first().ok_or_else(|| { OptionError::Generic( @@ -215,6 +257,8 @@ pub(crate) fn options() -> Result { stderr_file, output_file, rustc_output_format, + pipelining_mode: relocated.pipelining_mode, + pipelining_rlib_path: relocated.pipelining_rlib_path, }) } @@ -250,45 +294,177 @@ fn is_allow_features_flag(arg: &str) -> bool { arg.starts_with("-Zallow-features=") || arg.starts_with("allow-features=") } -fn prepare_arg(mut arg: String, subst_mappings: &[(String, String)]) -> String { - for (f, replace_with) in subst_mappings { - let from = format!("${{{f}}}"); - arg = arg.replace(&from, replace_with); +/// Returns true for worker-pipelining protocol flags that should never be +/// forwarded to rustc. These flags live in the @paramfile (rustc_flags) so +/// both RustcMetadata and Rustc actions share identical startup args (same +/// worker key). They must be stripped before the args reach rustc. 
+pub(crate) fn is_pipelining_flag(arg: &str) -> bool { + arg == "--pipelining-metadata" + || arg == "--pipelining-full" + || arg.starts_with("--pipelining-key=") + || arg.starts_with("--pipelining-rlib-path=") +} + +/// Returns true if `arg` is a process_wrapper flag that may appear in the +/// @paramfile when worker pipelining is active. These flags are placed in +/// the paramfile (per-request args) instead of startup args so that all +/// worker actions share the same WorkerKey. They must be stripped before the +/// expanded paramfile reaches rustc. +/// +/// Unlike pipelining flags (which are standalone), these flags consume the +/// *next* argument as their value, so the caller must skip it too. +pub(crate) fn is_relocated_pw_flag(arg: &str) -> bool { + arg == "--output-file" + || arg == "--rustc-output-format" + || arg == "--env-file" + || arg == "--arg-file" + || arg == "--stable-status-file" + || arg == "--volatile-status-file" +} + +#[derive(Default, Debug)] +pub(crate) struct RelocatedPwFlags { + pub(crate) env_files: Vec, + pub(crate) arg_files: Vec, + pub(crate) output_file: Option, + pub(crate) rustc_output_format: Option, + pub(crate) stable_status_file: Option, + pub(crate) volatile_status_file: Option, + pub(crate) pipelining_mode: Option, + pub(crate) pipelining_rlib_path: Option, +} + +/// On Windows, resolve `.rs` source file paths that pass through junctions +/// containing relative symlinks. Windows cannot resolve chained reparse +/// points (junction -> relative symlink -> symlink) in a single traversal, +/// causing rustc to fail with ERROR_PATH_NOT_FOUND. +/// +/// Only resolves paths ending in `.rs` to avoid changing crate identity +/// for `--extern` and `-L` paths (which would cause crate version mismatches). 
+#[cfg(windows)] +pub(crate) fn resolve_external_path(arg: &str) -> std::borrow::Cow<'_, str> { + use std::borrow::Cow; + use std::path::Path; + if !arg.ends_with(".rs") { + return Cow::Borrowed(arg); + } + if !arg.starts_with("external/") && !arg.starts_with("external\\") { + return Cow::Borrowed(arg); } - arg + let path = Path::new(arg); + let mut components = path.components(); + let Some(_external) = components.next() else { + return Cow::Borrowed(arg); + }; + let Some(repo_name) = components.next() else { + return Cow::Borrowed(arg); + }; + let junction = Path::new("external").join(repo_name); + let Ok(resolved) = std::fs::read_link(&junction) else { + return Cow::Borrowed(arg); + }; + let remainder: std::path::PathBuf = components.collect(); + if remainder.as_os_str().is_empty() { + return Cow::Borrowed(arg); + } + Cow::Owned(resolved.join(remainder).to_string_lossy().into_owned()) } -/// Apply substitutions to the given param file. Returns true iff any allow-features flags were found. +/// No-op on non-Windows: returns the argument unchanged without allocating. +#[cfg(not(windows))] +#[inline] +pub(crate) fn resolve_external_path(arg: &str) -> std::borrow::Cow<'_, str> { + std::borrow::Cow::Borrowed(arg) +} + +/// Apply substitutions to the given param file. +/// Returns `(has_allow_features, relocated_pw_flags)`. +/// Relocated PW flags (--env-file, --output-file, etc.) are collected into +/// `RelocatedPwFlags` so the caller can apply them, rather than being silently +/// discarded. 
fn prepare_param_file( filename: &str, subst_mappings: &[(String, String)], read_file: &mut impl FnMut(&str) -> Result, OptionError>, write_to_file: &mut impl FnMut(&str) -> Result<(), OptionError>, -) -> Result { +) -> Result<(bool, RelocatedPwFlags), OptionError> { fn process_file( filename: &str, subst_mappings: &[(String, String)], read_file: &mut impl FnMut(&str) -> Result, OptionError>, write_to_file: &mut impl FnMut(&str) -> Result<(), OptionError>, + relocated: &mut RelocatedPwFlags, ) -> Result { let mut has_allow_features_flag = false; + // When set, the next arg is the value of this relocated pw flag. + let mut pending_flag: Option = None; for arg in read_file(filename)? { - let arg = prepare_arg(arg, subst_mappings); + if let Some(flag) = pending_flag.take() { + let mut value = arg; + crate::util::apply_substitutions(&mut value, subst_mappings); + match flag.as_str() { + "--env-file" => relocated.env_files.push(value), + "--arg-file" => relocated.arg_files.push(value), + "--output-file" => relocated.output_file = Some(value), + "--rustc-output-format" => relocated.rustc_output_format = Some(value), + "--stable-status-file" => relocated.stable_status_file = Some(value), + "--volatile-status-file" => relocated.volatile_status_file = Some(value), + _ => {} + } + continue; + } + let mut arg = arg; + crate::util::apply_substitutions(&mut arg, subst_mappings); + // Strip worker-pipelining protocol flags; they must not reach rustc. + // Collect mode and rlib-path so the local-mode no-op optimization + // can detect when the full action's .rlib already exists. 
+ if is_pipelining_flag(&arg) { + if arg == "--pipelining-metadata" { + relocated.pipelining_mode = Some(SubprocessPipeliningMode::Metadata); + } else if arg == "--pipelining-full" { + relocated.pipelining_mode = Some(SubprocessPipeliningMode::Full); + } else if let Some(path) = arg.strip_prefix("--pipelining-rlib-path=") { + relocated.pipelining_rlib_path = Some(path.to_string()); + } + continue; + } + // Collect relocated process_wrapper flags (--output-file, etc.) that + // were placed in the paramfile for worker key stability. These are + // two-part flags: the flag name on one line, its value on the next. + if is_relocated_pw_flag(&arg) { + pending_flag = Some(arg); + continue; + } has_allow_features_flag |= is_allow_features_flag(&arg); if let Some(arg_file) = arg.strip_prefix('@') { - has_allow_features_flag |= - process_file(arg_file, subst_mappings, read_file, write_to_file)?; + has_allow_features_flag |= process_file( + arg_file, + subst_mappings, + read_file, + write_to_file, + relocated, + )?; } else { write_to_file(&arg)?; } } Ok(has_allow_features_flag) } - let has_allow_features_flag = process_file(filename, subst_mappings, read_file, write_to_file)?; - Ok(has_allow_features_flag) + let mut relocated = RelocatedPwFlags::default(); + let has_allow_features_flag = process_file( + filename, + subst_mappings, + read_file, + write_to_file, + &mut relocated, + )?; + Ok((has_allow_features_flag, relocated)) } /// Apply substitutions to the provided arguments, recursing into param files. +/// Returns `(processed_args, relocated_pw_flags)` — any process_wrapper flags +/// found inside `@paramfile`s are collected rather than discarded so the caller +/// can apply them. 
#[allow(clippy::type_complexity)] fn prepare_args( args: Vec, @@ -296,17 +472,41 @@ fn prepare_args( require_explicit_unstable_features: bool, read_file: Option<&mut dyn FnMut(&str) -> Result, OptionError>>, mut write_file: Option<&mut dyn FnMut(&str, &str) -> Result<(), OptionError>>, -) -> Result, OptionError> { +) -> Result<(Vec, RelocatedPwFlags), OptionError> { let mut allowed_features = false; let mut processed_args = Vec::::new(); + let mut relocated = RelocatedPwFlags::default(); let mut read_file_wrapper = |s: &str| read_file_to_array(s).map_err(OptionError::Generic); let mut read_file = read_file.unwrap_or(&mut read_file_wrapper); for arg in args.into_iter() { - let arg = prepare_arg(arg, subst_mappings); + let mut arg = arg; + crate::util::apply_substitutions(&mut arg, subst_mappings); if let Some(param_file) = arg.strip_prefix('@') { - let expanded_file = format!("{param_file}.expanded"); + // Write the expanded paramfile to a temp directory to avoid issues + // with sandbox filesystems where bazel-out symlinks may prevent the + // expanded file from being visible to the child process. 
+ let expanded_file = match write_file { + Some(_) => format!("{param_file}.expanded"), + None => { + let basename = std::path::Path::new(param_file) + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("params"); + format!( + "{}/pw_expanded_{}_{}", + std::env::temp_dir().display(), + std::process::id(), + basename, + ) + } + }; + + enum Writer<'f, F: FnMut(&str, &str) -> Result<(), OptionError>> { + Function(&'f mut F), + BufWriter(io::BufWriter), + } let format_err = |err: io::Error| { OptionError::Generic(format!( "{} writing path: {:?}, current directory: {:?}", @@ -315,11 +515,6 @@ fn prepare_args( std::env::current_dir() )) }; - - enum Writer<'f, F: FnMut(&str, &str) -> Result<(), OptionError>> { - Function(&'f mut F), - BufWriter(io::BufWriter), - } let mut out = match write_file { Some(ref mut f) => Writer::Function(f), None => Writer::BufWriter(io::BufWriter::new( @@ -327,31 +522,57 @@ fn prepare_args( )), }; let mut write_to_file = |s: &str| -> Result<(), OptionError> { + let s = resolve_external_path(s); match out { - Writer::Function(ref mut f) => f(&expanded_file, s), + Writer::Function(ref mut f) => f(&expanded_file, &s), Writer::BufWriter(ref mut bw) => writeln!(bw, "{s}").map_err(format_err), } }; // Note that substitutions may also apply to the param file path! - let (file, allowed) = prepare_param_file( + let (file, (allowed, pf_relocated)) = prepare_param_file( param_file, subst_mappings, &mut read_file, &mut write_to_file, ) - .map(|af| (format!("@{expanded_file}"), af))?; + .map(|(af, rel)| (format!("@{expanded_file}"), (af, rel)))?; allowed_features |= allowed; + // Merge relocated flags from this paramfile. 
+ relocated.env_files.extend(pf_relocated.env_files); + relocated.arg_files.extend(pf_relocated.arg_files); + if pf_relocated.output_file.is_some() { + relocated.output_file = pf_relocated.output_file; + } + if pf_relocated.rustc_output_format.is_some() { + relocated.rustc_output_format = pf_relocated.rustc_output_format; + } + if pf_relocated.stable_status_file.is_some() { + relocated.stable_status_file = pf_relocated.stable_status_file; + } + if pf_relocated.volatile_status_file.is_some() { + relocated.volatile_status_file = pf_relocated.volatile_status_file; + } + if pf_relocated.pipelining_mode.is_some() { + relocated.pipelining_mode = pf_relocated.pipelining_mode; + } + if pf_relocated.pipelining_rlib_path.is_some() { + relocated.pipelining_rlib_path = pf_relocated.pipelining_rlib_path; + } processed_args.push(file); } else { allowed_features |= is_allow_features_flag(&arg); - processed_args.push(arg); + let resolved = resolve_external_path(&arg); + processed_args.push(match resolved { + std::borrow::Cow::Borrowed(_) => arg, + std::borrow::Cow::Owned(s) => s, + }); } } if !allowed_features && require_explicit_unstable_features { processed_args.push("-Zallow-features=".to_string()); } - Ok(processed_args) + Ok((processed_args, relocated)) } fn environment_block( @@ -374,12 +595,8 @@ fn environment_block( *value = new; } } - for (f, replace_with) in subst_mappings { - for value in environment_variables.values_mut() { - let from = format!("${{{f}}}"); - let new = value.replace(from.as_str(), replace_with); - *value = new; - } + for value in environment_variables.values_mut() { + crate::util::apply_substitutions(value, subst_mappings); } environment_variables } @@ -392,7 +609,7 @@ mod test { fn test_enforce_allow_features_flag_user_didnt_say() { let args = vec!["rustc".to_string()]; let subst_mappings: Vec<(String, String)> = vec![]; - let args = prepare_args(args, &subst_mappings, true, None, None).unwrap(); + let (args, _) = prepare_args(args, &subst_mappings, 
true, None, None).unwrap(); assert_eq!( args, vec!["rustc".to_string(), "-Zallow-features=".to_string(),] @@ -406,7 +623,7 @@ mod test { "-Zallow-features=whitespace_instead_of_curly_braces".to_string(), ]; let subst_mappings: Vec<(String, String)> = vec![]; - let args = prepare_args(args, &subst_mappings, true, None, None).unwrap(); + let (args, _) = prepare_args(args, &subst_mappings, true, None, None).unwrap(); assert_eq!( args, vec![ @@ -443,16 +660,17 @@ mod test { let args = vec!["rustc".to_string(), "@rustc_params".to_string()]; let subst_mappings: Vec<(String, String)> = vec![]; - let args = prepare_args( + let (args, _) = prepare_args( args, &subst_mappings, true, Some(&mut read_file), Some(&mut write_file), - ); + ) + .unwrap(); assert_eq!( - args.unwrap(), + args, vec!["rustc".to_string(), "@rustc_params.expanded".to_string(),] ); @@ -464,4 +682,84 @@ mod test { )]) ); } + + #[test] + fn test_prepare_param_file_strips_and_collects_relocated_pw_flags() { + let mut written = String::new(); + let mut read_file = |_filename: &str| -> Result, OptionError> { + Ok(vec![ + "--output-file".to_string(), + "bazel-out/foo/libbar.rmeta".to_string(), + "--env-file".to_string(), + "bazel-out/foo/build_script.env".to_string(), + "src/lib.rs".to_string(), + "--crate-name=foo".to_string(), + "--arg-file".to_string(), + "bazel-out/foo/build_script.linksearchpaths".to_string(), + "--rustc-output-format".to_string(), + "rendered".to_string(), + "--stable-status-file".to_string(), + "bazel-out/stable-status.txt".to_string(), + "--volatile-status-file".to_string(), + "bazel-out/volatile-status.txt".to_string(), + "--crate-type=rlib".to_string(), + ]) + }; + let mut write_to_file = |s: &str| -> Result<(), OptionError> { + if !written.is_empty() { + written.push('\n'); + } + written.push_str(s); + Ok(()) + }; + + let (_, relocated) = + prepare_param_file("test.params", &[], &mut read_file, &mut write_to_file).unwrap(); + + // All relocated pw flags + values should be stripped 
from output. + // Only the rustc flags should remain. + assert_eq!(written, "src/lib.rs\n--crate-name=foo\n--crate-type=rlib"); + + // Verify collected relocated flags. + assert_eq!( + relocated.output_file.as_deref(), + Some("bazel-out/foo/libbar.rmeta") + ); + assert_eq!(relocated.env_files, vec!["bazel-out/foo/build_script.env"]); + assert_eq!( + relocated.arg_files, + vec!["bazel-out/foo/build_script.linksearchpaths"] + ); + assert_eq!(relocated.rustc_output_format.as_deref(), Some("rendered")); + assert_eq!( + relocated.stable_status_file.as_deref(), + Some("bazel-out/stable-status.txt") + ); + assert_eq!( + relocated.volatile_status_file.as_deref(), + Some("bazel-out/volatile-status.txt") + ); + } + + #[test] + fn resolve_external_path_non_rs_unchanged() { + let arg = "external/some_repo/src/lib.txt"; + let result = resolve_external_path(arg); + assert_eq!(&*result, arg); + } + + #[test] + fn resolve_external_path_non_external_unchanged() { + let arg = "src/main.rs"; + let result = resolve_external_path(arg); + assert_eq!(&*result, arg); + } + + #[test] + fn resolve_external_path_no_junction_unchanged() { + // When the junction doesn't exist (read_link fails), returns unchanged. 
+ let arg = "external/nonexistent_repo_12345/src/lib.rs"; + let result = resolve_external_path(arg); + assert_eq!(&*result, arg); + } } diff --git a/util/process_wrapper/private/bootstrap_process_wrapper.cc b/util/process_wrapper/private/bootstrap_process_wrapper.cc index e6e58513e3..6b84083fb1 100644 --- a/util/process_wrapper/private/bootstrap_process_wrapper.cc +++ b/util/process_wrapper/private/bootstrap_process_wrapper.cc @@ -1,6 +1,6 @@ #include -#include #include +#include #include #include #include @@ -19,72 +19,72 @@ constexpr const char* kPwdPlaceholder = "${pwd}"; std::string replace_pwd_placeholder(const std::string& arg, const std::string& pwd) { - std::string out = arg; - std::string::size_type pos = 0; - while ((pos = out.find(kPwdPlaceholder, pos)) != std::string::npos) { - out.replace(pos, std::strlen(kPwdPlaceholder), pwd); - pos += pwd.size(); - } - return out; + std::string out = arg; + std::string::size_type pos = 0; + while ((pos = out.find(kPwdPlaceholder, pos)) != std::string::npos) { + out.replace(pos, std::strlen(kPwdPlaceholder), pwd); + pos += pwd.size(); + } + return out; } std::vector build_exec_argv(const std::vector& args) { - std::vector exec_argv; - exec_argv.reserve(args.size() + 1); - for (const std::string& arg : args) { - exec_argv.push_back(const_cast(arg.c_str())); - } - exec_argv.push_back(nullptr); - return exec_argv; + std::vector exec_argv; + exec_argv.reserve(args.size() + 1); + for (const std::string& arg : args) { + exec_argv.push_back(const_cast(arg.c_str())); + } + exec_argv.push_back(nullptr); + return exec_argv; } } // namespace int main(int argc, char** argv) { - int first_arg_index = 1; - if (argc > 1 && std::strcmp(argv[1], "--") == 0) { - first_arg_index = 2; - } + int first_arg_index = 1; + if (argc > 1 && std::strcmp(argv[1], "--") == 0) { + first_arg_index = 2; + } - if (first_arg_index >= argc) { - std::fprintf(stderr, "bootstrap_process_wrapper: missing command\n"); - return 1; - } + if (first_arg_index >= 
argc) { + std::fprintf(stderr, "bootstrap_process_wrapper: missing command\n"); + return 1; + } - char* pwd_raw = getcwd(nullptr, 0); - if (pwd_raw == nullptr) { - std::perror("bootstrap_process_wrapper: getcwd"); - return 1; - } - std::string pwd = pwd_raw; - std::free(pwd_raw); + char* pwd_raw = getcwd(nullptr, 0); + if (pwd_raw == nullptr) { + std::perror("bootstrap_process_wrapper: getcwd"); + return 1; + } + std::string pwd = pwd_raw; + std::free(pwd_raw); - std::vector command_args; - command_args.reserve(static_cast(argc - first_arg_index)); - for (int i = first_arg_index; i < argc; ++i) { - command_args.push_back(replace_pwd_placeholder(argv[i], pwd)); - } + std::vector command_args; + command_args.reserve(static_cast(argc - first_arg_index)); + for (int i = first_arg_index; i < argc; ++i) { + command_args.push_back(replace_pwd_placeholder(argv[i], pwd)); + } #if defined(_WIN32) - for (char& c : command_args[0]) { - if (c == '/') { - c = '\\'; + for (char& c : command_args[0]) { + if (c == '/') { + c = '\\'; + } } - } #endif - std::vector exec_argv = build_exec_argv(command_args); + std::vector exec_argv = build_exec_argv(command_args); #if defined(_WIN32) - int exit_code = _spawnvp(_P_WAIT, exec_argv[0], exec_argv.data()); - if (exit_code == -1) { - std::perror("bootstrap_process_wrapper: _spawnvp"); - return 1; - } - return exit_code; + int exit_code = _spawnvp(_P_WAIT, exec_argv[0], exec_argv.data()); + if (exit_code == -1) { + std::perror("bootstrap_process_wrapper: _spawnvp"); + return 1; + } + return exit_code; #else - execvp(exec_argv[0], exec_argv.data()); - std::perror("bootstrap_process_wrapper: execvp"); - return 1; + execvp(exec_argv[0], exec_argv.data()); + std::perror("bootstrap_process_wrapper: execvp"); + return 1; #endif } diff --git a/util/process_wrapper/util.rs b/util/process_wrapper/util.rs index 7bab19ef79..e97094a812 100644 --- a/util/process_wrapper/util.rs +++ b/util/process_wrapper/util.rs @@ -78,6 +78,92 @@ fn 
stamp_status_to_array(reader: impl Read) -> Result, Str .collect() } +/// Consolidates files from multiple `-Ldependency` directories into a single +/// `unified_dir` using hard links (with copy fallback). This works around +/// Windows rustc's ~32K search-path buffer limit that causes E0463 when many +/// transitive `-Ldependency` entries are present. +/// +/// Returns the number of files linked/copied. Skips directories that don't +/// exist (e.g. action-cache artifacts not yet materialized) and duplicate +/// filenames (first occurrence wins, case-insensitive on Windows). +#[cfg(windows)] +pub(crate) fn consolidate_deps_into( + dependency_dirs: &[impl AsRef], + unified_dir: &std::path::Path, +) -> usize { + use std::collections::HashSet; + + let mut seen = HashSet::new(); + let mut count = 0usize; + for dir in dependency_dirs { + let entries = match std::fs::read_dir(dir.as_ref()) { + Ok(e) => e, + Err(e) => { + eprintln!( + "consolidate_deps: skipping {}: {}", + dir.as_ref().display(), + e + ); + continue; + } + }; + for entry in entries.flatten() { + let file_type = match entry.file_type() { + Ok(ft) => ft, + Err(_) => continue, + }; + if !(file_type.is_file() || file_type.is_symlink()) { + continue; + } + let file_name = entry.file_name(); + let file_name_lower = file_name.to_string_lossy().to_ascii_lowercase(); + if !seen.insert(file_name_lower) { + continue; + } + let dest = unified_dir.join(&file_name); + let src = entry.path(); + match std::fs::hard_link(&src, &dest) { + Ok(_) => {} + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {} + Err(e) => { + eprintln!( + "consolidate_deps: hard_link {} -> {} failed ({}), falling back to copy", + src.display(), + dest.display(), + e + ); + if let Err(copy_err) = std::fs::copy(&src, &dest) { + eprintln!( + "consolidate_deps: copy {} -> {} also failed: {}", + src.display(), + dest.display(), + copy_err + ); + } + } + } + count += 1; + } + } + count +} + +/// Applies `${key}` → `value` substitution 
mappings to `s`. +/// +/// On Windows, `std::fs::canonicalize` produces `\\?\` verbatim paths where +/// forward slashes are literal, not separators. After substituting a value +/// that contains `\\?\`, any remaining `/` in the result would break path +/// resolution. This step can be omitted on other platforms. +pub(crate) fn apply_substitutions(s: &mut String, subst: &[(String, String)]) { + for (k, v) in subst { + *s = s.replace(&format!("${{{k}}}"), v); + } + #[cfg(windows)] + if s.contains(r"\\?\") { + *s = s.replace('/', r"\"); + } +} + #[cfg(test)] mod test { use super::*; diff --git a/util/process_wrapper/worker.rs b/util/process_wrapper/worker.rs new file mode 100644 index 0000000000..f1858eb9cf --- /dev/null +++ b/util/process_wrapper/worker.rs @@ -0,0 +1,1737 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Bazel JSON persistent worker protocol implementation. +//! +//! When Bazel invokes process_wrapper with `--persistent_worker`, this module +//! takes over. It reads newline-delimited JSON WorkRequest messages from stdin, +//! executes each request by spawning process_wrapper itself with the request's +//! arguments, and writes a JSON WorkResponse to stdout. +//! +//! The worker supports both singleplex (requestId == 0) and multiplex +//! (requestId > 0) modes. Multiplex requests are dispatched to separate threads, +//! allowing concurrent processing. 
This enables worker-managed pipelined +//! compilation where a metadata action and a full compile action for the same +//! crate can share state through the `PipelineState` map. +//! +//! The worker supports both sandboxed (multiplex sandboxing) and unsandboxed +//! modes. In unsandboxed mode it runs directly in Bazel's execroot; in +//! sandboxed mode each request receives a per-request sandbox directory. +//! Incremental compilation caches see stable source file paths between +//! requests, avoiding the ICE that occurs when sandbox paths change between +//! builds. +//! +//! Protocol reference: https://bazel.build/remote/persistent + +#[path = "worker_pipeline.rs"] +mod pipeline; +#[path = "worker_protocol.rs"] +mod protocol; +#[path = "worker_sandbox.rs"] +mod sandbox; + +use std::io::{self, BufRead, Write}; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread; +use std::time::{Duration, Instant}; + +use crate::ProcessWrapperError; + +// Imports used by worker_main +use pipeline::{ + detect_pipelining_mode, handle_pipelining_full, handle_pipelining_metadata, + kill_pipelined_request, relocate_pw_flags, PipelineState, PipeliningMode, WorkerStateRoots, +}; +use protocol::{ + build_cancel_response, build_response, build_shutdown_response, extract_request_id, + extract_request_id_from_raw_line, WorkRequestContext, +}; +use sandbox::{prepare_outputs, prepare_outputs_sandboxed, run_request, run_sandboxed_request}; + +// --------------------------------------------------------------------------- +// Worker lifecycle and signal handling +// --------------------------------------------------------------------------- + +/// Locks a mutex, recovering from poisoning instead of panicking. +/// +/// If a worker thread panics while holding a mutex, the mutex becomes +/// "poisoned". 
Rather than cascading the panic to all other threads, +/// we recover the inner value — the data is still valid because +/// `catch_unwind` prevents partial updates from escaping. +fn lock_or_recover(mutex: &Mutex) -> std::sync::MutexGuard<'_, T> { + mutex + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) +} + +fn current_pid() -> u32 { + std::process::id() +} + +fn current_thread_label() -> String { + format!("{:?}", thread::current().id()) +} + +static WORKER_SHUTTING_DOWN: AtomicBool = AtomicBool::new(false); + +#[cfg(unix)] +const SIG_TERM: i32 = 15; + +#[cfg(unix)] +unsafe extern "C" { + fn signal(signum: i32, handler: usize) -> usize; + fn close(fd: i32) -> i32; + fn write(fd: i32, buf: *const std::ffi::c_void, count: usize) -> isize; +} + +fn append_worker_lifecycle_log(message: &str) { + let root = std::path::Path::new("_pw_state"); + let _ = std::fs::create_dir_all(root); + let path = root.join("worker_lifecycle.log"); + let mut file = match std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(path) + { + Ok(file) => file, + Err(_) => return, + }; + let _ = writeln!(file, "{message}"); +} + +fn worker_is_shutting_down() -> bool { + WORKER_SHUTTING_DOWN.load(Ordering::SeqCst) +} + +fn begin_worker_shutdown(reason: &str) { + if WORKER_SHUTTING_DOWN + .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) + .is_ok() + { + append_worker_lifecycle_log(&format!( + "pid={} event=shutdown_begin thread={} reason={}", + current_pid(), + current_thread_label(), + reason, + )); + } +} + +#[cfg(unix)] +extern "C" fn worker_signal_handler(_signum: i32) { + WORKER_SHUTTING_DOWN.store(true, Ordering::SeqCst); + unsafe { + close(0); + } // close stdin to unblock main loop +} + +#[cfg(unix)] +fn install_worker_signal_handlers() { + static ONCE: std::sync::Once = std::sync::Once::new(); + ONCE.call_once(|| unsafe { + signal(SIG_TERM, worker_signal_handler as *const () as usize); + }); +} + +#[cfg(not(unix))] +fn 
install_worker_signal_handlers() {} + +struct WorkerLifecycleGuard { + pid: u32, + start: Instant, + request_counter: Arc, +} + +impl WorkerLifecycleGuard { + fn new(argv: &[String], request_counter: &Arc) -> Self { + let pid = current_pid(); + let cwd = std::env::current_dir() + .map(|cwd| cwd.display().to_string()) + .unwrap_or_else(|_| "".to_string()); + append_worker_lifecycle_log(&format!( + "pid={} event=start thread={} cwd={} argv_len={}", + pid, + current_thread_label(), + cwd, + argv.len(), + )); + Self { + pid, + start: Instant::now(), + request_counter: Arc::clone(request_counter), + } + } +} + +impl Drop for WorkerLifecycleGuard { + fn drop(&mut self) { + let uptime = self.start.elapsed(); + let requests = self.request_counter.load(Ordering::SeqCst); + append_worker_lifecycle_log(&format!( + "pid={} event=exit uptime_ms={} requests_seen={}", + self.pid, + uptime.as_millis(), + requests, + )); + // Structured summary line for easy extraction by benchmark tooling. + append_worker_lifecycle_log(&format!( + "worker_exit pid={} requests_handled={} uptime_s={:.1}", + self.pid, + requests, + uptime.as_secs_f64(), + )); + } +} + +fn install_worker_panic_hook() { + static ONCE: std::sync::Once = std::sync::Once::new(); + ONCE.call_once(|| { + std::panic::set_hook(Box::new(|info| { + append_worker_lifecycle_log(&format!( + "pid={} event=panic thread={} info={}", + current_pid(), + current_thread_label(), + info + )); + })); + }); +} + +// --------------------------------------------------------------------------- +// Helper functions used in worker_main +// --------------------------------------------------------------------------- + +fn crate_name_from_args(args: &[String]) -> Option<&str> { + args.iter() + .find_map(|arg| arg.strip_prefix("--crate-name=")) +} + +fn emit_arg_from_args(args: &[String]) -> Option<&str> { + args.iter().find_map(|arg| arg.strip_prefix("--emit=")) +} + +fn pipeline_key_from_args(args: &[String]) -> Option<&str> { + args.iter() + 
.find_map(|arg| arg.strip_prefix("--pipelining-key=")) +} + +fn write_worker_response( + stdout: &Arc>, + response: &str, +) -> Result<(), ProcessWrapperError> { + let _guard = lock_or_recover(stdout); + write_all_stdout_fd(response.as_bytes()) + .and_then(|_| write_all_stdout_fd(b"\n")) + .map_err(|e| ProcessWrapperError(format!("failed to write WorkResponse: {e}")))?; + Ok(()) +} + +#[cfg(unix)] +fn write_all_stdout_fd(mut bytes: &[u8]) -> io::Result<()> { + while !bytes.is_empty() { + let written = unsafe { write(1, bytes.as_ptr().cast(), bytes.len()) }; + if written < 0 { + let err = io::Error::last_os_error(); + if err.kind() == io::ErrorKind::Interrupted { + continue; + } + return Err(err); + } + let written = written as usize; + if written == 0 { + return Err(io::Error::new( + io::ErrorKind::WriteZero, + "short write to worker stdout", + )); + } + bytes = &bytes[written..]; + } + Ok(()) +} + +#[cfg(not(unix))] +fn write_all_stdout_fd(bytes: &[u8]) -> io::Result<()> { + let mut out = io::stdout().lock(); + out.write_all(bytes)?; + out.flush() +} + +// --------------------------------------------------------------------------- +// Main worker loop +// --------------------------------------------------------------------------- + +/// Entry point for persistent worker mode. +/// +/// Loops reading JSON WorkRequest messages from stdin until EOF. +/// - Singleplex requests (requestId == 0): processed inline on the main thread +/// (backward-compatible with Bazel's singleplex worker protocol). +/// - Multiplex requests (requestId > 0): dispatched to a new thread, allowing +/// concurrent processing and in-process state sharing for pipelined builds. +/// +/// Bazel starts the worker with: +/// `process_wrapper [startup_args] --persistent_worker` +/// where `startup_args` are the fixed parts of the action command line +/// (e.g. `--subst pwd=${pwd} -- /path/to/rustc`). +/// +/// Each WorkRequest.arguments contains the per-request part (the `@flagfile`). 
+/// The worker must combine startup_args + per-request args when spawning the +/// subprocess, so process_wrapper receives the full argument list it expects. +pub(crate) fn worker_main() -> Result<(), ProcessWrapperError> { + let request_counter = Arc::new(AtomicUsize::new(0)); + install_worker_panic_hook(); + let _lifecycle = + WorkerLifecycleGuard::new(&std::env::args().collect::>(), &request_counter); + install_worker_signal_handlers(); + + let self_path = std::env::current_exe() + .map_err(|e| ProcessWrapperError(format!("failed to get worker executable path: {e}")))?; + + // Collect the startup args that Bazel passed when spawning this worker + // process. These are the fixed action args (e.g. `--subst pwd=${pwd} -- + // /path/to/rustc`). We skip argv[0] (the binary path) and strip + // `--persistent_worker` since that flag is what triggered worker mode. + let startup_args: Vec = std::env::args() + .skip(1) + .filter(|a| a != "--persistent_worker") + .collect(); + + let stdin = io::stdin(); + // Serialize writes to fd 1 so multiplexed responses remain newline-delimited + // JSON records with no byte interleaving. + let stdout = Arc::new(Mutex::new(())); + + // Shared state for worker-managed pipelined compilation. + // The metadata action stores a running rustc Child here; the full compile + // action retrieves it and waits for completion. + let pipeline_state: Arc> = Arc::new(Mutex::new(PipelineState::new())); + let state_roots = Arc::new(WorkerStateRoots::ensure()?); + + // Track spawned worker threads so we can join them on shutdown. 
+ let in_flight: Arc>>> = Arc::new(Mutex::new(Vec::new())); + + for line in stdin.lock().lines() { + let line = match line { + Ok(line) => line, + Err(e) => { + begin_worker_shutdown("stdin_read_error"); + append_worker_lifecycle_log(&format!( + "pid={} event=stdin_read_error thread={} error={}", + current_pid(), + current_thread_label(), + e + )); + return Err(ProcessWrapperError(format!( + "failed to read WorkRequest: {e}" + ))); + } + }; + if line.is_empty() { + continue; + } + if worker_is_shutting_down() { + append_worker_lifecycle_log(&format!( + "pid={} event=request_ignored_for_shutdown thread={} bytes={}", + current_pid(), + current_thread_label(), + line.len(), + )); + break; + } + request_counter.fetch_add(1, Ordering::SeqCst); + + let request: tinyjson::JsonValue = match line.parse::() { + Ok(request) => request, + Err(e) => { + // Try to extract requestId so we can send an error response + // rather than leaving Bazel hanging on the missing response. + if let Some(request_id) = extract_request_id_from_raw_line(&line) { + append_worker_lifecycle_log(&format!( + "pid={} thread={} request_parse_error request_id={} bytes={} error={}", + current_pid(), + current_thread_label(), + request_id, + line.len(), + e + )); + let response = + build_response(1, &format!("worker protocol parse error: {e}"), request_id); + let _ = write_worker_response(&stdout, &response); + } + continue; + } + }; + let request = match WorkRequestContext::from_json(&request) { + Ok(ctx) => ctx, + Err(e) => { + let request_id = extract_request_id(&request); + let response = build_response(1, &e, request_id); + let _ = write_worker_response(&stdout, &response); + continue; + } + }; + append_worker_lifecycle_log(&format!( + "pid={} thread={} request_received request_id={} cancel={} crate={} emit={} pipeline_key={}", + current_pid(), + current_thread_label(), + request.request_id, + request.cancel, + crate_name_from_args(&request.arguments).unwrap_or("-"), + 
emit_arg_from_args(&request.arguments).unwrap_or("-"), + pipeline_key_from_args(&request.arguments).unwrap_or("-"), + )); + + if worker_is_shutting_down() { + let response = build_shutdown_response(request.request_id); + let _ = write_worker_response(&stdout, &response); + continue; + } + + if request.request_id == 0 { + // Singleplex: process inline on the main thread (backward-compatible). + let mut full_args = startup_args.clone(); + full_args.extend(request.arguments.clone()); + relocate_pw_flags(&mut full_args); + + // Workers run in execroot without sandboxing. Bazel marks action outputs + // read-only after each successful action. Make them writable first. + prepare_outputs(&full_args); + + let (exit_code, output) = run_request(&self_path, full_args)?; + + let response = build_response(exit_code, &output, request.request_id); + write_worker_response(&stdout, &response)?; + append_worker_lifecycle_log(&format!( + "pid={} thread={} request_complete request_id={} exit_code={} output_bytes={} mode=singleplex", + current_pid(), + current_thread_label(), + request.request_id, + exit_code, + output.len(), + )); + } else { + let stdout = Arc::clone(&stdout); + + // Cancel request: Bazel no longer needs the result for this requestId. + // Respond with wasCancelled=true immediately if we haven't already responded. + // + // For pipelined requests, `kill_pipelined_request` kills the background + // rustc process to avoid wasting CPU. For non-pipelined requests (normal + // subprocess via `run_request`/`run_sandboxed_request`), the subprocess + // continues running — `Command::output()` provides no kill handle. The + // claim_flag prevents a duplicate response; the only cost is wasted CPU + // until the subprocess exits naturally. This is consistent with Bazel's + // best-effort cancellation semantics. + if request.cancel { + // Look up the flag for this in-flight request. 
+ let flag = lock_or_recover(&pipeline_state) + .get_claim_flag(request.request_id); + if let Some(flag) = flag { + // Try to claim the response slot atomically. + if !flag.swap(true, Ordering::SeqCst) { + // We claimed it — kill any associated background rustc + // to avoid wasting CPU when the remote leg wins. + kill_pipelined_request(&pipeline_state, request.request_id); + let response = build_cancel_response(request.request_id); + let _ = write_worker_response(&stdout, &response); + } + // If swap returned true, the worker thread already sent the normal + // response before we could cancel — nothing more to do. + } + // If the flag is not found, the request already completed and cleaned up. + continue; + } + + // Register claim flag and pre-register pipelined requests in PipelineState + // before they become cancel-acknowledgeable. For pipelined requests, also + // creates the pipeline entry so the cancel handler can find them immediately. + let claim_flag = if let Some(key) = pipeline_key_from_args(&request.arguments) { + lock_or_recover(&pipeline_state).pre_register( + request.request_id, + key.to_string(), + ) + } else { + lock_or_recover(&pipeline_state).register_claim(request.request_id) + }; + + // Multiplex: dispatch to a new thread. Bazel bounds concurrency via + // --worker_max_multiplex_instances (default: 8), so no in-process + // thread pool is needed. 
+ let self_path = self_path.clone(); + let startup_args = startup_args.clone(); + let pipeline_state = Arc::clone(&pipeline_state); + let state_roots = Arc::clone(&state_roots); + let request = request.clone(); + + let handle = std::thread::spawn(move || { + append_worker_lifecycle_log(&format!( + "pid={} thread={} request_thread_start request_id={} crate={} emit={} pipeline_key={}", + current_pid(), + current_thread_label(), + request.request_id, + crate_name_from_args(&request.arguments).unwrap_or("-"), + emit_arg_from_args(&request.arguments).unwrap_or("-"), + pipeline_key_from_args(&request.arguments).unwrap_or("-"), + )); + if worker_is_shutting_down() { + if !claim_flag.swap(true, Ordering::SeqCst) { + let response = build_shutdown_response(request.request_id); + let _ = write_worker_response(&stdout, &response); + } + lock_or_recover(&pipeline_state).remove_claim(request.request_id); + append_worker_lifecycle_log(&format!( + "pid={} thread={} request_thread_skipped_for_shutdown request_id={} claimed={}", + current_pid(), + current_thread_label(), + request.request_id, + claim_flag.load(Ordering::SeqCst), + )); + return; + } + let (exit_code, output) = + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let mut full_args = startup_args; + full_args.extend(request.arguments.clone()); + relocate_pw_flags(&mut full_args); + + let sandbox_opt = request.sandbox_dir.clone(); + + // Make output files writable (Bazel marks previous outputs read-only). + match sandbox_opt { + Some(ref dir) => { + prepare_outputs_sandboxed(&full_args, dir); + } + None => prepare_outputs(&full_args), + } + + // Check for pipelining mode flags (--pipelining-metadata, + // --pipelining-full, --pipelining-key=). When present these + // are handled specially; otherwise fall through to a normal subprocess. + let pipelining = detect_pipelining_mode(&full_args); + + // If cancel already claimed this request, bail out without starting rustc. 
+ if claim_flag.load(Ordering::SeqCst) { + if let PipeliningMode::Metadata { ref key } + | PipeliningMode::Full { ref key } = pipelining + { + lock_or_recover(&pipeline_state).cleanup(key, request.request_id); + } + return (0, String::new()); + } + + match pipelining { + PipeliningMode::Metadata { key } => { + let result = handle_pipelining_metadata( + &request, + full_args, + key.clone(), + &state_roots, + &pipeline_state, + ); + // Clean up the PreRegistered entry if metadata failed early + // (before it transitioned to MetadataRunning). + if result.0 != 0 { + lock_or_recover(&pipeline_state) + .cleanup(&key, request.request_id); + } + result + } + PipeliningMode::Full { key } => handle_pipelining_full( + &request, + full_args, + key, + &pipeline_state, + &self_path, + ), + PipeliningMode::None => match sandbox_opt { + Some(ref dir) => run_sandboxed_request(&self_path, full_args, dir) + .unwrap_or_else(|e| { + (1, format!("sandboxed worker error: {e}")) + }), + None => run_request(&self_path, full_args) + .unwrap_or_else(|e| (1, format!("worker thread error: {e}"))), + }, + } + })) { + Ok(result) => result, + Err(_) => (1, "internal error: worker thread panicked".to_string()), + }; + + // Remove our claim flag regardless of who sends the response. + // This keeps the map from growing indefinitely and allows request_id + // to be reused in the next build. + lock_or_recover(&pipeline_state).remove_claim(request.request_id); + + // Only send a response if a cancel acknowledgment hasn't already been sent. 
+ if !claim_flag.swap(true, Ordering::SeqCst) { + let response = build_response(exit_code, &output, request.request_id); + let _ = write_worker_response(&stdout, &response); + } + append_worker_lifecycle_log(&format!( + "pid={} thread={} request_thread_complete request_id={} exit_code={} output_bytes={} claimed={}", + current_pid(), + current_thread_label(), + request.request_id, + exit_code, + output.len(), + claim_flag.load(Ordering::SeqCst), + )); + }); + lock_or_recover(&in_flight).push(handle); + } + } + + begin_worker_shutdown("stdin_eof"); + + // Kill all in-flight pipeline children, then join worker threads. + { + let cancelled_entries = lock_or_recover(&pipeline_state).drain_all(); + for entry in cancelled_entries { + entry.kill(); + } + } + { + let handles: Vec<_> = lock_or_recover(&in_flight).drain(..).collect(); + let deadline = Instant::now() + Duration::from_secs(10); + for handle in handles { + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { + break; + } + // std::thread::JoinHandle has no timed join, so just join. + // Pipeline children are already killed above, so threads should + // unblock quickly. The 10s deadline is a safety net. 
+ let _ = handle.join(); + } + } + + append_worker_lifecycle_log(&format!( + "pid={} event=stdin_eof thread={} requests_seen={}", + current_pid(), + current_thread_label(), + request_counter.load(Ordering::SeqCst), + )); + + Ok(()) +} + +#[cfg(test)] +mod test { + use super::pipeline::{ + apply_substs, build_rustc_env, expand_rustc_args, extract_rmeta_path, + find_out_dir_in_expanded, parse_pw_args, prepare_expanded_rustc_outputs, + rewrite_out_dir_in_expanded, scan_pipelining_flags, strip_pipelining_flags, + BackgroundRustc, CancelledEntry, PipelineState, + }; + use super::protocol::{ + extract_arguments, extract_cancel, extract_inputs, extract_request_id, extract_sandbox_dir, + WorkRequestInput, + }; + use super::sandbox::resolve_sandbox_path; + #[cfg(unix)] + use super::sandbox::{ + copy_all_outputs_to_sandbox, copy_output_to_sandbox, seed_sandbox_cache_root, symlink_path, + }; + use super::*; + use crate::options::is_pipelining_flag; + use tinyjson::JsonValue; + + fn parse_json(s: &str) -> JsonValue { + s.parse().unwrap() + } + + #[test] + fn test_extract_request_id_present() { + let req = parse_json(r#"{"requestId": 42, "arguments": []}"#); + assert_eq!(extract_request_id(&req), 42); + } + + #[test] + fn test_extract_request_id_missing() { + let req = parse_json(r#"{"arguments": []}"#); + assert_eq!(extract_request_id(&req), 0); + } + + #[test] + fn test_extract_arguments() { + let req = + parse_json(r#"{"requestId": 0, "arguments": ["--subst", "pwd=/work", "--", "rustc"]}"#); + assert_eq!( + extract_arguments(&req), + vec!["--subst", "pwd=/work", "--", "rustc"] + ); + } + + #[test] + fn test_extract_arguments_empty() { + let req = parse_json(r#"{"requestId": 0, "arguments": []}"#); + assert_eq!(extract_arguments(&req), Vec::::new()); + } + + #[test] + fn test_build_response_sanitizes_control_characters() { + let response = build_response(1, "hello\u{0}world\u{7}", 9); + let parsed = parse_json(&response); + let JsonValue::Object(map) = parsed else { + 
panic!("expected object response"); + }; + let Some(JsonValue::String(output)) = map.get("output") else { + panic!("expected string output"); + }; + assert_eq!(output, "hello world "); + } + + #[test] + #[cfg(unix)] + fn test_prepare_outputs_inline_out_dir() { + use std::fs; + use std::os::unix::fs::PermissionsExt; + + let dir = std::env::temp_dir().join("pw_test_prepare_inline"); + fs::create_dir_all(&dir).unwrap(); + let file_path = dir.join("libfoo.rmeta"); + fs::write(&file_path, b"content").unwrap(); + + let mut perms = fs::metadata(&file_path).unwrap().permissions(); + perms.set_mode(0o444); + fs::set_permissions(&file_path, perms).unwrap(); + assert!(fs::metadata(&file_path).unwrap().permissions().readonly()); + + let args = vec![format!("--out-dir={}", dir.display())]; + prepare_outputs(&args); + + assert!(!fs::metadata(&file_path).unwrap().permissions().readonly()); + let _ = fs::remove_dir_all(&dir); + } + + #[test] + #[cfg(unix)] + fn test_prepare_outputs_arg_file() { + use std::fs; + use std::os::unix::fs::PermissionsExt; + + let tmp = std::env::temp_dir().join("pw_test_prepare_argfile"); + fs::create_dir_all(&tmp).unwrap(); + + // Create the output dir and a read-only file in it. + let out_dir = tmp.join("out"); + fs::create_dir_all(&out_dir).unwrap(); + let file_path = out_dir.join("libfoo.rmeta"); + fs::write(&file_path, b"content").unwrap(); + let mut perms = fs::metadata(&file_path).unwrap().permissions(); + perms.set_mode(0o444); + fs::set_permissions(&file_path, perms).unwrap(); + assert!(fs::metadata(&file_path).unwrap().permissions().readonly()); + + // Write an --arg-file containing --out-dir. 
+ let arg_file = tmp.join("rustc.params"); + fs::write( + &arg_file, + format!("--out-dir={}\n--crate-name=foo\n", out_dir.display()), + ) + .unwrap(); + + let args = vec!["--arg-file".to_string(), arg_file.display().to_string()]; + prepare_outputs(&args); + + assert!(!fs::metadata(&file_path).unwrap().permissions().readonly()); + let _ = fs::remove_dir_all(&tmp); + } + + #[test] + #[cfg(unix)] + fn test_prepare_expanded_rustc_outputs_emit_path() { + use std::fs; + use std::os::unix::fs::PermissionsExt; + + let tmp = std::env::temp_dir().join("pw_test_prepare_emit_path"); + fs::create_dir_all(&tmp).unwrap(); + + let emit_path = tmp.join("libfoo.rmeta"); + fs::write(&emit_path, b"content").unwrap(); + let mut perms = fs::metadata(&emit_path).unwrap().permissions(); + perms.set_mode(0o555); + fs::set_permissions(&emit_path, perms).unwrap(); + assert!(fs::metadata(&emit_path).unwrap().permissions().readonly()); + + let args = vec![format!("--emit=metadata={}", emit_path.display())]; + prepare_expanded_rustc_outputs(&args); + + assert!(!fs::metadata(&emit_path).unwrap().permissions().readonly()); + let _ = fs::remove_dir_all(&tmp); + } + + #[test] + fn test_build_response_success() { + let response = build_response(0, "", 0); + assert_eq!(response, r#"{"exitCode":0,"output":"","requestId":0}"#); + let parsed = parse_json(&response); + if let JsonValue::Object(map) = parsed { + assert!(matches!(map.get("exitCode"), Some(JsonValue::Number(n)) if *n == 0.0)); + assert!(matches!(map.get("requestId"), Some(JsonValue::Number(n)) if *n == 0.0)); + } else { + panic!("expected object"); + } + } + + #[test] + fn test_build_response_failure() { + let response = build_response(1, "error: type mismatch", 0); + let parsed = parse_json(&response); + if let JsonValue::Object(map) = parsed { + assert!(matches!(map.get("exitCode"), Some(JsonValue::Number(n)) if *n == 1.0)); + assert!( + matches!(map.get("output"), Some(JsonValue::String(s)) if s == "error: type mismatch") + ); + } else 
{ + panic!("expected object"); + } + } + + #[test] + fn test_detect_pipelining_mode_none() { + let args = vec!["--subst".to_string(), "pwd=/work".to_string()]; + assert!(matches!( + detect_pipelining_mode(&args), + PipeliningMode::None + )); + } + + #[test] + fn test_detect_pipelining_mode_metadata() { + let args = vec![ + "--pipelining-metadata".to_string(), + "--pipelining-key=my_crate_abc123".to_string(), + ]; + match detect_pipelining_mode(&args) { + PipeliningMode::Metadata { key } => assert_eq!(key, "my_crate_abc123"), + other => panic!( + "expected Metadata, got {:?}", + std::mem::discriminant(&other) + ), + } + } + + #[test] + fn test_detect_pipelining_mode_full() { + let args = vec![ + "--pipelining-full".to_string(), + "--pipelining-key=my_crate_abc123".to_string(), + ]; + match detect_pipelining_mode(&args) { + PipeliningMode::Full { key } => assert_eq!(key, "my_crate_abc123"), + other => panic!("expected Full, got {:?}", std::mem::discriminant(&other)), + } + } + + #[test] + fn test_detect_pipelining_mode_no_key() { + // If pipelining flag present but no key, fall back to None. + let args = vec!["--pipelining-metadata".to_string()]; + assert!(matches!( + detect_pipelining_mode(&args), + PipeliningMode::None + )); + } + + #[test] + fn test_strip_pipelining_flags() { + let args = vec![ + "--pipelining-metadata".to_string(), + "--pipelining-key=my_crate_abc123".to_string(), + "--arg-file".to_string(), + "rustc.params".to_string(), + ]; + let filtered = strip_pipelining_flags(&args); + assert_eq!(filtered, vec!["--arg-file", "rustc.params"]); + } + + #[test] + fn test_pipeline_state_take_for_full_empty() { + let mut state = PipelineState::new(); + // Verify that take_for_full on an empty state returns None. 
+ assert!(state.take_for_full("nonexistent", 1).is_none()); + } + + // --- Tests for new helpers added in the worker-key fix --- + + #[test] + fn test_is_pipelining_flag() { + assert!(is_pipelining_flag("--pipelining-metadata")); + assert!(is_pipelining_flag("--pipelining-full")); + assert!(is_pipelining_flag("--pipelining-key=foo_abc")); + assert!(!is_pipelining_flag("--crate-name=foo")); + assert!(!is_pipelining_flag("--emit=dep-info,metadata,link")); + assert!(!is_pipelining_flag("-Zno-codegen")); + } + + #[test] + fn test_apply_substs() { + let subst = vec![ + ("pwd".to_string(), "/work".to_string()), + ("out".to_string(), "bazel-out/k8/bin".to_string()), + ]; + assert_eq!(apply_substs("${pwd}/src", &subst), "/work/src"); + assert_eq!( + apply_substs("${out}/foo.rlib", &subst), + "bazel-out/k8/bin/foo.rlib" + ); + assert_eq!(apply_substs("--crate-name=foo", &subst), "--crate-name=foo"); + } + + #[test] + fn test_scan_pipelining_flags_metadata() { + let (is_metadata, is_full, key) = scan_pipelining_flags( + ["--pipelining-metadata", "--pipelining-key=foo_abc"] + .iter() + .copied(), + ); + assert!(is_metadata); + assert!(!is_full); + assert_eq!(key, Some("foo_abc".to_string())); + } + + #[test] + fn test_scan_pipelining_flags_full() { + let (is_metadata, is_full, key) = scan_pipelining_flags( + ["--pipelining-full", "--pipelining-key=bar_xyz"] + .iter() + .copied(), + ); + assert!(!is_metadata); + assert!(is_full); + assert_eq!(key, Some("bar_xyz".to_string())); + } + + #[test] + fn test_scan_pipelining_flags_none() { + let (is_metadata, is_full, key) = + scan_pipelining_flags(["--emit=link", "--crate-name=foo"].iter().copied()); + assert!(!is_metadata); + assert!(!is_full); + assert_eq!(key, None); + } + + #[test] + fn test_detect_pipelining_mode_from_paramfile() { + use std::io::Write; + // Write a temporary paramfile with pipelining flags. 
+ let tmp = std::env::temp_dir().join("pw_test_detect_paramfile"); + let param_path = tmp.join("rustc.params"); + std::fs::create_dir_all(&tmp).unwrap(); + let mut f = std::fs::File::create(¶m_path).unwrap(); + writeln!(f, "--emit=dep-info,metadata,link").unwrap(); + writeln!(f, "--crate-name=foo").unwrap(); + writeln!(f, "--pipelining-metadata").unwrap(); + writeln!(f, "--pipelining-key=foo_abc123").unwrap(); + drop(f); + + // Full args: startup args before "--", then rustc + @paramfile. + let args = vec![ + "--subst".to_string(), + "pwd=/work".to_string(), + "--".to_string(), + "/path/to/rustc".to_string(), + format!("@{}", param_path.display()), + ]; + + match detect_pipelining_mode(&args) { + PipeliningMode::Metadata { key } => assert_eq!(key, "foo_abc123"), + other => panic!( + "expected Metadata, got {:?}", + std::mem::discriminant(&other) + ), + } + + let _ = std::fs::remove_dir_all(&tmp); + } + + #[test] + fn test_expand_rustc_args_strips_pipelining_flags() { + use std::io::Write; + let tmp = std::env::temp_dir().join("pw_test_expand_rustc"); + let param_path = tmp.join("rustc.params"); + std::fs::create_dir_all(&tmp).unwrap(); + let mut f = std::fs::File::create(¶m_path).unwrap(); + writeln!(f, "--emit=dep-info,metadata,link").unwrap(); + writeln!(f, "--crate-name=foo").unwrap(); + writeln!(f, "--pipelining-metadata").unwrap(); + writeln!(f, "--pipelining-key=foo_abc123").unwrap(); + drop(f); + + let rustc_and_after = vec![ + "/path/to/rustc".to_string(), + format!("@{}", param_path.display()), + ]; + let subst: Vec<(String, String)> = vec![]; + let expanded = expand_rustc_args(&rustc_and_after, &subst, std::path::Path::new(".")); + + assert_eq!(expanded[0], "/path/to/rustc"); + assert!(expanded.contains(&"--emit=dep-info,metadata,link".to_string())); + assert!(expanded.contains(&"--crate-name=foo".to_string())); + // Pipelining flags must be stripped. 
+        assert!(!expanded.contains(&"--pipelining-metadata".to_string()));
+        assert!(!expanded.iter().any(|a| a.starts_with("--pipelining-key=")));
+
+        let _ = std::fs::remove_dir_all(&tmp);
+    }
+
+    #[test]
+    fn test_expand_rustc_args_applies_substs() {
+        use std::io::Write;
+        let tmp = std::env::temp_dir().join("pw_test_expand_subst");
+        let param_path = tmp.join("rustc.params");
+        std::fs::create_dir_all(&tmp).unwrap();
+        let mut f = std::fs::File::create(&param_path).unwrap();
+        writeln!(f, "--out-dir=${{pwd}}/out").unwrap();
+        drop(f);
+
+        let rustc_and_after = vec![
+            "/path/to/rustc".to_string(),
+            format!("@{}", param_path.display()),
+        ];
+        let subst = vec![("pwd".to_string(), "/work".to_string())];
+        let expanded = expand_rustc_args(&rustc_and_after, &subst, std::path::Path::new("."));
+
+        assert!(
+            expanded.contains(&"--out-dir=/work/out".to_string()),
+            "expected substituted arg, got: {:?}",
+            expanded
+        );
+
+        let _ = std::fs::remove_dir_all(&tmp);
+    }
+
+    // --- Tests for Phase 4 sandbox helpers ---
+
+    #[test]
+    fn test_extract_sandbox_dir_absent() {
+        let req = parse_json(r#"{"requestId": 1}"#);
+        assert_eq!(extract_sandbox_dir(&req), Ok(None));
+    }
+
+    #[test]
+    fn test_extract_sandbox_dir_empty_string_returns_none() {
+        let req = parse_json(r#"{"requestId": 1, "sandboxDir": ""}"#);
+        assert_eq!(extract_sandbox_dir(&req), Ok(None));
+    }
+
+    /// A nonexistent sandbox directory is an error — it means the platform
+    /// doesn't support sandboxing and the user should remove the flag.
+    #[test]
+    fn test_extract_sandbox_dir_nonexistent_is_err() {
+        let req = parse_json(r#"{"requestId": 1, "sandboxDir": "/no/such/sandbox/dir"}"#);
+        let result = extract_sandbox_dir(&req);
+        assert!(result.is_err(), "expected Err for nonexistent sandbox dir");
+        let msg = result.unwrap_err();
+        assert!(
+            msg.contains("--experimental_worker_multiplex_sandboxing"),
+            "error should mention the flag: {msg}"
+        );
+    }
+
+    /// An existing but empty sandbox directory is an error. On Windows, Bazel
+    /// creates the directory without populating it with symlinks because there
+    /// is no real sandbox implementation.
+    #[test]
+    #[cfg(unix)]
+    fn test_extract_sandbox_dir_empty_dir_is_err_unix() {
+        let dir = std::env::temp_dir().join("pw_test_sandbox_empty_unix");
+        let _ = std::fs::remove_dir_all(&dir);
+        std::fs::create_dir_all(&dir).unwrap();
+        let dir_str = dir.to_string_lossy().into_owned();
+        let json = format!(r#"{{"requestId": 1, "sandboxDir": "{}"}}"#, dir_str);
+        let req = parse_json(&json);
+        let result = extract_sandbox_dir(&req);
+        assert!(result.is_err(), "expected Err for empty sandbox dir");
+        let _ = std::fs::remove_dir_all(&dir);
+    }
+
+    #[test]
+    #[cfg(windows)]
+    fn test_extract_sandbox_dir_empty_dir_is_err_windows() {
+        let dir = std::env::temp_dir().join("pw_test_sandbox_empty_win");
+        let _ = std::fs::remove_dir_all(&dir);
+        std::fs::create_dir_all(&dir).unwrap();
+        let dir_str = dir.to_string_lossy().into_owned();
+        let escaped = dir_str.replace('\\', "\\\\");
+        let json = format!(r#"{{"requestId": 1, "sandboxDir": "{}"}}"#, escaped);
+        let req = parse_json(&json);
+        let result = extract_sandbox_dir(&req);
+        assert!(result.is_err(), "expected Err for empty sandbox dir");
+        let _ = std::fs::remove_dir_all(&dir);
+    }
+
+    /// On Unix, a populated sandbox directory is accepted.
+    #[test]
+    #[cfg(unix)]
+    fn test_extract_sandbox_dir_populated_unix() {
+        let dir = std::env::temp_dir().join("pw_test_sandbox_pop_unix");
+        let _ = std::fs::remove_dir_all(&dir);
+        std::fs::create_dir_all(&dir).unwrap();
+        std::fs::write(dir.join("marker"), b"").unwrap();
+        let dir_str = dir.to_string_lossy().into_owned();
+        let json = format!(r#"{{"requestId": 1, "sandboxDir": "{}"}}"#, dir_str);
+        let req = parse_json(&json);
+        assert_eq!(extract_sandbox_dir(&req), Ok(Some(dir_str)));
+        let _ = std::fs::remove_dir_all(&dir);
+    }
+
+    /// On Windows, a populated sandbox directory is accepted.
+    /// Backslashes in the path must be escaped in JSON.
+ #[test] + #[cfg(windows)] + fn test_extract_sandbox_dir_populated_windows() { + let dir = std::env::temp_dir().join("pw_test_sandbox_pop_win"); + let _ = std::fs::remove_dir_all(&dir); + std::fs::create_dir_all(&dir).unwrap(); + std::fs::write(dir.join("marker"), b"").unwrap(); + let dir_str = dir.to_string_lossy().into_owned(); + let escaped = dir_str.replace('\\', "\\\\"); + let json = format!(r#"{{"requestId": 1, "sandboxDir": "{}"}}"#, escaped); + let req = parse_json(&json); + assert_eq!(extract_sandbox_dir(&req), Ok(Some(dir_str))); + let _ = std::fs::remove_dir_all(&dir); + } + + #[test] + fn test_extract_inputs() { + let req = parse_json( + r#"{ + "requestId": 1, + "inputs": [ + {"path": "foo/bar.rs", "digest": "abc"}, + {"path": "flagfile.params"} + ] + }"#, + ); + assert_eq!( + extract_inputs(&req), + vec![ + WorkRequestInput { + path: "foo/bar.rs".to_string(), + digest: Some("abc".to_string()), + }, + WorkRequestInput { + path: "flagfile.params".to_string(), + digest: None, + }, + ] + ); + } + + #[test] + fn test_extract_cancel_true() { + let req = parse_json(r#"{"requestId": 1, "cancel": true}"#); + assert!(extract_cancel(&req)); + } + + #[test] + fn test_extract_cancel_false() { + let req = parse_json(r#"{"requestId": 1, "cancel": false}"#); + assert!(!extract_cancel(&req)); + } + + #[test] + fn test_extract_cancel_absent() { + let req = parse_json(r#"{"requestId": 1}"#); + assert!(!extract_cancel(&req)); + } + + #[test] + fn test_build_cancel_response() { + let response = build_cancel_response(7); + assert_eq!( + response, + r#"{"exitCode":0,"output":"","requestId":7,"wasCancelled":true}"# + ); + let parsed = parse_json(&response); + if let JsonValue::Object(map) = parsed { + assert!(matches!(map.get("requestId"), Some(JsonValue::Number(n)) if *n == 7.0)); + assert!(matches!(map.get("exitCode"), Some(JsonValue::Number(n)) if *n == 0.0)); + assert!(matches!( + map.get("wasCancelled"), + Some(JsonValue::Boolean(true)) + )); + } else { + 
panic!("expected object"); + } + } + + #[test] + #[cfg(unix)] + fn test_resolve_sandbox_path_relative_unix() { + let result = resolve_sandbox_path("bazel-out/k8/bin/pkg", "/sandbox/42"); + assert_eq!(result, "/sandbox/42/bazel-out/k8/bin/pkg"); + } + + #[test] + #[cfg(windows)] + fn test_resolve_sandbox_path_relative_windows() { + // On Windows, Path::join produces backslash separators. + let result = resolve_sandbox_path("bazel-out/k8/bin/pkg", "/sandbox/42"); + assert_eq!(result, "/sandbox/42\\bazel-out/k8/bin/pkg"); + } + + #[test] + fn test_resolve_sandbox_path_absolute() { + let result = resolve_sandbox_path("/absolute/path/out", "/sandbox/42"); + assert_eq!(result, "/absolute/path/out"); + } + + #[test] + fn test_find_out_dir_in_expanded() { + let args = vec![ + "--crate-name=foo".to_string(), + "--out-dir=/work/bazel-out/k8/bin/pkg".to_string(), + "--emit=link".to_string(), + ]; + assert_eq!( + find_out_dir_in_expanded(&args), + Some("/work/bazel-out/k8/bin/pkg".to_string()) + ); + } + + #[test] + fn test_find_out_dir_in_expanded_missing() { + let args = vec!["--crate-name=foo".to_string(), "--emit=link".to_string()]; + assert_eq!(find_out_dir_in_expanded(&args), None); + } + + #[test] + fn test_rewrite_out_dir_in_expanded() { + let args = vec![ + "--crate-name=foo".to_string(), + "--out-dir=/old/path".to_string(), + "--emit=link".to_string(), + ]; + let new_dir = std::path::Path::new("/_pw_pipeline/foo_abc"); + let result = rewrite_out_dir_in_expanded(args, new_dir); + assert_eq!( + result, + vec![ + "--crate-name=foo", + "--out-dir=/_pw_pipeline/foo_abc", + "--emit=link", + ] + ); + } + + #[test] + fn test_parse_pw_args_substitutes_pwd_from_real_execroot() { + let parsed = parse_pw_args( + &[ + "--subst".to_string(), + "pwd=${pwd}".to_string(), + "--output-file".to_string(), + "diag.txt".to_string(), + ], + std::path::Path::new("/real/execroot"), + ); + + assert_eq!( + parsed.subst, + vec![("pwd".to_string(), "/real/execroot".to_string())] + ); + 
assert_eq!(parsed.output_file, Some("diag.txt".to_string())); + assert_eq!(parsed.stable_status_file, None); + assert_eq!(parsed.volatile_status_file, None); + } + + #[test] + fn test_build_rustc_env_applies_stamp_and_subst_mappings() { + let tmp = + std::env::temp_dir().join(format!("pw_test_build_rustc_env_{}", std::process::id())); + std::fs::create_dir_all(&tmp).unwrap(); + + let env_file = tmp.join("env.txt"); + let stable_status = tmp.join("stable-status.txt"); + let volatile_status = tmp.join("volatile-status.txt"); + + std::fs::write( + &env_file, + "STAMPED={BUILD_USER}:{BUILD_SCM_REVISION}:${pwd}\nUNCHANGED=value\n", + ) + .unwrap(); + std::fs::write(&stable_status, "BUILD_USER alice\n").unwrap(); + std::fs::write(&volatile_status, "BUILD_SCM_REVISION deadbeef\n").unwrap(); + + let env = build_rustc_env( + &[env_file.display().to_string()], + Some(stable_status.to_str().unwrap()), + Some(volatile_status.to_str().unwrap()), + &[("pwd".to_string(), "/real/execroot".to_string())], + ); + + assert_eq!( + env.get("STAMPED"), + Some(&"alice:deadbeef:/real/execroot".to_string()) + ); + assert_eq!(env.get("UNCHANGED"), Some(&"value".to_string())); + + let _ = std::fs::remove_dir_all(&tmp); + } + + #[test] + fn test_build_shutdown_response() { + let response = build_shutdown_response(11); + assert_eq!( + response, + r#"{"exitCode":1,"output":"worker shutting down","requestId":11}"# + ); + } + + #[test] + fn test_begin_worker_shutdown_sets_flag() { + WORKER_SHUTTING_DOWN.store(false, Ordering::SeqCst); + begin_worker_shutdown("test"); + assert!(worker_is_shutting_down()); + WORKER_SHUTTING_DOWN.store(false, Ordering::SeqCst); + } + + #[test] + fn test_extract_rmeta_path_valid() { + let line = r#"{"artifact":"/work/out/libfoo.rmeta","emit":"metadata"}"#; + assert_eq!( + extract_rmeta_path(line), + Some("/work/out/libfoo.rmeta".to_string()) + ); + } + + #[test] + fn test_extract_rmeta_path_rlib() { + // rlib artifact should not match (only rmeta) + let line = 
r#"{"artifact":"/work/out/libfoo.rlib","emit":"link"}"#; + assert_eq!(extract_rmeta_path(line), None); + } + + #[test] + #[cfg(unix)] + fn test_copy_output_to_sandbox() { + use std::fs; + + let tmp = std::env::temp_dir().join("pw_test_copy_to_sandbox"); + let pipeline_dir = tmp.join("pipeline"); + let sandbox_dir = tmp.join("sandbox"); + let out_rel = "bazel-out/k8/bin/pkg"; + + fs::create_dir_all(&pipeline_dir).unwrap(); + fs::create_dir_all(&sandbox_dir).unwrap(); + + // Write a fake rmeta into the pipeline dir. + let rmeta_path = pipeline_dir.join("libfoo.rmeta"); + fs::write(&rmeta_path, b"fake rmeta content").unwrap(); + + copy_output_to_sandbox( + &rmeta_path.display().to_string(), + &sandbox_dir.display().to_string(), + out_rel, + "_pipeline", + ); + + let dest = sandbox_dir + .join(out_rel) + .join("_pipeline") + .join("libfoo.rmeta"); + assert!(dest.exists(), "expected rmeta copied to sandbox/_pipeline/"); + assert_eq!(fs::read(&dest).unwrap(), b"fake rmeta content"); + + let _ = fs::remove_dir_all(&tmp); + } + + #[test] + #[cfg(unix)] + fn test_copy_all_outputs_to_sandbox() { + use std::fs; + + let tmp = std::env::temp_dir().join("pw_test_copy_all_to_sandbox"); + let pipeline_dir = tmp.join("pipeline"); + let sandbox_dir = tmp.join("sandbox"); + let out_rel = "bazel-out/k8/bin/pkg"; + + fs::create_dir_all(&pipeline_dir).unwrap(); + fs::create_dir_all(&sandbox_dir).unwrap(); + + fs::write(pipeline_dir.join("libfoo.rlib"), b"fake rlib").unwrap(); + fs::write(pipeline_dir.join("libfoo.rmeta"), b"fake rmeta").unwrap(); + fs::write(pipeline_dir.join("libfoo.d"), b"fake dep-info").unwrap(); + + copy_all_outputs_to_sandbox(&pipeline_dir, &sandbox_dir.display().to_string(), out_rel); + + let dest = sandbox_dir.join(out_rel); + assert!(dest.join("libfoo.rlib").exists()); + assert!(dest.join("libfoo.rmeta").exists()); + assert!(dest.join("libfoo.d").exists()); + + let _ = fs::remove_dir_all(&tmp); + } + + #[test] + #[cfg(unix)] + fn 
test_copy_all_outputs_to_sandbox_prefers_hardlinks() { + use std::fs; + use std::os::unix::fs::MetadataExt; + + let tmp = + std::env::temp_dir().join("pw_test_copy_all_outputs_to_sandbox_prefers_hardlinks"); + let pipeline_dir = tmp.join("pipeline"); + let sandbox_dir = tmp.join("sandbox"); + let out_rel = "bazel-out/k8/bin/pkg"; + + fs::create_dir_all(&pipeline_dir).unwrap(); + fs::create_dir_all(&sandbox_dir).unwrap(); + + let src = pipeline_dir.join("libfoo.rlib"); + fs::write(&src, b"fake rlib").unwrap(); + + copy_all_outputs_to_sandbox(&pipeline_dir, &sandbox_dir.display().to_string(), out_rel); + + let dest = sandbox_dir.join(out_rel).join("libfoo.rlib"); + assert!(dest.exists()); + assert_eq!( + fs::metadata(&src).unwrap().ino(), + fs::metadata(&dest).unwrap().ino() + ); + + let _ = fs::remove_dir_all(&tmp); + } + + #[test] + #[cfg(unix)] + fn test_seed_sandbox_cache_root() { + use std::fs; + + let tmp = std::env::temp_dir().join("pw_test_seed_sandbox_cache_root"); + let sandbox_dir = tmp.join("sandbox"); + let cache_repo = tmp.join("cache/repos/v1/contents/hash/repo"); + fs::create_dir_all(&sandbox_dir).unwrap(); + fs::create_dir_all(cache_repo.join("tool/src")).unwrap(); + symlink_path(&cache_repo, &sandbox_dir.join("external_repo"), true).unwrap(); + + seed_sandbox_cache_root(&sandbox_dir).unwrap(); + + let cache_link = sandbox_dir.join("cache"); + assert!(cache_link.exists()); + assert_eq!(cache_link.canonicalize().unwrap(), tmp.join("cache")); + + let _ = fs::remove_dir_all(&tmp); + } + + // --- relocate_pw_flags tests --- + + #[test] + fn test_relocate_pw_flags_moves_output_file_before_separator() { + let mut args = vec![ + "--subst".into(), + "pwd=${pwd}".into(), + "--".into(), + "/path/to/rustc".into(), + "--output-file".into(), + "bazel-out/foo/libbar.rmeta".into(), + "src/lib.rs".into(), + "--crate-name=foo".into(), + ]; + relocate_pw_flags(&mut args); + assert_eq!( + args, + vec![ + "--subst", + "pwd=${pwd}", + "--output-file", + 
"bazel-out/foo/libbar.rmeta", + "--", + "/path/to/rustc", + "src/lib.rs", + "--crate-name=foo", + ] + ); + } + + #[test] + fn test_relocate_pw_flags_moves_multiple_flags() { + let mut args = vec![ + "--subst".into(), + "pwd=${pwd}".into(), + "--".into(), + "/path/to/rustc".into(), + "--output-file".into(), + "out.rmeta".into(), + "--rustc-output-format".into(), + "rendered".into(), + "--env-file".into(), + "build_script.env".into(), + "--arg-file".into(), + "build_script.linksearchpaths".into(), + "--stable-status-file".into(), + "stable.status".into(), + "--volatile-status-file".into(), + "volatile.status".into(), + "src/lib.rs".into(), + ]; + relocate_pw_flags(&mut args); + let sep = args.iter().position(|a| a == "--").unwrap(); + // All pw flags should be before -- + assert!(args[..sep].contains(&"--output-file".to_string())); + assert!(args[..sep].contains(&"--rustc-output-format".to_string())); + assert!(args[..sep].contains(&"--env-file".to_string())); + assert!(args[..sep].contains(&"--arg-file".to_string())); + assert!(args[..sep].contains(&"--stable-status-file".to_string())); + assert!(args[..sep].contains(&"--volatile-status-file".to_string())); + // Rustc args should be after -- + assert!(args[sep + 1..].contains(&"/path/to/rustc".to_string())); + assert!(args[sep + 1..].contains(&"src/lib.rs".to_string())); + } + + #[test] + fn test_relocate_pw_flags_noop_when_no_flags() { + let mut args = vec![ + "--subst".into(), + "pwd=${pwd}".into(), + "--".into(), + "/path/to/rustc".into(), + "src/lib.rs".into(), + ]; + let expected = args.clone(); + relocate_pw_flags(&mut args); + assert_eq!(args, expected); + } + + #[test] + fn test_relocate_pw_flags_noop_when_no_separator() { + let mut args = vec!["--output-file".into(), "foo".into()]; + let expected = args.clone(); + relocate_pw_flags(&mut args); + assert_eq!(args, expected); + } + + // ------------------------------------------------------------------------- + // PipelineState cancel-tracking unit tests + // 
------------------------------------------------------------------------- + + fn make_test_bg() -> BackgroundRustc { + use std::process::Command; + BackgroundRustc { + child: Command::new("sleep").arg("60").spawn().unwrap(), + diagnostics_before: String::new(), + stderr_drain: std::thread::spawn(|| String::new()), + pipeline_root_dir: std::path::PathBuf::from("/tmp"), + pipeline_output_dir: std::path::PathBuf::from("/tmp"), + original_out_dir: String::from("/tmp"), + } + } + + #[test] + fn test_pipeline_state_store_and_cancel_metadata_phase() { + let mut state = PipelineState::new(); + let _flag = state.pre_register(42, "key1".to_string()); + let bg = make_test_bg(); + assert!(state.store("key1", bg).is_none(), "store should succeed"); + assert!(state.has_entry("key1")); + assert!(state.has_request(42)); + + let cancelled = state.cancel_by_request_id(42); + assert!(cancelled.kill(), "cancel should kill the child"); + assert!(state.is_empty(), "state should be empty after cancel"); + } + + #[test] + fn test_pipeline_state_take_for_full_then_cancel() { + let mut state = PipelineState::new(); + let _meta_flag = state.pre_register(42, "key1".to_string()); + let bg = make_test_bg(); + assert!(state.store("key1", bg).is_none()); + + // Simulate full request arriving + let _full_flag = state.pre_register(99, "key1".to_string()); + let (mut taken, child_reaped) = state + .take_for_full("key1", 99) + .expect("take_for_full should return the BackgroundRustc"); + + // After take_for_full: entry is FullWaiting, request_index maps 99 (not 42). + assert!(state.has_entry("key1")); + assert!(state.has_request(99)); + assert!(!state.has_request(42)); + + // Cancel via the full-phase path (FullWaiting — PID-guarded kill). + #[cfg(unix)] + { + let cancelled = state.cancel_by_request_id(99); + assert!(cancelled.kill(), "cancel should kill via PID for full phase"); + assert!(state.is_empty(), "state should be empty after cancel"); + } + + // Verify child_reaped flag is initially false. 
+ assert!(!child_reaped.load(Ordering::SeqCst)); + + // Reap the child to prevent zombies. + let _ = taken.child.kill(); + let _ = taken.child.wait(); + let _ = taken.stderr_drain.join(); + } + + #[test] + fn test_pipeline_state_cancel_nonexistent_request() { + let mut state = PipelineState::new(); + let cancelled = state.cancel_by_request_id(999); + assert!(!cancelled.kill(), "cancel should return false for unknown request_id"); + } + + #[test] + fn test_pipeline_state_pre_register_and_cancel() { + let mut state = PipelineState::new(); + let _flag = state.pre_register(42, "key1".to_string()); + assert!(state.has_request(42)); + assert!(state.has_entry("key1")); + assert!(state.has_claim(42)); + + // No process stored yet — cancel should not kill (no child). + let cancelled = state.cancel_by_request_id(42); + assert!(!cancelled.kill(), "cancel should return false when no process was stored"); + // Entry is cleaned up. + assert!(!state.has_entry("key1")); + assert!(!state.has_request(42)); + } + + #[test] + fn test_pipeline_state_cleanup_removes_all_entries() { + let mut state = PipelineState::new(); + let _flag = state.pre_register(42, "key1".to_string()); + assert!(state.has_request(42)); + assert!(state.has_claim(42)); + state.cleanup("key1", 42); + assert!(state.is_empty(), "state should be empty after cleanup"); + assert!(!state.has_claim(42), "claim should be removed after cleanup"); + } + + #[test] + fn test_pipeline_state_register_claim_non_pipelined() { + let mut state = PipelineState::new(); + let flag = state.register_claim(42); + assert!(state.has_claim(42)); + assert!(!state.has_entry("any_key")); + assert!(!flag.load(Ordering::SeqCst)); + state.remove_claim(42); + assert!(!state.has_claim(42)); + } + + #[test] + fn test_pipeline_state_get_claim_flag() { + let mut state = PipelineState::new(); + assert!(state.get_claim_flag(42).is_none()); + let flag = state.register_claim(42); + let retrieved = state.get_claim_flag(42).expect("should find claim flag"); 
+ assert!(Arc::ptr_eq(&flag, &retrieved)); + } + + /// Demonstrates the stale-entry bug: if pre_register is called but + /// cleanup is NOT called on error, the entry persists and a second + /// pre_register for the same key silently overwrites it, leaving + /// a dangling request_index entry for the old request_id. + #[test] + fn test_stale_preregistered_entry_leaves_dangling_request_index() { + let mut state = PipelineState::new(); + let _flag = state.pre_register(42, "key1".to_string()); + assert!(state.has_entry("key1")); + assert!(state.has_request(42)); + + // BUG PATH: no cleanup() called (simulates metadata handler error). + // A second pre_register for the same key overwrites the entry, but + // the old request_index entry (42 → "key1") is NOT removed. + let _flag2 = state.pre_register(99, "key1".to_string()); + assert!( + state.has_request(42), + "without cleanup, old request_index entry 42 dangles" + ); + assert!(state.has_request(99)); + + // FIX PATH: cleanup before re-register removes both entry and index. + state.cleanup("key1", 99); + // But request 42 is still dangling! Only full cleanup fixes it. + assert!( + state.has_request(42), + "request 42 was never cleaned up — this is the leak" + ); + // Manual cleanup of the leaked entry. + state.remove_claim(42); + } + + /// Verifies that cleanup() before re-register leaves no dangling state. + #[test] + fn test_cleanup_before_reregister_prevents_stale_entries() { + let mut state = PipelineState::new(); + let _flag = state.pre_register(42, "key1".to_string()); + + // FIX: cleanup on error before any re-register. + state.cleanup("key1", 42); + assert!(!state.has_entry("key1")); + assert!(!state.has_request(42)); + assert!(!state.has_claim(42)); + + // Now re-register works cleanly. 
+ let _flag2 = state.pre_register(99, "key1".to_string()); + assert!(state.has_entry("key1")); + assert!(state.has_request(99)); + assert!(!state.has_request(42), "old request_id must not reappear"); + state.cleanup("key1", 99); + } + + /// Regression: CancelledEntry::PidOnly used raw kill(pid, SIGKILL) without + /// checking whether the child had already been reaped. If the full handler + /// already called child.wait(), the PID could be recycled and the kill + /// would hit an unrelated process. + #[test] + #[cfg(unix)] + fn test_pid_only_cancel_respects_child_reaped_flag() { + use std::process::Command; + + // Spawn a real child so we can observe kill behavior. + let mut child = Command::new("sleep").arg("60").spawn().unwrap(); + let pid = child.id(); + + // Case 1: child_reaped=false → kill should send SIGKILL (child dies). + let reaped = Arc::new(AtomicBool::new(false)); + let cancelled = CancelledEntry::PidOnly(pid, reaped); + assert!(cancelled.kill()); + // Child should now be dead. Reap to confirm. + let status = child.wait().unwrap(); + assert!(!status.success(), "child should have been killed"); + + // Case 2: child_reaped=true → kill must NOT send SIGKILL. + // Use our own PID — if SIGKILL were sent, this test process would die. + let self_pid = std::process::id(); + let reaped = Arc::new(AtomicBool::new(true)); + let cancelled = CancelledEntry::PidOnly(self_pid, reaped); + assert!(cancelled.kill()); + // If we're still running, the guard worked. + } + + /// Regression: build_response blanked output for exit_code==0, silently + /// discarding rustc warnings from successful compilations. 
+ #[test] + fn test_build_response_preserves_warnings_on_success() { + let warning = "warning: unused variable `x`"; + let response = build_response(0, warning, 42); + let parsed = parse_json(&response); + let JsonValue::Object(map) = parsed else { + panic!("expected object response"); + }; + let Some(JsonValue::String(output)) = map.get("output") else { + panic!("expected string output"); + }; + assert_eq!( + output, warning, + "build_response should preserve warnings on success (exit_code=0)" + ); + } +} diff --git a/util/process_wrapper/worker_pipeline.rs b/util/process_wrapper/worker_pipeline.rs new file mode 100644 index 0000000000..b73dc4c350 --- /dev/null +++ b/util/process_wrapper/worker_pipeline.rs @@ -0,0 +1,1493 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Pipelining state and handlers for the persistent worker. +//! +//! # Architecture: Single-rustc Pipelining +//! +//! Each crate is compiled by a single rustc invocation that produces both `.rmeta` +//! (metadata, encoding type/trait information for downstream crates) and `.rlib` +//! (the full compiled artifact including object code). Rustc emits `.rmeta` at the +//! boundary between analysis and codegen — specifically in `encode_and_write_metadata` +//! inside `passes.rs`, before `codegen_crate` is called — so downstream crates can +//! begin their own compilation as soon as the metadata is flushed. +//! +//! 
This module implements a two-phase split of that single rustc invocation across +//! two Bazel worker requests: +//! +//! 1. **Metadata request** (`--pipelining-metadata --pipelining-key=`): +//! Spawns rustc as a background child process. A dedicated thread reads rustc's +//! stdout line-by-line and blocks until it sees the sentinel that signals `.rmeta` +//! has been written to disk. At that point a [`WorkResponse`] is sent back to +//! Bazel so downstream actions can start immediately, while the child continues +//! running codegen in the background. +//! +//! 2. **Full request** (`--pipelining-full --pipelining-key=`): +//! Retrieves the still-running child from [`PipelineState`] by key and waits for +//! it to exit. Copies outputs from the pipeline output directory back into the +//! Bazel sandbox before sending the final [`WorkResponse`]. +//! +//! # Sandbox Contract Compliance +//! +//! Bazel's persistent-worker sandbox contract has two rules: +//! +//! **Rule 1 — all I/O goes through `sandbox_dir`.** +//! Satisfied by setting the worker process's `cwd` to `sandbox_dir` so that every +//! relative path resolves inside the sandbox. Outputs that must persist across the +//! two requests (`.rmeta`, `.rlib`, `.d` files, etc.) are redirected to a +//! worker-owned directory outside Bazel control: +//! `_pw_state/pipeline//outputs/`. The full handler copies them back into the +//! sandbox before returning. +//! +//! **Rule 2 — no file access after the [`WorkResponse`] is sent.** +//! The metadata response is sent before codegen begins. After that point the +//! background rustc process continues running, but it does NOT access any sandbox +//! input files because: +//! +//! - Source files are read once into `Arc` entries in rustc's `SourceMap` +//! during parsing, before `.rmeta` is emitted. +//! See: +//! - Dependency `.rmeta` files are memory-mapped once during the "resolve crate" +//! phase, also before codegen. +//! See: +//! 
- Proc macros are fully expanded during the parsing/expansion phase, before +//! `.rmeta` is written. +//! See: +//! +//! This has been empirically verified via strace on rustc 1.94.0: zero `open`/`read` +//! syscalls to sandbox input paths are observed after `.rmeta` is written. +//! See the regression test: +//! `test/unit/pipelined_compilation/strace_rustc_post_metadata_test.sh` +//! +//! # Caveats +//! +//! - **Undocumented rustc internals.** The ordering guarantee (all sandbox reads +//! complete before `.rmeta` emission) is an observable consequence of rustc's +//! current pass ordering, not a documented API contract. A future rustc refactor +//! (e.g. parallel front-end, lazy source loading) could break this assumption. +//! The strace test provides a regression signal. +//! +//! - **Incremental compilation.** The incremental cache directory must reside +//! outside the Bazel sandbox (e.g. in `_pw_state/`) so it persists across both +//! requests and across rebuilds. Enabling incremental inside the sandbox causes +//! cache misses and potential corruption. +//! +//! - **No precedent.** Spanning background work across two separate Bazel worker +//! requests is not an officially supported pattern. This implementation is +//! experimental and may interact unexpectedly with Bazel features such as dynamic +//! execution, worker cancellation, or future sandboxing policy changes. +//! +//! # Cancellation +//! +//! [`PipelineState`] maintains a `request_index`: a `HashMap` from active Bazel +//! request IDs to pipeline keys. This index enables the cancel handler to locate +//! the correct in-flight pipeline entry when Bazel sends a cancel signal. +//! +//! Invariants: +//! +//! - A pipeline entry is registered in `request_index` **before** the metadata +//! [`WorkResponse`] is sent (i.e., before the request becomes cancel-acknowledgeable). +//! - Ownership of a pipeline entry transfers atomically from the metadata handler to +//! 
the full handler: the metadata handler inserts the entry; the full handler +//! removes it. +//! - After a cancel response is sent, the background rustc child is killed (or the +//! request has already completed and the child has exited normally). +//! +//! See the "Cancellation Direction" section of the consolidated worker-pipelining +//! plan at `thoughts/shared/plans/2026-03-25-consolidated-worker-pipelining-plan.md` +//! for the rationale behind these invariants. + +use std::collections::HashMap; +use std::io::{BufRead, BufReader, Write}; +use std::path::PathBuf; +use std::process::{Command, Stdio}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread; + +use tinyjson::JsonValue; + +use crate::options::{is_pipelining_flag, is_relocated_pw_flag}; +use crate::util::read_stamp_status_to_array; +use crate::ProcessWrapperError; + +use super::protocol::WorkRequestContext; +use super::sandbox::{ + copy_all_outputs_to_sandbox, copy_output_to_sandbox, make_dir_files_writable, + make_path_writable, prepare_outputs, resolve_relative_to, run_request, + run_sandboxed_request, +}; +use super::{append_worker_lifecycle_log, current_pid, lock_or_recover}; + +/// Pipelining mode for a worker request, parsed from process_wrapper flags. +pub(super) enum PipeliningMode { + /// No pipelining flags present — handle as a normal subprocess request. + None, + /// `--pipelining-metadata --pipelining-key=` present. + /// Start a full rustc, return as soon as `.rmeta` is ready, cache the Child. + Metadata { key: String }, + /// `--pipelining-full --pipelining-key=` present. + /// Retrieve the cached Child from PipelineState and wait for it to finish. + Full { key: String }, +} + +/// Lifecycle phase of a single pipelined compilation. 
+///
+/// PreRegistered ──store()──> MetadataRunning ──take_for_full()──> FullWaiting ──cleanup()──> (removed)
+/// Any phase ──cancel_by_request_id()──> (removed, child killed if applicable)
+pub(super) enum PipelinePhase {
+    /// Main thread registered the request; worker thread not yet spawned rustc.
+    PreRegistered {
+        metadata_request_id: i64,
+    },
+
+    /// Rustc spawned, .rmeta emitted, metadata response sent. Background codegen continues.
+    MetadataRunning {
+        metadata_request_id: i64,
+        bg: BackgroundRustc,
+        pid: u32,
+    },
+
+    /// Full handler took BackgroundRustc; waiting for child exit + output copy.
+    /// PID retained for cancel-via-signal, guarded by `child_reaped` to prevent
+    /// killing a recycled PID after the full handler has called `child.wait()`.
+    FullWaiting {
+        #[allow(dead_code)]
+        full_request_id: i64,
+        pid: u32,
+        /// Set to `true` by the full handler immediately after `child.wait()`
+        /// returns. The cancel handler checks this before sending a raw signal.
+        child_reaped: Arc<AtomicBool>,
+    },
+}
+
+/// Result of cancelling a pipeline entry. The blocking kill/wait/join must
+/// happen **outside** the `PipelineState` lock to avoid holding the mutex
+/// during I/O.
+pub(super) enum CancelledEntry {
+    /// Request ID was not found in the pipeline.
+    NotFound,
+    /// Entry existed but had no running child (PreRegistered or missing).
+    NoChild,
+    /// We own the BackgroundRustc — kill + wait + join the child.
+    OwnedChild(BackgroundRustc),
+    /// Child was taken by the full handler; only the PID remains for raw kill.
+    /// The `child_reaped` flag prevents killing a recycled PID after the full
+    /// handler has already waited on the child.
+    PidOnly(u32, Arc<AtomicBool>),
+}
+
+impl CancelledEntry {
+    /// Perform blocking cleanup. Safe to call without any lock held.
+    pub(super) fn kill(self) -> bool {
+        match self {
+            CancelledEntry::NotFound | CancelledEntry::NoChild => false,
+            CancelledEntry::OwnedChild(mut bg) => {
+                let _ = bg.child.kill();
+                let _ = bg.child.wait();
+                let _ = bg.stderr_drain.join();
+                true
+            }
+            CancelledEntry::PidOnly(pid, child_reaped) => {
+                // Only send SIGKILL if the full handler hasn't already reaped
+                // the child. Without this check, we could kill a recycled PID.
+                if !child_reaped.load(Ordering::SeqCst) {
+                    #[cfg(unix)]
+                    unsafe {
+                        kill(pid as i32, 9);
+                    }
+                    let _ = pid; // suppress unused warning on non-unix
+                }
+                true
+            }
+        }
+    }
+}
+
+/// A background rustc process started by a RustcMetadata action.
+///
+/// After the `.rmeta` artifact notification, the handler stores the Child
+/// here and spawns a background thread to drain the remaining stderr output.
+/// The full compile handler retrieves this, joins the drain thread, and waits
+/// for the child to exit.
+pub(super) struct BackgroundRustc {
+    pub(super) child: std::process::Child,
+    /// Diagnostics captured from rustc stderr before the metadata signal.
+    pub(super) diagnostics_before: String,
+    /// Background thread draining rustc's remaining stderr output after the
+    /// metadata signal. Must be joined before waiting on `child` to avoid
+    /// deadlock (child blocks on stderr write if the pipe buffer fills up).
+    /// Returns the diagnostics captured after the metadata signal.
+    pub(super) stderr_drain: thread::JoinHandle<String>,
+    /// Worker-managed persistent root for this pipelined compile.
+    pub(super) pipeline_root_dir: PathBuf,
+    /// Worker-managed persistent output directory used by the background rustc.
+    pub(super) pipeline_output_dir: PathBuf,
+    /// Original `--out-dir` value (before rewriting to `pipeline_output_dir`).
+    /// Used by the full handler to copy outputs from the persistent dir to the
+    /// correct sandbox-relative location.
+    pub(super) original_out_dir: String,
+}
+
+/// In-process store of background rustc processes for worker-managed pipelining.
+///
+/// Keyed by the pipeline key (crate name + output hash), set by the Bazel-side
+/// `--pipelining-key=<key>` argument. Each pipeline entry follows a lifecycle
+/// tracked by [`PipelinePhase`]:
+///
+/// PreRegistered → MetadataRunning → FullWaiting → (removed)
+///
+/// `claim_flags` also tracks non-pipelined in-flight requests, unifying the
+/// cancel/completion race prevention into a single data structure.
+pub(super) struct PipelineState {
+    /// Pipeline key → current phase.
+    entries: HashMap<String, PipelinePhase>,
+    /// Reverse index: request_id → pipeline key (for O(1) cancel lookup).
+    request_index: HashMap<i64, String>,
+    /// Claim flags for ALL in-flight requests (pipelined + non-pipelined).
+    /// Whoever atomically swaps the flag first sends the WorkResponse.
+    claim_flags: HashMap<i64, Arc<AtomicBool>>,
+}
+
+impl PipelineState {
+    pub(super) fn new() -> Self {
+        Self {
+            entries: HashMap::new(),
+            request_index: HashMap::new(),
+            claim_flags: HashMap::new(),
+        }
+    }
+
+    /// Register a non-pipelined request's claim flag.
+    pub(super) fn register_claim(&mut self, request_id: i64) -> Arc<AtomicBool> {
+        let flag = Arc::new(AtomicBool::new(false));
+        self.claim_flags.insert(request_id, Arc::clone(&flag));
+        flag
+    }
+
+    /// (none) → PreRegistered. Returns claim flag.
+    ///
+    /// For metadata requests, creates a new PreRegistered entry.
+    /// For full requests (entry already exists as MetadataRunning), just
+    /// registers the claim flag and request_index mapping.
+    pub(super) fn pre_register(&mut self, request_id: i64, key: String) -> Arc<AtomicBool> {
+        let flag = Arc::new(AtomicBool::new(false));
+        self.claim_flags.insert(request_id, Arc::clone(&flag));
+        self.request_index.insert(request_id, key.clone());
+        self.entries
+            .entry(key)
+            .or_insert(PipelinePhase::PreRegistered {
+                metadata_request_id: request_id,
+            });
+        flag
+    }
+
+    /// Stores a background rustc in the pipeline entry.
+    ///
+    /// Handles two cases:
+    /// - PreRegistered → MetadataRunning (normal first-time store)
+    /// - MetadataRunning → MetadataRunning (Bazel retried the metadata action;
+    ///   the old child is returned for the caller to kill outside the lock)
+    ///
+    /// Returns `Some(bg)` if the new child could not be stored (entry was
+    /// removed by cancel) or if an old child was replaced (retry). The caller
+    /// must kill the returned BackgroundRustc **after releasing the lock**.
+    pub(super) fn store(&mut self, key: &str, bg: BackgroundRustc) -> Option<BackgroundRustc> {
+        let pid = bg.child.id();
+        if let Some(entry) = self.entries.get_mut(key) {
+            match entry {
+                PipelinePhase::PreRegistered {
+                    metadata_request_id,
+                } => {
+                    let req_id = *metadata_request_id;
+                    *entry = PipelinePhase::MetadataRunning {
+                        metadata_request_id: req_id,
+                        bg,
+                        pid,
+                    };
+                    return None;
+                }
+                PipelinePhase::MetadataRunning {
+                    metadata_request_id,
+                    ..
+                } => {
+                    // Bazel retried the metadata action (same key). Replace the
+                    // old child with the new one; return the old for cleanup.
+                    let req_id = *metadata_request_id;
+                    let old = std::mem::replace(
+                        entry,
+                        PipelinePhase::MetadataRunning {
+                            metadata_request_id: req_id,
+                            bg,
+                            pid,
+                        },
+                    );
+                    if let PipelinePhase::MetadataRunning { bg: old_bg, .. } = old {
+                        return Some(old_bg);
+                    }
+                    unreachable!();
+                }
+                _ => {}
+            }
+        }
+        // Entry was removed (cancelled) or in unexpected phase.
+        Some(bg)
+    }
+
+    /// MetadataRunning → FullWaiting. Returns BackgroundRustc and a
+    /// `child_reaped` flag for the full handler to set after `child.wait()`.
+    pub(super) fn take_for_full(
+        &mut self,
+        key: &str,
+        full_request_id: i64,
+    ) -> Option<(BackgroundRustc, Arc<AtomicBool>)> {
+        let entry = self.entries.get_mut(key)?;
+        if let PipelinePhase::MetadataRunning {
+            metadata_request_id,
+            pid,
+            ..
+        } = entry
+        {
+            let old_req = *metadata_request_id;
+            let pid_val = *pid;
+            let child_reaped = Arc::new(AtomicBool::new(false));
+            let old = std::mem::replace(
+                entry,
+                PipelinePhase::FullWaiting {
+                    full_request_id,
+                    pid: pid_val,
+                    child_reaped: Arc::clone(&child_reaped),
+                },
+            );
+            self.request_index.remove(&old_req);
+            // full_request_id is already in request_index from pre_register
+            if let PipelinePhase::MetadataRunning { bg, .. } = old {
+                Some((bg, child_reaped))
+            } else {
+                unreachable!()
+            }
+        } else {
+            None
+        }
+    }
+
+    /// Terminal: remove entry entirely.
+    pub(super) fn cleanup(&mut self, key: &str, request_id: i64) {
+        self.entries.remove(key);
+        self.request_index.remove(&request_id);
+        self.claim_flags.remove(&request_id);
+    }
+
+    /// Remove a claim flag (called when a worker thread finishes).
+    pub(super) fn remove_claim(&mut self, request_id: i64) {
+        self.claim_flags.remove(&request_id);
+    }
+
+    /// Get a clone of the claim flag for a request.
+    pub(super) fn get_claim_flag(&self, request_id: i64) -> Option<Arc<AtomicBool>> {
+        self.claim_flags.get(&request_id).cloned()
+    }
+
+    /// Cancel a pipelined request. Removes the entry and returns a
+    /// [`CancelledEntry`] describing what cleanup is needed. The caller
+    /// must perform the blocking kill/wait/join **after releasing the lock**
+    /// to avoid holding the mutex during I/O.
+    pub(super) fn cancel_by_request_id(&mut self, request_id: i64) -> CancelledEntry {
+        let key = match self.request_index.remove(&request_id) {
+            Some(k) => k,
+            None => return CancelledEntry::NotFound,
+        };
+        match self.entries.remove(&key) {
+            Some(PipelinePhase::PreRegistered { .. }) => CancelledEntry::NoChild,
+            Some(PipelinePhase::MetadataRunning { bg, .. }) => {
+                CancelledEntry::OwnedChild(bg)
+            }
+            Some(PipelinePhase::FullWaiting {
+                pid, child_reaped, ..
+            }) => CancelledEntry::PidOnly(pid, child_reaped),
+            None => CancelledEntry::NoChild,
+        }
+    }
+
+    /// Drain all pipeline entries for shutdown. Returns all entries that
+    /// have running children so the caller can kill them outside the lock.
+    pub(super) fn drain_all(&mut self) -> Vec<CancelledEntry> {
+        let mut result = Vec::new();
+        for (_key, entry) in self.entries.drain() {
+            match entry {
+                PipelinePhase::PreRegistered { .. } => {}
+                PipelinePhase::MetadataRunning { bg, .. } => {
+                    result.push(CancelledEntry::OwnedChild(bg));
+                }
+                PipelinePhase::FullWaiting {
+                    pid, child_reaped, ..
+                } => {
+                    result.push(CancelledEntry::PidOnly(pid, child_reaped));
+                }
+            }
+        }
+        self.request_index.clear();
+        result
+    }
+
+    // --- Test accessors ---
+
+    #[cfg(test)]
+    pub(super) fn entry_count(&self) -> usize {
+        self.entries.len()
+    }
+
+    #[cfg(test)]
+    pub(super) fn has_entry(&self, key: &str) -> bool {
+        self.entries.contains_key(key)
+    }
+
+    #[cfg(test)]
+    pub(super) fn is_empty(&self) -> bool {
+        self.entries.is_empty() && self.request_index.is_empty()
+    }
+
+    #[cfg(test)]
+    pub(super) fn has_request(&self, id: i64) -> bool {
+        self.request_index.contains_key(&id)
+    }
+
+    #[cfg(test)]
+    pub(super) fn has_claim(&self, id: i64) -> bool {
+        self.claim_flags.contains_key(&id)
+    }
+}
+
+#[cfg(unix)]
+extern "C" {
+    fn kill(pid: i32, sig: i32) -> i32;
+}
+
+/// Parsed process_wrapper arguments from before the `--` separator.
+pub(super) struct ParsedPwArgs {
+    pub(super) subst: Vec<(String, String)>,
+    pub(super) env_files: Vec<String>,
+    pub(super) arg_files: Vec<String>,
+    pub(super) stable_status_file: Option<String>,
+    pub(super) volatile_status_file: Option<String>,
+    pub(super) output_file: Option<String>,
+}
+
+/// Pipeline context for worker-managed pipelining.
+///
+/// Two modes:
+/// - **Unsandboxed**: uses the real execroot as rustc's CWD.
+/// - **Sandboxed**: uses the Bazel-provided `sandbox_dir` as CWD, keeping all
+///   reads rooted in the sandbox per the multiplex sandbox contract.
+pub(super) struct PipelineContext {
+    pub(super) root_dir: PathBuf,
+    /// Directory used as rustc's CWD and for resolving relative paths.
+    /// Sandboxed: absolute `sandbox_dir`. Unsandboxed: canonicalized real execroot.
+    pub(super) execroot_dir: PathBuf,
+    pub(super) outputs_dir: PathBuf,
+}
+
+#[derive(Default)]
+pub(super) struct OutputMaterializationStats {
+    pub(super) files: usize,
+    pub(super) hardlinked_files: usize,
+    pub(super) copied_files: usize,
+}
+
+#[derive(Clone, Debug)]
+pub(super) struct WorkerStateRoots {
+    pipeline_root: PathBuf,
+}
+
+impl WorkerStateRoots {
+    pub(super) fn ensure() -> Result<Self, ProcessWrapperError> {
+        let pipeline_root = PathBuf::from("_pw_state/pipeline");
+        std::fs::create_dir_all(&pipeline_root).map_err(|e| {
+            ProcessWrapperError(format!("failed to create worker pipeline root: {e}"))
+        })?;
+        Ok(Self { pipeline_root })
+    }
+
+    pub(super) fn pipeline_dir(&self, key: &str) -> PathBuf {
+        self.pipeline_root.join(key)
+    }
+}
+
+/// Parses pipelining mode from worker request arguments.
+///
+/// Pipelining flags live in `rustc_flags` (the @paramfile) so both
+/// RustcMetadata and Rustc actions have identical startup args (same worker
+/// key). This function checks both direct args and any @paramfile content
+/// found after the `--` separator.
+pub(super) fn detect_pipelining_mode(args: &[String]) -> PipeliningMode {
+    // First pass: check direct args (handles the no-paramfile case and is fast).
+    let (mut is_metadata, mut is_full, mut key) =
+        scan_pipelining_flags(args.iter().map(String::as_str));
+
+    // Second pass: if not found yet, read @paramfiles from the rustc args
+    // (everything after "--"). With always_use_param_file, pipelining flags
+    // are inside the @paramfile rather than in direct args.
+    if !is_metadata && !is_full {
+        let sep_pos = args.iter().position(|a| a == "--");
+        let rustc_args = match sep_pos {
+            Some(pos) => &args[pos + 1..],
+            None => &[][..],
+        };
+        for arg in rustc_args {
+            if let Some(path) = arg.strip_prefix('@') {
+                if let Ok(content) = std::fs::read_to_string(path) {
+                    let (m, f, k) = scan_pipelining_flags(content.lines());
+                    is_metadata |= m;
+                    is_full |= f;
+                    if k.is_some() {
+                        key = k;
+                    }
+                    if is_metadata || is_full {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    match (is_metadata, is_full, key) {
+        (true, _, Some(k)) => PipeliningMode::Metadata { key: k },
+        (_, true, Some(k)) => PipeliningMode::Full { key: k },
+        _ => PipeliningMode::None,
+    }
+}
+
+/// Scans an iterator of argument strings for pipelining flags.
+/// Returns `(is_metadata, is_full, pipeline_key)`.
+pub(super) fn scan_pipelining_flags<'a>(
+    iter: impl Iterator<Item = &'a str>,
+) -> (bool, bool, Option<String>) {
+    let mut is_metadata = false;
+    let mut is_full = false;
+    let mut key: Option<String> = None;
+    for arg in iter {
+        if arg == "--pipelining-metadata" {
+            is_metadata = true;
+        } else if arg == "--pipelining-full" {
+            is_full = true;
+        } else if let Some(k) = arg.strip_prefix("--pipelining-key=") {
+            key = Some(k.to_string());
+        }
+    }
+    (is_metadata, is_full, key)
+}
+
+/// Strips pipelining protocol flags from a direct arg list.
+///
+/// Used for the full-action fallback path (where pipelining flags may appear
+/// in direct args if no @paramfile was used). When flags are in a @paramfile,
+/// `options.rs` `prepare_param_file` handles stripping during expansion.
+pub(super) fn strip_pipelining_flags(args: &[String]) -> Vec<String> {
+    args.iter()
+        .filter(|a| !is_pipelining_flag(a))
+        .cloned()
+        .collect()
+}
+
+/// Move process_wrapper flags that appear after `--` to before it.
+///
+/// When worker pipelining is active, per-action flags like `--output-file`
+/// are placed in the @paramfile (so all actions share the same WorkerKey).
+/// After the worker concatenates startup_args + request.arguments, these
+/// flags end up after the `--` separator. Both the subprocess path
+/// (`options.rs`) and the pipelining path (`parse_pw_args`) expect them
+/// before `--`, so we relocate them here.
+pub(super) fn relocate_pw_flags(args: &mut Vec<String>) {
+    let sep_pos = match args.iter().position(|a| a == "--") {
+        Some(pos) => pos,
+        None => return,
+    };
+
+    // Collect indices of relocated pw flags (and their values) after --.
+    let mut to_relocate: Vec<String> = Vec::new();
+    let mut remove_indices: Vec<usize> = Vec::new();
+    let mut i = sep_pos + 1;
+    while i < args.len() {
+        if is_relocated_pw_flag(&args[i]) {
+            remove_indices.push(i);
+            to_relocate.push(args[i].clone());
+            if i + 1 < args.len() {
+                remove_indices.push(i + 1);
+                to_relocate.push(args[i + 1].clone());
+                i += 2;
+            } else {
+                i += 1;
+            }
+        } else {
+            i += 1;
+        }
+    }
+
+    if to_relocate.is_empty() {
+        return;
+    }
+
+    // Remove from after -- in reverse order to preserve indices.
+    for &idx in remove_indices.iter().rev() {
+        args.remove(idx);
+    }
+
+    // Insert before -- (which may have shifted after removals).
+    let sep_pos = args.iter().position(|a| a == "--").unwrap_or(0);
+    for (offset, flag) in to_relocate.into_iter().enumerate() {
+        args.insert(sep_pos + offset, flag);
+    }
+}
+
+/// Parses process_wrapper flags from the pre-`--` portion of args.
+pub(super) fn parse_pw_args(pw_args: &[String], pwd: &std::path::Path) -> ParsedPwArgs {
+    let current_dir = pwd.to_string_lossy().into_owned();
+    let mut parsed = ParsedPwArgs {
+        subst: Vec::new(),
+        env_files: Vec::new(),
+        arg_files: Vec::new(),
+        stable_status_file: None,
+        volatile_status_file: None,
+        output_file: None,
+    };
+    let mut i = 0;
+    while i < pw_args.len() {
+        match pw_args[i].as_str() {
+            "--subst" => {
+                if let Some(kv) = pw_args.get(i + 1) {
+                    if let Some((k, v)) = kv.split_once('=') {
+                        let resolved = if v == "${pwd}" { &current_dir } else { v };
+                        parsed.subst.push((k.to_owned(), resolved.to_owned()));
+                    }
+                    i += 1;
+                }
+            }
+            "--env-file" => {
+                if let Some(path) = pw_args.get(i + 1) {
+                    parsed.env_files.push(path.clone());
+                    i += 1;
+                }
+            }
+            "--arg-file" => {
+                if let Some(path) = pw_args.get(i + 1) {
+                    parsed.arg_files.push(path.clone());
+                    i += 1;
+                }
+            }
+            "--output-file" => {
+                if let Some(path) = pw_args.get(i + 1) {
+                    parsed.output_file = Some(path.clone());
+                    i += 1;
+                }
+            }
+            "--stable-status-file" => {
+                if let Some(path) = pw_args.get(i + 1) {
+                    parsed.stable_status_file = Some(path.clone());
+                    i += 1;
+                }
+            }
+            "--volatile-status-file" => {
+                if let Some(path) = pw_args.get(i + 1) {
+                    parsed.volatile_status_file = Some(path.clone());
+                    i += 1;
+                }
+            }
+            _ => {}
+        }
+        i += 1;
+    }
+    parsed
+}
+
+/// Builds the environment map: inherit current process + env files + apply substitutions.
+pub(super) fn build_rustc_env(
+    env_files: &[String],
+    stable_status_file: Option<&str>,
+    volatile_status_file: Option<&str>,
+    subst: &[(String, String)],
+) -> HashMap<String, String> {
+    let mut env: HashMap<String, String> = std::env::vars().collect();
+    for path in env_files {
+        if let Ok(content) = std::fs::read_to_string(path) {
+            for line in content.lines() {
+                if line.is_empty() {
+                    continue;
+                }
+                if let Some((k, v)) = line.split_once('=') {
+                    env.insert(k.to_owned(), v.to_owned());
+                }
+            }
+        }
+    }
+    let stable_stamp_mappings: Vec<(String, String)> = stable_status_file
+        .map(|path| read_stamp_status_to_array(path.to_owned()))
+        .transpose()
+        .unwrap_or_default()
+        .unwrap_or_default();
+    let volatile_stamp_mappings: Vec<(String, String)> = volatile_status_file
+        .map(|path| read_stamp_status_to_array(path.to_owned()))
+        .transpose()
+        .unwrap_or_default()
+        .unwrap_or_default();
+    for (k, v) in stable_stamp_mappings
+        .iter()
+        .chain(volatile_stamp_mappings.iter())
+    {
+        for val in env.values_mut() {
+            *val = val.replace(&format!("{{{k}}}"), v);
+        }
+    }
+    for val in env.values_mut() {
+        crate::util::apply_substitutions(val, subst);
+    }
+    env
+}
+
+/// Prepares rustc arguments: expand @paramfiles, apply substitutions, strip
+/// pipelining flags, and append args from --arg-file files.
+///
+/// Returns `(rustc_args, original_out_dir)` on success.
+pub(super) fn prepare_rustc_args(
+    rustc_and_after: &[String],
+    pw_args: &ParsedPwArgs,
+    execroot_dir: &std::path::Path,
+) -> Result<(Vec<String>, String), (i32, String)> {
+    let mut rustc_args = expand_rustc_args(rustc_and_after, &pw_args.subst, execroot_dir);
+    if rustc_args.is_empty() {
+        return Err((
+            1,
+            "pipelining: no rustc arguments after expansion".to_string(),
+        ));
+    }
+
+    // Append args from --arg-file files (e.g. build script output: --cfg=..., -L ...).
+    for path in &pw_args.arg_files {
+        if let Ok(content) = std::fs::read_to_string(path) {
+            for line in content.lines() {
+                if !line.is_empty() {
+                    rustc_args.push(apply_substs(line, &pw_args.subst));
+                }
+            }
+        }
+    }
+
+    let original_out_dir = find_out_dir_in_expanded(&rustc_args).unwrap_or_default();
+
+    Ok((rustc_args, original_out_dir))
+}
+
+/// Applies `${key}` → `value` substitution mappings to a single argument string.
+///
+/// Delegates to [`crate::util::apply_substitutions`], which couples substitution
+/// with Windows verbatim path normalization so callers cannot forget it.
+pub(super) fn apply_substs(arg: &str, subst: &[(String, String)]) -> String {
+    let mut a = arg.to_owned();
+    crate::util::apply_substitutions(&mut a, subst);
+    a
+}
+
+/// Builds the rustc argument list from the post-`--` section of process_wrapper
+/// args, expanding any @paramfile references inline and stripping pipelining flags.
+///
+/// Rustc natively supports @paramfile expansion, but the paramfile may contain
+/// pipelining protocol flags (`--pipelining-metadata`, `--pipelining-key=*`) that
+/// rustc doesn't understand. By expanding and filtering here we avoid passing
+/// unknown flags to rustc.
+pub(super) fn expand_rustc_args(
+    rustc_and_after: &[String],
+    subst: &[(String, String)],
+    execroot_dir: &std::path::Path,
+) -> Vec<String> {
+    let mut result = Vec::new();
+    for raw in rustc_and_after {
+        let arg = apply_substs(raw, subst);
+        if let Some(path) = arg.strip_prefix('@') {
+            let resolved_path = resolve_relative_to(path, execroot_dir);
+            match std::fs::read_to_string(&resolved_path) {
+                Ok(content) => {
+                    for line in content.lines() {
+                        if line.is_empty() {
+                            continue;
+                        }
+                        let line = apply_substs(line, subst);
+                        if !is_pipelining_flag(&line) {
+                            let resolved = crate::options::resolve_external_path(&line);
+                            result.push(resolved.into_owned());
+                        }
+                    }
+                }
+                Err(_) => {
+                    // Can't read the paramfile — pass it through and let rustc error.
+                    if !is_pipelining_flag(&arg) {
+                        result.push(arg);
+                    }
+                }
+            }
+        } else if !is_pipelining_flag(&arg) {
+            let resolved = crate::options::resolve_external_path(&arg);
+            result.push(match resolved {
+                std::borrow::Cow::Borrowed(_) => arg,
+                std::borrow::Cow::Owned(s) => s,
+            });
+        }
+    }
+    result
+}
+
+/// Searches already-expanded rustc args for `--out-dir=<dir>`.
+pub(super) fn find_out_dir_in_expanded(args: &[String]) -> Option<String> {
+    for arg in args {
+        if let Some(dir) = arg.strip_prefix("--out-dir=") {
+            return Some(dir.to_string());
+        }
+    }
+    None
+}
+
+/// Returns a copy of `args` where `--out-dir=<old>` is replaced by
+/// `--out-dir=<new_out_dir>`. Other args are unchanged.
+pub(super) fn rewrite_out_dir_in_expanded(
+    args: Vec<String>,
+    new_out_dir: &std::path::Path,
+) -> Vec<String> {
+    args.into_iter()
+        .map(|arg| {
+            if arg.starts_with("--out-dir=") {
+                format!("--out-dir={}", new_out_dir.display())
+            } else {
+                arg
+            }
+        })
+        .collect()
+}
+
+/// Rewrites `--emit=metadata=<path>` to write the .rmeta into the pipeline outputs dir.
+/// The original relative path's filename is preserved; only the directory changes.
+pub(super) fn rewrite_emit_metadata_path(
+    args: Vec<String>,
+    outputs_dir: &std::path::Path,
+) -> Vec<String> {
+    args.into_iter()
+        .map(|arg| {
+            if let Some(path_str) = arg.strip_prefix("--emit=metadata=") {
+                let filename = std::path::Path::new(path_str)
+                    .file_name()
+                    .unwrap_or_default()
+                    .to_string_lossy();
+                format!(
+                    "--emit=metadata={}",
+                    outputs_dir.join(filename.as_ref()).display()
+                )
+            } else {
+                arg
+            }
+        })
+        .collect()
+}
+
+pub(super) fn prepare_expanded_rustc_outputs(args: &[String]) {
+    for arg in args {
+        if let Some(dir) = arg.strip_prefix("--out-dir=") {
+            make_dir_files_writable(dir);
+            let pipeline_dir = format!("{dir}/_pipeline");
+            make_dir_files_writable(&pipeline_dir);
+            continue;
+        }
+
+        let Some(emit) = arg.strip_prefix("--emit=") else {
+            continue;
+        };
+        for part in emit.split(',') {
+            let Some((_, path)) = part.split_once('=') else {
+                continue;
+            };
+            make_path_writable(std::path::Path::new(path));
+        }
+    }
+}
+
+/// Creates a pipeline context for worker-managed pipelining.
+///
+/// When sandboxed, uses sandbox_dir as rustc's CWD so all reads go through the
+/// sandbox (Bazel multiplex sandbox contract compliance). When unsandboxed, uses
+/// the real execroot. In both cases, outputs are redirected to a persistent
+/// worker-owned directory to prevent inter-request interference.
+pub(super) fn create_pipeline_context(
+    state_roots: &WorkerStateRoots,
+    key: &str,
+    request: &WorkRequestContext,
+) -> Result<PipelineContext, (i32, String)> {
+    let root_dir = state_roots.pipeline_dir(key);
+
+    // Create the pipeline root and outputs dir.
+    // Clear any leftover outputs from a previous failed run for this key.
+    let outputs_dir = root_dir.join("outputs");
+    if let Err(e) = std::fs::remove_dir_all(&outputs_dir) {
+        if e.kind() != std::io::ErrorKind::NotFound {
+            return Err((
+                1,
+                format!("pipelining: failed to clear pipeline outputs dir: {e}"),
+            ));
+        }
+    }
+    std::fs::create_dir_all(&outputs_dir).map_err(|e| {
+        (
+            1,
+            format!("pipelining: failed to create pipeline outputs dir: {e}"),
+        )
+    })?;
+    let root_dir = std::fs::canonicalize(root_dir).map_err(|e| {
+        (
+            1,
+            format!("pipelining: failed to resolve pipeline dir: {e}"),
+        )
+    })?;
+    let outputs_dir = std::fs::canonicalize(outputs_dir).map_err(|e| {
+        (
+            1,
+            format!("pipelining: failed to resolve pipeline outputs dir: {e}"),
+        )
+    })?;
+
+    // Two modes for determining rustc's CWD:
+    //
+    // SANDBOXED: Use sandbox_dir directly as CWD. All relative paths in rustc
+    // args (--extern, -Ldependency, source files) resolve against sandbox_dir
+    // where Bazel placed the inputs. This satisfies Rule 1 of the multiplex
+    // sandbox contract ("use sandbox_dir as prefix for all reads and writes").
+    // After .rmeta emission, background rustc only writes to --out-dir
+    // (redirected to persistent pipeline dir), so sandbox cleanup after the
+    // metadata response doesn't affect it (verified via strace — Gate 0).
+    //
+    // UNSANDBOXED: Use the worker's real execroot as CWD.
+    let execroot_dir = if let Some(sandbox_dir) = request.sandbox_dir.as_deref() {
+        // Make absolute WITHOUT canonicalizing — canonicalize() follows symlinks
+        // inside the sandbox back to the real execroot, which defeats the purpose.
+        // We need the sandbox path itself so rustc reads through sandbox_dir.
+        let sandbox_path = std::path::Path::new(sandbox_dir);
+        if sandbox_path.is_absolute() {
+            sandbox_path.to_path_buf()
+        } else {
+            let cwd = std::env::current_dir().map_err(|e| {
+                (1, format!("pipelining: failed to get worker CWD: {e}"))
+            })?;
+            cwd.join(sandbox_path)
+        }
+    } else {
+        let cwd = std::env::current_dir()
+            .map_err(|e| (1, format!("pipelining: failed to get worker CWD: {e}")))?;
+        std::fs::canonicalize(cwd).map_err(|e| {
+            (
+                1,
+                format!("pipelining: failed to canonicalize worker CWD: {e}"),
+            )
+        })?
+    };
+
+    Ok(PipelineContext {
+        root_dir,
+        execroot_dir,
+        outputs_dir,
+    })
+}
+
+// ---------------------------------------------------------------------------
+// Pipelining handlers
+// ---------------------------------------------------------------------------
+
+/// Handles a `--pipelining-metadata` request (sandboxed or unsandboxed).
+///
+/// Starts a full rustc with `--emit=dep-info,metadata,link --json=artifacts`,
+/// reads stderr until the `{"artifact":"...rmeta","emit":"metadata"}` JSON
+/// notification appears, stores the running Child in PipelineState, and returns
+/// success immediately so Bazel can unblock downstream rlib compiles.
+///
+/// Two modes:
+/// - **Sandboxed**: rustc runs from `sandbox_dir` directly. All relative paths
+///   in args resolve against the sandbox where Bazel placed inputs. Compliant
+///   with the Bazel multiplex sandbox contract (Rule 1: all reads via sandbox_dir).
+/// - **Unsandboxed**: rustc runs from the real execroot.
+pub(super) fn handle_pipelining_metadata( + request: &WorkRequestContext, + args: Vec, + key: String, + state_roots: &WorkerStateRoots, + pipeline_state: &Arc>, +) -> (i32, String) { + let filtered = strip_pipelining_flags(&args); + + let sep = filtered.iter().position(|a| a == "--"); + let (pw_raw, rustc_and_after) = match sep { + Some(pos) => (&filtered[..pos], &filtered[pos + 1..]), + None => return (1, "pipelining: no '--' separator in args".to_string()), + }; + if rustc_and_after.is_empty() { + return (1, "pipelining: no rustc executable after '--'".to_string()); + } + + // Note: we intentionally do NOT drain completed entries here. Background rustc + // entries must remain in PipelineState until handle_pipelining_full() takes them, + // even if the child has already exited (fast-compiling crates often finish codegen + // before the full action arrives). Entries are cleaned up by take() in the full + // handler, or persist harmlessly until worker exit for orphaned entries. + + let ctx = match create_pipeline_context(state_roots, &key, request) { + Ok(v) => v, + Err(e) => return e, + }; + + // execroot_dir is already canonicalized (absolute) in both sandboxed and + // unsandboxed modes, so ${pwd} substitution produces correct absolute paths + // for env vars like OUT_DIR=${pwd}/bazel-out/... 
+ let raw_pw_args = parse_pw_args(pw_raw, &ctx.execroot_dir); + let pw_args = ParsedPwArgs { + subst: raw_pw_args.subst, + env_files: raw_pw_args + .env_files + .into_iter() + .map(|path| { + resolve_relative_to(&path, &ctx.execroot_dir) + .display() + .to_string() + }) + .collect(), + arg_files: raw_pw_args + .arg_files + .into_iter() + .map(|path| { + resolve_relative_to(&path, &ctx.execroot_dir) + .display() + .to_string() + }) + .collect(), + stable_status_file: raw_pw_args.stable_status_file.map(|path| { + resolve_relative_to(&path, &ctx.execroot_dir) + .display() + .to_string() + }), + volatile_status_file: raw_pw_args.volatile_status_file.map(|path| { + resolve_relative_to(&path, &ctx.execroot_dir) + .display() + .to_string() + }), + output_file: raw_pw_args.output_file.map(|path| { + let base = request + .sandbox_dir + .as_deref() + .map(std::path::Path::new) + .unwrap_or(ctx.execroot_dir.as_path()); + resolve_relative_to(&path, base).display().to_string() + }), + }; + let env = build_rustc_env( + &pw_args.env_files, + pw_args.stable_status_file.as_deref(), + pw_args.volatile_status_file.as_deref(), + &pw_args.subst, + ); + + let (rustc_args, original_out_dir) = + match prepare_rustc_args(rustc_and_after, &pw_args, &ctx.execroot_dir) { + Ok(v) => v, + Err(e) => return e, + }; + + // Redirect --out-dir to our persistent directory so rustc writes all outputs + // (.rlib, .d) there instead of the Bazel-managed out-dir. + let rustc_args = rewrite_out_dir_in_expanded(rustc_args, &ctx.outputs_dir); + // Also redirect --emit=metadata= to the outputs dir so the .rmeta is + // written alongside other outputs in the persistent pipeline dir, not in the + // real execroot where it could conflict with concurrent builds. 
+ let rustc_args = rewrite_emit_metadata_path(rustc_args, &ctx.outputs_dir); + prepare_expanded_rustc_outputs(&rustc_args); + append_pipeline_log( + &ctx.root_dir, + &format!( + "metadata start request_id={} key={} sandbox_dir={:?} inputs={} original_out_dir={} execroot={} outputs={}", + request.request_id, + key, + request.sandbox_dir, + request.inputs.len(), + original_out_dir, + ctx.execroot_dir.display(), + ctx.outputs_dir.display(), + ), + ); + // On Windows, rustc's internal search-path buffer is limited to ~32K characters. + // Consolidate all -Ldependency dirs into one directory with hardlinks, then + // write all args to a response file to also avoid CreateProcessW limits. + #[cfg(windows)] + let _consolidated_dir_guard: Option; + #[cfg(windows)] + let mut rustc_args = rustc_args; + #[cfg(windows)] + { + let unified_dir = ctx.root_dir.join("deps"); + let _ = std::fs::remove_dir_all(&unified_dir); + if let Err(e) = std::fs::create_dir_all(&unified_dir) { + return ( + 1, + format!("pipelining: failed to create deps dir: {e}"), + ); + } + + let dep_dirs: Vec = rustc_args + .iter() + .filter_map(|a| a.strip_prefix("-Ldependency=").map(PathBuf::from)) + .collect(); + crate::util::consolidate_deps_into(&dep_dirs, &unified_dir); + rustc_args.retain(|a| !a.starts_with("-Ldependency=")); + rustc_args.push(format!("-Ldependency={}", unified_dir.display())); + _consolidated_dir_guard = Some(unified_dir); + } + + // Spawn rustc with the prepared env and args. + // On Windows, write args to a response file to avoid CreateProcessW length limits. 
+ let mut cmd = Command::new(&rustc_args[0]); + #[cfg(windows)] + { + let response_file_path = ctx.root_dir.join("metadata_rustc.args"); + let content = rustc_args[1..].join("\n"); + if let Err(e) = std::fs::write(&response_file_path, &content) { + return ( + 1, + format!("pipelining: failed to write response file: {e}"), + ); + } + cmd.arg(format!("@{}", response_file_path.display())); + } + #[cfg(not(windows))] + { + cmd.args(&rustc_args[1..]); + } + cmd.env_clear() + .envs(&env) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .current_dir(&ctx.execroot_dir); + let mut child = match cmd.spawn() { + Ok(c) => c, + Err(e) => return (1, format!("pipelining: failed to spawn rustc: {e}")), + }; + + let stderr = child.stderr.take().expect("stderr was piped"); + let mut reader = BufReader::new(stderr); + let mut diagnostics = String::new(); + + loop { + let mut line = String::new(); + match reader.read_line(&mut line) { + Ok(0) => break, + Err(_) => break, + Ok(_) => {} + } + let trimmed = line.trim_end_matches('\n').trim_end_matches('\r'); + + if let Some(rmeta_path_str) = extract_rmeta_path(trimmed) { + // Resolve the rmeta path relative to rustc's CWD (ctx.execroot_dir) + // to get an absolute path, since the worker process has a different CWD. + let rmeta_resolved = resolve_relative_to(&rmeta_path_str, &ctx.execroot_dir); + let rmeta_resolved_str = rmeta_resolved.display().to_string(); + append_pipeline_log( + &ctx.root_dir, + &format!("metadata rmeta ready: {}", rmeta_resolved_str), + ); + // Copy .rmeta to the declared output location (_pipeline/ subdirectory). 
+ match request.sandbox_dir.as_ref() { + Some(dir) => { + copy_output_to_sandbox( + &rmeta_resolved_str, + dir, + &original_out_dir, + "_pipeline", + ); + } + None => { + let rmeta_src = &rmeta_resolved; + if let Some(filename) = rmeta_src.file_name() { + let dest_pipeline = + std::path::Path::new(&original_out_dir).join("_pipeline"); + let _ = std::fs::create_dir_all(&dest_pipeline); + let dest = dest_pipeline.join(filename); + // Skip copy if source and dest resolve to the same file. + let same_file = rmeta_src + .canonicalize() + .ok() + .zip(dest.canonicalize().ok()) + .is_some_and(|(a, b)| a == b); + if !same_file { + let _ = std::fs::copy(rmeta_src, &dest); + } + } + } + } + // .rmeta is ready! Spawn a drain thread to prevent pipe buffer deadlock. + let drain = thread::spawn(move || { + let mut remaining = String::new(); + let mut buf = String::new(); + while reader.read_line(&mut buf).unwrap_or(0) > 0 { + let l = buf.trim_end_matches('\n').trim_end_matches('\r'); + if let Ok(json) = l.parse::() { + if let Some(rendered) = extract_rendered_diagnostic(&json) { + remaining.push_str(&rendered); + remaining.push('\n'); + } + } + buf.clear(); + } + remaining + }); + + let diagnostics_before = diagnostics.clone(); + let orphan = lock_or_recover(pipeline_state).store( + &key, + BackgroundRustc { + child, + diagnostics_before, + stderr_drain: drain, + pipeline_root_dir: ctx.root_dir.clone(), + pipeline_output_dir: ctx.outputs_dir.clone(), + original_out_dir, + }, + ); + // Kill orphaned background rustc outside the lock. 
+ if let Some(mut orphan) = orphan {
+ let _ = orphan.child.kill();
+ let _ = orphan.child.wait();
+ let _ = orphan.stderr_drain.join();
+ }
+ append_pipeline_log(&ctx.root_dir, &format!("metadata stored key={}", key));
+ if let Some(ref path) = pw_args.output_file {
+ let _ = std::fs::write(path, &diagnostics);
+ }
+ return (0, diagnostics);
+ }
+
+ if let Ok(json) = trimmed.parse::<JsonValue>() {
+ if let Some(rendered) = extract_rendered_diagnostic(&json) {
+ diagnostics.push_str(&rendered);
+ diagnostics.push('\n');
+ }
+ }
+ }
+
+ // EOF: rustc exited before emitting the metadata artifact (compilation error).
+ let exit_code = child.wait().ok().and_then(|s| s.code()).unwrap_or(1);
+ maybe_cleanup_pipeline_dir(
+ &ctx.root_dir,
+ true,
+ "metadata rustc exited before emitting rmeta",
+ );
+ if let Some(ref path) = pw_args.output_file {
+ let _ = std::fs::write(path, &diagnostics);
+ }
+ (exit_code, diagnostics)
+}
+
+/// Handles a `--pipelining-full` request (sandboxed or unsandboxed).
+///
+/// Looks up the background rustc by pipeline key. If found, waits for it to
+/// finish and copies outputs to the correct location. If not found (worker was
+/// restarted), falls back to running rustc normally as a one-shot compilation.
+pub(super) fn handle_pipelining_full(
+ request: &WorkRequestContext,
+ args: Vec<String>,
+ key: String,
+ pipeline_state: &Arc<Mutex<PipelineState>>,
+ self_path: &std::path::Path,
+) -> (i32, String) {
+ let taken = lock_or_recover(pipeline_state).take_for_full(&key, request.request_id);
+
+ match taken {
+ Some((mut bg, child_reaped)) => {
+ append_pipeline_log(&bg.pipeline_root_dir, &format!("full start key={}", key));
+ // Join the drain thread first (avoids deadlock: child blocks on stderr
+ // write if the pipe buffer fills up before we drain it).
+ let remaining = bg.stderr_drain.join().unwrap_or_default(); + let all_diagnostics = bg.diagnostics_before + &remaining; + + let wait_result = bg.child.wait(); + // Mark the child as reaped immediately so the cancel handler + // won't send SIGKILL to a potentially-recycled PID. + child_reaped.store(true, Ordering::SeqCst); + + match wait_result { + Ok(status) => { + let exit_code = status.code().unwrap_or(1); + if exit_code == 0 { + // Copy all outputs from the persistent pipeline dir. + match request.sandbox_dir.as_ref() { + Some(dir) => { + copy_all_outputs_to_sandbox( + &bg.pipeline_output_dir, + dir, + &bg.original_out_dir, + ); + } + None => { + let dest_dir = std::path::Path::new(&bg.original_out_dir); + let _ = std::fs::create_dir_all(dest_dir); + if let Ok(entries) = std::fs::read_dir(&bg.pipeline_output_dir) { + for entry in entries.flatten() { + if let Ok(meta) = entry.metadata() { + if meta.is_file() { + let dest = dest_dir.join(entry.file_name()); + let same_file = entry + .path() + .canonicalize() + .ok() + .zip(dest.canonicalize().ok()) + .is_some_and(|(a, b)| a == b); + if !same_file { + let _ = std::fs::copy(entry.path(), &dest); + } + } + } + } + } + } + } + } + append_pipeline_log( + &bg.pipeline_root_dir, + &format!("full done key={} exit_code={}", key, exit_code), + ); + maybe_cleanup_pipeline_dir( + &bg.pipeline_root_dir, + exit_code != 0, + "full action failed", + ); + lock_or_recover(pipeline_state).cleanup(&key, request.request_id); + (exit_code, all_diagnostics) + } + Err(e) => { + lock_or_recover(pipeline_state).cleanup(&key, request.request_id); + (1, format!("failed to wait for background rustc: {e}")) + } + } + } + None => { + let worker_state_root = std::env::current_dir() + .ok() + .map(|cwd| cwd.join("_pw_state").join("fallback.log")); + if let Some(path) = worker_state_root { + if let Ok(mut file) = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(path) + { + let _ = writeln!( + file, + "full missing bg 
 request_id={} key={} sandbox_dir={:?}",
+ request.request_id, key, request.sandbox_dir
+ );
+ }
+ }
+ // No cached process found (worker was restarted between the metadata
+ // and full actions, or metadata was a cache hit). Fall back to a normal
+ // one-shot compilation.
+ let filtered_args = strip_pipelining_flags(&args);
+ let result = match request.sandbox_dir.as_ref() {
+ Some(dir) => run_sandboxed_request(self_path, filtered_args, dir)
+ .unwrap_or_else(|e| (1, format!("pipelining fallback error: {e}"))),
+ None => {
+ prepare_outputs(&filtered_args);
+ run_request(self_path, filtered_args)
+ .unwrap_or_else(|e| (1, format!("pipelining fallback error: {e}")))
+ }
+ };
+ lock_or_recover(pipeline_state).cleanup(&key, request.request_id);
+ result
+ }
+ }
+}
+
+/// Kills the background rustc process associated with a cancelled request.
+///
+/// Uses `PipelineState::cancel_by_request_id` to remove the entry under the
+/// lock, then performs blocking kill/wait/join **after** releasing the lock
+/// to avoid holding the mutex during I/O.
+pub(super) fn kill_pipelined_request(pipeline_state: &Arc<Mutex<PipelineState>>, request_id: i64) {
+ // Remove the entry under the lock (fast, O(1) HashMap ops).
+ let cancelled = lock_or_recover(pipeline_state).cancel_by_request_id(request_id);
+ // Blocking kill/wait/join happens here, outside the lock.
+ let killed = cancelled.kill();
+ if killed {
+ append_worker_lifecycle_log(&format!(
+ "pid={} event=cancel_kill request_id={}",
+ current_pid(),
+ request_id,
+ ));
+ }
+}
+
+/// Extracts the artifact path from an rmeta artifact notification JSON line.
+/// Returns `Some(path)` for `{"artifact":"path/to/lib.rmeta","emit":"metadata"}`,
+/// `None` for all other lines.
+pub(super) fn extract_rmeta_path(line: &str) -> Option<String> {
+ if let Ok(JsonValue::Object(ref map)) = line.parse::<JsonValue>() {
+ if let (Some(JsonValue::String(artifact)), Some(JsonValue::String(emit))) =
+ (map.get("artifact"), map.get("emit"))
+ {
+ if artifact.ends_with(".rmeta") && emit == "metadata" {
+ return Some(artifact.clone());
+ }
+ }
+ }
+ None
+}
+
+/// Extracts the `"rendered"` field from a rustc JSON diagnostic message.
+pub(super) fn extract_rendered_diagnostic(json: &JsonValue) -> Option<String> {
+ if let JsonValue::Object(ref map) = json {
+ if let Some(JsonValue::String(rendered)) = map.get("rendered") {
+ return Some(rendered.clone());
+ }
+ }
+ None
+}
+
+pub(super) fn append_pipeline_log(pipeline_root: &std::path::Path, message: &str) {
+ let path = pipeline_root.join("pipeline.log");
+ let mut file = match std::fs::OpenOptions::new()
+ .create(true)
+ .append(true)
+ .open(path)
+ {
+ Ok(file) => file,
+ Err(_) => return,
+ };
+ let _ = writeln!(file, "{message}");
+}
+
+pub(super) fn maybe_cleanup_pipeline_dir(
+ pipeline_root: &std::path::Path,
+ keep: bool,
+ reason: &str,
+) {
+ if keep {
+ append_pipeline_log(
+ pipeline_root,
+ &format!("preserving pipeline dir for inspection: {reason}"),
+ );
+ return;
+ }
+
+ if let Err(err) = std::fs::remove_dir_all(pipeline_root) {
+ append_pipeline_log(
+ pipeline_root,
+ &format!("failed to remove pipeline dir during cleanup: {err}"),
+ );
+ }
+}
diff --git a/util/process_wrapper/worker_protocol.rs b/util/process_wrapper/worker_protocol.rs
new file mode 100644
index 0000000000..4dc6c41208
--- /dev/null
+++ b/util/process_wrapper/worker_protocol.rs
@@ -0,0 +1,223 @@
+// Copyright 2024 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! JSON worker protocol types and helpers.
+
+use tinyjson::JsonValue;
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(super) struct WorkRequestInput {
+ pub(super) path: String,
+ pub(super) digest: Option<String>,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(super) struct WorkRequestContext {
+ pub(super) request_id: i64,
+ pub(super) arguments: Vec<String>,
+ pub(super) sandbox_dir: Option<String>,
+ pub(super) inputs: Vec<WorkRequestInput>,
+ pub(super) cancel: bool,
+}
+
+impl WorkRequestContext {
+ pub(super) fn from_json(request: &JsonValue) -> Result<Self, String> {
+ Ok(Self {
+ request_id: extract_request_id(request),
+ arguments: extract_arguments(request),
+ sandbox_dir: extract_sandbox_dir(request)?,
+ inputs: extract_inputs(request),
+ cancel: extract_cancel(request),
+ })
+ }
+}
+
+pub(super) fn extract_request_id_from_raw_line(line: &str) -> Option<i64> {
+ let key_pos = line.find("\"requestId\"")?;
+ let after_key = &line[key_pos + "\"requestId\"".len()..];
+ let colon = after_key.find(':')?;
+ let after_colon = after_key[colon + 1..].trim_start();
+ let digits: String = after_colon
+ .chars()
+ .take_while(|ch| ch.is_ascii_digit())
+ .collect();
+ if digits.is_empty() {
+ None
+ } else {
+ digits.parse().ok()
+ }
+}
+
+/// Extracts the `requestId` field from a WorkRequest (defaults to 0).
+pub(super) fn extract_request_id(request: &JsonValue) -> i64 {
+ if let JsonValue::Object(map) = request {
+ if let Some(JsonValue::Number(id)) = map.get("requestId") {
+ return *id as i64;
+ }
+ }
+ 0
+}
+
+/// Extracts the `arguments` array from a WorkRequest.
+pub(super) fn extract_arguments(request: &JsonValue) -> Vec<String> {
+ if let JsonValue::Object(map) = request {
+ if let Some(JsonValue::Array(args)) = map.get("arguments") {
+ return args
+ .iter()
+ .filter_map(|v| {
+ if let JsonValue::String(s) = v {
+ Some(s.clone())
+ } else {
+ None
+ }
+ })
+ .collect();
+ }
+ }
+ vec![]
+}
+
+/// Extracts the `sandboxDir` field from a WorkRequest.
+///
+/// Returns `Ok(Some(dir))` if a usable sandbox directory is provided,
+/// `Ok(None)` if the field is absent, or `Err` if a `sandboxDir` was provided
+/// but the directory does not exist or is empty (unpopulated).
+///
+/// The error case indicates a misconfiguration: `--experimental_worker_multiplex_sandboxing`
+/// is enabled but the platform has no sandbox support (e.g. Windows). Rather than
+/// silently falling back — which would cause subtle pipelining failures when
+/// rustc's CWD is set to an empty sandbox — we surface a clear error directing
+/// the user to fix their Bazel configuration.
+pub(super) fn extract_sandbox_dir(request: &JsonValue) -> Result<Option<String>, String> {
+ if let JsonValue::Object(map) = request {
+ if let Some(JsonValue::String(dir)) = map.get("sandboxDir") {
+ if dir.is_empty() {
+ return Ok(None);
+ }
+ if sandbox_dir_is_usable(dir) {
+ return Ok(Some(dir.clone()));
+ }
+ return Err(format!(
+ "Bazel sent sandboxDir=\"{}\" but the directory {}. \
+ This typically means --experimental_worker_multiplex_sandboxing is enabled \
+ on a platform without sandbox support (e.g. Windows). \
+ Remove this flag or make it platform-specific \
+ (e.g. build:linux --experimental_worker_multiplex_sandboxing).",
+ dir,
+ if std::path::Path::new(dir).exists() {
+ "is empty (no symlinks to execroot)"
+ } else {
+ "does not exist"
+ },
+ ));
+ }
+ }
+ Ok(None)
+}
+
+/// A sandbox directory is usable if it exists and contains at least one entry.
+///
+/// On platforms with real sandbox support (Linux), Bazel populates the directory
+/// with symlinks into the real execroot before sending the WorkRequest. On
+/// Windows, the directory may be created but left empty because there is no
+/// sandboxing implementation — an empty directory is not a usable sandbox.
+fn sandbox_dir_is_usable(dir: &str) -> bool {
+ match std::fs::read_dir(dir) {
+ Ok(mut entries) => entries.next().is_some(),
+ Err(_) => false,
+ }
+}
+
+/// Extracts the `inputs` array from a WorkRequest.
+pub(super) fn extract_inputs(request: &JsonValue) -> Vec<WorkRequestInput> {
+ let mut result = Vec::new();
+ let JsonValue::Object(map) = request else {
+ return result;
+ };
+ let Some(JsonValue::Array(inputs)) = map.get("inputs") else {
+ return result;
+ };
+
+ for input in inputs {
+ let JsonValue::Object(obj) = input else {
+ continue;
+ };
+
+ let path = obj.get("path").and_then(|value| match value {
+ JsonValue::String(path) => Some(path.clone()),
+ _ => None,
+ });
+ let digest = obj.get("digest").and_then(|value| match value {
+ JsonValue::String(digest) => Some(digest.clone()),
+ _ => None,
+ });
+
+ if let Some(path) = path {
+ result.push(WorkRequestInput { path, digest });
+ }
+ }
+
+ result
+}
+
+/// Extracts the `cancel` field from a WorkRequest (false if absent).
+pub(super) fn extract_cancel(request: &JsonValue) -> bool {
+ if let JsonValue::Object(map) = request {
+ if let Some(JsonValue::Boolean(cancel)) = map.get("cancel") {
+ return *cancel;
+ }
+ }
+ false
+}
+
+/// Builds a JSON WorkResponse string.
+pub(super) fn build_response(exit_code: i32, output: &str, request_id: i64) -> String {
+ let output = sanitize_response_output(output);
+ format!(
+ "{{\"exitCode\":{},\"output\":{},\"requestId\":{}}}",
+ exit_code,
+ json_string_literal(&output),
+ request_id
+ )
+}
+
+/// Builds a JSON WorkResponse with `wasCancelled: true`.
+pub(super) fn build_cancel_response(request_id: i64) -> String { + format!( + "{{\"exitCode\":0,\"output\":{},\"requestId\":{},\"wasCancelled\":true}}", + json_string_literal(""), + request_id + ) +} + +pub(super) fn build_shutdown_response(request_id: i64) -> String { + build_response(1, "worker shutting down", request_id) +} + +pub(super) fn sanitize_response_output(output: &str) -> String { + output + .chars() + .map(|ch| match ch { + '\n' | '\r' | '\t' => ch, + ch if ch.is_control() => ' ', + ch => ch, + }) + .collect() +} + +pub(super) fn json_string_literal(value: &str) -> String { + JsonValue::String(value.to_owned()) + .stringify() + .unwrap_or_else(|_| "\"\"".to_string()) +} diff --git a/util/process_wrapper/worker_sandbox.rs b/util/process_wrapper/worker_sandbox.rs new file mode 100644 index 0000000000..2e6505b79f --- /dev/null +++ b/util/process_wrapper/worker_sandbox.rs @@ -0,0 +1,389 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Sandbox helpers for the persistent worker. 
+
+use std::path::PathBuf;
+use std::process::{Command, Stdio};
+
+use super::pipeline::OutputMaterializationStats;
+use crate::ProcessWrapperError;
+
+
+pub(super) fn resolve_relative_to(path: &str, base_dir: &std::path::Path) -> PathBuf {
+ let path = std::path::Path::new(path);
+ if path.is_absolute() {
+ path.to_path_buf()
+ } else {
+ base_dir.join(path)
+ }
+}
+
+pub(super) fn materialize_output_file(
+ src: &std::path::Path,
+ dest: &std::path::Path,
+) -> Result<bool, std::io::Error> {
+ if let Some(parent) = dest.parent() {
+ std::fs::create_dir_all(parent)?;
+ }
+
+ // Skip if src and dest resolve to the same file (e.g., when rustc writes
+ // directly into the sandbox via --emit=metadata=<path> and the
+ // copy destination is the same location). Removing dest would delete src.
+ if let (Ok(a), Ok(b)) = (src.canonicalize(), dest.canonicalize()) {
+ if a == b {
+ return Ok(false);
+ }
+ }
+
+ if dest.exists() {
+ std::fs::remove_file(dest)?;
+ }
+
+ match std::fs::hard_link(src, dest) {
+ Ok(()) => Ok(true),
+ Err(link_err) => match std::fs::copy(src, dest) {
+ Ok(_) => Ok(false),
+ Err(copy_err) => Err(std::io::Error::new(
+ copy_err.kind(),
+ format!(
+ "failed to materialize {} at {} via hardlink ({link_err}) or copy ({copy_err})",
+ src.display(),
+ dest.display(),
+ ),
+ )),
+ },
+ }
+}
+
+#[cfg(unix)]
+pub(super) fn symlink_path(
+ src: &std::path::Path,
+ dest: &std::path::Path,
+ _is_dir: bool,
+) -> Result<(), std::io::Error> {
+ std::os::unix::fs::symlink(src, dest)
+}
+
+#[cfg(windows)]
+pub(super) fn symlink_path(
+ src: &std::path::Path,
+ dest: &std::path::Path,
+ is_dir: bool,
+) -> Result<(), std::io::Error> {
+ if is_dir {
+ std::os::windows::fs::symlink_dir(src, dest)
+ } else {
+ std::os::windows::fs::symlink_file(src, dest)
+ }
+}
+
+pub(super) fn seed_sandbox_cache_root(
+ sandbox_dir: &std::path::Path,
+) -> Result<(), ProcessWrapperError> {
+ let dest = sandbox_dir.join("cache");
+ if dest.exists() {
+ return Ok(());
+ }
+
+ let entries =
 std::fs::read_dir(sandbox_dir).map_err(|e| {
+ ProcessWrapperError(format!(
+ "failed to read request sandbox for cache seeding: {e}"
+ ))
+ })?;
+
+ for entry in entries {
+ let entry = entry.map_err(|e| {
+ ProcessWrapperError(format!("failed to enumerate request sandbox entry: {e}"))
+ })?;
+ let source = entry.path();
+ let Ok(resolved) = source.canonicalize() else {
+ continue;
+ };
+
+ let mut cache_root = None;
+ for ancestor in resolved.ancestors() {
+ if ancestor.file_name().is_some_and(|name| name == "cache") {
+ cache_root = Some(ancestor.to_path_buf());
+ break;
+ }
+ }
+
+ let Some(cache_root) = cache_root else {
+ continue;
+ };
+ return symlink_path(&cache_root, &dest, true).map_err(|e| {
+ ProcessWrapperError(format!(
+ "failed to seed request sandbox cache root {} -> {}: {e}",
+ cache_root.display(),
+ dest.display(),
+ ))
+ });
+ }
+
+ Ok(())
+}
+
+/// Copies the file at `src` into `<sandbox_dir>/<original_out_dir>/<dest_subdir>`.
+///
+/// Used after the metadata action to make the `.rmeta` file visible to Bazel
+/// inside the sandbox before the sandbox is cleaned up.
+pub(super) fn copy_output_to_sandbox(
+ src: &str,
+ sandbox_dir: &str,
+ original_out_dir: &str,
+ dest_subdir: &str,
+) -> OutputMaterializationStats {
+ let mut stats = OutputMaterializationStats::default();
+ let src_path = std::path::Path::new(src);
+ let filename = match src_path.file_name() {
+ Some(n) => n,
+ None => return stats,
+ };
+ let dest_dir = std::path::Path::new(sandbox_dir)
+ .join(original_out_dir)
+ .join(dest_subdir);
+ if let Ok(hardlinked) = materialize_output_file(src_path, &dest_dir.join(filename)) {
+ stats.files = 1;
+ if hardlinked {
+ stats.hardlinked_files = 1;
+ } else {
+ stats.copied_files = 1;
+ }
+ }
+ stats
+}
+
+/// Copies all regular files from `pipeline_dir` into `<sandbox_dir>/<original_out_dir>`.
+///
+/// Used by the full action to move the `.rlib` (and `.d`, etc.) from the
+/// persistent directory into the sandbox before responding to Bazel.
+pub(super) fn copy_all_outputs_to_sandbox(
+ pipeline_dir: &PathBuf,
+ sandbox_dir: &str,
+ original_out_dir: &str,
+) -> OutputMaterializationStats {
+ let dest_dir = std::path::Path::new(sandbox_dir).join(original_out_dir);
+ let mut stats = OutputMaterializationStats::default();
+ if let Ok(entries) = std::fs::read_dir(pipeline_dir) {
+ for entry in entries.flatten() {
+ if let Ok(meta) = entry.metadata() {
+ if meta.is_file() {
+ if let Ok(hardlinked) =
+ materialize_output_file(&entry.path(), &dest_dir.join(entry.file_name()))
+ {
+ stats.files += 1;
+ if hardlinked {
+ stats.hardlinked_files += 1;
+ } else {
+ stats.copied_files += 1;
+ }
+ }
+ }
+ }
+ }
+ }
+ stats
+}
+
+/// Like `run_request` but sets `current_dir(sandbox_dir)` on the subprocess.
+///
+/// When Bazel provides a `sandboxDir`, setting the subprocess CWD to it makes
+/// all relative paths in arguments resolve correctly within the sandbox.
+pub(super) fn run_sandboxed_request(
+ self_path: &std::path::Path,
+ arguments: Vec<String>,
+ sandbox_dir: &str,
+) -> Result<(i32, String), ProcessWrapperError> {
+ let _ = seed_sandbox_cache_root(std::path::Path::new(sandbox_dir));
+ let output = Command::new(self_path)
+ .args(&arguments)
+ .current_dir(sandbox_dir)
+ .stdout(Stdio::piped())
+ .stderr(Stdio::piped())
+ .output()
+ .map_err(|e| ProcessWrapperError(format!("failed to spawn sandboxed subprocess: {e}")))?;
+
+ let exit_code = output.status.code().unwrap_or(1);
+ let mut combined = String::from_utf8_lossy(&output.stdout).into_owned();
+ combined.push_str(&String::from_utf8_lossy(&output.stderr));
+ Ok((exit_code, combined))
+}
+
+/// Resolves `path` relative to `sandbox_dir` if it is not absolute.
+pub(super) fn resolve_sandbox_path(path: &str, sandbox_dir: &str) -> String {
+ let p = std::path::Path::new(path);
+ if p.is_absolute() {
+ path.to_string()
+ } else {
+ std::path::Path::new(sandbox_dir)
+ .join(p)
+ .to_string_lossy()
+ .into_owned()
+ }
+}
+
+/// Ensures output files in rustc's `--out-dir` are writable before each request.
+///
+/// Workers run in execroot without sandboxing. Bazel marks action outputs
+/// read-only after each successful action, and the disk cache hardlinks them
+/// as read-only. With pipelined compilation, two separate actions (RustcMetadata
+/// and Rustc) both write to the same `.rmeta` path. After the first succeeds,
+/// Bazel makes its output read-only; the second worker request then fails with
+/// "output file ... is not writeable".
+///
+/// This function scans `args` for `--out-dir=` — both inline and inside any
+/// `--arg-file <file>` (process_wrapper's own arg-file mechanism) or `@flagfile`
+/// (Bazel's param file convention) — and makes all regular files in those
+/// directories writable.
+pub(super) fn prepare_outputs(args: &[String]) {
+ let mut out_dirs: Vec<String> = Vec::new();
+
+ let mut i = 0;
+ while i < args.len() {
+ let arg = &args[i];
+ if let Some(dir) = arg.strip_prefix("--out-dir=") {
+ out_dirs.push(dir.to_string());
+ } else if let Some(flagfile_path) = arg.strip_prefix('@') {
+ // Bazel @flagfile: one arg per line.
+ scan_file_for_out_dir(flagfile_path, None, &mut out_dirs);
+ } else if arg == "--arg-file" {
+ // process_wrapper's --arg-file <file>: reads child (rustc) args from file.
+ if let Some(path) = args.get(i + 1) {
+ scan_file_for_out_dir(path, None, &mut out_dirs);
+ i += 1; // skip the path argument
+ }
+ }
+ i += 1;
+ }
+
+ for out_dir in out_dirs {
+ make_dir_files_writable(&out_dir);
+ // Also make writable any _pipeline/ subdir (worker-pipelining .rmeta files
+ // from previous runs may be read-only after Bazel marks outputs immutable).
+ let pipeline_dir = format!("{out_dir}/_pipeline");
+ make_dir_files_writable(&pipeline_dir);
+ }
+}
+
+/// Like `prepare_outputs` but resolves relative `--out-dir` paths against
+/// `sandbox_dir` before making files writable.
+pub(super) fn prepare_outputs_sandboxed(args: &[String], sandbox_dir: &str) {
+ let mut out_dirs: Vec<String> = Vec::new();
+
+ let mut i = 0;
+ while i < args.len() {
+ let arg = &args[i];
+ if let Some(dir) = arg.strip_prefix("--out-dir=") {
+ out_dirs.push(resolve_sandbox_path(dir, sandbox_dir));
+ } else if let Some(flagfile_path) = arg.strip_prefix('@') {
+ scan_file_for_out_dir(flagfile_path, Some(sandbox_dir), &mut out_dirs);
+ } else if arg == "--arg-file" {
+ if let Some(path) = args.get(i + 1) {
+ scan_file_for_out_dir(path, Some(sandbox_dir), &mut out_dirs);
+ i += 1;
+ }
+ }
+ i += 1;
+ }
+
+ for out_dir in out_dirs {
+ make_dir_files_writable(&out_dir);
+ let pipeline_dir = format!("{out_dir}/_pipeline");
+ make_dir_files_writable(&pipeline_dir);
+ }
+}
+
+/// Reads `path` line-by-line, collecting any `--out-dir=` values.
+/// When `sandbox_dir` is `Some`, resolves found paths against it.
+pub(super) fn scan_file_for_out_dir(
+ path: &str,
+ sandbox_dir: Option<&str>,
+ out_dirs: &mut Vec<String>,
+) {
+ let Ok(content) = std::fs::read_to_string(path) else {
+ return;
+ };
+ for line in content.lines() {
+ if let Some(dir) = line.strip_prefix("--out-dir=") {
+ match sandbox_dir {
+ Some(sd) => out_dirs.push(resolve_sandbox_path(dir, sd)),
+ None => out_dirs.push(dir.to_string()),
+ }
+ }
+ }
+}
+
+/// Makes all regular files in `dir` writable (removes read-only bit).
+pub(super) fn make_dir_files_writable(dir: &str) {
+ let Ok(entries) = std::fs::read_dir(dir) else {
+ return;
+ };
+ for entry in entries.flatten() {
+ if let Ok(meta) = entry.metadata() {
+ if meta.is_file() {
+ let mut perms = meta.permissions();
+ if perms.readonly() {
+ perms.set_readonly(false);
+ let _ = std::fs::set_permissions(entry.path(), perms);
+ }
+ }
+ }
+ }
+}
+
+pub(super) fn make_path_writable(path: &std::path::Path) {
+ let Ok(meta) = std::fs::metadata(path) else {
+ return;
+ };
+ if !meta.is_file() {
+ return;
+ }
+
+ let mut perms = meta.permissions();
+ if perms.readonly() {
+ perms.set_readonly(false);
+ let _ = std::fs::set_permissions(path, perms);
+ }
+}
+
+/// Executes a single WorkRequest by spawning process_wrapper with the given
+/// arguments. Returns (exit_code, combined_output).
+///
+/// The spawned process runs with the worker's environment and working directory
+/// (Bazel's execroot), so incremental compilation caches see stable paths.
+pub(super) fn run_request(
+ self_path: &std::path::Path,
+ arguments: Vec<String>,
+) -> Result<(i32, String), ProcessWrapperError> {
+ let output = Command::new(self_path)
+ .args(&arguments)
+ .stdout(Stdio::piped())
+ .stderr(Stdio::piped())
+ .output()
+ .map_err(|e| {
+ ProcessWrapperError(format!("failed to spawn process_wrapper subprocess: {e}"))
+ })?;
+
+ let exit_code = output.status.code().unwrap_or(1);
+
+ // Combine stdout and stderr for the WorkResponse output field.
+ // process_wrapper normally writes rustc diagnostics to its stderr,
+ // so this captures compilation errors/warnings for display in Bazel.
+ let mut combined = String::from_utf8_lossy(&output.stdout).into_owned();
+ combined.push_str(&String::from_utf8_lossy(&output.stderr));
+
+ Ok((exit_code, combined))
+}