diff --git a/cargo/private/cargo_build_script_runner/bin.rs b/cargo/private/cargo_build_script_runner/bin.rs index 27f5848edc..8060102776 100644 --- a/cargo/private/cargo_build_script_runner/bin.rs +++ b/cargo/private/cargo_build_script_runner/bin.rs @@ -75,6 +75,16 @@ fn run_buildrs() -> Result<(), String> { let file_name = path .file_name() .ok_or_else(|| "Failed while getting file name".to_string())?; + + // Skip worker infrastructure directories — these are internal to + // rules_rust or Bazel and should never appear in CARGO_MANIFEST_DIR. + let name = file_name.to_string_lossy(); + if name.starts_with("local-spawn-runner.") + || name.starts_with("_pw_state") + { + continue; + } + let link = manifest_dir.join(file_name); symlink_if_not_exists(&path, &link) diff --git a/crate_universe/src/metadata/cargo_bin.rs b/crate_universe/src/metadata/cargo_bin.rs index c0b0cb17c5..c1e93f86f7 100644 --- a/crate_universe/src/metadata/cargo_bin.rs +++ b/crate_universe/src/metadata/cargo_bin.rs @@ -76,7 +76,10 @@ impl Cargo { /// Returns the output of running `cargo version`, trimming any leading or trailing whitespace. 
"""A module defining Rust incremental compilation support"""

load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo")
load("//rust/private:utils.bzl", "is_exec_configuration")

def _is_incremental_enabled(ctx, crate_info):
    """Returns True if incremental compilation is enabled for this target.

    Args:
        ctx (ctx): The calling rule's context object.
        crate_info (CrateInfo): The CrateInfo provider of the target crate.

    Returns:
        bool: True if incremental compilation is enabled.
    """

    # Rules that do not carry the `_incremental` attribute can never opt in.
    if not hasattr(ctx.attr, "_incremental"):
        return False

    # Exec-configuration builds (build scripts, proc-macro deps) are excluded.
    if is_exec_configuration(ctx):
        return False

    # The build setting is the global opt-in switch.
    if not ctx.attr._incremental[BuildSettingInfo].value:
        return False

    # Proc-macros are compiled for the exec platform and change rarely.
    if crate_info.type == "proc-macro":
        return False

    # Don't enable incremental for external/third-party crates, mirroring cargo's
    # behavior. External crates rarely change, so incremental saves little; more
    # importantly, the disk cache hardlinks their outputs as read-only, and running
    # without sandboxing (which worker/no-sandbox requires) would cause rustc to
    # fail trying to overwrite those read-only hardlinks.
    if ctx.label.workspace_name:
        return False

    return True

def construct_incremental_arguments(ctx, crate_info, is_metadata = False):
    """Returns a list of 'rustc' flags to configure incremental compilation.

    Args:
        ctx (ctx): The calling rule's context object.
        crate_info (CrateInfo): The CrateInfo provider of the target crate.
        is_metadata (bool): True when building a RustcMetadata (--emit=metadata only) action.

    Returns:
        list: A list of strings that are valid flags for 'rustc'.
    """
    if not _is_incremental_enabled(ctx, crate_info):
        return []

    # Use a separate cache directory for metadata-only (RustcMetadata) actions.
    # Both RustcMetadata(A) and Rustc(A) compile the same crate, so they produce
    # the same SVH — but sharing the same incremental path causes a rustc ICE
    # ("no entry found for key") because the metadata-only session state is
    # incompatible with a full-compilation session. Using distinct paths lets
    # both actions benefit from incremental caching without interfering.
    suffix = "-meta" if is_metadata else ""

    # Key the cache directory on the package as well as the crate name: distinct
    # crates frequently share a crate name (e.g. `lib`, `build_script_build`),
    # and a name-only directory would interleave their incremental sessions,
    # causing rustc fingerprint invalidation or session-lock contention.
    # External crates never reach this point (filtered above), so the package
    # is always a main-workspace package.
    package = ctx.label.package.replace("/", "_") or "_root"
    cache_path = "/tmp/rules_rust_incremental/{}/{}{}".format(package, crate_info.name, suffix)

    # Explicitly set codegen-units=16 to match Cargo's dev profile default
    # (since Cargo 1.73). Without this, rustc silently bumps CGUs from 16 to
    # 256 when -Cincremental is present, adding ~37% of the cold-build overhead
    # for no rebuild benefit at opt-level=0.
    return ["-Cincremental={}".format(cache_path), "-Ccodegen-units=16"]

def is_incremental_enabled(ctx, crate_info):
    """Returns True if incremental compilation is enabled for this target.

    This is the public API used by rustc_compile_action to determine whether
    to set execution_requirements = {"no-sandbox": "1"}.

    Args:
        ctx (ctx): The calling rule's context object.
        crate_info (CrateInfo): The CrateInfo provider of the target crate.

    Returns:
        bool: True if incremental compilation is enabled.
    """
    return _is_incremental_enabled(ctx, crate_info)
# Note that we don't include a hash for `cdylib` and `staticlib` since they are meant to be consumed externally # and having a deterministic name is important since it ends up embedded in the executable. This is problematic @@ -154,7 +179,16 @@ def _rust_library_common(ctx, crate_type): if crate_type in ["cdylib", "staticlib"]: output_hash = None else: - output_hash = determine_output_hash(crate_root, ctx.label) + output_hash = determine_output_hash( + crate_root, + ctx.label, + salt = _pipelining_output_hash_salt( + ctx, + toolchain, + crate_type, + effective_disable_pipelining, + ), + ) rust_lib_name = determine_lib_name( crate_name, @@ -166,24 +200,38 @@ def _rust_library_common(ctx, crate_type): rust_metadata = None rustc_rmeta_output = None metadata_supports_pipelining = False + if can_build_metadata( toolchain, ctx, crate_type, - disable_pipelining = getattr(ctx.attr, "disable_pipelining", False), + disable_pipelining = effective_disable_pipelining, ): - # The hollow rlib uses .rlib extension (not .rmeta) so rustc reads it as an - # rlib archive containing lib.rmeta with optimized MIR. It is placed in a - # "_hollow/" subdirectory so the full rlib and hollow rlib never appear in the - # same -Ldependency= search directory (which would cause E0463). - rust_metadata = ctx.actions.declare_file( - "_hollow/" + rust_lib_name[:-len(".rlib")] + "-hollow.rlib", - ) + if can_use_metadata_for_pipelining(toolchain, crate_type) and toolchain._worker_pipelining and not is_exec_configuration(ctx): + # Worker pipelining: single rustc invocation emitting both .rmeta and .rlib. + # Use a real .rmeta file (not a hollow rlib) so downstream crates can use + # --extern name=path.rmeta. No -Zno-codegen, no RUSTC_BOOTSTRAP needed. + # The .rmeta is placed in a "_pipeline/" subdirectory so it never coexists + # with the .rlib in the same -Ldependency= search directory (which would + # cause E0463 if they're from different build runs with different SVHs). 
+ # Exec-platform builds always use hollow rlib (with RUSTC_BOOTSTRAP=1) + # to maintain consistent SVH across all three pipelining configurations. + rust_metadata = ctx.actions.declare_file( + "_pipeline/" + rust_lib_name[:-len(".rlib")] + ".rmeta", + ) + else: + # The hollow rlib uses .rlib extension (not .rmeta) so rustc reads it as an + # rlib archive containing lib.rmeta with optimized MIR. It is placed in a + # "_hollow/" subdirectory so the full rlib and hollow rlib never appear in the + # same -Ldependency= search directory (which would cause E0463). + rust_metadata = ctx.actions.declare_file( + "_hollow/" + rust_lib_name[:-len(".rlib")] + "-hollow.rlib", + ) rustc_rmeta_output = generate_output_diagnostics(ctx, rust_metadata) metadata_supports_pipelining = ( can_use_metadata_for_pipelining(toolchain, crate_type) and - not ctx.attr.disable_pipelining + not effective_disable_pipelining ) deps = transform_deps(deps) @@ -589,6 +637,9 @@ RUSTC_ATTRS = { "_extra_rustc_flags": attr.label( default = Label("//rust/settings:extra_rustc_flags"), ), + "_incremental": attr.label( + default = Label("//rust/settings:experimental_incremental"), + ), "_is_proc_macro_dep": attr.label( default = Label("//rust/private:is_proc_macro_dep"), ), @@ -608,6 +659,9 @@ RUSTC_ATTRS = { "_rustc_output_diagnostics": attr.label( default = Label("//rust/settings:rustc_output_diagnostics"), ), + "_experimental_pipelined_compilation": attr.label( + default = Label("//rust/settings:experimental_pipelined_compilation"), + ), } _COMMON_ATTRS = { diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index d593325e8b..e41d368183 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -26,6 +26,7 @@ load("@rules_cc//cc/common:cc_common.bzl", "cc_common") load("@rules_cc//cc/common:cc_info.bzl", "CcInfo") load(":common.bzl", "rust_common") load(":compat.bzl", "abs") +load(":incremental.bzl", "construct_incremental_arguments", "is_incremental_enabled") load(":lto.bzl", 
def _use_worker_pipelining(toolchain, crate_info):
    """Returns True if worker-managed pipelining should be used for this crate.

    Worker pipelining is active when `_worker_pipelining` is set (derived from
    `experimental_pipelined_compilation=worker`) and the crate is rlib/lib.

    Args:
        toolchain (rust_toolchain): The current target's rust_toolchain.
        crate_info (CrateInfo): The crate being compiled.

    Returns:
        bool: True if worker pipelining is active for this crate.
    """

    # Pipelining only ever applies when the toolchain-level switch is on.
    if not toolchain._worker_pipelining:
        return False

    # Only library crate types produce an .rmeta that downstream
    # compilations can consume early.
    return crate_info.type in ("rlib", "lib")
crate_info (CrateInfo): The CrateInfo provider of the target crate @@ -969,9 +1007,12 @@ def construct_arguments( use_json_output (bool): Have rustc emit json and process_wrapper parse json messages to output rendered output. build_metadata (bool): Generate CLI arguments for building *only* .rmeta files. This requires use_json_output. force_depend_on_objects (bool): Force using `.rlib` object files instead of metadata (`.rmeta`) files even if they are available. + experimental_use_cc_common_link (bool): Whether to use cc_common.link for the final binary link step. skip_expanding_rustc_env (bool): Whether to skip expanding CrateInfo.rustc_env_attr require_explicit_unstable_features (bool): Whether to require all unstable features to be explicitly opted in to using `-Zallow-features=...`. + always_use_param_file (bool): Whether to always use a param file for rustc arguments. error_format (str, optional): Error format to pass to the `--error-format` command line argument. If set to None, uses the "_error_format" entry in `attr`. + use_worker_pipelining (bool): Whether worker-managed pipelining is active. When True, per-action flags are routed to the paramfile for worker key stability. Returns: tuple: A tuple of the following items @@ -991,13 +1032,26 @@ def construct_arguments( env = _get_rustc_env(attr, toolchain, crate_info.name) + # Determine worker pipelining mode early so we can route per-action flags + # to the right Args object. When worker pipelining is active, per-action + # flags must go in the @paramfile (rustc_flags) rather than the startup + # args (process_wrapper_flags). Bazel derives the worker key from startup + # args — if per-action values like --output-file are in startup args, every + # action gets a unique worker key and thus a separate OS process, defeating + # the purpose of persistent workers. 
+ use_worker_pipe = use_worker_pipelining + # Wrapper args first process_wrapper_flags = ctx.actions.args() - for build_env_file in build_env_files: - process_wrapper_flags.add("--env-file", build_env_file) - - process_wrapper_flags.add_all(build_flags_files, before_each = "--arg-file") + # --env-file and --arg-file are per-action (different build script deps per + # crate). For worker pipelining they must go into the paramfile so that all + # actions share a single worker key. The non-worker-pipe path adds them here; + # the worker-pipe path adds them after rustc_flags is created below. + if not use_worker_pipe: + for build_env_file in build_env_files: + process_wrapper_flags.add("--env-file", build_env_file) + process_wrapper_flags.add_all(build_flags_files, before_each = "--arg-file") if require_explicit_unstable_features: process_wrapper_flags.add("--require-explicit-unstable-features", "true") @@ -1016,11 +1070,6 @@ def construct_arguments( # use `${pwd}` which resolves the `exec_root` at action execution time. process_wrapper_flags.add("--subst", "pwd=${pwd}") - # If stamping is enabled, enable the functionality in the process wrapper - if stamp: - process_wrapper_flags.add("--volatile-status-file", ctx.version_file) - process_wrapper_flags.add("--stable-status-file", ctx.info_file) - # Both ctx.label.workspace_root and ctx.label.package are relative paths # and either can be empty strings. Avoid trailing/double slashes in the path. components = "${{pwd}}/{}/{}".format(ctx.label.workspace_root, ctx.label.package).split("/") @@ -1048,13 +1097,31 @@ def construct_arguments( rustc_flags.add(crate_info.name, format = "--crate-name=%s") rustc_flags.add(crate_info.type, format = "--crate-type=%s") + # Stamp files are per-action inputs. Keep them out of worker startup args + # when worker pipelining is active so stamped actions still share a WorkerKey. 
+ if stamp: + if use_worker_pipe: + rustc_flags.add("--volatile-status-file", ctx.version_file) + rustc_flags.add("--stable-status-file", ctx.info_file) + else: + process_wrapper_flags.add("--volatile-status-file", ctx.version_file) + process_wrapper_flags.add("--stable-status-file", ctx.info_file) + if error_format == None: error_format = get_error_format(attr, "_error_format") if use_json_output: # If --error-format was set to json, we just pass the output through # Otherwise process_wrapper uses the "rendered" field. - process_wrapper_flags.add("--rustc-output-format", "json" if error_format == "json" else "rendered") + # + # For worker pipelining, put this in the @paramfile (per-request args) + # rather than startup args, so all actions share the same worker key. + # prepare_param_file strips it before rustc sees it. + output_format = "json" if error_format == "json" else "rendered" + if use_worker_pipe: + rustc_flags.add("--rustc-output-format", output_format) + else: + process_wrapper_flags.add("--rustc-output-format", output_format) # Configure rustc json output by adding artifact notifications. # These are filtered out by process_wrapper. @@ -1071,17 +1138,37 @@ def construct_arguments( if build_metadata: if crate_info.type in ("rlib", "lib"): - # Hollow rlib approach (Buck2-style): rustc runs to completion with -Zno-codegen, - # producing a hollow .rlib (metadata only, no object code) via --emit=link=. - # No need to kill rustc — -Zno-codegen skips codegen entirely and exits quickly. - rustc_flags.add("-Zno-codegen") + # Hollow rlib approach (Buck2-style): rustc runs with -Zno-codegen, producing + # a hollow rlib (metadata only, no object code) via --emit=link=. + # Worker pipelining uses --emit=dep-info,metadata,link instead (no -Zno-codegen): + # the background rustc produces the full rlib, so codegen must not be skipped. + # Exec-platform builds always use hollow rlib (not worker pipelining). 
+ if not use_worker_pipe: + rustc_flags.add("-Zno-codegen") # else: IDE-only metadata for non-rlib types (bin, proc-macro, etc.): rustc exits # naturally after writing .rmeta via --emit=dep-info,metadata (no kill needed). if crate_info.rustc_rmeta_output: - process_wrapper_flags.add("--output-file", crate_info.rustc_rmeta_output.path) + # For worker pipelining, --output-file goes in the @paramfile (per-request) + # so all actions share the same worker key. prepare_param_file strips it + # before rustc sees it; the worker relocates it before --. + if use_worker_pipe: + rustc_flags.add("--output-file", crate_info.rustc_rmeta_output) + else: + process_wrapper_flags.add("--output-file", crate_info.rustc_rmeta_output) elif crate_info.rustc_output: - process_wrapper_flags.add("--output-file", crate_info.rustc_output.path) + if use_worker_pipe: + rustc_flags.add("--output-file", crate_info.rustc_output) + else: + process_wrapper_flags.add("--output-file", crate_info.rustc_output) + + # For worker pipelining, add --env-file and --arg-file to the paramfile + # (deferred from above where the non-worker-pipe path adds them to + # process_wrapper_flags). 
+ if use_worker_pipe: + for build_env_file in build_env_files: + rustc_flags.add("--env-file", build_env_file) + rustc_flags.add_all(build_flags_files, before_each = "--arg-file") rustc_flags.add(error_format, format = "--error-format=%s") @@ -1097,7 +1184,7 @@ def construct_arguments( rustc_flags.add(output_hash, format = "--codegen=extra-filename=-%s") if output_dir: - rustc_flags.add(output_dir, format = "--out-dir=%s") + rustc_flags.add_all([crate_info.output], map_each = _parent_dir, format_each = "--out-dir=%s") compilation_mode = get_compilation_mode_opts(ctx, toolchain) rustc_flags.add(compilation_mode.opt_level, format = "--codegen=opt-level=%s") @@ -1110,15 +1197,23 @@ def construct_arguments( emit_without_paths = [] for kind in emit: - if kind == "link" and build_metadata and crate_info.type in ("rlib", "lib") and crate_info.metadata: + if kind == "link" and build_metadata and crate_info.type in ("rlib", "lib") and crate_info.metadata and not use_worker_pipe: # Hollow rlib: direct rustc's link output to the -hollow.rlib path. # The file has .rlib extension so rustc reads it as an rlib archive # (with optimized MIR in lib.rmeta). Using a .rmeta path would cause # E0786 "found invalid metadata files" because rustc parses .rmeta files # as raw metadata blobs, not rlib archives. + # Worker pipelining: let link go to --out-dir normally (no redirect); + # the background rustc produces the full rlib directly. rustc_flags.add(crate_info.metadata, format = "--emit=link=%s") elif kind == "link" and crate_info.type == "bin" and crate_info.output != None: rustc_flags.add(crate_info.output, format = "--emit=link=%s") + elif kind == "metadata" and build_metadata and use_worker_pipe and crate_info.metadata: + # Worker pipelining: direct the .rmeta to the declared _pipeline/ output + # path so it's produced correctly in both worker and sandbox execution. 
+ # Without this, rustc writes .rmeta to --out-dir (the base directory), + # but the declared output is in the _pipeline/ subdirectory. + rustc_flags.add(crate_info.metadata, format = "--emit=metadata=%s") else: emit_without_paths.append(kind) @@ -1133,8 +1228,9 @@ def construct_arguments( if linker_script: rustc_flags.add(linker_script, format = "--codegen=link-arg=-T%s") - # Tell Rustc where to find the standard library (or libcore) - rustc_flags.add_all(toolchain.rust_std_paths, before_each = "-L", format_each = "%s") + # Tell Rustc where to find the standard library (or libcore). + # Use the File depset with map_each=_parent_dir so PathMapper can rewrite paths. + rustc_flags.add_all(toolchain.rust_std, map_each = _parent_dir, format_each = "-L%s", uniquify = True) rustc_flags.add_all(rust_flags, map_each = map_flag) # Gather data path from crate_info since it is inherited from real crate for rust_doc and rust_test @@ -1145,6 +1241,16 @@ def construct_arguments( _add_lto_flags(ctx, toolchain, rustc_flags, crate_info) _add_codegen_units_flags(toolchain, emit, rustc_flags) + # RustcMetadata and Rustc both use incremental compilation, but with separate + # cache directories (see construct_incremental_arguments). Using the same path + # for both causes a rustc ICE ("no entry found for key") because the metadata- + # only session state is incompatible with a full-compilation session. The + # distinct paths allow both to benefit from caching; because SVH does not + # depend on the incremental cache path, both actions produce the same SVH value + # for the same source, so Rustc(A) overwriting libA-HASH.rmeta in execroot is + # safe for concurrently running sandboxed RustcMetadata(B) actions. 
def _build_worker_exec_reqs(use_worker_pipelining, is_incremental, has_out_dir = False):
    """Builds execution_requirements for Rustc worker actions.

    Args:
        use_worker_pipelining: Whether worker-managed pipelining is active.
        is_incremental: Whether incremental compilation is enabled.
        has_out_dir: Whether the crate has a build script OUT_DIR. If True,
            path mapping is disabled because OUT_DIR is an env var that
            PathMapper cannot rewrite.

    Returns:
        A dict of execution_requirements.
    """
    reqs = {}

    # Both worker modes speak the JSON worker protocol.
    if is_incremental or use_worker_pipelining:
        reqs["requires-worker-protocol"] = "json"

    # NOTE(review): the nesting of the branch below was reconstructed from a
    # collapsed diff — confirm against the original patch that it is NOT nested
    # under the `is_incremental or use_worker_pipelining` guard above. As written,
    # a call with both flags False still advertises "supports-workers".
    if use_worker_pipelining:
        reqs["supports-multiplex-workers"] = "1"
        reqs["supports-multiplex-sandboxing"] = "1"

        # Cancellation is fully effective for pipelined requests (kills the
        # background rustc). Non-pipelined requests within the same worker
        # (e.g. proc-macros) acknowledge the cancel but the subprocess runs
        # to completion — this is consistent with Bazel's best-effort semantics.
        reqs["supports-worker-cancellation"] = "1"
    else:
        reqs["supports-workers"] = "1"

    # no-sandbox is no longer needed — the worker uses real execroot CWD
    # (or sandbox CWD when sandboxed), so incremental cache paths are
    # stable regardless of sandboxing.

    # Enable path mapping for --experimental_output_paths=strip deduplication.
    # Disabled when a build script OUT_DIR is present because env vars are not
    # rewritten by PathMapper, causing include!() to reference unrewritten paths.
    if not has_out_dir:
        reqs["supports-path-mapping"] = "1"
    return reqs
+ # Suppress those companion files when worker pipelining is active so both + # actions have identical startup args → same worker key → same process. + if use_worker_pipelining: + rustc_output = None + rustc_rmeta_output = None + # Use the hollow rlib approach (Buck2-style) for rlib/lib crate types when a metadata - # action is being created. This always applies for rlib/lib regardless of whether - # pipelining is globally enabled — the hollow rlib is simpler than killing rustc. + # action is being created, UNLESS worker pipelining is active (which uses a single + # rustc invocation with --emit=dep-info,metadata,link and .rmeta output instead). # Non-rlib types (bin, proc-macro, etc.) use --emit=dep-info,metadata instead # (rustc exits naturally after writing .rmeta, no process-wrapper kill needed). - use_hollow_rlib = bool(build_metadata) and crate_info.type in ("rlib", "lib") + use_hollow_rlib = bool(build_metadata) and crate_info.type in ("rlib", "lib") and not use_worker_pipelining + + # Include a pipelining discriminator on ALL Rustc actions (not just pipelined + # rlibs) so the action cache key differs between pipelining-enabled and + # pipelining-disabled builds. RUSTC_BOOTSTRAP=1 changes SVH for all crates, + # so cached outputs from non-pipelined builds are incompatible (E0463). + if toolchain._pipelined_compilation: + rust_flags = rust_flags + ["--cfg=rules_rust_pipelined"] # Determine whether to use cc_common.link: # * either if experimental_use_cc_common_link is 1, @@ -1426,11 +1601,10 @@ def rustc_compile_action( ) # The main Rustc action uses FULL rlib deps so the full rlib it produces records - # full-rlib SVHs. A downstream binary links against full rlibs; if the Rustc action - # had used hollow rlib deps instead, nondeterministic proc macros could produce - # different SVHs for the hollow vs full rlib, causing E0460 in the binary build. 
- # The RustcMetadata action still uses hollow rlibs (compile_inputs_for_metadata) - # so it can start before full codegen of its deps completes. + # full-rlib SVHs. This makes the dependency graph tier-consistent: hollow→hollow, + # full→full. Each tier has self-consistent SVH values, preventing E0460 even with + # nondeterministic proc macros. The RustcMetadata action still uses hollow rlibs + # (compile_inputs_for_metadata) so it can start before full codegen of deps completes. compile_inputs_for_metadata = compile_inputs if use_hollow_rlib: compile_inputs, _, _, _, _, _ = collect_inputs( @@ -1466,12 +1640,18 @@ def rustc_compile_action( elif ctx.attr.require_explicit_unstable_features == -1: require_explicit_unstable_features = toolchain.require_explicit_unstable_features + # When incremental compilation or worker pipelining is enabled, force a param file + # so the worker strategy sees exactly one @flagfile in the command line (Bazel + # requirement). For worker pipelining, the metadata handler parses the param file + # to spawn rustc directly; it needs the args in a file to apply substitutions. 
+ use_param_file_always = is_incremental_enabled(ctx, crate_info) or use_worker_pipelining + args, env_from_args = construct_arguments( ctx = ctx, attr = attr, file = ctx.file, toolchain = toolchain, - tool_path = toolchain.rustc.path, + tool_path = toolchain.rustc, cc_toolchain = cc_toolchain, emit = emit, feature_configuration = feature_configuration, @@ -1492,7 +1672,8 @@ def rustc_compile_action( experimental_use_cc_common_link = experimental_use_cc_common_link, skip_expanding_rustc_env = skip_expanding_rustc_env, require_explicit_unstable_features = require_explicit_unstable_features, - always_use_param_file = not ctx.executable._process_wrapper, + always_use_param_file = use_param_file_always or not ctx.executable._process_wrapper, + use_worker_pipelining = use_worker_pipelining, ) args_metadata = None @@ -1502,6 +1683,11 @@ def rustc_compile_action( # -Zno-codegen). dep-info must be included: it affects the SVH stored in the # rlib, so both actions must include it to keep SVHs consistent. metadata_emit = ["dep-info", "link"] + elif use_worker_pipelining: + # Worker pipelining: single rustc invocation emits metadata+link in one pass. + # The worker monitors stderr for the rmeta artifact JSON, returns the .rmeta + # early, and keeps rustc running in the background to finish codegen. + metadata_emit = ["dep-info", "metadata", "link"] else: # IDE-only metadata for non-rlib types (bin, proc-macro, etc.): rustc exits # naturally after writing .rmeta with --emit=dep-info,metadata. 
@@ -1511,7 +1697,7 @@ def rustc_compile_action( attr = attr, file = ctx.file, toolchain = toolchain, - tool_path = toolchain.rustc.path, + tool_path = toolchain.rustc, cc_toolchain = cc_toolchain, emit = metadata_emit, feature_configuration = feature_configuration, @@ -1530,18 +1716,92 @@ def rustc_compile_action( build_metadata = True, experimental_use_cc_common_link = experimental_use_cc_common_link, require_explicit_unstable_features = require_explicit_unstable_features, + always_use_param_file = use_param_file_always, + use_worker_pipelining = use_worker_pipelining, ) + # Worker pipelining: add pipelining mode flags to rustc_flags (the @paramfile). + # IMPORTANT: These must NOT go in process_wrapper_flags (startup args). Startup + # args determine the Bazel worker key — if RustcMetadata and Rustc have different + # startup args, Bazel routes them to different worker processes and they cannot + # share PipelineState. With these flags in rustc_flags (per-request @paramfile), + # both actions share the same startup args → same worker key → same worker. + # + # --json=artifacts is already emitted by construct_arguments via use_json_output=True. + if use_worker_pipelining and build_metadata: + # Use crate_info.output.path (includes the Bazel configuration hash) sanitized + # for filesystem use. We must use .path, not .short_path, because .short_path + # is the same across different configurations (e.g. k8-fastbuild vs + # k8-fastbuild-ST-). With identical keys, a metadata action from one + # config's rustc invocation could be consumed by a full action from a different + # config, producing an rlib with the wrong SVH chain (E0463). + pipeline_key = crate_info.output.path.replace("/", "_").replace(".", "_") + + # Metadata action: tell the worker to start rustc and return .rmeta early. 
+ args_metadata.rustc_flags.add("--pipelining-metadata") + args_metadata.rustc_flags.add("--pipelining-key={}".format(pipeline_key)) + + # Full action: tell the worker to wait for the background rustc started above. + args.rustc_flags.add("--pipelining-full") + args.rustc_flags.add("--pipelining-key={}".format(pipeline_key)) + + # Pass the expected .rlib path for the local-mode no-op optimization. + # When the process_wrapper runs outside a worker (local/sandboxed fallback), + # it checks whether this file already exists (produced as a side-effect by + # the metadata action's rustc). If so, it skips the redundant second rustc + # invocation, guaranteeing SVH consistency (single invocation per crate). + args.rustc_flags.add("--pipelining-rlib-path={}".format(crate_info.output.path)) + + # Pass the metadata action's declared .rmeta output path so the standalone + # full action can verify SVH consistency after its rustc completes. If the + # .rmeta and the embedded lib.rmeta in the .rlib differ, a non-deterministic + # proc macro is present and the build should fail with a clear diagnostic + # rather than a cryptic E0463 in a downstream consumer. + args.rustc_flags.add("--pipelining-rmeta-path={}".format(crate_info.metadata.path)) + env = dict(ctx.configuration.default_shell_env) # this is the final list of env vars env.update(env_from_args) - if use_hollow_rlib: - # Both the metadata action and the full Rustc action must have RUSTC_BOOTSTRAP=1 - # for SVH compatibility. RUSTC_BOOTSTRAP=1 changes the crate hash — setting it - # on only one action would cause SVH mismatch even for deterministic crates. - # This enables -Zno-codegen on stable Rust compilers for the metadata action. + # Worker pipelining: Bazel's worker key includes the action env. Per-crate env vars + # (CARGO_CRATE_NAME, CARGO_MANIFEST_DIR, OUT_DIR, REPOSITORY_NAME, etc.) 
differ per + # crate, creating a unique worker key per crate → separate OS process per action → + # metadata and full can never share PipelineState. Fix: write per-crate env vars to + # an env file passed via --env-file in the @paramfile, keeping only stable vars + # (PATH, etc.) in the action env so all actions share the same worker key. + worker_env_file = None + if use_worker_pipelining: + # Build the env file contents. RUSTC_BOOTSTRAP must be included here + # (not just in the action env) because in worker mode the process_wrapper + # reads env from this file, not from the OS-level action environment. + # Without it, the rlib would be compiled without RUSTC_BOOTSTRAP=1, while + # downstream binary/test actions (which run outside the worker) see + # RUSTC_BOOTSTRAP=1 from the action env — causing crate SVH mismatch (E0463). + worker_env = dict(env_from_args) + if toolchain._pipelined_compilation: + worker_env["RUSTC_BOOTSTRAP"] = "1" + env_content = "\n".join(["{}={}".format(k, v) for k, v in sorted(worker_env.items())]) + worker_env_file = ctx.actions.declare_file(crate_info.output.basename + ".worker_env") + ctx.actions.write(worker_env_file, env_content) + + # Add --env-file to the @paramfile for both metadata and full actions. + # This goes in rustc_flags (the paramfile) so it doesn't affect the worker key. + # prepare_param_file / the worker handler strips it before rustc sees it. + args.rustc_flags.add("--env-file", worker_env_file) + if args_metadata: + args_metadata.rustc_flags.add("--env-file", worker_env_file) + + # Strip per-crate vars from action env — keep only default_shell_env (PATH etc.) + env = dict(ctx.configuration.default_shell_env) + + if toolchain._pipelined_compilation: + # RUSTC_BOOTSTRAP=1 must be set on ALL Rustc actions (rlibs, binaries, + # tests, proc-macros) when any pipelining mode is active — not just on + # pipelined rlib actions. 
RUSTC_BOOTSTRAP changes the crate SVH, so a + # binary compiled without it cannot load rlibs compiled with it (E0463). + # This also enables -Zno-codegen on stable compilers for hollow-rlib + # metadata actions. env["RUSTC_BOOTSTRAP"] = "1" if hasattr(attr, "version") and attr.version != "0.0.0": @@ -1594,10 +1854,31 @@ def rustc_compile_action( action_outputs.append(dsym_folder) if ctx.executable._process_wrapper: + # Compute execution requirements for incremental compilation. + # - "no-sandbox": ensures local fallback builds see stable source paths + # (avoids the rustc ICE that occurs when sandbox paths change between builds). + # - "supports-workers": declares that process_wrapper supports Bazel's + # persistent worker protocol. When --strategy=Rustc=worker,local is set, + # Bazel uses the worker (which runs in execroot, also avoiding the sandbox + # path problem), enabling dynamic execution strategy as well. + exec_reqs = _build_worker_exec_reqs(use_worker_pipelining, is_incremental_enabled(ctx, crate_info), has_out_dir = bool(out_dir)) + + # When incremental compilation or worker pipelining is active and pipelining is + # enabled, add build_metadata as an ordering dep so Rustc(A) starts only after + # RustcMetadata(A) completes. For worker pipelining, this ensures the metadata + # action has started rustc before the full action tries to look it up. + # For incremental, prepare_outputs() chmods rmeta writable before rustc overwrites. 
+ rustc_inputs = compile_inputs + if worker_env_file: + rustc_inputs = depset([worker_env_file], transitive = [rustc_inputs]) + compile_inputs_for_metadata = depset([worker_env_file], transitive = [compile_inputs_for_metadata]) + if build_metadata and (is_incremental_enabled(ctx, crate_info) or use_worker_pipelining): + rustc_inputs = depset([build_metadata], transitive = [rustc_inputs]) + # Run as normal ctx.actions.run( executable = ctx.executable._process_wrapper, - inputs = compile_inputs, + inputs = rustc_inputs, outputs = action_outputs, env = env, arguments = args.all, @@ -1611,15 +1892,35 @@ def rustc_compile_action( ), toolchain = "@rules_rust//rust:toolchain_type", resource_set = get_rustc_resource_set(toolchain), + execution_requirements = exec_reqs, ) if args_metadata: + # When incremental compilation is enabled, RustcMetadata also runs as a + # worker (no-sandbox) so it can read and write the -meta-suffixed + # incremental cache at /tmp/rules_rust_incremental/-meta. + # Without worker mode it would be sandboxed and unable to accumulate + # incremental state, making every rebuild a cold compilation. + meta_exec_reqs = _build_worker_exec_reqs(use_worker_pipelining, is_incremental_enabled(ctx, crate_info), has_out_dir = bool(out_dir)) ctx.actions.run( executable = ctx.executable._process_wrapper, inputs = compile_inputs_for_metadata, outputs = [build_metadata] + [x for x in [rustc_rmeta_output] if x], env = env, arguments = args_metadata.all, - mnemonic = "RustcMetadata", + # All pipelining metadata actions use mnemonic "Rustc" (not + # "RustcMetadata") for two reasons: + # 1. Strategy equivalence: both pipelining modes (hollow-rlib and + # worker) should present the same mnemonic so Bazel treats them + # equivalently for strategy selection and aquery filtering. + # 2. Worker key sharing: Bazel derives the worker key from + # (mnemonic + executable + startup_args). 
Worker pipelining + # requires metadata and full actions to route to the same worker + # process to share PipelineState. + # NOTE: This is a breaking change from the former "RustcMetadata" + # mnemonic. Users with aquery filters or tooling that matched on + # "RustcMetadata" should switch to filtering by output type + # (-hollow.rlib or .rmeta) instead. + mnemonic = "Rustc", progress_message = "Compiling Rust metadata {} {}{} ({} file{})".format( crate_info.type, ctx.label.name, @@ -1628,6 +1929,7 @@ def rustc_compile_action( "" if len(srcs) == 1 else "s", ), toolchain = "@rules_rust//rust:toolchain_type", + execution_requirements = meta_exec_reqs, ) elif hasattr(ctx.executable, "_bootstrap_process_wrapper"): # Run without process_wrapper @@ -1957,6 +2259,17 @@ def _add_codegen_units_flags(toolchain, emit, args): args.add("-Ccodegen-units={}".format(toolchain._codegen_units)) +def _add_incremental_flags(ctx, args, crate_info, is_metadata = False): + """Adds flags to an Args object to configure incremental compilation for 'rustc'. + + Args: + ctx (ctx): The calling rule's context object. + args (Args): A reference to an Args object. + crate_info (CrateInfo): The CrateInfo provider of the target crate. + is_metadata (bool): True when building a RustcMetadata action. 
+ """ + args.add_all(construct_incremental_arguments(ctx, crate_info, is_metadata = is_metadata)) + def establish_cc_info(ctx, attr, crate_info, toolchain, cc_toolchain, feature_configuration, interface_library): """If the produced crate is suitable yield a CcInfo to allow for interop with cc rules diff --git a/rust/private/rustdoc.bzl b/rust/private/rustdoc.bzl index f302bab743..c530752cd8 100644 --- a/rust/private/rustdoc.bzl +++ b/rust/private/rustdoc.bzl @@ -131,7 +131,7 @@ def rustdoc_compile_action( attr = ctx.attr, file = ctx.file, toolchain = toolchain, - tool_path = toolchain.rust_doc.short_path if is_test else toolchain.rust_doc.path, + tool_path = toolchain.rust_doc.short_path if is_test else toolchain.rust_doc, cc_toolchain = cc_toolchain, feature_configuration = feature_configuration, crate_info = rustdoc_crate_info, diff --git a/rust/private/unpretty.bzl b/rust/private/unpretty.bzl index be111c83a3..66c7a4b63f 100644 --- a/rust/private/unpretty.bzl +++ b/rust/private/unpretty.bzl @@ -190,7 +190,7 @@ def _rust_unpretty_aspect_impl(target, ctx): attr = ctx.rule.attr, file = ctx.file, toolchain = toolchain, - tool_path = toolchain.rustc.path, + tool_path = toolchain.rustc, cc_toolchain = cc_toolchain, feature_configuration = feature_configuration, crate_info = crate_info, diff --git a/rust/private/utils.bzl b/rust/private/utils.bzl index 4d710b7fa9..8feab97b64 100644 --- a/rust/private/utils.bzl +++ b/rust/private/utils.bzl @@ -212,19 +212,22 @@ def get_lib_name_for_windows(lib): return libname -def determine_output_hash(crate_root, label): +def determine_output_hash(crate_root, label, salt = ""): """Generates a hash of the crate root file's path. Args: crate_root (File): The crate's root file (typically `lib.rs`). label (Label): The label of the target. + salt (str, optional): Additional mode-specific disambiguator to fold into + the hash so incompatible artifact variants do not reuse the same + on-disk filename. 
Returns: str: A string representation of the hash. """ # Take the absolute value of hash() since it could be negative. - h = abs(hash(crate_root.path) + hash(repr(label))) + h = abs(hash(crate_root.path) + hash(repr(label)) + hash(salt)) return repr(h) def get_preferred_artifact(library_to_link, use_pic): @@ -530,7 +533,7 @@ def filter_deps(ctx): proc_macro_deps = [] for dep in ctx.attr.proc_macro_deps: - if CrateInfo in dep and dep[CrateInfo].type == "proc-macro": + if (CrateInfo in dep and dep[CrateInfo].type == "proc-macro") or CrateGroupInfo in dep: proc_macro_deps.append(dep) return deps, proc_macro_deps diff --git a/rust/runfiles/runfiles.rs b/rust/runfiles/runfiles.rs index df427b7481..cae28a212a 100644 --- a/rust/runfiles/runfiles.rs +++ b/rust/runfiles/runfiles.rs @@ -447,7 +447,8 @@ mod test { { let mtx = GLOBAL_MUTEX.get_or_init(|| Mutex::new(0)); - // Ignore poisoning as it's expected to be another test failing an assertion. + // Test-only helper: ignore poisoning so one failed assertion does not + // cascade into unrelated env-mocking tests. let _guard = mtx.lock().unwrap_or_else(|poisoned| poisoned.into_inner()); // track the original state of the environment. 
diff --git a/rust/settings/BUILD.bazel b/rust/settings/BUILD.bazel index 6f0cd9dbca..4d53e018e6 100644 --- a/rust/settings/BUILD.bazel +++ b/rust/settings/BUILD.bazel @@ -12,8 +12,10 @@ load( "collect_cfgs", "default_allocator_library", "error_format", + "experimental_incremental", "experimental_link_std_dylib", "experimental_per_crate_rustc_flag", + "experimental_pipelined_compilation", "experimental_use_allocator_libraries_with_mangled_symbols", "experimental_use_cc_common_link", "experimental_use_coverage_metadata_files", @@ -29,7 +31,6 @@ load( "incompatible_do_not_include_transitive_data_in_compile_inputs", "lto", "no_std", - "pipelined_compilation", "rename_first_party_crates", "require_explicit_unstable_features", "rustc_output_diagnostics", @@ -75,6 +76,10 @@ codegen_units() collect_cfgs() +experimental_incremental() + +experimental_pipelined_compilation() + default_allocator_library() error_format() @@ -115,8 +120,6 @@ lto() no_std() -pipelined_compilation() - rename_first_party_crates() require_explicit_unstable_features() diff --git a/rust/settings/settings.bzl b/rust/settings/settings.bzl index 1c9bcb1d06..288efa55f1 100644 --- a/rust/settings/settings.bzl +++ b/rust/settings/settings.bzl @@ -111,34 +111,17 @@ def use_real_import_macro(): build_setting_default = False, ) -def pipelined_compilation(): - """When set, this flag enables pipelined compilation for rlib/lib crates. - - For each rlib/lib, a separate RustcMetadata action produces a hollow rlib - (via `-Zno-codegen`) containing only metadata. Downstream rlib/lib crates - can begin compiling against the hollow rlib before the upstream full codegen - action completes, increasing build parallelism. - - Pipelining applies to rlib→rlib dependencies by default. To also pipeline - bin/cdylib crates (starting their compile step before upstream full codegen - finishes), enable `experimental_use_cc_common_link` alongside this flag. 
- With cc_common.link, rustc only emits `.o` files for binaries (linking is - handled separately), so hollow rlib deps are safe for bins too. - """ - bool_flag( - name = "pipelined_compilation", - build_setting_default = False, - ) - # buildifier: disable=unnamed-macro def experimental_use_cc_common_link(): """A flag to control whether to link rust_binary and rust_test targets using \ cc_common.link instead of rustc. - When combined with `pipelined_compilation`, bin/cdylib crates also participate - in the hollow-rlib dependency chain: rustc only emits `.o` files (linking is - done by cc_common.link and does not check SVH), so bin compile steps can start - as soon as upstream hollow rlibs are ready rather than waiting for full codegen. + When combined with `experimental_pipelined_compilation`, bin/cdylib crates also + participate in the pipelined dependency chain: rustc only emits `.o` files + (linking is done by cc_common.link and does not check SVH), so bin compile + steps can start as soon as upstream metadata (hollow rlib or .rmeta) is ready + rather than waiting for full codegen. This applies to both hollow_rlib and + worker pipelining modes. """ bool_flag( name = "experimental_use_cc_common_link", @@ -561,6 +544,132 @@ def codegen_units(): build_setting_default = -1, ) +def experimental_incremental(): + """A flag to enable incremental compilation for Rust targets. + + When enabled, rustc is invoked with `-Cincremental=/tmp/rules_rust_incremental/`. + Rustc actions run as persistent workers so the incremental cache persists between builds. + + This flag is intended for local development builds only. Do not use in CI or release builds + as incremental compilation produces non-hermetic outputs. 
+ + Compatible with worker pipelining and multiplex sandboxing: + build:dev --@rules_rust//rust/settings:experimental_pipelined_compilation=worker + build:dev --@rules_rust//rust/settings:experimental_incremental=true + build:dev --experimental_worker_multiplex_sandboxing + build:dev --strategy=Rustc=worker,sandboxed + + Without worker pipelining: + build:dev --@rules_rust//rust/settings:experimental_incremental=true + build:dev --strategy=Rustc=worker + """ + bool_flag( + name = "experimental_incremental", + build_setting_default = False, + ) + +def experimental_pipelined_compilation(): + """Pipelined compilation mode for rlib/lib crates. + + This is the preferred way to configure pipelining. It replaces the older + combination of `pipelined_compilation` (bool) + `experimental_worker_pipelining` + (bool). If this flag is set to anything other than "off", it takes precedence + over the legacy boolean flags. + + The two pipelining modes use different metadata classes: + + - hollow_rlib uses "full metadata" — a hollow .rlib produced with -Zno-codegen. + The dependency graph is tier-consistent (hollow→hollow, full→full), so + non-deterministic proc macros never cause SVH mismatch. Compatible with all + execution strategies. This is the Buck2-style approach. + + - worker uses "fast metadata" — a .rmeta produced as an early milestone from a + single rustc process. Safety depends on keeping one rustc per crate. This is + the Cargo-style approach. + + For builds that may use sandboxed, remote, or dynamic execution, or any + configuration where metadata and full actions might run as separate processes, + hollow_rlib is the recommended portable mode. + + Values: + + off (default): + No pipelining. Each crate compiles sequentially after its dependencies + finish full codegen. 
+ + hollow_rlib (recommended for portable builds): + Each pipelined rlib/lib crate gets two actions: a metadata action that + runs rustc with -Zno-codegen to produce a hollow .rlib (full metadata), + and a full action that produces the real .rlib. The tier-consistent graph + ensures SVH compatibility regardless of execution strategy. + + Compatible with: local, sandboxed, remote, dynamic — all strategies. + + build --@rules_rust//rust/settings:experimental_pipelined_compilation=hollow_rlib + + worker: + A persistent multiplex worker runs a single rustc per crate, returning + fast metadata (.rmeta) early and finishing the .rlib in the background. + Reduces total rustc invocations by ~50%. Requires worker execution + strategy; non-worker strategies (sandboxed, local, remote) fall back to + running a second rustc, which fails with non-deterministic proc macros + due to cross-tier SVH mismatch. + + build --@rules_rust//rust/settings:experimental_pipelined_compilation=worker + build --strategy=Rustc=worker + + Both modes set RUSTC_BOOTSTRAP=1 and --cfg=rules_rust_pipelined on all + Rustc actions (rlibs, binaries, tests, proc-macros) for SVH consistency. + + Worker mode execution strategy compatibility: + + local: ✓* (runs a second rustc; nondeterministic proc macros + are detected and fail with a clear diagnostic) + sandboxed: ✓* (same as local) + worker: ✓ (recommended — single rustc via PipelineState) + dynamic: ✓ (local leg uses multiplex sandboxed worker; remote leg + runs standalone — fails fast on SVH mismatch, local + leg wins the race) + remote: ✓* (same as sandboxed) + + * Deterministic proc macros only. Non-deterministic proc macros + (those that iterate HashMap/HashSet) will produce an SVH mismatch + error with fix suggestions. Use worker strategy or hollow_rlib to + support non-deterministic proc macros. 
+ + Recommended configurations: + + # Portable pipelining — safe with all strategies (recommended default): + build --@rules_rust//rust/settings:experimental_pipelined_compilation=hollow_rlib + + # Worker pipelining — maximum parallelism for local builds: + build --@rules_rust//rust/settings:experimental_pipelined_compilation=worker + build --strategy=Rustc=worker + + # Dynamic execution (local worker racing against remote): + build --@rules_rust//rust/settings:experimental_pipelined_compilation=worker + build --experimental_worker_multiplex_sandboxing + build --strategy=Rustc=dynamic + build --dynamic_local_strategy=Rustc=worker,sandboxed + build --dynamic_remote_strategy=Rustc=remote + + # With incremental compilation: + build --@rules_rust//rust/settings:experimental_pipelined_compilation=worker + build --@rules_rust//rust/settings:experimental_incremental=true + build --experimental_worker_multiplex_sandboxing + build --strategy=Rustc=worker,sandboxed + + To also pipeline bin/cdylib crates, enable `experimental_use_cc_common_link`. + + See util/process_wrapper/DESIGN.md for the full strategy compatibility + matrix and design rationale. + """ + string_flag( + name = "experimental_pipelined_compilation", + build_setting_default = "off", + values = ["off", "hollow_rlib", "worker"], + ) + # buildifier: disable=unnamed-macro def collect_cfgs(): """Enable collection of cfg flags with results stored in CrateInfo.cfgs. 
diff --git a/rust/toolchain.bzl b/rust/toolchain.bzl index 471a28ec7f..579ac1bb1b 100644 --- a/rust/toolchain.bzl +++ b/rust/toolchain.bzl @@ -392,7 +392,9 @@ def _rust_toolchain_impl(ctx): rename_first_party_crates = ctx.attr._rename_first_party_crates[BuildSettingInfo].value third_party_dir = ctx.attr._third_party_dir[BuildSettingInfo].value - pipelined_compilation = ctx.attr._pipelined_compilation[BuildSettingInfo].value + pipelining_mode = ctx.attr._experimental_pipelined_compilation[BuildSettingInfo].value + pipelined_compilation = pipelining_mode != "off" + worker_pipelining = pipelining_mode == "worker" no_std = ctx.attr._no_std[BuildSettingInfo].value lto = ctx.attr.lto[RustLtoInfo] @@ -604,6 +606,7 @@ def _rust_toolchain_impl(ctx): extra_exec_rustc_flags = expanded_extra_exec_rustc_flags, per_crate_rustc_flags = ctx.attr.per_crate_rustc_flags, sysroot = sysroot_path, + sysroot_anchor = sysroot.sysroot_anchor, sysroot_short_path = sysroot_short_path, target_arch = target_arch, target_flag_value = target_json.path if target_json else target_triple.str, @@ -617,6 +620,7 @@ def _rust_toolchain_impl(ctx): _rename_first_party_crates = rename_first_party_crates, _third_party_dir = third_party_dir, _pipelined_compilation = pipelined_compilation, + _worker_pipelining = worker_pipelining, _experimental_link_std_dylib = _experimental_link_std_dylib(ctx), _experimental_use_cc_common_link = _experimental_use_cc_common_link(ctx), _experimental_use_global_allocator = experimental_use_global_allocator, @@ -877,8 +881,8 @@ rust_toolchain = rule( "_no_std": attr.label( default = Label("//rust/settings:no_std"), ), - "_pipelined_compilation": attr.label( - default = Label("//rust/settings:pipelined_compilation"), + "_experimental_pipelined_compilation": attr.label( + default = Label("//rust/settings:experimental_pipelined_compilation"), ), "_rename_first_party_crates": attr.label( default = Label("//rust/settings:rename_first_party_crates"), diff --git 
a/test/chained_direct_deps/mod1.rs b/test/chained_direct_deps/mod1.rs index a66f490afb..5963f0522d 100644 --- a/test/chained_direct_deps/mod1.rs +++ b/test/chained_direct_deps/mod1.rs @@ -1,6 +1,9 @@ pub fn world() -> String { "world".to_owned() } +pub fn hello() -> String { + "hello".to_owned() +} #[cfg(test)] mod test { @@ -8,4 +11,10 @@ mod test { fn test_world() { assert_eq!(super::world(), "world"); } + #[test] + fn test_hello() { + assert_eq!(super::world(), "world"); + + assert_eq!(super::hello(), "hello"); + } } diff --git a/test/unit/incremental/BUILD.bazel b/test/unit/incremental/BUILD.bazel new file mode 100644 index 0000000000..c3749d7525 --- /dev/null +++ b/test/unit/incremental/BUILD.bazel @@ -0,0 +1,5 @@ +load(":incremental_test_suite.bzl", "incremental_test_suite") + +incremental_test_suite( + name = "incremental_test_suite", +) diff --git a/test/unit/incremental/incremental_test_suite.bzl b/test/unit/incremental/incremental_test_suite.bzl new file mode 100644 index 0000000000..54869e1bbf --- /dev/null +++ b/test/unit/incremental/incremental_test_suite.bzl @@ -0,0 +1,139 @@ +"""Starlark tests for `//rust/settings:experimental_incremental`""" + +load("@bazel_skylib//lib:unittest.bzl", "analysistest") +load("@bazel_skylib//rules:write_file.bzl", "write_file") +load("//rust:defs.bzl", "rust_library", "rust_proc_macro") +load( + "//test/unit:common.bzl", + "assert_action_mnemonic", + "assert_argv_contains_prefix", + "assert_argv_contains_prefix_not", +) + +# Checks that -Cincremental flag is present in Rustc action +def _incremental_enabled_test_impl(ctx): + env = analysistest.begin(ctx) + target = analysistest.target_under_test(env) + + action = target.actions[0] + assert_action_mnemonic(env, action, "Rustc") + assert_argv_contains_prefix(env, action, "-Cincremental=") + + return analysistest.end(env) + +_incremental_enabled_test = analysistest.make( + _incremental_enabled_test_impl, + config_settings = { + 
str(Label("//rust/settings:experimental_incremental")): True, + }, +) + +# Checks that -Cincremental flag is absent by default +def _incremental_disabled_test_impl(ctx): + env = analysistest.begin(ctx) + target = analysistest.target_under_test(env) + + action = target.actions[0] + assert_action_mnemonic(env, action, "Rustc") + assert_argv_contains_prefix_not(env, action, "-Cincremental") + + return analysistest.end(env) + +_incremental_disabled_test = analysistest.make( + _incremental_disabled_test_impl, + config_settings = {}, +) + +# Checks that -Cincremental flag is NOT added for proc-macros even when enabled +def _incremental_proc_macro_test_impl(ctx): + env = analysistest.begin(ctx) + target = analysistest.target_under_test(env) + + action = target.actions[0] + assert_action_mnemonic(env, action, "Rustc") + assert_argv_contains_prefix_not(env, action, "-Cincremental") + + return analysistest.end(env) + +_incremental_proc_macro_test = analysistest.make( + _incremental_proc_macro_test_impl, + config_settings = { + str(Label("//rust/settings:experimental_incremental")): True, + }, +) + +# Checks the incremental cache path contains the crate name +def _incremental_cache_path_test_impl(ctx): + env = analysistest.begin(ctx) + target = analysistest.target_under_test(env) + + action = target.actions[0] + assert_action_mnemonic(env, action, "Rustc") + assert_argv_contains_prefix(env, action, "-Cincremental=/tmp/rules_rust_incremental/") + + return analysistest.end(env) + +_incremental_cache_path_test = analysistest.make( + _incremental_cache_path_test_impl, + config_settings = { + str(Label("//rust/settings:experimental_incremental")): True, + }, +) + +def incremental_test_suite(name): + """Entry-point macro called from the BUILD file. + + Args: + name (str): The name of the test suite. 
+ """ + write_file( + name = "crate_lib", + out = "lib.rs", + content = [ + "#[allow(dead_code)]", + "fn add() {}", + "", + ], + ) + + rust_library( + name = "lib", + srcs = [":lib.rs"], + edition = "2021", + ) + + rust_proc_macro( + name = "proc_macro", + srcs = [":lib.rs"], + edition = "2021", + ) + + _incremental_enabled_test( + name = "incremental_enabled_test", + target_under_test = ":lib", + ) + + _incremental_disabled_test( + name = "incremental_disabled_test", + target_under_test = ":lib", + ) + + _incremental_proc_macro_test( + name = "incremental_proc_macro_test", + target_under_test = ":proc_macro", + ) + + _incremental_cache_path_test( + name = "incremental_cache_path_test", + target_under_test = ":lib", + ) + + native.test_suite( + name = name, + tests = [ + ":incremental_enabled_test", + ":incremental_disabled_test", + ":incremental_proc_macro_test", + ":incremental_cache_path_test", + ], + ) diff --git a/test/unit/pipelined_compilation/BUILD.bazel b/test/unit/pipelined_compilation/BUILD.bazel index 8d363e03ed..f4ec49223e 100644 --- a/test/unit/pipelined_compilation/BUILD.bazel +++ b/test/unit/pipelined_compilation/BUILD.bazel @@ -1,4 +1,42 @@ +load("@rules_shell//shell:sh_test.bzl", "sh_test") load(":pipelined_compilation_test.bzl", "pipelined_compilation_test_suite") ############################ UNIT TESTS ############################# pipelined_compilation_test_suite(name = "pipelined_compilation_test_suite") + +################### WORKER PIPELINING E2E TEST ###################### +sh_test( + name = "worker_pipelining_nondeterministic_test", + srcs = ["worker_pipelining_nondeterministic_test.sh"], + tags = [ + "local", + "manual", + "no-sandbox", + ], +) + +################### ARTIFACT HASH INSTRUMENTATION ##################### +sh_test( + name = "artifact_hash_check", + srcs = ["artifact_hash_check.sh"], + tags = [ + "local", + "manual", + "no-sandbox", + ], +) + +############################ STRACE TEST ############################# +sh_test( + 
name = "strace_rustc_post_metadata_test", + srcs = ["strace_rustc_post_metadata_test.sh"], + tags = [ + "local", + "manual", + "no-sandbox", + ], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), +) diff --git a/test/unit/pipelined_compilation/artifact_hash_check.sh b/test/unit/pipelined_compilation/artifact_hash_check.sh new file mode 100755 index 0000000000..4b08008d64 --- /dev/null +++ b/test/unit/pipelined_compilation/artifact_hash_check.sh @@ -0,0 +1,172 @@ +#!/usr/bin/env bash +# Artifact hash instrumentation for pipelined compilation debugging. +# +# Computes and displays hashes for the three artifact types relevant to +# pipelined compilation SVH consistency: +# +# 1. Declared metadata artifact (hollow .rlib or .rmeta) +# 2. Full .rlib +# 3. Embedded lib.rmeta extracted from the full .rlib (ar archive member) +# +# This script is useful for: +# - Validating that hollow_rlib (full metadata) and full .rlib produce +# compatible metadata across separate rustc invocations +# - Investigating SVH mismatch regressions +# - Comparing artifact hashes across rustc versions or flag changes +# - Verifying determinism of proc macro expansion +# +# Usage: +# ./artifact_hash_check.sh [pipelining_mode] +# +# Examples: +# # Check hollow-rlib artifacts (default): +# ./artifact_hash_check.sh //my/crate:lib hollow_rlib +# +# # Check worker pipelining artifacts: +# ./artifact_hash_check.sh //my/crate:lib worker +# +# # Compare across modes: +# ./artifact_hash_check.sh //my/crate:lib hollow_rlib > /tmp/hollow.txt +# ./artifact_hash_check.sh //my/crate:lib worker > /tmp/worker.txt +# diff /tmp/hollow.txt /tmp/worker.txt +# +# Tagged manual + local; not part of the automated test suite. 
+set -euo pipefail + +if [[ -z "${BUILD_WORKSPACE_DIRECTORY:-}" && -z "${1:-}" ]]; then + echo "Usage: $0 [pipelining_mode]" + echo "" + echo " crate_label: Bazel label of a rust_library target" + echo " pipelining_mode: off, hollow_rlib, or worker (default: hollow_rlib)" + exit 1 +fi + +CRATE_LABEL="${1:?crate label required}" +PIPELINING_MODE="${2:-hollow_rlib}" + +# If running under Bazel, cd to workspace +if [[ -n "${BUILD_WORKSPACE_DIRECTORY:-}" ]]; then + cd "${BUILD_WORKSPACE_DIRECTORY}" +fi + +echo "=== Artifact Hash Check ===" +echo "Crate: ${CRATE_LABEL}" +echo "Pipelining mode: ${PIPELINING_MODE}" +echo "" + +# Build the target +echo "--- Building ---" +bazel build "${CRATE_LABEL}" \ + --@rules_rust//rust/settings:experimental_pipelined_compilation="${PIPELINING_MODE}" \ + --disk_cache="" \ + --noremote_accept_cached \ + --noremote_upload_local_results \ + 2>&1 | tail -3 + +# Find output files via aquery +echo "" +echo "--- Locating artifacts ---" + +# Get the crate name from the label for file matching +CRATE_NAME=$(echo "${CRATE_LABEL}" | sed 's|.*:||; s|-|_|g') + +# Find artifacts in bazel-bin +BAZEL_BIN=$(bazel info bazel-bin 2>/dev/null) + +find_artifacts() { + local pattern="$1" + find "${BAZEL_BIN}" -name "${pattern}" -path "*${CRATE_NAME}*" 2>/dev/null | head -5 +} + +echo "" +echo "--- Artifact Hashes ---" +echo "" + +# 1. 
Declared metadata artifact +echo "# Metadata artifacts (fast .rmeta or full hollow .rlib):" +RMETA_FILES=$(find_artifacts "*.rmeta") +HOLLOW_FILES=$(find_artifacts "*-hollow.rlib") + +for f in ${RMETA_FILES:-}; do + SIZE=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f" 2>/dev/null) + HASH=$(sha256sum "$f" | cut -d' ' -f1) + echo " .rmeta: ${HASH} ${SIZE} bytes ${f}" +done + +for f in ${HOLLOW_FILES:-}; do + SIZE=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f" 2>/dev/null) + HASH=$(sha256sum "$f" | cut -d' ' -f1) + echo " hollow .rlib: ${HASH} ${SIZE} bytes ${f}" +done + +if [[ -z "${RMETA_FILES:-}" && -z "${HOLLOW_FILES:-}" ]]; then + echo " (none found)" +fi + +# 2. Full .rlib +echo "" +echo "# Full .rlib artifacts:" +RLIB_FILES=$(find_artifacts "*.rlib" | grep -v "\-hollow\.rlib$" || true) + +for f in ${RLIB_FILES:-}; do + SIZE=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f" 2>/dev/null) + HASH=$(sha256sum "$f" | cut -d' ' -f1) + echo " .rlib: ${HASH} ${SIZE} bytes ${f}" +done + +if [[ -z "${RLIB_FILES:-}" ]]; then + echo " (none found)" +fi + +# 3. Embedded lib.rmeta from full .rlib +echo "" +echo "# Embedded lib.rmeta extracted from .rlib (ar archive member):" + +TMPDIR=$(mktemp -d) +trap 'rm -rf "$TMPDIR"' EXIT + +for f in ${RLIB_FILES:-}; do + # Extract lib.rmeta from the .rlib ar archive + EXTRACT_DIR="${TMPDIR}/$(basename "$f")" + mkdir -p "${EXTRACT_DIR}" + if ar x --output="${EXTRACT_DIR}" "$f" lib.rmeta 2>/dev/null; then + EMBEDDED="${EXTRACT_DIR}/lib.rmeta" + SIZE=$(stat -c%s "$EMBEDDED" 2>/dev/null || stat -f%z "$EMBEDDED" 2>/dev/null) + HASH=$(sha256sum "$EMBEDDED" | cut -d' ' -f1) + echo " lib.rmeta: ${HASH} ${SIZE} bytes (from ${f})" + else + echo " lib.rmeta: (extraction failed for ${f})" + fi +done + +if [[ -z "${RLIB_FILES:-}" ]]; then + echo " (no .rlib to extract from)" +fi + +# 4. 
Cross-check: compare standalone .rmeta with embedded lib.rmeta +echo "" +echo "# Cross-check: standalone .rmeta vs embedded lib.rmeta:" + +for rmeta_f in ${RMETA_FILES:-}; do + RMETA_HASH=$(sha256sum "$rmeta_f" | cut -d' ' -f1) + for rlib_f in ${RLIB_FILES:-}; do + EXTRACT_DIR="${TMPDIR}/$(basename "$rlib_f")" + EMBEDDED="${EXTRACT_DIR}/lib.rmeta" + if [[ -f "${EMBEDDED}" ]]; then + EMBEDDED_HASH=$(sha256sum "$EMBEDDED" | cut -d' ' -f1) + if [[ "${RMETA_HASH}" == "${EMBEDDED_HASH}" ]]; then + echo " MATCH: standalone .rmeta == embedded lib.rmeta" + echo " ${RMETA_HASH}" + else + echo " MISMATCH: standalone .rmeta != embedded lib.rmeta" + echo " standalone: ${RMETA_HASH}" + echo " embedded: ${EMBEDDED_HASH}" + echo " This is expected — standalone .rmeta and embedded lib.rmeta" + echo " have different formats (see rustc_metadata::rmeta)." + fi + fi + done +done + +echo "" +echo "Done." diff --git a/test/unit/pipelined_compilation/pipelined_compilation_test.bzl b/test/unit/pipelined_compilation/pipelined_compilation_test.bzl index 0f638c3ee6..e1571c1dc0 100644 --- a/test/unit/pipelined_compilation/pipelined_compilation_test.bzl +++ b/test/unit/pipelined_compilation/pipelined_compilation_test.bzl @@ -1,12 +1,61 @@ -"""Unittests for rust rules.""" +"""Unittests for rust rules. + +Test matrix — pipelining mode × test layer: + + Hollow-rlib pipelining ("full metadata", Buck2-style): two actions per crate, + each a separate rustc process. Metadata action uses -Zno-codegen to produce a + hollow .rlib containing full metadata. The graph is tier-consistent (hollow→hollow, + full→full), so non-deterministic proc macros do NOT cause SVH mismatch. + Compatible with all execution strategies (local, sandboxed, remote, dynamic). + + Worker pipelining ("fast metadata", Cargo-style): one rustc process per crate. + Metadata action returns early when .rmeta is emitted; full action joins the same + process. Safety depends on keeping one rustc per crate. 
Has a cross-tier + dependency (full action → upstream .rmeta) when running outside a worker, so + non-deterministic proc macros cause SVH mismatch under sandboxed, local, or + remote execution. + + │ no pipelining │ hollow-rlib │ worker pipelining + │ (pipeline=false) │ (pipeline=true, │ (pipeline=true, + │ │ worker=false) │ worker=true) +──────────────────────────┼───────────────────┼───────────────────────┼────────────────────────── +Action graph (analysis) │ (baseline: no │ second_lib_test │ worker_pipelining_ + │ RustcMetadata │ bin_test │ second_lib_test + │ action created) │ hollow_rlib_env_test │ + │ │ rmeta_*_custom_rule_* │ + │ │ rmeta_not_produced_* │ +──────────────────────────┼───────────────────┼───────────────────────┼────────────────────────── +Artifact determinism │ (precondition in │ (covered by analysis: │ test_pipelined_matches_ +(process_wrapper_test) │ pipelined test) │ hollow_rlib_env_test │ standalone (main.rs) + │ │ verifies consistent │ + │ │ flags / RUSTC_BOOT- │ + │ │ STRAP across actions)│ +──────────────────────────┼───────────────────┼───────────────────────┼────────────────────────── +E2E nondet proc macro │ nondeterministic_ │ nondeterministic_ │ nondeterministic_ +(sh_tests / rust_test) │ test.sh Phase 2 │ test.sh Phase 3 │ test.sh Phase 1 + │ (baseline: no │ (full metadata, │ (fast metadata, + │ pipelining, │ tier-consistent │ worker exec, + │ must pass) │ graph, must pass) │ must pass) + │ │ │ + │ │ svh_mismatch_test │ nondeterministic_ + │ │ (rust_test, flaky) │ test.sh Phase 4 + │ │ │ (fast metadata, + │ │ │ sandboxed exec, + │ │ │ expected: E0460 + │ │ │ or E0463) +""" load("@bazel_skylib//lib:unittest.bzl", "analysistest", "asserts") load("//rust:defs.bzl", "rust_binary", "rust_library", "rust_proc_macro", "rust_test") -load("//test/unit:common.bzl", "assert_argv_contains", "assert_list_contains_adjacent_elements_not") +load("//test/unit:common.bzl", "assert_argv_contains", "assert_argv_contains_not", 
"assert_list_contains_adjacent_elements_not") load(":wrap.bzl", "wrap") ENABLE_PIPELINING = { - str(Label("//rust/settings:pipelined_compilation")): True, + str(Label("//rust/settings:experimental_pipelined_compilation")): "hollow_rlib", +} + +ENABLE_WORKER_PIPELINING = { + str(Label("//rust/settings:experimental_pipelined_compilation")): "worker", } # TODO: Fix pipeline compilation on windows @@ -19,8 +68,19 @@ _NO_WINDOWS = select({ def _second_lib_test_impl(ctx): env = analysistest.begin(ctx) tut = analysistest.target_under_test(env) - rlib_action = [act for act in tut.actions if act.mnemonic == "Rustc"][0] - metadata_action = [act for act in tut.actions if act.mnemonic == "RustcMetadata"][0] + + # Both metadata and full actions use mnemonic "Rustc"; distinguish by output type. + rustc_actions = [act for act in tut.actions if act.mnemonic == "Rustc"] + rlib_action = [ + act + for act in rustc_actions + if len([o for o in act.outputs.to_list() if o.path.endswith(".rlib") and not o.path.endswith("-hollow.rlib")]) > 0 + ][0] + metadata_action = [ + act + for act in rustc_actions + if len([o for o in act.outputs.to_list() if o.path.endswith("-hollow.rlib")]) > 0 + ][0] # Hollow rlib approach: Rustc action uses --emit=dep-info,link (no metadata). assert_argv_contains(env, rlib_action, "--emit=dep-info,link") @@ -64,10 +124,9 @@ def _second_lib_test_impl(ctx): # The metadata action references first's hollow rlib for --extern (pipelining: starts # before first's full codegen finishes). The Rustc action uses the full rlib for - # --extern so the full rlib's embedded SVH matches the full rlib that downstream - # binaries (without cc_common.link) see in their -Ldependency path. If both actions - # used the hollow rlib, nondeterministic proc macros could produce different SVHs - # for the hollow vs full rlib, causing E0460 in downstream binary builds. + # --extern. 
This tier-consistent wiring (hollow→hollow, full→full) ensures that SVH + # references are self-consistent within each tier, preventing E0460 even with + # nondeterministic proc macros. extern_metadata = [arg for arg in metadata_action.argv if arg.startswith("--extern=first=") and "libfirst" in arg and arg.endswith("-hollow.rlib")] asserts.true( env, @@ -168,9 +227,15 @@ def _rmeta_is_propagated_through_custom_rule_test_impl(ctx): env = analysistest.begin(ctx) tut = analysistest.target_under_test(env) - # This is the metadata-generating action. It should depend on metadata for the library and, if generate_metadata is set - # also depend on metadata for 'wrapper'. - rust_action = [act for act in tut.actions if act.mnemonic == "RustcMetadata"][0] + # This is the metadata-generating action (hollow rlib). It should depend on metadata for + # the library and, if generate_metadata is set, also depend on metadata for 'wrapper'. + # Both actions use mnemonic "Rustc"; find metadata by -hollow.rlib output. + rust_action = [ + act + for act in tut.actions + if act.mnemonic == "Rustc" and + len([o for o in act.outputs.to_list() if o.path.endswith("-hollow.rlib")]) > 0 + ][0] metadata_inputs = [i for i in rust_action.inputs.to_list() if i.path.endswith("-hollow.rlib")] rlib_inputs = [i for i in rust_action.inputs.to_list() if i.path.endswith(".rlib") and not i.path.endswith("-hollow.rlib")] @@ -239,22 +304,44 @@ def _rmeta_not_produced_if_pipelining_disabled_test_impl(ctx): env = analysistest.begin(ctx) tut = analysistest.target_under_test(env) - rust_action = [act for act in tut.actions if act.mnemonic == "RustcMetadata"] - asserts.true(env, len(rust_action) == 0, "expected no metadata to be produced, but found a metadata action") + # With disable_pipelining=True, no metadata action should be created. + # Since all pipelining actions use mnemonic "Rustc", check for absence of + # hollow-rlib or .rmeta outputs instead of checking mnemonic. 
+ metadata_actions = [ + act + for act in tut.actions + if act.mnemonic == "Rustc" and len([ + o + for o in act.outputs.to_list() + if o.path.endswith("-hollow.rlib") or o.path.endswith(".rmeta") + ]) > 0 + ] + asserts.true(env, len(metadata_actions) == 0, "expected no metadata to be produced, but found a metadata action") return analysistest.end(env) rmeta_not_produced_if_pipelining_disabled_test = analysistest.make(_rmeta_not_produced_if_pipelining_disabled_test_impl, config_settings = ENABLE_PIPELINING) def _hollow_rlib_env_test_impl(ctx): - """Verify RUSTC_BOOTSTRAP=1 is set consistently on both Rustc and RustcMetadata actions. + """Verify RUSTC_BOOTSTRAP=1 is set consistently on both metadata and full Rustc actions. RUSTC_BOOTSTRAP=1 changes the crate hash (SVH), so it must be set on both actions to keep the hollow rlib and full rlib SVHs consistent.""" env = analysistest.begin(ctx) tut = analysistest.target_under_test(env) - metadata_action = [act for act in tut.actions if act.mnemonic == "RustcMetadata"][0] - rlib_action = [act for act in tut.actions if act.mnemonic == "Rustc"][0] + + # Both actions use mnemonic "Rustc"; distinguish by output type. + rustc_actions = [act for act in tut.actions if act.mnemonic == "Rustc"] + metadata_action = [ + act + for act in rustc_actions + if len([o for o in act.outputs.to_list() if o.path.endswith("-hollow.rlib")]) > 0 + ][0] + rlib_action = [ + act + for act in rustc_actions + if len([o for o in act.outputs.to_list() if o.path.endswith(".rlib") and not o.path.endswith("-hollow.rlib")]) > 0 + ][0] asserts.equals( env, @@ -273,6 +360,266 @@ def _hollow_rlib_env_test_impl(ctx): hollow_rlib_env_test = analysistest.make(_hollow_rlib_env_test_impl, config_settings = ENABLE_PIPELINING) +def _worker_pipelining_second_lib_test_impl(ctx): + """Verify worker pipelining uses .rmeta output (not hollow rlib) for pipelined libs. 
+ + With experimental_pipelined_compilation=worker, both the metadata and full actions use + mnemonic "Rustc" (same mnemonic ensures they share the same worker process and + PipelineState). They are distinguished by their outputs: + - Metadata action: produces .rmeta file + - Full action: produces .rlib file + + The metadata action must: + - Produce a .rmeta file (not -hollow.rlib) — single rustc invocation, no -Zno-codegen + - Set RUSTC_BOOTSTRAP=1 (for strategy equivalence with hollow-rlib mode) + - Take first's .rmeta as input (not first's hollow rlib) + + The Rustc (full) action must: + - Set RUSTC_BOOTSTRAP=1 (for strategy equivalence with hollow-rlib mode) + - Also take first's .rmeta as input (same input set as metadata — no force_depend_on_objects) + """ + env = analysistest.begin(ctx) + tut = analysistest.target_under_test(env) + + # Both metadata and full actions share mnemonic "Rustc" with worker pipelining. + # Distinguish by output: metadata action outputs .rmeta; full action outputs .rlib. + rustc_actions = [act for act in tut.actions if act.mnemonic == "Rustc"] + metadata_actions = [ + act + for act in rustc_actions + if len([o for o in act.outputs.to_list() if o.path.endswith(".rmeta")]) > 0 + ] + rlib_actions = [ + act + for act in rustc_actions + if len([ + o + for o in act.outputs.to_list() + if o.path.endswith(".rlib") and not o.path.endswith("-hollow.rlib") + ]) > 0 + ] + asserts.true( + env, + len(metadata_actions) >= 1, + "expected a Rustc action with .rmeta output for worker pipelining metadata", + ) + asserts.true( + env, + len(rlib_actions) >= 1, + "expected a Rustc action with .rlib output", + ) + metadata_action = metadata_actions[0] + rlib_action = rlib_actions[0] + + # Metadata output must be .rmeta, not -hollow.rlib. 
+ metadata_outputs = metadata_action.outputs.to_list() + rmeta_outputs = [o for o in metadata_outputs if o.path.endswith(".rmeta")] + hollow_outputs = [o for o in metadata_outputs if o.path.endswith("-hollow.rlib")] + asserts.true( + env, + len(rmeta_outputs) >= 1, + "expected .rmeta output for worker pipelining, got: " + str([o.path for o in metadata_outputs]), + ) + asserts.true( + env, + len(hollow_outputs) == 0, + "unexpected -hollow.rlib output (hollow rlib should not be used with worker pipelining): " + str([o.path for o in hollow_outputs]), + ) + + # Both actions must set RUSTC_BOOTSTRAP=1 for strategy equivalence with hollow-rlib. + asserts.equals( + env, + "1", + metadata_action.env.get("RUSTC_BOOTSTRAP", ""), + "RUSTC_BOOTSTRAP=1 required for strategy equivalence with hollow-rlib mode", + ) + asserts.equals( + env, + "1", + rlib_action.env.get("RUSTC_BOOTSTRAP", ""), + "RUSTC_BOOTSTRAP=1 required for strategy equivalence with hollow-rlib mode", + ) + + # Both actions take first's .rmeta as input (not hollow rlib). + # Worker pipelining does not use force_depend_on_objects, so both actions + # use the same pipelined (rmeta) input set. 
+ first_inputs_metadata = [i for i in metadata_action.inputs.to_list() if "libfirst" in i.path] + first_inputs_full = [i for i in rlib_action.inputs.to_list() if "libfirst" in i.path] + + asserts.true( + env, + len([i for i in first_inputs_metadata if i.path.endswith(".rmeta")]) >= 1, + "expected first's .rmeta in metadata action inputs, found: " + str([i.path for i in first_inputs_metadata]), + ) + asserts.true( + env, + len([i for i in first_inputs_metadata if i.path.endswith("-hollow.rlib")]) == 0, + "unexpected hollow rlib in metadata action inputs: " + str([i.path for i in first_inputs_metadata]), + ) + asserts.true( + env, + len([i for i in first_inputs_full if i.path.endswith(".rmeta")]) >= 1, + "expected first's .rmeta in full Rustc action inputs (no force_depend_on_objects), found: " + str([i.path for i in first_inputs_full]), + ) + asserts.true( + env, + len([i for i in first_inputs_full if i.path.endswith("-hollow.rlib")]) == 0, + "unexpected hollow rlib in full Rustc action inputs: " + str([i.path for i in first_inputs_full]), + ) + + return analysistest.end(env) + +worker_pipelining_second_lib_test = analysistest.make( + _worker_pipelining_second_lib_test_impl, + config_settings = ENABLE_WORKER_PIPELINING, +) + +def _worker_pipelining_test(): + worker_pipelining_second_lib_test( + name = "worker_pipelining_second_lib_test", + target_under_test = ":second", + target_compatible_with = _NO_WINDOWS, + ) + return [":worker_pipelining_second_lib_test"] + +# --- Strategy equivalence tests --- +# +# These tests assert that hollow-rlib and worker-pipelining modes produce +# equivalent rustc-visible behavior (same env vars, same --cfg discriminator, +# same mnemonic). The Bazel invariant requires that switching strategies does +# not change action output (Julio Merino, "What are Bazel's strategies?"). 
+ +def _strategy_equivalence_worker_test_impl(ctx): + """Assert worker-pipelining actions have unified env/flags for strategy equivalence.""" + env = analysistest.begin(ctx) + tut = analysistest.target_under_test(env) + + # Both actions use mnemonic "Rustc" in worker mode; distinguish by output. + rustc_actions = [act for act in tut.actions if act.mnemonic == "Rustc"] + metadata_action = [ + act + for act in rustc_actions + if len([o for o in act.outputs.to_list() if o.path.endswith(".rmeta")]) > 0 + ][0] + rlib_action = [ + act + for act in rustc_actions + if len([ + o + for o in act.outputs.to_list() + if o.path.endswith(".rlib") and not o.path.endswith("-hollow.rlib") + ]) > 0 + ][0] + + # RUSTC_BOOTSTRAP=1 must be set on both actions for SVH compatibility + # with hollow-rlib mode (RUSTC_BOOTSTRAP changes the crate hash). + asserts.equals( + env, + "1", + metadata_action.env.get("RUSTC_BOOTSTRAP", ""), + "Strategy equivalence: worker metadata action must have RUSTC_BOOTSTRAP=1", + ) + asserts.equals( + env, + "1", + rlib_action.env.get("RUSTC_BOOTSTRAP", ""), + "Strategy equivalence: worker full action must have RUSTC_BOOTSTRAP=1", + ) + + # Unified --cfg discriminator (not mode-specific). 
+ assert_argv_contains(env, metadata_action, "--cfg=rules_rust_pipelined") + assert_argv_contains(env, rlib_action, "--cfg=rules_rust_pipelined") + assert_argv_contains_not(env, metadata_action, "--cfg=rules_rust_worker_pipelining") + assert_argv_contains_not(env, rlib_action, "--cfg=rules_rust_worker_pipelining") + + return analysistest.end(env) + +strategy_equivalence_worker_test = analysistest.make( + _strategy_equivalence_worker_test_impl, + config_settings = ENABLE_WORKER_PIPELINING, +) + +def _strategy_equivalence_hollow_test_impl(ctx): + """Assert hollow-rlib actions have unified env/flags and mnemonic for strategy equivalence.""" + env = analysistest.begin(ctx) + tut = analysistest.target_under_test(env) + + # After unification, both modes use mnemonic "Rustc" for metadata. + # Find metadata action by output type: has -hollow.rlib output. + rustc_actions = [act for act in tut.actions if act.mnemonic == "Rustc"] + metadata_actions = [ + act + for act in rustc_actions + if len([o for o in act.outputs.to_list() if o.path.endswith("-hollow.rlib")]) > 0 + ] + if len(metadata_actions) == 0: + asserts.true( + env, + False, + "Strategy equivalence: hollow-rlib metadata action should use mnemonic 'Rustc', " + + "but no Rustc action found with -hollow.rlib output. " + + "Actions: " + str([(a.mnemonic, [o.path for o in a.outputs.to_list()]) for a in tut.actions]), + ) + return analysistest.end(env) + metadata_action = metadata_actions[0] + + rlib_actions = [ + act + for act in rustc_actions + if len([ + o + for o in act.outputs.to_list() + if o.path.endswith(".rlib") and not o.path.endswith("-hollow.rlib") + ]) > 0 + ] + if len(rlib_actions) == 0: + asserts.true(env, False, "Strategy equivalence: no Rustc action with .rlib output found") + return analysistest.end(env) + rlib_action = rlib_actions[0] + + # RUSTC_BOOTSTRAP=1 (already true for hollow-rlib, but test the invariant). 
+ asserts.equals( + env, + "1", + metadata_action.env.get("RUSTC_BOOTSTRAP", ""), + "Strategy equivalence: hollow metadata action must have RUSTC_BOOTSTRAP=1", + ) + asserts.equals( + env, + "1", + rlib_action.env.get("RUSTC_BOOTSTRAP", ""), + "Strategy equivalence: hollow full action must have RUSTC_BOOTSTRAP=1", + ) + + # Unified --cfg discriminator (not mode-specific). + assert_argv_contains(env, metadata_action, "--cfg=rules_rust_pipelined") + assert_argv_contains(env, rlib_action, "--cfg=rules_rust_pipelined") + assert_argv_contains_not(env, metadata_action, "--cfg=rules_rust_hollow_rlib") + assert_argv_contains_not(env, rlib_action, "--cfg=rules_rust_hollow_rlib") + + return analysistest.end(env) + +strategy_equivalence_hollow_test = analysistest.make( + _strategy_equivalence_hollow_test_impl, + config_settings = ENABLE_PIPELINING, +) + +def _strategy_equivalence_test(): + strategy_equivalence_worker_test( + name = "strategy_equivalence_worker_test", + target_under_test = ":second", + target_compatible_with = _NO_WINDOWS, + ) + strategy_equivalence_hollow_test( + name = "strategy_equivalence_hollow_test", + target_under_test = ":second", + target_compatible_with = _NO_WINDOWS, + ) + return [ + ":strategy_equivalence_worker_test", + ":strategy_equivalence_hollow_test", + ] + def _disable_pipelining_test(): rust_library( name = "lib", @@ -329,24 +676,43 @@ def _custom_rule_test(generate_metadata, suffix): ] def _svh_mismatch_test(): - """Creates a rust_test demonstrating SVH mismatch with non-deterministic proc macros. - - Without pipelining (default): each library is compiled exactly once, SVH - is consistent across the dependency graph, and the test builds and passes. - - With pipelining (//rust/settings:pipelined_compilation=true): rules_rust - compiles svh_lib twice in separate rustc invocations — once for the hollow - metadata (.rmeta), once for the full .rlib. 
Because the proc macro uses - HashMap with OS-seeded randomness, these two invocations typically produce - different token streams and therefore different SVH values. The consumer is - compiled against the hollow .rmeta (recording SVH_1); when rustc links the - test binary against the full .rlib (SVH_2), it detects SVH_1 ≠ SVH_2 and - fails with E0460. The test is therefore expected to FAIL TO BUILD most of - the time (~99.2% with 5 HashMap entries) when pipelining is enabled. - - The test is marked flaky because the SVH mismatch is non-deterministic: - on rare occasions (~0.8%) both rustc invocations produce the same HashMap - iteration order and the build succeeds even with pipelining enabled. + """Creates a rust_test using a non-deterministic proc macro (HashMap iteration). + + This target graph is used by worker_pipelining_nondeterministic_test.sh to + verify SVH consistency across pipelining modes. + + Without pipelining (default): each library is compiled exactly once, so SVH + is trivially consistent and the build always succeeds. + + With hollow-rlib pipelining (pipeline=true, worker=false): each library is + compiled by two separate rustc processes (one with -Zno-codegen for the hollow + rlib, one for the full rlib). The dependency graph is tier-consistent: the + hollow action depends on upstream hollow rlibs and the full action depends on + upstream full rlibs. Each tier has self-consistent SVH values, so there is no + cross-tier mismatch. The build always succeeds. + + With worker pipelining under worker execution (pipeline=true, worker=true, + --strategy=Rustc=worker): each library is compiled by a single rustc process, + so the proc macro runs once and SVH is always consistent. The build always + succeeds. + + With worker pipelining under non-worker execution (pipeline=true, worker=true, + --strategy=Rustc=sandboxed or local): fast metadata (.rmeta) is used but two + separate rustc processes run per crate. 
Both the metadata and full actions + depend on upstream .rmeta (a cross-tier dependency). Separate rustc processes + produce different SVHs for the .rmeta and .rlib due to the non-deterministic + proc macro. The practical symptoms are: + - E0460 (crate found with incompatible SVH) in downstream consumers + - E0463 (can't find crate) when rustc fails to match the SVH at all + - SVH mismatch diagnostic from process_wrapper's consistency check + + The test is marked flaky because the mismatch is non-deterministic: on rare + occasions (~0.8%) both rustc invocations happen to iterate the HashMap in the + same order, the SVHs agree, and the build succeeds. + + The full failure boundary is exercised by worker_pipelining_nondeterministic_test.sh + which covers all four scenarios: worker+worker, no-pipeline, hollow_rlib, and + worker+sandboxed (the failure case). """ rust_proc_macro( @@ -389,10 +755,12 @@ def pipelined_compilation_test_suite(name): """ tests = [] tests.extend(_pipelined_compilation_test()) + tests.extend(_worker_pipelining_test()) tests.extend(_disable_pipelining_test()) tests.extend(_custom_rule_test(generate_metadata = True, suffix = "_with_metadata")) tests.extend(_custom_rule_test(generate_metadata = False, suffix = "_without_metadata")) tests.extend(_svh_mismatch_test()) + tests.extend(_strategy_equivalence_test()) native.test_suite( name = name, diff --git a/test/unit/pipelined_compilation/strace_rustc_post_metadata_test.sh b/test/unit/pipelined_compilation/strace_rustc_post_metadata_test.sh new file mode 100755 index 0000000000..9be1a04ad7 --- /dev/null +++ b/test/unit/pipelined_compilation/strace_rustc_post_metadata_test.sh @@ -0,0 +1,229 @@ +#!/usr/bin/env bash +# Regression test: rustc makes zero input file reads after emitting .rmeta. +# +# This is the critical invariant for worker-managed pipelining: after the +# metadata response is sent, background rustc must not read any sandbox inputs. 
+# Gate 0 investigation (project_gate0_strace_results.md) proved this holds on +# rustc 1.94.0. This test provides ongoing regression coverage. +# +# Tagged manual + no-sandbox + local; requires strace (Linux only). +set -euo pipefail + +RUSTC="${RUSTC:-rustc}" +STRACE="${STRACE:-strace}" + +# --------------------------------------------------------------------------- +# Locate tools +# --------------------------------------------------------------------------- +if ! command -v "$STRACE" &>/dev/null; then + echo "SKIP: strace not found (set STRACE= to override)" + exit 0 +fi +if ! command -v "$RUSTC" &>/dev/null; then + echo "SKIP: rustc not found (set RUSTC= to override)" + exit 0 +fi + +RUSTC_VERSION=$("$RUSTC" --version) +echo "Using rustc: $RUSTC_VERSION" +echo "Using strace: $("$STRACE" --version 2>&1 | head -1)" + +# --------------------------------------------------------------------------- +# Temp workspace +# --------------------------------------------------------------------------- +WORKDIR=$(mktemp -d) +trap 'rm -rf "$WORKDIR"' EXIT + +# dep crate +cat > "$WORKDIR/dep.rs" <<'EOF' +pub fn dep_fn() -> i32 { 42 } +EOF + +# included.txt for include_str! +cat > "$WORKDIR/included.txt" <<'EOF' +hello from include_str +EOF + +# main lib crate: depends on dep and uses include_str! +cat > "$WORKDIR/lib.rs" <<'EOF' +extern crate dep; + +const INCLUDED: &str = include_str!("included.txt"); + +pub fn answer() -> i32 { + let _ = INCLUDED; + dep::dep_fn() +} +EOF + +OUTDIR="$WORKDIR/out" +mkdir -p "$OUTDIR" + +# --------------------------------------------------------------------------- +# Step 1: compile dep.rs to get dep.rmeta (no strace needed) +# --------------------------------------------------------------------------- +"$RUSTC" \ + --edition 2021 \ + --crate-type lib \ + --crate-name dep \ + --emit=metadata,link \ + --out-dir "$OUTDIR" \ + "$WORKDIR/dep.rs" + +DEP_RMETA="$OUTDIR/libdep.rmeta" +if [[ ! 
-f "$DEP_RMETA" ]]; then + echo "FAIL: dep.rmeta not produced" + exit 1 +fi + +# --------------------------------------------------------------------------- +# Step 2: compile lib.rs under strace +# +# Rustc writes .rmeta to a temp dir (e.g. out/rmeta/full.rmeta) then +# renames it to libmylib.rmeta. We trace openat+read+close to capture all +# file I/O; the artifact JSON lines go to stderr separately. +# --------------------------------------------------------------------------- +STRACE_LOG="$WORKDIR/strace.log" + +"$STRACE" \ + -f \ + -e trace=openat,read,close \ + -o "$STRACE_LOG" \ + "$RUSTC" \ + --edition 2021 \ + --crate-type lib \ + --crate-name mylib \ + --emit=dep-info,metadata,link \ + --error-format=json \ + --json=artifacts \ + --extern "dep=$DEP_RMETA" \ + -L "$OUTDIR" \ + --out-dir "$OUTDIR" \ + "$WORKDIR/lib.rs" 2>/dev/null + +RMETA_OUT="$OUTDIR/libmylib.rmeta" +if [[ ! -f "$RMETA_OUT" ]]; then + echo "FAIL: libmylib.rmeta not produced" + exit 1 +fi + +# --------------------------------------------------------------------------- +# Step 3: find the .rmeta write boundary +# +# Rustc writes metadata to a temporary path like out/rmeta/full.rmeta +# using O_RDWR|O_CREAT before renaming it to libmylib.rmeta. This openat() +# is the earliest observable "metadata write started" event. +# +# We also accept the pattern of writing directly to a path ending in .rmeta +# with O_CREAT (in case rustc internals change). 
+# --------------------------------------------------------------------------- +# Pattern 1: temp rmeta dir (rmeta/full.rmeta or similar) with O_CREAT +BOUNDARY_LINE=$(grep -n "openat.*rmeta.*full\.rmeta.*O_.*CREAT\|openat.*full\.rmeta.*O_.*CREAT" "$STRACE_LOG" | head -1 | cut -d: -f1) + +# Pattern 2: fallback — any openat with O_WRONLY or O_CREAT for a path in OUTDIR +if [[ -z "$BOUNDARY_LINE" ]]; then + ESCAPED_OUTDIR=$(printf '%s\n' "$OUTDIR" | sed 's/[[\.*^$()+?{|]/\\&/g') + BOUNDARY_LINE=$(grep -n "openat.*${ESCAPED_OUTDIR}.*O_.*CREAT\|openat.*${ESCAPED_OUTDIR}.*O_WRONLY" "$STRACE_LOG" | head -1 | cut -d: -f1) +fi + +if [[ -z "$BOUNDARY_LINE" ]]; then + echo "FAIL: could not find .rmeta write openat() in strace log" + echo "--- strace log (openat lines) ---" + grep "openat" "$STRACE_LOG" | head -30 || true + exit 1 +fi + +echo "Boundary: strace line $BOUNDARY_LINE (first output-file write)" + +# Lines after the boundary (post-metadata I/O) +POST_LOG="$WORKDIR/post_boundary.log" +tail -n +"$((BOUNDARY_LINE + 1))" "$STRACE_LOG" > "$POST_LOG" + +# Lines before and including the boundary (pre-metadata I/O) +PRE_LOG="$WORKDIR/pre_boundary.log" +head -n "$BOUNDARY_LINE" "$STRACE_LOG" > "$PRE_LOG" + +# --------------------------------------------------------------------------- +# Step 4: assert zero input-file openat() reads after the boundary +# +# Input files to watch: lib.rs, dep.rs, included.txt, *.rmeta deps, *.rlib deps +# +# Exclusions (legitimate post-boundary opens): +# O_WRONLY / O_CREAT / O_RDWR — output writes +# ENOENT — probing for nonexistent files +# O_DIRECTORY — directory traversal +# /proc /sys /dev — kernel pseudo-files +# /home /rustup toolchain paths — rustc runtime libs (legitimate) +# --------------------------------------------------------------------------- +FAIL=0 +INPUT_PATTERNS=( + "lib\.rs" + "dep\.rs" + "included\.txt" + "libdep\.rmeta" + "libdep\.rlib" +) + +for pat in "${INPUT_PATTERNS[@]}"; do + BAD=$(grep -E "openat.*${pat}" 
"$POST_LOG" \ + | grep -vE "O_WRONLY|O_CREAT|O_RDWR|ENOENT|O_DIRECTORY" \ + | grep -vE "/proc/|/sys/|/dev/" \ + || true) + if [[ -n "$BAD" ]]; then + echo "FAIL: post-metadata read of input file matching '${pat}':" + echo "$BAD" + FAIL=1 + fi +done + +# Also flag any .so reads that look like proc-macro loads after the boundary +# (only flag .so files from OUTDIR or workdir — not system/toolchain .so) +ESCAPED_OUTDIR=$(printf '%s\n' "$OUTDIR" | sed 's/[[\.*^$()+?{|]/\\&/g') +ESCAPED_WORKDIR=$(printf '%s\n' "$WORKDIR" | sed 's/[[\.*^$()+?{|]/\\&/g') +BAD_SO=$(grep -E "openat.*(${ESCAPED_OUTDIR}|${ESCAPED_WORKDIR}).*\.so" "$POST_LOG" \ + | grep -vE "O_WRONLY|O_CREAT|O_RDWR|ENOENT|O_DIRECTORY" \ + || true) +if [[ -n "$BAD_SO" ]]; then + echo "FAIL: post-metadata openat() of .so in workdir/outdir (proc macro?) after boundary:" + echo "$BAD_SO" + FAIL=1 +fi + +# --------------------------------------------------------------------------- +# Step 5: assert all input FDs are closed before the boundary +# +# For each input file opened read-only before the boundary, find its FD +# (the return value of openat) and verify close($fd) appears before the +# boundary line. +# --------------------------------------------------------------------------- +while IFS= read -r line; do + # Extract the FD: last token after "= " on the line + fd=$(printf '%s' "$line" | grep -oE '= [0-9]+$' | grep -oE '[0-9]+' || true) + [[ -z "$fd" ]] && continue + + if ! 
grep -qE "close\($fd\)[[:space:]]*= 0" "$PRE_LOG"; then + echo "FAIL: FD $fd (opened for input) not closed before .rmeta write boundary" + echo " Opened by: $line" + FAIL=1 + fi +done < <(grep -E "openat.*(lib\.rs|dep\.rs|included\.txt|libdep\.rmeta|libdep\.rlib)" "$PRE_LOG" \ + | grep -vE "O_WRONLY|O_CREAT|O_RDWR|ENOENT|O_DIRECTORY" \ + | grep -E '= [0-9]+$' \ + || true) + +# --------------------------------------------------------------------------- +# Result +# --------------------------------------------------------------------------- +echo "" +echo "--- Summary ---" +echo "rustc version: $RUSTC_VERSION" +echo "strace boundary line: $BOUNDARY_LINE / $(wc -l < "$STRACE_LOG") total" +echo "post-boundary strace lines: $(wc -l < "$POST_LOG")" + +if [[ $FAIL -ne 0 ]]; then + echo "RESULT: FAIL" + exit 1 +fi + +echo "RESULT: PASS — zero input file reads after .rmeta emission, all input FDs closed before boundary" +exit 0 diff --git a/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_consumer.rs b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_consumer.rs index 99b0ea9bf4..924c00d3a2 100644 --- a/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_consumer.rs +++ b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_consumer.rs @@ -1,6 +1,10 @@ -/// A library that depends on svh_lib. When compiled against a hollow `.rmeta` -/// of svh_lib, this crate's metadata records svh_lib's SVH at that point in -/// time. If the full `.rlib` of svh_lib was produced by a separate rustc -/// invocation (with a different HashMap seed), it may have a different SVH, -/// causing a mismatch when a downstream binary tries to link against both. +/// A library that depends on svh_lib. In worker-pipelining standalone mode, +/// this crate's metadata and full actions both depend on svh_lib's `.rmeta` +/// (a cross-tier dependency). 
If the separate rustc invocations for metadata +/// and full produce different SVHs (due to non-deterministic proc macros in +/// svh_lib), a downstream binary that loads this crate's `.rlib` will find +/// svh_lib's `.rlib` SVH doesn't match, causing E0463 or E0460. +/// +/// In hollow-rlib mode, the graph is tier-consistent (hollow→hollow, full→full), +/// so this scenario does not arise. pub use svh_lib::Widget; diff --git a/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_test.rs b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_test.rs index 6ecfe83553..d6c2f44c4e 100644 --- a/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_test.rs +++ b/test/unit/pipelined_compilation/svh_mismatch/svh_mismatch_test.rs @@ -1,23 +1,41 @@ -/// Demonstrates SVH (Strict Version Hash) mismatch with pipelined compilation. +/// Demonstrates SVH (Strict Version Hash) sensitivity with pipelined compilation. +/// +/// The two pipelining modes use different metadata classes with different safety +/// properties (see DESIGN.md "Metadata Classes"): +/// +/// - **Full metadata** (hollow_rlib mode, Buck2-style): tier-consistent graph, +/// safe with all execution strategies, non-deterministic proc macros OK. +/// - **Fast metadata** (worker mode, Cargo-style): requires same rustc process, +/// non-deterministic proc macros fail under separate-process execution. /// /// Without pipelining this test always builds and passes: each library is /// compiled exactly once, so the SVH embedded in every `.rmeta` and `.rlib` /// is identical. /// -/// With `//rust/settings:pipelined_compilation=true` rules_rust compiles -/// `svh_lib` **twice** in separate rustc processes — once to emit the hollow -/// `.rmeta` (metadata only), once to emit the full `.rlib`. Because -/// `nondeterministic_macro` uses `HashMap` with OS-seeded randomness, the two -/// rustc invocations typically produce different token streams and therefore -/// different SVH values. 
`svh_consumer` is compiled against the hollow `.rmeta` -/// and records SVH_1 in its own metadata; when rustc later tries to link the -/// test binary against the full `.rlib` (which carries SVH_2), it detects the -/// mismatch and fails with E0460. The test therefore **fails to build** most of -/// the time (~99.2% probability) when pipelining is enabled. +/// With `experimental_pipelined_compilation=hollow_rlib` (full metadata), each +/// library is compiled twice in separate rustc processes — once with +/// `-Zno-codegen` for the hollow rlib and once for the full rlib. The +/// dependency graph is **tier-consistent**: the hollow action depends on +/// upstream hollow rlibs, and the full action depends on upstream full rlibs. +/// Each tier has self-consistent SVH values, so there is no cross-tier mismatch +/// even with non-deterministic proc macros. The build always succeeds. This is +/// why hollow_rlib is the recommended portable mode. +/// +/// With `experimental_pipelined_compilation=worker` (fast metadata) under +/// **worker execution**, each library is compiled by a single rustc process, +/// so the proc macro runs once and SVH is trivially consistent. The build +/// always succeeds. +/// +/// With `experimental_pipelined_compilation=worker` (fast metadata) under +/// **non-worker execution** (sandboxed, local, remote), the metadata and full +/// actions run as separate rustc processes, but both depend on upstream +/// `.rmeta` (a cross-tier dependency). Non-deterministic proc macros produce +/// different SVHs in each process, and downstream consumers see an SVH +/// mismatch (E0463 or E0460). This is the scenario the test exercises. 
/// -/// The `flaky = True` attribute on this target acknowledges that the mismatch -/// is non-deterministic: on rare occasions (~0.8%) both rustc invocations -/// happen to produce the same HashMap iteration order, the SVHs agree, and the +/// The `flaky = True` attribute acknowledges that the mismatch is non- +/// deterministic: on rare occasions (~0.8%) both rustc invocations happen +/// to produce the same HashMap iteration order, the SVHs agree, and the /// build succeeds. use svh_consumer::Widget; diff --git a/test/unit/pipelined_compilation/worker_pipelining_nondeterministic_test.sh b/test/unit/pipelined_compilation/worker_pipelining_nondeterministic_test.sh new file mode 100755 index 0000000000..f56cd8246e --- /dev/null +++ b/test/unit/pipelined_compilation/worker_pipelining_nondeterministic_test.sh @@ -0,0 +1,247 @@ +#!/usr/bin/env bash +# End-to-end test: pipelining modes × execution strategies with non-deterministic +# proc macros. +# +# The svh_mismatch target graph uses a proc macro that iterates a HashMap +# (non-deterministic across process invocations). This exercises the actual +# failure boundary between fast metadata (.rmeta, Cargo-style) and full metadata +# (hollow .rlib, Buck2-style) pipelining. +# +# Test matrix: +# +# Phase 1: worker pipelining + worker execution → MUST PASS +# Fast metadata, single rustc per crate. SVH always consistent. +# +# Phase 2: no pipelining → MUST PASS (baseline) +# One rustc per crate, trivially consistent. +# +# Phase 3: hollow_rlib pipelining → MUST PASS +# Full metadata, tier-consistent graph (hollow→hollow, full→full). +# Non-deterministic proc macros are safe because each tier is self-consistent. +# +# Phase 4: worker pipelining + sandboxed execution → MUST FAIL (E0460 or E0463) +# Fast metadata, two separate rustc processes, cross-tier dependency +# (full action → upstream .rmeta). Non-deterministic proc macros produce +# different SVH values → SVH mismatch detected by process_wrapper. 
+# +# Expected errors in Phase 4: the SVH consistency check in process_wrapper +# catches the mismatch and fails with a diagnostic. Downstream consumers would +# see E0460 (crate found with incompatible SVH) or E0463 (can't find crate). +# +# Tagged manual + local because it invokes Bazel (Bazel-in-Bazel). +set -euo pipefail + +if [[ -z "${BUILD_WORKSPACE_DIRECTORY:-}" ]]; then + >&2 echo "This script should be run under Bazel (bazel test)" + exit 1 +fi + +cd "${BUILD_WORKSPACE_DIRECTORY}" + +TARGET="//test/unit/pipelined_compilation:svh_mismatch_test" +ITERATIONS="${WORKER_PIPELINING_TEST_ITERATIONS:-5}" + +echo "=== Pipelining Regression Test: Non-Deterministic Proc Macros ===" +echo "Target: ${TARGET}" +echo "Iterations: ${ITERATIONS}" +echo "" + +COMMON_FLAGS=( + --disk_cache="" + --noremote_accept_cached + --noremote_upload_local_results +) + +# --------------------------------------------------------------------------- +# Phase 1: Worker-pipelined builds (fast metadata, must always succeed) +# +# Worker pipelining uses a single rustc invocation per crate. The metadata +# action spawns rustc, returns as soon as .rmeta is ready, and the full +# action waits for the same rustc to finish. Since the proc macro only runs +# once, SVH is always consistent. +# +# Uses --strategy=Rustc=worker,local: library crates use worker (pipelined), +# binary/test targets fall back to local (they don't support workers). +# --------------------------------------------------------------------------- +echo "--- Phase 1: Worker pipelining + worker execution (fast metadata, single rustc) ---" +WORKER_PASS=0 +WORKER_FAIL=0 + +for i in $(seq 1 "$ITERATIONS"); do + echo -n " worker-pipelined build ${i}/${ITERATIONS}... 
" + if bazel build "${TARGET}" \ + --@rules_rust//rust/settings:experimental_pipelined_compilation=worker \ + --strategy=Rustc=worker,local \ + "${COMMON_FLAGS[@]}" \ + 2>/dev/null; then + echo "OK" + WORKER_PASS=$((WORKER_PASS + 1)) + else + echo "FAIL" + WORKER_FAIL=$((WORKER_FAIL + 1)) + fi +done + +echo " Results: ${WORKER_PASS}/${ITERATIONS} pass" +echo "" + +# --------------------------------------------------------------------------- +# Phase 2: Non-pipelined builds (must always succeed — baseline) +# +# Without pipelining, each crate is compiled exactly once, so SVH is +# trivially consistent. This phase establishes the baseline. +# --------------------------------------------------------------------------- +echo "--- Phase 2: No pipelining (baseline, single rustc per crate) ---" +STANDALONE_PASS=0 +STANDALONE_FAIL=0 + +for i in $(seq 1 "$ITERATIONS"); do + echo -n " standalone build ${i}/${ITERATIONS}... " + if bazel build "${TARGET}" \ + --@rules_rust//rust/settings:experimental_pipelined_compilation=off \ + --strategy=Rustc=local \ + "${COMMON_FLAGS[@]}" \ + 2>/dev/null; then + echo "OK" + STANDALONE_PASS=$((STANDALONE_PASS + 1)) + else + echo "FAIL (unexpected!)" + STANDALONE_FAIL=$((STANDALONE_FAIL + 1)) + fi +done + +echo " Results: ${STANDALONE_PASS}/${ITERATIONS} pass" +echo "" + +# --------------------------------------------------------------------------- +# Phase 3: Hollow-rlib pipelining (full metadata, must always succeed) +# +# hollow_rlib uses full metadata (hollow .rlib produced with -Zno-codegen). +# The dependency graph is tier-consistent: the hollow action depends on +# upstream hollow rlibs, the full action depends on upstream full rlibs. +# Each tier has self-consistent SVH values, so non-deterministic proc macros +# do NOT cause SVH mismatch. This is the Buck2-style portable pipelining. 
+# --------------------------------------------------------------------------- +echo "--- Phase 3: Hollow-rlib pipelining (full metadata, tier-consistent graph) ---" +HOLLOW_PASS=0 +HOLLOW_FAIL=0 + +for i in $(seq 1 "$ITERATIONS"); do + echo -n " hollow-rlib build ${i}/${ITERATIONS}... " + if bazel build "${TARGET}" \ + --@rules_rust//rust/settings:experimental_pipelined_compilation=hollow_rlib \ + --strategy=Rustc=local \ + "${COMMON_FLAGS[@]}" \ + 2>/dev/null; then + echo "OK" + HOLLOW_PASS=$((HOLLOW_PASS + 1)) + else + echo "FAIL (unexpected!)" + HOLLOW_FAIL=$((HOLLOW_FAIL + 1)) + fi +done + +echo " Results: ${HOLLOW_PASS}/${ITERATIONS} pass" +echo "" + +# --------------------------------------------------------------------------- +# Phase 4: Worker pipelining + sandboxed execution (must fail — SVH mismatch) +# +# This is the actual failure boundary. Worker pipelining uses fast metadata +# (.rmeta) but sandboxed execution forces two separate rustc processes per +# crate. The full action depends on upstream .rmeta (a cross-tier dependency), +# so non-deterministic proc macros produce different SVH values in the two +# processes. process_wrapper detects this via byte-comparing the .rmeta files +# and fails with a diagnostic mentioning E0460/E0463. +# +# We expect most iterations to fail. A rare pass (~0.8%) is possible when +# HashMap iteration order happens to match across both rustc invocations. +# --------------------------------------------------------------------------- +echo "--- Phase 4: Worker pipelining + sandboxed execution (expected: SVH mismatch failure) ---" +SANDBOXED_PASS=0 +SANDBOXED_FAIL=0 +SAW_SVH_MISMATCH=0 +SAW_E0460=0 +SAW_E0463=0 + +for i in $(seq 1 "$ITERATIONS"); do + echo -n " sandboxed build ${i}/${ITERATIONS}... 
" + BUILD_OUTPUT=$(bazel build "${TARGET}" \ + --@rules_rust//rust/settings:experimental_pipelined_compilation=worker \ + --strategy=Rustc=sandboxed \ + "${COMMON_FLAGS[@]}" \ + 2>&1) && { + echo "PASS (lucky run — HashMap iteration matched)" + SANDBOXED_PASS=$((SANDBOXED_PASS + 1)) + } || { + echo "FAIL (expected)" + SANDBOXED_FAIL=$((SANDBOXED_FAIL + 1)) + # Check for expected error signatures + if echo "$BUILD_OUTPUT" | grep -q "SVH mismatch"; then + SAW_SVH_MISMATCH=$((SAW_SVH_MISMATCH + 1)) + fi + if echo "$BUILD_OUTPUT" | grep -q "E0460"; then + SAW_E0460=$((SAW_E0460 + 1)) + fi + if echo "$BUILD_OUTPUT" | grep -q "E0463"; then + SAW_E0463=$((SAW_E0463 + 1)) + fi + } +done + +echo " Results: ${SANDBOXED_FAIL}/${ITERATIONS} fail (expected), ${SANDBOXED_PASS}/${ITERATIONS} pass (lucky)" +if [[ ${SAW_SVH_MISMATCH} -gt 0 ]]; then + echo " SVH mismatch diagnostic seen: ${SAW_SVH_MISMATCH} time(s)" +fi +if [[ ${SAW_E0460} -gt 0 ]]; then + echo " E0460 (incompatible SVH) seen: ${SAW_E0460} time(s)" +fi +if [[ ${SAW_E0463} -gt 0 ]]; then + echo " E0463 (can't find crate) seen: ${SAW_E0463} time(s)" +fi +echo "" + +# --------------------------------------------------------------------------- +# Verdict +# --------------------------------------------------------------------------- +echo "=== Summary ===" +echo " Phase 1 (worker + worker exec): ${WORKER_PASS}/${ITERATIONS} pass" +echo " Phase 2 (no pipelining): ${STANDALONE_PASS}/${ITERATIONS} pass" +echo " Phase 3 (hollow_rlib): ${HOLLOW_PASS}/${ITERATIONS} pass" +echo " Phase 4 (worker + sandboxed exec): ${SANDBOXED_FAIL}/${ITERATIONS} fail (expected)" +echo "" + +EXIT=0 + +if [[ ${WORKER_FAIL} -gt 0 ]]; then + echo "FAIL: Phase 1 — Worker-pipelined build failed ${WORKER_FAIL} time(s)." + echo " Worker pipelining should never produce SVH mismatch because each crate" + echo " is compiled by a single rustc invocation (fast metadata, Cargo-style)." 
+ EXIT=1 +fi + +if [[ ${STANDALONE_FAIL} -gt 0 ]]; then + echo "FAIL: Phase 2 — Standalone build failed ${STANDALONE_FAIL} time(s) (unexpected)." + EXIT=1 +fi + +if [[ ${HOLLOW_FAIL} -gt 0 ]]; then + echo "FAIL: Phase 3 — Hollow-rlib build failed ${HOLLOW_FAIL} time(s) (unexpected)." + echo " hollow_rlib uses full metadata with a tier-consistent graph and should" + echo " never produce SVH mismatch regardless of proc macro determinism." + EXIT=1 +fi + +# Phase 4: we expect failures. If ALL iterations passed, the non-deterministic +# proc macro may not be non-deterministic enough, or something changed. +if [[ ${SANDBOXED_FAIL} -eq 0 ]]; then + echo "WARNING: Phase 4 — All sandboxed builds passed. Expected at least one" + echo " SVH mismatch failure with non-deterministic proc macro. The proc macro" + echo " may not be non-deterministic enough, or the SVH check may be bypassed." + # This is a warning, not a hard failure, because it's statistically possible. +fi + +if [[ ${EXIT} -eq 0 ]]; then + echo "PASS: All pipelining modes behave as expected with non-deterministic proc macros." 
+fi +exit ${EXIT} diff --git a/test/unit/proc_macro/leaks_deps/proc_macro_does_not_leak_deps.bzl b/test/unit/proc_macro/leaks_deps/proc_macro_does_not_leak_deps.bzl index f246e1ddc0..6846e61ee2 100644 --- a/test/unit/proc_macro/leaks_deps/proc_macro_does_not_leak_deps.bzl +++ b/test/unit/proc_macro/leaks_deps/proc_macro_does_not_leak_deps.bzl @@ -143,7 +143,7 @@ def _proc_macro_does_not_leak_lib_deps_test(): proc_macro_does_not_leak_lib_deps_test = analysistest.make( _proc_macro_does_not_leak_lib_deps_impl, config_settings = { - str(Label("//rust/settings:pipelined_compilation")): True, + str(Label("//rust/settings:experimental_pipelined_compilation")): "hollow_rlib", }, ) diff --git a/util/process_wrapper/BUILD.bazel b/util/process_wrapper/BUILD.bazel index 08dd7ecadf..fdea5d085b 100644 --- a/util/process_wrapper/BUILD.bazel +++ b/util/process_wrapper/BUILD.bazel @@ -28,12 +28,15 @@ selects.config_setting_group( rust_binary_without_process_wrapper( name = "process_wrapper", - srcs = glob(["*.rs"]), + srcs = glob([ + "*.rs", + "test/*.rs", + ]), allocator_libraries = select({ "@rules_rust//rust/settings:experimental_use_allocator_libraries_with_mangled_symbols_on": "@rules_rust//ffi/rs:allocator_libraries_with_mangling_support_without_process_wrapper", "//conditions:default": "@rules_rust//ffi/rs:empty_allocator_libraries", }), - edition = "2018", + edition = "2024", # To ensure the process wrapper is produced deterministically # debug info, which is known to sometimes have host specific # paths embedded in this section, is stripped out. 
@@ -51,7 +54,26 @@ rust_binary_without_process_wrapper( rust_test_without_process_wrapper_test( name = "process_wrapper_test", crate = ":process_wrapper", - edition = "2018", + data = [ + "//rust/toolchain:current_rust_stdlib_files", + "//rust/toolchain:current_rustc_files", + ], + edition = "2024", + env = { + # Cache-seeding tests (test_seed_cache_root_for_current_dir, + # test_seed_cache_root_from_execroot_ancestor) change process CWD + # which races with other tests if run in parallel. + "RUST_TEST_THREADS": "1", + }, + proc_macro_deps = [ + "//rust/runfiles", + ], + rustc_env = { + "RUSTC_RLOCATIONPATH": "$(rlocationpath //rust/toolchain:current_rustc_files)", + }, + deps = [ + "//rust/runfiles", + ], ) alias( diff --git a/util/process_wrapper/DESIGN.md b/util/process_wrapper/DESIGN.md new file mode 100644 index 0000000000..6db4323686 --- /dev/null +++ b/util/process_wrapper/DESIGN.md @@ -0,0 +1,454 @@ +# Process Wrapper Worker Design + +## Overview + +`process_wrapper` has two execution modes: + +- Standalone mode executes one subprocess and forwards output. +- Persistent-worker mode speaks Bazel's JSON worker protocol and can keep + pipelined Rust compilations alive across two worker requests. + +The worker entrypoint is `worker::worker_main()`. It: + +- reads one JSON `WorkRequest` per line from stdin +- classifies the request as non-pipelined, metadata, or full +- registers the request in `RequestCoordinator` before it becomes cancelable +- dispatches multiplex requests onto background threads via `RequestExecutor` +- serializes `WorkResponse` writes to stdout + +## Request Kinds + +Rust pipelining uses two request kinds keyed by `--pipelining-key=`: + +- Metadata request: starts rustc, waits until `.rmeta` is emitted, returns + success early, and leaves the child running in the background. +- Full request: either takes ownership of the background rustc and waits for + completion, or claims the key for a one-shot fallback compile. 
+ +Request classification must use the same rules in the main thread and the worker +thread. Relative `@paramfile` paths are resolved against the request's effective +execroot: + +- `sandboxDir` when Bazel multiplex sandboxing is active +- the worker's current directory otherwise + +This avoids the earlier split where pre-registration and execution could +disagree about whether a request was pipelined. + +## Metadata Classes + +Rust pipelining depends on an intermediate metadata artifact that lets downstream +crates start compiling before upstream codegen finishes. There are two distinct +classes of metadata artifact, with different portability and safety properties: + +**Fast metadata** (`.rmeta`, `--emit=metadata`): +- Produced quickly by rustc as a compilation milestone +- Good for early checking and for pipelining within a single rustc process +- Not portable as the metadata input for a separate full-codegen invocation — + if a non-deterministic proc macro produces different SVH values across two + rustc processes, downstream consumers see E0463 or E0460 +- Used by Cargo (single rustc per crate) and by worker pipelining in rules_rust + +**Full metadata** (hollow `.rlib`, `--emit=link` with `-Zno-codegen`): +- A hollow rlib containing metadata but no object code +- Portable as input to downstream Rust codegen in a two-invocation graph +- The dependency graph is tier-consistent: hollow actions depend on upstream + hollow rlibs, full actions depend on upstream full rlibs — so non-deterministic + proc macros do not cause SVH mismatch +- Used by Buck2 (`metadata-full`) and by hollow-rlib pipelining in rules_rust + +This distinction maps to pipelining modes: + +| Mode | Metadata class | Artifact | Portable across strategies | +|------|---------------|----------|---------------------------| +| Worker pipelining | Fast metadata | `.rmeta` | No — requires same rustc process | +| Hollow-rlib pipelining | Full metadata | hollow `.rlib` | Yes — safe with any execution 
strategy | + +Worker pipelining is Cargo-like: one rustc per crate, metadata and final artifact +from the same process, safety from process identity. Hollow-rlib pipelining is +Buck2-like: two rustc invocations per crate, a tier-consistent graph, safety from +graph structure. + +For builds that may use sandboxed, remote, or dynamic execution — or any +configuration where the metadata and full actions might run as separate +processes — **hollow_rlib is the recommended portable mode**. + +## Request Coordination and Invocation Lifecycle + +`RequestCoordinator` (in `worker.rs`) tracks two data structures: + +- `invocations`: pipeline key → `Arc` +- `requests`: request id → optional pipeline key (presence means active; removal + is the atomic claim — whoever removes the entry owns the right to send the + `WorkResponse`) + +Each `RustcInvocation` (in `worker_invocation.rs`) is a shared condvar-based +state machine with these states: + +- `Pending`: invocation created but rustc not yet started +- `Running`: rustc child is alive, being driven by a background thread +- `MetadataReady`: `.rmeta` has been emitted; metadata handler can be unblocked +- `Completed`: rustc exited successfully; full handler can be unblocked +- `Failed`: rustc exited with non-zero code +- `ShuttingDown`: shutdown was requested; all waiters receive an error + +The metadata handler spawns rustc, creates a `RustcInvocation` via +`spawn_pipelined_rustc`, and inserts it into the coordinator. The full handler +retrieves that shared invocation and calls `wait_for_completion`. If no +invocation exists yet, the full handler falls back to a standalone subprocess. + +The critical invariant is that invocation insertion and retrieval happen under +the coordinator's mutex. The coordinator also arbitrates cancel/completion +races via the remove-on-claim pattern, ensuring only one response is sent per +request. 
+ +## Retry and Cancellation + +Metadata retries use per-request output directories under: + +`_pw_state/pipeline//outputs-/` + +This avoids deleting a shared `outputs/` directory before ownership of the key +has changed. + +Cancellation is best-effort: + +- non-pipelined requests only suppress duplicate responses via the remove-on-claim + pattern on the `requests` map +- pipelined requests call `RustcInvocation::request_shutdown()`, which + transitions to `ShuttingDown` and sends SIGTERM to the child process + +The `requests` map serves as both the response-level guard and the lookup table. +Removal from the map is the atomic claim that prevents duplicate responses; +the optional pipeline key lets cancellation find the associated invocation. + +## Sandbox Contract + +When Bazel provides `sandboxDir`, the worker runs rustc with that directory as +its current working directory. Relative reads then stay rooted inside the +sandbox. Outputs that must survive across the metadata/full split are redirected +into `_pw_state/pipeline//...` and copied back into the sandbox before the +worker responds. + +The worker also makes prior outputs writable before each request because Bazel +and the disk cache can leave action outputs read-only. + +This satisfies the straightforward part of the multiplex-sandbox contract: +request-time reads and declared output writes stay rooted under `sandboxDir`. +The harder part is response lifetime: the metadata response returns before the +background rustc has finished codegen. The current safety argument is that rustc +has already consumed its inputs by `.rmeta` emission and that later codegen +writes go only into worker-owned `_pw_state`, but that depends on rustc +implementation details rather than on a Bazel-guaranteed contract. For that +reason, sandboxed worker pipelining should still be treated as +contract-sensitive, and the hollow-rlib path remains the compatibility fallback. 
+ +## Standalone Full-Action Behavior + +Outside worker mode, a `--pipelining-full` action may be redundant. If the +metadata action already produced the final `.rlib` as a side effect and that +file still exists (unsandboxed local execution), standalone mode skips the +second rustc invocation and only performs the normal post-success actions +(`touch_file`, `copy_output`). + +If the `.rlib` is missing — which happens under sandboxed, local, or remote +execution because the metadata action's separate rustc process does not produce +the undeclared `.rlib` side effect — the process wrapper warns and falls through +to run a second rustc. After rustc succeeds, it performs an SVH consistency +check: the full action injects `--emit=metadata=` to produce a standalone +`.rmeta`, then byte-compares it with the metadata action's `.rmeta` (passed via +`--pipelining-rmeta-path`). If they match, the crate's proc macros are +deterministic and the build proceeds. If they differ, a non-deterministic proc +macro produced different SVH values across the two rustc invocations, and the +build fails immediately with a diagnostic listing fix options (use worker +strategy, switch to hollow_rlib, or fix the proc macro). + +This check catches the SVH mismatch at the source crate rather than producing +a cryptic E0463 in a downstream consumer. Under dynamic execution, the remote +leg fails fast, the local worker leg wins the race, and the build succeeds. + +## Execution Strategy Compatibility + +Three pipelining modes interact with Bazel's execution strategies. The matrix +below shows which combinations are supported. 
+ +### Execution requirements by mode + +| Mode | `requires-worker-protocol` | `supports-multiplex-workers` | `supports-multiplex-sandboxing` | +|------|---|---|---| +| No pipelining | — | — | — | +| Hollow-rlib | — | — | — | +| Worker pipelining | `json` | `1` | `1` | + +Hollow-rlib and no-pipelining actions are plain subprocesses with no worker +execution requirements (unless incremental compilation is separately enabled). +Worker-pipelining actions declare multiplex worker support and multiplex +sandboxing support. + +### Compatibility matrix + +``` + local sandboxed worker worker+mx-sandbox dynamic remote +No pipelining ✓ ✓ n/a n/a ✓ ✓ +Hollow-rlib ✓ ✓ n/a n/a ✓ ✓ +Worker pipeline ✓* ✓* ✓ ✓ ✓¹ ✓* +``` + +\* **Deterministic proc macros only.** The full action runs a separate rustc + process and checks SVH consistency afterward. If a non-deterministic proc + macro produces different SVH values, the build fails immediately with a + diagnostic (rather than a cryptic E0463 in a downstream consumer). Use + worker strategy or switch to hollow_rlib for non-deterministic proc macros. + +1. **dynamic + worker pipeline:** Bazel forces `mustSandbox=true` for dynamic + execution. Because the action declares `supports-multiplex-sandboxing: 1`, + the local leg runs as a **multiplex sandboxed worker** — the worker process + is shared across requests but each request gets a `sandboxDir`. The remote + leg runs process_wrapper as a one-shot standalone process (pipelining flags + stripped). If the remote leg wins the race for a full action, it runs a + second rustc with SVH checking — non-deterministic proc macros fail fast + and the local worker leg wins the race. + +### Why hollow-rlib shows n/a for worker + +`_build_worker_exec_reqs()` with `use_worker_pipelining=False` (and no +incremental) returns an empty dict — no `supports-workers` or +`supports-multiplex-workers`. Bazel will not route these actions to a worker +process. 
+ +### Recommended configurations + +| Use case | Settings | Metadata class | +|---|---|---| +| Portable builds (sandboxed, remote, dynamic, mixed) | `experimental_pipelined_compilation=hollow_rlib` | Full metadata | +| Maximum parallelism (local worker builds) | `experimental_pipelined_compilation=worker`, `--strategy=Rustc=worker` | Fast metadata | +| Dynamic execution | `experimental_pipelined_compilation=worker`, `--strategy=Rustc=dynamic`, `--experimental_worker_multiplex_sandboxing` | Fast metadata (local), standalone fallback (remote) | + +**hollow_rlib is the safe default for any build that may run outside a persistent +worker.** It uses full metadata (tier-consistent graph) and is compatible with all +execution strategies. Worker pipelining uses fast metadata and achieves higher +parallelism but requires worker execution to guarantee single-process safety. + +## Determinism Contract + +Bazel persistent workers are expected to produce the same outputs as standalone +execution. For Rust pipelining this becomes a hard requirement under dynamic +execution: a local worker leg and a remote standalone leg may race, so the +resulting `.rlib` and `.rmeta` artifacts must be byte-for-byte identical. + +> "The invariant, however, is that strategies do _not_ affect the semantics of +> the execution: that is, running the same command line on strategy A and +> strategy B must yield the same output files." +> — Julio Merino, [What are Bazel's strategies?](https://jmmv.dev/2019/12/bazel-strategies.html) + +There are two relevant worker paths: + +- Non-pipelined requests re-exec `process_wrapper` via `run_request()`, so they + share the standalone path by construction. +- Pipelined requests diverge: `RequestExecutor::execute_metadata()` spawns + rustc directly, rewrites output locations into `_pw_state`, and + `RequestExecutor::execute_full()` later joins that background compile and + materializes artifacts. + +That second path is where determinism matters most. 
The same rustc flags used by +the worker must be preserved in standalone comparisons, including +`--error-format=json` and `--json=artifacts`, because those flags affect the +metadata rustc emits and therefore the crate hash embedded in downstream-facing +artifacts. + +### Strategy-Equivalence Unification + +Both pipelining modes (hollow-rlib and worker) must produce equivalent +rustc-visible behavior so that switching between them — or switching execution +strategies within worker-pipelining mode — does not change the output. + +The following properties are unified across modes: + +- **`RUSTC_BOOTSTRAP=1`**: set on **every** Rustc action (rlibs, binaries, + tests, proc-macros) when any pipelining mode is active. + `RUSTC_BOOTSTRAP` changes the crate SVH; a binary compiled + without it cannot load rlibs compiled with it (E0463). +- **`--cfg=rules_rust_pipelined`**: set on every Rustc action when pipelining + is active. Distinguishes pipelining-enabled from pipelining-disabled builds + in the action cache so cached artifacts are not reused across modes. The two + pipelining modes (hollow-rlib and worker) already differ in `--emit` flags + and declared outputs, so their cache keys are naturally distinct from each + other. +- **Mnemonic `"Rustc"`**: all pipelining metadata actions use mnemonic `"Rustc"` + (not `"RustcMetadata"`). This ensures Bazel treats all pipelining rustc + actions equivalently for strategy selection. + +Irreducible differences that remain (format-driven, not behavioral): + +- `--emit` shape: `--emit=link=` vs `--emit=metadata=,link` +- `-Zno-codegen`: only on hollow-rlib metadata action +- `--pipelining-*` protocol flags: only on worker-pipelining actions (stripped + before rustc sees them) +- Env delivery: worker-pipelining uses `.worker_env` files for worker-key + sharing; hollow-rlib uses direct action env. Both deliver the same vars to + the rustc child process. + +The design principle enforced here: + +1. 
Outside worker mode, a worker-pipelining action should emulate the worker + result with one combined rustc invocation that matches the worker + rustc-visible behavior as closely as possible. +2. If the .rlib side-effect is not available, warn and fall through to a second + rustc. Users with non-deterministic proc macros should use hollow-rlib mode, + whose tier-consistent graph (hollow→hollow, full→full) avoids SVH mismatch. + +## Determinism Test Strategy + +`process_wrapper_test` uses the real toolchain rustc from Bazel runfiles +(`RUSTC_RLOCATIONPATH`) together with `current_rust_stdlib_files`, so the test +compares the worker against the production compiler instead of a fake binary. + +The test harness relies on a few implementation hooks: + +- `run_standalone(&Options)` factors the standalone execution path out of + `main()` so tests can invoke it without exiting the process. +- Worker submodules (`pipeline`, `args`, `exec`, `sandbox`, `invocation`, + `rustc_driver`, `protocol`, `types`, `logging`, `request`) are `pub(crate)` + so unit tests can drive the pipelined handlers directly. +- `RUST_TEST_THREADS=1` is set for `process_wrapper_test` because cache-seeding + tests temporarily change the process current working directory. + +**TODO:** A byte-for-byte determinism regression test (`test_pipelined_matches_standalone`) +is planned but not yet implemented. The intended approach: + +1. compile a trivial crate twice with standalone rustc to prove the baseline is + itself deterministic for the chosen flags +2. run the same crate through `execute_metadata()` and `execute_full()` +3. compare both `.rlib` and `.rmeta` bytes between standalone and worker + +The `.rmeta` comparison is as important as the `.rlib` comparison because +downstream crates compile against metadata first; a metadata mismatch can expose +different SVH or type information even if the final archive happens to link. 
+ +## Regression Test Coverage + +`worker_pipelining_nondeterministic_test.sh` exercises the actual failure +boundary around non-deterministic proc macros across all pipelining modes: + +| Phase | Mode | Execution | Metadata class | Expected | +|-------|------|-----------|---------------|----------| +| 1 | Worker pipelining | Worker | Fast metadata | PASS (single rustc) | +| 2 | No pipelining | Local | — | PASS (baseline) | +| 3 | Hollow-rlib | Local | Full metadata | PASS (tier-consistent) | +| 4 | Worker pipelining | Sandboxed | Fast metadata | FAIL (SVH mismatch) | + +Phase 4 verifies that the process_wrapper SVH consistency check catches the +mismatch and produces a clear diagnostic. The practical error symptoms include: + +- `E0460`: crate found with incompatible SVH (downstream consumer gets the + wrong version hash from the metadata action's `.rmeta` vs the full `.rlib`) +- `E0463`: can't find crate (rustc cannot match the SVH at all and treats the + crate as missing) + +Both errors are valid manifestations of the same root cause: the fast metadata +`.rmeta` from one rustc invocation has a different SVH than the full `.rlib` +from a separate invocation when a non-deterministic proc macro is involved. + +## Artifact Hash Instrumentation + +`artifact_hash_check.sh` in `test/unit/pipelined_compilation/` provides +manual instrumentation for investigating SVH consistency. It computes +SHA-256 hashes for three artifact types: + +1. **Declared metadata artifact** — the hollow `.rlib` (hollow_rlib mode) or + `.rmeta` (worker mode) that downstream metadata actions consume +2. **Full `.rlib`** — the final archive that downstream full actions consume +3. 
**Embedded `lib.rmeta`** — the metadata section extracted from the full + `.rlib` via `ar x` + +This instrumentation is useful for: + +- Validating that a rustc version change has not broken SVH compatibility +- Comparing artifacts across pipelining modes or execution strategies +- Investigating whether a specific proc macro is deterministic +- Future rustc experiments (e.g., testing a hypothetical stable `-Zno-codegen` + replacement or a first-class "full `.rmeta`" output mode) + +The script is tagged `manual` and is not part of the automated test suite. + +## Module Structure + +The worker code is organized into single-responsibility modules: + +| Module | File | Responsibility | +|--------|------|---------------| +| `types` | `worker_types.rs` | Domain newtypes: `PipelineKey`, `RequestId`, `SandboxDir`, `OutputDir` | +| `protocol` | `worker_protocol.rs` | Bazel JSON wire protocol: parse `WorkRequest`, build `WorkResponse` | +| `args` | `worker_args.rs` | Arg parsing, expansion, rewriting, env building | +| `pipeline` | `worker_pipeline.rs` | Pipeline directory lifecycle, output materialization, `PipelineContext` | +| `exec` | `worker_exec.rs` | Subprocess spawning, file utilities, permissions, process kill helpers | +| `sandbox` | `worker_sandbox.rs` | Sandbox-specific: cache seeding, sandboxed copies, sandboxed execution | +| `invocation` | `worker_invocation.rs` | `RustcInvocation` state machine (condvar-based concurrent lifecycle) | +| `rustc_driver` | `worker_rustc.rs` | Rustc child process management: `spawn_pipelined_rustc`, `spawn_non_pipelined_rustc` | +| `request` | `worker_request.rs` | `RequestExecutor`, `RequestKind`: dispatch to metadata/full/fallback/non-pipelined paths | +| `logging` | `worker_logging.rs` | Structured lifecycle logging, `WorkerLifecycleGuard` | + +Current coverage splits across layers: + +- no pipelining: covered by unit tests exercising standalone options and rustc + invocation +- hollow-rlib pipelining: covered by analysis tests 
that verify consistent flag + selection +- worker pipelining: covered by unit tests for protocol, args, sandbox, and + invocation state machine; end-to-end coverage via reactor-repo builds + +## Historical Notes + +The following conclusions came from the older `thoughts/` design notes and are +worth keeping even though the plan file itself is gone: + +- Stable worker keys were a prerequisite, not a detail. Metadata and full + requests only share one worker process and one in-process pipeline state if + request-specific process-wrapper flags are moved out of startup args and into + per-request files. +- The staged-execroot and stage-pool family was explored and rejected. Measured + reuse stayed too low to justify the extra machinery; the meaningful win came + from early `.rmeta` availability, not from worker-side restaging. +- Cross-process shared stage pools were rejected for the same reason: they add + leasing and invalidation complexity without addressing the main bottleneck. +- "Resolve through the real execroot" is not the current sandbox design. It did + reduce worker-side staging cost, but it violates the documented `sandboxDir` + contract and should not be treated as the supported direction. +- The alias-root strict-sandbox idea was explored but not landed. It had useful + investigative value, especially around post-`.rmeta` rustc behavior, but it + would require a larger rewrite and stronger validation than the current + branch justified. +- Broad metadata-input pruning was investigated and rejected after real + `E0463` missing-crate regressions. Any future pruning has to be trace-driven + and validated against full dependency graphs. +- Teardown and shutdown behavior deserves explicit skepticism. Earlier + investigations saw multiplex-worker cleanup trouble around `bazel clean`, so + worker shutdown and cancellation behavior should continue to be validated as a + first-class part of the design. 
+ +To avoid stale guidance, the following should be treated as explicitly not +current on this branch: + +- staged execroot reuse as the active architecture +- cross-process stage pools as the preferred next step +- resolve-through reads outside `sandboxDir` as the supported sandbox story +- alias-root (`__rr`) as an implemented or imminent design + +## Open Questions + +The implementation is substantially more complete than the old plan, but a few +design questions remain open: + +- If strict post-response sandbox compliance is required, should sandboxed and + dynamic modes fall back to the hollow-rlib two-invocation path, or should a + different strict-sandbox design replace the current one-rustc handoff? +- How much teardown and cancellation validation is enough to treat the + background-rustc lifetime as operationally solid under `bazel clean`, + cancellation races, and dynamic execution? +- Diagnostics processing now runs on the monitor thread rather than the request + thread. Verify the output format still satisfies Bazel consumers. +- Windows `#[cfg(windows)]` paths in `execute_metadata` are preserved but + untested under the new invocation architecture. +- Small timing window: `.rmeta` exists in the pipeline output directory before + it is copied to the declared output location. Verify Bazel's output checker + does not race with this copy. 
diff --git a/util/process_wrapper/flags.rs b/util/process_wrapper/flags.rs index 8ea9557d43..65fc2c1b4c 100644 --- a/util/process_wrapper/flags.rs +++ b/util/process_wrapper/flags.rs @@ -16,7 +16,6 @@ use std::collections::{BTreeMap, HashSet}; use std::error::Error; use std::fmt; use std::fmt::Write; -use std::iter::Peekable; use std::mem::take; #[derive(Debug, Clone)] @@ -30,9 +29,9 @@ pub(crate) enum FlagParseError { impl fmt::Display for FlagParseError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Self::UnknownFlag(ref flag) => write!(f, "unknown flag \"{flag}\""), - Self::ValueMissing(ref flag) => write!(f, "flag \"{flag}\" missing parameter(s)"), - Self::ProvidedMultipleTimes(ref flag) => { + Self::UnknownFlag(flag) => write!(f, "unknown flag \"{flag}\""), + Self::ValueMissing(flag) => write!(f, "flag \"{flag}\" missing parameter(s)"), + Self::ProvidedMultipleTimes(flag) => { write!(f, "flag \"{flag}\" can only appear once") } Self::ProgramNameMissing => { @@ -124,41 +123,39 @@ impl<'a> Flags<'a> { ); } - fn help(&self, program_name: String) -> String { - let single = self.single.values().map(|fd| fd.to_string()); - let repeated = self.repeated.values().map(|fd| fd.to_string()); - let mut all: Vec = single.chain(repeated).collect(); - all.sort(); - - let mut help_text = String::new(); - writeln!( - &mut help_text, - "Help for {program_name}: [options] -- [extra arguments]" - ) - .unwrap(); - for line in all { - writeln!(&mut help_text, "\t{line}").unwrap(); - } - help_text - } - pub(crate) fn parse(mut self, argv: Vec) -> Result { let mut argv_iter = argv.into_iter().peekable(); let program_name = argv_iter.next().ok_or(FlagParseError::ProgramNameMissing)?; - // To check if a non-repeated flag has been set already. + // Track single-use flags. 
let mut seen_single_flags = HashSet::::new(); while let Some(flag) = argv_iter.next() { if flag == "--help" { - return Ok(ParseOutcome::Help(self.help(program_name))); + let single = self.single.values().map(|fd| fd.to_string()); + let repeated = self.repeated.values().map(|fd| fd.to_string()); + let mut all: Vec = single.chain(repeated).collect(); + all.sort(); + let mut help_text = String::new(); + writeln!( + &mut help_text, + "Help for {program_name}: [options] -- [extra arguments]" + ) + .unwrap(); + for line in &all { + writeln!(&mut help_text, "\t{line}").unwrap(); + } + return Ok(ParseOutcome::Help(help_text)); } if !flag.starts_with("--") { return Err(FlagParseError::UnknownFlag(flag)); } - let mut args = consume_args(&flag, &mut argv_iter); if flag == "--" { - return Ok(ParseOutcome::Parsed(args)); + return Ok(ParseOutcome::Parsed(argv_iter.collect())); + } + let mut args = vec![]; + while let Some(arg) = argv_iter.next_if(|s| !s.starts_with("--")) { + args.push(arg); } if args.is_empty() { return Err(FlagParseError::ValueMissing(flag.clone())); @@ -185,91 +182,6 @@ impl<'a> Flags<'a> { } } -fn consume_args>( - flag: &str, - argv_iter: &mut Peekable, -) -> Vec { - if flag == "--" { - // If we have found --, the rest of the iterator is just returned as-is. 
- argv_iter.collect() - } else { - let mut args = vec![]; - while let Some(arg) = argv_iter.next_if(|s| !s.starts_with("--")) { - args.push(arg); - } - args - } -} - #[cfg(test)] -mod test { - use super::*; - - fn args(args: &[&str]) -> Vec { - ["foo"].iter().chain(args).map(|&s| s.to_owned()).collect() - } - - #[test] - fn test_flag_help() { - let mut bar = None; - let mut parser = Flags::new(); - parser.define_flag("--bar", "bar help", &mut bar); - let result = parser.parse(args(&["--help"])).unwrap(); - if let ParseOutcome::Help(h) = result { - assert!(h.contains("Help for foo")); - assert!(h.contains("--bar\tbar help")); - } else { - panic!("expected that --help would invoke help, instead parsed arguments") - } - } - - #[test] - fn test_flag_single_repeated() { - let mut bar = None; - let mut parser = Flags::new(); - parser.define_flag("--bar", "bar help", &mut bar); - let result = parser.parse(args(&["--bar", "aa", "bb"])); - if let Err(FlagParseError::ProvidedMultipleTimes(f)) = result { - assert_eq!(f, "--bar"); - } else { - panic!("expected error, got {:?}", result) - } - let mut parser = Flags::new(); - parser.define_flag("--bar", "bar help", &mut bar); - let result = parser.parse(args(&["--bar", "aa", "--bar", "bb"])); - if let Err(FlagParseError::ProvidedMultipleTimes(f)) = result { - assert_eq!(f, "--bar"); - } else { - panic!("expected error, got {:?}", result) - } - } - - #[test] - fn test_repeated_flags() { - // Test case 1) --bar something something_else should work as a repeated flag. - let mut bar = None; - let mut parser = Flags::new(); - parser.define_repeated_flag("--bar", "bar help", &mut bar); - let result = parser.parse(args(&["--bar", "aa", "bb"])).unwrap(); - assert!(matches!(result, ParseOutcome::Parsed(_))); - assert_eq!(bar, Some(vec!["aa".to_owned(), "bb".to_owned()])); - // Test case 2) --bar something --bar something_else should also work as a repeated flag. 
- bar = None; - let mut parser = Flags::new(); - parser.define_repeated_flag("--bar", "bar help", &mut bar); - let result = parser.parse(args(&["--bar", "aa", "--bar", "bb"])).unwrap(); - assert!(matches!(result, ParseOutcome::Parsed(_))); - assert_eq!(bar, Some(vec!["aa".to_owned(), "bb".to_owned()])); - } - - #[test] - fn test_extra_args() { - let parser = Flags::new(); - let result = parser.parse(args(&["--", "bb"])).unwrap(); - if let ParseOutcome::Parsed(got) = result { - assert_eq!(got, vec!["bb".to_owned()]) - } else { - panic!("expected correct parsing, got {:?}", result) - } - } -} +#[path = "test/flags.rs"] +mod test; diff --git a/util/process_wrapper/main.rs b/util/process_wrapper/main.rs index 2a7cbd8565..b8d61c2989 100644 --- a/util/process_wrapper/main.rs +++ b/util/process_wrapper/main.rs @@ -15,12 +15,15 @@ mod flags; mod options; mod output; +mod pw_args; mod rustc; mod util; +mod worker; +#[cfg(windows)] use std::collections::HashMap; #[cfg(windows)] -use std::collections::{HashSet, VecDeque}; +use std::collections::VecDeque; use std::fmt; use std::fs::{self, copy, OpenOptions}; use std::io; @@ -29,16 +32,13 @@ use std::process::{exit, Command, Stdio}; #[cfg(windows)] use std::time::{SystemTime, UNIX_EPOCH}; -use tinyjson::JsonValue; - -use crate::options::options; +use crate::options::{options, Options, SubprocessPipeliningMode}; use crate::output::{process_output, LineOutput}; -use crate::rustc::ErrorFormat; #[cfg(windows)] use crate::util::read_file_to_array; #[derive(Debug)] -struct ProcessWrapperError(String); +pub(crate) struct ProcessWrapperError(String); impl fmt::Display for ProcessWrapperError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -56,61 +56,70 @@ macro_rules! 
debug_log { }; } -#[cfg(windows)] -struct TemporaryDirectoryGuard { - path: Option, +enum TemporaryPath { + File(PathBuf), + Directory(PathBuf), } -#[cfg(windows)] -impl TemporaryDirectoryGuard { - fn new(path: Option) -> Self { - Self { path } +struct TemporaryPathGuard { + paths: Vec, +} + +impl TemporaryPathGuard { + fn new() -> Self { + Self { paths: Vec::new() } } - fn take(&mut self) -> Option { - self.path.take() + fn track_file(&mut self, path: PathBuf) { + self.paths.push(TemporaryPath::File(path)); } -} -#[cfg(windows)] -impl Drop for TemporaryDirectoryGuard { - fn drop(&mut self) { - if let Some(path) = self.path.take() { - let _ = fs::remove_dir_all(path); + fn track_directory(&mut self, path: PathBuf) { + self.paths.push(TemporaryPath::Directory(path)); + } + + fn cleanup(&mut self) { + for path in self.paths.drain(..).rev() { + match path { + TemporaryPath::File(path) => { + let _ = fs::remove_file(path); + } + TemporaryPath::Directory(path) => { + let _ = fs::remove_dir_all(path); + } + } } } } -#[cfg(not(windows))] -struct TemporaryDirectoryGuard; - -#[cfg(not(windows))] -impl TemporaryDirectoryGuard { - fn new(_: Option) -> Self { - TemporaryDirectoryGuard +impl Drop for TemporaryPathGuard { + fn drop(&mut self) { + self.cleanup(); } +} - fn take(&mut self) -> Option { - None - } +#[cfg(windows)] +struct ParsedDependencyArgs { + dependency_paths: Vec, + filtered_args: Vec, } #[cfg(windows)] fn get_dependency_search_paths_from_args( initial_args: &[String], -) -> Result<(Vec, Vec), ProcessWrapperError> { +) -> Result { let mut dependency_paths = Vec::new(); let mut filtered_args = Vec::new(); let mut argfile_contents: HashMap> = HashMap::new(); - let mut queue: VecDeque<(String, Option)> = initial_args - .iter() - .map(|arg| (arg.clone(), None)) - .collect(); + let mut queue: VecDeque<(String, Option)> = + initial_args.iter().map(|arg| (arg.clone(), None)).collect(); while let Some((arg, parent_argfile)) = queue.pop_front() { let target = match 
&parent_argfile { - Some(p) => argfile_contents.entry(format!("{}.filtered", p)).or_default(), + Some(p) => argfile_contents + .entry(format!("{}.filtered", p)) + .or_default(), None => &mut filtered_args, }; @@ -145,14 +154,23 @@ fn get_dependency_search_paths_from_args( })?; } - Ok((dependency_paths, filtered_args)) + Ok(ParsedDependencyArgs { + dependency_paths, + filtered_args, + }) } +// On Windows, collapse many `-Ldependency` entries into one directory to stay +// under rustc's search-path limits. #[cfg(windows)] fn consolidate_dependency_search_paths( args: &[String], ) -> Result<(Vec, Option), ProcessWrapperError> { - let (dependency_paths, mut filtered_args) = get_dependency_search_paths_from_args(args)?; + let parsed = get_dependency_search_paths_from_args(args)?; + let ParsedDependencyArgs { + dependency_paths, + mut filtered_args, + } = parsed; if dependency_paths.is_empty() { return Ok((filtered_args, None)); @@ -168,10 +186,7 @@ fn consolidate_dependency_search_paths( unique_suffix ); - let base_dir = std::env::current_dir().map_err(|e| { - ProcessWrapperError(format!("unable to read current working directory: {}", e)) - })?; - let unified_dir = base_dir.join(&dir_name); + let unified_dir = std::env::temp_dir().join(&dir_name); fs::create_dir_all(&unified_dir).map_err(|e| { ProcessWrapperError(format!( "unable to create unified dependency directory {}: {}", @@ -180,67 +195,7 @@ fn consolidate_dependency_search_paths( )) })?; - let mut seen = HashSet::new(); - for path in dependency_paths { - let entries = fs::read_dir(&path).map_err(|e| { - ProcessWrapperError(format!( - "unable to read dependency search path {}: {}", - path.display(), - e - )) - })?; - - for entry in entries { - let entry = entry.map_err(|e| { - ProcessWrapperError(format!( - "unable to iterate dependency search path {}: {}", - path.display(), - e - )) - })?; - let file_type = entry.file_type().map_err(|e| { - ProcessWrapperError(format!( - "unable to inspect dependency search path 
{}: {}", - path.display(), - e - )) - })?; - if !(file_type.is_file() || file_type.is_symlink()) { - continue; - } - - let file_name = entry.file_name(); - let file_name_lower = file_name - .to_string_lossy() - .to_ascii_lowercase(); - if !seen.insert(file_name_lower) { - continue; - } - - let dest = unified_dir.join(&file_name); - let src = entry.path(); - match fs::hard_link(&src, &dest) { - Ok(_) => {} - Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {} - Err(err) => { - debug_log!( - "failed to hardlink {} to {} ({}), falling back to copy", - src.display(), - dest.display(), - err - ); - fs::copy(&src, &dest).map_err(|copy_err| { - ProcessWrapperError(format!( - "unable to copy {} into unified dependency dir {}: {}", - src.display(), - dest.display(), - copy_err - )) - })?; - } - } - } - } + crate::util::consolidate_deps_into(&dependency_paths, &unified_dir); filtered_args.push(format!("-Ldependency={}", unified_dir.display())); @@ -254,57 +209,173 @@ fn consolidate_dependency_search_paths( Ok((args.to_vec(), None)) } -fn json_warning(line: &str) -> JsonValue { - JsonValue::Object(HashMap::from([ - ( - "$message_type".to_string(), - JsonValue::String("diagnostic".to_string()), - ), - ("message".to_string(), JsonValue::String(line.to_string())), - ("code".to_string(), JsonValue::Null), - ( - "level".to_string(), - JsonValue::String("warning".to_string()), - ), - ("spans".to_string(), JsonValue::Array(Vec::new())), - ("children".to_string(), JsonValue::Array(Vec::new())), - ("rendered".to_string(), JsonValue::String(line.to_string())), - ])) +#[cfg(unix)] +fn symlink_dir(src: &std::path::Path, dest: &std::path::Path) -> Result<(), std::io::Error> { + std::os::unix::fs::symlink(src, dest) +} + +#[cfg(windows)] +fn symlink_dir(src: &std::path::Path, dest: &std::path::Path) -> Result<(), std::io::Error> { + std::os::windows::fs::symlink_dir(src, dest) +} + +enum CacheSeedOutcome { + AlreadyPresent, + Seeded, + NotFound, } -fn process_line( - mut 
line: String, - format: ErrorFormat, -) -> Result { - // LLVM can emit lines that look like the following, and these will be interspersed - // with the regular JSON output. Arguably, rustc should be fixed not to emit lines - // like these (or to convert them to JSON), but for now we convert them to JSON - // ourselves. - if line.contains("is not a recognized feature for this target (ignoring feature)") - || line.starts_with(" WARN ") +fn cache_root_from_execroot_ancestor(cwd: &std::path::Path) -> Option { + // Walk upward looking for the output-base `cache` directory. + for ancestor in cwd.ancestors() { + if ancestor.file_name().is_some_and(|name| name == "execroot") { + continue; + } + + let candidate = ancestor.join("cache"); + if candidate.is_dir() { + return candidate.canonicalize().ok().or(Some(candidate)); + } + } + + None +} + +fn ensure_cache_loopback_for_path( + resolved_path: &std::path::Path, + cache_root: &std::path::Path, +) -> Result, ProcessWrapperError> { + let Ok(relative) = resolved_path.strip_prefix(cache_root) else { + return Ok(None); + }; + let mut components = relative.components(); + if components + .next() + .is_none_or(|component| component.as_os_str() != "repos") { - if let Ok(json_str) = json_warning(&line).stringify() { - line = json_str; - } else { - return Ok(LineOutput::Skip); + return Ok(None); + } + let Some(version) = components.next() else { + return Ok(None); + }; + if components + .next() + .is_none_or(|component| component.as_os_str() != "contents") + { + return Ok(None); + } + + let version_dir = cache_root.join("repos").join(version.as_os_str()); + let loopback = version_dir.join("cache"); + if loopback.exists() { + return Ok(Some(loopback)); + } + + symlink_dir(cache_root, &loopback).map_err(|e| { + ProcessWrapperError(format!( + "unable to seed cache loopback {} -> {}: {}", + cache_root.display(), + loopback.display(), + e + )) + })?; + Ok(Some(loopback)) +} + +fn ensure_cache_loopback_from_args( + cwd: &std::path::Path, + 
child_arguments: &[String], + cache_root: &std::path::Path, +) -> Result, ProcessWrapperError> { + for arg in child_arguments { + let candidate = cwd.join(arg); + let Ok(resolved) = candidate.canonicalize() else { + continue; + }; + if let Some(loopback) = ensure_cache_loopback_for_path(&resolved, cache_root)? { + return Ok(Some(loopback)); } } - rustc::process_json(line, format) + + Ok(None) } -fn main() -> Result<(), ProcessWrapperError> { - let opts = options().map_err(|e| ProcessWrapperError(e.to_string()))?; +fn seed_cache_root_for_current_dir() -> Result { + let cwd = std::env::current_dir().map_err(|e| { + ProcessWrapperError(format!("unable to read current working directory: {e}")) + })?; + let dest = cwd.join("cache"); + if dest.exists() { + return Ok(CacheSeedOutcome::AlreadyPresent); + } - let (child_arguments, dep_dir_cleanup) = + if let Some(cache_root) = cache_root_from_execroot_ancestor(&cwd) { + symlink_dir(&cache_root, &dest).map_err(|e| { + ProcessWrapperError(format!( + "unable to seed cache root {} -> {}: {}", + cache_root.display(), + dest.display(), + e + )) + })?; + return Ok(CacheSeedOutcome::Seeded); + } + + for entry in fs::read_dir(&cwd).map_err(|e| { + ProcessWrapperError(format!("unable to read current working directory: {e}")) + })? { + let entry = entry.map_err(|e| { + ProcessWrapperError(format!( + "unable to enumerate current working directory: {e}" + )) + })?; + let Ok(resolved) = entry.path().canonicalize() else { + continue; + }; + + for ancestor in resolved.ancestors() { + if ancestor.file_name().is_some_and(|name| name == "cache") { + symlink_dir(ancestor, &dest).map_err(|e| { + ProcessWrapperError(format!( + "unable to seed cache root {} -> {}: {}", + ancestor.display(), + dest.display(), + e + )) + })?; + return Ok(CacheSeedOutcome::Seeded); + } + } + } + + Ok(CacheSeedOutcome::NotFound) +} + +/// Runs the standalone process_wrapper path. 
+pub(crate) fn run_standalone(opts: &Options) -> Result { + let (child_arguments, dep_argfile_cleanup) = consolidate_dependency_search_paths(&opts.child_arguments)?; - let mut temp_dir_guard = TemporaryDirectoryGuard::new(dep_dir_cleanup); + let mut temp_path_guard = TemporaryPathGuard::new(); + for path in &opts.temporary_expanded_paramfiles { + temp_path_guard.track_file(path.clone()); + } + if let Some(path) = dep_argfile_cleanup { + temp_path_guard.track_directory(path); + } + let cwd = std::env::current_dir().map_err(|e| { + ProcessWrapperError(format!("unable to read current working directory: {e}")) + })?; + let _ = seed_cache_root_for_current_dir(); + if let Some(cache_root) = cache_root_from_execroot_ancestor(&cwd) { + let _ = ensure_cache_loopback_from_args(&cwd, &child_arguments, &cache_root); + } - let mut command = Command::new(opts.executable); + let mut command = Command::new(opts.executable.clone()); command .args(child_arguments) .env_clear() - .envs(opts.child_environment) - .stdout(if let Some(stdout_file) = opts.stdout_file { + .envs(opts.child_environment.clone()) + .stdout(if let Some(stdout_file) = opts.stdout_file.as_deref() { OpenOptions::new() .create(true) .truncate(true) @@ -321,7 +392,7 @@ fn main() -> Result<(), ProcessWrapperError> { .spawn() .map_err(|e| ProcessWrapperError(format!("failed to spawn child process: {}", e)))?; - let mut stderr: Box = if let Some(stderr_file) = opts.stderr_file { + let mut stderr: Box = if let Some(stderr_file) = opts.stderr_file.as_deref() { Box::new( OpenOptions::new() .create(true) @@ -338,7 +409,9 @@ fn main() -> Result<(), ProcessWrapperError> { "unable to get child stderr".to_string(), ))?; - let mut output_file: Option = if let Some(output_file_name) = opts.output_file { + let mut output_file: Option = if let Some(output_file_name) = + opts.output_file.as_deref() + { Some( OpenOptions::new() .create(true) @@ -356,10 +429,9 @@ fn main() -> Result<(), ProcessWrapperError> { &mut child_stderr, 
stderr.as_mut(), output_file.as_mut(), - move |line| process_line(line, format), + move |line| rustc::process_stderr_line(line, format), ) } else { - // Process output normally by forwarding stderr process_output( &mut child_stderr, stderr.as_mut(), @@ -374,7 +446,7 @@ fn main() -> Result<(), ProcessWrapperError> { .map_err(|e| ProcessWrapperError(format!("failed to wait for child process: {}", e)))?; let code = status.code().unwrap_or(1); if code == 0 { - if let Some(tf) = opts.touch_file { + if let Some(tf) = opts.touch_file.as_deref() { OpenOptions::new() .create(true) .truncate(true) @@ -382,8 +454,8 @@ fn main() -> Result<(), ProcessWrapperError> { .open(tf) .map_err(|e| ProcessWrapperError(format!("failed to create touch file: {}", e)))?; } - if let Some((copy_source, copy_dest)) = opts.copy_output { - copy(©_source, ©_dest).map_err(|e| { + if let Some((copy_source, copy_dest)) = opts.copy_output.as_ref() { + copy(copy_source, copy_dest).map_err(|e| { ProcessWrapperError(format!( "failed to copy {} into {}: {}", copy_source, copy_dest, e @@ -392,134 +464,201 @@ fn main() -> Result<(), ProcessWrapperError> { } } - if let Some(path) = temp_dir_guard.take() { - let _ = fs::remove_dir_all(path); - } - - exit(code) + Ok(code) } +/// Checks whether a standalone worker-pipelining full action can skip rustc. +/// +/// Returns `Ok(true)` if the `.rlib` exists (no-op path), `Ok(false)` if no +/// `pipelining_rlib_path` is set or the `.rlib` is missing (must run rustc). +/// +/// The main() function inlines this logic for clarity; this helper exists +/// for unit tests in test/main.rs. 
#[cfg(test)] -mod test { - use super::*; - - fn parse_json(json_str: &str) -> Result { - json_str.parse::().map_err(|e| e.to_string()) +pub(crate) fn check_pipelining_full_prerequisites( + pipelining_rlib_path: &Option, +) -> Result { + match pipelining_rlib_path { + Some(rlib_path) if std::path::Path::new(rlib_path).exists() => Ok(true), + _ => Ok(false), } +} - #[test] - fn test_process_line_diagnostic_json() -> Result<(), String> { - let LineOutput::Message(msg) = process_line( - r#" - { - "$message_type": "diagnostic", - "rendered": "Diagnostic message" - } - "# - .to_string(), - ErrorFormat::Json, - )? - else { - return Err("Expected a LineOutput::Message".to_string()); - }; - assert_eq!( - parse_json(&msg)?, - parse_json( - r#" - { - "$message_type": "diagnostic", - "rendered": "Diagnostic message" - } - "# - )? +/// Checks whether the standalone .rmeta from the metadata action matches the +/// .rmeta that the full action's rustc produces. Both are from `--emit=metadata`, +/// so they're structurally identical — a byte mismatch means a non-deterministic +/// proc macro produced different SVH values across the two separate rustc +/// invocations, which will cause E0460/E0463 downstream. +/// +/// `full_rmeta_path` is a temp file produced by adding `--emit=metadata=` +/// to the full action's rustc invocation. `meta_rmeta_path` is the metadata +/// action's declared .rmeta output (an input to this action). 
+fn check_svh_consistency( + full_rmeta_path: &str, + meta_rmeta_path: &str, +) -> Result<(), String> { + let full = fs::read(full_rmeta_path) + .map_err(|e| format!("failed to read full action rmeta {}: {}", full_rmeta_path, e))?; + let meta = fs::read(meta_rmeta_path) + .map_err(|e| format!("failed to read metadata action rmeta {}: {}", meta_rmeta_path, e))?; + + if full == meta { + debug_log!( + "pipelining SVH check passed: metadata-action and full-action .rmeta match ({} bytes)", + meta.len() ); - Ok(()) + return Ok(()); } - #[test] - fn test_process_line_diagnostic_rendered() -> Result<(), String> { - let LineOutput::Message(msg) = process_line( - r#" - { - "$message_type": "diagnostic", - "rendered": "Diagnostic message" - } - "# - .to_string(), - ErrorFormat::Rendered, - )? - else { - return Err("Expected a LineOutput::Message".to_string()); - }; - assert_eq!(msg, "Diagnostic message"); - Ok(()) + Err(format!( + concat!( + "ERROR: [rules_rust] SVH mismatch detected.\n", + "The metadata action and full action produced different crate hashes for\n", + "this crate. This is caused by a non-deterministic proc macro (e.g., one\n", + "that iterates a HashMap) in this crate's dependency graph.\n", + "\n", + " metadata action .rmeta: {} ({} bytes)\n", + " full action .rmeta: {} ({} bytes)\n", + "\n", + "Downstream crates compiled against the metadata .rmeta will fail to link\n", + "against the full .rlib (E0460 SVH mismatch or E0463 can't find crate).\n", + "\n", + "To fix, either:\n", + " 1. Use worker execution: --strategy=Rustc=worker\n", + " (single rustc process per crate, SVH always consistent)\n", + " 2. Use hollow-rlib pipelining:\n", + " --@rules_rust//rust/settings:experimental_pipelined_compilation=hollow_rlib\n", + " (tier-consistent graph, safe for all proc macros)\n", + " 3. 
Fix the proc macro to use BTreeMap/BTreeSet instead of HashMap/HashSet\n", + " (eliminates non-deterministic iteration order)\n", + ), + meta_rmeta_path, + meta.len(), + full_rmeta_path, + full.len(), + )) +} + +/// Warning message when a standalone full action must run a second rustc. +const PIPELINING_STANDALONE_WARNING: &str = concat!( + "WARNING: [rules_rust] Worker pipelining full action executing outside a worker.\n", + "The metadata action's .rlib side-effect was not found, so a redundant second\n", + "rustc invocation will run. This happens when Bazel falls back from worker to\n", + "sandboxed or local execution (both run separate rustc processes). The build\n", + "will succeed if all proc macros are deterministic; nondeterministic proc macros\n", + "will be detected via SVH consistency check and fail with a clear diagnostic.\n", + "\n", + "To suppress this warning:\n", + " 1. Use worker execution: --strategy=Rustc=worker (default when supports-multiplex-workers is set)\n", + " 2. Use hollow-rlib pipelining: --@rules_rust//rust/settings:experimental_pipelined_compilation=hollow_rlib\n", +); + +fn main() -> Result<(), ProcessWrapperError> { + if std::env::args().any(|a| a == "--persistent_worker") { + return worker::worker_main(); } - #[test] - fn test_process_line_noise() -> Result<(), String> { - for text in [ - "'+zaamo' is not a recognized feature for this target (ignoring feature)", - " WARN rustc_errors::emitter Invalid span...", - ] { - let LineOutput::Message(msg) = process_line( - text.to_string(), - ErrorFormat::Json, - )? - else { - return Err("Expected a LineOutput::Message".to_string()); - }; - assert_eq!( - parse_json(&msg)?, - parse_json(&format!( - r#"{{ - "$message_type": "diagnostic", - "message": "{0}", - "code": null, - "level": "warning", - "spans": [], - "children": [], - "rendered": "{0}" - }}"#, - text - ))? 
- ); + let mut opts = options().map_err(|e| ProcessWrapperError(e.to_string()))?; + + // Outside worker mode, a full pipelining action can no-op if the metadata + // action already produced the `.rlib` as a side-effect in the same execroot. + if opts.pipelining_mode == Some(SubprocessPipeliningMode::Full) { + if let Some(ref rlib_path) = opts.pipelining_rlib_path { + if std::path::Path::new(rlib_path).exists() { + // .rlib side-effect found — metadata action already ran rustc + // in this execroot. No-op: just touch/copy outputs and exit. + debug_log!( + "pipelining no-op: .rlib already exists at {}, skipping rustc", + rlib_path + ); + if let Some(ref tf) = opts.touch_file { + OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(tf) + .map_err(|e| { + ProcessWrapperError(format!("failed to create touch file: {}", e)) + })?; + } + if let Some((ref copy_source, ref copy_dest)) = opts.copy_output { + copy(copy_source, copy_dest).map_err(|e| { + ProcessWrapperError(format!( + "failed to copy {} into {}: {}", + copy_source, copy_dest, e + )) + })?; + } + for path in &opts.temporary_expanded_paramfiles { + let _ = fs::remove_file(path); + } + exit(0); + } else { + // .rlib side-effect missing (sandboxed execution or remote leg). + // Warn and fall through to run a second rustc. + eprintln!("{}", PIPELINING_STANDALONE_WARNING); + } } - Ok(()) } - #[test] - fn test_process_line_emit_link() -> Result<(), String> { - assert!(matches!( - process_line( - r#" - { - "$message_type": "artifact", - "emit": "link" + // SVH consistency check: when the standalone full action must run a second + // rustc (no .rlib side-effect from the metadata action), inject + // `--emit=metadata=` so this rustc also produces a standalone .rmeta. + // After success, compare it with the metadata action's .rmeta. A byte + // mismatch means non-deterministic proc macros produced different SVHs. + // Multiple `--emit` flags are additive in rustc, so this is safe. 
+ let svh_check_rmeta_path = if opts.pipelining_mode == Some(SubprocessPipeliningMode::Full) + && opts.pipelining_rmeta_path.is_some() + { + let temp_rmeta = format!("{}.svh_check", opts.pipelining_rmeta_path.as_ref().unwrap()); + opts.child_arguments + .push(format!("--emit=metadata={}", temp_rmeta)); + Some(temp_rmeta) + } else { + None + }; + + let code = run_standalone(&opts)?; + + if opts.pipelining_mode == Some(SubprocessPipeliningMode::Full) { + if code != 0 + && opts + .pipelining_rlib_path + .as_ref() + .is_some_and(|p| !std::path::Path::new(p).exists()) + { + eprintln!(concat!( + "\nERROR: [rules_rust] Redundant rustc invocation failed (see warning above).\n", + "If the error is E0460 (SVH mismatch), switch to hollow-rlib pipelining:\n", + " --@rules_rust//rust/settings:experimental_pipelined_compilation=hollow_rlib\n", + )); + } else if code == 0 { + // Rustc succeeded — check for SVH mismatch between the metadata + // action's .rmeta and the full action's .rmeta (produced by the + // injected --emit=metadata). Both are standalone .rmeta files from + // --emit=metadata, so a byte mismatch means different SVH. + if let (Some(full_rmeta), Some(meta_rmeta)) = + (&svh_check_rmeta_path, &opts.pipelining_rmeta_path) + { + let result = check_svh_consistency(full_rmeta, meta_rmeta); + // Clean up the temp .rmeta regardless of result. + let _ = fs::remove_file(full_rmeta); + if let Err(msg) = result { + eprintln!("{}", msg); + exit(1); } - "# - .to_string(), - ErrorFormat::Rendered, - )?, - LineOutput::Skip - )); - Ok(()) + } + } } - #[test] - fn test_process_line_emit_metadata() -> Result<(), String> { - assert!(matches!( - process_line( - r#" - { - "$message_type": "artifact", - "emit": "metadata" - } - "# - .to_string(), - ErrorFormat::Rendered, - )?, - LineOutput::Skip - )); - Ok(()) + // Clean up svh check temp file on failure path too. 
+ if let Some(ref path) = svh_check_rmeta_path { + let _ = fs::remove_file(path); } + + exit(code) } + +#[cfg(test)] +#[path = "test/main.rs"] +mod test; diff --git a/util/process_wrapper/options.rs b/util/process_wrapper/options.rs index 6dbc898a11..e3bebeeb01 100644 --- a/util/process_wrapper/options.rs +++ b/util/process_wrapper/options.rs @@ -1,56 +1,73 @@ use std::collections::HashMap; use std::env; -use std::fmt; -use std::fs::File; +use std::fs::{self, File}; use std::io::{self, Write}; +use std::path::PathBuf; use std::process::exit; -use crate::flags::{FlagParseError, Flags, ParseOutcome}; +use crate::flags::{Flags, ParseOutcome}; use crate::rustc; use crate::util::*; -#[derive(Debug)] -pub(crate) enum OptionError { - FlagError(FlagParseError), - Generic(String), -} - -impl fmt::Display for OptionError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::FlagError(e) => write!(f, "error parsing flags: {e}"), - Self::Generic(s) => write!(f, "{s}"), - } - } -} - +// Re-export shared parsing helpers through `crate::options`. +pub(crate) use crate::pw_args::{ + build_child_environment, is_allow_features_flag, is_pipelining_flag, + is_relocated_pw_flag, normalize_args_recursive, parse_pw_args, resolve_external_path, + NormalizedRustcMetadata, OptionError, ParamFileReadErrorMode, ParsedPwArgs, RelocatedPwFlags, + SubprocessPipeliningMode, +}; #[derive(Debug)] pub(crate) struct Options { - // Contains the path to the child executable pub(crate) executable: String, - // Contains arguments for the child process fetched from files. pub(crate) child_arguments: Vec, - // Contains environment variables for the child process fetched from files. + // Standalone-expanded paramfiles to remove after the child exits. + pub(crate) temporary_expanded_paramfiles: Vec, pub(crate) child_environment: HashMap, - // If set, create the specified file after the child process successfully - // terminated its execution. 
pub(crate) touch_file: Option, - // If set to (source, dest) copies the source file to dest. pub(crate) copy_output: Option<(String, String)>, - // If set, redirects the child process stdout to this file. pub(crate) stdout_file: Option, - // If set, redirects the child process stderr to this file. pub(crate) stderr_file: Option, - // If set, also logs all unprocessed output from the rustc output to this file. - // Meant to be used to get json output out of rustc for tooling usage. + // Raw stderr copy before line-by-line rustc processing. pub(crate) output_file: Option, - // This controls the output format of rustc messages. pub(crate) rustc_output_format: Option, + // Worker pipelining mode discovered in paramfile flags. + pub(crate) pipelining_mode: Option, + // Side-effect `.rlib` used by the standalone full-action no-op path. + pub(crate) pipelining_rlib_path: Option, + // Path to the metadata action's declared `.rmeta` output, used for SVH + // mismatch detection when the full action runs standalone (non-worker). + pub(crate) pipelining_rmeta_path: Option, +} + +#[derive(Default)] +struct TemporaryExpandedParamFiles { + paths: Vec, +} + +impl TemporaryExpandedParamFiles { + fn track(&mut self, path: PathBuf) { + self.paths.push(path); + } + + fn into_inner(mut self) -> Vec { + std::mem::take(&mut self.paths) + } +} + +impl Drop for TemporaryExpandedParamFiles { + fn drop(&mut self) { + for path in self.paths.drain(..) { + let _ = fs::remove_file(path); + } + } } pub(crate) fn options() -> Result { - // Process argument list until -- is encountered. - // Everything after is sent to the child process. + options_from_args(env::args().collect()) +} + +pub(crate) fn options_from_args(raw_args: Vec) -> Result { + // Flags stop at `--`; everything after goes to the child. 
let mut subst_mapping_raw = None; let mut stable_status_file_raw = None; let mut volatile_status_file_raw = None; @@ -110,10 +127,7 @@ pub(crate) fn options() -> Result { &mut require_explicit_unstable_features, ); - let mut child_args = match flags - .parse(env::args().collect()) - .map_err(OptionError::FlagError)? - { + let mut child_args = match flags.parse(raw_args).map_err(OptionError::FlagError)? { ParseOutcome::Help(help) => { eprintln!("{help}"); exit(0); @@ -141,13 +155,6 @@ pub(crate) fn options() -> Result { Ok((key.to_owned(), v)) }) .collect::, OptionError>>()?; - let stable_stamp_mappings = - stable_status_file_raw.map_or_else(Vec::new, |s| read_stamp_status_to_array(s).unwrap()); - let volatile_stamp_mappings = - volatile_status_file_raw.map_or_else(Vec::new, |s| read_stamp_status_to_array(s).unwrap()); - let environment_file_block = env_from_files(env_file_raw.unwrap_or_default())?; - let mut file_arguments = args_from_file(arg_file_raw.unwrap_or_default())?; - // Process --copy-output let copy_output = copy_output_raw .map(|co| { if co.len() != 2 { @@ -167,6 +174,37 @@ pub(crate) fn options() -> Result { }) .transpose()?; + let require_explicit_unstable_features = + require_explicit_unstable_features.is_some_and(|s| s == "true"); + + let mut file_arguments = args_from_file(arg_file_raw.unwrap_or_default())?; + child_args.append(&mut file_arguments); + let mut temporary_expanded_paramfiles = TemporaryExpandedParamFiles::default(); + let (mut child_args, relocated) = prepare_args_internal( + child_args, + &subst_mappings, + require_explicit_unstable_features, + None, + None, + &mut temporary_expanded_paramfiles, + )?; + + let mut env_files = env_file_raw.unwrap_or_default(); + env_files.extend(relocated.env_files); + if !relocated.arg_files.is_empty() { + for arg in args_from_file(relocated.arg_files)? 
{ + let mut arg = arg; + crate::util::apply_substitutions(&mut arg, &subst_mappings); + child_args.push(arg); + } + } + + let stable_status_file = relocated.stable_status_file.or(stable_status_file_raw); + let volatile_status_file = relocated.volatile_status_file.or(volatile_status_file_raw); + + let output_file = relocated.output_file.or(output_file); + let rustc_output_format_raw = relocated.rustc_output_format.or(rustc_output_format_raw); + let rustc_output_format = rustc_output_format_raw .map(|v| match v.as_str() { "json" => Ok(rustc::ErrorFormat::Json), @@ -177,28 +215,14 @@ pub(crate) fn options() -> Result { }) .transpose()?; - // Prepare the environment variables, unifying those read from files with the ones - // of the current process. - let vars = environment_block( - environment_file_block, - &stable_stamp_mappings, - &volatile_stamp_mappings, + let vars = build_child_environment( + &env_files, + stable_status_file.as_deref(), + volatile_status_file.as_deref(), &subst_mappings, - ); - - let require_explicit_unstable_features = - require_explicit_unstable_features.is_some_and(|s| s == "true"); + ) + .map_err(OptionError::Generic)?; - // Append all the arguments fetched from files to those provided via command line. - child_args.append(&mut file_arguments); - let child_args = prepare_args( - child_args, - &subst_mappings, - require_explicit_unstable_features, - None, - None, - )?; - // Split the executable path from the rest of the arguments. 
let (exec_path, args) = child_args.split_first().ok_or_else(|| { OptionError::Generic( "at least one argument after -- is required (the child process path)".to_owned(), @@ -208,6 +232,7 @@ pub(crate) fn options() -> Result { Ok(Options { executable: exec_path.to_owned(), child_arguments: args.to_vec(), + temporary_expanded_paramfiles: temporary_expanded_paramfiles.into_inner(), child_environment: vars, touch_file, copy_output, @@ -215,6 +240,9 @@ pub(crate) fn options() -> Result { stderr_file, output_file, rustc_output_format, + pipelining_mode: relocated.pipelining_mode, + pipelining_rlib_path: relocated.pipelining_rlib_path, + pipelining_rmeta_path: relocated.pipelining_rmeta_path, }) } @@ -232,81 +260,67 @@ fn args_from_file(paths: Vec) -> Result, OptionError> { Ok(args) } -fn env_from_files(paths: Vec) -> Result, OptionError> { - let mut env_vars = HashMap::new(); - for path in paths.into_iter() { - let lines = read_file_to_array(&path).map_err(OptionError::Generic)?; - for line in lines.into_iter() { - let (k, v) = line - .split_once('=') - .ok_or_else(|| OptionError::Generic("environment file invalid".to_owned()))?; - env_vars.insert(k.to_owned(), v.to_owned()); - } - } - Ok(env_vars) -} - -fn is_allow_features_flag(arg: &str) -> bool { - arg.starts_with("-Zallow-features=") || arg.starts_with("allow-features=") -} - -fn prepare_arg(mut arg: String, subst_mappings: &[(String, String)]) -> String { - for (f, replace_with) in subst_mappings { - let from = format!("${{{f}}}"); - arg = arg.replace(&from, replace_with); - } - arg -} - -/// Apply substitutions to the given param file. Returns true iff any allow-features flags were found. +/// Expands one paramfile and returns its allow-features bit plus relocated flags. 
fn prepare_param_file( filename: &str, subst_mappings: &[(String, String)], read_file: &mut impl FnMut(&str) -> Result, OptionError>, write_to_file: &mut impl FnMut(&str) -> Result<(), OptionError>, -) -> Result { - fn process_file( - filename: &str, - subst_mappings: &[(String, String)], - read_file: &mut impl FnMut(&str) -> Result, OptionError>, - write_to_file: &mut impl FnMut(&str) -> Result<(), OptionError>, - ) -> Result { - let mut has_allow_features_flag = false; - for arg in read_file(filename)? { - let arg = prepare_arg(arg, subst_mappings); - has_allow_features_flag |= is_allow_features_flag(&arg); - if let Some(arg_file) = arg.strip_prefix('@') { - has_allow_features_flag |= - process_file(arg_file, subst_mappings, read_file, write_to_file)?; - } else { - write_to_file(&arg)?; - } - } - Ok(has_allow_features_flag) - } - let has_allow_features_flag = process_file(filename, subst_mappings, read_file, write_to_file)?; - Ok(has_allow_features_flag) +) -> Result<(bool, RelocatedPwFlags), OptionError> { + let mut metadata = NormalizedRustcMetadata::default(); + let mut write_arg = |arg: String| write_to_file(&arg); + normalize_args_recursive( + read_file(filename)?, + subst_mappings, + read_file, + ParamFileReadErrorMode::Error, + &mut write_arg, + &mut metadata, + )?; + Ok((metadata.has_allow_features, metadata.relocated)) } -/// Apply substitutions to the provided arguments, recursing into param files. 
#[allow(clippy::type_complexity)] -fn prepare_args( +fn prepare_args_internal( args: Vec, subst_mappings: &[(String, String)], require_explicit_unstable_features: bool, read_file: Option<&mut dyn FnMut(&str) -> Result, OptionError>>, mut write_file: Option<&mut dyn FnMut(&str, &str) -> Result<(), OptionError>>, -) -> Result, OptionError> { + temporary_expanded_paramfiles: &mut TemporaryExpandedParamFiles, +) -> Result<(Vec, RelocatedPwFlags), OptionError> { let mut allowed_features = false; let mut processed_args = Vec::::new(); + let mut relocated = RelocatedPwFlags::default(); let mut read_file_wrapper = |s: &str| read_file_to_array(s).map_err(OptionError::Generic); let mut read_file = read_file.unwrap_or(&mut read_file_wrapper); for arg in args.into_iter() { - let arg = prepare_arg(arg, subst_mappings); + let mut arg = arg; + crate::util::apply_substitutions(&mut arg, subst_mappings); if let Some(param_file) = arg.strip_prefix('@') { - let expanded_file = format!("{param_file}.expanded"); + // Write expanded paramfiles to a temp location the child can always read. 
+ let expanded_file = match write_file { + Some(_) => format!("{param_file}.expanded"), + None => { + let basename = std::path::Path::new(param_file) + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("params"); + format!( + "{}/pw_expanded_{}_{}", + std::env::temp_dir().display(), + std::process::id(), + basename, + ) + } + }; + + enum Writer<'f, F: FnMut(&str, &str) -> Result<(), OptionError>> { + Function(&'f mut F), + BufWriter(io::BufWriter), + } let format_err = |err: io::Error| { OptionError::Generic(format!( "{} writing path: {:?}, current directory: {:?}", @@ -315,153 +329,48 @@ fn prepare_args( std::env::current_dir() )) }; - - enum Writer<'f, F: FnMut(&str, &str) -> Result<(), OptionError>> { - Function(&'f mut F), - BufWriter(io::BufWriter), - } let mut out = match write_file { Some(ref mut f) => Writer::Function(f), - None => Writer::BufWriter(io::BufWriter::new( - File::create(&expanded_file).map_err(format_err)?, - )), + None => { + let file = File::create(&expanded_file).map_err(format_err)?; + temporary_expanded_paramfiles.track(PathBuf::from(&expanded_file)); + Writer::BufWriter(io::BufWriter::new(file)) + } }; let mut write_to_file = |s: &str| -> Result<(), OptionError> { + let s = resolve_external_path(s); match out { - Writer::Function(ref mut f) => f(&expanded_file, s), + Writer::Function(ref mut f) => f(&expanded_file, &s), Writer::BufWriter(ref mut bw) => writeln!(bw, "{s}").map_err(format_err), } }; - // Note that substitutions may also apply to the param file path! - let (file, allowed) = prepare_param_file( + // Substitutions also apply to the paramfile path. 
+ let (file, (allowed, pf_relocated)) = prepare_param_file( param_file, subst_mappings, &mut read_file, &mut write_to_file, ) - .map(|af| (format!("@{expanded_file}"), af))?; + .map(|(af, rel)| (format!("@{expanded_file}"), (af, rel)))?; allowed_features |= allowed; + relocated.merge_from(pf_relocated); processed_args.push(file); } else { allowed_features |= is_allow_features_flag(&arg); - processed_args.push(arg); + let resolved = resolve_external_path(&arg); + processed_args.push(match resolved { + std::borrow::Cow::Borrowed(_) => arg, + std::borrow::Cow::Owned(s) => s, + }); } } if !allowed_features && require_explicit_unstable_features { processed_args.push("-Zallow-features=".to_string()); } - Ok(processed_args) -} - -fn environment_block( - environment_file_block: HashMap, - stable_stamp_mappings: &[(String, String)], - volatile_stamp_mappings: &[(String, String)], - subst_mappings: &[(String, String)], -) -> HashMap { - // Taking all environment variables from the current process - // and sending them down to the child process - let mut environment_variables: HashMap = std::env::vars().collect(); - // Have the last values added take precedence over the first. - // This is simpler than needing to track duplicates and explicitly override - // them. 
- environment_variables.extend(environment_file_block); - for (f, replace_with) in &[stable_stamp_mappings, volatile_stamp_mappings].concat() { - for value in environment_variables.values_mut() { - let from = format!("{{{f}}}"); - let new = value.replace(from.as_str(), replace_with); - *value = new; - } - } - for (f, replace_with) in subst_mappings { - for value in environment_variables.values_mut() { - let from = format!("${{{f}}}"); - let new = value.replace(from.as_str(), replace_with); - *value = new; - } - } - environment_variables + Ok((processed_args, relocated)) } #[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_enforce_allow_features_flag_user_didnt_say() { - let args = vec!["rustc".to_string()]; - let subst_mappings: Vec<(String, String)> = vec![]; - let args = prepare_args(args, &subst_mappings, true, None, None).unwrap(); - assert_eq!( - args, - vec!["rustc".to_string(), "-Zallow-features=".to_string(),] - ); - } - - #[test] - fn test_enforce_allow_features_flag_user_requested_something() { - let args = vec![ - "rustc".to_string(), - "-Zallow-features=whitespace_instead_of_curly_braces".to_string(), - ]; - let subst_mappings: Vec<(String, String)> = vec![]; - let args = prepare_args(args, &subst_mappings, true, None, None).unwrap(); - assert_eq!( - args, - vec![ - "rustc".to_string(), - "-Zallow-features=whitespace_instead_of_curly_braces".to_string(), - ] - ); - } - - #[test] - fn test_enforce_allow_features_flag_user_requested_something_in_param_file() { - let mut written_files = HashMap::::new(); - let mut read_files = HashMap::>::new(); - read_files.insert( - "rustc_params".to_string(), - vec!["-Zallow-features=whitespace_instead_of_curly_braces".to_string()], - ); - - let mut read_file = |filename: &str| -> Result, OptionError> { - read_files - .get(filename) - .cloned() - .ok_or_else(|| OptionError::Generic(format!("file not found: {}", filename))) - }; - let mut write_file = |filename: &str, content: &str| -> Result<(), OptionError> 
{ - if let Some(v) = written_files.get_mut(filename) { - v.push_str(content); - } else { - written_files.insert(filename.to_owned(), content.to_owned()); - } - Ok(()) - }; - - let args = vec!["rustc".to_string(), "@rustc_params".to_string()]; - let subst_mappings: Vec<(String, String)> = vec![]; - - let args = prepare_args( - args, - &subst_mappings, - true, - Some(&mut read_file), - Some(&mut write_file), - ); - - assert_eq!( - args.unwrap(), - vec!["rustc".to_string(), "@rustc_params.expanded".to_string(),] - ); - - assert_eq!( - written_files, - HashMap::::from([( - "rustc_params.expanded".to_string(), - "-Zallow-features=whitespace_instead_of_curly_braces".to_string() - )]) - ); - } -} +#[path = "test/options.rs"] +mod test; diff --git a/util/process_wrapper/output.rs b/util/process_wrapper/output.rs index 5dabad8179..6b8713d712 100644 --- a/util/process_wrapper/output.rs +++ b/util/process_wrapper/output.rs @@ -16,9 +16,7 @@ use std::error; use std::fmt; use std::io::{self, prelude::*}; -/// LineOutput tells process_output what to do when a line is processed. -/// If a Message is returned, it will be written to write_end, if -/// Skip is returned nothing will be printed and execution continues. +/// Output action for one processed line. #[derive(Debug)] pub(crate) enum LineOutput { Message(String), @@ -56,14 +54,10 @@ impl From for ProcessError { pub(crate) type ProcessResult = Result<(), ProcessError>; -/// If this is Err we assume there were issues processing the line. -/// We will print the error returned and all following lines without -/// any more processing. +/// Per-line processing result. pub(crate) type LineResult = Result; -/// process_output reads lines from read_end and invokes process_line on each. -/// Depending on the result of process_line, the modified message may be written -/// to write_end. +/// Reads lines from `read_end` and forwards processed output to `output_write_end`. 
pub(crate) fn process_output( read_end: &mut dyn Read, output_write_end: &mut dyn Write, @@ -76,8 +70,7 @@ where let mut reader = io::BufReader::new(read_end); let mut output_writer = io::LineWriter::new(output_write_end); let mut file_writer = opt_file_write_end.map(io::LineWriter::new); - // If there was an error parsing a line failed_on contains the offending line - // and the error message. + // Preserve the first failing line and its error. let mut failed_on: Option<(String, String)> = None; loop { let mut line = String::new(); @@ -98,8 +91,7 @@ where }; } - // If we encountered an error processing a line we want to flush the rest of - // reader into writer and return the error. + // Flush the rest of the stream unchanged after the first processing error. if let Some((line, msg)) = failed_on { output_writer.write_all(line.as_bytes())?; io::copy(&mut reader, &mut output_writer)?; @@ -109,21 +101,5 @@ where } #[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_json_parsing_error() { - let mut input = io::Cursor::new(b"ok text\nsome more\nerror text"); - let mut output: Vec = vec![]; - let result = process_output(&mut input, &mut output, None, move |line| { - if line == "ok text\n" { - Ok(LineOutput::Skip) - } else { - Err("error parsing output".to_owned()) - } - }); - assert!(result.is_err()); - assert_eq!(&output, b"some more\nerror text"); - } -} +#[path = "test/output.rs"] +mod test; diff --git a/util/process_wrapper/pw_args.rs b/util/process_wrapper/pw_args.rs new file mode 100644 index 0000000000..d5bcd569be --- /dev/null +++ b/util/process_wrapper/pw_args.rs @@ -0,0 +1,420 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Shared process_wrapper argument normalization for standalone and worker code. + +use std::collections::HashMap; +use std::fmt; + +use crate::util::*; + +#[derive(Debug)] +pub(crate) enum OptionError { + FlagError(crate::flags::FlagParseError), + Generic(String), +} + +impl fmt::Display for OptionError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::FlagError(e) => write!(f, "error parsing flags: {e}"), + Self::Generic(s) => write!(f, "{s}"), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum SubprocessPipeliningMode { + Metadata, + Full, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct ParsedPwArgs { + pub(crate) subst: Vec<(String, String)>, + pub(crate) env_files: Vec, + pub(crate) arg_files: Vec, + pub(crate) stable_status_file: Option, + pub(crate) volatile_status_file: Option, + pub(crate) output_file: Option, + pub(crate) rustc_output_format: Option, + pub(crate) require_explicit_unstable_features: bool, +} + +impl ParsedPwArgs { + pub(crate) fn merge_relocated(&mut self, relocated: RelocatedPwFlags) { + self.env_files.extend(relocated.env_files); + self.arg_files.extend(relocated.arg_files); + if relocated.output_file.is_some() { + self.output_file = relocated.output_file; + } + if relocated.rustc_output_format.is_some() { + self.rustc_output_format = relocated.rustc_output_format; + } + if relocated.stable_status_file.is_some() { + self.stable_status_file = relocated.stable_status_file; + } + if relocated.volatile_status_file.is_some() { + 
self.volatile_status_file = relocated.volatile_status_file; + } + } +} + +pub(crate) fn parse_pw_args(pw_args: &[String], pwd: &std::path::Path) -> ParsedPwArgs { + let current_dir = pwd.to_string_lossy().into_owned(); + let mut parsed = ParsedPwArgs { + subst: Vec::new(), + env_files: Vec::new(), + arg_files: Vec::new(), + stable_status_file: None, + volatile_status_file: None, + output_file: None, + rustc_output_format: None, + require_explicit_unstable_features: false, + }; + let mut i = 0; + while i < pw_args.len() { + match pw_args[i].as_str() { + "--subst" => { + if let Some(kv) = pw_args.get(i + 1) { + if let Some((k, v)) = kv.split_once('=') { + let resolved = if v == "${pwd}" { ¤t_dir } else { v }; + parsed.subst.push((k.to_owned(), resolved.to_owned())); + } + i += 1; + } + } + "--env-file" => { + if let Some(path) = pw_args.get(i + 1) { + parsed.env_files.push(path.clone()); + i += 1; + } + } + "--arg-file" => { + if let Some(path) = pw_args.get(i + 1) { + parsed.arg_files.push(path.clone()); + i += 1; + } + } + "--output-file" => { + if let Some(path) = pw_args.get(i + 1) { + parsed.output_file = Some(path.clone()); + i += 1; + } + } + "--stable-status-file" => { + if let Some(path) = pw_args.get(i + 1) { + parsed.stable_status_file = Some(path.clone()); + i += 1; + } + } + "--volatile-status-file" => { + if let Some(path) = pw_args.get(i + 1) { + parsed.volatile_status_file = Some(path.clone()); + i += 1; + } + } + "--rustc-output-format" => { + if let Some(val) = pw_args.get(i + 1) { + parsed.rustc_output_format = Some(val.clone()); + i += 1; + } + } + "--require-explicit-unstable-features" => { + if let Some(val) = pw_args.get(i + 1) { + parsed.require_explicit_unstable_features = val == "true"; + i += 1; + } + } + _ => {} + } + i += 1; + } + parsed +} + +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub(crate) struct RelocatedPwFlags { + pub(crate) env_files: Vec, + pub(crate) arg_files: Vec, + pub(crate) output_file: Option, + pub(crate) 
rustc_output_format: Option, + pub(crate) stable_status_file: Option, + pub(crate) volatile_status_file: Option, + pub(crate) pipelining_mode: Option, + pub(crate) pipelining_rlib_path: Option, + pub(crate) pipelining_rmeta_path: Option, +} + +impl RelocatedPwFlags { + pub(crate) fn merge_from(&mut self, other: Self) { + self.env_files.extend(other.env_files); + self.arg_files.extend(other.arg_files); + if other.output_file.is_some() { + self.output_file = other.output_file; + } + if other.rustc_output_format.is_some() { + self.rustc_output_format = other.rustc_output_format; + } + if other.stable_status_file.is_some() { + self.stable_status_file = other.stable_status_file; + } + if other.volatile_status_file.is_some() { + self.volatile_status_file = other.volatile_status_file; + } + if other.pipelining_mode.is_some() { + self.pipelining_mode = other.pipelining_mode; + } + if other.pipelining_rlib_path.is_some() { + self.pipelining_rlib_path = other.pipelining_rlib_path; + } + if other.pipelining_rmeta_path.is_some() { + self.pipelining_rmeta_path = other.pipelining_rmeta_path; + } + } +} + +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub(crate) struct NormalizedRustcMetadata { + pub(crate) has_allow_features: bool, + pub(crate) relocated: RelocatedPwFlags, + pub(crate) pipelining_key: Option, +} + +pub(crate) fn is_allow_features_flag(arg: &str) -> bool { + arg.starts_with("-Zallow-features=") || arg.starts_with("allow-features=") +} + +// Canonical pipelining flag strings — single source of truth. +pub(crate) const PIPELINING_METADATA_FLAG: &str = "--pipelining-metadata"; +pub(crate) const PIPELINING_FULL_FLAG: &str = "--pipelining-full"; +pub(crate) const PIPELINING_KEY_PREFIX: &str = "--pipelining-key="; +pub(crate) const PIPELINING_RLIB_PATH_PREFIX: &str = "--pipelining-rlib-path="; +pub(crate) const PIPELINING_RMETA_PATH_PREFIX: &str = "--pipelining-rmeta-path="; + +/// Returns true for worker pipelining protocol flags that should not reach rustc. 
+pub(crate) fn is_pipelining_flag(arg: &str) -> bool { + arg == PIPELINING_METADATA_FLAG + || arg == PIPELINING_FULL_FLAG + || arg.starts_with(PIPELINING_KEY_PREFIX) + || arg.starts_with(PIPELINING_RLIB_PATH_PREFIX) + || arg.starts_with(PIPELINING_RMETA_PATH_PREFIX) +} + +/// Returns true for process_wrapper flags that may be relocated into a paramfile. +/// +/// These flags take the next argument as their value. +pub(crate) fn is_relocated_pw_flag(arg: &str) -> bool { + arg == "--output-file" + || arg == "--rustc-output-format" + || arg == "--env-file" + || arg == "--arg-file" + || arg == "--stable-status-file" + || arg == "--volatile-status-file" +} + +/// On Windows, resolves `.rs` paths under `external/` through junctions with +/// relative symlinks. +/// +/// Other paths are left alone so crate identity does not change. +#[cfg(windows)] +pub(crate) fn resolve_external_path(arg: &str) -> std::borrow::Cow<'_, str> { + use std::borrow::Cow; + use std::path::Path; + if !arg.ends_with(".rs") { + return Cow::Borrowed(arg); + } + if !arg.starts_with("external/") && !arg.starts_with("external\\") { + return Cow::Borrowed(arg); + } + let path = Path::new(arg); + let mut components = path.components(); + let Some(_external) = components.next() else { + return Cow::Borrowed(arg); + }; + let Some(repo_name) = components.next() else { + return Cow::Borrowed(arg); + }; + let junction = Path::new("external").join(repo_name); + let Ok(resolved) = std::fs::read_link(&junction) else { + return Cow::Borrowed(arg); + }; + let remainder: std::path::PathBuf = components.collect(); + if remainder.as_os_str().is_empty() { + return Cow::Borrowed(arg); + } + Cow::Owned(resolved.join(remainder).to_string_lossy().into_owned()) +} + +/// Returns the original argument on non-Windows platforms. 
+#[cfg(not(windows))] +#[inline] +pub(crate) fn resolve_external_path(arg: &str) -> std::borrow::Cow<'_, str> { + std::borrow::Cow::Borrowed(arg) +} + +#[derive(Clone, Copy)] +pub(crate) enum ParamFileReadErrorMode { + Error, + PreserveArg, +} + +pub(crate) fn normalize_args_recursive( + args: Vec, + subst_mappings: &[(String, String)], + read_file: &mut dyn FnMut(&str) -> Result, OptionError>, + read_error_mode: ParamFileReadErrorMode, + write_arg: &mut dyn FnMut(String) -> Result<(), OptionError>, + metadata: &mut NormalizedRustcMetadata, +) -> Result<(), OptionError> { + let mut pending_flag: Option = None; + for mut arg in args { + crate::util::apply_substitutions(&mut arg, subst_mappings); + if let Some(flag) = pending_flag.take() { + match flag.as_str() { + "--env-file" => metadata.relocated.env_files.push(arg), + "--arg-file" => metadata.relocated.arg_files.push(arg), + "--output-file" => metadata.relocated.output_file = Some(arg), + "--rustc-output-format" => metadata.relocated.rustc_output_format = Some(arg), + "--stable-status-file" => metadata.relocated.stable_status_file = Some(arg), + "--volatile-status-file" => metadata.relocated.volatile_status_file = Some(arg), + _ => {} + } + continue; + } + if arg == PIPELINING_METADATA_FLAG { + metadata.relocated.pipelining_mode = Some(SubprocessPipeliningMode::Metadata); + continue; + } else if arg == PIPELINING_FULL_FLAG { + metadata.relocated.pipelining_mode = Some(SubprocessPipeliningMode::Full); + continue; + } else if let Some(key) = arg.strip_prefix(PIPELINING_KEY_PREFIX) { + metadata.pipelining_key = Some(key.to_owned()); + continue; + } else if let Some(path) = arg.strip_prefix(PIPELINING_RLIB_PATH_PREFIX) { + metadata.relocated.pipelining_rlib_path = Some(path.to_owned()); + continue; + } else if let Some(path) = arg.strip_prefix(PIPELINING_RMETA_PATH_PREFIX) { + metadata.relocated.pipelining_rmeta_path = Some(path.to_owned()); + continue; + } + if is_relocated_pw_flag(&arg) { + pending_flag = 
Some(arg); + continue; + } + if let Some(arg_file) = arg.strip_prefix('@') { + let nested_args = match read_file(arg_file) { + Ok(args) => args, + Err(err) => match read_error_mode { + ParamFileReadErrorMode::Error => return Err(err), + ParamFileReadErrorMode::PreserveArg => { + write_arg(arg)?; + continue; + } + }, + }; + normalize_args_recursive( + nested_args, + subst_mappings, + read_file, + read_error_mode, + write_arg, + metadata, + )?; + continue; + } + metadata.has_allow_features |= is_allow_features_flag(&arg); + let resolved = resolve_external_path(&arg); + write_arg(match resolved { + std::borrow::Cow::Borrowed(_) => arg, + std::borrow::Cow::Owned(s) => s, + })?; + } + Ok(()) +} + +#[cfg(test)] +pub(crate) fn expand_args_inline( + args: &[String], + subst_mappings: &[(String, String)], + require_explicit_unstable_features: bool, + read_file: Option<&mut dyn FnMut(&str) -> Result, OptionError>>, + preserve_unreadable_paramfiles: bool, +) -> Result<(Vec, NormalizedRustcMetadata), OptionError> { + let mut metadata = NormalizedRustcMetadata::default(); + let mut expanded = Vec::new(); + let mut read_file_wrapper = |s: &str| read_file_to_array(s).map_err(OptionError::Generic); + let mut read_file = read_file.unwrap_or(&mut read_file_wrapper); + let read_error_mode = if preserve_unreadable_paramfiles { + ParamFileReadErrorMode::PreserveArg + } else { + ParamFileReadErrorMode::Error + }; + let mut write_arg = |arg: String| { + expanded.push(arg); + Ok(()) + }; + normalize_args_recursive( + args.to_vec(), + subst_mappings, + &mut read_file, + read_error_mode, + &mut write_arg, + &mut metadata, + )?; + if !metadata.has_allow_features && require_explicit_unstable_features { + expanded.push("-Zallow-features=".to_string()); + } + Ok((expanded, metadata)) +} + +pub(crate) fn build_child_environment( + env_files: &[String], + stable_status_file: Option<&str>, + volatile_status_file: Option<&str>, + subst_mappings: &[(String, String)], +) -> Result, String> { + let 
mut environment_file_block = HashMap::new(); + for path in env_files { + let lines = read_file_to_array(path) + .map_err(|err| format!("failed to read env-file '{}': {}", path, err))?; + for line in lines { + let (k, v) = line + .split_once('=') + .ok_or_else(|| format!("env-file '{}': invalid line (no '='): {}", path, line))?; + environment_file_block.insert(k.to_owned(), v.to_owned()); + } + } + let stable_stamp_mappings = match stable_status_file { + Some(path) => read_stamp_status_with_context(path, "stable-status")?, + None => Vec::new(), + }; + let volatile_stamp_mappings = match volatile_status_file { + Some(path) => read_stamp_status_with_context(path, "volatile-status")?, + None => Vec::new(), + }; + let mut environment_variables: HashMap = std::env::vars().collect(); + environment_variables.extend(environment_file_block); + for (f, replace_with) in stable_stamp_mappings.iter().chain(&volatile_stamp_mappings) { + let from = format!("{{{f}}}"); + for value in environment_variables.values_mut() { + let new = value.replace(from.as_str(), replace_with); + *value = new; + } + } + for value in environment_variables.values_mut() { + crate::util::apply_substitutions(value, subst_mappings); + } + Ok(environment_variables) +} diff --git a/util/process_wrapper/rustc.rs b/util/process_wrapper/rustc.rs index 3bb4a8c2d9..ec8884be74 100644 --- a/util/process_wrapper/rustc.rs +++ b/util/process_wrapper/rustc.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; + use tinyjson::JsonValue; use crate::output::{LineOutput, LineResult}; @@ -23,45 +25,124 @@ pub(crate) enum ErrorFormat { Rendered, } -fn get_key(value: &JsonValue, key: &str) -> Option { - if let JsonValue::Object(map) = value { - if let JsonValue::String(s) = map.get(key)? 
{ - Some(s.clone()) - } else { - None +#[derive(Debug, Clone)] +pub(crate) struct RustcStderrProcessor { + error_format: ErrorFormat, + raw_passthrough: bool, +} + +impl RustcStderrProcessor { + pub(crate) fn new(error_format: ErrorFormat) -> Self { + Self { + error_format, + raw_passthrough: false, + } + } + + pub(crate) fn process_line(&mut self, line: &str) -> Option { + if self.raw_passthrough { + return Some(line.to_owned()); + } + + match process_stderr_line(line.to_owned(), self.error_format) { + Ok(LineOutput::Message(msg)) => Some(msg), + Ok(LineOutput::Skip) => None, + Err(_) => { + self.raw_passthrough = true; + Some(line.to_owned()) + } + } + } +} + +#[derive(Debug, Clone)] +pub(crate) enum RustcStderrPolicy { + Raw, + Processed(RustcStderrProcessor), +} + +impl RustcStderrPolicy { + pub(crate) fn from_option_str(error_format: Option<&str>) -> Self { + match error_format { + Some(value) => Self::Processed(RustcStderrProcessor::new(match value { + "json" => ErrorFormat::Json, + _ => ErrorFormat::Rendered, + })), + None => Self::Raw, + } + } + + pub(crate) fn process_line(&mut self, line: &str) -> Option { + match self { + Self::Raw => Some(line.to_owned()), + Self::Processed(processor) => processor.process_line(line), } - } else { - None } } -/// process_rustc_json takes an output line from rustc configured with -/// --error-format=json, parses the json and returns the appropriate output -/// according to the original --error-format supplied. -/// Only diagnostics with a rendered message are returned. -/// Returns an errors if parsing json fails. 
-pub(crate) fn process_json(line: String, error_format: ErrorFormat) -> LineResult { +pub(crate) fn process_stderr_line(line: String, error_format: ErrorFormat) -> LineResult { + if line.contains("is not a recognized feature for this target (ignoring feature)") + || line.starts_with(" WARN ") + { + return match error_format { + ErrorFormat::Rendered => Ok(LineOutput::Message(line)), + ErrorFormat::Json => { + let warning = JsonValue::Object(HashMap::from([ + ( + "$message_type".to_string(), + JsonValue::String("diagnostic".to_string()), + ), + ("message".to_string(), JsonValue::String(line.clone())), + ("code".to_string(), JsonValue::Null), + ( + "level".to_string(), + JsonValue::String("warning".to_string()), + ), + ("spans".to_string(), JsonValue::Array(Vec::new())), + ("children".to_string(), JsonValue::Array(Vec::new())), + ("rendered".to_string(), JsonValue::String(line)), + ])); + match warning.stringify() { + Ok(json_str) => Ok(LineOutput::Message(json_str)), + Err(_) => Ok(LineOutput::Skip), + } + } + }; + } let parsed: JsonValue = line .parse() .map_err(|_| "error parsing rustc output as json".to_owned())?; - Ok(if let Some(rendered) = get_key(&parsed, "rendered") { - output_based_on_error_format(line, rendered, error_format) + let rendered = if let JsonValue::Object(map) = &parsed + && let Some(JsonValue::String(s)) = map.get("rendered") + { + Some(s.clone()) } else { + None + }; + Ok(match rendered { + Some(rendered) => match error_format { + ErrorFormat::Json => LineOutput::Message(line), + ErrorFormat::Rendered => LineOutput::Message(rendered), + }, // Ignore non-diagnostic messages such as artifact notifications. - LineOutput::Skip + None => LineOutput::Skip, }) } -fn output_based_on_error_format( - line: String, - rendered: String, - error_format: ErrorFormat, -) -> LineOutput { - match error_format { - // If the output should be json, we just forward the messages as-is - // using `line`. 
- ErrorFormat::Json => LineOutput::Message(line), - // Otherwise we return the rendered field. - ErrorFormat::Rendered => LineOutput::Message(rendered), +/// Extracts `.rmeta` artifact paths from rustc JSON notifications. +pub(crate) fn extract_rmeta_path(line: &str) -> Option { + if let Ok(JsonValue::Object(ref map)) = line.parse::() + && let Some(JsonValue::String(artifact)) = map.get("artifact") + && let Some(JsonValue::String(emit)) = map.get("emit") + && artifact.ends_with(".rmeta") + && emit == "metadata" + { + Some(artifact.clone()) + } else { + None } } + +#[cfg(test)] +#[path = "test/rustc.rs"] +mod test; diff --git a/util/process_wrapper/test/flags.rs b/util/process_wrapper/test/flags.rs new file mode 100644 index 0000000000..1d5bc8cf4e --- /dev/null +++ b/util/process_wrapper/test/flags.rs @@ -0,0 +1,69 @@ +use super::*; + +fn args(args: &[&str]) -> Vec { + ["foo"].iter().chain(args).map(|&s| s.to_owned()).collect() +} + +#[test] +fn test_flag_help() { + let mut bar = None; + let mut parser = Flags::new(); + parser.define_flag("--bar", "bar help", &mut bar); + let result = parser.parse(args(&["--help"])).unwrap(); + if let ParseOutcome::Help(h) = result { + assert!(h.contains("Help for foo")); + assert!(h.contains("--bar\tbar help")); + } else { + panic!("expected that --help would invoke help, instead parsed arguments") + } +} + +#[test] +fn test_flag_single_repeated() { + let mut bar = None; + let mut parser = Flags::new(); + parser.define_flag("--bar", "bar help", &mut bar); + let result = parser.parse(args(&["--bar", "aa", "bb"])); + if let Err(FlagParseError::ProvidedMultipleTimes(f)) = result { + assert_eq!(f, "--bar"); + } else { + panic!("expected error, got {:?}", result) + } + let mut parser = Flags::new(); + parser.define_flag("--bar", "bar help", &mut bar); + let result = parser.parse(args(&["--bar", "aa", "--bar", "bb"])); + if let Err(FlagParseError::ProvidedMultipleTimes(f)) = result { + assert_eq!(f, "--bar"); + } else { + 
panic!("expected error, got {:?}", result) + } +} + +#[test] +fn test_repeated_flags() { + // Test case 1) --bar something something_else should work as a repeated flag. + let mut bar = None; + let mut parser = Flags::new(); + parser.define_repeated_flag("--bar", "bar help", &mut bar); + let result = parser.parse(args(&["--bar", "aa", "bb"])).unwrap(); + assert!(matches!(result, ParseOutcome::Parsed(_))); + assert_eq!(bar, Some(vec!["aa".to_owned(), "bb".to_owned()])); + // Test case 2) --bar something --bar something_else should also work as a repeated flag. + bar = None; + let mut parser = Flags::new(); + parser.define_repeated_flag("--bar", "bar help", &mut bar); + let result = parser.parse(args(&["--bar", "aa", "--bar", "bb"])).unwrap(); + assert!(matches!(result, ParseOutcome::Parsed(_))); + assert_eq!(bar, Some(vec!["aa".to_owned(), "bb".to_owned()])); +} + +#[test] +fn test_extra_args() { + let parser = Flags::new(); + let result = parser.parse(args(&["--", "bb"])).unwrap(); + if let ParseOutcome::Parsed(got) = result { + assert_eq!(got, vec!["bb".to_owned()]) + } else { + panic!("expected correct parsing, got {:?}", result) + } +} diff --git a/util/process_wrapper/test/main.rs b/util/process_wrapper/test/main.rs new file mode 100644 index 0000000000..ac040f4b45 --- /dev/null +++ b/util/process_wrapper/test/main.rs @@ -0,0 +1,199 @@ +use super::*; + +#[test] +#[cfg(unix)] +fn test_seed_cache_root_for_current_dir() -> Result<(), String> { + let tmp = std::env::temp_dir().join("pw_test_seed_cache_root_for_current_dir"); + let sandbox_dir = tmp.join("sandbox"); + let cache_repo = tmp.join("cache/repos/v1/contents/hash/repo"); + fs::create_dir_all(&sandbox_dir).map_err(|e| e.to_string())?; + fs::create_dir_all(cache_repo.join("tool/src")).map_err(|e| e.to_string())?; + symlink_dir(&cache_repo, &sandbox_dir.join("external_repo")).map_err(|e| e.to_string())?; + + let old_cwd = std::env::current_dir().map_err(|e| e.to_string())?; + 
std::env::set_current_dir(&sandbox_dir).map_err(|e| e.to_string())?; + let result = seed_cache_root_for_current_dir().map_err(|e| e.to_string()); + let restore = std::env::set_current_dir(old_cwd).map_err(|e| e.to_string()); + let seeded_target = sandbox_dir + .join("cache") + .canonicalize() + .map_err(|e| e.to_string()); + + let _ = fs::remove_dir_all(&tmp); + + result?; + restore?; + assert_eq!(seeded_target?, tmp.join("cache")); + Ok(()) +} + +#[test] +#[cfg(unix)] +fn test_seed_cache_root_from_execroot_ancestor() -> Result<(), String> { + let tmp = std::env::temp_dir().join("pw_test_seed_cache_root_from_execroot_ancestor"); + let cwd = tmp.join("output-base/execroot/_main"); + fs::create_dir_all(tmp.join("output-base/cache/repos")).map_err(|e| e.to_string())?; + fs::create_dir_all(&cwd).map_err(|e| e.to_string())?; + + let old_cwd = std::env::current_dir().map_err(|e| e.to_string())?; + std::env::set_current_dir(&cwd).map_err(|e| e.to_string())?; + let result = seed_cache_root_for_current_dir().map_err(|e| e.to_string()); + let restore = std::env::set_current_dir(old_cwd).map_err(|e| e.to_string()); + let seeded_target = cwd.join("cache").canonicalize().map_err(|e| e.to_string()); + + let _ = fs::remove_dir_all(&tmp); + + result?; + restore?; + assert_eq!(seeded_target?, tmp.join("output-base/cache")); + Ok(()) +} + +#[test] +#[cfg(unix)] +fn test_ensure_cache_loopback_from_args() -> Result<(), String> { + let tmp = std::env::temp_dir().join("pw_test_ensure_cache_loopback_from_args"); + let cwd = tmp.join("output-base/execroot/_main"); + let cache_root = tmp.join("output-base/cache"); + let source = cache_root.join("repos/v1/contents/hash/repo/.tmp_git_root/tool/src/lib.rs"); + fs::create_dir_all(source.parent().unwrap()).map_err(|e| e.to_string())?; + fs::create_dir_all(&cwd).map_err(|e| e.to_string())?; + fs::write(&source, "").map_err(|e| e.to_string())?; + symlink_dir( + &cache_root.join("repos/v1/contents/hash/repo"), + &cwd.join("external_repo"), + ) + 
.map_err(|e| e.to_string())?; + + let loopback = ensure_cache_loopback_from_args( + &cwd, + &[String::from("external_repo/.tmp_git_root/tool/src/lib.rs")], + &cache_root, + ) + .map_err(|e| e.to_string())?; + let loopback_target = cache_root + .join("repos/v1/cache") + .canonicalize() + .map_err(|e| e.to_string())?; + + let _ = fs::remove_dir_all(&tmp); + + assert_eq!(loopback, Some(cache_root.join("repos/v1/cache"))); + assert_eq!(loopback_target, cache_root); + Ok(()) +} + +#[test] +fn test_run_standalone_cleans_up_expanded_paramfiles() -> Result<(), String> { + let crate_dir = setup_test_crate("cleanup_expanded_paramfiles"); + let out_dir = crate_dir.join("out"); + let paramfile = crate_dir.join("cleanup_expanded_paramfiles.params"); + fs::create_dir_all(&out_dir).map_err(|e| e.to_string())?; + fs::write( + ¶mfile, + format!( + "--crate-type=lib\n--edition=2021\n--crate-name=cleanup_test\n--emit=metadata\n--out-dir={}\n{}\n", + out_dir.display(), + crate_dir.join("lib.rs").display(), + ), + ) + .map_err(|e| e.to_string())?; + + let expanded_paramfile = std::env::temp_dir().join(format!( + "pw_expanded_{}_{}", + std::process::id(), + paramfile + .file_name() + .and_then(|name| name.to_str()) + .ok_or_else(|| "paramfile basename was not utf-8".to_string())?, + )); + let _ = fs::remove_file(&expanded_paramfile); + + let opts = crate::options::options_from_args(vec![ + "process_wrapper".to_string(), + "--".to_string(), + resolve_rustc().display().to_string(), + format!("@{}", paramfile.display()), + ]) + .map_err(|e| e.to_string())?; + + assert_eq!( + opts.temporary_expanded_paramfiles, + vec![expanded_paramfile.clone()] + ); + assert!( + expanded_paramfile.exists(), + "expected expanded paramfile at {}", + expanded_paramfile.display() + ); + + let code = run_standalone(&opts).map_err(|e| e.to_string())?; + let compiled_metadata = fs::read_dir(&out_dir) + .map_err(|e| e.to_string())? 
+ .filter_map(|entry| entry.ok()) + .any(|entry| entry.path().extension().is_some_and(|ext| ext == "rmeta")); + + let _ = fs::remove_dir_all(&crate_dir); + + assert_eq!(code, 0); + assert!(compiled_metadata, "expected rustc to emit an .rmeta file"); + assert!( + !expanded_paramfile.exists(), + "expected expanded paramfile cleanup for {}", + expanded_paramfile.display() + ); + Ok(()) +} + +#[test] +fn test_standalone_full_returns_false_when_rlib_missing() { + let result = super::check_pipelining_full_prerequisites( + &Some("/nonexistent/path/libfoo.rlib".to_string()), + ); + assert!(result.is_ok(), "Expected Ok(false) when .rlib missing"); + assert_eq!(result.unwrap(), false, ".rlib missing should return false (run rustc)"); +} + +#[test] +fn test_standalone_full_noop_when_rlib_exists() { + let tmp = std::env::temp_dir().join("pw_test_fail_closed_noop"); + let _ = std::fs::remove_dir_all(&tmp); + std::fs::create_dir_all(&tmp).unwrap(); + let rlib_path = tmp.join("libfoo.rlib"); + std::fs::write(&rlib_path, b"fake rlib").unwrap(); + + let result = super::check_pipelining_full_prerequisites( + &Some(rlib_path.to_str().unwrap().to_string()), + ); + assert!(result.is_ok(), "Expected Ok(true) when .rlib exists"); + assert_eq!(result.unwrap(), true); + + let _ = std::fs::remove_dir_all(&tmp); +} + +#[test] +fn test_standalone_full_no_rlib_path_is_noop() { + let result = super::check_pipelining_full_prerequisites(&None); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), false); +} + +/// Resolves the real rustc binary from the runfiles tree. +fn resolve_rustc() -> std::path::PathBuf { + let r = runfiles::Runfiles::create().unwrap(); + runfiles::rlocation!(r, env!("RUSTC_RLOCATIONPATH")) + .expect("could not resolve RUSTC_RLOCATIONPATH via runfiles") +} + +/// Creates a temp directory with a trivial Rust library source file. 
+fn setup_test_crate(name: &str) -> std::path::PathBuf { + let dir = std::env::temp_dir().join(format!("pw_determinism_{name}_{}", std::process::id())); + let _ = fs::remove_dir_all(&dir); + fs::create_dir_all(&dir).unwrap(); + fs::write( + dir.join("lib.rs"), + "pub fn hello() -> u32 { 42 }\npub fn world() -> &'static str { \"hello\" }\n", + ) + .unwrap(); + dir +} diff --git a/util/process_wrapper/test/options.rs b/util/process_wrapper/test/options.rs new file mode 100644 index 0000000000..e14b01bdbe --- /dev/null +++ b/util/process_wrapper/test/options.rs @@ -0,0 +1,362 @@ +use super::*; + +#[allow(clippy::type_complexity)] +fn prepare_args( + args: Vec, + subst_mappings: &[(String, String)], + require_explicit_unstable_features: bool, + read_file: Option<&mut dyn FnMut(&str) -> Result, OptionError>>, + write_file: Option<&mut dyn FnMut(&str, &str) -> Result<(), OptionError>>, +) -> Result<(Vec, RelocatedPwFlags), OptionError> { + let mut tmp = TemporaryExpandedParamFiles::default(); + let prepared = prepare_args_internal( + args, + subst_mappings, + require_explicit_unstable_features, + read_file, + write_file, + &mut tmp, + )?; + let _ = tmp.into_inner(); + Ok(prepared) +} + +fn unique_test_dir(prefix: &str) -> std::path::PathBuf { + let dir = std::env::temp_dir().join(format!( + "{}_{}_{}", + prefix, + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + )); + let _ = std::fs::remove_dir_all(&dir); + std::fs::create_dir_all(&dir).unwrap(); + dir +} + +#[test] +fn test_enforce_allow_features_flag_user_didnt_say() { + let args = vec!["rustc".to_string()]; + let subst_mappings: Vec<(String, String)> = vec![]; + let (args, _) = prepare_args(args, &subst_mappings, true, None, None).unwrap(); + assert_eq!( + args, + vec!["rustc".to_string(), "-Zallow-features=".to_string(),] + ); +} + +#[test] +fn test_enforce_allow_features_flag_user_requested_something() { + let args = vec![ + "rustc".to_string(), 
+ "-Zallow-features=whitespace_instead_of_curly_braces".to_string(), + ]; + let subst_mappings: Vec<(String, String)> = vec![]; + let (args, _) = prepare_args(args, &subst_mappings, true, None, None).unwrap(); + assert_eq!( + args, + vec![ + "rustc".to_string(), + "-Zallow-features=whitespace_instead_of_curly_braces".to_string(), + ] + ); +} + +#[test] +fn test_enforce_allow_features_flag_user_requested_something_in_param_file() { + let mut written_files = HashMap::::new(); + let mut read_files = HashMap::>::new(); + read_files.insert( + "rustc_params".to_string(), + vec!["-Zallow-features=whitespace_instead_of_curly_braces".to_string()], + ); + + let mut read_file = |filename: &str| -> Result, OptionError> { + read_files + .get(filename) + .cloned() + .ok_or_else(|| OptionError::Generic(format!("file not found: {}", filename))) + }; + let mut write_file = |filename: &str, content: &str| -> Result<(), OptionError> { + if let Some(v) = written_files.get_mut(filename) { + v.push_str(content); + } else { + written_files.insert(filename.to_owned(), content.to_owned()); + } + Ok(()) + }; + + let args = vec!["rustc".to_string(), "@rustc_params".to_string()]; + let subst_mappings: Vec<(String, String)> = vec![]; + + let (args, _) = prepare_args( + args, + &subst_mappings, + true, + Some(&mut read_file), + Some(&mut write_file), + ) + .unwrap(); + + assert_eq!( + args, + vec!["rustc".to_string(), "@rustc_params.expanded".to_string(),] + ); + + assert_eq!( + written_files, + HashMap::::from([( + "rustc_params.expanded".to_string(), + "-Zallow-features=whitespace_instead_of_curly_braces".to_string() + )]) + ); +} + +#[test] +fn test_prepare_param_file_strips_and_collects_relocated_pw_flags() { + let mut written = String::new(); + let mut read_file = |_filename: &str| -> Result, OptionError> { + Ok(vec![ + "--output-file".to_string(), + "bazel-out/foo/libbar.rmeta".to_string(), + "--env-file".to_string(), + "bazel-out/foo/build_script.env".to_string(), + 
"src/lib.rs".to_string(), + "--crate-name=foo".to_string(), + "--arg-file".to_string(), + "bazel-out/foo/build_script.linksearchpaths".to_string(), + "--rustc-output-format".to_string(), + "rendered".to_string(), + "--stable-status-file".to_string(), + "bazel-out/stable-status.txt".to_string(), + "--volatile-status-file".to_string(), + "bazel-out/volatile-status.txt".to_string(), + "--crate-type=rlib".to_string(), + ]) + }; + let mut write_to_file = |s: &str| -> Result<(), OptionError> { + if !written.is_empty() { + written.push('\n'); + } + written.push_str(s); + Ok(()) + }; + + let (_, relocated) = + prepare_param_file("test.params", &[], &mut read_file, &mut write_to_file).unwrap(); + + // All relocated pw flags + values should be stripped from output. + // Only the rustc flags should remain. + assert_eq!(written, "src/lib.rs\n--crate-name=foo\n--crate-type=rlib"); + + // Verify collected relocated flags. + assert_eq!( + relocated.output_file.as_deref(), + Some("bazel-out/foo/libbar.rmeta") + ); + assert_eq!(relocated.env_files, vec!["bazel-out/foo/build_script.env"]); + assert_eq!( + relocated.arg_files, + vec!["bazel-out/foo/build_script.linksearchpaths"] + ); + assert_eq!(relocated.rustc_output_format.as_deref(), Some("rendered")); + assert_eq!( + relocated.stable_status_file.as_deref(), + Some("bazel-out/stable-status.txt") + ); + assert_eq!( + relocated.volatile_status_file.as_deref(), + Some("bazel-out/volatile-status.txt") + ); +} + +#[test] +fn test_expand_args_inline_matches_standalone_prepare_args_for_nested_paramfiles() { + let read_files = HashMap::>::from([ + ( + "root.params".to_string(), + vec![ + "--crate-name=foo".to_string(), + "@nested.params".to_string(), + "src/lib.rs".to_string(), + ], + ), + ( + "nested.params".to_string(), + vec![ + "--env-file".to_string(), + "build.env".to_string(), + "--arg-file".to_string(), + "build.args".to_string(), + "--output-file".to_string(), + "diag.txt".to_string(), + "--rustc-output-format".to_string(), + 
"json".to_string(), + "--stable-status-file".to_string(), + "stable.txt".to_string(), + "--volatile-status-file".to_string(), + "volatile.txt".to_string(), + "--pipelining-metadata".to_string(), + "--pipelining-rlib-path=${pwd}/out/libfoo.rlib".to_string(), + "@leaf.params".to_string(), + ], + ), + ( + "leaf.params".to_string(), + vec![ + "--out-dir=${pwd}/out".to_string(), + "--cfg=leaf_cfg".to_string(), + ], + ), + ]); + let mut written_files = HashMap::::new(); + let mut standalone_read = |filename: &str| -> Result, OptionError> { + read_files + .get(filename) + .cloned() + .ok_or_else(|| OptionError::Generic(format!("file not found: {}", filename))) + }; + let mut write_file = |filename: &str, content: &str| -> Result<(), OptionError> { + match written_files.get_mut(filename) { + Some(existing) => { + existing.push('\n'); + existing.push_str(content); + } + None => { + written_files.insert(filename.to_owned(), content.to_owned()); + } + } + Ok(()) + }; + let args = vec!["rustc".to_string(), "@root.params".to_string()]; + let subst_mappings = vec![("pwd".to_string(), "/work".to_string())]; + + let (standalone_args, standalone_relocated) = prepare_args( + args.clone(), + &subst_mappings, + true, + Some(&mut standalone_read), + Some(&mut write_file), + ) + .unwrap(); + + let mut worker_read = |filename: &str| -> Result, OptionError> { + read_files + .get(filename) + .cloned() + .ok_or_else(|| OptionError::Generic(format!("file not found: {}", filename))) + }; + let (worker_args, worker_meta) = + crate::pw_args::expand_args_inline(&args, &subst_mappings, true, Some(&mut worker_read), false).unwrap(); + + assert_eq!( + standalone_args, + vec![ + "rustc".to_string(), + "@root.params.expanded".to_string(), + "-Zallow-features=".to_string(), + ] + ); + let mut reconstructed = vec!["rustc".to_string()]; + reconstructed.extend( + written_files["root.params.expanded"] + .lines() + .map(str::to_owned), + ); + reconstructed.push("-Zallow-features=".to_string()); + 
assert_eq!(worker_args, reconstructed); + assert_eq!(worker_meta.relocated, standalone_relocated); + assert_eq!(standalone_relocated.env_files, vec!["build.env"]); + assert_eq!(standalone_relocated.arg_files, vec!["build.args"]); + assert_eq!( + standalone_relocated.output_file.as_deref(), + Some("diag.txt") + ); + assert_eq!( + standalone_relocated.rustc_output_format.as_deref(), + Some("json") + ); + assert_eq!( + standalone_relocated.stable_status_file.as_deref(), + Some("stable.txt") + ); + assert_eq!( + standalone_relocated.volatile_status_file.as_deref(), + Some("volatile.txt") + ); + assert_eq!( + standalone_relocated.pipelining_mode, + Some(SubprocessPipeliningMode::Metadata) + ); + assert_eq!( + standalone_relocated.pipelining_rlib_path.as_deref(), + Some("/work/out/libfoo.rlib") + ); +} + +#[test] +#[cfg(not(windows))] +fn resolve_external_path_unchanged_on_non_windows() { + // On non-Windows, resolve_external_path is a no-op passthrough. + for arg in [ + "external/some_repo/src/lib.txt", + "src/main.rs", + "external/nonexistent_repo_12345/src/lib.rs", + ] { + assert_eq!(&*resolve_external_path(arg), arg, "input: {arg}"); + } +} + +#[test] +fn test_options_missing_stable_status_returns_error() { + let tmp = unique_test_dir("pw_test_missing_stable_status"); + let missing = tmp.join("stable-status.txt"); + + let err = options_from_args(vec![ + "process_wrapper".to_string(), + "--stable-status-file".to_string(), + missing.display().to_string(), + "--".to_string(), + "rustc".to_string(), + ]) + .unwrap_err(); + + match err { + OptionError::Generic(message) => { + assert!(message.contains("failed to read stable-status")); + assert!(message.contains(&missing.display().to_string())); + } + other => panic!("expected generic error, got {:?}", other), + } + + let _ = std::fs::remove_dir_all(&tmp); +} + +#[test] +fn test_options_malformed_volatile_status_returns_error() { + let tmp = unique_test_dir("pw_test_malformed_volatile_status"); + let volatile_status = 
tmp.join("volatile-status.txt"); + std::fs::write(&volatile_status, "BUILD_USER\n").unwrap(); + + let err = options_from_args(vec![ + "process_wrapper".to_string(), + "--volatile-status-file".to_string(), + volatile_status.display().to_string(), + "--".to_string(), + "rustc".to_string(), + ]) + .unwrap_err(); + + match err { + OptionError::Generic(message) => { + assert!(message.contains("failed to read volatile-status")); + assert!(message.contains(&volatile_status.display().to_string())); + assert!(message.contains("wrong workspace status file format")); + } + other => panic!("expected generic error, got {:?}", other), + } + + let _ = std::fs::remove_dir_all(&tmp); +} diff --git a/util/process_wrapper/test/output.rs b/util/process_wrapper/test/output.rs new file mode 100644 index 0000000000..a6c9db1247 --- /dev/null +++ b/util/process_wrapper/test/output.rs @@ -0,0 +1,16 @@ +use super::*; + +#[test] +fn test_json_parsing_error() { + let mut input = io::Cursor::new(b"ok text\nsome more\nerror text"); + let mut output: Vec = vec![]; + let result = process_output(&mut input, &mut output, None, move |line| { + if line == "ok text\n" { + Ok(LineOutput::Skip) + } else { + Err("error parsing output".to_owned()) + } + }); + assert!(result.is_err()); + assert_eq!(&output, b"some more\nerror text"); +} diff --git a/util/process_wrapper/test/rustc.rs b/util/process_wrapper/test/rustc.rs new file mode 100644 index 0000000000..b42e55c42d --- /dev/null +++ b/util/process_wrapper/test/rustc.rs @@ -0,0 +1,101 @@ +use crate::output::LineOutput; + +use super::*; +use tinyjson::JsonValue; + +fn parse_json(json_str: &str) -> Result { + json_str.parse::().map_err(|e| e.to_string()) +} + +#[test] +fn test_stderr_policy_normalizes_llvm_warning_in_json_mode() -> Result<(), String> { + let mut policy = RustcStderrPolicy::from_option_str(Some("json")); + let text = " WARN rustc_errors::emitter Invalid span..."; + let Some(message) = policy.process_line(text) else { + return Err("Expected 
a processed warning message".to_string()); + }; + + assert_eq!( + parse_json(&message)?, + parse_json(&format!( + r#"{{ + "$message_type": "diagnostic", + "message": "{0}", + "code": null, + "level": "warning", + "spans": [], + "children": [], + "rendered": "{0}" + }}"#, + text + ))? + ); + Ok(()) +} + +#[test] +fn test_stderr_policy_switches_to_raw_passthrough_after_parse_failure() { + let mut policy = RustcStderrPolicy::from_option_str(Some("rendered")); + let malformed = "{\"rendered\":\"unterminated\"\n"; + let valid = "{\"$message_type\":\"diagnostic\",\"rendered\":\"Diagnostic message\"}\n"; + + assert_eq!(policy.process_line(malformed), Some(malformed.to_string())); + assert_eq!(policy.process_line(valid), Some(valid.to_string())); +} + +/// Table-driven test covering all `process_stderr_line` branches: +/// rendered diagnostic, JSON diagnostic, warning normalization, and artifact skip. +#[test] +fn test_process_stderr_line_table() -> Result<(), String> { + // (input, format, expected output) + let diagnostic_json = r#"{"$message_type":"diagnostic","rendered":"Diagnostic message"}"#; + + // JSON mode: returns the full JSON unchanged. + let LineOutput::Message(msg) = + process_stderr_line(diagnostic_json.to_string(), ErrorFormat::Json)? + else { + return Err("expected Message for diagnostic in JSON mode".to_string()); + }; + assert_eq!(parse_json(&msg)?, parse_json(diagnostic_json)?); + + // Rendered mode: extracts the "rendered" field. + let LineOutput::Message(msg) = + process_stderr_line(diagnostic_json.to_string(), ErrorFormat::Rendered)? + else { + return Err("expected Message for diagnostic in Rendered mode".to_string()); + }; + assert_eq!(msg, "Diagnostic message"); + + // Noise lines are normalized to JSON diagnostics. 
+ for text in [ + "'+zaamo' is not a recognized feature for this target (ignoring feature)", + " WARN rustc_errors::emitter Invalid span...", + ] { + let LineOutput::Message(msg) = + process_stderr_line(text.to_string(), ErrorFormat::Json)? + else { + return Err(format!("expected Message for noise line: {text}")); + }; + assert_eq!( + parse_json(&msg)?, + parse_json(&format!( + r#"{{"$message_type":"diagnostic","message":"{0}","code":null,"level":"warning","spans":[],"children":[],"rendered":"{0}"}}"#, + text + ))? + ); + } + + // Artifact lines are skipped. + for emit in ["link", "metadata"] { + let json = format!(r#"{{"$message_type":"artifact","emit":"{emit}"}}"#); + assert!( + matches!( + process_stderr_line(json, ErrorFormat::Rendered)?, + LineOutput::Skip + ), + "artifact emit={emit} should be skipped" + ); + } + + Ok(()) +} diff --git a/util/process_wrapper/test/util.rs b/util/process_wrapper/test/util.rs new file mode 100644 index 0000000000..ac116034ef --- /dev/null +++ b/util/process_wrapper/test/util.rs @@ -0,0 +1,28 @@ +use super::*; + +#[test] +fn test_read_to_array() { + let input = r"some escaped \\\ +string +with other lines" + .to_owned(); + let expected = vec![ + r"some escaped \ +string", + "with other lines", + ]; + let got = read_to_array(input.as_bytes()).unwrap(); + assert_eq!(expected, got); +} + +#[test] +fn test_stamp_status_to_array() { + let lines = "aaa bbb\\\nvvv\nccc ddd\neee fff"; + let got = stamp_status_to_array(lines.as_bytes()).unwrap(); + let expected = vec![ + ("aaa".to_owned(), "bbb\nvvv".to_owned()), + ("ccc".to_owned(), "ddd".to_owned()), + ("eee".to_owned(), "fff".to_owned()), + ]; + assert_eq!(expected, got); +} diff --git a/util/process_wrapper/test/worker.rs b/util/process_wrapper/test/worker.rs new file mode 100644 index 0000000000..d4fe4f37c2 --- /dev/null +++ b/util/process_wrapper/test/worker.rs @@ -0,0 +1,1336 @@ +use super::args::{ + apply_substs, assemble_request_argv, build_rustc_env, 
expand_rustc_args_with_metadata, + extract_direct_request_pw_flags, find_out_dir_in_expanded, find_out_dir_in_request, + prepare_rustc_args, + rewrite_expanded_rustc_outputs, scan_pipelining_flags, split_startup_args, + strip_pipelining_flags, +}; +use super::exec::resolve_request_relative_path; +use super::exec::{prepare_expanded_rustc_outputs, ExpandedRustcOutputs}; +use super::invocation::RustcInvocation; +use super::protocol::{extract_arguments, extract_cancel, extract_request_id, extract_sandbox_dir}; +use super::request::RequestKind; +#[cfg(unix)] +use super::sandbox::{ + copy_all_outputs_to_sandbox, copy_output_to_sandbox, seed_sandbox_cache_root, symlink_path, +}; +use super::types::{OutputDir, PipelineKey, RequestId}; +use super::RequestCoordinator; +use super::*; +use crate::options::is_pipelining_flag; +use crate::options::parse_pw_args; +use crate::rustc::extract_rmeta_path; +use std::path::PathBuf; +use std::sync::Arc; +use tinyjson::JsonValue; + +fn parse_json(s: &str) -> JsonValue { + s.parse().unwrap() +} + +/// Converts a path to a JSON-safe string, escaping backslashes on Windows. 
+fn escape_path_for_json(path: &std::path::Path) -> String { + let s = path.to_string_lossy().into_owned(); + #[cfg(windows)] + let s = s.replace('\\', "\\\\"); + s +} + +#[test] +fn test_extract_request_id_present() { + let req = parse_json(r#"{"requestId": 42, "arguments": []}"#); + assert_eq!(extract_request_id(&req), RequestId(42)); +} + +#[test] +fn test_extract_request_id_missing() { + let req = parse_json(r#"{"arguments": []}"#); + assert_eq!(extract_request_id(&req), RequestId(0)); +} + +#[test] +fn test_extract_arguments() { + let req = + parse_json(r#"{"requestId": 0, "arguments": ["--subst", "pwd=/work", "--", "rustc"]}"#); + assert_eq!( + extract_arguments(&req), + vec!["--subst", "pwd=/work", "--", "rustc"] + ); +} + +#[test] +fn test_extract_arguments_empty() { + let req = parse_json(r#"{"requestId": 0, "arguments": []}"#); + assert_eq!(extract_arguments(&req), Vec::::new()); +} + +#[test] +fn test_build_response_sanitizes_control_characters() { + let response = build_response(1, "hello\u{0}world\u{7}", RequestId(9)); + let parsed = parse_json(&response); + let JsonValue::Object(map) = parsed else { + panic!("expected object response"); + }; + let Some(JsonValue::String(output)) = map.get("output") else { + panic!("expected string output"); + }; + assert_eq!(output, "hello world "); +} + +#[test] +#[cfg(unix)] +fn test_prepare_outputs_inline_out_dir() { + use std::fs; + use std::os::unix::fs::PermissionsExt; + + let dir = std::env::temp_dir().join("pw_test_prepare_inline"); + fs::create_dir_all(&dir).unwrap(); + let file_path = dir.join("libfoo.rmeta"); + fs::write(&file_path, b"content").unwrap(); + + let mut perms = fs::metadata(&file_path).unwrap().permissions(); + perms.set_mode(0o444); + fs::set_permissions(&file_path, perms).unwrap(); + assert!(fs::metadata(&file_path).unwrap().permissions().readonly()); + + let args = vec![format!("--out-dir={}", dir.display())]; + prepare_outputs(&args, None); + + 
assert!(!fs::metadata(&file_path).unwrap().permissions().readonly()); + let _ = fs::remove_dir_all(&dir); +} + +#[test] +#[cfg(unix)] +fn test_prepare_outputs_arg_file() { + use std::fs; + use std::os::unix::fs::PermissionsExt; + + let tmp = std::env::temp_dir().join("pw_test_prepare_argfile"); + fs::create_dir_all(&tmp).unwrap(); + + // Create the output dir and a read-only file in it. + let out_dir = tmp.join("out"); + fs::create_dir_all(&out_dir).unwrap(); + let file_path = out_dir.join("libfoo.rmeta"); + fs::write(&file_path, b"content").unwrap(); + let mut perms = fs::metadata(&file_path).unwrap().permissions(); + perms.set_mode(0o444); + fs::set_permissions(&file_path, perms).unwrap(); + assert!(fs::metadata(&file_path).unwrap().permissions().readonly()); + + // Write an --arg-file containing --out-dir. + let arg_file = tmp.join("rustc.params"); + fs::write( + &arg_file, + format!("--out-dir={}\n--crate-name=foo\n", out_dir.display()), + ) + .unwrap(); + + let args = vec!["--arg-file".to_string(), arg_file.display().to_string()]; + prepare_outputs(&args, None); + + assert!(!fs::metadata(&file_path).unwrap().permissions().readonly()); + let _ = fs::remove_dir_all(&tmp); +} + +#[test] +#[cfg(unix)] +fn test_prepare_outputs_sandboxed_relative_paramfile() { + use std::fs; + use std::os::unix::fs::PermissionsExt; + + let tmp = std::env::temp_dir().join("pw_test_prepare_sandboxed_relative_paramfile"); + let sandbox_dir = tmp.join("sandbox"); + let _ = fs::remove_dir_all(&tmp); + fs::create_dir_all(&sandbox_dir).unwrap(); + + let out_dir = sandbox_dir.join("out"); + fs::create_dir_all(&out_dir).unwrap(); + let file_path = out_dir.join("libfoo.rmeta"); + fs::write(&file_path, b"content").unwrap(); + let mut perms = fs::metadata(&file_path).unwrap().permissions(); + perms.set_mode(0o444); + fs::set_permissions(&file_path, perms).unwrap(); + assert!(fs::metadata(&file_path).unwrap().permissions().readonly()); + + let paramfile = sandbox_dir.join("rustc.params"); + 
fs::write(¶mfile, "--out-dir=out\n--crate-name=foo\n").unwrap(); + + let args = vec!["@rustc.params".to_string()]; + prepare_outputs(&args, Some(sandbox_dir.as_path())); + + assert!(!fs::metadata(&file_path).unwrap().permissions().readonly()); + let _ = fs::remove_dir_all(&tmp); +} + +#[test] +fn test_find_out_dir_in_request_reads_arg_file() { + use std::fs; + + let tmp = std::env::temp_dir().join("pw_test_find_out_dir_in_request_reads_arg_file"); + let _ = fs::remove_dir_all(&tmp); + fs::create_dir_all(&tmp).unwrap(); + + let arg_file = tmp.join("rustc.args"); + fs::write( + &arg_file, + "--crate-name=foo\n--out-dir=bazel-out/k8-fastbuild/bin/pkg/_worker_pipelining\n", + ) + .unwrap(); + + let args = vec![ + "--".to_string(), + "rustc".to_string(), + "--pipelining-full".to_string(), + "--pipelining-key=foo_key".to_string(), + "--arg-file".to_string(), + arg_file.display().to_string(), + ]; + + let out_dir = find_out_dir_in_request(&args, &tmp).expect("expected out-dir"); + assert_eq!( + out_dir.as_str(), + "bazel-out/k8-fastbuild/bin/pkg/_worker_pipelining" + ); + + let _ = fs::remove_dir_all(&tmp); +} + +#[test] +#[cfg(unix)] +fn test_prepare_expanded_rustc_outputs_emit_path() { + use std::fs; + use std::os::unix::fs::PermissionsExt; + + let tmp = std::env::temp_dir().join("pw_test_prepare_emit_path"); + fs::create_dir_all(&tmp).unwrap(); + + let emit_path = tmp.join("libfoo.rmeta"); + fs::write(&emit_path, b"content").unwrap(); + let mut perms = fs::metadata(&emit_path).unwrap().permissions(); + perms.set_mode(0o555); + fs::set_permissions(&emit_path, perms).unwrap(); + assert!(fs::metadata(&emit_path).unwrap().permissions().readonly()); + + let outputs = ExpandedRustcOutputs { + out_dir: None, + emit_paths: vec![emit_path.display().to_string()], + }; + prepare_expanded_rustc_outputs(&outputs); + + assert!(!fs::metadata(&emit_path).unwrap().permissions().readonly()); + let _ = fs::remove_dir_all(&tmp); +} + +#[test] +fn test_build_response_success() { + let 
response = build_response(0, "", RequestId(0)); + assert_eq!(response, r#"{"exitCode":0,"output":"","requestId":0}"#); + let parsed = parse_json(&response); + if let JsonValue::Object(map) = parsed { + assert!(matches!(map.get("exitCode"), Some(JsonValue::Number(n)) if *n == 0.0)); + assert!(matches!(map.get("requestId"), Some(JsonValue::Number(n)) if *n == 0.0)); + } else { + panic!("expected object"); + } +} + +#[test] +fn test_build_response_failure() { + let response = build_response(1, "error: type mismatch", RequestId(0)); + let parsed = parse_json(&response); + if let JsonValue::Object(map) = parsed { + assert!(matches!(map.get("exitCode"), Some(JsonValue::Number(n)) if *n == 1.0)); + assert!( + matches!(map.get("output"), Some(JsonValue::String(s)) if s == "error: type mismatch") + ); + } else { + panic!("expected object"); + } +} + +#[test] +fn test_strip_pipelining_flags() { + let args = vec![ + "--pipelining-metadata".to_string(), + "--pipelining-key=my_crate_abc123".to_string(), + "--arg-file".to_string(), + "rustc.params".to_string(), + ]; + let filtered = strip_pipelining_flags(&args); + assert_eq!(filtered, vec!["--arg-file", "rustc.params"]); +} + +#[test] +fn test_request_kind_parse_in_dir_reads_relative_paramfile() { + use std::fs; + + let dir = std::env::temp_dir().join("pw_request_kind_relative_paramfile"); + let _ = fs::remove_dir_all(&dir); + fs::create_dir_all(&dir).unwrap(); + let paramfile = dir.join("rustc.params"); + fs::write( + ¶mfile, + "--crate-name=foo\n--pipelining-full\n--pipelining-key=foo_key\n", + ) + .unwrap(); + + let args = vec![ + "--".to_string(), + "rustc".to_string(), + "@rustc.params".to_string(), + ]; + match RequestKind::parse_in_dir(&args, &dir) { + RequestKind::Full { key } => assert_eq!(key.as_str(), "foo_key"), + other => panic!("expected full request, got {:?}", other), + } + + let _ = fs::remove_dir_all(&dir); +} + +// --- Tests for new helpers added in the worker-key fix --- + +#[test] +fn 
test_is_pipelining_flag() { + assert!(is_pipelining_flag("--pipelining-metadata")); + assert!(is_pipelining_flag("--pipelining-full")); + assert!(is_pipelining_flag("--pipelining-key=foo_abc")); + assert!(!is_pipelining_flag("--crate-name=foo")); + assert!(!is_pipelining_flag("--emit=dep-info,metadata,link")); + assert!(!is_pipelining_flag("-Zno-codegen")); +} + +#[test] +fn test_apply_substs() { + let subst = vec![ + ("pwd".to_string(), "/work".to_string()), + ("out".to_string(), "bazel-out/k8/bin".to_string()), + ]; + assert_eq!(apply_substs("${pwd}/src", &subst), "/work/src"); + assert_eq!( + apply_substs("${out}/foo.rlib", &subst), + "bazel-out/k8/bin/foo.rlib" + ); + assert_eq!(apply_substs("--crate-name=foo", &subst), "--crate-name=foo"); +} + +#[test] +fn test_scan_pipelining_flags_table() { + let cases: &[(&[&str], &str)] = &[ + ( + &["--pipelining-metadata", "--pipelining-key=foo_abc"], + "Metadata:foo_abc", + ), + ( + &["--pipelining-full", "--pipelining-key=bar_xyz"], + "Full:bar_xyz", + ), + (&["--emit=link", "--crate-name=foo"], "NonPipelined"), + (&["--pipelining-metadata"], "NonPipelined"), // flag but no key + ]; + for (args, expected) in cases { + let kind = scan_pipelining_flags(args.iter().copied()); + let actual = match &kind { + RequestKind::Metadata { key } => format!("Metadata:{}", key.as_str()), + RequestKind::Full { key } => format!("Full:{}", key.as_str()), + RequestKind::NonPipelined => "NonPipelined".to_string(), + }; + assert_eq!(&actual, expected, "scan_pipelining_flags({args:?})"); + } +} + +#[test] +fn test_detect_pipelining_mode_from_paramfile() { + use std::io::Write; + // Write a temporary paramfile with pipelining flags. 
+ let tmp = std::env::temp_dir().join("pw_test_detect_paramfile"); + let param_path = tmp.join("rustc.params"); + std::fs::create_dir_all(&tmp).unwrap(); + let mut f = std::fs::File::create(¶m_path).unwrap(); + writeln!(f, "--emit=dep-info,metadata,link").unwrap(); + writeln!(f, "--crate-name=foo").unwrap(); + writeln!(f, "--pipelining-metadata").unwrap(); + writeln!(f, "--pipelining-key=foo_abc123").unwrap(); + drop(f); + + // Full args: startup args before "--", then rustc + @paramfile. + let args = vec![ + "--subst".to_string(), + "pwd=/work".to_string(), + "--".to_string(), + "/path/to/rustc".to_string(), + format!("@{}", param_path.display()), + ]; + + match RequestKind::parse_in_dir(&args, &tmp) { + RequestKind::Metadata { key } => assert_eq!(key.as_str(), "foo_abc123"), + other => panic!( + "expected Metadata, got {:?}", + std::mem::discriminant(&other) + ), + } + + let _ = std::fs::remove_dir_all(&tmp); +} + +#[test] +fn test_detect_pipelining_mode_from_nested_paramfile() { + let tmp = std::env::temp_dir().join("pw_test_detect_nested_paramfile"); + let outer = tmp.join("outer.params"); + let nested = tmp.join("nested.params"); + let _ = std::fs::remove_dir_all(&tmp); + std::fs::create_dir_all(&tmp).unwrap(); + std::fs::write(&outer, "--crate-name=foo\n@nested.params\n").unwrap(); + std::fs::write( + &nested, + "--pipelining-full\n--pipelining-key=foo_nested_key\n", + ) + .unwrap(); + + let args = vec![ + "--".to_string(), + "/path/to/rustc".to_string(), + "@outer.params".to_string(), + ]; + + match RequestKind::parse_in_dir(&args, &tmp) { + RequestKind::Full { key } => assert_eq!(key.as_str(), "foo_nested_key"), + other => panic!("expected Full, got {:?}", std::mem::discriminant(&other)), + } + + let _ = std::fs::remove_dir_all(&tmp); +} + +#[test] +fn test_expand_rustc_args_strips_pipelining_flags() { + use std::io::Write; + let tmp = std::env::temp_dir().join("pw_test_expand_rustc"); + let param_path = tmp.join("rustc.params"); + 
std::fs::create_dir_all(&tmp).unwrap(); + let mut f = std::fs::File::create(¶m_path).unwrap(); + writeln!(f, "--emit=dep-info,metadata,link").unwrap(); + writeln!(f, "--crate-name=foo").unwrap(); + writeln!(f, "--pipelining-metadata").unwrap(); + writeln!(f, "--pipelining-key=foo_abc123").unwrap(); + drop(f); + + let rustc_and_after = vec![ + "/path/to/rustc".to_string(), + format!("@{}", param_path.display()), + ]; + let subst: Vec<(String, String)> = vec![]; + let (expanded, _) = + expand_rustc_args_with_metadata(&rustc_and_after, &subst, false, std::path::Path::new(".")) + .unwrap(); + + assert_eq!(expanded[0], "/path/to/rustc"); + assert!(expanded.contains(&"--emit=dep-info,metadata,link".to_string())); + assert!(expanded.contains(&"--crate-name=foo".to_string())); + // Pipelining flags must be stripped. + assert!(!expanded.contains(&"--pipelining-metadata".to_string())); + assert!(!expanded.iter().any(|a| a.starts_with("--pipelining-key="))); + + let _ = std::fs::remove_dir_all(&tmp); +} + +#[test] +fn test_prepare_rustc_args_collects_nested_relocated_flags() { + let tmp = std::env::temp_dir().join("pw_test_prepare_rustc_args_nested"); + let outer = tmp.join("outer.params"); + let nested = tmp.join("nested.params"); + let arg_file = tmp.join("build.args"); + let _ = std::fs::remove_dir_all(&tmp); + std::fs::create_dir_all(&tmp).unwrap(); + std::fs::write(&outer, "@nested.params\n--crate-name=foo\n").unwrap(); + std::fs::write( + &nested, + "\ +--env-file +build.env +--arg-file +build.args +--output-file +diag.txt +--rustc-output-format +rendered +--stable-status-file +stable.txt +--volatile-status-file +volatile.txt +--out-dir=${pwd}/out +", + ) + .unwrap(); + std::fs::write(&arg_file, "--cfg=nested_arg\n").unwrap(); + + let pw_args = parse_pw_args( + &[ + "--subst".to_string(), + "pwd=/work".to_string(), + "--require-explicit-unstable-features".to_string(), + "true".to_string(), + ], + &tmp, + ); + let rustc_and_after = vec!["rustc".to_string(), 
"@outer.params".to_string()]; + let (rustc_args, out_dir, relocated) = + prepare_rustc_args(&rustc_and_after, &pw_args, &tmp).unwrap(); + + assert_eq!( + rustc_args, + vec![ + "rustc".to_string(), + "--out-dir=/work/out".to_string(), + "--crate-name=foo".to_string(), + "-Zallow-features=".to_string(), + "--cfg=nested_arg".to_string(), + ] + ); + assert_eq!(out_dir.as_str(), "/work/out"); + assert_eq!(relocated.env_files, vec!["build.env"]); + assert_eq!(relocated.arg_files, vec!["build.args"]); + assert_eq!(relocated.output_file.as_deref(), Some("diag.txt")); + assert_eq!(relocated.rustc_output_format.as_deref(), Some("rendered")); + assert_eq!(relocated.stable_status_file.as_deref(), Some("stable.txt")); + assert_eq!( + relocated.volatile_status_file.as_deref(), + Some("volatile.txt") + ); + + let _ = std::fs::remove_dir_all(&tmp); +} + +#[test] +fn test_expand_rustc_args_applies_substs() { + use std::io::Write; + let tmp = std::env::temp_dir().join("pw_test_expand_subst"); + let param_path = tmp.join("rustc.params"); + std::fs::create_dir_all(&tmp).unwrap(); + let mut f = std::fs::File::create(¶m_path).unwrap(); + writeln!(f, "--out-dir=${{pwd}}/out").unwrap(); + drop(f); + + let rustc_and_after = vec![ + "/path/to/rustc".to_string(), + format!("@{}", param_path.display()), + ]; + let subst = vec![("pwd".to_string(), "/work".to_string())]; + let (expanded, _) = + expand_rustc_args_with_metadata(&rustc_and_after, &subst, false, std::path::Path::new(".")) + .unwrap(); + + assert!( + expanded.contains(&"--out-dir=/work/out".to_string()), + "expected substituted arg, got: {:?}", + expanded + ); + + let _ = std::fs::remove_dir_all(&tmp); +} + +// --- Tests for Phase 4 sandbox helpers --- + +#[test] +fn test_extract_sandbox_dir_absent() { + let req = parse_json(r#"{"requestId": 1}"#); + assert_eq!(extract_sandbox_dir(&req), Ok(None)); +} + +#[test] +fn test_extract_sandbox_dir_empty_string_returns_none() { + let req = parse_json(r#"{"requestId": 1, "sandboxDir": 
""}"#); + assert_eq!(extract_sandbox_dir(&req), Ok(None)); +} + +/// A nonexistent sandbox directory is an error — it means the platform +/// doesn't support sandboxing and the user should remove the flag. +#[test] +fn test_extract_sandbox_dir_nonexistent_is_err() { + let req = parse_json(r#"{"requestId": 1, "sandboxDir": "/no/such/sandbox/dir"}"#); + let result = extract_sandbox_dir(&req); + assert!(result.is_err(), "expected Err for nonexistent sandbox dir"); + let msg = result.unwrap_err(); + assert!( + msg.contains("--experimental_worker_multiplex_sandboxing"), + "error should mention the flag: {}", + msg + ); +} + +/// An existing but empty sandbox directory is an error. On Windows, Bazel +/// creates the directory without populating it with symlinks because there +/// is no real sandbox implementation. +#[test] +fn test_extract_sandbox_dir_empty_dir_is_err() { + let dir = std::env::temp_dir().join("pw_test_sandbox_empty"); + let _ = std::fs::remove_dir_all(&dir); + std::fs::create_dir_all(&dir).unwrap(); + let json_dir = escape_path_for_json(&dir); + let json = format!(r#"{{"requestId": 1, "sandboxDir": "{}"}}"#, json_dir); + let req = parse_json(&json); + let err = extract_sandbox_dir(&req).unwrap_err(); + assert!( + err.contains("is empty"), + "expected 'is empty' in error, got: {err}" + ); + let _ = std::fs::remove_dir_all(&dir); +} + +/// A populated sandbox directory is accepted. 
+#[test] +fn test_extract_sandbox_dir_populated() { + let dir = std::env::temp_dir().join("pw_test_sandbox_pop"); + let _ = std::fs::remove_dir_all(&dir); + std::fs::create_dir_all(&dir).unwrap(); + std::fs::write(dir.join("marker"), b"").unwrap(); + let dir_str = dir.to_string_lossy().into_owned(); + let json_dir = escape_path_for_json(&dir); + let json = format!(r#"{{"requestId": 1, "sandboxDir": "{}"}}"#, json_dir); + let req = parse_json(&json); + let result = extract_sandbox_dir(&req).unwrap(); + assert_eq!( + result.as_ref().map(|sd| sd.as_str()), + Some(dir_str.as_str()) + ); + let _ = std::fs::remove_dir_all(&dir); +} + +#[test] +fn test_extract_cancel_true() { + let req = parse_json(r#"{"requestId": 1, "cancel": true}"#); + assert!(extract_cancel(&req)); +} + +#[test] +fn test_extract_cancel_false() { + let req = parse_json(r#"{"requestId": 1, "cancel": false}"#); + assert!(!extract_cancel(&req)); +} + +#[test] +fn test_extract_cancel_absent() { + let req = parse_json(r#"{"requestId": 1}"#); + assert!(!extract_cancel(&req)); +} + +#[test] +fn test_build_cancel_response() { + let response = build_cancel_response(RequestId(7)); + assert_eq!( + response, + r#"{"exitCode":0,"output":"","requestId":7,"wasCancelled":true}"# + ); + let parsed = parse_json(&response); + if let JsonValue::Object(map) = parsed { + assert!(matches!(map.get("requestId"), Some(JsonValue::Number(n)) if *n == 7.0)); + assert!(matches!(map.get("exitCode"), Some(JsonValue::Number(n)) if *n == 0.0)); + assert!(matches!( + map.get("wasCancelled"), + Some(JsonValue::Boolean(true)) + )); + } else { + panic!("expected object"); + } +} + +#[test] +fn test_resolve_sandbox_path_relative() { + let result = resolve_request_relative_path( + "bazel-out/k8/bin/pkg", + Some(std::path::Path::new("/sandbox/42")), + ); + assert_eq!( + result, + std::path::PathBuf::from("/sandbox/42").join("bazel-out/k8/bin/pkg") + ); +} + +#[test] +fn test_resolve_sandbox_path_absolute() { + let result = 
resolve_request_relative_path( + "/absolute/path/out", + Some(std::path::Path::new("/sandbox/42")), + ); + assert_eq!(result, std::path::PathBuf::from("/absolute/path/out")); +} + +#[test] +fn test_find_out_dir_in_expanded() { + let args = vec![ + "--crate-name=foo".to_string(), + "--out-dir=/work/bazel-out/k8/bin/pkg".to_string(), + "--emit=link".to_string(), + ]; + assert_eq!( + find_out_dir_in_expanded(&args), + Some("/work/bazel-out/k8/bin/pkg".to_string()) + ); +} + +#[test] +fn test_find_out_dir_in_expanded_missing() { + let args = vec!["--crate-name=foo".to_string(), "--emit=link".to_string()]; + assert_eq!(find_out_dir_in_expanded(&args), None); +} + +#[test] +fn test_rewrite_expanded_rustc_outputs_collects_writable_paths() { + let args = vec![ + "--crate-name=foo".to_string(), + "--out-dir=/old/path".to_string(), + "--emit=dep-info=foo.d,metadata=bar/libfoo.rmeta,link".to_string(), + ]; + let new_dir = std::path::Path::new("/_pw_pipeline/foo_abc"); + + let (rewritten, outputs) = rewrite_expanded_rustc_outputs(args, new_dir); + + assert_eq!( + rewritten, + vec![ + "--crate-name=foo", + "--out-dir=/_pw_pipeline/foo_abc", + "--emit=dep-info=foo.d,metadata=/_pw_pipeline/foo_abc/libfoo.rmeta,link", + ] + ); + assert_eq!( + outputs, + ExpandedRustcOutputs { + out_dir: Some("/_pw_pipeline/foo_abc".to_string()), + emit_paths: vec![ + "foo.d".to_string(), + "/_pw_pipeline/foo_abc/libfoo.rmeta".to_string(), + ], + } + ); +} + +#[test] +fn test_parse_pw_args_substitutes_pwd_from_real_execroot() { + let parsed = parse_pw_args( + &[ + "--subst".to_string(), + "pwd=${pwd}".to_string(), + "--output-file".to_string(), + "diag.txt".to_string(), + ], + std::path::Path::new("/real/execroot"), + ); + + assert_eq!( + parsed.subst, + vec![("pwd".to_string(), "/real/execroot".to_string())] + ); + assert_eq!(parsed.output_file, Some("diag.txt".to_string())); + assert_eq!(parsed.stable_status_file, None); + assert_eq!(parsed.volatile_status_file, None); +} + +#[test] +fn 
test_build_rustc_env_applies_stamp_and_subst_mappings() { + let tmp = std::env::temp_dir().join(format!("pw_test_build_rustc_env_{}", std::process::id())); + std::fs::create_dir_all(&tmp).unwrap(); + + let env_file = tmp.join("env.txt"); + let stable_status = tmp.join("stable-status.txt"); + let volatile_status = tmp.join("volatile-status.txt"); + + std::fs::write( + &env_file, + "STAMPED={BUILD_USER}:{BUILD_SCM_REVISION}:${pwd}\nUNCHANGED=value\n", + ) + .unwrap(); + std::fs::write(&stable_status, "BUILD_USER alice\n").unwrap(); + std::fs::write(&volatile_status, "BUILD_SCM_REVISION deadbeef\n").unwrap(); + + let env = build_rustc_env( + &[env_file.display().to_string()], + Some(stable_status.to_str().unwrap()), + Some(volatile_status.to_str().unwrap()), + &[("pwd".to_string(), "/real/execroot".to_string())], + ) + .unwrap(); + + assert_eq!( + env.get("STAMPED"), + Some(&"alice:deadbeef:/real/execroot".to_string()) + ); + assert_eq!(env.get("UNCHANGED"), Some(&"value".to_string())); + + let _ = std::fs::remove_dir_all(&tmp); +} + +#[test] +fn test_begin_worker_shutdown_sets_flag() { + WORKER_SHUTTING_DOWN.store(false, Ordering::SeqCst); + begin_worker_shutdown("test"); + assert!(worker_is_shutting_down()); + WORKER_SHUTTING_DOWN.store(false, Ordering::SeqCst); +} + +#[test] +fn test_extract_rmeta_path_valid() { + let line = r#"{"artifact":"/work/out/libfoo.rmeta","emit":"metadata"}"#; + assert_eq!( + extract_rmeta_path(line), + Some("/work/out/libfoo.rmeta".to_string()) + ); +} + +#[test] +fn test_extract_rmeta_path_rlib() { + // rlib artifact should not match (only rmeta) + let line = r#"{"artifact":"/work/out/libfoo.rlib","emit":"link"}"#; + assert_eq!(extract_rmeta_path(line), None); +} + +#[test] +#[cfg(unix)] +fn test_copy_output_to_sandbox() { + use std::fs; + + let tmp = std::env::temp_dir().join("pw_test_copy_to_sandbox"); + let pipeline_dir = tmp.join("pipeline"); + let sandbox_dir = tmp.join("sandbox"); + let out_rel = "bazel-out/k8/bin/pkg"; + + 
fs::create_dir_all(&pipeline_dir).unwrap(); + fs::create_dir_all(&sandbox_dir).unwrap(); + + // Write a fake rmeta into the pipeline dir. + let rmeta_path = pipeline_dir.join("libfoo.rmeta"); + fs::write(&rmeta_path, b"fake rmeta content").unwrap(); + + copy_output_to_sandbox(&rmeta_path, &sandbox_dir, out_rel, "_pipeline").unwrap(); + + let dest = sandbox_dir + .join(out_rel) + .join("_pipeline") + .join("libfoo.rmeta"); + assert!(dest.exists(), "expected rmeta copied to sandbox/_pipeline/"); + assert_eq!(fs::read(&dest).unwrap(), b"fake rmeta content"); + + let _ = fs::remove_dir_all(&tmp); +} + +#[test] +#[cfg(unix)] +fn test_copy_all_outputs_to_sandbox() { + use std::fs; + + let tmp = std::env::temp_dir().join("pw_test_copy_all_to_sandbox"); + let pipeline_dir = tmp.join("pipeline"); + let sandbox_dir = tmp.join("sandbox"); + let out_rel = "bazel-out/k8/bin/pkg"; + + fs::create_dir_all(&pipeline_dir).unwrap(); + fs::create_dir_all(&sandbox_dir).unwrap(); + + fs::write(pipeline_dir.join("libfoo.rlib"), b"fake rlib").unwrap(); + fs::write(pipeline_dir.join("libfoo.rmeta"), b"fake rmeta").unwrap(); + fs::write(pipeline_dir.join("libfoo.d"), b"fake dep-info").unwrap(); + + copy_all_outputs_to_sandbox(&pipeline_dir, &sandbox_dir, out_rel).unwrap(); + + let dest = sandbox_dir.join(out_rel); + assert!(dest.join("libfoo.rlib").exists()); + assert!(dest.join("libfoo.rmeta").exists()); + assert!(dest.join("libfoo.d").exists()); + + let _ = fs::remove_dir_all(&tmp); +} + +#[test] +#[cfg(unix)] +fn test_copy_all_outputs_to_sandbox_prefers_hardlinks() { + use std::fs; + use std::os::unix::fs::MetadataExt; + + let tmp = std::env::temp_dir().join("pw_test_copy_all_outputs_to_sandbox_prefers_hardlinks"); + let pipeline_dir = tmp.join("pipeline"); + let sandbox_dir = tmp.join("sandbox"); + let out_rel = "bazel-out/k8/bin/pkg"; + + fs::create_dir_all(&pipeline_dir).unwrap(); + fs::create_dir_all(&sandbox_dir).unwrap(); + + let src = pipeline_dir.join("libfoo.rlib"); + 
fs::write(&src, b"fake rlib").unwrap(); + + copy_all_outputs_to_sandbox(&pipeline_dir, &sandbox_dir, out_rel).unwrap(); + + let dest = sandbox_dir.join(out_rel).join("libfoo.rlib"); + assert!(dest.exists()); + assert_eq!( + fs::metadata(&src).unwrap().ino(), + fs::metadata(&dest).unwrap().ino() + ); + + let _ = fs::remove_dir_all(&tmp); +} + +#[test] +#[cfg(unix)] +fn test_seed_sandbox_cache_root() { + use std::fs; + + let tmp = std::env::temp_dir().join("pw_test_seed_sandbox_cache_root"); + let sandbox_dir = tmp.join("sandbox"); + let cache_repo = tmp.join("cache/repos/v1/contents/hash/repo"); + fs::create_dir_all(&sandbox_dir).unwrap(); + fs::create_dir_all(cache_repo.join("tool/src")).unwrap(); + symlink_path(&cache_repo, &sandbox_dir.join("external_repo"), true).unwrap(); + + seed_sandbox_cache_root(&sandbox_dir).unwrap(); + + let cache_link = sandbox_dir.join("cache"); + assert!(cache_link.exists()); + assert_eq!(cache_link.canonicalize().unwrap(), tmp.join("cache")); + + let _ = fs::remove_dir_all(&tmp); +} + +// --- assemble_request_argv tests --- + +/// Happy-path: relocated pw flags move before `--`, pipelining flags stay after, +/// rustc args stay after. Covers the single-relocated, multi-relocated, +/// no-relocated, interleaved, and pipelining-stay-after cases in one assertion. 
+#[test] +fn test_assemble_request_argv_happy_path() { + let startup: Vec = vec![ + "--subst".into(), + "pwd=${pwd}".into(), + "--".into(), + "rustc".into(), + ]; + let request: Vec = vec![ + "--output-file".into(), + "out.rmeta".into(), + "--env-file".into(), + "build.env".into(), + "--stable-status-file".into(), + "stable.txt".into(), + "--volatile-status-file".into(), + "volatile.txt".into(), + "--rustc-output-format".into(), + "rendered".into(), + "--pipelining-metadata".into(), + "--pipelining-key=abc123".into(), + "--crate-name=foo".into(), + "-Copt-level=2".into(), + "src/lib.rs".into(), + ]; + let result = assemble_request_argv(&startup, &request).unwrap(); + let sep = result.iter().position(|a| a == "--").unwrap(); + let before = &result[..sep]; + let after = &result[sep + 1..]; + + // Relocated pw flags are before -- + for flag in [ + "--output-file", + "--env-file", + "--stable-status-file", + "--volatile-status-file", + "--rustc-output-format", + ] { + assert!( + before.contains(&flag.to_string()), + "{flag} should be before --" + ); + } + + // Pipelining flags stay after -- + assert!(after.contains(&"--pipelining-metadata".to_string())); + assert!(after.contains(&"--pipelining-key=abc123".to_string())); + + // Rustc args stay after -- + assert!(after.contains(&"rustc".to_string())); + assert!(after.contains(&"--crate-name=foo".to_string())); + assert!(after.contains(&"-Copt-level=2".to_string())); + assert!(after.contains(&"src/lib.rs".to_string())); + + // pw flags are NOT after -- + assert!(!after.contains(&"--output-file".to_string())); + assert!(!after.contains(&"--env-file".to_string())); +} + +#[test] +fn test_assemble_request_argv_no_separator_is_error() { + let startup: Vec = vec!["--output-file".into(), "foo".into()]; + let request: Vec = vec!["src/lib.rs".into()]; + let err = assemble_request_argv(&startup, &request).unwrap_err(); + assert!( + err.to_string().contains("separator"), + "expected separator error, got: {err}" + ); +} + +#[test] 
+fn test_extract_direct_request_pw_flags_basic() { + let request: Vec = vec![ + "--output-file".into(), + "out.rmeta".into(), + "--crate-name=foo".into(), + "--stable-status-file".into(), + "stable.txt".into(), + ]; + let (remaining, pw) = extract_direct_request_pw_flags(&request); + assert_eq!(remaining, vec!["--crate-name=foo"]); + assert_eq!( + pw, + vec![ + "--output-file", + "out.rmeta", + "--stable-status-file", + "stable.txt" + ] + ); +} + +#[test] +fn test_split_startup_args_basic() { + let args: Vec = vec![ + "--subst".into(), + "pwd=${pwd}".into(), + "--".into(), + "/path/to/rustc".into(), + "-v".into(), + ]; + let layout = split_startup_args(&args).unwrap(); + assert_eq!(layout.pw_args, vec!["--subst", "pwd=${pwd}"]); + assert_eq!(layout.child_prefix, vec!["/path/to/rustc", "-v"]); +} + +#[test] +fn test_split_startup_args_no_separator_is_error() { + let args: Vec = vec!["--subst".into(), "pwd=${pwd}".into()]; + let err = split_startup_args(&args).unwrap_err(); + assert!( + err.to_string().contains("separator"), + "expected separator error, got: {err}" + ); +} + +/// Regression: build_response blanked output for exit_code==0, silently +/// discarding rustc warnings from successful compilations. 
+#[test] +fn test_build_response_preserves_warnings_on_success() { + let warning = "warning: unused variable `x`"; + let response = build_response(0, warning, RequestId(42)); + let parsed = parse_json(&response); + let JsonValue::Object(map) = parsed else { + panic!("expected object response"); + }; + let Some(JsonValue::String(output)) = map.get("output") else { + panic!("expected string output"); + }; + assert_eq!( + output, warning, + "build_response should preserve warnings on success (exit_code=0)" + ); +} + +// --------------------------------------------------------------------------- +// RustcInvocation tests +// --------------------------------------------------------------------------- + +#[test] +fn test_invocation_pending_to_running() { + let inv = RustcInvocation::new(); + assert!(inv.is_pending()); +} + +#[test] +fn test_invocation_shutdown_from_pending() { + let inv = RustcInvocation::new(); + inv.request_shutdown(); + assert!(inv.is_shutting_down_or_terminal()); +} + +// --------------------------------------------------------------------------- +// spawn_pipelined_rustc tests +// --------------------------------------------------------------------------- + +#[test] +#[cfg(unix)] +fn test_rustc_thread_pipelined_completes() { + use super::invocation::InvocationDirs; + use super::rustc_driver::spawn_pipelined_rustc; + use std::process::{Command, Stdio}; + + let child = Command::new("sh") + .arg("-c") + .arg(r#"echo '{"artifact":"/tmp/test.rmeta","emit":"metadata"}' >&2; exit 0"#) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let dirs = InvocationDirs { + pipeline_output_dir: PathBuf::from("/tmp"), + pipeline_root_dir: PathBuf::from("/tmp"), + original_out_dir: OutputDir::default(), + }; + + let inv = spawn_pipelined_rustc(child, dirs.clone(), None); + + let meta = inv.wait_for_metadata(); + assert!(meta.is_ok(), "metadata should be ready"); + + let result = inv.wait_for_completion(); + assert!(result.is_ok(), 
"invocation should complete"); + assert_eq!(result.unwrap().exit_code, 0); +} + +#[test] +#[cfg(unix)] +fn test_rustc_thread_failure_before_rmeta() { + use super::invocation::InvocationDirs; + use super::rustc_driver::spawn_pipelined_rustc; + use std::process::{Command, Stdio}; + + let child = Command::new("sh") + .arg("-c") + .arg("echo 'error: something broke' >&2; exit 1") + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let dirs = InvocationDirs { + pipeline_output_dir: PathBuf::from("/tmp"), + pipeline_root_dir: PathBuf::from("/tmp"), + original_out_dir: OutputDir::default(), + }; + + let inv = spawn_pipelined_rustc(child, dirs, None); + + let err = inv.wait_for_metadata().unwrap_err(); + assert_eq!(err.exit_code, 1); + assert!( + err.diagnostics.contains("something broke"), + "expected 'something broke' in diagnostics, got: {}", + err.diagnostics, + ); + + // wait_for_completion ensures the thread finishes. + let _ = inv.wait_for_completion(); +} + +#[test] +#[cfg(unix)] +fn test_rustc_thread_shutdown_kills_child() { + use super::invocation::InvocationDirs; + use super::rustc_driver::spawn_pipelined_rustc; + use std::process::{Command, Stdio}; + + // sleep produces no stderr output, so read_line blocks until child is killed. + let child = Command::new("sleep") + .arg("60") + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let dirs = InvocationDirs { + pipeline_output_dir: PathBuf::from("/tmp"), + pipeline_root_dir: PathBuf::from("/tmp"), + original_out_dir: OutputDir::default(), + }; + + let inv = spawn_pipelined_rustc(child, dirs, None); + + // Give rustc thread time to start reading stderr. + std::thread::sleep(std::time::Duration::from_millis(50)); + + // Request shutdown — this sends SIGTERM to the child, unblocking read_line. + inv.request_shutdown(); + + // wait_for_completion should return failure (shutdown requested). 
+ let err = inv.wait_for_completion().unwrap_err(); + assert_eq!(err.exit_code, -1, "shutdown should produce exit_code -1"); +} + +// --------------------------------------------------------------------------- +// spawn_non_pipelined_rustc tests +// --------------------------------------------------------------------------- + +#[test] +#[cfg(unix)] +fn test_rustc_thread_non_pipelined_completes() { + use super::rustc_driver::spawn_non_pipelined_rustc; + use std::process::{Command, Stdio}; + + let child = Command::new("sh") + .arg("-c") + .arg("echo 'hello' >&2; echo 'world'; exit 0") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let inv = spawn_non_pipelined_rustc(child); + + let result = inv.wait_for_completion(); + assert!(result.is_ok()); + let completion = result.unwrap(); + assert_eq!(completion.exit_code, 0); + assert!( + completion.diagnostics.contains("hello"), + "should capture stderr" + ); + assert!( + completion.diagnostics.contains("world"), + "should capture stdout" + ); +} + +#[test] +#[cfg(unix)] +fn test_rustc_thread_non_pipelined_fails() { + use super::rustc_driver::spawn_non_pipelined_rustc; + use std::process::{Command, Stdio}; + + let child = Command::new("sh") + .arg("-c") + .arg("echo 'error msg' >&2; exit 1") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let inv = spawn_non_pipelined_rustc(child); + + let result = inv.wait_for_completion(); + assert!(result.is_err()); + let failure = result.unwrap_err(); + assert_eq!(failure.exit_code, 1); + assert!( + failure.diagnostics.contains("error msg"), + "should capture stderr on failure" + ); +} + +#[test] +#[cfg(unix)] +fn test_cancel_non_pipelined_kills_child() { + use super::rustc_driver::spawn_non_pipelined_rustc; + use std::process::{Command, Stdio}; + + let child = Command::new("sleep") + .arg("60") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let inv = spawn_non_pipelined_rustc(child); + + 
std::thread::sleep(std::time::Duration::from_millis(50)); + inv.request_shutdown(); + + let err = inv.wait_for_completion().unwrap_err(); + assert_eq!(err.exit_code, -1, "shutdown should produce exit_code -1"); +} + +// --------------------------------------------------------------------------- +// RequestCoordinator tests (public API only) +// --------------------------------------------------------------------------- + +#[test] +fn test_registry_cancel_shuts_down_invocation() { + let mut reg = RequestCoordinator::default(); + reg.requests + .insert(RequestId(42), Some(PipelineKey("key1".to_string()))); + let inv = Arc::new(RustcInvocation::new()); + reg.invocations + .insert(PipelineKey("key1".to_string()), Arc::clone(&inv)); + assert!(reg.cancel(RequestId(42))); + assert!(inv.is_shutting_down_or_terminal()); + // Second cancel returns false — already claimed. + assert!(!reg.cancel(RequestId(42))); +} + +#[test] +fn test_registry_shutdown_all() { + let mut reg = RequestCoordinator::default(); + reg.requests + .insert(RequestId(42), Some(PipelineKey("key1".to_string()))); + let inv1 = Arc::new(RustcInvocation::new()); + reg.invocations + .insert(PipelineKey("key1".to_string()), Arc::clone(&inv1)); + reg.requests + .insert(RequestId(43), Some(PipelineKey("key2".to_string()))); + reg.invocations.insert( + PipelineKey("key2".to_string()), + Arc::new(RustcInvocation::new()), + ); + reg.shutdown_all(); + assert!(inv1.is_shutting_down_or_terminal()); +} + +// --------------------------------------------------------------------------- +// Regression: metadata cleanup must preserve invocation for full request +// --------------------------------------------------------------------------- + +/// Covers the key lifecycle regression: metadata completes, full request still +/// finds the invocation; metadata panic shuts down invocation but doesn't +/// orphan the full request entry. 
+#[test] +fn test_metadata_lifecycle_preserves_full_request() { + let mut reg = RequestCoordinator::default(); + let key = PipelineKey("key1".to_string()); + let inv = Arc::new(RustcInvocation::new()); + + // Register metadata (42) and full (99) for the same pipeline key. + reg.requests.insert(RequestId(42), Some(key.clone())); + reg.invocations.insert(key.clone(), Arc::clone(&inv)); + reg.requests.insert(RequestId(99), Some(key.clone())); + + // Metadata completes — claim response. + assert!(reg.requests.remove(&RequestId(42)).is_some()); + // Invocation persists for full request. + assert!(reg.invocations.contains_key(&key)); + assert!(reg.requests.contains_key(&RequestId(99))); + + // Simulate panic on a second metadata: shutdown invocation, claim response. + reg.requests.insert(RequestId(50), Some(key.clone())); + inv.request_shutdown(); + assert!(reg.requests.remove(&RequestId(50)).is_some()); + // Invocation still present (full can discover it failed). + assert!(reg.invocations.contains_key(&key)); + assert!(reg.requests.contains_key(&RequestId(99))); +} + +/// Regression: graceful_kill should send SIGTERM first, giving the child a +/// chance to clean up before resorting to SIGKILL. +#[test] +#[cfg(unix)] +fn test_graceful_kill_sigterm_then_sigkill() { + use super::exec::graceful_kill; + use std::process::Command; + use std::time::Instant; + + // Spawn a process that traps SIGTERM and exits cleanly. + // `sleep` runs in the background so the shell's trap handler can fire + // immediately when SIGTERM arrives (foreground `sleep` blocks trap dispatch). + let mut child = Command::new("sh") + .arg("-c") + .arg("trap 'exit 0' TERM; while true; do sleep 60 & wait; done") + .spawn() + .unwrap(); + + // Give the shell time to set up the trap. 
+ std::thread::sleep(std::time::Duration::from_millis(100)); + + let start = Instant::now(); + graceful_kill(&mut child); + let elapsed = start.elapsed(); + + // Should have exited quickly via SIGTERM (not waited 500ms for SIGKILL). + assert!( + elapsed.as_millis() < 400, + "graceful_kill should exit quickly when SIGTERM is handled: {}ms", + elapsed.as_millis() + ); +} diff --git a/util/process_wrapper/util.rs b/util/process_wrapper/util.rs index 7bab19ef79..14ee612018 100644 --- a/util/process_wrapper/util.rs +++ b/util/process_wrapper/util.rs @@ -28,9 +28,14 @@ pub(crate) fn read_file_to_array(path: &str) -> Result, String> { read_to_array(file) } -pub(crate) fn read_stamp_status_to_array(path: String) -> Result, String> { - let file = File::open(path).map_err(|e| e.to_string())?; - stamp_status_to_array(file) +pub(crate) fn read_stamp_status_with_context( + path: &str, + label: &str, +) -> Result, String> { + let file = File::open(path).map_err(|e| { + format!("failed to read {label} '{}': {e}", path) + })?; + stamp_status_to_array(file).map_err(|e| format!("failed to read {label} '{}': {e}", path)) } fn read_to_array(reader: impl Read) -> Result, String> { @@ -42,20 +47,17 @@ fn read_to_array(reader: impl Read) -> Result, String> { if line.is_empty() { continue; } - // a \ at the end of a line allows us to escape the new line break, - // \\ yields a single \, so \\\ translates to a single \ and a new line - // escape + // Odd trailing backslashes escape the newline; even counts do not. let end_backslash_count = line.chars().rev().take_while(|&c| c == '\\').count(); - // a 0 or even number of backslashes do not lead to a new line escape let escape = end_backslash_count % 2 == 1; - // remove backslashes and add back two for every one + // Keep escaped newlines and collapse escaped backslashes. 
let l = line.trim_end_matches('\\'); escaped_line.push_str(l); for _ in 0..end_backslash_count / 2 { escaped_line.push('\\'); } if escape { - // we add a newline as we expect a line after this + // Preserve the logical newline for the next physical line. escaped_line.push('\n'); } else { ret.push(escaped_line); @@ -78,34 +80,85 @@ fn stamp_status_to_array(reader: impl Read) -> Result, Str .collect() } -#[cfg(test)] -mod test { - use super::*; +/// Consolidates `-Ldependency` contents into one directory on Windows. +/// +/// Returns the number of files linked or copied, skipping missing directories +/// and duplicate filenames. +#[cfg(windows)] +pub(crate) fn consolidate_deps_into( + dependency_dirs: &[impl AsRef], + unified_dir: &std::path::Path, +) -> usize { + use std::collections::HashSet; - #[test] - fn test_read_to_array() { - let input = r"some escaped \\\ -string -with other lines" - .to_owned(); - let expected = vec![ - r"some escaped \ -string", - "with other lines", - ]; - let got = read_to_array(input.as_bytes()).unwrap(); - assert_eq!(expected, got); + let mut seen = HashSet::new(); + let mut count = 0usize; + for dir in dependency_dirs { + let entries = match std::fs::read_dir(dir.as_ref()) { + Ok(e) => e, + Err(e) => { + eprintln!( + "consolidate_deps: skipping {}: {}", + dir.as_ref().display(), + e + ); + continue; + } + }; + for entry in entries.flatten() { + let file_type = match entry.file_type() { + Ok(ft) => ft, + Err(_) => continue, + }; + if !(file_type.is_file() || file_type.is_symlink()) { + continue; + } + let file_name = entry.file_name(); + let file_name_lower = file_name.to_string_lossy().to_ascii_lowercase(); + if !seen.insert(file_name_lower) { + continue; + } + let dest = unified_dir.join(&file_name); + let src = entry.path(); + match std::fs::hard_link(&src, &dest) { + Ok(_) => {} + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {} + Err(e) => { + eprintln!( + "consolidate_deps: hard_link {} -> {} failed ({}), falling 
back to copy", + src.display(), + dest.display(), + e + ); + if let Err(copy_err) = std::fs::copy(&src, &dest) { + eprintln!( + "consolidate_deps: copy {} -> {} also failed: {}", + src.display(), + dest.display(), + copy_err + ); + } + } + } + count += 1; + } } + count +} - #[test] - fn test_stamp_status_to_array() { - let lines = "aaa bbb\\\nvvv\nccc ddd\neee fff"; - let got = stamp_status_to_array(lines.as_bytes()).unwrap(); - let expected = vec![ - ("aaa".to_owned(), "bbb\nvvv".to_owned()), - ("ccc".to_owned(), "ddd".to_owned()), - ("eee".to_owned(), "fff".to_owned()), - ]; - assert_eq!(expected, got); +/// Applies `${key}` -> `value` substitutions to `s`. +/// +/// On Windows, also normalizes `/` after verbatim-path substitutions. +pub(crate) fn apply_substitutions(s: &mut String, subst: &[(String, String)]) { + for (k, v) in subst { + *s = s.replace(&format!("${{{k}}}"), v); + } + #[cfg(windows)] + if s.contains(r"\\?\") { + *s = s.replace('/', r"\"); } } + +#[cfg(test)] +#[path = "test/util.rs"] +mod test; diff --git a/util/process_wrapper/worker.rs b/util/process_wrapper/worker.rs new file mode 100644 index 0000000000..e2efaab24e --- /dev/null +++ b/util/process_wrapper/worker.rs @@ -0,0 +1,454 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Bazel JSON persistent worker implementation. 
+
+#[path = "worker_args.rs"]
+pub(crate) mod args;
+#[path = "worker_exec.rs"]
+pub(crate) mod exec;
+#[path = "worker_invocation.rs"]
+pub(crate) mod invocation;
+#[path = "worker_logging.rs"]
+pub(crate) mod logging;
+#[path = "worker_pipeline.rs"]
+pub(crate) mod pipeline;
+#[path = "worker_protocol.rs"]
+pub(crate) mod protocol;
+#[path = "worker_request.rs"]
+pub(crate) mod request;
+#[path = "worker_rustc.rs"]
+pub(crate) mod rustc_driver;
+#[path = "worker_sandbox.rs"]
+pub(crate) mod sandbox;
+#[path = "worker_types.rs"]
+pub(crate) mod types;
+
+use std::collections::HashMap;
+use std::io::BufRead;
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use std::sync::{Arc, Mutex};
+
+use crate::ProcessWrapperError;
+
+use args::assemble_request_argv;
+use exec::{prepare_outputs, run_request};
+use logging::{
+    append_worker_lifecycle_log, current_pid, current_thread_label, install_worker_panic_hook,
+    log_request_received, log_request_thread_start, WorkerLifecycleGuard,
+};
+use pipeline::WorkerStateRoots;
+use protocol::{
+    build_cancel_response, build_response, parse_request_line, write_worker_response, SharedStdout,
+};
+use request::{RequestExecutor, RequestKind, WorkRequest};
+
+use self::invocation::RustcInvocation;
+use self::types::{PipelineKey, RequestId};
+
+/// Thread-safe shared handle to the `RequestCoordinator`.
+type SharedRequestCoordinator = Arc<Mutex<RequestCoordinator>>;
+
+pub(super) const REGISTRY_MUTEX_POISONED: &str = "request registry mutex poisoned";
+
+/// Shared state for request threads and rustc threads.
+#[derive(Default)]
+struct RequestCoordinator {
+    /// Pipeline key -> shared invocation.
+    invocations: HashMap<PipelineKey, Arc<RustcInvocation>>,
+    /// All in-flight requests. Value is `Some(key)` for pipelined requests,
+    /// `None` for non-pipelined. Presence in this map means the request is
+    /// active and no response has been sent yet. Removal IS the atomic claim —
+    /// whoever removes the entry owns the right to send the `WorkResponse`.
+    requests: HashMap<RequestId, Option<PipelineKey>>,
+}
+
+impl RequestCoordinator {
+    /// Cancels a request and shuts down the associated invocation.
+    /// Returns `true` if the cancel was claimed (caller should send the cancel
+    /// response), `false` if the request already completed.
+    fn cancel(&mut self, request_id: RequestId) -> bool {
+        if let Some(maybe_key) = self.requests.remove(&request_id) {
+            if let Some(key) = maybe_key
+                && let Some(inv) = self.invocations.get(&key)
+            {
+                inv.request_shutdown();
+            }
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Requests shutdown for all tracked invocations and clears the registry.
+    fn shutdown_all(&mut self) {
+        for inv in self.invocations.values() {
+            inv.request_shutdown();
+        }
+        self.invocations.clear();
+        self.requests.clear();
+    }
+}
+
+static WORKER_SHUTTING_DOWN: AtomicBool = AtomicBool::new(false);
+
+#[cfg(unix)]
+const SIG_TERM: i32 = 15;
+
+#[cfg(unix)]
+unsafe extern "C" {
+    fn signal(signum: i32, handler: usize) -> usize;
+    fn close(fd: i32) -> i32;
+}
+
+pub(crate) fn worker_is_shutting_down() -> bool {
+    WORKER_SHUTTING_DOWN.load(Ordering::SeqCst)
+}
+
+fn begin_worker_shutdown(reason: &str) {
+    if WORKER_SHUTTING_DOWN
+        .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
+        .is_ok()
+    {
+        append_worker_lifecycle_log(&format!(
+            "pid={} event=shutdown_begin thread={} reason={}",
+            current_pid(),
+            current_thread_label(),
+            reason,
+        ));
+    }
+}
+
+#[cfg(unix)]
+extern "C" fn worker_signal_handler(_signum: i32) {
+    WORKER_SHUTTING_DOWN.store(true, Ordering::SeqCst);
+    unsafe {
+        close(0);
+    } // Unblock the reader loop.
+} + +#[cfg(unix)] +fn install_worker_signal_handlers() { + static ONCE: std::sync::Once = std::sync::Once::new(); + ONCE.call_once(|| unsafe { + signal(SIG_TERM, worker_signal_handler as *const () as usize); + }); +} + +#[cfg(not(unix))] +fn install_worker_signal_handlers() {} + +fn execute_singleplex_request( + self_path: &std::path::Path, + startup_args: &[String], + request: &WorkRequest, + stdout: &SharedStdout, +) -> Result<(), ProcessWrapperError> { + let full_args = assemble_request_argv(startup_args, &request.arguments)?; + prepare_outputs(&full_args, None); + let (exit_code, output) = run_request(self_path, full_args, None, "process_wrapper subprocess")?; + let response = build_response(exit_code, &output, request.request_id); + write_worker_response(stdout, &response)?; + append_worker_lifecycle_log(&format!( + "pid={} thread={} request_complete request_id={} exit_code={} output_bytes={} mode=singleplex", + current_pid(), + current_thread_label(), + request.request_id, + exit_code, + output.len(), + )); + Ok(()) +} + +/// Runs one multiplex request on a detached thread. +fn run_request_thread( + self_path: std::path::PathBuf, + startup_args: Vec, + request: WorkRequest, + request_executor: RequestExecutor, + stdout: SharedStdout, + registry: SharedRequestCoordinator, + state_roots: Arc, +) { + log_request_thread_start(&request, &request_executor.kind); + + // Once shutdown starts, just clean up; Bazel will not read more responses. 
+ if worker_is_shutting_down() { + registry + .lock() + .expect(REGISTRY_MUTEX_POISONED) + .requests.remove(&request.request_id); + return; + } + + let (exit_code, output) = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let full_args = match assemble_request_argv(&startup_args, &request.arguments) { + Ok(args) => args, + Err(e) => return (1, format!("worker thread error: {e}")), + }; + let base_dir = if request.sandbox_dir.is_some() { + match request.base_dir() { + Ok(dir) => Some(dir), + Err(e) => return (1, format!("worker thread error: {e}")), + } + } else { + None + }; + prepare_outputs(&full_args, base_dir.as_deref()); + + if !registry + .lock() + .expect(REGISTRY_MUTEX_POISONED) + .requests.contains_key(&request.request_id) + { + return (0, String::new()); + } + + match &request_executor.kind { + RequestKind::Metadata { .. } => { + request_executor.execute_metadata(&request, full_args, &state_roots, ®istry) + } + RequestKind::Full { .. } => { + request_executor.execute_full(&request, full_args, &self_path) + } + RequestKind::NonPipelined => request_executor.execute_non_pipelined( + full_args, + &self_path, + request.sandbox_dir.as_ref().map(|d| d.as_str()), + ), + } + })) { + Ok(result) => result, + Err(_) => { + let reg = registry.lock().expect(REGISTRY_MUTEX_POISONED); + if let Some(inv) = &request_executor.invocation { + inv.request_shutdown(); + } + if let Some(key) = request_executor.kind.key() { + if let Some(inv) = reg.invocations.get(key) { + inv.request_shutdown(); + } + } + drop(reg); + (1, "internal error: worker thread panicked".to_string()) + } + }; + + // Claim the right to respond, then clean up invocation state. + let should_respond = { + let mut reg = registry.lock().expect(REGISTRY_MUTEX_POISONED); + let claimed = reg.requests.remove(&request.request_id).is_some(); + if let Some(key) = request_executor.kind.key() { + if !matches!(request_executor.kind, RequestKind::Metadata { .. 
}) { + reg.invocations.remove(key); + } + } + claimed + }; + if should_respond { + let response = build_response(exit_code, &output, request.request_id); + if let Err(e) = write_worker_response(&stdout, &response) { + append_worker_lifecycle_log(&format!( + "pid={} event=response_write_failed thread={} request_id={} error={}", + current_pid(), + current_thread_label(), + request.request_id, + e, + )); + } + } + append_worker_lifecycle_log(&format!( + "pid={} thread={} request_thread_complete request_id={} exit_code={} output_bytes={} responded={}", + current_pid(), + current_thread_label(), + request.request_id, + exit_code, + output.len(), + should_respond, + )); +} + +pub(crate) fn worker_main() -> Result<(), ProcessWrapperError> { + let request_counter = Arc::new(AtomicUsize::new(0)); + install_worker_panic_hook(); + let _lifecycle = + WorkerLifecycleGuard::new(&std::env::args().collect::>(), &request_counter); + install_worker_signal_handlers(); + + let self_path = std::env::current_exe() + .map_err(|e| ProcessWrapperError(format!("failed to get worker executable path: {e}")))?; + + let startup_args: Vec = std::env::args() + .skip(1) + .filter(|arg| arg != "--persistent_worker") + .collect(); + + let stdin = std::io::stdin(); + let stdout: SharedStdout = Arc::new(Mutex::new(())); + let registry: SharedRequestCoordinator = Arc::new(Mutex::new(RequestCoordinator::default())); + let state_roots = Arc::new(WorkerStateRoots::ensure()?); + + for line in stdin.lock().lines() { + let line = match line { + Ok(line) => line, + Err(e) => { + begin_worker_shutdown("stdin_read_error"); + append_worker_lifecycle_log(&format!( + "pid={} event=stdin_read_error thread={} error={}", + current_pid(), + current_thread_label(), + e + )); + return Err(ProcessWrapperError(format!( + "failed to read WorkRequest: {e}" + ))); + } + }; + if line.is_empty() { + continue; + } + if worker_is_shutting_down() { + append_worker_lifecycle_log(&format!( + "pid={} 
event=request_ignored_for_shutdown thread={} bytes={}", + current_pid(), + current_thread_label(), + line.len(), + )); + break; + } + request_counter.fetch_add(1, Ordering::SeqCst); + + let request = match parse_request_line(&line, &stdout) { + Some(request) => request, + None => continue, + }; + let request_kind = match assemble_request_argv(&startup_args, &request.arguments) + .and_then(|full_args| { + let base_dir = request.base_dir().map_err(ProcessWrapperError)?; + Ok(RequestKind::parse_in_dir(&full_args, &base_dir)) + }) { + Ok(kind) => kind, + Err(e) => { + let response = build_response(1, &e.to_string(), request.request_id); + if let Err(we) = write_worker_response(&stdout, &response) { + append_worker_lifecycle_log(&format!( + "pid={} event=response_write_failed thread={} request_id={} error={}", + current_pid(), + current_thread_label(), + request.request_id, + we, + )); + } + continue; + } + }; + log_request_received(&request, &request_kind); + + if request.request_id.is_singleplex() { + execute_singleplex_request(&self_path, &startup_args, &request, &stdout)?; + continue; + } + + if request.cancel { + let should_respond = registry + .lock() + .expect(REGISTRY_MUTEX_POISONED) + .cancel(request.request_id); + if should_respond { + let response = build_cancel_response(request.request_id); + if let Err(e) = write_worker_response(&stdout, &response) { + append_worker_lifecycle_log(&format!( + "pid={} event=response_write_failed thread={} request_id={} error={}", + current_pid(), + current_thread_label(), + request.request_id, + e, + )); + } + } + continue; + } + + let invocation = { + let mut reg = registry.lock().expect(REGISTRY_MUTEX_POISONED); + reg.requests.insert(request.request_id, request_kind.key().cloned()); + request_kind.key().and_then(|k| reg.invocations.get(k).map(Arc::clone)) + }; + let request_executor = RequestExecutor::new(request_kind.clone(), invocation); + match std::thread::Builder::new().spawn({ + let self_path = self_path.clone(); + 
let startup_args = startup_args.clone(); + let request = request.clone(); + let stdout = Arc::clone(&stdout); + let registry = Arc::clone(®istry); + let state_roots = Arc::clone(&state_roots); + move || { + run_request_thread( + self_path, + startup_args, + request, + request_executor, + stdout, + registry, + state_roots, + ) + } + }) { + Ok(_) => {} + Err(e) => { + // Thread spawn failed — remove registry entry and respond inline. + // Without this, Bazel hangs forever: WorkerMultiplexer.getResponse() + // calls waitForResponse.acquire() with no timeout, and the worker + // process is still alive so destroyProcess() recovery won't trigger. + registry + .lock() + .expect(REGISTRY_MUTEX_POISONED) + .requests + .remove(&request.request_id); + let response = build_response( + 1, + &format!("failed to spawn worker thread: {e}"), + request.request_id, + ); + let _ = write_worker_response(&stdout, &response); + append_worker_lifecycle_log(&format!( + "pid={} event=thread_spawn_failed thread={} request_id={} error={}", + current_pid(), + current_thread_label(), + request.request_id, + e, + )); + } + } + } + + begin_worker_shutdown("stdin_eof"); + registry + .lock() + .expect(REGISTRY_MUTEX_POISONED) + .shutdown_all(); + + append_worker_lifecycle_log(&format!( + "pid={} event=stdin_eof thread={} requests_seen={}", + current_pid(), + current_thread_label(), + request_counter.load(Ordering::SeqCst), + )); + + Ok(()) +} + +#[cfg(test)] +#[path = "test/worker.rs"] +mod test; diff --git a/util/process_wrapper/worker_args.rs b/util/process_wrapper/worker_args.rs new file mode 100644 index 0000000000..c4b07c8118 --- /dev/null +++ b/util/process_wrapper/worker_args.rs @@ -0,0 +1,312 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Argument parsing and rewriting for the persistent worker. + +use crate::options::{ + is_pipelining_flag, is_relocated_pw_flag, parse_pw_args, NormalizedRustcMetadata, + OptionError, ParsedPwArgs, RelocatedPwFlags, +}; +use crate::pw_args::{ + normalize_args_recursive, ParamFileReadErrorMode, + PIPELINING_FULL_FLAG, PIPELINING_KEY_PREFIX, PIPELINING_METADATA_FLAG, +}; +use crate::ProcessWrapperError; + +use super::exec::{resolve_request_relative_path, ExpandedRustcOutputs}; +use super::pipeline::pipelining_err; +use super::request::{RequestKind, WorkRequest}; +use super::types::{OutputDir, PipelineKey}; + +/// Scans an iterator of argument strings for pipelining flags and returns a +/// classified `RequestKind`. +pub(super) fn scan_pipelining_flags<'a>(iter: impl Iterator) -> RequestKind { + let mut is_metadata = false; + let mut is_full = false; + let mut key: Option = None; + for arg in iter { + if arg == PIPELINING_METADATA_FLAG { + is_metadata = true; + } else if arg == PIPELINING_FULL_FLAG { + is_full = true; + } else if let Some(k) = arg.strip_prefix(PIPELINING_KEY_PREFIX) { + key = Some(k.to_string()); + } + } + match (is_metadata, is_full, key) { + (true, _, Some(k)) => RequestKind::Metadata { + key: PipelineKey(k), + }, + (_, true, Some(k)) => RequestKind::Full { + key: PipelineKey(k), + }, + _ => RequestKind::NonPipelined, + } +} + +/// Strips pipelining protocol flags from a direct arg list. 
+pub(super) fn strip_pipelining_flags(args: &[String]) -> Vec<String> {
+    args.iter()
+        .filter(|a| !is_pipelining_flag(a))
+        .cloned()
+        .collect()
+}
+
+/// Startup args split at `--`.
+#[derive(Debug)]
+pub(super) struct StartupLayout {
+    /// Process-wrapper flags before `--` (e.g. `["--subst", "pwd=${pwd}"]`).
+    pub(super) pw_args: Vec<String>,
+    /// Child-program prefix after `--` (e.g. `["/path/to/rustc"]`).
+    pub(super) child_prefix: Vec<String>,
+}
+
+/// Splits startup args at the `--` boundary.
+pub(super) fn split_startup_args(
+    startup_args: &[String],
+) -> Result<StartupLayout, ProcessWrapperError> {
+    let mut parts = startup_args.splitn(2, |a| a == "--");
+    let pw_args = parts.next().unwrap().to_vec();
+    let child_prefix = parts
+        .next()
+        .ok_or_else(|| ProcessWrapperError("startup args missing '--' separator".into()))?
+        .to_vec();
+    Ok(StartupLayout {
+        pw_args,
+        child_prefix,
+    })
+}
+
+/// Splits per-request process_wrapper flags from child args.
+pub(super) fn extract_direct_request_pw_flags(
+    request_args: &[String],
+) -> (Vec<String>, Vec<String>) {
+    let mut remaining = Vec::new();
+    let mut pw_pairs = Vec::new();
+    let mut iter = request_args.iter();
+    while let Some(arg) = iter.next() {
+        if is_relocated_pw_flag(arg) {
+            pw_pairs.push(arg.clone());
+            if let Some(val) = iter.next() {
+                pw_pairs.push(val.clone());
+            }
+        } else {
+            remaining.push(arg.clone());
+        }
+    }
+    (remaining, pw_pairs)
+}
+
+/// Combines startup args with per-request args into the final argv.
+pub(super) fn assemble_request_argv( + startup_args: &[String], + request_args: &[String], +) -> Result, ProcessWrapperError> { + let layout = split_startup_args(startup_args)?; + let (remaining_child, direct_pw) = extract_direct_request_pw_flags(request_args); + Ok([ + layout.pw_args, + direct_pw, + vec!["--".into()], + layout.child_prefix, + remaining_child, + ] + .concat()) +} + +pub(super) fn expand_rustc_args_with_metadata( + rustc_and_after: &[String], + subst: &[(String, String)], + require_explicit_unstable_features: bool, + execroot_dir: &std::path::Path, +) -> Result<(Vec, NormalizedRustcMetadata), OptionError> { + let mut metadata = NormalizedRustcMetadata::default(); + let mut expanded = Vec::new(); + let mut read_file = |path: &str| { + let resolved = resolve_request_relative_path(path, Some(execroot_dir)) + .display() + .to_string(); + crate::util::read_file_to_array(&resolved).map_err(OptionError::Generic) + }; + let mut write_arg = |arg: String| { + expanded.push(arg); + Ok(()) + }; + normalize_args_recursive( + rustc_and_after.to_vec(), + subst, + &mut read_file, + ParamFileReadErrorMode::PreserveArg, + &mut write_arg, + &mut metadata, + )?; + if !metadata.has_allow_features && require_explicit_unstable_features { + expanded.push("-Zallow-features=".to_string()); + } + Ok((expanded, metadata)) +} + +pub(super) use crate::options::build_child_environment as build_rustc_env; + +/// Prepares rustc arguments: expand @paramfiles, apply substitutions, strip +/// pipelining flags, and append args from --arg-file files. +/// +/// Returns `(rustc_args, original_out_dir, relocated_pw_flags)` on success. 
+pub(super) fn prepare_rustc_args( + rustc_and_after: &[String], + pw_args: &ParsedPwArgs, + execroot_dir: &std::path::Path, +) -> Result<(Vec, OutputDir, RelocatedPwFlags), (i32, String)> { + let (mut rustc_args, metadata) = expand_rustc_args_with_metadata( + rustc_and_after, + &pw_args.subst, + pw_args.require_explicit_unstable_features, + execroot_dir, + ) + .map_err(|e| pipelining_err(e))?; + if rustc_args.is_empty() { + return Err(pipelining_err("no rustc arguments after expansion")); + } + + // Append args from any `--arg-file` inputs. + let mut arg_files = pw_args.arg_files.clone(); + arg_files.extend(metadata.relocated.arg_files.iter().cloned()); + for path in arg_files { + let resolved = resolve_request_relative_path(&path, Some(execroot_dir)); + let resolved = resolved.display().to_string(); + let lines = crate::util::read_file_to_array(&resolved) + .map_err(|e| (1, format!("failed to read arg-file '{}': {}", resolved, e)))?; + for line in lines { + rustc_args.push(apply_substs(&line, &pw_args.subst)); + } + } + + let original_out_dir = OutputDir(find_out_dir_in_expanded(&rustc_args).unwrap_or_default()); + + Ok((rustc_args, original_out_dir, metadata.relocated)) +} + +/// Resolves the request's declared `--out-dir`, including any `@paramfile` or +/// `--arg-file` contents, using the same path as metadata-request preparation. +pub(super) fn find_out_dir_in_request( + args: &[String], + base_dir: &std::path::Path, +) -> Option { + let filtered = strip_pipelining_flags(args); + let mut parts = filtered.splitn(2, |a| a == "--"); + let pw_raw = parts.next()?; + let rustc_and_after = parts.next().filter(|s| !s.is_empty())?; + let pw_args = parse_pw_args(pw_raw, base_dir); + let (_, out_dir, _) = prepare_rustc_args(rustc_and_after, &pw_args, base_dir).ok()?; + Some(out_dir) +} + +/// Rewrites output-related rustc args in one pass and returns the writable +/// paths needed by `prepare_expanded_rustc_outputs`. 
+pub(super) fn rewrite_expanded_rustc_outputs( + args: Vec, + new_out_dir: &std::path::Path, +) -> (Vec, ExpandedRustcOutputs) { + let mut rewritten = Vec::with_capacity(args.len()); + let mut outputs = ExpandedRustcOutputs::default(); + let rewritten_out_dir = new_out_dir.display().to_string(); + + for arg in args { + if arg.starts_with("--out-dir=") { + outputs.out_dir = Some(rewritten_out_dir.clone()); + rewritten.push(format!("--out-dir={rewritten_out_dir}")); + continue; + } + + let Some(emit) = arg.strip_prefix("--emit=") else { + rewritten.push(arg); + continue; + }; + + let mut rewritten_parts = Vec::new(); + for part in emit.split(',') { + let Some((kind, path)) = part.split_once('=') else { + rewritten_parts.push(part.to_owned()); + continue; + }; + + let path = if kind == "metadata" { + let filename = std::path::Path::new(path) + .file_name() + .unwrap_or_default() + .to_string_lossy() + .into_owned(); + new_out_dir.join(filename).display().to_string() + } else { + path.to_owned() + }; + outputs.emit_paths.push(path.clone()); + rewritten_parts.push(format!("{kind}={path}")); + } + rewritten.push(format!("--emit={}", rewritten_parts.join(","))); + } + + (rewritten, outputs) +} + +fn resolve_paths(paths: Vec, base: &std::path::Path) -> Vec { + paths + .into_iter() + .map(|p| { + resolve_request_relative_path(&p, Some(base)) + .display() + .to_string() + }) + .collect() +} + +pub(super) fn resolve_pw_args_for_request( + mut pw_args: ParsedPwArgs, + request: &WorkRequest, + execroot_dir: &std::path::Path, +) -> ParsedPwArgs { + let resolve = |path: String, base: &std::path::Path| -> String { + resolve_request_relative_path(&path, Some(base)) + .display() + .to_string() + }; + pw_args.env_files = resolve_paths(pw_args.env_files, execroot_dir); + pw_args.arg_files = resolve_paths(pw_args.arg_files, execroot_dir); + pw_args.stable_status_file = pw_args.stable_status_file.map(|p| resolve(p, execroot_dir)); + pw_args.volatile_status_file = pw_args + 
.volatile_status_file + .map(|p| resolve(p, execroot_dir)); + pw_args.output_file = pw_args.output_file.map(|path| { + let base = request + .sandbox_dir + .as_ref() + .map(|sd| sd.as_path()) + .unwrap_or(execroot_dir); + resolve(path, base) + }); + pw_args +} + +/// Applies substitutions to one argument string. +pub(super) fn apply_substs(arg: &str, subst: &[(String, String)]) -> String { + let mut a = arg.to_owned(); + crate::util::apply_substitutions(&mut a, subst); + a +} + +/// Searches already-expanded rustc args for `--out-dir=`. +pub(super) fn find_out_dir_in_expanded(args: &[String]) -> Option { + args.iter() + .find_map(|arg| arg.strip_prefix("--out-dir=").map(|d| d.to_string())) +} diff --git a/util/process_wrapper/worker_exec.rs b/util/process_wrapper/worker_exec.rs new file mode 100644 index 0000000000..8dddaaeac3 --- /dev/null +++ b/util/process_wrapper/worker_exec.rs @@ -0,0 +1,257 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Shared subprocess and filesystem helpers for the persistent worker. + +use std::path::{Path, PathBuf}; +use std::process::{Child, Command, Stdio}; +#[cfg(unix)] +use std::time::Duration; + +use crate::ProcessWrapperError; + +#[cfg(unix)] +unsafe extern "C" { + fn kill(pid: i32, sig: i32) -> i32; +} + +#[cfg(unix)] +pub(super) fn send_sigterm(pid: u32) { + if pid > i32::MAX as u32 { + return; // Prevent wrapping to negative (process group kill). 
+ } + unsafe { + kill(pid as i32, 15); // SIGTERM + } +} + +#[cfg(not(unix))] +pub(super) fn send_sigterm(_pid: u32) { + // Non-Unix falls back to `Child::kill()` in `graceful_kill`. +} + +/// Send SIGTERM, poll try_wait for 500ms (10 x 50ms), then SIGKILL + wait. +pub(crate) fn graceful_kill(child: &mut Child) { + #[cfg(unix)] + { + send_sigterm(child.id()); + for _ in 0..10 { + match child.try_wait() { + Ok(Some(_)) => return, + _ => std::thread::sleep(Duration::from_millis(50)), + } + } + let _ = child.kill(); + let _ = child.wait(); + } + #[cfg(not(unix))] + { + let _ = child.kill(); + let _ = child.wait(); + } +} + +/// Returns `true` if both paths resolve to the same inode after canonicalization. +/// Returns `false` if either path doesn't exist or can't be canonicalized. +pub(super) fn is_same_file(a: &Path, b: &Path) -> bool { + match (a.canonicalize(), b.canonicalize()) { + (Ok(a), Ok(b)) => a == b, + _ => false, + } +} + +pub(super) fn resolve_request_relative_path( + path: &str, + request_base_dir: Option<&Path>, +) -> PathBuf { + match request_base_dir { + Some(base_dir) => { + let path = Path::new(path); + if path.is_absolute() { + path.to_path_buf() + } else { + base_dir.join(path) + } + } + None => PathBuf::from(path), + } +} + +pub(super) fn materialize_output_file(src: &Path, dest: &Path) -> Result { + if let Some(parent) = dest.parent() { + std::fs::create_dir_all(parent)?; + } + + // Avoid deleting the source when rustc already wrote to the destination. + if is_same_file(src, dest) { + return Ok(false); + } + + // Not a TOCTOU race: dest is a Bazel-declared output path owned exclusively by this + // action. The exists() check avoids EEXIST from hard_link on stale files from a + // previous run. If removal and linking were interleaved by another actor (which Bazel + // prevents), the hard_link/copy fallback below would still handle the failure safely. 
+ if dest.exists() { + std::fs::remove_file(dest)?; + } + + match std::fs::hard_link(src, dest) { + Ok(()) => Ok(true), + Err(link_err) => match std::fs::copy(src, dest) { + Ok(_) => Ok(false), + Err(copy_err) => Err(std::io::Error::new( + copy_err.kind(), + format!( + "failed to materialize {} at {} via hardlink ({link_err}) or copy ({copy_err})", + src.display(), + dest.display(), + ), + )), + }, + } +} + +/// Makes files under each discovered `--out-dir` writable before a request runs. +/// +/// Bazel can leave prior outputs read-only, especially when metadata and full +/// actions reuse the same paths. This scans direct args, `--arg-file`, and +/// `@flagfile` contents. +/// +/// Safety: `--out-dir` values originate from rules_rust Starlark code (`rustc.bzl` +/// `construct_arguments`), not from user input. No path traversal validation needed. +/// +/// When `request_base_dir` is `Some`, relative paths in args are resolved against +/// that directory (used for sandboxed requests). When `None`, paths resolve +/// against the current working directory. 
+pub(super) fn prepare_outputs(args: &[String], request_base_dir: Option<&Path>) { + let mut out_dirs: Vec = Vec::new(); + + let mut args_iter = args.iter().peekable(); + while let Some(arg) = args_iter.next() { + if let Some(dir) = arg.strip_prefix("--out-dir=") { + out_dirs.push(resolve_request_relative_path(dir, request_base_dir)); + } else if let Some(flagfile_path) = arg.strip_prefix('@') { + let resolved = resolve_request_relative_path(flagfile_path, request_base_dir); + out_dirs.extend(scan_file_for_out_dir(&resolved, request_base_dir)); + } else if arg == "--arg-file" { + if let Some(path) = args_iter.peek() { + let resolved = resolve_request_relative_path(path, request_base_dir); + out_dirs.extend(scan_file_for_out_dir(&resolved, request_base_dir)); + args_iter.next(); + } + } + } + + for out_dir in &out_dirs { + make_dir_files_writable(out_dir); + make_dir_files_writable(&out_dir.join("_pipeline")); + } +} + +/// Reads a param/arg file and returns any `--out-dir=` values found. +pub(super) fn scan_file_for_out_dir( + argfile_path: &Path, + request_base_dir: Option<&Path>, +) -> Vec { + let Ok(content) = std::fs::read_to_string(argfile_path) else { + return Vec::new(); + }; + content + .lines() + .filter_map(|line| line.strip_prefix("--out-dir=")) + .map(|dir| resolve_request_relative_path(dir, request_base_dir)) + .collect() +} + +/// Makes all regular files in `dir` writable. 
+pub(super) fn make_dir_files_writable(dir: &Path) { + let Ok(entries) = std::fs::read_dir(dir) else { + return; + }; + for entry in entries.flatten() { + let Ok(meta) = entry.metadata() else { + continue; + }; + if !meta.is_file() || !meta.permissions().readonly() { + continue; + } + let mut perms = meta.permissions(); + perms.set_readonly(false); + let _ = std::fs::set_permissions(entry.path(), perms); + } +} + +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub(super) struct ExpandedRustcOutputs { + pub(super) out_dir: Option, + pub(super) emit_paths: Vec, +} + +pub(super) fn prepare_expanded_rustc_outputs(outputs: &ExpandedRustcOutputs) { + if let Some(dir) = outputs.out_dir.as_deref() { + let dir = Path::new(dir); + make_dir_files_writable(dir); + make_dir_files_writable(&dir.join("_pipeline")); + } + + for path in &outputs.emit_paths { + let path = Path::new(path); + if let Ok(meta) = std::fs::metadata(path) { + if meta.is_file() && meta.permissions().readonly() { + let mut perms = meta.permissions(); + perms.set_readonly(false); + let _ = std::fs::set_permissions(path, perms); + } + } + } +} + +/// Spawns a process_wrapper subprocess and returns the Child handle. 
+pub(super) fn spawn_request( + self_path: &Path, + arguments: Vec, + current_dir: Option<&str>, + context: &str, +) -> Result { + let mut command = Command::new(self_path); + command + .args(&arguments) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + if let Some(dir) = current_dir { + command.current_dir(dir); + } + command + .spawn() + .map_err(|e| ProcessWrapperError(format!("failed to spawn {context}: {e}"))) +} + +pub(super) fn run_request( + self_path: &Path, + arguments: Vec, + current_dir: Option<&str>, + context: &str, +) -> Result<(i32, String), ProcessWrapperError> { + let child = spawn_request(self_path, arguments, current_dir, context)?; + let output = child + .wait_with_output() + .map_err(|e| ProcessWrapperError(format!("failed to wait on {context}: {e}")))?; + let exit_code = output.status.code().unwrap_or(1); + let mut combined = String::from_utf8_lossy(&output.stdout).into_owned(); + let stderr = String::from_utf8_lossy(&output.stderr); + if !stderr.is_empty() { + combined.push_str(&stderr); + } + Ok((exit_code, combined)) +} diff --git a/util/process_wrapper/worker_invocation.rs b/util/process_wrapper/worker_invocation.rs new file mode 100644 index 0000000000..96682e82f0 --- /dev/null +++ b/util/process_wrapper/worker_invocation.rs @@ -0,0 +1,333 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Shared state for a single rustc invocation. 
+ +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Condvar, Mutex}; + +use super::exec::send_sigterm; +use super::types::OutputDir; + +const INVOCATION_MUTEX_POISONED: &str = "rustc invocation state mutex poisoned"; + +/// Directories associated with a pipelined invocation. +#[derive(Clone, Debug, Default)] +pub(crate) struct InvocationDirs { + pub pipeline_output_dir: PathBuf, + pub pipeline_root_dir: PathBuf, + pub original_out_dir: OutputDir, +} + +/// Returned from `wait_for_metadata` on success. +#[derive(Debug)] +pub(crate) struct MetadataOutput { + pub diagnostics_before: String, + /// Path to the .rmeta artifact (from rustc's artifact notification). + pub rmeta_path: Option, +} + +/// Returned from `wait_for_completion` on success. +#[derive(Debug)] +pub(crate) struct CompletionOutput { + pub exit_code: i32, + pub diagnostics: String, + pub dirs: InvocationDirs, +} + +/// Returned from wait methods on failure. +#[derive(Debug)] +pub(crate) struct FailureOutput { + pub exit_code: i32, + pub diagnostics: String, +} + +/// The lifecycle state of a single rustc invocation. +pub(crate) enum InvocationState { + Pending, + Running { + pid: u32, + dirs: InvocationDirs, + }, + MetadataReady { + pid: u32, + diagnostics_before: String, + rmeta_path: Option, + dirs: InvocationDirs, + }, + Completed { + exit_code: i32, + diagnostics: String, + dirs: InvocationDirs, + }, + Failed { + exit_code: i32, + diagnostics: String, + }, + ShuttingDown, +} + +impl InvocationState { + fn is_terminal(&self) -> bool { + matches!( + self, + InvocationState::Completed { .. } + | InvocationState::Failed { .. } + | InvocationState::ShuttingDown + ) + } + + /// Returns the child PID if the state has one (Running or MetadataReady). + fn pid(&self) -> Option { + match self { + InvocationState::Running { pid, .. } | InvocationState::MetadataReady { pid, .. 
} => { + Some(*pid) + } + _ => None, + } + } + + /// Takes the directories from states that carry them. + fn take_dirs(&mut self) -> InvocationDirs { + match self { + InvocationState::Running { dirs, .. } + | InvocationState::MetadataReady { dirs, .. } + | InvocationState::Completed { dirs, .. } => { + std::mem::take(dirs) + } + InvocationState::Pending + | InvocationState::Failed { .. } + | InvocationState::ShuttingDown => InvocationDirs::default(), + } + } + + /// Converts failed or shutting-down states to `FailureOutput`. + fn as_failure(&self) -> Option { + match self { + InvocationState::Completed { + exit_code, + diagnostics, + .. + } if *exit_code != 0 => Some(FailureOutput { + exit_code: *exit_code, + diagnostics: diagnostics.clone(), + }), + InvocationState::Failed { + exit_code, + diagnostics, + } => Some(FailureOutput { + exit_code: *exit_code, + diagnostics: diagnostics.clone(), + }), + InvocationState::ShuttingDown => Some(FailureOutput { + exit_code: -1, + diagnostics: "shutdown requested".to_string(), + }), + _ => None, + } + } + + /// Takes a metadata result from the state, moving data instead of cloning. + fn take_metadata_result(&mut self) -> Option> { + match self { + InvocationState::MetadataReady { + diagnostics_before, + rmeta_path, + .. + } => Some(Ok(MetadataOutput { + diagnostics_before: std::mem::take(diagnostics_before), + rmeta_path: rmeta_path.take(), + })), + InvocationState::Completed { + exit_code: 0, + diagnostics, + .. + } => Some(Ok(MetadataOutput { + diagnostics_before: std::mem::take(diagnostics), + rmeta_path: None, + })), + InvocationState::Pending | InvocationState::Running { .. } => None, + _ => self.as_failure().map(Err), + } + } + + /// Takes a completion result from the state, moving data instead of cloning. 
+ fn take_completion_result(&mut self) -> Option> { + match self { + InvocationState::Completed { + exit_code, + diagnostics, + dirs, + } => Some(Ok(CompletionOutput { + exit_code: *exit_code, + diagnostics: std::mem::take(diagnostics), + dirs: std::mem::take(dirs), + })), + InvocationState::Pending + | InvocationState::Running { .. } + | InvocationState::MetadataReady { .. } => None, + _ => self.as_failure().map(Err), + } + } +} + +/// Shared handle to an invocation lifecycle. +/// +/// Request threads wait on it while the rustc thread drives transitions. +pub(crate) struct RustcInvocation { + state: Mutex, + cvar: Condvar, + shutdown_requested: AtomicBool, +} + +impl RustcInvocation { + pub fn new() -> Self { + RustcInvocation { + state: Mutex::new(InvocationState::Pending), + cvar: Condvar::new(), + shutdown_requested: AtomicBool::new(false), + } + } + + /// Blocks until `extractor` returns `Some`, re-checking after each condvar wakeup. + fn wait_for(&self, extractor: impl Fn(&mut InvocationState) -> Option) -> T { + let mut state = self.state.lock().expect(INVOCATION_MUTEX_POISONED); + loop { + if let Some(result) = extractor(&mut state) { + return result; + } + state = self.cvar.wait(state).expect(INVOCATION_MUTEX_POISONED); + } + } + + /// Waits until metadata is ready, the invocation finishes, or shutdown is requested. + pub fn wait_for_metadata(&self) -> Result { + self.wait_for(InvocationState::take_metadata_result) + } + + /// Waits until the invocation reaches a terminal state. + pub fn wait_for_completion(&self) -> Result { + self.wait_for(InvocationState::take_completion_result) + } + + /// Requests shutdown and sends SIGTERM to any running child process. 
+ pub fn request_shutdown(&self) { + self.shutdown_requested.store(true, Ordering::SeqCst); + let mut state = self + .state + .lock() + .expect(INVOCATION_MUTEX_POISONED); + if state.is_terminal() { + return; + } + let pid = state.pid(); + *state = InvocationState::ShuttingDown; + self.cvar.notify_all(); + drop(state); + // Send SIGTERM outside the lock so the rustc thread can unblock. + if let Some(pid) = pid { + send_sigterm(pid); + } + } + + pub(crate) fn is_shutdown_requested(&self) -> bool { + self.shutdown_requested.load(Ordering::SeqCst) + } + + pub(crate) fn transition_to_running(&self, pid: u32, dirs: InvocationDirs) { + let mut state = self + .state + .lock() + .expect(INVOCATION_MUTEX_POISONED); + if matches!(*state, InvocationState::ShuttingDown) { + return; + } + *state = InvocationState::Running { pid, dirs }; + self.cvar.notify_all(); + } + + pub(crate) fn transition_to_metadata_ready( + &self, + pid: u32, + diagnostics_before: String, + rmeta_path: Option, + ) -> bool { + let mut state = self + .state + .lock() + .expect(INVOCATION_MUTEX_POISONED); + if matches!(*state, InvocationState::ShuttingDown) { + return false; + } + let dirs = state.take_dirs(); + *state = InvocationState::MetadataReady { + pid, + diagnostics_before, + rmeta_path, + dirs, + }; + self.cvar.notify_all(); + true + } + + pub(crate) fn transition_to_finished(&self, exit_code: i32, diagnostics: String) { + let mut state = self + .state + .lock() + .expect(INVOCATION_MUTEX_POISONED); + if exit_code == 0 { + if matches!(*state, InvocationState::ShuttingDown) { + return; + } + let dirs = state.take_dirs(); + *state = InvocationState::Completed { + exit_code, + diagnostics, + dirs, + }; + } else { + *state = InvocationState::Failed { + exit_code, + diagnostics, + }; + } + self.cvar.notify_all(); + } + + #[cfg(test)] + pub fn is_pending(&self) -> bool { + matches!( + *self + .state + .lock() + .expect(INVOCATION_MUTEX_POISONED), + InvocationState::Pending + ) + } + + #[cfg(test)] + pub 
fn is_shutting_down_or_terminal(&self) -> bool { + let state = self + .state + .lock() + .expect(INVOCATION_MUTEX_POISONED); + matches!( + *state, + InvocationState::ShuttingDown + | InvocationState::Completed { .. } + | InvocationState::Failed { .. } + ) + } +} diff --git a/util/process_wrapper/worker_logging.rs b/util/process_wrapper/worker_logging.rs new file mode 100644 index 0000000000..40ef1161ec --- /dev/null +++ b/util/process_wrapper/worker_logging.rs @@ -0,0 +1,138 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Lifecycle logging helpers for persistent-worker debugging. 
+ +use std::io::Write; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::thread; +use std::time::Instant; + +use super::request::{RequestKind, WorkRequest}; + +pub(crate) fn current_pid() -> u32 { + std::process::id() +} + +pub(crate) fn current_thread_label() -> String { + format!("{:?}", thread::current().id()) +} + +fn append_log(path: &std::path::Path, message: &str) { + let mut file = match std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(path) + { + Ok(file) => file, + Err(_) => return, + }; + let _ = writeln!(file, "{message}"); +} + +pub(crate) fn append_worker_lifecycle_log(message: &str) { + let root = std::path::Path::new("_pw_state"); + let _ = std::fs::create_dir_all(root); + append_log(&root.join("worker_lifecycle.log"), message); +} + +pub(super) fn append_pipeline_log(pipeline_root: &std::path::Path, message: &str) { + append_log(&pipeline_root.join("pipeline.log"), message); +} + +pub(crate) struct WorkerLifecycleGuard { + pid: u32, + start: Instant, + request_counter: Arc, +} + +impl WorkerLifecycleGuard { + pub(crate) fn new(argv: &[String], request_counter: &Arc) -> Self { + let pid = current_pid(); + let cwd = std::env::current_dir() + .map(|cwd| cwd.display().to_string()) + .unwrap_or_else(|_| "".to_string()); + append_worker_lifecycle_log(&format!( + "pid={} event=start thread={} cwd={} argv_len={}", + pid, + current_thread_label(), + cwd, + argv.len(), + )); + Self { + pid, + start: Instant::now(), + request_counter: Arc::clone(request_counter), + } + } +} + +impl Drop for WorkerLifecycleGuard { + fn drop(&mut self) { + let uptime = self.start.elapsed(); + let requests = self.request_counter.load(Ordering::SeqCst); + append_worker_lifecycle_log(&format!( + "pid={} event=exit uptime_ms={} requests_seen={}", + self.pid, + uptime.as_millis(), + requests, + )); + } +} + +pub(crate) fn install_worker_panic_hook() { + static ONCE: std::sync::Once = std::sync::Once::new(); + ONCE.call_once(|| { + 
std::panic::set_hook(Box::new(|info| { + append_worker_lifecycle_log(&format!( + "pid={} event=panic thread={} info={}", + current_pid(), + current_thread_label(), + info + )); + })); + }); +} + +fn extract_arg<'a>(args: &'a [String], prefix: &str) -> Option<&'a str> { + args.iter().find_map(|arg| arg.strip_prefix(prefix)) +} + +fn log_request_event(event: &str, request: &WorkRequest, kind: &RequestKind, extra: &str) { + append_worker_lifecycle_log(&format!( + "pid={} thread={} {} request_id={}{} crate={} emit={} pipeline_key={}", + current_pid(), + current_thread_label(), + event, + request.request_id, + extra, + extract_arg(&request.arguments, "--crate-name=").unwrap_or("-"), + extract_arg(&request.arguments, "--emit=").unwrap_or("-"), + kind.key().map(|key| key.as_str()).unwrap_or("-"), + )); +} + +pub(crate) fn log_request_received(request: &WorkRequest, kind: &RequestKind) { + log_request_event( + "request_received", + request, + kind, + &format!(" cancel={}", request.cancel), + ); +} + +pub(crate) fn log_request_thread_start(request: &WorkRequest, kind: &RequestKind) { + log_request_event("request_thread_start", request, kind, ""); +} diff --git a/util/process_wrapper/worker_pipeline.rs b/util/process_wrapper/worker_pipeline.rs new file mode 100644 index 0000000000..e1ebbb3ae2 --- /dev/null +++ b/util/process_wrapper/worker_pipeline.rs @@ -0,0 +1,193 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +//! Pipelining helpers for the persistent worker. + +use std::fmt; +use std::path::PathBuf; + +use crate::ProcessWrapperError; + +use super::exec::is_same_file; +use super::logging::append_pipeline_log; +use super::request::WorkRequest; +use super::types::PipelineKey; + +pub(super) fn pipelining_err(msg: impl std::fmt::Display) -> (i32, String) { + (1, format!("pipelining: {msg}")) +} + +/// Directories used for one worker-managed pipelined request. +pub(super) struct PipelineContext { + pub(super) root_dir: PathBuf, + pub(super) execroot_dir: PathBuf, + pub(super) outputs_dir: PathBuf, +} + +/// Error returned when pipeline outputs cannot be materialized. +#[derive(Debug)] +pub(super) struct MaterializeError { + pub(super) path: PathBuf, + pub(super) cause: std::io::Error, +} + +impl fmt::Display for MaterializeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "failed to materialize '{}': {}", + self.path.display(), + self.cause + ) + } +} + +impl std::error::Error for MaterializeError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + Some(&self.cause) + } +} + +#[derive(Clone, Debug)] +pub(crate) struct WorkerStateRoots { + pipeline_root: PathBuf, +} + +impl WorkerStateRoots { + /// Ensures `_pw_state/pipeline` exists in the worker execroot. + pub(crate) fn ensure() -> Result { + let pipeline_root = PathBuf::from("_pw_state/pipeline"); + std::fs::create_dir_all(&pipeline_root).map_err(|e| { + ProcessWrapperError(format!("failed to create worker pipeline root: {e}")) + })?; + Ok(Self { pipeline_root }) + } + + /// Safety: PipelineKey is the value of `--pipelining-key=`, set by rules_rust's + /// own Starlark code in `rustc.bzl` (a hash of the crate label). Not user-controlled. 
+ pub(crate) fn pipeline_dir(&self, key: &PipelineKey) -> PathBuf { + self.pipeline_root.join(key.as_str()) + } +} + +/// Creates the directories and working paths for one pipelined request. +pub(super) fn create_pipeline_context( + state_roots: &WorkerStateRoots, + key: &PipelineKey, + request: &WorkRequest, +) -> Result { + let root_dir = state_roots.pipeline_dir(key); + + // Not a TOCTOU race: outputs_dir is namespaced by request_id, which Bazel assigns + // uniquely per work request. No concurrent request shares this path. The remove+create + // ensures a clean output directory for each request attempt. + let outputs_dir = root_dir.join(format!("outputs-{}", request.request_id)); + if let Err(e) = std::fs::remove_dir_all(&outputs_dir) { + if e.kind() != std::io::ErrorKind::NotFound { + return Err(pipelining_err(format_args!( + "failed to clear pipeline outputs dir: {e}" + ))); + } + } + std::fs::create_dir_all(&outputs_dir) + .map_err(|e| pipelining_err(format_args!("failed to create pipeline outputs dir: {e}")))?; + let root_dir = std::fs::canonicalize(root_dir) + .map_err(|e| pipelining_err(format_args!("failed to resolve pipeline dir: {e}")))?; + let outputs_dir = std::fs::canonicalize(outputs_dir) + .map_err(|e| pipelining_err(format_args!("failed to resolve pipeline outputs dir: {e}")))?; + + let execroot_dir = request + .base_dir_canonicalized() + .map_err(|e| pipelining_err(format_args!("{e}")))?; + + Ok(PipelineContext { + root_dir, + execroot_dir, + outputs_dir, + }) +} + +pub(super) fn copy_rmeta_unsandboxed( + rmeta_src: &std::path::Path, + original_out_dir: &str, + root_dir: &std::path::Path, +) -> Option { + let filename = rmeta_src.file_name()?; + let dest_pipeline = std::path::Path::new(original_out_dir).join("_pipeline"); + if let Err(e) = std::fs::create_dir_all(&dest_pipeline) { + append_pipeline_log(root_dir, &format!("failed to create _pipeline dir: {e}")); + return Some(format!("pipelining: failed to create _pipeline dir: {e}")); + } + let 
dest = dest_pipeline.join(filename); + if !is_same_file(rmeta_src, &dest) + && let Err(e) = std::fs::copy(rmeta_src, &dest) + { + return Some(format!("pipelining: failed to copy rmeta: {e}")); + } + None +} + +/// Copies all regular files from `src_dir` to `dest_dir`. +pub(super) fn copy_outputs_unsandboxed( + src_dir: &std::path::Path, + dest_dir: &std::path::Path, +) -> Result<(), String> { + std::fs::create_dir_all(dest_dir) + .map_err(|e| format!("pipelining: failed to create output dir: {e}"))?; + let entries = std::fs::read_dir(src_dir) + .map_err(|e| format!("pipelining: failed to read pipeline dir: {e}"))?; + for entry in entries { + let entry = entry.map_err(|e| format!("pipelining: dir entry error: {e}"))?; + let meta = entry.metadata().map_err(|e| { + format!( + "pipelining: metadata error for {}: {e}", + entry.path().display() + ) + })?; + if meta.is_file() { + let dest = dest_dir.join(entry.file_name()); + if !is_same_file(&entry.path(), &dest) { + std::fs::copy(entry.path(), &dest).map_err(|e| { + format!( + "pipelining: failed to copy {} to {}: {e}", + entry.path().display(), + dest.display(), + ) + })?; + } + } + } + Ok(()) +} + +pub(super) fn maybe_cleanup_pipeline_dir( + pipeline_root: &std::path::Path, + keep: bool, + reason: &str, +) { + if keep { + append_pipeline_log( + pipeline_root, + &format!("preserving pipeline dir for inspection: {reason}"), + ); + return; + } + + if let Err(err) = std::fs::remove_dir_all(pipeline_root) { + append_pipeline_log( + pipeline_root, + &format!("failed to remove pipeline dir during cleanup: {err}"), + ); + } +} diff --git a/util/process_wrapper/worker_protocol.rs b/util/process_wrapper/worker_protocol.rs new file mode 100644 index 0000000000..eb0e4854e0 --- /dev/null +++ b/util/process_wrapper/worker_protocol.rs @@ -0,0 +1,251 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Bazel JSON worker wire-format helpers. +//! +//! Covers parsing, construction, and transmission of worker protocol messages. + +use std::io; +use std::sync::{Arc, Mutex}; + +#[cfg(not(unix))] +use std::io::Write; + +use tinyjson::JsonValue; + +use crate::ProcessWrapperError; + +use super::logging::{append_worker_lifecycle_log, current_pid, current_thread_label}; +use super::request::WorkRequest; +use super::types::{RequestId, SandboxDir}; + +/// Thread-safe stdout guard for serializing worker responses. 
+pub(super) type SharedStdout = Arc>; + +#[cfg(unix)] +unsafe extern "C" { + fn write(fd: i32, buf: *const std::ffi::c_void, count: usize) -> isize; +} + +pub(super) fn write_worker_response( + stdout: &SharedStdout, + response: &str, +) -> Result<(), ProcessWrapperError> { + let _guard = stdout + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + write_all_stdout_fd(response.as_bytes()) + .and_then(|_| write_all_stdout_fd(b"\n")) + .map_err(|e| ProcessWrapperError(format!("failed to write WorkResponse: {e}")))?; + Ok(()) +} + +#[cfg(unix)] +fn write_all_stdout_fd(mut bytes: &[u8]) -> io::Result<()> { + while !bytes.is_empty() { + let written = unsafe { write(1, bytes.as_ptr().cast(), bytes.len()) }; + if written < 0 { + let err = io::Error::last_os_error(); + if err.kind() == io::ErrorKind::Interrupted { + continue; + } + return Err(err); + } + let written = written as usize; + if written == 0 { + return Err(io::Error::new( + io::ErrorKind::WriteZero, + "short write to worker stdout", + )); + } + bytes = &bytes[written..]; + } + Ok(()) +} + +#[cfg(not(unix))] +fn write_all_stdout_fd(bytes: &[u8]) -> io::Result<()> { + let mut out = io::stdout().lock(); + out.write_all(bytes)?; + out.flush() +} + +/// Parses a single JSON work request line, sending an error response if parsing fails. 
+pub(super) fn parse_request_line(line: &str, stdout: &SharedStdout) -> Option { + let request: JsonValue = match line.parse::() { + Ok(request) => request, + Err(e) => { + let request_id = (|| { + let after_key = line.split_once("\"requestId\"")?.1; + let after_colon = after_key.split_once(':')?.1.trim_start(); + let end = after_colon + .find(|ch: char| !ch.is_ascii_digit()) + .unwrap_or(after_colon.len()); + after_colon[..end].parse().ok().map(super::types::RequestId) + })(); + if let Some(request_id) = request_id { + append_worker_lifecycle_log(&format!( + "pid={} thread={} request_parse_error request_id={} bytes={} error={}", + current_pid(), + current_thread_label(), + request_id, + line.len(), + e + )); + let response = + build_response(1, &format!("worker protocol parse error: {e}"), request_id); + if let Err(we) = write_worker_response(stdout, &response) { + append_worker_lifecycle_log(&format!( + "pid={} event=response_write_failed thread={} request_id={} error={}", + current_pid(), + current_thread_label(), + request_id, + we, + )); + } + } + return None; + } + }; + + match extract_sandbox_dir(&request) { + Ok(sandbox_dir) => Some(WorkRequest { + request_id: extract_request_id(&request), + arguments: extract_arguments(&request), + sandbox_dir, + cancel: extract_cancel(&request), + }), + Err(e) => { + let request_id = extract_request_id(&request); + let response = build_response(1, &e, request_id); + if let Err(we) = write_worker_response(stdout, &response) { + append_worker_lifecycle_log(&format!( + "pid={} event=response_write_failed thread={} request_id={} error={}", + current_pid(), + current_thread_label(), + request_id, + we, + )); + } + None + } + } +} + +/// Extracts the `requestId` field from a WorkRequest (defaults to 0). 
+pub(super) fn extract_request_id(request: &JsonValue) -> RequestId { + if let JsonValue::Object(map) = request + && let Some(JsonValue::Number(id)) = map.get("requestId") + { + return RequestId(*id as i64); + } + RequestId(0) +} + +/// Extracts the `arguments` array from a WorkRequest. +pub(super) fn extract_arguments(request: &JsonValue) -> Vec { + if let JsonValue::Object(map) = request + && let Some(JsonValue::Array(args)) = map.get("arguments") + { + return args + .iter() + .filter_map(|v| { + if let JsonValue::String(s) = v { + Some(s.clone()) + } else { + None + } + }) + .collect(); + } + vec![] +} + +/// Extracts `sandboxDir` and rejects unusable sandbox directories. +/// +/// An unusable directory usually means multiplex sandboxing is enabled on a +/// platform without sandbox support. +/// +/// Safety: sandboxDir is constructed by Bazel as `__sandbox//`. +/// It is a relative path from controlled integer and string components — no user input, no +/// path traversal possible. See `SandboxedWorkerProxy.java` in the Bazel source. +pub(super) fn extract_sandbox_dir(request: &JsonValue) -> Result, String> { + if let JsonValue::Object(map) = request + && let Some(JsonValue::String(dir)) = map.get("sandboxDir") + { + if dir.is_empty() { + return Ok(None); + } + if std::fs::read_dir(dir).is_ok_and(|mut entries| entries.next().is_some()) { + return Ok(Some(SandboxDir(dir.clone()))); + } + return Err(format!( + "Bazel sent sandboxDir=\"{}\" but the directory {}. \ + This typically means --experimental_worker_multiplex_sandboxing is enabled \ + on a platform without sandbox support (e.g. Windows). \ + Remove this flag or make it platform-specific \ + (e.g. build:linux --experimental_worker_multiplex_sandboxing).", + dir, + if std::path::Path::new(dir).exists() { + "is empty (no symlinks to execroot)" + } else { + "does not exist" + }, + )); + } + Ok(None) +} + +/// Extracts the `cancel` field from a WorkRequest (false if absent). 
+pub(super) fn extract_cancel(request: &JsonValue) -> bool { + if let JsonValue::Object(map) = request + && let Some(JsonValue::Boolean(cancel)) = map.get("cancel") + { + return *cancel; + } + false +} + +/// Builds a JSON WorkResponse string. +pub(super) fn build_response(exit_code: i32, output: &str, request_id: RequestId) -> String { + let output: String = output + .chars() + .map(|ch| match ch { + '\n' | '\r' | '\t' => ch, + ch if ch.is_control() => ' ', + ch => ch, + }) + .collect(); + format!( + "{{\"exitCode\":{},\"output\":{},\"requestId\":{}}}", + exit_code, + json_string_literal(&output), + request_id.0 + ) +} + +/// Builds a JSON WorkResponse with `wasCancelled: true`. +pub(super) fn build_cancel_response(request_id: RequestId) -> String { + format!( + "{{\"exitCode\":0,\"output\":{},\"requestId\":{},\"wasCancelled\":true}}", + json_string_literal(""), + request_id.0 + ) +} + +pub(super) fn json_string_literal(value: &str) -> String { + JsonValue::String(value.to_owned()) + .stringify() + .unwrap_or_else(|_| "\"\"".to_string()) +} diff --git a/util/process_wrapper/worker_request.rs b/util/process_wrapper/worker_request.rs new file mode 100644 index 0000000000..92e6b2860d --- /dev/null +++ b/util/process_wrapper/worker_request.rs @@ -0,0 +1,525 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Request parsing and execution context for Bazel work requests. 
+ +use std::collections::HashMap; +use std::path::PathBuf; +use std::process::{Command, Stdio}; +use std::sync::Arc; + +use crate::options::{parse_pw_args, NormalizedRustcMetadata, ParsedPwArgs, SubprocessPipeliningMode}; + +use super::args::{ + build_rustc_env, expand_rustc_args_with_metadata, find_out_dir_in_request, prepare_rustc_args, + resolve_pw_args_for_request, rewrite_expanded_rustc_outputs, scan_pipelining_flags, + strip_pipelining_flags, +}; +use super::exec::{ + prepare_expanded_rustc_outputs, prepare_outputs, resolve_request_relative_path, run_request, +}; +use super::invocation::{InvocationDirs, MetadataOutput, RustcInvocation}; +use super::logging::append_pipeline_log; +use super::pipeline::{ + copy_outputs_unsandboxed, copy_rmeta_unsandboxed, create_pipeline_context, + maybe_cleanup_pipeline_dir, pipelining_err, PipelineContext, WorkerStateRoots, +}; +use super::SharedRequestCoordinator; +use super::rustc_driver::spawn_pipelined_rustc; +use super::sandbox::{copy_all_outputs_to_sandbox, copy_output_to_sandbox, seed_sandbox_cache_root}; +use super::types::{OutputDir, PipelineKey, RequestId, SandboxDir}; + +/// Fields needed to execute one Bazel work request. +#[derive(Clone, Debug)] +pub(crate) struct WorkRequest { + pub(crate) request_id: RequestId, + pub(crate) arguments: Vec, + pub(crate) sandbox_dir: Option, + pub(crate) cancel: bool, +} + +impl WorkRequest { + /// Returns the base directory for this request. + pub(crate) fn base_dir(&self) -> Result { + if let Some(sandbox_dir) = self.sandbox_dir.as_ref() { + if sandbox_dir.as_path().is_absolute() { + return Ok(sandbox_dir.as_path().to_path_buf()); + } + return std::env::current_dir() + .map(|cwd| cwd.join(sandbox_dir.as_path())) + .map_err(|e| format!("failed to resolve worker cwd: {e}")); + } + std::env::current_dir().map_err(|e| format!("failed to resolve worker cwd: {e}")) + } + + /// Like [`base_dir`], but canonicalizes the unsandboxed path. 
+ pub(crate) fn base_dir_canonicalized(&self) -> Result { + let dir = self.base_dir()?; + if self.sandbox_dir.is_some() { + Ok(dir) + } else { + std::fs::canonicalize(&dir) + .map_err(|e| format!("failed to canonicalize worker CWD: {e}")) + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) enum RequestKind { + /// Handle as a normal subprocess request. + NonPipelined, + /// Start rustc and return once metadata is ready. + Metadata { key: PipelineKey }, + /// Reuse a metadata invocation and wait for completion. + Full { key: PipelineKey }, +} + +impl RequestKind { + pub(crate) fn parse_in_dir(args: &[String], base_dir: &std::path::Path) -> Self { + let direct = scan_pipelining_flags(args.iter().map(String::as_str)); + if !matches!(direct, RequestKind::NonPipelined) { + return direct; + } + + // No direct pipelining flags; check any expanded paramfiles. + let mut parts = args.splitn(2, |a| a == "--"); + let pw_raw = parts.next().unwrap(); + let rustc_args = parts.next().unwrap_or(&[]); + let parsed_pw_args = parse_pw_args(pw_raw, base_dir); + let nested = match expand_rustc_args_with_metadata( + rustc_args, + &parsed_pw_args.subst, + parsed_pw_args.require_explicit_unstable_features, + base_dir, + ) { + Ok((_, metadata)) => metadata, + Err(e) => { + // Expansion failed — fall back to non-pipelined classification. + // This is safe (just slower) but worth logging for debugging. 
+ append_pipeline_log( + &base_dir.join("_pw_state/pipeline"), + &format!("pipelining flag detection failed, falling back to non-pipelined: {e}"), + ); + NormalizedRustcMetadata::default() + } + }; + + let is_metadata = + nested.relocated.pipelining_mode == Some(SubprocessPipeliningMode::Metadata); + let is_full = nested.relocated.pipelining_mode == Some(SubprocessPipeliningMode::Full); + let key = nested.pipelining_key; + + match (is_metadata, is_full, key) { + (true, _, Some(k)) => RequestKind::Metadata { + key: PipelineKey(k), + }, + (_, true, Some(k)) => RequestKind::Full { + key: PipelineKey(k), + }, + _ => RequestKind::NonPipelined, + } + } + + /// Returns the pipeline key if this is a pipelined request. + pub(crate) fn key(&self) -> Option<&PipelineKey> { + match self { + RequestKind::Metadata { key } | RequestKind::Full { key } => Some(key), + RequestKind::NonPipelined => None, + } + } +} + +/// All prepared state needed to spawn a metadata rustc invocation. +struct MetadataInvocationReady { + rustc_args: Vec, + env: HashMap, + ctx: PipelineContext, + original_out_dir: OutputDir, + pw_args: ParsedPwArgs, +} + +/// Per-request executor owned by a request thread. +pub(super) struct RequestExecutor { + pub(super) kind: RequestKind, + /// Shared invocation for pipelined requests. + pub(super) invocation: Option>, +} + +impl RequestExecutor { + pub(super) fn new(kind: RequestKind, invocation: Option>) -> Self { + Self { kind, invocation } + } + + /// Executes a metadata request and returns once `.rmeta` is ready. 
+ pub(super) fn execute_metadata( + &self, + request: &WorkRequest, + full_args: Vec, + state_roots: &WorkerStateRoots, + registry: &SharedRequestCoordinator, + ) -> (i32, String) { + let key = match &self.kind { + RequestKind::Metadata { key } => key.clone(), + _ => { + return ( + 1, + "execute_metadata called for non-metadata request".to_string(), + ) + } + }; + + let ready = match prepare_metadata_invocation(&key, full_args, request, state_roots) { + Ok(r) => r, + Err(e) => return e, + }; + + append_pipeline_log( + &ready.ctx.root_dir, + &format!( + "metadata start request_id={} key={} sandbox_dir={:?} execroot={} outputs={}", + request.request_id, + key, + request.sandbox_dir, + ready.ctx.execroot_dir.display(), + ready.ctx.outputs_dir.display(), + ), + ); + + let (invocation, original_out_dir, ctx, pw_args) = + match spawn_metadata_rustc(ready, &key, registry) { + Ok(result) => result, + Err(e) => return e, + }; + + match invocation.wait_for_metadata() { + Ok(meta) => materialize_metadata( + meta, + &invocation, + &ctx, + request, + &original_out_dir, + &key, + &pw_args, + ), + Err(failure) => { + maybe_cleanup_pipeline_dir(&ctx.root_dir, true, "metadata rustc failed"); + if let Some(ref path) = pw_args.output_file { + let _ = std::fs::write(path, &failure.diagnostics); + } + (failure.exit_code, failure.diagnostics) + } + } + } + + /// Executes a full request, or falls back to a fresh subprocess. + pub(super) fn execute_full( + &self, + request: &WorkRequest, + full_args: Vec, + self_path: &std::path::Path, + ) -> (i32, String) { + let key = match &self.kind { + RequestKind::Full { key } => key.clone(), + _ => return (1, "execute_full called for non-full request".to_string()), + }; + + let invocation = match &self.invocation { + Some(inv) => Arc::clone(inv), + None => { + return self.execute_fallback(request, full_args, self_path, &key); + } + }; + + // Extract the full action's own --out-dir before the match consumes full_args. 
+ // This may differ from the metadata action's original_out_dir when subdirectory + // isolation is used (e.g. _worker_pipelining/). + let full_out_dir = request.base_dir().ok().and_then(|dir| find_out_dir_in_request(&full_args, &dir)); + + match invocation.wait_for_completion() { + Ok(completion) => { + let dest_out_dir = full_out_dir + .as_ref() + .unwrap_or(&completion.dirs.original_out_dir); + if completion.exit_code == 0 { + let copy_result = match request.sandbox_dir.as_ref() { + Some(dir) => copy_all_outputs_to_sandbox( + &completion.dirs.pipeline_output_dir, + dir.as_path(), + dest_out_dir.as_str(), + ) + .map_err(|e| format!("pipelining: output materialization failed: {e}")), + None => copy_outputs_unsandboxed( + &completion.dirs.pipeline_output_dir, + dest_out_dir.as_path(), + ), + }; + if let Err(e) = copy_result { + append_pipeline_log( + &completion.dirs.pipeline_root_dir, + &format!("full output copy error: {e}"), + ); + return (1, format!("{}\n{e}", completion.diagnostics)); + } + } + append_pipeline_log( + &completion.dirs.pipeline_root_dir, + &format!("full done key={} exit_code={}", key, completion.exit_code), + ); + maybe_cleanup_pipeline_dir( + &completion.dirs.pipeline_root_dir, + completion.exit_code != 0, + "full action failed", + ); + (completion.exit_code, completion.diagnostics) + } + Err(_) => { + self.execute_fallback(request, full_args, self_path, &key) + } + } + } + + fn execute_fallback( + &self, + request: &WorkRequest, + args: Vec, + self_path: &std::path::Path, + key: &PipelineKey, + ) -> (i32, String) { + let worker_state_root = std::env::current_dir() + .ok() + .map(|cwd| cwd.join("_pw_state").join("fallback.log")); + if let Some(path) = worker_state_root { + if let Ok(mut file) = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(path) + { + use std::io::Write; + let _ = writeln!( + file, + "full missing bg request_id={} key={} sandbox_dir={:?}", + request.request_id, key, request.sandbox_dir + ); + } + } + let 
filtered = strip_pipelining_flags(&args); + match request.sandbox_dir.as_ref() { + Some(dir) => { + let _ = seed_sandbox_cache_root(dir.as_path()); + run_request(self_path, filtered, Some(dir.as_str()), "sandboxed subprocess") + .unwrap_or_else(|e| (1, format!("pipelining fallback error: {e}"))) + } + None => { + prepare_outputs(&filtered, None); + run_request(self_path, filtered, None, "process_wrapper subprocess") + .unwrap_or_else(|e| (1, format!("pipelining fallback error: {e}"))) + } + } + } + + /// Executes a non-pipelined multiplex request. + pub(super) fn execute_non_pipelined( + &self, + full_args: Vec, + self_path: &std::path::Path, + sandbox_dir: Option<&str>, + ) -> (i32, String) { + let context = if sandbox_dir.is_some() { + "sandboxed subprocess" + } else { + "subprocess" + }; + if let Some(dir) = sandbox_dir { + let _ = super::sandbox::seed_sandbox_cache_root(std::path::Path::new(dir)); + } + + // Non-pipelined requests run synchronously; cancellation only + // suppresses the response (handled by the caller). + run_request(self_path, full_args, sandbox_dir, context) + .unwrap_or_else(|e| (1, format!("worker thread error: {e}"))) + } +} + +/// Prepares args, environment, and directories for a metadata rustc invocation. 
+fn prepare_metadata_invocation( + key: &PipelineKey, + full_args: Vec, + request: &WorkRequest, + state_roots: &WorkerStateRoots, +) -> Result { + let filtered = strip_pipelining_flags(&full_args); + let mut parts = filtered.splitn(2, |a| a == "--"); + let pw_raw = parts.next().unwrap(); + let rustc_and_after = parts + .next() + .ok_or_else(|| pipelining_err("no '--' separator in args"))?; + if rustc_and_after.is_empty() { + return Err(pipelining_err("no rustc executable after '--'")); + } + + let ctx = create_pipeline_context(state_roots, key, request)?; + + let mut pw_args = parse_pw_args(pw_raw, &ctx.execroot_dir); + let (rustc_args, original_out_dir, relocated) = + prepare_rustc_args(rustc_and_after, &pw_args, &ctx.execroot_dir)?; + pw_args.merge_relocated(relocated); + let pw_args = resolve_pw_args_for_request(pw_args, request, &ctx.execroot_dir); + let env = build_rustc_env( + &pw_args.env_files, + pw_args.stable_status_file.as_deref(), + pw_args.volatile_status_file.as_deref(), + &pw_args.subst, + ) + .map_err(|e| pipelining_err(e))?; + + let (rustc_args, writable_outputs) = + rewrite_expanded_rustc_outputs(rustc_args, &ctx.outputs_dir); + prepare_expanded_rustc_outputs(&writable_outputs); + + Ok(MetadataInvocationReady { + rustc_args, + env, + ctx, + original_out_dir, + pw_args, + }) +} + +/// Spawns rustc for a metadata request and registers the running invocation. 
+fn spawn_metadata_rustc( + ready: MetadataInvocationReady, + key: &PipelineKey, + registry: &SharedRequestCoordinator, +) -> Result< + ( + Arc, + OutputDir, + PipelineContext, + ParsedPwArgs, + ), + (i32, String), +> { + let MetadataInvocationReady { + rustc_args, + env, + ctx, + original_out_dir, + pw_args, + } = ready; + + #[cfg(windows)] + let _consolidated_dir_guard: Option; + #[cfg(windows)] + let mut rustc_args = rustc_args; + #[cfg(windows)] + { + let unified_dir = ctx.root_dir.join("deps"); + let _ = std::fs::remove_dir_all(&unified_dir); + if let Err(e) = std::fs::create_dir_all(&unified_dir) { + return Err((1, format!("pipelining: failed to create deps dir: {e}"))); + } + let dep_dirs: Vec = rustc_args + .iter() + .filter_map(|a| { + a.strip_prefix("-Ldependency=") + .map(std::path::PathBuf::from) + }) + .collect(); + crate::util::consolidate_deps_into(&dep_dirs, &unified_dir); + rustc_args.retain(|a| !a.starts_with("-Ldependency=")); + rustc_args.push(format!("-Ldependency={}", unified_dir.display())); + _consolidated_dir_guard = Some(unified_dir); + } + + let mut cmd = Command::new(&rustc_args[0]); + #[cfg(windows)] + { + let response_file_path = ctx.root_dir.join("metadata_rustc.args"); + let content = rustc_args[1..].join("\n"); + if let Err(e) = std::fs::write(&response_file_path, &content) { + return Err((1, format!("pipelining: failed to write response file: {e}"))); + } + cmd.arg(format!("@{}", response_file_path.display())); + } + #[cfg(not(windows))] + { + cmd.args(&rustc_args[1..]); + } + cmd.env_clear() + .envs(&env) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .current_dir(&ctx.execroot_dir); + let child = match cmd.spawn() { + Ok(c) => c, + Err(e) => return Err((1, format!("pipelining: failed to spawn rustc: {e}"))), + }; + + let dirs = InvocationDirs { + pipeline_output_dir: ctx.outputs_dir.clone(), + pipeline_root_dir: ctx.root_dir.clone(), + original_out_dir, + }; + + let original_out_dir = dirs.original_out_dir.clone(); + let 
invocation = spawn_pipelined_rustc(child, dirs, pw_args.rustc_output_format.clone()); + + registry + .lock() + .expect(super::REGISTRY_MUTEX_POISONED) + .invocations + .insert(key.clone(), Arc::clone(&invocation)); + + Ok((invocation, original_out_dir, ctx, pw_args)) +} + +/// Copies `.rmeta` and returns metadata diagnostics. +fn materialize_metadata( + meta: MetadataOutput, + invocation: &RustcInvocation, + ctx: &PipelineContext, + request: &WorkRequest, + original_out_dir: &OutputDir, + key: &PipelineKey, + pw_args: &ParsedPwArgs, +) -> (i32, String) { + if let Some(rmeta_path_str) = &meta.rmeta_path { + let rmeta_resolved = resolve_request_relative_path(rmeta_path_str, Some(&ctx.execroot_dir)); + append_pipeline_log( + &ctx.root_dir, + &format!("metadata rmeta ready: {}", rmeta_resolved.display()), + ); + let copy_err = match request.sandbox_dir.as_ref() { + Some(dir) => copy_output_to_sandbox( + &rmeta_resolved, + dir.as_path(), + original_out_dir.as_str(), + "_pipeline", + ) + .err().map(|e| format!("pipelining: rmeta materialization failed: {e}")), + None => { + copy_rmeta_unsandboxed(&rmeta_resolved, original_out_dir.as_str(), &ctx.root_dir) + } + }; + if let Some(err_msg) = copy_err { + invocation.request_shutdown(); + return (1, err_msg); + } + } + append_pipeline_log(&ctx.root_dir, &format!("metadata stored key={}", key)); + if let Some(ref path) = pw_args.output_file { + let _ = std::fs::write(path, &meta.diagnostics_before); + } + (0, meta.diagnostics_before) +} diff --git a/util/process_wrapper/worker_rustc.rs b/util/process_wrapper/worker_rustc.rs new file mode 100644 index 0000000000..37c91c5773 --- /dev/null +++ b/util/process_wrapper/worker_rustc.rs @@ -0,0 +1,126 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Threads that own rustc child processes for worker requests. + +use std::io::BufRead; +use std::process::Child; +use std::sync::Arc; + +use super::exec::graceful_kill; +use super::invocation::{InvocationDirs, RustcInvocation}; +use crate::rustc::RustcStderrPolicy; + +/// Spawns a thread that waits on a non-pipelined child process. +#[cfg(test)] +pub(crate) fn spawn_non_pipelined_rustc(child: Child) -> Arc { + let invocation = Arc::new(RustcInvocation::new()); + let pid = child.id(); + + invocation.transition_to_running(pid, InvocationDirs::default()); + + let ret = Arc::clone(&invocation); + std::thread::spawn(move || { + let output = child.wait_with_output(); + + if invocation.is_shutdown_requested() { + invocation.transition_to_finished(-1, "shutdown requested".to_string()); + return; + } + + let (exit_code, diagnostics) = match output { + Ok(output) => { + let exit_code = output.status.code().unwrap_or(-1); + let mut diagnostics = String::from_utf8_lossy(&output.stderr).into_owned(); + let stdout = String::from_utf8_lossy(&output.stdout); + if !stdout.is_empty() { + if !diagnostics.is_empty() { + diagnostics.push('\n'); + } + diagnostics.push_str(&stdout); + } + (exit_code, diagnostics) + } + Err(e) => (-1, format!("wait_with_output failed: {}", e)), + }; + + invocation.transition_to_finished(exit_code, diagnostics); + }); + + ret +} + +/// Spawns a thread that tracks a pipelined rustc process through completion. 
+pub(crate) fn spawn_pipelined_rustc( + mut child: Child, + dirs: InvocationDirs, + rustc_output_format: Option, +) -> Arc { + let invocation = Arc::new(RustcInvocation::new()); + let pid = child.id(); + let stderr = child + .stderr + .take() + .expect("child must be spawned with Stdio::piped() stderr"); + + invocation.transition_to_running(pid, dirs); + + let ret = Arc::clone(&invocation); + std::thread::spawn(move || { + let reader = std::io::BufReader::new(stderr); + let mut policy = RustcStderrPolicy::from_option_str(rustc_output_format.as_deref()); + + let mut diagnostics = String::new(); + let mut lines = reader.lines().map_while(Result::ok); + + for line in lines.by_ref() { + if let Some(rmeta_path) = crate::rustc::extract_rmeta_path(&line) { + invocation.transition_to_metadata_ready(pid, diagnostics.clone(), Some(rmeta_path)); + break; + } + append_diagnostic(&mut diagnostics, &mut policy, &line); + } + + for line in lines { + if crate::rustc::extract_rmeta_path(&line).is_some() { + continue; + } + append_diagnostic(&mut diagnostics, &mut policy, &line); + } + + if invocation.is_shutdown_requested() { + graceful_kill(&mut child); + invocation.transition_to_finished(-1, "shutdown requested".to_string()); + return; + } + + let exit_code = match child.wait() { + Ok(status) => status.code().unwrap_or(-1), + Err(_) => -1, + }; + + invocation.transition_to_finished(exit_code, diagnostics); + }); + + ret +} + +fn append_diagnostic(buf: &mut String, policy: &mut RustcStderrPolicy, line: &str) { + if let Some(processed) = policy.process_line(line) { + if !buf.is_empty() { + buf.push('\n'); + } + buf.push_str(&processed); + } +} diff --git a/util/process_wrapper/worker_sandbox.rs b/util/process_wrapper/worker_sandbox.rs new file mode 100644 index 0000000000..4654c6f94b --- /dev/null +++ b/util/process_wrapper/worker_sandbox.rs @@ -0,0 +1,146 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Sandbox-specific worker helpers. + +use super::exec::materialize_output_file; +use super::pipeline::MaterializeError; +use crate::ProcessWrapperError; + + +#[cfg(unix)] +pub(super) fn symlink_path( + src: &std::path::Path, + dest: &std::path::Path, + _is_dir: bool, +) -> Result<(), std::io::Error> { + std::os::unix::fs::symlink(src, dest) +} + +#[cfg(windows)] +pub(super) fn symlink_path( + src: &std::path::Path, + dest: &std::path::Path, + is_dir: bool, +) -> Result<(), std::io::Error> { + if is_dir { + std::os::windows::fs::symlink_dir(src, dest) + } else { + std::os::windows::fs::symlink_file(src, dest) + } +} + +pub(super) fn seed_sandbox_cache_root( + sandbox_dir: &std::path::Path, +) -> Result<(), ProcessWrapperError> { + let dest = sandbox_dir.join("cache"); + // Not a TOCTOU race: sandbox_dir is a per-request sandbox directory, so no other + // request operates on this path concurrently. The exists() check is a fast path to + // skip re-seeding; if a race somehow occurred, symlink_path would fail with EEXIST. 
+ if dest.exists() { + return Ok(()); + } + + let entries = std::fs::read_dir(sandbox_dir).map_err(|e| { + ProcessWrapperError(format!( + "failed to read request sandbox for cache seeding: {e}" + )) + })?; + + for entry in entries { + let entry = entry.map_err(|e| { + ProcessWrapperError(format!("failed to enumerate request sandbox entry: {e}")) + })?; + let source = entry.path(); + let Ok(resolved) = source.canonicalize() else { + continue; + }; + + let mut cache_root = None; + for ancestor in resolved.ancestors() { + if ancestor.file_name().is_some_and(|name| name == "cache") { + cache_root = Some(ancestor.to_path_buf()); + break; + } + } + + let Some(cache_root) = cache_root else { + continue; + }; + return symlink_path(&cache_root, &dest, true).map_err(|e| { + ProcessWrapperError(format!( + "failed to seed request sandbox cache root {} -> {}: {e}", + cache_root.display(), + dest.display(), + )) + }); + } + + Ok(()) +} + +/// Copies `src` into the request sandbox under `original_out_dir/dest_subdir`. +pub(super) fn copy_output_to_sandbox( + src: &std::path::Path, + sandbox_dir: &std::path::Path, + original_out_dir: &str, + dest_subdir: &str, +) -> Result<(), MaterializeError> { + let filename = match src.file_name() { + Some(n) => n, + None => { + return Err(MaterializeError { + path: src.to_path_buf(), + cause: std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "source path has no filename", + ), + }); + } + }; + let dest_dir = sandbox_dir.join(original_out_dir).join(dest_subdir); + let dest = dest_dir.join(filename); + materialize_output_file(src, &dest) + .map_err(|cause| MaterializeError { path: dest, cause })?; + Ok(()) +} + +/// Copies all regular files from `pipeline_dir` into the request sandbox. 
+pub(super) fn copy_all_outputs_to_sandbox( + pipeline_dir: &std::path::Path, + sandbox_dir: &std::path::Path, + original_out_dir: &str, +) -> Result<(), MaterializeError> { + let dest_dir = sandbox_dir.join(original_out_dir); + let entries = std::fs::read_dir(pipeline_dir).map_err(|cause| MaterializeError { + path: pipeline_dir.to_path_buf(), + cause, + })?; + for entry in entries { + let entry = entry.map_err(|cause| MaterializeError { + path: pipeline_dir.to_path_buf(), + cause, + })?; + let meta = entry.metadata().map_err(|cause| MaterializeError { + path: entry.path(), + cause, + })?; + if meta.is_file() { + let dest = dest_dir.join(entry.file_name()); + materialize_output_file(&entry.path(), &dest) + .map_err(|cause| MaterializeError { path: dest, cause })?; + } + } + Ok(()) +} diff --git a/util/process_wrapper/worker_types.rs b/util/process_wrapper/worker_types.rs new file mode 100644 index 0000000000..158be1a265 --- /dev/null +++ b/util/process_wrapper/worker_types.rs @@ -0,0 +1,97 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Strongly typed worker identifiers and paths. + +use std::fmt; +use std::path::Path; + +/// Key from `--pipelining-key=...`. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PipelineKey(pub String);

impl PipelineKey {
    /// Borrows the raw key string.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl fmt::Display for PipelineKey {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}

/// Bazel worker request id. `0` is singleplex.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RequestId(pub i64);

impl RequestId {
    /// Returns true when `requestId == 0`.
    pub fn is_singleplex(&self) -> bool {
        self.0 == 0
    }
}

impl fmt::Display for RequestId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

/// Path from `WorkRequest.sandbox_dir`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SandboxDir(pub String);

impl SandboxDir {
    /// Borrows the raw directory string.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Borrows the directory as a `Path`.
    pub fn as_path(&self) -> &Path {
        Path::new(&self.0)
    }
}

impl fmt::Display for SandboxDir {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}

/// rustc `--out-dir` value.
// `Default` is derived instead of hand-written: the derive produces exactly
// `OutputDir(String::new())`, matching the previous manual impl.
#[derive(Debug, Clone, Default)]
pub struct OutputDir(pub String);

impl OutputDir {
    /// Borrows the raw directory string.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Borrows the directory as a `Path`.
    pub fn as_path(&self) -> &Path {
        Path::new(&self.0)
    }
}

impl fmt::Display for OutputDir {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}