diff --git a/litebox_packager/build.rs b/litebox_packager/build.rs deleted file mode 100644 index 77956be92..000000000 --- a/litebox_packager/build.rs +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -use std::path::PathBuf; - -const RTLD_AUDIT_DIR: &str = "../litebox_rtld_audit"; - -fn main() { - let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - if target_arch != "x86_64" { - return; - } - - let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); - let mut make_cmd = std::process::Command::new("make"); - make_cmd - .current_dir(RTLD_AUDIT_DIR) - .env("OUT_DIR", &out_dir) - .env("ARCH", &target_arch); - // Always build without DEBUG for the packager -- packaged binaries are - // release artifacts. - make_cmd.env_remove("DEBUG"); - // Force rebuild in case a stale artifact exists from a different config. - let _ = std::fs::remove_file(out_dir.join("litebox_rtld_audit.so")); - - let output = make_cmd - .output() - .expect("Failed to execute make for rtld_audit"); - assert!( - output.status.success(), - "failed to build rtld_audit.so via make:\nstdout: {}\nstderr: {}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr), - ); - assert!( - out_dir.join("litebox_rtld_audit.so").exists(), - "Build failed to create litebox_rtld_audit.so" - ); - - println!("cargo:rerun-if-changed={RTLD_AUDIT_DIR}/rtld_audit.c"); - println!("cargo:rerun-if-changed={RTLD_AUDIT_DIR}/Makefile"); - println!("cargo:rerun-if-changed=build.rs"); -} diff --git a/litebox_packager/src/lib.rs b/litebox_packager/src/lib.rs index 57120ec4e..b3663331b 100644 --- a/litebox_packager/src/lib.rs +++ b/litebox_packager/src/lib.rs @@ -1,18 +1,15 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -// Restrict this crate to only work on Linux, as it relies on `ldd` for -// dependency discovery and other Linux-specific functionality. -#![cfg(target_os = "linux")] - #[cfg(target_arch = "x86_64")] pub mod oci; use anyhow::{Context, bail}; use clap::Parser; use rayon::prelude::*; -use std::collections::{BTreeMap, BTreeSet}; -use std::os::unix::fs::MetadataExt as _; +#[cfg(target_os = "linux")] +use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::path::{Path, PathBuf}; use tar::{Builder, Header}; @@ -48,10 +45,16 @@ pub struct CliArgs { #[arg(short = 'o', long = "output", default_value = "litebox_packager.tar")] pub output: PathBuf, - /// Include extra files in the tar. + /// Include extra files in the tar (host mode only). + /// ELF files are automatically run through the syscall rewriter; non-ELF + /// files are included as-is. /// Format: HOST_PATH:TAR_PATH (split on the first colon, so the tar path /// may contain colons but the host path must not). - #[arg(long = "include", value_name = "HOST_PATH:TAR_PATH")] + #[arg( + long = "include", + value_name = "HOST_PATH:TAR_PATH", + conflicts_with = "oci_image" + )] pub include: Vec, /// Skip rewriting specific files (by their absolute path on the host). @@ -64,11 +67,13 @@ pub struct CliArgs { } /// Parsed `--include` entry. +#[cfg(target_os = "linux")] struct IncludeEntry { host_path: PathBuf, tar_path: String, } +#[cfg(target_os = "linux")] fn parse_include(spec: &str) -> anyhow::Result { let Some(colon_idx) = spec.find(':') else { bail!("invalid --include format: expected HOST_PATH:TAR_PATH, got: {spec}"); @@ -99,7 +104,24 @@ pub fn run(args: CliArgs) -> anyhow::Result<()> { } } - // --- Phase 1: Validate inputs --- + // Host mode (local ELF files + ldd dependency discovery) is Linux-only. + #[cfg(target_os = "linux")] + { + run_host_mode(args) + } + + #[cfg(not(target_os = "linux"))] + { + bail!( + "Host mode (local ELF files) is only supported on Linux. \ + Use --oci-image to pull a container image instead." + ); + } +} + +/// Host mode: package local ELF files with ldd-based dependency discovery. +#[cfg(target_os = "linux")] +fn run_host_mode(args: CliArgs) -> anyhow::Result<()> { let input_files: Vec = args .input_files .iter() @@ -151,12 +173,15 @@ pub fn run(args: CliArgs) -> anyhow::Result<()> { let par_results: Vec>> = file_map_vec .into_par_iter() - .map(|(real_path, tar_paths)| { + .map(|(real_path, tar_paths): (&PathBuf, &Vec)| { let data = std::fs::read(real_path) .with_context(|| format!("failed to read {}", real_path.display()))?; - let mode = std::fs::metadata(real_path) - .with_context(|| format!("failed to stat {}", real_path.display()))? - .mode(); + let mode = { + use std::os::unix::fs::MetadataExt as _; + std::fs::metadata(real_path) + .with_context(|| format!("failed to stat {}", real_path.display()))? + .mode() + }; let rewritten = if no_rewrite.contains(real_path) { if verbose { @@ -194,7 +219,45 @@ pub fn run(args: CliArgs) -> anyhow::Result<()> { } } - finalize_tar(tar_entries, added_tar_paths, &args)?; + // Append --include files (ELF files are automatically rewritten). + let includes: Vec = args + .include + .iter() + .map(|s| parse_include(s)) + .collect::>>()?; + + for inc in &includes { + if !inc.host_path.exists() { + bail!("included file does not exist: {}", inc.host_path.display()); + } + if !added_tar_paths.insert(inc.tar_path.clone()) { + bail!( + "duplicate tar path from --include: '{}' (already present)", + inc.tar_path + ); + } + let data = std::fs::read(&inc.host_path) + .with_context(|| format!("failed to read included file {}", inc.host_path.display()))?; + let mode = { + use std::os::unix::fs::MetadataExt as _; + std::fs::metadata(&inc.host_path).map_or(0o755, |m| m.mode()) + }; + let rewritten = rewrite_elf(&data, &inc.host_path, args.verbose)?; + if args.verbose { + eprintln!( + " including {} as {}", + inc.host_path.display(), + inc.tar_path + ); + } + tar_entries.push(TarEntry { + tar_path: inc.tar_path.clone(), + data: rewritten, + mode, + }); + } + + finalize_tar(tar_entries, &args)?; Ok(()) } @@ -208,7 +271,12 @@ fn run_oci(image_ref: &str, args: &CliArgs) -> anyhow::Result<()> { // --- Phase 2: Scan rootfs for files --- eprintln!("Scanning rootfs..."); - let file_map = oci::scan_rootfs(&extracted.rootfs_path, args.verbose)?; + let file_map = oci::scan_rootfs( + &extracted.rootfs_path, + &extracted.symlink_map, + &extracted.permissions, + args.verbose, + )?; let no_rewrite: BTreeSet = args .no_rewrite @@ -303,76 +371,17 @@ fn run_oci(image_ref: &str, args: &CliArgs) -> anyhow::Result<()> { } } - finalize_tar(tar_entries, added_tar_paths, args)?; + finalize_tar(tar_entries, args)?; Ok(()) } // --------------------------------------------------------------------------- -// Shared finalization: includes, rtld audit injection, tar build, size report +// Shared finalization: tar build, size report // --------------------------------------------------------------------------- -/// Append `--include` files, inject the rtld audit library, build the output -/// tar, and print a size summary. -/// -/// Both host mode and OCI mode call this after producing their rewritten -/// `TarEntry` list. -fn finalize_tar( - mut tar_entries: Vec, - mut added_tar_paths: BTreeSet, - args: &CliArgs, -) -> anyhow::Result<()> { - // Parse and append --include files. - let includes: Vec = args - .include - .iter() - .map(|s| parse_include(s)) - .collect::>>()?; - - for inc in &includes { - if !inc.host_path.exists() { - bail!("included file does not exist: {}", inc.host_path.display()); - } - if !added_tar_paths.insert(inc.tar_path.clone()) { - bail!( - "duplicate tar path from --include: '{}' (already present)", - inc.tar_path - ); - } - let data = std::fs::read(&inc.host_path) - .with_context(|| format!("failed to read included file {}", inc.host_path.display()))?; - let mode = std::fs::metadata(&inc.host_path).map_or(0o644, |m| m.mode()); - if args.verbose { - eprintln!( - " including {} as {}", - inc.host_path.display(), - inc.tar_path - ); - } - tar_entries.push(TarEntry { - tar_path: inc.tar_path.clone(), - data, - mode, - }); - } - - // Include the rtld audit library so the rewriter backend can load it. - #[cfg(target_arch = "x86_64")] - { - const RTLD_AUDIT_TAR_PATH: &str = "lib/litebox_rtld_audit.so"; - if !added_tar_paths.insert(RTLD_AUDIT_TAR_PATH.to_string()) { - bail!( - "tar already contains {RTLD_AUDIT_TAR_PATH} -- \ - remove the conflicting entry or use --no-rewrite" - ); - } - tar_entries.push(TarEntry { - tar_path: RTLD_AUDIT_TAR_PATH.to_string(), - data: include_bytes!(concat!(env!("OUT_DIR"), "/litebox_rtld_audit.so")).to_vec(), - mode: 0o755, - }); - } - +/// Build the output tar and print a size summary. +fn finalize_tar(tar_entries: Vec, args: &CliArgs) -> anyhow::Result<()> { // Build tar. eprintln!("Creating {}...", args.output.display()); build_tar(&tar_entries, &args.output)?; @@ -394,17 +403,20 @@ fn finalize_tar( // Dependency discovery (via ldd) // --------------------------------------------------------------------------- +#[cfg(target_os = "linux")] struct ResolvedDep { ldd_path: PathBuf, real_path: PathBuf, } +#[cfg(target_os = "linux")] struct DepDiscoveryResult { resolved: Vec, missing: Vec, } /// Run `ldd` on the given ELF and return resolved dependencies. +#[cfg(target_os = "linux")] fn find_dependencies(elf_path: &Path, verbose: bool) -> anyhow::Result { let output = std::process::Command::new("ldd") .arg(elf_path) @@ -496,6 +508,7 @@ fn find_dependencies(elf_path: &Path, verbose: bool) -> anyhow::Result anyhow::Result> { // Fast-path: skip the rewriter entirely for non-ELF files. if data.len() < 4 || data[..4] != ELF_MAGIC { @@ -572,7 +586,13 @@ fn rewrite_elf(data: &[u8], path: &Path, verbose: bool) -> anyhow::Result Err(e).with_context(|| format!("failed to rewrite {}", path.display())), + Err(e) => { + eprintln!( + " warning: failed to rewrite {}: {e}; including as-is", + path.display() + ); + Ok(data.to_vec()) + } } } @@ -592,7 +612,7 @@ fn build_tar(entries: &[TarEntry], output: &Path) -> anyhow::Result<()> { let mut builder = Builder::new(file); for entry in entries { - let mut header = Header::new_gnu(); + let mut header = Header::new_ustar(); header.set_size(entry.data.len() as u64); // Mask to permission bits only (rwxrwxrwx). The full st_mode from // MetadataExt::mode() includes file type bits (e.g., 0o100755) which diff --git a/litebox_packager/src/main.rs b/litebox_packager/src/main.rs index 2acb1167d..01987d6e8 100644 --- a/litebox_packager/src/main.rs +++ b/litebox_packager/src/main.rs @@ -1,18 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -// Restrict this crate to only work on Linux, as it relies on `ldd` for -// dependency discovery and other Linux-specific functionality. - -#[cfg(target_os = "linux")] fn main() -> anyhow::Result<()> { use clap::Parser as _; use litebox_packager::CliArgs; litebox_packager::run(CliArgs::parse()) } - -#[cfg(not(target_os = "linux"))] -fn main() { - eprintln!("This program is only supported on Linux"); - std::process::exit(1); -} diff --git a/litebox_packager/src/oci.rs b/litebox_packager/src/oci.rs index adb951833..ad4d172f3 100644 --- a/litebox_packager/src/oci.rs +++ b/litebox_packager/src/oci.rs @@ -7,9 +7,8 @@ //! extracts its filesystem layers into a temporary rootfs directory, then //! walks the rootfs to discover all ELF files for syscall rewriting. -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::io::Read; -use std::os::unix::fs::PermissionsExt as _; use std::path::{Path, PathBuf}; use anyhow::Context; @@ -38,6 +37,13 @@ pub struct ExtractedImage { pub config: ImageConfig, /// Raw OCI image config JSON blob (the full config descriptor data). pub config_json: Vec, + /// Symlink map from layer extraction: maps relative paths inside the + /// rootfs to their (Unix-style) link targets for cross-platform resolution. + pub symlink_map: HashMap, + /// Unix permission modes captured from tar headers during extraction. + /// Keyed by relative path inside the rootfs. Used instead of querying + /// filesystem metadata, which loses Unix mode bits on non-Unix hosts. + pub permissions: HashMap, } /// Result of scanning an extracted rootfs for files to package. @@ -96,6 +102,17 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result anyhow::Result = Vec::new(); + let mut permissions: HashMap = HashMap::new(); for (i, layer) in image_data.layers.iter().enumerate() { if verbose { eprintln!( @@ -143,10 +162,29 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result = symlinks + .iter() + .map(|s| (s.rel_path.clone(), s.link_target.clone())) + .collect(); + + // Materialize symlinks cross-platform: resolve chains through the in-memory + // map and copy target files (or create directories) instead of OS symlinks. + if verbose { + eprintln!(" Resolving {} symlinks...", symlinks.len()); + } + materialize_symlinks(&symlink_map, &rootfs_path, &mut permissions, verbose)?; + if verbose { eprintln!(" Rootfs extracted to {}", rootfs_path.display()); } @@ -188,6 +226,8 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result String { /// Extract a single OCI layer (tar or tar+gzip) into the rootfs directory. /// /// Handles OCI whiteout files (`.wh.*` prefixed entries) which indicate -/// files deleted in upper layers. -fn extract_layer(data: &[u8], media_type: &str, rootfs: &Path) -> anyhow::Result<()> { +/// files deleted in upper layers. Symlinks are collected into `symlinks` for +/// cross-platform resolution after all layers are extracted. Permission modes +/// from tar headers are recorded in `permissions` for cross-platform use. +fn extract_layer( + data: &[u8], + media_type: &str, + rootfs: &Path, + symlinks: &mut Vec, + permissions: &mut HashMap, +) -> anyhow::Result<()> { // Determine if the layer is gzipped let is_gzip = media_type.contains("gzip") || is_gzip_data(data); if is_gzip { let decoder = flate2::read::GzDecoder::new(data); - extract_tar(decoder, rootfs) + extract_tar(decoder, rootfs, symlinks, permissions) } else { - extract_tar(data, rootfs) + extract_tar(data, rootfs, symlinks, permissions) } } @@ -304,22 +352,49 @@ struct DeferredHardLink { target: PathBuf, /// Source path inside the rootfs (the file the hard link points to). link_source: PathBuf, + /// Original link name from the tar header (used for permission lookup). + link_name: PathBuf, +} + +/// Tracked symlink from a container image layer. +struct DeferredSymlink { + /// Relative path inside the rootfs (e.g., `usr/lib64/ld-linux-x86-64.so.2`). + rel_path: PathBuf, + /// Symlink target as stored in the tar (Unix-style, may be relative or absolute). + link_target: PathBuf, } /// Extract a tar archive into the rootfs, handling OCI whiteout files. /// -/// Hard links whose targets appear later in the archive are collected during -/// the first pass and resolved after all regular entries have been extracted. -fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { +/// Symlinks are NOT created as OS symlinks. Instead they are tracked in +/// `symlinks` so the caller can resolve them cross-platform after all layers +/// are extracted. Hard links whose targets appear later in the archive are +/// collected during the first pass and resolved after all regular entries +/// have been extracted. Permission modes from tar headers are recorded in +/// `permissions` keyed by relative path. +fn extract_tar( + reader: R, + rootfs: &Path, + symlinks: &mut Vec, + permissions: &mut HashMap, +) -> anyhow::Result<()> { let mut archive = tar::Archive::new(reader); - archive.set_preserve_permissions(true); - archive.set_unpack_xattrs(true); + // Preserve Unix permissions and xattrs when running on Unix hosts. + // On non-Unix platforms these are no-ops or unsupported; permissions are + // tracked separately in the `permissions` HashMap from tar headers. + #[cfg(unix)] + { + archive.set_preserve_permissions(true); + archive.set_unpack_xattrs(true); + } let mut deferred_links: Vec = Vec::new(); for entry_result in archive.entries()? { let mut entry = entry_result.context("failed to read tar entry")?; - let path = entry.path()?.into_owned(); + // Normalize the path to prevent path traversal (../ and absolute paths) + // and to strip inconsistent ./ prefixes that tar entries may carry. + let path = normalize_path(&entry.path()?); let path_str = path.to_string_lossy(); // Handle OCI whiteout files @@ -340,17 +415,39 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { } } } + // Also prune in-memory symlinks under this directory so + // they are not resurrected by materialize_symlinks. + // Guard: Path::starts_with("") matches everything, so skip + // pruning when parent is empty (root-level opaque whiteout + // already cleared the filesystem above). + if parent.as_os_str().is_empty() { + symlinks.clear(); + permissions.clear(); + } else { + symlinks.retain(|s| !s.rel_path.starts_with(parent)); + // Prune permissions for files under the cleared directory. + permissions.retain(|p, _| !p.starts_with(parent)); + } } continue; } if let Some(target_name) = file_name.strip_prefix(".wh.") { // Regular whiteout: delete the specific file/directory if let Some(parent) = path.parent() { - let target = rootfs.join(parent).join(target_name); + let whiteout_rel = parent.join(target_name); + let target = rootfs.join(&whiteout_rel); if target.is_dir() { let _ = std::fs::remove_dir_all(&target); + // Prune symlinks under the removed directory. + symlinks.retain(|s| !s.rel_path.starts_with(&whiteout_rel)); + // Prune permissions under the removed directory. + permissions.retain(|p, _| !p.starts_with(&whiteout_rel)); } else { let _ = std::fs::remove_file(&target); + // Prune the exact symlink entry if present. + symlinks.retain(|s| s.rel_path != whiteout_rel); + // Prune the exact permissions entry. + permissions.remove(&whiteout_rel); } } continue; @@ -364,16 +461,18 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { std::fs::create_dir_all(parent)?; } + let entry_type = entry.header().entry_type(); + // Handle hard links: copy the link target instead of creating an OS // hard link. The tar crate's unpack() tries std::fs::hard_link which // can fail if the target hasn't been extracted yet (ordering issue), // and the litebox filesystem doesn't support hard links anyway. - let entry_type = entry.header().entry_type(); if entry_type == tar::EntryType::Link { - let link_name = entry - .link_name()? - .context("hard link entry has no link name")? - .into_owned(); + let link_name = normalize_path( + &entry + .link_name()? + .context("hard link entry has no link name")?, + ); let link_source = rootfs.join(&link_name); if link_source.exists() { std::fs::copy(&link_source, &target).with_context(|| { @@ -383,20 +482,50 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { target.display() ) })?; + // Copy permission mode from the link source. + let link_rel = normalize_path(&link_name); + if let Some(&mode) = permissions.get(&link_rel) { + permissions.insert(path.clone(), mode); + } } else { // Target hasn't been extracted yet — defer to second pass. deferred_links.push(DeferredHardLink { target, link_source, + link_name: link_name.clone(), }); } continue; } - // Normal file/directory/symlink: use the standard unpack + // Track symlinks in memory instead of creating OS symlinks. + // OS symlinks on Windows require special privileges and don't handle + // Unix-style relative paths reliably, so we resolve them ourselves + // after all layers are extracted. + if entry_type == tar::EntryType::Symlink { + let link_target = entry + .link_name()? + .context("symlink entry has no link name")? + .into_owned(); + // A later layer may override this symlink, so remove any stale + // entry with the same rel_path. + symlinks.retain(|s| s.rel_path != path); + symlinks.push(DeferredSymlink { + rel_path: path.clone(), + link_target, + }); + continue; + } + + // Normal file/directory: use the standard unpack entry .unpack(&target) .with_context(|| format!("failed to unpack entry: {path_str}"))?; + + // Record the permission mode from the tar header for cross-platform use. + if let Ok(mode) = entry.header().mode() { + permissions.insert(path.clone(), mode); + } } // Second pass: resolve deferred hard links now that all entries are extracted. @@ -412,6 +541,12 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { link.target.display() ) })?; + // Copy permission mode from the link source. + let link_rel = normalize_path(&link.link_name); + if let Some(&mode) = permissions.get(&link_rel) { + let target_rel = link.target.strip_prefix(rootfs).unwrap_or(&link.target); + permissions.insert(target_rel.to_path_buf(), mode); + } } else { // Target still doesn't exist after the full layer extraction — // this is unusual but not fatal; warn and skip. @@ -426,20 +561,241 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { Ok(()) } +/// Resolve a symlink target within the rootfs using the symlink map. +/// +/// Handles both absolute targets (e.g., `/lib/x86_64-linux-gnu/ld.so`) and +/// relative targets (e.g., `../lib/x86_64-linux-gnu/ld.so`). Follows symlink +/// chains up to `max_depth` hops. +fn resolve_symlink_in_rootfs( + rel_path: &Path, + rootfs: &Path, + symlink_map: &HashMap, + max_depth: u32, +) -> Option { + if max_depth == 0 { + return None; + } + + // Empty rel_path would resolve to the rootfs directory itself — treat + // as unresolvable to avoid accidentally matching the entire rootfs. + if rel_path.as_os_str().is_empty() { + return None; + } + + // Check if this rel_path is itself a symlink + if let Some(link_target) = symlink_map.get(rel_path) { + // Resolve the target to a new rel_path + let resolved_rel = if is_unix_absolute(link_target) { + strip_unix_root(link_target) + } else { + // Relative target: resolve from parent of the symlink + let parent = rel_path.parent().unwrap_or(Path::new("")); + normalize_path(&parent.join(link_target)) + }; + // Recurse to follow chains + return resolve_symlink_in_rootfs(&resolved_rel, rootfs, symlink_map, max_depth - 1); + } + + // Not a symlink — check if any ancestor is a symlink (e.g., `lib64/foo` where + // `lib64` → `usr/lib64`). + let components: Vec<_> = rel_path.components().collect(); + for i in 1..components.len() { + let prefix: PathBuf = components[..i].iter().collect(); + if let Some(link_target) = symlink_map.get(&prefix) { + let resolved_prefix = if is_unix_absolute(link_target) { + strip_unix_root(link_target) + } else { + let parent = prefix.parent().unwrap_or(Path::new("")); + normalize_path(&parent.join(link_target)) + }; + let suffix: PathBuf = components[i..].iter().collect(); + let new_rel = resolved_prefix.join(suffix); + return resolve_symlink_in_rootfs(&new_rel, rootfs, symlink_map, max_depth - 1); + } + } + + let host_path = rootfs.join(rel_path); + if host_path.exists() { + Some(host_path) + } else { + None + } +} + +/// Check if a path starts with `/` (Unix-style absolute). +/// +/// On Windows, `Path::is_absolute()` requires a drive letter, so Unix-style +/// paths like `/lib/foo` are not detected as absolute. This helper checks +/// the raw string instead. +fn is_unix_absolute(path: &Path) -> bool { + path.as_os_str() + .to_str() + .is_some_and(|s| s.starts_with('/')) +} + +/// Strip the leading `/` from a Unix-style absolute path to make it +/// rootfs-relative. Returns the path unchanged if it doesn't start with `/`. +fn strip_unix_root(path: &Path) -> PathBuf { + if let Some(stripped) = path.as_os_str().to_str().and_then(|s| s.strip_prefix('/')) { + return PathBuf::from(stripped); + } + path.strip_prefix("/").unwrap_or(path).to_path_buf() +} + +/// Normalize a path by resolving `.` and `..` components without touching the +/// filesystem (no symlink resolution, no existence checks). Strips any root +/// component so the result is always a relative path. +fn normalize_path(path: &Path) -> PathBuf { + let mut result = Vec::new(); + for component in path.components() { + match component { + std::path::Component::ParentDir => { + result.pop(); + } + std::path::Component::CurDir + | std::path::Component::RootDir + | std::path::Component::Prefix(_) => {} + c @ std::path::Component::Normal(_) => result.push(c), + } + } + result.iter().collect() +} + +/// Materialize all deferred symlinks by copying or creating directories. +/// +/// This is called after all OCI layers have been extracted, so every real file +/// should be on disk. Symlinks are resolved through the in-memory map (handling +/// chains like `lib64` → `usr/lib64` → real dir) and then: +/// - File symlinks: the target file is copied to the symlink location. +/// The resolved target's permission mode is also recorded for the symlink path. +/// - Directory symlinks: an empty directory is created (its contents will be +/// expanded by `scan_rootfs`'s dir-symlink logic). +fn materialize_symlinks( + symlink_map: &HashMap, + rootfs: &Path, + permissions: &mut HashMap, + verbose: bool, +) -> anyhow::Result<()> { + for (rel_path, link_target) in symlink_map { + let host_path = rootfs.join(rel_path); + if host_path.exists() { + // A later layer may have replaced the symlink with a real file. + continue; + } + + if let Some(resolved) = resolve_symlink_in_rootfs( + rel_path, + rootfs, + symlink_map, + 32, // max chain depth + ) { + if let Some(parent) = host_path.parent() { + std::fs::create_dir_all(parent)?; + } + + if resolved.is_dir() { + // Directory symlink: create directory placeholder. + // scan_rootfs will discover this is a "dir symlink" and expand + // it through the symlink_map. + std::fs::create_dir_all(&host_path)?; + if verbose { + eprintln!( + " [symlink→dir] {} -> {}", + rel_path.display(), + link_target.display() + ); + } + } else if resolved.is_file() { + std::fs::copy(&resolved, &host_path).with_context(|| { + format!( + "failed to materialize symlink {} -> {}", + rel_path.display(), + resolved.display() + ) + })?; + // Record the resolved target's permission mode for this symlink path. + let resolved_rel = resolved + .strip_prefix(rootfs) + .unwrap_or(&resolved) + .to_path_buf(); + if let Some(&mode) = permissions.get(&resolved_rel) { + permissions.insert(rel_path.clone(), mode); + } + if verbose { + eprintln!( + " [symlink→file] {} -> {}", + rel_path.display(), + link_target.display() + ); + } + } + } else if verbose { + eprintln!( + " [symlink-broken] {} -> {} (unresolvable)", + rel_path.display(), + link_target.display() + ); + } + } + + Ok(()) +} + +/// Look up the Unix permission mode for a file. +/// +/// Look up the Unix file mode for a rootfs-relative path from the OCI tar +/// header permissions map. Defaults to 0o644 if not found. +fn lookup_mode(rel_path: &Path, permissions: &HashMap) -> u32 { + if let Some(&mode) = permissions.get(rel_path) { + mode & 0o7777 + } else { + 0o644 + } +} + /// Scan an extracted rootfs directory and build a file map for packaging. /// /// Walks the rootfs directory tree and collects all regular files with their -/// paths and permission bits. Symlinks are resolved within the rootfs context -/// and flattened into regular file copies (the litebox tar RO filesystem does -/// not support symlinks). +/// paths and permission bits. After `materialize_symlinks` has been called, +/// file symlinks are already materialized as regular file copies on disk. +/// +/// `symlink_map` provides the original symlink mapping from extraction so +/// that **directory symlinks** (e.g., `lib64` → `usr/lib64`) can be expanded: +/// all files under the target directory are duplicated under the symlink's +/// path prefix so that paths like `lib64/ld-linux-x86-64.so.2` exist in the tar. /// -/// **Directory symlinks** (e.g., `/lib64` → `/usr/lib64`) are expanded: all -/// files under the target directory are duplicated under the symlink's path -/// prefix so that paths like `/lib64/ld-linux-x86-64.so.2` exist in the tar. -pub fn scan_rootfs(rootfs: &Path, verbose: bool) -> anyhow::Result { +/// `permissions` provides Unix permission modes captured from tar headers +/// during extraction, so permission bits are accurate on non-Unix hosts. +#[allow(clippy::implicit_hasher)] +pub fn scan_rootfs( + rootfs: &Path, + symlink_map: &HashMap, + permissions: &HashMap, + verbose: bool, +) -> anyhow::Result { let mut files = BTreeMap::new(); - // Collect directory symlinks to expand after the initial walk. + + // Identify directory symlinks and their resolved targets on disk. let mut dir_symlinks: Vec<(PathBuf, PathBuf)> = Vec::new(); + for (rel_path, link_target) in symlink_map { + let host_path = rootfs.join(rel_path); + if host_path.is_dir() { + // This dir symlink was materialized as an empty directory. + // Resolve the target to find the real directory to expand from. + if let Some(resolved) = + resolve_symlink_in_rootfs(rel_path, rootfs, symlink_map, 32).filter(|r| r.is_dir()) + { + if verbose { + eprintln!( + " [dir-symlink] {} -> {}", + rel_path.display(), + link_target.display() + ); + } + dir_symlinks.push((host_path, resolved)); + } + } + } for entry in walkdir::WalkDir::new(rootfs) .follow_links(false) @@ -454,10 +810,11 @@ pub fn scan_rootfs(rootfs: &Path, verbose: bool) -> anyhow::Result anyhow::Result {}", resolved.display()); } @@ -552,6 +904,8 @@ pub fn scan_rootfs(rootfs: &Path, verbose: bool) -> anyhow::Result anyhow::Result Option usr/lib64 + let r = resolve_symlink_in_rootfs(Path::new("lib64"), rootfs, &symlink_map, 32); + assert_eq!(r, Some(rootfs.join("usr/lib64"))); + + // Chain: a -> b -> c + let r = resolve_symlink_in_rootfs(Path::new("a"), rootfs, &symlink_map, 32); + assert_eq!(r, Some(rootfs.join("c"))); + + // Absolute target: bin/sh -> /usr/bin/sh + let r = resolve_symlink_in_rootfs(Path::new("bin/sh"), rootfs, &symlink_map, 32); + assert_eq!(r, Some(rootfs.join("usr/bin/sh"))); + + // Relative target: usr/lib64/libfoo.so -> ../lib/libfoo.so + let r = + resolve_symlink_in_rootfs(Path::new("usr/lib64/libfoo.so"), rootfs, &symlink_map, 32); + assert_eq!(r, Some(rootfs.join("usr/lib/libfoo.so"))); + + // Ancestor is symlink: lib64/foo.so resolves via lib64 -> usr/lib64 + let r = resolve_symlink_in_rootfs(Path::new("lib64/foo.so"), rootfs, &symlink_map, 32); + assert_eq!(r, Some(rootfs.join("usr/lib64/foo.so"))); } #[test] - fn test_resolve_in_rootfs_max_depth_zero() { - let result = resolve_in_rootfs(Path::new("/tmp"), Path::new("/tmp"), 0); - assert!(result.is_none()); + fn resolve_symlink_in_rootfs_edge_cases() { + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::write(rootfs.join("hello.txt"), b"hi").unwrap(); + + // Cycle: a -> b -> a + let mut cycle_map = HashMap::new(); + cycle_map.insert(PathBuf::from("a"), PathBuf::from("b")); + cycle_map.insert(PathBuf::from("b"), PathBuf::from("a")); + assert!(resolve_symlink_in_rootfs(Path::new("a"), rootfs, &cycle_map, 32).is_none()); + + let empty_map = HashMap::new(); + + // Empty path + assert!(resolve_symlink_in_rootfs(Path::new(""), rootfs, &empty_map, 32).is_none()); + + // Nonexistent path + assert!( + resolve_symlink_in_rootfs(Path::new("does/not/exist"), rootfs, &empty_map, 32) + .is_none() + ); + + // Regular file (not a symlink) returns host path directly + let r = resolve_symlink_in_rootfs(Path::new("hello.txt"), rootfs, &empty_map, 32); + assert_eq!(r, Some(rootfs.join("hello.txt"))); } }