diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..d27a0fc --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +# Mark binary test fixtures to prevent line-ending corruption +tests/test.macho binary +tests/test.pe binary +tests/test.elf binary diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2d5cc4f..aca843c 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,4 +1,8 @@ -on: [push, pull_request] +on: + push: + branches: + - main + pull_request: name: CI @@ -7,15 +11,9 @@ jobs: name: Check runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - uses: actions-rs/cargo@v1 - with: - command: check + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: cargo check test: name: Test Suite @@ -24,30 +22,28 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - uses: actions-rs/cargo@v1 + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: cargo test env: RUST_BACKTRACE: '1' - with: - command: test fmt: name: Rustfmt runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable with: - profile: minimal - toolchain: stable - override: true - - run: rustup component add rustfmt - - uses: actions-rs/cargo@v1 + components: rustfmt + - run: cargo fmt --all -- --check + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable with: - command: fmt - args: --all -- --check + components: clippy + - run: cargo clippy -- -D warnings diff --git a/src/elf.rs b/src/elf.rs new file mode 100644 index 0000000..40f3177 --- /dev/null +++ b/src/elf.rs 
@@ -0,0 +1,65 @@
+use goblin::{
+    elf::{
+        header::{EI_OSABI, ELFOSABI_GNU, ELFOSABI_NONE},
+        Elf,
+    },
+    Object,
+};
+
+use crate::{BinaryFormat, InspectDylib};
+
+impl InspectDylib for Elf<'_> {
+    fn rpaths(&self) -> &[&str] {
+        if !self.runpaths.is_empty() {
+            &self.runpaths
+        } else {
+            &self.rpaths
+        }
+    }
+
+    fn libraries(&self) -> Vec<&str> {
+        self.libraries.clone()
+    }
+
+    fn interpreter(&self) -> Option<&str> {
+        self.interpreter
+    }
+
+    /// Report whether `other` is an ELF object compatible with this one.
+    ///
+    /// Two ELFs are compatible when they share the same bit size, endianness and
+    /// machine type, and their OS ABIs either match exactly or both belong to the
+    /// interchangeable NONE/SYSV and GNU/LINUX set. A single generic ABI is not
+    /// enough: a SYSV object must not be paired with, say, a FreeBSD one.
+    /// Any non-ELF `other` is never compatible.
+    fn compatible(&self, other: &Object) -> bool {
+        match other {
+            Object::Elf(other) => {
+                if self.is_64 != other.is_64 {
+                    return false;
+                }
+                if self.little_endian != other.little_endian {
+                    return false;
+                }
+                if self.header.e_machine != other.header.e_machine {
+                    return false;
+                }
+                let compatible_osabis = &[
+                    ELFOSABI_NONE, // ELFOSABI_NONE / ELFOSABI_SYSV
+                    ELFOSABI_GNU,  // ELFOSABI_GNU / ELFOSABI_LINUX
+                ];
+                let osabi1 = self.header.e_ident[EI_OSABI];
+                let osabi2 = other.header.e_ident[EI_OSABI];
+                // Differing ABIs are only acceptable when *both* are interchangeable.
+                osabi1 == osabi2
+                    || (compatible_osabis.contains(&osabi1)
+                        && compatible_osabis.contains(&osabi2))
+            }
+            _ => false,
+        }
+    }
+
+    fn format(&self) -> BinaryFormat {
+        BinaryFormat::Elf
+    }
+}
diff --git a/src/errors.rs b/src/errors.rs
index c239660..f1cc7d3 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -9,6 +9,7 @@ pub enum Error {
     Io(io::Error),
     Goblin(goblin::error::Error),
     LdSoConf(LdSoConfError),
+    UnsupportedBinary,
 }
 
 impl fmt::Display for Error {
@@ -17,6 +18,7 @@ impl fmt::Display for Error {
             Error::Io(e) => e.fmt(f),
             Error::Goblin(e) => e.fmt(f),
             Error::LdSoConf(e) => e.fmt(f),
+            Error::UnsupportedBinary => write!(f, "Unsupported binary format"),
         }
     }
 }
@@ -27,6 +29,7 @@ impl error::Error for Error {
             Error::Io(e) => e.source(),
             Error::Goblin(e) => e.source(),
             Error::LdSoConf(e) => e.source(),
+
Error::UnsupportedBinary => None, } } } diff --git a/src/ld_so_conf.rs b/src/ld_so_conf.rs index fbb04fb..b1ec59f 100644 --- a/src/ld_so_conf.rs +++ b/src/ld_so_conf.rs @@ -62,8 +62,7 @@ pub fn parse_ld_so_conf( if line.starts_with("#") { continue; } - if line.starts_with("include ") { - let include_path = &line[8..]; + if let Some(include_path) = line.strip_prefix("include ") { let include_path = if !include_path.starts_with('/') { let parent = path.parent().unwrap(); parent.join(include_path).display().to_string() diff --git a/src/lib.rs b/src/lib.rs index f4fe605..e4a3a1c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,6 @@ -//! Read the ELF dependency tree. +//! Read the dynamic library dependency tree. +//! +//! Supports ELF (Linux), Mach-O (macOS), and PE (Windows) binary formats. //! //! This does not work like `ldd` in that we do not execute/load code (only read //! files on disk). @@ -7,14 +9,15 @@ use std::env; use std::path::{Path, PathBuf}; use fs_err as fs; -use goblin::elf::{ - header::{EI_OSABI, ELFOSABI_GNU, ELFOSABI_NONE}, - Elf, -}; +use goblin::mach::Mach; +use goblin::Object; use memmap2::Mmap; +mod elf; mod errors; pub mod ld_so_conf; +mod macho; +mod pe; pub use errors::Error; use ld_so_conf::parse_ld_so_conf; @@ -30,10 +33,8 @@ pub struct Library { pub realpath: Option, /// The dependencies of this library. pub needed: Vec, - /// Runtime library search paths. (deprecated) - pub rpath: Vec, /// Runtime library search paths. - pub runpath: Vec, + pub rpath: Vec, } impl Library { @@ -46,16 +47,35 @@ impl Library { /// Library dependency tree #[derive(Debug, Clone)] pub struct DependencyTree { - /// The binary’s program interpreter (e.g., dynamic linker). + /// The binary's program interpreter (e.g., dynamic linker). pub interpreter: Option, - /// A list of this binary’s dynamic libraries it depends on directly. + /// A list of this binary's dynamic libraries it depends on directly. 
pub needed: Vec, - /// All of this binary’s dynamic libraries it uses in detail. + /// All of this binary's dynamic libraries it uses in detail. pub libraries: HashMap, - /// Runtime library search paths. (deprecated) + /// Runtime library search paths. pub rpath: Vec, +} + +/// The binary format being analyzed. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum BinaryFormat { + Elf, + MachO, + PE, +} + +trait InspectDylib { /// Runtime library search paths. - pub runpath: Vec, + fn rpaths(&self) -> &[&str]; + /// A list of this binary's dynamic libraries it depends on directly. + fn libraries(&self) -> Vec<&str>; + /// The binary's program interpreter (e.g., dynamic linker). + fn interpreter(&self) -> Option<&str>; + /// See if two dynamic libraries are compatible. + fn compatible(&self, other: &Object) -> bool; + /// The binary format of this dylib. + fn format(&self) -> BinaryFormat; } /// Library dependency analyzer @@ -64,8 +84,9 @@ pub struct DependencyAnalyzer { env_ld_paths: Vec, conf_ld_paths: Vec, additional_ld_paths: Vec, - runpaths: Vec, root: PathBuf, + /// Path to the main executable being analyzed (used for @executable_path on macOS) + executable_path: Option, } impl Default for DependencyAnalyzer { @@ -74,6 +95,9 @@ impl Default for DependencyAnalyzer { } } +/// Extracted library info: (rpaths, needed library names). +type LibInfo = (Vec, Vec); + impl DependencyAnalyzer { /// Create a new dependency analyzer. pub fn new(root: PathBuf) -> DependencyAnalyzer { @@ -81,8 +105,8 @@ impl DependencyAnalyzer { env_ld_paths: Vec::new(), conf_ld_paths: Vec::new(), additional_ld_paths: Vec::new(), - runpaths: Vec::new(), root, + executable_path: None, } } @@ -104,63 +128,165 @@ impl DependencyAnalyzer { self } - fn read_rpath_runpath( - &self, - elf: &Elf, - path: &Path, - ) -> Result<(Vec, Vec), Error> { + /// Read and resolve rpaths from a parsed binary. + /// + /// For ELF: rpaths go through `parse_ld_paths` which handles `$ORIGIN` expansion. 
+ /// For MachO: rpaths may contain `@executable_path` or `@loader_path` which are + /// resolved relative to the given `path` (the binary that contains the rpaths). + /// `@rpath` entries within rpaths don't make sense and are kept as-is. + fn read_rpath(&self, lib: &impl InspectDylib, path: &Path) -> Result, Error> { let mut rpaths = Vec::new(); - let mut runpaths = Vec::new(); - for runpath in &elf.runpaths { - if let Ok(ld_paths) = self.parse_ld_paths(runpath, path) { - runpaths = ld_paths; - } - } - for rpath in &elf.rpaths { - if let Ok(ld_paths) = self.parse_ld_paths(rpath, path) { - rpaths = ld_paths; + for rpath in lib.rpaths() { + if lib.format() == BinaryFormat::Elf { + if let Ok(ld_paths) = self.parse_ld_paths(rpath, path) { + rpaths = ld_paths; + } + } else { + // For MachO, rpaths may contain @executable_path or @loader_path. + // These are resolved here so that when we later use these rpaths + // for @rpath/ library name resolution, they are already absolute. + // Example: rpath = "@loader_path/../Frameworks" with loader at + // /app/Contents/MacOS/binary → resolves to /app/Contents/Frameworks + let resolved = self.resolve_macho_path(rpath, path); + if let Some(resolved) = resolved { + rpaths.push(resolved.display().to_string()); + } else { + rpaths.push(rpath.to_string()); + } } } - Ok((rpaths, runpaths)) + Ok(rpaths) } /// Analyze the given binary. pub fn analyze(mut self, path: impl AsRef) -> Result { let path = path.as_ref(); - self.load_ld_paths(path)?; + self.executable_path = Some(path.to_path_buf()); let file = fs::File::open(path)?; // SAFETY: The file is memory-mapped read-only and we only perform read operations // on the mapped bytes. We do not prevent other processes from modifying the file // concurrently; such external modification is accepted as a risk for this tool. 
- // - // Note: The file handle does not need to remain open after mapping on POSIX systems, - // but we keep it in scope until parsing is complete for clarity and portability. let bytes = unsafe { Mmap::map(&file)? }; - let elf = Elf::parse(&bytes)?; - - let (mut rpaths, runpaths) = self.read_rpath_runpath(&elf, path)?; - if !runpaths.is_empty() { - // If both RPATH and RUNPATH are set, only the latter is used. - rpaths = Vec::new(); - } - self.runpaths = runpaths.clone(); - self.runpaths.extend(rpaths.clone()); + let dep_tree = match Object::parse(&bytes)? { + Object::Elf(elf) => { + self.load_elf_paths(path)?; + self.analyze_dylib(path, elf)? + } + Object::Mach(mach) => { + self.load_macho_paths(path)?; + match mach { + Mach::Fat(fat) => { + // Fat/universal binaries contain multiple architecture slices + // (e.g., x86_64 + arm64). We select the best matching architecture: + // prefer the native arch of the host, otherwise take the first one. + let mut best = None; + for arch in fat.into_iter() { + if let Ok(goblin::mach::SingleArch::MachO(macho)) = arch { + let is_native = { + #[cfg(target_arch = "x86_64")] + { + macho.header.cputype + == goblin::mach::cputype::CPU_TYPE_X86_64 + } + #[cfg(target_arch = "aarch64")] + { + macho.header.cputype + == goblin::mach::cputype::CPU_TYPE_ARM64 + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "aarch64" + )))] + { + false + } + }; + if is_native { + best = Some(macho); + break; + } + if best.is_none() { + best = Some(macho); + } + } + } + match best { + Some(macho) => self.analyze_dylib(path, macho)?, + None => return Err(Error::UnsupportedBinary), + } + } + Mach::Binary(macho) => self.analyze_dylib(path, macho)?, + } + } + Object::PE(pe) => { + self.load_pe_paths(path)?; + self.analyze_dylib(path, pe)? 
+ } + _ => return Err(Error::UnsupportedBinary), + }; + Ok(dep_tree) + } - let needed: Vec = elf.libraries.iter().map(ToString::to_string).collect(); + fn analyze_dylib( + &mut self, + path: &Path, + dylib: impl InspectDylib, + ) -> Result { + let rpaths = self.read_rpath(&dylib, path)?; + let needed: Vec = dylib.libraries().iter().map(ToString::to_string).collect(); let mut libraries = HashMap::new(); - let mut stack = needed.clone(); - while let Some(lib_name) = stack.pop() { + // Dependency resolution stack. Each entry carries: + // - lib_name: the library to resolve (e.g., "libfoo.dylib" or "@rpath/libbar.dylib") + // - loader_path: path of the binary that imports this library, used to resolve + // @loader_path on macOS. For direct deps this is the main binary; for transitive + // deps it's the intermediate library that depends on this one. + // - lib_rpaths: rpaths from the importing binary, used to resolve @rpath/ prefixes. + // Each library has its own rpaths (from LC_RPATH load commands on macOS, or + // DT_RPATH/DT_RUNPATH on ELF). When resolving a library's own dependencies, + // we use *that library's* rpaths, not the top-level binary's rpaths. + let mut stack: Vec<(String, PathBuf, Vec)> = needed + .iter() + .map(|n| (n.clone(), path.to_path_buf(), rpaths.clone())) + .collect(); + + while let Some((lib_name, loader_path, current_rpaths)) = stack.pop() { if libraries.contains_key(&lib_name) { continue; } - let library = self.find_library(&elf, &lib_name)?; - libraries.insert(lib_name, library.clone()); - stack.extend(library.needed); + + // API set DLLs (api-ms-win-*, ext-ms-win-*) are virtual DLLs that Windows + // resolves at runtime through an API set schema mapping. They never exist as + // real files on disk. We record them as not-found and skip dependency + // resolution to avoid pointless (and expensive) filesystem searches. 
+ // See: https://learn.microsoft.com/en-us/windows/win32/apiindex/windows-apisets + if dylib.format() == BinaryFormat::PE && is_api_set_dll(&lib_name) { + libraries.insert(lib_name.clone(), not_found_library(&lib_name)); + continue; + } + + let library = self.find_library(&dylib, &lib_name, &loader_path, &current_rpaths)?; + + // For transitive dependency resolution, use the *found library's* path as the + // loader_path and its rpaths for @rpath/ resolution. This ensures that: + // - @loader_path in a transitive dep resolves relative to the intermediate + // library, not the top-level binary + // - @rpath uses the intermediate library's LC_RPATH entries, not the top-level's + let dep_loader = library.realpath.as_ref().unwrap_or(&library.path).clone(); + let dep_rpaths = library.rpath.clone(); + let dep_needed: Vec<String> = library.needed.clone(); + + libraries.insert(lib_name, library); + + for needed_name in dep_needed { + if !libraries.contains_key(&needed_name) { + stack.push((needed_name, dep_loader.clone(), dep_rpaths.clone())); + } + } } - let interpreter = elf.interpreter.map(|interp| interp.to_string()); + let interpreter = dylib.interpreter().map(|interp| interp.to_string()); if let Some(ref interp) = interpreter { if !libraries.contains_key(interp) { let interp_path = self.root.join(interp.strip_prefix('/').unwrap_or(interp)); @@ -178,7 +304,6 @@ impl DependencyAnalyzer { realpath: interp_realpath, needed: Vec::new(), rpath: Vec::new(), - runpath: Vec::new(), }, ); } @@ -188,22 +313,26 @@ impl DependencyAnalyzer { needed, libraries, rpath: rpaths, - runpath: runpaths, }; Ok(dep_tree) } - /// Parse the colon-delimited list of paths and apply ldso rules - fn parse_ld_paths(&self, ld_path: &str, elf_path: &Path) -> Result<Vec<String>, Error> { + // ---- ELF-specific path loading ---- + + /// Parse the colon-delimited list of paths and apply ldso rules (ELF-specific).
+ /// + /// Handles `$ORIGIN` / `${ORIGIN}` expansion (replaced with the directory of the + /// binary that contains the rpath) and root-relative path resolution. + fn parse_ld_paths(&self, ld_path: &str, dylib_path: &Path) -> Result, Error> { let mut paths = Vec::new(); for path in ld_path.split(':') { let normpath = if path.is_empty() { // The ldso treats empty paths as the current directory env::current_dir() } else if path.contains("$ORIGIN") || path.contains("${ORIGIN}") { - let elf_path = fs::canonicalize(elf_path)?; - let elf_dir = elf_path.parent().expect("no parent"); - let replacement = elf_dir.to_str().unwrap(); + let dylib_path = fs::canonicalize(dylib_path)?; + let dylib_dir = dylib_path.parent().expect("no parent"); + let replacement = dylib_dir.to_str().unwrap(); let path = path .replace("${ORIGIN}", replacement) .replace("$ORIGIN", replacement); @@ -218,11 +347,11 @@ impl DependencyAnalyzer { Ok(paths) } - fn load_ld_paths(&mut self, elf_path: &Path) -> Result<(), Error> { + fn load_elf_paths(&mut self, _dylib_path: &Path) -> Result<(), Error> { #[cfg(unix)] if let Ok(env_ld_path) = env::var("LD_LIBRARY_PATH") { if self.root == Path::new("/") { - self.env_ld_paths = self.parse_ld_paths(&env_ld_path, elf_path)?; + self.env_ld_paths = self.parse_ld_paths(&env_ld_path, _dylib_path)?; } } // Load all the paths from a ldso config file @@ -233,17 +362,18 @@ impl DependencyAnalyzer { let root_str = self.root.display().to_string(); let root_str = root_str.strip_suffix("/").unwrap_or(&root_str); let pattern = format!("{}/etc/ld-musl-*.path", root_str); - for entry in glob::glob(&pattern).expect("invalid glob pattern") { - if let Ok(entry) = entry { - let content = fs::read_to_string(&entry)?; - for line in content.lines() { - let line_stripped = line.trim(); - if !line_stripped.is_empty() { - self.conf_ld_paths - .push(root_str.to_string() + line_stripped); - } + if let Some(entry) = glob::glob(&pattern) + .expect("invalid glob pattern") + .flatten() + 
.next() + { + let content = fs::read_to_string(&entry)?; + for line in content.lines() { + let line_stripped = line.trim(); + if !line_stripped.is_empty() { + self.conf_ld_paths + .push(root_str.to_string() + line_stripped); } - break; } } // default ld paths @@ -270,61 +400,522 @@ impl DependencyAnalyzer { Ok(()) } - /// Try to locate a `lib` that is compatible to `elf` - fn find_library(&self, elf: &Elf, lib: &str) -> Result { - for lib_path in self - .runpaths + // ---- MachO-specific path loading ---- + + /// Load macOS-specific library search paths. + /// + /// macOS dyld search order (simplified): + /// 1. `DYLD_LIBRARY_PATH` — searched first using leaf filename only + /// 2. rpaths — for `@rpath/` prefixed install names, each LC_RPATH entry is tried + /// 3. The library's install name path — absolute or `@executable_path`/`@loader_path` + /// 4. `DYLD_FALLBACK_LIBRARY_PATH` — defaults to `~/lib:/usr/local/lib:/lib:/usr/lib` + /// + /// References: + /// - + /// - + fn load_macho_paths(&mut self, _dylib_path: &Path) -> Result<(), Error> { + // DYLD_LIBRARY_PATH: searched before everything else, using leaf filename only. + // This is intentionally not gated on root == "/" because it's commonly used + // for testing and development overrides. + if let Ok(dyld_lib_path) = env::var("DYLD_LIBRARY_PATH") { + for path in dyld_lib_path.split(':') { + if !path.is_empty() { + self.env_ld_paths.push(path.to_string()); + } + } + } + // DYLD_FALLBACK_LIBRARY_PATH: searched after rpaths and install name. + // If not set, macOS dyld uses a default set of fallback directories. 
+ match env::var("DYLD_FALLBACK_LIBRARY_PATH") { + Ok(fallback_path) => { + for path in fallback_path.split(':') { + if !path.is_empty() { + self.conf_ld_paths.push(path.to_string()); + } + } + } + Err(_) => { + // Default fallback paths per dyld behavior + if let Ok(home) = env::var("HOME") { + self.conf_ld_paths.push(format!("{}/lib", home)); + } + let root_str = self.root.display().to_string(); + let root_str = root_str.strip_suffix('/').unwrap_or(&root_str); + self.conf_ld_paths + .push(format!("{}/usr/local/lib", root_str)); + self.conf_ld_paths.push(format!("{}/lib", root_str)); + self.conf_ld_paths.push(format!("{}/usr/lib", root_str)); + } + } + self.conf_ld_paths.dedup(); + Ok(()) + } + + /// Resolve a macOS install name path variable. + /// + /// macOS uses three special prefixes in library install names and rpaths: + /// - `@executable_path/` — the directory of the main executable (set once at analyze time) + /// - `@loader_path/` — the directory of the Mach-O binary that contains the load command. + /// This changes for each binary in the dependency chain: when A loads B which loads C, + /// `@loader_path` for C's resolution is B's directory, not A's. + /// - `@rpath/` — a search variable; the remainder is appended to each LC_RPATH entry. + /// Returns None because the caller must iterate over rpaths to resolve it. 
+ fn resolve_macho_path(&self, path: &str, loader_path: &Path) -> Option { + if let Some(rest) = path.strip_prefix("@executable_path/") { + let exe_dir = self + .executable_path + .as_ref() + .and_then(|p| p.parent()) + .unwrap_or(Path::new(".")); + Some(exe_dir.join(rest)) + } else if let Some(rest) = path.strip_prefix("@loader_path/") { + let loader_dir = loader_path.parent().unwrap_or(Path::new(".")); + Some(loader_dir.join(rest)) + } else if path.starts_with("@rpath/") { + // @rpath must be resolved by iterating rpaths — return None to signal this + None + } else { + // Absolute or relative path — use as-is + Some(PathBuf::from(path)) + } + } + + // ---- PE-specific path loading ---- + + /// Load Windows PE-specific library search paths. + /// + /// Windows DLL search order (Standard Search Order for Desktop Applications): + /// 1. The directory from which the application loaded + /// 2. The system directory (e.g., `C:\Windows\System32`) + /// 3. The 16-bit system directory (e.g., `C:\Windows\System`) + /// 4. The Windows directory (e.g., `C:\Windows`) + /// 5. The current directory + /// 6. Directories listed in the `PATH` environment variable + /// + /// References: + /// - + /// - + fn load_pe_paths(&mut self, dylib_path: &Path) -> Result<(), Error> { + let root_str = self.root.display().to_string(); + let root_str = root_str.strip_suffix('/').unwrap_or(&root_str); + let root_str = root_str.strip_suffix('\\').unwrap_or(root_str); + + // 1. Application directory + if let Some(app_dir) = dylib_path.parent() { + self.env_ld_paths.push(app_dir.display().to_string()); + } + + // 2-4. System directories (relative to root) + // On 64-bit Windows, System32 contains 64-bit DLLs and SysWOW64 contains + // 32-bit DLLs. When a 32-bit process accesses System32, Windows transparently + // redirects to SysWOW64 (the "WoW64 File System Redirector"). 
Since we don't + // emulate this redirector, we include both directories and rely on the + // compatible() check to select the correct architecture. + // + // References: + // - https://learn.microsoft.com/en-us/windows/win32/winprog64/file-system-redirector + // - delvewheel's _translate_directory() handles System32 ↔ SysWOW64 ↔ Sysnative + for sys_dir in &[ + "Windows/System32", + "Windows/SysWOW64", + "Windows/System", + "Windows", + "windows/system32", + "windows/syswow64", + "windows/system", + "windows", + // Wine-style paths + "drive_c/windows/system32", + "drive_c/windows/syswow64", + "drive_c/windows", + ] { + let full_path = format!("{}/{}", root_str, sys_dir); + if Path::new(&full_path).is_dir() { + self.conf_ld_paths.push(full_path); + } + } + + // 5-6. Current directory and PATH environment variable. + // Only use these when analyzing against the real filesystem root, + // since they contain absolute paths that don't make sense with a + // custom sysroot. This mirrors how ELF only uses LD_LIBRARY_PATH + // when root is "/". + #[cfg(windows)] + { + let is_system_root = self.root == Path::new("/") + || self.root == Path::new("\\") + || self + .root + .to_str() + .is_some_and(|s| s.len() <= 3 && s.contains(':')); + if is_system_root { + if let Ok(cwd) = env::current_dir() { + self.conf_ld_paths.push(cwd.display().to_string()); + } + if let Ok(path_env) = env::var("PATH") { + for path in path_env.split(';') { + if !path.is_empty() { + self.conf_ld_paths.push(path.to_string()); + } + } + } + } + } + + self.conf_ld_paths.dedup(); + Ok(()) + } + + // ---- Library finding ---- + + /// Try to locate a `lib_name` that is compatible to `dylib`. + /// + /// Dispatches to format-specific find logic based on the binary format. + /// `loader_path` and `rpaths` provide per-dependency context for MachO/ELF + /// resolution (see `analyze_dylib` for how they are threaded through the + /// dependency graph). 
+ fn find_library( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + loader_path: &Path, + rpaths: &[String], + ) -> Result { + match dylib.format() { + BinaryFormat::MachO => self.find_macho_library(dylib, lib_name, loader_path, rpaths), + BinaryFormat::PE => self.find_pe_library(dylib, lib_name), + BinaryFormat::Elf => self.find_elf_library(dylib, lib_name, rpaths), + } + } + + /// Try to locate an ELF library. + /// + /// Search order: rpaths, `LD_LIBRARY_PATH`, `ld.so.conf` paths, additional paths. + fn find_elf_library( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + rpaths: &[String], + ) -> Result { + let candidates: Vec = rpaths .iter() .chain(self.env_ld_paths.iter()) .chain(self.conf_ld_paths.iter()) .map(|ld_path| { self.root .join(ld_path.strip_prefix('/').unwrap_or(ld_path)) - .join(lib) + .join(lib_name) }) .chain( self.additional_ld_paths .iter() - .map(|ld_path| ld_path.join(lib)), + .map(|ld_path| ld_path.join(lib_name)), ) - { - // FIXME: readlink to get real path - if lib_path.exists() { - let file = fs::File::open(&lib_path)?; - // SAFETY: The file is memory-mapped read-only and we only perform read operations - // on the mapped bytes. We do not prevent other processes from modifying the file - // concurrently; such external modification is accepted as a risk for this tool. - // - // Note: The file handle does not need to remain open after mapping on POSIX systems, - // but we keep it in scope until parsing is complete for clarity and portability. - let bytes = unsafe { Mmap::map(&file)? 
}; - if let Ok(lib_elf) = Elf::parse(&bytes) { - if compatible_elfs(elf, &lib_elf) { - let needed = lib_elf.libraries.iter().map(ToString::to_string).collect(); - let (rpath, runpath) = self.read_rpath_runpath(&lib_elf, &lib_path)?; - return Ok(Library { - name: lib.to_string(), - path: lib_path.to_path_buf(), - realpath: fs::canonicalize(lib_path).ok(), - needed, - rpath, - runpath, - }); + .collect(); + self.try_library_candidates(dylib, lib_name, &candidates) + } + + /// Try to locate a Mach-O library. + /// + /// Handles `@rpath/`, `@loader_path/`, `@executable_path/` prefixes. + /// + /// Search order: + /// 1. `DYLD_LIBRARY_PATH` (leaf filename only) + /// 2. `@rpath` expansion — each rpath from the *depending library* is tried + /// 3. `@executable_path` / `@loader_path` resolution, or direct absolute path + /// 4. `DYLD_FALLBACK_LIBRARY_PATH` (leaf filename only) + /// 5. Additional user-provided paths + /// + /// The `rpaths` parameter contains the rpaths from the library that depends on + /// `lib_name`, NOT the top-level binary. This is critical for transitive deps: + /// if A (rpaths=[/a/lib]) depends on B (rpaths=[/b/lib]) which depends on + /// `@rpath/libC.dylib`, we search /b/lib (B's rpaths), not /a/lib (A's rpaths). + fn find_macho_library( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + loader_path: &Path, + rpaths: &[String], + ) -> Result { + let mut candidates: Vec = Vec::new(); + + // Extract the leaf filename for searching flat directories. + // Install names like "/usr/lib/libSystem.B.dylib" → "libSystem.B.dylib" + let file_name = Path::new(lib_name) + .file_name() + .unwrap_or_default() + .to_str() + .unwrap_or(lib_name); + + // 1. DYLD_LIBRARY_PATH — searched first, using just the leaf filename + for path in &self.env_ld_paths { + candidates.push(PathBuf::from(path).join(file_name)); + } + + // 2-3. 
Handle @-prefixed install names + if let Some(rest) = lib_name.strip_prefix("@rpath/") { + // @rpath/foo.dylib → try each rpath directory with the suffix. + // rpaths come from the *depending* library, already resolved by read_rpath + // (so @loader_path/@executable_path within rpaths are already expanded). + for rpath in rpaths { + candidates.push(PathBuf::from(rpath).join(rest)); + } + // Fallback: also try the @rpath suffix (not just the leaf filename) against + // DYLD_FALLBACK_LIBRARY_PATH. This matches delocate's behavior of appending + // /usr/local/lib and /usr/lib as fallback search directories for @rpath + // resolution. For @rpath/subdir/libfoo.dylib this correctly tries + // /usr/local/lib/subdir/libfoo.dylib rather than just /usr/local/lib/libfoo.dylib. + for path in &self.conf_ld_paths { + candidates.push(PathBuf::from(path).join(rest)); + } + for path in &self.additional_ld_paths { + candidates.push(path.join(rest)); + } + } else if let Some(resolved) = self.resolve_macho_path(lib_name, loader_path) { + // @executable_path/..., @loader_path/..., or absolute path. + // For absolute paths, also probe through the sysroot so that a custom + // root (e.g., cross-compilation SDK) is searched instead of / on the host. + if resolved.is_absolute() { + if let Ok(relative) = resolved.strip_prefix("/") { + let sysroot_path = self.root.join(relative); + if sysroot_path != resolved { + candidates.push(sysroot_path); } } } + candidates.push(resolved); + + // 4. DYLD_FALLBACK_LIBRARY_PATH — for non-@rpath install names, search + // using the leaf filename (the path-less library name portion). + for path in &self.conf_ld_paths { + candidates.push(PathBuf::from(path).join(file_name)); + } + + // 5. Additional user-provided paths + for path in &self.additional_ld_paths { + candidates.push(path.join(file_name)); + } + } + + self.try_library_candidates(dylib, lib_name, &candidates) + } + + /// Try to locate a PE library (DLL). 
+ /// + /// Uses case-insensitive filename matching because Windows filesystems are + /// case-insensitive but this tool may run on a case-sensitive filesystem + /// (e.g., Linux analyzing a Windows sysroot). Without this, a PE importing + /// "KERNEL32.dll" would fail to match a file named "kernel32.dll". + /// + /// Search order: + /// 1. Application directory (from `env_ld_paths`) + /// 2. System directories (from `conf_ld_paths`) + /// 3. `PATH` directories (from `conf_ld_paths`) + /// 4. Additional user-provided paths + fn find_pe_library(&self, dylib: &impl InspectDylib, lib_name: &str) -> Result { + let search_dirs = self + .env_ld_paths + .iter() + .chain(self.conf_ld_paths.iter()) + .map(|s| Path::new(s.as_str()).to_path_buf()) + .chain(self.additional_ld_paths.iter().cloned()); + for dir in search_dirs { + if let Some(lib_path) = find_file_case_insensitive(&dir, lib_name) { + if let Some(lib) = self.try_single_candidate(dylib, lib_name, &lib_path)? { + return Ok(lib); + } + } + } + Ok(not_found_library(lib_name)) + } + + /// Try a list of candidate paths and return the first compatible library found. + /// + /// Used by ELF and MachO library finding, which generate candidate paths directly. + fn try_library_candidates( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + candidates: &[PathBuf], + ) -> Result { + for lib_path in candidates { + if !lib_path.exists() { + continue; + } + if let Some(lib) = self.try_single_candidate(dylib, lib_name, lib_path)? { + return Ok(lib); + } + } + Ok(not_found_library(lib_name)) + } + + /// Check if a parsed binary is compatible with the main binary and extract + /// its rpaths and needed libraries. 
+ fn check_compatible( + &self, + dylib: &impl InspectDylib, + lib: &impl InspectDylib, + obj: &Object, + lib_path: &Path, + ) -> Result, Error> { + if dylib.compatible(obj) { + Ok(Some(( + self.read_rpath(lib, lib_path)?, + lib.libraries().iter().map(ToString::to_string).collect(), + ))) + } else { + Ok(None) + } + } + + /// Try to parse a single candidate file and check compatibility. + /// + /// Opens the file, memory-maps it, parses the binary format, checks that it is + /// compatible with the main binary, and extracts rpaths and needed libraries. + /// + /// For fat/universal Mach-O binaries, iterates through architecture slices to find + /// one that is compatible with the main binary. This is important because dependent + /// libraries on macOS are often distributed as universal binaries containing + /// multiple architectures (e.g., x86_64 + arm64), and we need to pick the right + /// slice to extract the correct rpaths and dependency list. + fn try_single_candidate( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + lib_path: &Path, + ) -> Result, Error> { + let file = match fs::File::open(lib_path) { + Ok(f) => f, + Err(_) => return Ok(None), + }; + // SAFETY: The file is memory-mapped read-only and we only perform read operations + // on the mapped bytes. + let bytes = match unsafe { Mmap::map(&file) } { + Ok(m) => m, + Err(_) => return Ok(None), + }; + let obj = match Object::parse(&bytes) { + Ok(o) => o, + Err(_) => return Ok(None), + }; + + let info = match obj { + Object::Elf(ref elf) => self.check_compatible(dylib, elf, &obj, lib_path)?, + Object::Mach(ref mach) => match mach { + Mach::Fat(ref fat) => { + // Fat/universal Mach-O: iterate through architecture slices to find + // one that is compatible with the main binary. We construct a + // temporary Object for each slice to reuse the compatible() trait + // method, which checks cputype, bitness, and endianness. 
+ // + // MultiArch re-parses from the underlying byte buffer on each + // iteration, so the fat binary can be iterated multiple times. + let mut found = None; + for arch in fat.into_iter() { + if let Ok(goblin::mach::SingleArch::MachO(inner)) = arch { + // Wrap in Object to reuse compatible(), then unwrap to + // extract rpaths/libraries from the matched architecture. + let inner_obj = Object::Mach(Mach::Binary(inner)); + if dylib.compatible(&inner_obj) { + let Object::Mach(Mach::Binary(ref macho)) = inner_obj else { + unreachable!() + }; + found = Some(( + self.read_rpath(macho, lib_path)?, + macho.libraries().iter().map(ToString::to_string).collect(), + )); + break; + } + } + } + found + } + Mach::Binary(ref macho) => self.check_compatible(dylib, macho, &obj, lib_path)?, + }, + Object::PE(ref pe) => self.check_compatible(dylib, pe, &obj, lib_path)?, + _ => None, + }; + + if let Some((rpath, needed)) = info { + Ok(Some(Library { + name: lib_name.to_string(), + path: lib_path.to_path_buf(), + realpath: fs::canonicalize(lib_path).ok(), + needed, + rpath, + })) + } else { + Ok(None) } - Ok(Library { - name: lib.to_string(), - path: PathBuf::from(lib), - realpath: None, - needed: Vec::new(), - rpath: Vec::new(), - runpath: Vec::new(), - }) } } +/// Create a not-found library entry. +/// +/// Used when a library cannot be located on disk (or is a virtual library like +/// Windows API sets). The library is recorded with `realpath: None` so callers +/// can detect it via `Library::found()`. +fn not_found_library(lib_name: &str) -> Library { + Library { + name: lib_name.to_string(), + path: PathBuf::from(lib_name), + realpath: None, + needed: Vec::new(), + rpath: Vec::new(), + } +} + +/// Check if a DLL name is a Windows API set. +/// +/// API sets (e.g., `api-ms-win-crt-runtime-l1-1-0.dll`) and extension API sets +/// (e.g., `ext-ms-win-ntuser-draw-l1-1-0.dll`) are virtual DLL names that Windows +/// resolves to real host DLLs at runtime via an API set schema. 
They never exist +/// as files on disk. Trying to locate them is pointless and expensive. +/// +/// References: +/// - +/// - delvewheel uses `re.compile('api-.*')` to skip these +fn is_api_set_dll(name: &str) -> bool { + let lower = name.to_lowercase(); + lower.starts_with("api-") || lower.starts_with("ext-ms-") +} + +/// Find a file in a directory using case-insensitive name matching. +/// +/// Windows filesystems (NTFS, FAT32) are case-insensitive: `KERNEL32.dll`, +/// `kernel32.dll`, and `Kernel32.DLL` all refer to the same file. However, when +/// analyzing a Windows sysroot on a case-sensitive filesystem (e.g., Linux ext4), +/// an exact-case lookup for `KERNEL32.dll` will fail if the file is stored as +/// `kernel32.dll`. This function handles that mismatch by falling back to a +/// directory scan with case-insensitive comparison when the exact match fails. +fn find_file_case_insensitive(dir: &Path, name: &str) -> Option { + // Fast path: try exact match first (also handles case-insensitive filesystems + // like macOS HFS+ and Windows NTFS natively) + let exact = dir.join(name); + if exact.is_file() { + return Some(exact); + } + // Slow path: scan directory entries for case-insensitive match. + // This is O(n) in the number of directory entries, but only runs when the + // exact match fails (i.e., on case-sensitive filesystems with case mismatches). 
+ let name_lower = name.to_lowercase(); + let entries = match std::fs::read_dir(dir) { + Ok(e) => e, + Err(_) => return None, + }; + for entry in entries.flatten() { + if let Some(file_name) = entry.file_name().to_str() { + if file_name.to_lowercase() == name_lower { + let path = entry.path(); + if path.is_file() { + return Some(path); + } + } + } + } + None +} + /// Find musl libc path fn find_musl_libc() -> Result, Error> { match glob::glob("/lib/libc.musl-*.so.1") @@ -336,31 +927,18 @@ fn find_musl_libc() -> Result, Error> { } } -/// See if two ELFs are compatible -/// -/// This compares the aspects of the ELF to see if they're compatible: -/// bit size, endianness, machine type, and operating system. -fn compatible_elfs(elf1: &Elf, elf2: &Elf) -> bool { - if elf1.is_64 != elf2.is_64 { - return false; - } - if elf1.little_endian != elf2.little_endian { - return false; - } - if elf1.header.e_machine != elf2.header.e_machine { - return false; - } - let compatible_osabis = &[ - ELFOSABI_NONE, // ELFOSABI_NONE / ELFOSABI_SYSV - ELFOSABI_GNU, // ELFOSABI_GNU / ELFOSABI_LINUX - ]; - let osabi1 = elf1.header.e_ident[EI_OSABI]; - let osabi2 = elf2.header.e_ident[EI_OSABI]; - if osabi1 != osabi2 - && !compatible_osabis.contains(&osabi1) - && !compatible_osabis.contains(&osabi2) - { - return false; +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_api_set_dll() { + assert!(is_api_set_dll("api-ms-win-crt-runtime-l1-1-0.dll")); + assert!(is_api_set_dll("api-ms-win-core-synch-l1-2-0.dll")); + assert!(is_api_set_dll("API-MS-WIN-CRT-STDIO-L1-1-0.DLL")); + assert!(is_api_set_dll("ext-ms-win-ntuser-draw-l1-1-0.dll")); + assert!(!is_api_set_dll("KERNEL32.dll")); + assert!(!is_api_set_dll("vcruntime140.dll")); + assert!(!is_api_set_dll("libSystem.B.dylib")); } - true } diff --git a/src/macho.rs b/src/macho.rs new file mode 100644 index 0000000..979a3ea --- /dev/null +++ b/src/macho.rs @@ -0,0 +1,61 @@ +use goblin::{ + mach::{Mach, MachO}, + Object, +}; + +use 
crate::{BinaryFormat, InspectDylib}; + +impl InspectDylib for MachO<'_> { + fn rpaths(&self) -> &[&str] { + &self.rpaths + } + + fn libraries(&self) -> Vec<&str> { + // goblin always add `self` or dylib id as a needed library, so we need to remove it, see + // https://github.com/m4b/goblin/blob/6fdaffdc411bacd5dd7095dc93cec66302ca2575/src/mach/mod.rs#L174 + // https://github.com/m4b/goblin/blob/6fdaffdc411bacd5dd7095dc93cec66302ca2575/src/mach/mod.rs#L231-L235 + if self.libs.len() <= 1 { + Vec::new() + } else { + self.libs[1..].to_vec() + } + } + + fn interpreter(&self) -> Option<&str> { + None + } + + fn compatible(&self, other: &Object) -> bool { + match other { + Object::Mach(mach) => match mach { + Mach::Fat(fat) => { + for macho in fat { + if let Ok(goblin::mach::SingleArch::MachO(macho)) = macho { + if self.compatible(&Object::Mach(Mach::Binary(macho))) { + return true; + } + } + } + false + } + Mach::Binary(macho) => { + if self.is_64 != macho.is_64 { + return false; + } + if self.little_endian != macho.little_endian { + return false; + } + if self.header.cputype != macho.header.cputype { + return false; + } + true + } + }, + _ => false, + } + } + + fn format(&self) -> BinaryFormat { + BinaryFormat::MachO + } +} diff --git a/src/pe.rs b/src/pe.rs new file mode 100644 index 0000000..5378759 --- /dev/null +++ b/src/pe.rs @@ -0,0 +1,36 @@ +use goblin::{pe::PE, Object}; + +use crate::{BinaryFormat, InspectDylib}; + +impl InspectDylib for PE<'_> { + fn rpaths(&self) -> &[&str] { + &[] + } + + fn libraries(&self) -> Vec<&str> { + self.libraries.clone() + } + + fn interpreter(&self) -> Option<&str> { + None + } + + fn compatible(&self, other: &Object) -> bool { + match other { + Object::PE(pe) => { + if self.is_64 != pe.is_64 { + return false; + } + if self.header.coff_header.machine != pe.header.coff_header.machine { + return false; + } + true + } + _ => false, + } + } + + fn format(&self) -> BinaryFormat { + BinaryFormat::PE + } +} diff --git a/tests/test.macho 
b/tests/test.macho new file mode 100755 index 0000000..5ce5eac Binary files /dev/null and b/tests/test.macho differ diff --git a/tests/test.pe b/tests/test.pe new file mode 100644 index 0000000..5d0f29d Binary files /dev/null and b/tests/test.pe differ diff --git a/tests/test_lddtree.rs b/tests/test_lddtree.rs index 20860a0..0f4a539 100644 --- a/tests/test_lddtree.rs +++ b/tests/test_lddtree.rs @@ -1,7 +1,7 @@ use lddtree::DependencyAnalyzer; #[test] -fn test_lddtree() { +fn test_elf() { let analyzer = DependencyAnalyzer::default(); let deps = analyzer.analyze("tests/test.elf").unwrap(); assert_eq!( @@ -20,3 +20,56 @@ fn test_lddtree() { ); assert_eq!(deps.libraries.len(), 6); } + +#[test] +fn test_macho() { + let analyzer = DependencyAnalyzer::default(); + let deps = analyzer.analyze("tests/test.macho").unwrap(); + assert!(deps.interpreter.is_none()); + assert_eq!( + deps.needed, + &[ + "/usr/lib/libz.1.dylib", + "/usr/lib/libiconv.2.dylib", + "/System/Library/Frameworks/CoreFoundation.framework/Versions/A/CoreFoundation", + "/usr/lib/libSystem.B.dylib" + ] + ); + // On macOS, these system libraries exist on disk (in the dyld shared cache), + // so transitive dependencies will be discovered, making the count >= 4. + // On other platforms, the install-name paths don't exist, so we get exactly 4 + // not-found entries. 
+ assert!(deps.libraries.len() >= 4); +} + +#[test] +fn test_pe() { + let analyzer = DependencyAnalyzer::default(); + let deps = analyzer.analyze("tests/test.pe").unwrap(); + assert!(deps.interpreter.is_none()); + assert_eq!( + deps.needed, + &[ + "KERNEL32.dll", + "VCRUNTIME140.dll", + "api-ms-win-crt-runtime-l1-1-0.dll", + "api-ms-win-crt-stdio-l1-1-0.dll" + ] + ); + // All directly needed libraries must appear in the dependency map + for name in &deps.needed { + assert!( + deps.libraries.contains_key(name.as_str()), + "missing library: {name}" + ); + } + // API set DLLs are virtual — they never exist as real files on disk + assert!(!deps.libraries["api-ms-win-crt-runtime-l1-1-0.dll"].found()); + assert!(!deps.libraries["api-ms-win-crt-stdio-l1-1-0.dll"].found()); + // On Windows, real system DLLs (e.g., KERNEL32.dll) are found on disk and + // their transitive dependencies are discovered, so the total library count + // exceeds the 4 direct deps. On Linux/macOS no Windows system directories + // exist, so all non-API-set libs are recorded as not-found and the count + // stays at 4. + assert!(deps.libraries.len() >= 4); +}