From cfd93169a1f7fc8684e2070a0e30cb1c386de1a6 Mon Sep 17 00:00:00 2001 From: messense Date: Sat, 1 Oct 2022 18:03:24 +0800 Subject: [PATCH 01/19] Add macho support --- src/elf.rs | 21 ++++++++++++ src/lib.rs | 85 ++++++++++++++++++++--------------------------- src/macho.rs | 17 ++++++++++ tests/test.macho | Bin 0 -> 16833 bytes 4 files changed, 74 insertions(+), 49 deletions(-) create mode 100644 src/elf.rs create mode 100644 src/macho.rs create mode 100755 tests/test.macho diff --git a/src/elf.rs b/src/elf.rs new file mode 100644 index 0000000..6e7ef3e --- /dev/null +++ b/src/elf.rs @@ -0,0 +1,21 @@ +use goblin::elf::Elf; + +use crate::InspectDylib; + +impl InspectDylib for Elf<'_> { + fn rpaths(&self) -> &[&str] { + if !self.runpaths.is_empty() { + &self.runpaths + } else { + &self.rpaths + } + } + + fn libraries(&self) -> &[&str] { + &self.libraries + } + + fn interpreter(&self) -> Option<&str> { + self.interpreter.clone() + } +} diff --git a/src/lib.rs b/src/lib.rs index f4fe605..1f49133 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,8 +13,10 @@ use goblin::elf::{ }; use memmap2::Mmap; +mod elf; mod errors; pub mod ld_so_conf; +mod macho; pub use errors::Error; use ld_so_conf::parse_ld_so_conf; @@ -30,10 +32,8 @@ pub struct Library { pub realpath: Option, /// The dependencies of this library. pub needed: Vec, - /// Runtime library search paths. (deprecated) - pub rpath: Vec, /// Runtime library search paths. - pub runpath: Vec, + pub rpath: Vec, } impl Library { @@ -52,10 +52,17 @@ pub struct DependencyTree { pub needed: Vec, /// All of this binary’s dynamic libraries it uses in detail. pub libraries: HashMap, - /// Runtime library search paths. (deprecated) + /// Runtime library search paths. pub rpath: Vec, +} + +trait InspectDylib { /// Runtime library search paths. - pub runpath: Vec, + fn rpaths(&self) -> &[&str]; + /// A list of this binary’s dynamic libraries it depends on directly. 
+ fn libraries(&self) -> &[&str]; + /// The binary’s program interpreter (e.g., dynamic linker). + fn interpreter(&self) -> Option<&str>; } /// Library dependency analyzer @@ -64,7 +71,7 @@ pub struct DependencyAnalyzer { env_ld_paths: Vec, conf_ld_paths: Vec, additional_ld_paths: Vec, - runpaths: Vec, + rpaths: Vec, root: PathBuf, } @@ -81,7 +88,7 @@ impl DependencyAnalyzer { env_ld_paths: Vec::new(), conf_ld_paths: Vec::new(), additional_ld_paths: Vec::new(), - runpaths: Vec::new(), + rpaths: Vec::new(), root, } } @@ -104,24 +111,14 @@ impl DependencyAnalyzer { self } - fn read_rpath_runpath( - &self, - elf: &Elf, - path: &Path, - ) -> Result<(Vec, Vec), Error> { + fn read_rpath(&self, lib: &impl InspectDylib, path: &Path) -> Result, Error> { let mut rpaths = Vec::new(); - let mut runpaths = Vec::new(); - for runpath in &elf.runpaths { - if let Ok(ld_paths) = self.parse_ld_paths(runpath, path) { - runpaths = ld_paths; - } - } - for rpath in &elf.rpaths { + for rpath in lib.rpaths() { if let Ok(ld_paths) = self.parse_ld_paths(rpath, path) { rpaths = ld_paths; } } - Ok((rpaths, runpaths)) + Ok(rpaths) } /// Analyze the given binary. @@ -139,15 +136,9 @@ impl DependencyAnalyzer { let bytes = unsafe { Mmap::map(&file)? }; let elf = Elf::parse(&bytes)?; - let (mut rpaths, runpaths) = self.read_rpath_runpath(&elf, path)?; - if !runpaths.is_empty() { - // If both RPATH and RUNPATH are set, only the latter is used. 
- rpaths = Vec::new(); - } - self.runpaths = runpaths.clone(); - self.runpaths.extend(rpaths.clone()); + let rpaths = self.read_rpath(&elf, path)?; - let needed: Vec = elf.libraries.iter().map(ToString::to_string).collect(); + let needed: Vec = elf.libraries().iter().map(ToString::to_string).collect(); let mut libraries = HashMap::new(); let mut stack = needed.clone(); @@ -160,7 +151,7 @@ impl DependencyAnalyzer { stack.extend(library.needed); } - let interpreter = elf.interpreter.map(|interp| interp.to_string()); + let interpreter = elf.interpreter().map(|interp| interp.to_string()); if let Some(ref interp) = interpreter { if !libraries.contains_key(interp) { let interp_path = self.root.join(interp.strip_prefix('/').unwrap_or(interp)); @@ -178,7 +169,6 @@ impl DependencyAnalyzer { realpath: interp_realpath, needed: Vec::new(), rpath: Vec::new(), - runpath: Vec::new(), }, ); } @@ -188,22 +178,21 @@ impl DependencyAnalyzer { needed, libraries, rpath: rpaths, - runpath: runpaths, }; Ok(dep_tree) } /// Parse the colon-delimited list of paths and apply ldso rules - fn parse_ld_paths(&self, ld_path: &str, elf_path: &Path) -> Result, Error> { + fn parse_ld_paths(&self, ld_path: &str, dylib_path: &Path) -> Result, Error> { let mut paths = Vec::new(); for path in ld_path.split(':') { let normpath = if path.is_empty() { // The ldso treats empty paths as the current directory env::current_dir() } else if path.contains("$ORIGIN") || path.contains("${ORIGIN}") { - let elf_path = fs::canonicalize(elf_path)?; - let elf_dir = elf_path.parent().expect("no parent"); - let replacement = elf_dir.to_str().unwrap(); + let dylib_path = fs::canonicalize(dylib_path)?; + let dylib_dir = dylib_path.parent().expect("no parent"); + let replacement = dylib_dir.to_str().unwrap(); let path = path .replace("${ORIGIN}", replacement) .replace("$ORIGIN", replacement); @@ -218,11 +207,11 @@ impl DependencyAnalyzer { Ok(paths) } - fn load_ld_paths(&mut self, elf_path: &Path) -> Result<(), Error> { + 
fn load_ld_paths(&mut self, dylib_path: &Path) -> Result<(), Error> { #[cfg(unix)] if let Ok(env_ld_path) = env::var("LD_LIBRARY_PATH") { if self.root == Path::new("/") { - self.env_ld_paths = self.parse_ld_paths(&env_ld_path, elf_path)?; + self.env_ld_paths = self.parse_ld_paths(&env_ld_path, dylib_path)?; } } // Load all the paths from a ldso config file @@ -270,22 +259,22 @@ impl DependencyAnalyzer { Ok(()) } - /// Try to locate a `lib` that is compatible to `elf` - fn find_library(&self, elf: &Elf, lib: &str) -> Result { + /// Try to locate a `lib_name` that is compatible to `dylib` + fn find_library(&self, dylib: &Elf, lib_name: &str) -> Result { for lib_path in self - .runpaths + .rpaths .iter() .chain(self.env_ld_paths.iter()) .chain(self.conf_ld_paths.iter()) .map(|ld_path| { self.root .join(ld_path.strip_prefix('/').unwrap_or(ld_path)) - .join(lib) + .join(lib_name) }) .chain( self.additional_ld_paths .iter() - .map(|ld_path| ld_path.join(lib)), + .map(|ld_path| ld_path.join(lib_name)), ) { // FIXME: readlink to get real path @@ -299,28 +288,26 @@ impl DependencyAnalyzer { // but we keep it in scope until parsing is complete for clarity and portability. let bytes = unsafe { Mmap::map(&file)? 
}; if let Ok(lib_elf) = Elf::parse(&bytes) { - if compatible_elfs(elf, &lib_elf) { + if compatible_elfs(dylib, &lib_elf) { let needed = lib_elf.libraries.iter().map(ToString::to_string).collect(); - let (rpath, runpath) = self.read_rpath_runpath(&lib_elf, &lib_path)?; + let rpath = self.read_rpath(&lib_elf, &lib_path)?; return Ok(Library { - name: lib.to_string(), + name: lib_name.to_string(), path: lib_path.to_path_buf(), realpath: fs::canonicalize(lib_path).ok(), needed, rpath, - runpath, }); } } } } Ok(Library { - name: lib.to_string(), - path: PathBuf::from(lib), + name: lib_name.to_string(), + path: PathBuf::from(lib_name), realpath: None, needed: Vec::new(), rpath: Vec::new(), - runpath: Vec::new(), }) } } diff --git a/src/macho.rs b/src/macho.rs new file mode 100644 index 0000000..4614301 --- /dev/null +++ b/src/macho.rs @@ -0,0 +1,17 @@ +use goblin::mach::MachO; + +use crate::InspectDylib; + +impl InspectDylib for MachO<'_> { + fn rpaths(&self) -> &[&str] { + &self.rpaths + } + + fn libraries(&self) -> &[&str] { + &self.libs + } + + fn interpreter(&self) -> Option<&str> { + None + } +} diff --git a/tests/test.macho b/tests/test.macho new file mode 100755 index 0000000000000000000000000000000000000000..5ce5eac0727fa6aed6902871c2a35d8e88a81ba4 GIT binary patch literal 16833 zcmeI3O-NKx6vyw(XckIp4pEsj6h)u&N|S8m_%Z6Bq?yQ2I=9hzIuD&UF(3Jnr5i~J zq87D^YGJe}OC&+VU@9mADWoV2%7_+9M5{IfW#_#2M&DB?h?eKTf9^f!-gED{zj@p9 zD*o+P8e=8~W(H+}#x0EXGaJkp+X$)vab8nY?mX_St;VR)lw@OQg@@ki>-wu8wkD;CPc#_pKt0{|19m7$YJwHqUjRz%XDkK06vKY0IVhjD=Pu^J1|`q0&pTBNiC39$ ztScN5145P56!M0;gtCw~Aa(>pZDFA#7!u2ZQQ7B>NI}`&l2{Sy#ZVX`VWDW*9_=1| zeHPyq_`HqpQLnTY>kHfgxyZ}!U~DyvyEGsFf3zGBu8%kv%T%C)Lq-IM01+SpM1Tko z0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko z0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>@ue&zvG*FgurriSY;ZEvws0FvvBw&Prf(ls`nA=t@z=!J z8l|b+%maQdc8bl>h{*k-*C&Qpa=s%u0*58u{a|Be80Vh~Wr3 znmsJLygWYDI$%B-iCur1b?s2e;o^8h<+<0E{Q2+0y?005!DsU=v7~p?({1Kzck)8d 
zgP!jG_m3`A4DP-Cw{?%_%H+t4O($mW>}#&y^TyNqX`OGYb!_zWRKMruOlhsdI(KGU g_Pw@7?z?3jczEN<`HF_B4_{^-Lo@d?e+ Date: Sat, 1 Oct 2022 18:49:04 +0800 Subject: [PATCH 02/19] Parse single arch macho works --- src/elf.rs | 42 ++++++++++++++++++- src/errors.rs | 3 ++ src/lib.rs | 113 +++++++++++++++++++++++++------------------------- src/macho.rs | 9 +++- 4 files changed, 109 insertions(+), 58 deletions(-) diff --git a/src/elf.rs b/src/elf.rs index 6e7ef3e..b635626 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -1,4 +1,10 @@ -use goblin::elf::Elf; +use goblin::{ + elf::{ + header::{EI_OSABI, ELFOSABI_GNU, ELFOSABI_NONE}, + Elf, + }, + Object, +}; use crate::InspectDylib; @@ -18,4 +24,38 @@ impl InspectDylib for Elf<'_> { fn interpreter(&self) -> Option<&str> { self.interpreter.clone() } + + /// See if two ELFs are compatible + /// + /// This compares the aspects of the ELF to see if they're compatible: + /// bit size, endianness, machine type, and operating system. + fn compatible(&self, other: &Object) -> bool { + match other { + Object::Elf(other) => { + if self.is_64 != other.is_64 { + return false; + } + if self.little_endian != other.little_endian { + return false; + } + if self.header.e_machine != other.header.e_machine { + return false; + } + let compatible_osabis = &[ + ELFOSABI_NONE, // ELFOSABI_NONE / ELFOSABI_SYSV + ELFOSABI_GNU, // ELFOSABI_GNU / ELFOSABI_LINUX + ]; + let osabi1 = self.header.e_ident[EI_OSABI]; + let osabi2 = other.header.e_ident[EI_OSABI]; + if osabi1 != osabi2 + && !compatible_osabis.contains(&osabi1) + && !compatible_osabis.contains(&osabi2) + { + return false; + } + true + } + _ => false, + } + } } diff --git a/src/errors.rs b/src/errors.rs index c239660..f1cc7d3 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -9,6 +9,7 @@ pub enum Error { Io(io::Error), Goblin(goblin::error::Error), LdSoConf(LdSoConfError), + UnsupportedBinary, } impl fmt::Display for Error { @@ -17,6 +18,7 @@ impl fmt::Display for Error { Error::Io(e) => e.fmt(f), Error::Goblin(e) 
=> e.fmt(f), Error::LdSoConf(e) => e.fmt(f), + Error::UnsupportedBinary => write!(f, "Unsupported binary format"), } } } @@ -27,6 +29,7 @@ impl error::Error for Error { Error::Io(e) => e.source(), Error::Goblin(e) => e.source(), Error::LdSoConf(e) => e.source(), + Error::UnsupportedBinary => None, } } } diff --git a/src/lib.rs b/src/lib.rs index 1f49133..a40edd0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,10 +7,8 @@ use std::env; use std::path::{Path, PathBuf}; use fs_err as fs; -use goblin::elf::{ - header::{EI_OSABI, ELFOSABI_GNU, ELFOSABI_NONE}, - Elf, -}; +use goblin::mach::Mach; +use goblin::Object; use memmap2::Mmap; mod elf; @@ -46,11 +44,11 @@ impl Library { /// Library dependency tree #[derive(Debug, Clone)] pub struct DependencyTree { - /// The binary’s program interpreter (e.g., dynamic linker). + /// The binary's program interpreter (e.g., dynamic linker). pub interpreter: Option, - /// A list of this binary’s dynamic libraries it depends on directly. + /// A list of this binary's dynamic libraries it depends on directly. pub needed: Vec, - /// All of this binary’s dynamic libraries it uses in detail. + /// All of this binary's dynamic libraries it uses in detail. pub libraries: HashMap, /// Runtime library search paths. pub rpath: Vec, @@ -59,10 +57,12 @@ pub struct DependencyTree { trait InspectDylib { /// Runtime library search paths. fn rpaths(&self) -> &[&str]; - /// A list of this binary’s dynamic libraries it depends on directly. + /// A list of this binary's dynamic libraries it depends on directly. fn libraries(&self) -> &[&str]; - /// The binary’s program interpreter (e.g., dynamic linker). + /// The binary's program interpreter (e.g., dynamic linker). fn interpreter(&self) -> Option<&str>; + /// See if two dynamic libraries are compatible. 
+ fn compatible(&self, other: &Object) -> bool; } /// Library dependency analyzer @@ -130,15 +130,26 @@ impl DependencyAnalyzer { // SAFETY: The file is memory-mapped read-only and we only perform read operations // on the mapped bytes. We do not prevent other processes from modifying the file // concurrently; such external modification is accepted as a risk for this tool. - // - // Note: The file handle does not need to remain open after mapping on POSIX systems, - // but we keep it in scope until parsing is complete for clarity and portability. let bytes = unsafe { Mmap::map(&file)? }; - let elf = Elf::parse(&bytes)?; + let dep_tree = match Object::parse(&bytes)? { + Object::Elf(elf) => self.analyze_dylib(path, elf)?, + Object::Mach(mach) => match mach { + Mach::Fat(_) => return Err(Error::UnsupportedBinary), + Mach::Binary(macho) => self.analyze_dylib(path, macho)?, + }, + _ => return Err(Error::UnsupportedBinary), + }; + Ok(dep_tree) + } - let rpaths = self.read_rpath(&elf, path)?; + fn analyze_dylib( + &mut self, + path: &Path, + dylib: impl InspectDylib, + ) -> Result { + let rpaths = self.read_rpath(&dylib, path)?; - let needed: Vec = elf.libraries().iter().map(ToString::to_string).collect(); + let needed: Vec = dylib.libraries().iter().map(ToString::to_string).collect(); let mut libraries = HashMap::new(); let mut stack = needed.clone(); @@ -146,12 +157,12 @@ impl DependencyAnalyzer { if libraries.contains_key(&lib_name) { continue; } - let library = self.find_library(&elf, &lib_name)?; + let library = self.find_library(&dylib, &lib_name)?; libraries.insert(lib_name, library.clone()); stack.extend(library.needed); } - let interpreter = elf.interpreter().map(|interp| interp.to_string()); + let interpreter = dylib.interpreter().map(|interp| interp.to_string()); if let Some(ref interp) = interpreter { if !libraries.contains_key(interp) { let interp_path = self.root.join(interp.strip_prefix('/').unwrap_or(interp)); @@ -260,7 +271,7 @@ impl DependencyAnalyzer { 
} /// Try to locate a `lib_name` that is compatible to `dylib` - fn find_library(&self, dylib: &Elf, lib_name: &str) -> Result { + fn find_library(&self, dylib: &impl InspectDylib, lib_name: &str) -> Result { for lib_path in self .rpaths .iter() @@ -281,16 +292,35 @@ impl DependencyAnalyzer { if lib_path.exists() { let file = fs::File::open(&lib_path)?; // SAFETY: The file is memory-mapped read-only and we only perform read operations - // on the mapped bytes. We do not prevent other processes from modifying the file - // concurrently; such external modification is accepted as a risk for this tool. - // - // Note: The file handle does not need to remain open after mapping on POSIX systems, - // but we keep it in scope until parsing is complete for clarity and portability. + // on the mapped bytes. let bytes = unsafe { Mmap::map(&file)? }; - if let Ok(lib_elf) = Elf::parse(&bytes) { - if compatible_elfs(dylib, &lib_elf) { - let needed = lib_elf.libraries.iter().map(ToString::to_string).collect(); - let rpath = self.read_rpath(&lib_elf, &lib_path)?; + if let Ok(obj) = Object::parse(&bytes) { + if let Some((rpath, needed)) = match obj { + Object::Elf(ref elf) => { + if dylib.compatible(&obj) { + Some(( + self.read_rpath(elf, &lib_path)?, + elf.libraries().iter().map(ToString::to_string).collect(), + )) + } else { + None + } + } + Object::Mach(ref mach) => match mach { + Mach::Fat(_) => None, + Mach::Binary(ref macho) => { + if dylib.compatible(&obj) { + Some(( + self.read_rpath(macho, &lib_path)?, + macho.libraries().iter().map(ToString::to_string).collect(), + )) + } else { + None + } + } + }, + _ => None, + } { return Ok(Library { name: lib_name.to_string(), path: lib_path.to_path_buf(), @@ -322,32 +352,3 @@ fn find_musl_libc() -> Result, Error> { _ => Ok(None), } } - -/// See if two ELFs are compatible -/// -/// This compares the aspects of the ELF to see if they're compatible: -/// bit size, endianness, machine type, and operating system. 
-fn compatible_elfs(elf1: &Elf, elf2: &Elf) -> bool { - if elf1.is_64 != elf2.is_64 { - return false; - } - if elf1.little_endian != elf2.little_endian { - return false; - } - if elf1.header.e_machine != elf2.header.e_machine { - return false; - } - let compatible_osabis = &[ - ELFOSABI_NONE, // ELFOSABI_NONE / ELFOSABI_SYSV - ELFOSABI_GNU, // ELFOSABI_GNU / ELFOSABI_LINUX - ]; - let osabi1 = elf1.header.e_ident[EI_OSABI]; - let osabi2 = elf2.header.e_ident[EI_OSABI]; - if osabi1 != osabi2 - && !compatible_osabis.contains(&osabi1) - && !compatible_osabis.contains(&osabi2) - { - return false; - } - true -} diff --git a/src/macho.rs b/src/macho.rs index 4614301..9d4895a 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -1,4 +1,4 @@ -use goblin::mach::MachO; +use goblin::{mach::MachO, Object}; use crate::InspectDylib; @@ -14,4 +14,11 @@ impl InspectDylib for MachO<'_> { fn interpreter(&self) -> Option<&str> { None } + + fn compatible(&self, other: &Object) -> bool { + match other { + Object::Mach(_) => true, + _ => false, + } + } } From cf47593c9f985aff64d5817fe57b0942403dd24e Mon Sep 17 00:00:00 2001 From: messense Date: Sat, 1 Oct 2022 18:58:48 +0800 Subject: [PATCH 03/19] Remove `self`/dylib id from libraries for macho --- src/elf.rs | 4 ++-- src/lib.rs | 2 +- src/macho.rs | 7 +++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/elf.rs b/src/elf.rs index b635626..9be9868 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -17,8 +17,8 @@ impl InspectDylib for Elf<'_> { } } - fn libraries(&self) -> &[&str] { - &self.libraries + fn libraries(&self) -> Vec<&str> { + self.libraries.clone() } fn interpreter(&self) -> Option<&str> { diff --git a/src/lib.rs b/src/lib.rs index a40edd0..9d90028 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,7 +58,7 @@ trait InspectDylib { /// Runtime library search paths. fn rpaths(&self) -> &[&str]; /// A list of this binary's dynamic libraries it depends on directly. 
- fn libraries(&self) -> &[&str]; + fn libraries(&self) -> Vec<&str>; /// The binary's program interpreter (e.g., dynamic linker). fn interpreter(&self) -> Option<&str>; /// See if two dynamic libraries are compatible. diff --git a/src/macho.rs b/src/macho.rs index 9d4895a..b151159 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -7,8 +7,11 @@ impl InspectDylib for MachO<'_> { &self.rpaths } - fn libraries(&self) -> &[&str] { - &self.libs + fn libraries(&self) -> Vec<&str> { + // goblin always add `self` or dylib id as a needed library, so we need to remove it, see + // https://github.com/m4b/goblin/blob/6fdaffdc411bacd5dd7095dc93cec66302ca2575/src/mach/mod.rs#L174 + // https://github.com/m4b/goblin/blob/6fdaffdc411bacd5dd7095dc93cec66302ca2575/src/mach/mod.rs#L231-L235 + self.libs[1..].to_vec() } fn interpreter(&self) -> Option<&str> { From ec3e06d035044655bb977bf0315ae37af2481fde Mon Sep 17 00:00:00 2001 From: messense Date: Sat, 1 Oct 2022 20:25:07 +0800 Subject: [PATCH 04/19] Add PE support --- src/lib.rs | 12 ++++++++++++ src/pe.rs | 24 ++++++++++++++++++++++++ tests/test.pe | Bin 0 -> 10752 bytes 3 files changed, 36 insertions(+) create mode 100644 src/pe.rs create mode 100644 tests/test.pe diff --git a/src/lib.rs b/src/lib.rs index 9d90028..6612463 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,7 @@ mod elf; mod errors; pub mod ld_so_conf; mod macho; +mod pe; pub use errors::Error; use ld_so_conf::parse_ld_so_conf; @@ -137,6 +138,7 @@ impl DependencyAnalyzer { Mach::Fat(_) => return Err(Error::UnsupportedBinary), Mach::Binary(macho) => self.analyze_dylib(path, macho)?, }, + Object::PE(pe) => self.analyze_dylib(path, pe)?, _ => return Err(Error::UnsupportedBinary), }; Ok(dep_tree) @@ -319,6 +321,16 @@ impl DependencyAnalyzer { } } }, + Object::PE(ref pe) => { + if dylib.compatible(&obj) { + Some(( + self.read_rpath(pe, &lib_path)?, + pe.libraries().iter().map(ToString::to_string).collect(), + )) + } else { + None + } + } _ => None, } { return Ok(Library 
{ diff --git a/src/pe.rs b/src/pe.rs new file mode 100644 index 0000000..3719f83 --- /dev/null +++ b/src/pe.rs @@ -0,0 +1,24 @@ +use goblin::{pe::PE, Object}; + +use crate::InspectDylib; + +impl InspectDylib for PE<'_> { + fn rpaths(&self) -> &[&str] { + &[] + } + + fn libraries(&self) -> Vec<&str> { + self.libraries.clone() + } + + fn interpreter(&self) -> Option<&str> { + None + } + + fn compatible(&self, other: &Object) -> bool { + match other { + Object::PE(_) => true, + _ => false, + } + } +} diff --git a/tests/test.pe b/tests/test.pe new file mode 100644 index 0000000000000000000000000000000000000000..5d0f29d6669efae90d7a05df572d98957531e178 GIT binary patch literal 10752 zcmeHNe{@vUoxhW0!Xz+Ev>A*7`hbaqYSJOYVh|z|GtnD6)Cd7t3{Hm33mKhX&b)!Z z+8R2Pi#zgd<7~b#yYei7leEt1sbb8FK1Trq)VYPRvjp9Y{X#QH1WG$& zR});oG*g6rTFK5s*U9|^#w?TR3p&+EVz&g|u9@1isPa zt~3M;;MU~CP!xiikJ`~dcHz^>#m(6Dlj+Yh&NJpkaAWSMQFz+xu<&%Nqs*K3I4Zm# zg*EGL<*7HETt3G$l4Jcr3M18i2ob{N5#^Aos!gsVJS{l{uRO@*7r1Ou?cbs1)M2OZ z_S?=YhYg;wF1v`a(fiRWZ}aHU&Q9AEJGEut%dK}^!C1144-AxbzBiz%NmGsw94Q-p zun@cBC61K=rJqxzDX+Wr!m1?RZiK0h#NC2Hv-rm&uYA8`8dH*8-!N% zW^E4)Xk-OC@pK!_S8|l`^xY1jVI2$zTz=atzvVijyb2{dv$mZ>=%|FvjO`xT<#D*( zPnc5m-=iqC_5KRaL30`e4MFLL(9W!Mvocr$>ir@-E(MEc?61Q*W`7Dm`FJ{Gz1id< zWvwnQp#6~E{!?lnLHnS236xU!x@*hwylB15xcRM(IAvD~cI|wCA zIgICCS|{?0w(RcR=VY@T{d=?bSoO|sbaDD}>QLJJ;aqfDgK27>5mNQvL}DsPUZZTr z%#>1^|2YgdRe1;a4AUrt0oSm5_w&kJVDFC1K9zdKqy!-?=f?W^;im!#urS z@TT1r=@zUHx2(HWTOfC{JTAYFCF&hc+Lqb7F@}4lxw4~g!bIkJHFegswTa8-N06I1 z+*~^7aX7Wfm#g*#V9FziZ6^X@=g-jtUX%Y7E>j-)NL44AzB{?eir~|mQmQGr6iRE+ z%{5Hj-h#_{@4JX9<>`-A6^0k;hNYQ#8P)zM@@20onNwQW$`!IC$cdyYmz)9B;)vh88x%{kZe-nsmKdX^Lhf0bV?JMCn zu2x<>10q_^NWD>a@Dlh;wU>d{o0X;=q*y$*2tj`@PEqA2pt$To)TPZ~EE&%bp2}kT znLmO6roWEZY4hN0UD6HwS+oqA_k%-b2h9%yqqQ6~|Iont5nqhonWf$Jed;+_WFpHG zeAzme_-SvEkY}2>Y-M>SI^K;jD=IBt>Tn$#>DH5;o69Fv`{`2jmj8*)9v5<5mS20-Myo#{^DS(ZtoR;gAe}pg`JWu3uQ6*g@xXdkMX1;t#DtBeax$S4x3Mfjx zS*z#z1E#U}XdhsFNR_@S=X^J1n*c+4jD>s9q+=Pe0oY<2gI%6b9g|RWuQ4+)69951Y 
z`C-q`N_}8TUZkZAE+6CaplaU&zfm}?m%WY_W5Z#v_u1r3e+?+aR+IolbJ1hcLva0t~(t%ON zNwF1uF?AaCcA`JhWoqDMSEns%{)mpc)R6jn+dbw62p|S;0w9tg$Ta{@o9E(b(makK z9E4Jwy?AJGfMao>f7J%YI+vNPui-+`%{#1aAUJeCLd5aHXLc;3_kg6Ye>xIJ=dy;1 zWUKe8r)*E$QqEIPRq(7wE%njc1z1ZTG_? z9tS6*i^wRfxmEitpa{PiaJ?nN3zc7L=l4FxaT3{an0&Soc_@Du5_o3rcPQg$>h}PQ zU5r~6%^#_c_ApQ7EPVOt_^7^9X@BbLpzV0{-K_U*zo75lKhXCn1Tfj2NE>CYtm}xa z&m#{sxlVcH;f5XK5lIX+?07pE5t}{ov+}s>NJFO4)HMCLt*>FnTQ`c4WV*=FH2nnh z6v^k{cndD~Gm^LQlyxEA+tkYOm8y*onr-wF*TYzSw%Ej^s`~cg0w&F?j}f@ceBj)N zxK;^U9UQlR`onB(e>0FM_sHgd$Dz=W8Zvp(=3$^?)4)3dq#w69=E?96mXF_kB@G(` z@yK*>DasGScIcIoL}}CX0rY(kT?WlPc+V#7p0xGrKy8O!E4QV-tuwm7coHGU(sTWK z+w8ajrmV`vh_&?GmjP&ztLQ!~;WC{% zT;|JZM;{WPE4$46A|!BxqCMwp&vNZqjOWp# z2TVEWgsSHe{MhTCEEDff>X}crPJX{qD@nh5EzSV===I&J+>~oJj)ssWj!t3=$_8-u zYK^&x23`0`oYy)JuMw|v*(4)GvA4+=W)PD<7WKH94`DU_$HR zIf_$$B=v5fG-&^Yrr2F@G(Pv zyFs@b&|^TS0rOKWuhy}aFYB<*fH4E+)`8nDiQ)Gz<-7ul83 zH_(?Bpwg=>O1^Cda5^Dd9H zMPY1=4B~S0lUa=Y3+jD1A-{=t!pBfwz%6eCbv`b>2k@!w00#KC!0F6=7x1qDSFiD` zSsK3c(nTZQ8`9?3jZ57>Izh|Rcx~I-gcwh>`2+sQ<~EO*xKoP8+5%!|i@3E*6hm#2 zn2-`}iC{Pu67gNQZBsHB3K(=C6l&{Ux}3eb(j?%s}JHipLSr+453Yh0PoX+!O*LmHH3Ho>Injn2AcKjLjLkzMy>gRNJbZ*8T z0FCzqR@1l|uS$&#B*UljaQnx)P2n|ba_gt^G`aREycSKaYbsBZi%sFRYI5CEd74}g zcntB8|8|a;Wo=;Tv|~#&B!vT@DeiuIUe2+7`}V*lOpj%+H0BqVIvmXnnw_C3ybYS2 zrXp&;RyNBkDB$2GK&}(|ukGl+9VBudBgx5zh%8{ia~> zmJpBf_p3Aqy)P}V_Wj=tkpE4c z#ExKRu*28oj|4(uoUuyq6H>q@ZHtM%V5BqZ3*an?N4NPx!Gy%vbu27~6C$v6jjPvw zt+jbo)8ZvgZ7HGH=ks^OC0{VQ$=8{Tbl_c0(-w^SI-=olG~(OR8H)!aQYX$0Xb(z~ z7!R{+CUBp~Rx)2g?DBO6Lum9-j8rgmk8}n%C*z_o;*ZCpTYdic<}LWpp!W>=L&5Fm zo5aYLU_2TLixG@+$rOobMC=YqKFPl+B+}2BKCxTuNJ^q_O2aJJBRR(Wv0!yLQN1-7 zsm8pj;h12wI<&ZYakW0>3)=BJxfBeGlO=v#v|0)^`ePEt-WbKpK7OTriFn><^v-1L z26#ydMk7rTDZY)dSEyxOFfJwip|ug1#j%n#uZ`%-6KLx05Mv~MgXSFkYGPg^N`J6f zD7aCS8k6xjJkb)5c8EyVKQOn#<-v$w67wv!r?5F;5YhMzqMydv5*HKXz|}>!B*pl) z7BSu#jfee_4$K0xM5xUBr1s_jtDMAb?TU;30GRCe#9gy3A&KEuIH)0k35bNTN|Ps; zpykkoy}?a!e|%e`KNL!^Xkjx#L|@!BUW6ZqI@XKPdc;l1&6~w|ew1HrTK%;qud7y% 
z!xa>x9WnUxvtN#F6oxxrRMq^i4EbD4Ihf8v{pO(|8cF;w%4f#|WjI$wRJs%H1^frp zao`648SX)Q7V2;o?jG-eUjeuewGMoO$8Zn1A2|JgjI|u^FTl$HJ*WzBg0t`r^CIvH zz%=Sh!1n;w;hkn2IKkUc$yN{G!v?+&@Ye=D0yqQjWF$jyo`E|6y#`LuZ{PvI!>A72 z83?{(@CjOQ&mlboZ$YJTwgR3u@H2qjxc__zKEZvcBt!6520j9~0KXsBLB9jAoqkIM zP7vP^H2e`jFYZ>Y;NyD&8%FH{j&BOueKH1|?v-?3bV5Htx+@Y+knVYe6Qn!YFMt!= zg*pd)|L==Qd-ufjd#38fJrE4%r||KkEaPaS`}w!0cG9R7E4#xXVGG_iaqQGDTwLQ^ zD2S1cXdoEbT)%K_>kZX)3x$M)>w`apL$rS3HZieq<#pE575+p*3~vf;6QCfHs9%_j zN3KnDbctbqqB#f2__<~Y x`gaG*{<8fQ`-S~g`|I}a+P`;yMW3^;t}oj+(s!m$=^O7m+xNNl(r0$^{|$6lu_FKg literal 0 HcmV?d00001 From cb29ec8991b27022e79d6631c6d3424ab3f0db36 Mon Sep 17 00:00:00 2001 From: messense Date: Sat, 1 Oct 2022 20:27:43 +0800 Subject: [PATCH 05/19] Add test cases for macho and pe --- tests/test_lddtree.rs | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/tests/test_lddtree.rs b/tests/test_lddtree.rs index 20860a0..4139bf7 100644 --- a/tests/test_lddtree.rs +++ b/tests/test_lddtree.rs @@ -1,7 +1,7 @@ use lddtree::DependencyAnalyzer; #[test] -fn test_lddtree() { +fn test_elf() { let analyzer = DependencyAnalyzer::default(); let deps = analyzer.analyze("tests/test.elf").unwrap(); assert_eq!( @@ -20,3 +20,37 @@ fn test_lddtree() { ); assert_eq!(deps.libraries.len(), 6); } + +#[test] +fn test_macho() { + let analyzer = DependencyAnalyzer::default(); + let deps = analyzer.analyze("tests/test.macho").unwrap(); + assert!(deps.interpreter.is_none()); + assert_eq!( + deps.needed, + &[ + "/usr/lib/libz.1.dylib", + "/usr/lib/libiconv.2.dylib", + "/System/Library/Frameworks/CoreFoundation.framework/Versions/A/CoreFoundation", + "/usr/lib/libSystem.B.dylib" + ] + ); + assert_eq!(deps.libraries.len(), 4); +} + +#[test] +fn test_pe() { + let analyzer = DependencyAnalyzer::default(); + let deps = analyzer.analyze("tests/test.pe").unwrap(); + assert!(deps.interpreter.is_none()); + assert_eq!( + deps.needed, + &[ + "KERNEL32.dll", + "VCRUNTIME140.dll", + 
"api-ms-win-crt-runtime-l1-1-0.dll", + "api-ms-win-crt-stdio-l1-1-0.dll" + ] + ); + assert_eq!(deps.libraries.len(), 4); +} From 624ba14645db5677a05ad2c480635d6e2b28e93b Mon Sep 17 00:00:00 2001 From: messense Date: Sat, 1 Oct 2022 20:29:53 +0800 Subject: [PATCH 06/19] Only run tests in main branch --- .github/workflows/CI.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2d5cc4f..e68a534 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,4 +1,8 @@ -on: [push, pull_request] +on: + push: + branches: + - main + pull_request: name: CI From 4994de179dcbb09dce264829983b40934a98a27e Mon Sep 17 00:00:00 2001 From: messense Date: Sat, 1 Oct 2022 23:57:24 +0800 Subject: [PATCH 07/19] Refine macho `compatible` function --- src/macho.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/macho.rs b/src/macho.rs index b151159..f125ce5 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -1,4 +1,7 @@ -use goblin::{mach::MachO, Object}; +use goblin::{ + mach::{Mach, MachO}, + Object, +}; use crate::InspectDylib; @@ -20,7 +23,30 @@ impl InspectDylib for MachO<'_> { fn compatible(&self, other: &Object) -> bool { match other { - Object::Mach(_) => true, + Object::Mach(mach) => match mach { + Mach::Fat(fat) => { + for macho in fat { + if let Ok(goblin::mach::SingleArch::MachO(macho)) = macho { + if self.compatible(&Object::Mach(Mach::Binary(macho))) { + return true; + } + } + } + false + } + Mach::Binary(macho) => { + if self.is_64 != macho.is_64 { + return false; + } + if self.little_endian != macho.little_endian { + return false; + } + if self.header.cputype != macho.header.cputype { + return false; + } + true + } + }, _ => false, } } From 88b031df03b70d202da6a206d43bed15e3a24b9d Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 2 Oct 2022 00:01:23 +0800 Subject: [PATCH 08/19] Refine PE `compatible` function --- src/pe.rs | 10 
+++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/pe.rs b/src/pe.rs index 3719f83..031ddc0 100644 --- a/src/pe.rs +++ b/src/pe.rs @@ -17,7 +17,15 @@ impl InspectDylib for PE<'_> { fn compatible(&self, other: &Object) -> bool { match other { - Object::PE(_) => true, + Object::PE(pe) => { + if self.is_64 != pe.is_64 { + return false; + } + if self.header.coff_header.machine != pe.header.coff_header.machine { + return false; + } + true + } _ => false, } } From 7442af5a3ed24ce50c6a813fb7fb76509f2de36b Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 14:27:32 +0800 Subject: [PATCH 09/19] Implement MachO and PE specific find_library MachO: - Handle @rpath/, @loader_path/, @executable_path/ prefixes - Load DYLD_LIBRARY_PATH and DYLD_FALLBACK_LIBRARY_PATH - Default fallback paths: ~/lib, /usr/local/lib, /lib, /usr/lib - Support fat/universal binaries (select native arch) PE: - Search application directory first - Search Windows system directories (System32, System, Windows) - Search current directory and PATH environment variable - Support Wine-style directory layouts Also refactored: - Format-aware path loading (load_elf_paths, load_macho_paths, load_pe_paths) - Format-dispatched find_library (find_elf_library, find_macho_library, find_pe_library) - Shared try_library_candidates for compatibility checking - Added BinaryFormat enum and format() method to InspectDylib trait --- src/elf.rs | 6 +- src/lib.rs | 458 ++++++++++++++++++++++++++++++++++++++++++++------- src/macho.rs | 6 +- src/pe.rs | 6 +- 4 files changed, 414 insertions(+), 62 deletions(-) diff --git a/src/elf.rs b/src/elf.rs index 9be9868..207598f 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -6,7 +6,7 @@ use goblin::{ Object, }; -use crate::InspectDylib; +use crate::{BinaryFormat, InspectDylib}; impl InspectDylib for Elf<'_> { fn rpaths(&self) -> &[&str] { @@ -58,4 +58,8 @@ impl InspectDylib for Elf<'_> { _ => false, } } + + fn format(&self) -> BinaryFormat { + 
BinaryFormat::Elf + } } diff --git a/src/lib.rs b/src/lib.rs index 6612463..5ac2698 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,6 @@ -//! Read the ELF dependency tree. +//! Read the dynamic library dependency tree. +//! +//! Supports ELF (Linux), Mach-O (macOS), and PE (Windows) binary formats. //! //! This does not work like `ldd` in that we do not execute/load code (only read //! files on disk). @@ -55,6 +57,14 @@ pub struct DependencyTree { pub rpath: Vec, } +/// The binary format being analyzed. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum BinaryFormat { + Elf, + MachO, + PE, +} + trait InspectDylib { /// Runtime library search paths. fn rpaths(&self) -> &[&str]; @@ -64,6 +74,8 @@ trait InspectDylib { fn interpreter(&self) -> Option<&str>; /// See if two dynamic libraries are compatible. fn compatible(&self, other: &Object) -> bool; + /// The binary format of this dylib. + fn format(&self) -> BinaryFormat; } /// Library dependency analyzer @@ -74,6 +86,10 @@ pub struct DependencyAnalyzer { additional_ld_paths: Vec, rpaths: Vec, root: PathBuf, + /// Path to the main executable being analyzed (used for @executable_path on macOS) + executable_path: Option, + /// The detected binary format + format: Option, } impl Default for DependencyAnalyzer { @@ -91,6 +107,8 @@ impl DependencyAnalyzer { additional_ld_paths: Vec::new(), rpaths: Vec::new(), root, + executable_path: None, + format: None, } } @@ -115,8 +133,20 @@ impl DependencyAnalyzer { fn read_rpath(&self, lib: &impl InspectDylib, path: &Path) -> Result, Error> { let mut rpaths = Vec::new(); for rpath in lib.rpaths() { - if let Ok(ld_paths) = self.parse_ld_paths(rpath, path) { - rpaths = ld_paths; + if lib.format() == BinaryFormat::Elf { + if let Ok(ld_paths) = self.parse_ld_paths(rpath, path) { + rpaths = ld_paths; + } + } else { + // For MachO, rpaths may contain @executable_path or @loader_path + // that need resolution, but we store them as-is for now and resolve + // them during find_library 
+ let resolved = self.resolve_macho_path(rpath, path); + if let Some(resolved) = resolved { + rpaths.push(resolved.display().to_string()); + } else { + rpaths.push(rpath.to_string()); + } } } Ok(rpaths) @@ -125,7 +155,7 @@ impl DependencyAnalyzer { /// Analyze the given binary. pub fn analyze(mut self, path: impl AsRef) -> Result { let path = path.as_ref(); - self.load_ld_paths(path)?; + self.executable_path = Some(path.to_path_buf()); let file = fs::File::open(path)?; // SAFETY: The file is memory-mapped read-only and we only perform read operations @@ -133,12 +163,59 @@ impl DependencyAnalyzer { // concurrently; such external modification is accepted as a risk for this tool. let bytes = unsafe { Mmap::map(&file)? }; let dep_tree = match Object::parse(&bytes)? { - Object::Elf(elf) => self.analyze_dylib(path, elf)?, - Object::Mach(mach) => match mach { - Mach::Fat(_) => return Err(Error::UnsupportedBinary), - Mach::Binary(macho) => self.analyze_dylib(path, macho)?, - }, - Object::PE(pe) => self.analyze_dylib(path, pe)?, + Object::Elf(elf) => { + self.format = Some(BinaryFormat::Elf); + self.load_elf_paths(path)?; + self.analyze_dylib(path, elf)? + } + Object::Mach(mach) => { + self.format = Some(BinaryFormat::MachO); + self.load_macho_paths(path)?; + match mach { + Mach::Fat(fat) => { + // For fat/universal binaries, find the best matching architecture. + // Prefer the native architecture, otherwise use the first one. 
+ let arches: Vec<_> = fat.into_iter().collect(); + let mut selected = None; + for (i, arch) in arches.iter().enumerate() { + if let Ok(goblin::mach::SingleArch::MachO(ref macho)) = arch { + if selected.is_none() { + selected = Some(i); + } + // Prefer native arch + #[cfg(target_arch = "x86_64")] + if macho.header.cputype + == goblin::mach::cputype::CPU_TYPE_X86_64 + { + selected = Some(i); + break; + } + #[cfg(target_arch = "aarch64")] + if macho.header.cputype == goblin::mach::cputype::CPU_TYPE_ARM64 + { + selected = Some(i); + break; + } + } + } + match selected { + Some(idx) => match arches.into_iter().nth(idx) { + Some(Ok(goblin::mach::SingleArch::MachO(macho))) => { + self.analyze_dylib(path, macho)? + } + _ => return Err(Error::UnsupportedBinary), + }, + None => return Err(Error::UnsupportedBinary), + } + } + Mach::Binary(macho) => self.analyze_dylib(path, macho)?, + } + } + Object::PE(pe) => { + self.format = Some(BinaryFormat::PE); + self.load_pe_paths(path)?; + self.analyze_dylib(path, pe)? 
+ } _ => return Err(Error::UnsupportedBinary), }; Ok(dep_tree) @@ -159,7 +236,7 @@ impl DependencyAnalyzer { if libraries.contains_key(&lib_name) { continue; } - let library = self.find_library(&dylib, &lib_name)?; + let library = self.find_library(&dylib, &lib_name, path)?; libraries.insert(lib_name, library.clone()); stack.extend(library.needed); } @@ -195,7 +272,9 @@ impl DependencyAnalyzer { Ok(dep_tree) } - /// Parse the colon-delimited list of paths and apply ldso rules + // ---- ELF-specific path loading ---- + + /// Parse the colon-delimited list of paths and apply ldso rules (ELF-specific) fn parse_ld_paths(&self, ld_path: &str, dylib_path: &Path) -> Result, Error> { let mut paths = Vec::new(); for path in ld_path.split(':') { @@ -220,7 +299,7 @@ impl DependencyAnalyzer { Ok(paths) } - fn load_ld_paths(&mut self, dylib_path: &Path) -> Result<(), Error> { + fn load_elf_paths(&mut self, dylib_path: &Path) -> Result<(), Error> { #[cfg(unix)] if let Ok(env_ld_path) = env::var("LD_LIBRARY_PATH") { if self.root == Path::new("/") { @@ -272,9 +351,169 @@ impl DependencyAnalyzer { Ok(()) } - /// Try to locate a `lib_name` that is compatible to `dylib` - fn find_library(&self, dylib: &impl InspectDylib, lib_name: &str) -> Result { - for lib_path in self + // ---- MachO-specific path loading ---- + + /// Load macOS-specific library search paths. + /// + /// macOS dyld search order: + /// 1. DYLD_LIBRARY_PATH (environment) + /// 2. rpaths (for @rpath/ prefixed names) + /// 3. The library's install name path + /// 4. 
DYLD_FALLBACK_LIBRARY_PATH (defaults to ~/lib:/usr/local/lib:/lib:/usr/lib) + /// + /// See: http://clarkkromenaker.com/post/library-dynamic-loading-mac/ + /// See: https://matthew-brett.github.io/docosx/mac_runtime_link.html + fn load_macho_paths(&mut self, _dylib_path: &Path) -> Result<(), Error> { + // DYLD_LIBRARY_PATH: searched before everything else + if let Ok(dyld_lib_path) = env::var("DYLD_LIBRARY_PATH") { + for path in dyld_lib_path.split(':') { + if !path.is_empty() { + self.env_ld_paths.push(path.to_string()); + } + } + } + // DYLD_FALLBACK_LIBRARY_PATH: searched after rpaths and install name + // If not set, defaults to ~/lib:/usr/local/lib:/lib:/usr/lib + match env::var("DYLD_FALLBACK_LIBRARY_PATH") { + Ok(fallback_path) => { + for path in fallback_path.split(':') { + if !path.is_empty() { + self.conf_ld_paths.push(path.to_string()); + } + } + } + Err(_) => { + // Default fallback paths + if let Ok(home) = env::var("HOME") { + self.conf_ld_paths.push(format!("{}/lib", home)); + } + let root_str = self.root.display().to_string(); + let root_str = root_str.strip_suffix('/').unwrap_or(&root_str); + self.conf_ld_paths + .push(format!("{}/usr/local/lib", root_str)); + self.conf_ld_paths.push(format!("{}/lib", root_str)); + self.conf_ld_paths.push(format!("{}/usr/lib", root_str)); + } + } + self.conf_ld_paths.dedup(); + Ok(()) + } + + /// Resolve a macOS path variable (@executable_path, @loader_path, @rpath). 
+ /// + /// - `@executable_path/` → replaced with the directory of the main executable + /// - `@loader_path/` → replaced with the directory of the binary that contains the load command + /// - `@rpath/` → returns None (must be resolved by iterating rpaths) + fn resolve_macho_path(&self, path: &str, loader_path: &Path) -> Option { + if let Some(rest) = path.strip_prefix("@executable_path/") { + let exe_dir = self + .executable_path + .as_ref() + .and_then(|p| p.parent()) + .unwrap_or(Path::new(".")); + Some(exe_dir.join(rest)) + } else if let Some(rest) = path.strip_prefix("@loader_path/") { + let loader_dir = loader_path.parent().unwrap_or(Path::new(".")); + Some(loader_dir.join(rest)) + } else if path.starts_with("@rpath/") { + // @rpath must be resolved by iterating rpaths - return None + None + } else { + // Absolute or relative path + Some(PathBuf::from(path)) + } + } + + // ---- PE-specific path loading ---- + + /// Load Windows PE-specific library search paths. + /// + /// Windows DLL search order (Standard Search Order): + /// 1. The directory from which the application loaded + /// 2. The system directory (e.g., C:\Windows\System32) + /// 3. The 16-bit system directory (e.g., C:\Windows\System) + /// 4. The Windows directory (e.g., C:\Windows) + /// 5. The current directory + /// 6. Directories listed in the PATH environment variable + /// + /// See: https://stefanoborini.com/windows-dll-search-path/ + /// See: https://stmxcsr.com/dll-search-order.html + fn load_pe_paths(&mut self, dylib_path: &Path) -> Result<(), Error> { + let root_str = self.root.display().to_string(); + let root_str = root_str.strip_suffix('/').unwrap_or(&root_str); + let root_str = root_str.strip_suffix('\\').unwrap_or(root_str); + + // 1. Application directory + if let Some(app_dir) = dylib_path.parent() { + self.env_ld_paths + .push(app_dir.display().to_string()); + } + + // 2-4. 
System directories (relative to root) + // Try common Windows system directory layouts + for sys_dir in &[ + "Windows/System32", + "Windows/System", + "Windows", + "windows/system32", + "windows/system", + "windows", + // Wine-style paths + "drive_c/windows/system32", + "drive_c/windows", + ] { + let full_path = format!("{}/{}", root_str, sys_dir); + if Path::new(&full_path).is_dir() { + self.conf_ld_paths.push(full_path); + } + } + + // 5. Current directory + if let Ok(cwd) = env::current_dir() { + self.conf_ld_paths.push(cwd.display().to_string()); + } + + // 6. PATH environment variable + let path_sep = if cfg!(windows) { ';' } else { ':' }; + if let Ok(path_env) = env::var("PATH") { + for path in path_env.split(path_sep) { + if !path.is_empty() { + self.conf_ld_paths.push(path.to_string()); + } + } + } + + self.conf_ld_paths.dedup(); + Ok(()) + } + + // ---- Library finding ---- + + /// Try to locate a `lib_name` that is compatible to `dylib`. + /// + /// Dispatches to format-specific find logic based on the binary format. + fn find_library( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + loader_path: &Path, + ) -> Result { + match dylib.format() { + BinaryFormat::MachO => self.find_macho_library(dylib, lib_name, loader_path), + BinaryFormat::PE => self.find_pe_library(dylib, lib_name), + BinaryFormat::Elf => self.find_elf_library(dylib, lib_name), + } + } + + /// Try to locate an ELF library. + /// + /// Search order: rpaths, LD_LIBRARY_PATH, ld.so.conf paths, additional paths. 
+ fn find_elf_library( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + ) -> Result { + let candidates: Vec = self .rpaths .iter() .chain(self.env_ld_paths.iter()) @@ -289,58 +528,159 @@ impl DependencyAnalyzer { .iter() .map(|ld_path| ld_path.join(lib_name)), ) - { - // FIXME: readlink to get real path - if lib_path.exists() { - let file = fs::File::open(&lib_path)?; - // SAFETY: The file is memory-mapped read-only and we only perform read operations - // on the mapped bytes. - let bytes = unsafe { Mmap::map(&file)? }; - if let Ok(obj) = Object::parse(&bytes) { - if let Some((rpath, needed)) = match obj { - Object::Elf(ref elf) => { - if dylib.compatible(&obj) { - Some(( - self.read_rpath(elf, &lib_path)?, - elf.libraries().iter().map(ToString::to_string).collect(), - )) - } else { - None - } + .collect(); + self.try_library_candidates(dylib, lib_name, &candidates) + } + + /// Try to locate a Mach-O library. + /// + /// Handles @rpath/, @loader_path/, @executable_path/ prefixes. + /// Search order: + /// 1. DYLD_LIBRARY_PATH + /// 2. @rpath expansion (if lib_name starts with @rpath/) + /// 3. @executable_path / @loader_path resolution + /// 4. Direct path (absolute install name) + /// 5. DYLD_FALLBACK_LIBRARY_PATH + /// 6. Additional user-provided paths + fn find_macho_library( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + loader_path: &Path, + ) -> Result { + let mut candidates: Vec = Vec::new(); + + // Extract the filename for searching in DYLD_LIBRARY_PATH etc. + let file_name = Path::new(lib_name) + .file_name() + .unwrap_or_default() + .to_str() + .unwrap_or(lib_name); + + // 1. DYLD_LIBRARY_PATH (searched first, using just the leaf filename) + for path in &self.env_ld_paths { + candidates.push(PathBuf::from(path).join(file_name)); + } + + // 2-3. 
Handle path variable prefixes + if let Some(rest) = lib_name.strip_prefix("@rpath/") { + // Search each rpath for the library + for rpath in &self.rpaths { + candidates.push(PathBuf::from(rpath).join(rest)); + } + } else if let Some(resolved) = self.resolve_macho_path(lib_name, loader_path) { + // @executable_path, @loader_path, or absolute path + candidates.push(resolved); + } + + // 4. DYLD_FALLBACK_LIBRARY_PATH (using just the leaf filename) + for path in &self.conf_ld_paths { + candidates.push(PathBuf::from(path).join(file_name)); + } + + // 5. Additional user-provided paths + for path in &self.additional_ld_paths { + candidates.push(path.join(file_name)); + } + + self.try_library_candidates(dylib, lib_name, &candidates) + } + + /// Try to locate a PE library (DLL). + /// + /// Search order: + /// 1. Application directory (from env_ld_paths) + /// 2. System directories (from conf_ld_paths) + /// 3. PATH directories (from conf_ld_paths) + /// 4. Additional user-provided paths + fn find_pe_library( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + ) -> Result { + let candidates: Vec = self + .env_ld_paths + .iter() + .chain(self.conf_ld_paths.iter()) + .map(|ld_path| PathBuf::from(ld_path).join(lib_name)) + .chain( + self.additional_ld_paths + .iter() + .map(|ld_path| ld_path.join(lib_name)), + ) + .collect(); + self.try_library_candidates(dylib, lib_name, &candidates) + } + + /// Try a list of candidate paths and return the first compatible library found. + fn try_library_candidates( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + candidates: &[PathBuf], + ) -> Result { + for lib_path in candidates { + if !lib_path.exists() { + continue; + } + let file = match fs::File::open(lib_path) { + Ok(f) => f, + Err(_) => continue, + }; + // SAFETY: The file is memory-mapped read-only and we only perform read operations + // on the mapped bytes. 
+ let bytes = match unsafe { Mmap::map(&file) } { + Ok(m) => m, + Err(_) => continue, + }; + if let Ok(obj) = Object::parse(&bytes) { + if let Some((rpath, needed)) = match obj { + Object::Elf(ref elf) => { + if dylib.compatible(&obj) { + Some(( + self.read_rpath(elf, lib_path)?, + elf.libraries().iter().map(ToString::to_string).collect(), + )) + } else { + None } - Object::Mach(ref mach) => match mach { - Mach::Fat(_) => None, - Mach::Binary(ref macho) => { - if dylib.compatible(&obj) { - Some(( - self.read_rpath(macho, &lib_path)?, - macho.libraries().iter().map(ToString::to_string).collect(), - )) - } else { - None - } - } - }, - Object::PE(ref pe) => { + } + Object::Mach(ref mach) => match mach { + Mach::Fat(_) => None, + Mach::Binary(ref macho) => { if dylib.compatible(&obj) { Some(( - self.read_rpath(pe, &lib_path)?, - pe.libraries().iter().map(ToString::to_string).collect(), + self.read_rpath(macho, lib_path)?, + macho + .libraries() + .iter() + .map(ToString::to_string) + .collect(), )) } else { None } } - _ => None, - } { - return Ok(Library { - name: lib_name.to_string(), - path: lib_path.to_path_buf(), - realpath: fs::canonicalize(lib_path).ok(), - needed, - rpath, - }); + }, + Object::PE(ref pe) => { + if dylib.compatible(&obj) { + Some(( + self.read_rpath(pe, lib_path)?, + pe.libraries().iter().map(ToString::to_string).collect(), + )) + } else { + None + } } + _ => None, + } { + return Ok(Library { + name: lib_name.to_string(), + path: lib_path.to_path_buf(), + realpath: fs::canonicalize(lib_path).ok(), + needed, + rpath, + }); } } } diff --git a/src/macho.rs b/src/macho.rs index f125ce5..8c22442 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -3,7 +3,7 @@ use goblin::{ Object, }; -use crate::InspectDylib; +use crate::{BinaryFormat, InspectDylib}; impl InspectDylib for MachO<'_> { fn rpaths(&self) -> &[&str] { @@ -50,4 +50,8 @@ impl InspectDylib for MachO<'_> { _ => false, } } + + fn format(&self) -> BinaryFormat { + BinaryFormat::MachO + } } diff 
--git a/src/pe.rs b/src/pe.rs index 031ddc0..5378759 100644 --- a/src/pe.rs +++ b/src/pe.rs @@ -1,6 +1,6 @@ use goblin::{pe::PE, Object}; -use crate::InspectDylib; +use crate::{BinaryFormat, InspectDylib}; impl InspectDylib for PE<'_> { fn rpaths(&self) -> &[&str] { @@ -29,4 +29,8 @@ impl InspectDylib for PE<'_> { _ => false, } } + + fn format(&self) -> BinaryFormat { + BinaryFormat::PE + } } From 309d57d306f94901afa1922125bb9fc99b9da5c0 Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 14:28:27 +0800 Subject: [PATCH 10/19] Fix all clippy warnings - elf.rs: remove unnecessary clone() on Copy type (Option<&str>) - ld_so_conf.rs: use strip_prefix() instead of manual slicing - lib.rs: use if-let with .next() instead of for loop that never loops --- src/elf.rs | 2 +- src/ld_so_conf.rs | 3 +-- src/lib.rs | 43 ++++++++++++++++--------------------------- 3 files changed, 18 insertions(+), 30 deletions(-) diff --git a/src/elf.rs b/src/elf.rs index 207598f..40f3177 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -22,7 +22,7 @@ impl InspectDylib for Elf<'_> { } fn interpreter(&self) -> Option<&str> { - self.interpreter.clone() + self.interpreter } /// See if two ELFs are compatible diff --git a/src/ld_so_conf.rs b/src/ld_so_conf.rs index fbb04fb..b1ec59f 100644 --- a/src/ld_so_conf.rs +++ b/src/ld_so_conf.rs @@ -62,8 +62,7 @@ pub fn parse_ld_so_conf( if line.starts_with("#") { continue; } - if line.starts_with("include ") { - let include_path = &line[8..]; + if let Some(include_path) = line.strip_prefix("include ") { let include_path = if !include_path.starts_with('/') { let parent = path.parent().unwrap(); parent.join(include_path).display().to_string() diff --git a/src/lib.rs b/src/lib.rs index 5ac2698..ee051a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -184,15 +184,12 @@ impl DependencyAnalyzer { } // Prefer native arch #[cfg(target_arch = "x86_64")] - if macho.header.cputype - == goblin::mach::cputype::CPU_TYPE_X86_64 - { + if macho.header.cputype == 
goblin::mach::cputype::CPU_TYPE_X86_64 { selected = Some(i); break; } #[cfg(target_arch = "aarch64")] - if macho.header.cputype == goblin::mach::cputype::CPU_TYPE_ARM64 - { + if macho.header.cputype == goblin::mach::cputype::CPU_TYPE_ARM64 { selected = Some(i); break; } @@ -314,17 +311,18 @@ impl DependencyAnalyzer { let root_str = self.root.display().to_string(); let root_str = root_str.strip_suffix("/").unwrap_or(&root_str); let pattern = format!("{}/etc/ld-musl-*.path", root_str); - for entry in glob::glob(&pattern).expect("invalid glob pattern") { - if let Ok(entry) = entry { - let content = fs::read_to_string(&entry)?; - for line in content.lines() { - let line_stripped = line.trim(); - if !line_stripped.is_empty() { - self.conf_ld_paths - .push(root_str.to_string() + line_stripped); - } + if let Some(entry) = glob::glob(&pattern) + .expect("invalid glob pattern") + .flatten() + .next() + { + let content = fs::read_to_string(&entry)?; + for line in content.lines() { + let line_stripped = line.trim(); + if !line_stripped.is_empty() { + self.conf_ld_paths + .push(root_str.to_string() + line_stripped); } - break; } } // default ld paths @@ -445,8 +443,7 @@ impl DependencyAnalyzer { // 1. Application directory if let Some(app_dir) = dylib_path.parent() { - self.env_ld_paths - .push(app_dir.display().to_string()); + self.env_ld_paths.push(app_dir.display().to_string()); } // 2-4. System directories (relative to root) @@ -593,11 +590,7 @@ impl DependencyAnalyzer { /// 2. System directories (from conf_ld_paths) /// 3. PATH directories (from conf_ld_paths) /// 4. 
Additional user-provided paths - fn find_pe_library( - &self, - dylib: &impl InspectDylib, - lib_name: &str, - ) -> Result { + fn find_pe_library(&self, dylib: &impl InspectDylib, lib_name: &str) -> Result { let candidates: Vec = self .env_ld_paths .iter() @@ -651,11 +644,7 @@ impl DependencyAnalyzer { if dylib.compatible(&obj) { Some(( self.read_rpath(macho, lib_path)?, - macho - .libraries() - .iter() - .map(ToString::to_string) - .collect(), + macho.libraries().iter().map(ToString::to_string).collect(), )) } else { None From 4647416143758b8102414c37913789f7f7048f0c Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 14:31:05 +0800 Subject: [PATCH 11/19] Fix PE find_library searching PATH unconditionally on Windows Only search current directory and PATH environment variable when analyzing against the real filesystem root on Windows (e.g., C:\). This mirrors how ELF only uses LD_LIBRARY_PATH when root is '/'. Previously, PATH was always added to search paths, causing the analyzer to find real system DLLs (like KERNEL32.dll) and recursively resolve all their transitive dependencies, even when analyzing with a custom sysroot. --- src/lib.rs | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ee051a2..4943f2e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -465,17 +465,29 @@ impl DependencyAnalyzer { } } - // 5. Current directory - if let Ok(cwd) = env::current_dir() { - self.conf_ld_paths.push(cwd.display().to_string()); - } - - // 6. PATH environment variable - let path_sep = if cfg!(windows) { ';' } else { ':' }; - if let Ok(path_env) = env::var("PATH") { - for path in path_env.split(path_sep) { - if !path.is_empty() { - self.conf_ld_paths.push(path.to_string()); + // 5-6. Current directory and PATH environment variable + // Only use these when analyzing against the real filesystem root, + // since they contain absolute paths that don't make sense with a + // custom sysroot. 
This mirrors how ELF only uses LD_LIBRARY_PATH + // when root is "/". + #[cfg(windows)] + { + let is_system_root = self.root == Path::new("/") + || self.root == Path::new("\\") + || self + .root + .to_str() + .is_some_and(|s| s.len() <= 3 && s.contains(':')); + if is_system_root { + if let Ok(cwd) = env::current_dir() { + self.conf_ld_paths.push(cwd.display().to_string()); + } + if let Ok(path_env) = env::var("PATH") { + for path in path_env.split(';') { + if !path.is_empty() { + self.conf_ld_paths.push(path.to_string()); + } + } } } } From 2dda775531ccce891331828b77293009c935c71c Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 14:40:45 +0800 Subject: [PATCH 12/19] Fix MachO and PE library resolution issues MachO fixes: - Track @loader_path per-dependency: each library in the dependency chain now correctly uses its own path for @loader_path resolution, instead of always using the top-level binary's path - Use per-library rpaths: transitive dependencies are resolved using the intermediate library's LC_RPATH entries, not the top-level binary's - Handle fat/universal Mach-O binaries in dependency resolution: when a dependent library is a fat binary, extract the compatible architecture slice instead of skipping it entirely PE fixes: - Skip API set DLLs (api-ms-win-*, ext-ms-win-*): these are virtual DLLs resolved by Windows at runtime via an API set schema. They never exist on disk, so searching for them is pointless and expensive - Case-insensitive DLL matching: Windows filesystems are case-insensitive but cross-platform analysis may run on case-sensitive filesystems (Linux). 
Added find_file_case_insensitive() with fast exact-match path and slow directory-scan fallback Also: - Removed unused self.rpaths field from DependencyAnalyzer (was never populated); rpaths now flow through the dependency stack per-library - Extracted try_single_candidate() from try_library_candidates() to enable PE's case-insensitive search to reuse the parsing/compat logic - Added not_found_library() and is_api_set_dll() helpers - Added unit test for API set DLL detection - Thorough documentation of tricky behaviors in code comments --- src/lib.rs | 471 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 339 insertions(+), 132 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4943f2e..65f8a9d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,7 +84,6 @@ pub struct DependencyAnalyzer { env_ld_paths: Vec, conf_ld_paths: Vec, additional_ld_paths: Vec, - rpaths: Vec, root: PathBuf, /// Path to the main executable being analyzed (used for @executable_path on macOS) executable_path: Option, @@ -105,7 +104,6 @@ impl DependencyAnalyzer { env_ld_paths: Vec::new(), conf_ld_paths: Vec::new(), additional_ld_paths: Vec::new(), - rpaths: Vec::new(), root, executable_path: None, format: None, @@ -130,6 +128,12 @@ impl DependencyAnalyzer { self } + /// Read and resolve rpaths from a parsed binary. + /// + /// For ELF: rpaths go through `parse_ld_paths` which handles `$ORIGIN` expansion. + /// For MachO: rpaths may contain `@executable_path` or `@loader_path` which are + /// resolved relative to the given `path` (the binary that contains the rpaths). + /// `@rpath` entries within rpaths don't make sense and are kept as-is. 
fn read_rpath(&self, lib: &impl InspectDylib, path: &Path) -> Result, Error> { let mut rpaths = Vec::new(); for rpath in lib.rpaths() { @@ -138,9 +142,11 @@ impl DependencyAnalyzer { rpaths = ld_paths; } } else { - // For MachO, rpaths may contain @executable_path or @loader_path - // that need resolution, but we store them as-is for now and resolve - // them during find_library + // For MachO, rpaths may contain @executable_path or @loader_path. + // These are resolved here so that when we later use these rpaths + // for @rpath/ library name resolution, they are already absolute. + // Example: rpath = "@loader_path/../Frameworks" with loader at + // /app/Contents/MacOS/binary → resolves to /app/Contents/Frameworks let resolved = self.resolve_macho_path(rpath, path); if let Some(resolved) = resolved { rpaths.push(resolved.display().to_string()); @@ -173,8 +179,9 @@ impl DependencyAnalyzer { self.load_macho_paths(path)?; match mach { Mach::Fat(fat) => { - // For fat/universal binaries, find the best matching architecture. - // Prefer the native architecture, otherwise use the first one. + // Fat/universal binaries contain multiple architecture slices + // (e.g., x86_64 + arm64). We select the best matching architecture: + // prefer the native arch of the host, otherwise take the first one. 
let arches: Vec<_> = fat.into_iter().collect(); let mut selected = None; for (i, arch) in arches.iter().enumerate() { @@ -182,7 +189,6 @@ impl DependencyAnalyzer { if selected.is_none() { selected = Some(i); } - // Prefer native arch #[cfg(target_arch = "x86_64")] if macho.header.cputype == goblin::mach::cputype::CPU_TYPE_X86_64 { selected = Some(i); @@ -224,18 +230,56 @@ impl DependencyAnalyzer { dylib: impl InspectDylib, ) -> Result { let rpaths = self.read_rpath(&dylib, path)?; - let needed: Vec = dylib.libraries().iter().map(ToString::to_string).collect(); let mut libraries = HashMap::new(); - let mut stack = needed.clone(); - while let Some(lib_name) = stack.pop() { + // Dependency resolution stack. Each entry carries: + // - lib_name: the library to resolve (e.g., "libfoo.dylib" or "@rpath/libbar.dylib") + // - loader_path: path of the binary that imports this library, used to resolve + // @loader_path on macOS. For direct deps this is the main binary; for transitive + // deps it's the intermediate library that depends on this one. + // - lib_rpaths: rpaths from the importing binary, used to resolve @rpath/ prefixes. + // Each library has its own rpaths (from LC_RPATH load commands on macOS, or + // DT_RPATH/DT_RUNPATH on ELF). When resolving a library's own dependencies, + // we use *that library's* rpaths, not the top-level binary's rpaths. + let mut stack: Vec<(String, PathBuf, Vec)> = needed + .iter() + .map(|n| (n.clone(), path.to_path_buf(), rpaths.clone())) + .collect(); + + while let Some((lib_name, loader_path, current_rpaths)) = stack.pop() { if libraries.contains_key(&lib_name) { continue; } - let library = self.find_library(&dylib, &lib_name, path)?; - libraries.insert(lib_name, library.clone()); - stack.extend(library.needed); + + // API set DLLs (api-ms-win-*, ext-ms-win-*) are virtual DLLs that Windows + // resolves at runtime through an API set schema mapping. They never exist as + // real files on disk. 
We record them as not-found and skip dependency + // resolution to avoid pointless (and expensive) filesystem searches. + // See: https://learn.microsoft.com/en-us/windows/win32/apiindex/windows-apisets + if dylib.format() == BinaryFormat::PE && is_api_set_dll(&lib_name) { + libraries.insert(lib_name.clone(), not_found_library(&lib_name)); + continue; + } + + let library = self.find_library(&dylib, &lib_name, &loader_path, &current_rpaths)?; + + // For transitive dependency resolution, use the *found library's* path as the + // loader_path and its rpaths for @rpath/ resolution. This ensures that: + // - @loader_path in a transitive dep resolves relative to the intermediate + // library, not the top-level binary + // - @rpath uses the intermediate library's LC_RPATH entries, not the top-level's + let dep_loader = library.realpath.as_ref().unwrap_or(&library.path).clone(); + let dep_rpaths = library.rpath.clone(); + let dep_needed: Vec = library.needed.clone(); + + libraries.insert(lib_name, library); + + for needed_name in dep_needed { + if !libraries.contains_key(&needed_name) { + stack.push((needed_name, dep_loader.clone(), dep_rpaths.clone())); + } + } } let interpreter = dylib.interpreter().map(|interp| interp.to_string()); @@ -271,7 +315,10 @@ impl DependencyAnalyzer { // ---- ELF-specific path loading ---- - /// Parse the colon-delimited list of paths and apply ldso rules (ELF-specific) + /// Parse the colon-delimited list of paths and apply ldso rules (ELF-specific). + /// + /// Handles `$ORIGIN` / `${ORIGIN}` expansion (replaced with the directory of the + /// binary that contains the rpath) and root-relative path resolution. fn parse_ld_paths(&self, ld_path: &str, dylib_path: &Path) -> Result, Error> { let mut paths = Vec::new(); for path in ld_path.split(':') { @@ -353,16 +400,19 @@ impl DependencyAnalyzer { /// Load macOS-specific library search paths. /// - /// macOS dyld search order: - /// 1. DYLD_LIBRARY_PATH (environment) - /// 2.
rpaths (for @rpath/ prefixed names) - /// 3. The library's install name path - /// 4. DYLD_FALLBACK_LIBRARY_PATH (defaults to ~/lib:/usr/local/lib:/lib:/usr/lib) + /// macOS dyld search order (simplified): + /// 1. `DYLD_LIBRARY_PATH` — searched first using leaf filename only + /// 2. rpaths — for `@rpath/` prefixed install names, each LC_RPATH entry is tried + /// 3. The library's install name path — absolute or `@executable_path`/`@loader_path` + /// 4. `DYLD_FALLBACK_LIBRARY_PATH` — defaults to `~/lib:/usr/local/lib:/lib:/usr/lib` /// - /// See: http://clarkkromenaker.com/post/library-dynamic-loading-mac/ - /// See: https://matthew-brett.github.io/docosx/mac_runtime_link.html + /// References: + /// - <http://clarkkromenaker.com/post/library-dynamic-loading-mac/> + /// - <https://matthew-brett.github.io/docosx/mac_runtime_link.html> fn load_macho_paths(&mut self, _dylib_path: &Path) -> Result<(), Error> { - // DYLD_LIBRARY_PATH: searched before everything else + // DYLD_LIBRARY_PATH: searched before everything else, using leaf filename only. + // This is intentionally not gated on root == "/" because it's commonly used + // for testing and development overrides. if let Ok(dyld_lib_path) = env::var("DYLD_LIBRARY_PATH") { for path in dyld_lib_path.split(':') { if !path.is_empty() { @@ -370,8 +420,8 @@ impl DependencyAnalyzer { } } } - // DYLD_FALLBACK_LIBRARY_PATH: searched after rpaths and install name - // If not set, defaults to ~/lib:/usr/local/lib:/lib:/usr/lib + // DYLD_FALLBACK_LIBRARY_PATH: searched after rpaths and install name. + // If not set, macOS dyld uses a default set of fallback directories. match env::var("DYLD_FALLBACK_LIBRARY_PATH") { Ok(fallback_path) => { for path in fallback_path.split(':') { @@ -381,7 +431,7 @@ impl DependencyAnalyzer { } } Err(_) => { - // Default fallback paths + // Default fallback paths per dyld behavior if let Ok(home) = env::var("HOME") { self.conf_ld_paths.push(format!("{}/lib", home)); } @@ -397,11 +447,15 @@ impl DependencyAnalyzer { Ok(()) } - /// Resolve a macOS path variable (@executable_path, @loader_path, @rpath).
+ /// Resolve a macOS install name path variable. /// - /// - `@executable_path/` → replaced with the directory of the main executable - /// - `@loader_path/` → replaced with the directory of the binary that contains the load command - /// - `@rpath/` → returns None (must be resolved by iterating rpaths) + /// macOS uses three special prefixes in library install names and rpaths: + /// - `@executable_path/` — the directory of the main executable (set once at analyze time) + /// - `@loader_path/` — the directory of the Mach-O binary that contains the load command. + /// This changes for each binary in the dependency chain: when A loads B which loads C, + /// `@loader_path` for C's resolution is B's directory, not A's. + /// - `@rpath/` — a search variable; the remainder is appended to each LC_RPATH entry. + /// Returns None because the caller must iterate over rpaths to resolve it. fn resolve_macho_path(&self, path: &str, loader_path: &Path) -> Option { if let Some(rest) = path.strip_prefix("@executable_path/") { let exe_dir = self @@ -414,10 +468,10 @@ impl DependencyAnalyzer { let loader_dir = loader_path.parent().unwrap_or(Path::new(".")); Some(loader_dir.join(rest)) } else if path.starts_with("@rpath/") { - // @rpath must be resolved by iterating rpaths - return None + // @rpath must be resolved by iterating rpaths — return None to signal this None } else { - // Absolute or relative path + // Absolute or relative path — use as-is Some(PathBuf::from(path)) } } @@ -426,16 +480,17 @@ impl DependencyAnalyzer { /// Load Windows PE-specific library search paths. /// - /// Windows DLL search order (Standard Search Order): + /// Windows DLL search order (Standard Search Order for Desktop Applications): /// 1. The directory from which the application loaded - /// 2. The system directory (e.g., C:\Windows\System32) - /// 3. The 16-bit system directory (e.g., C:\Windows\System) - /// 4. The Windows directory (e.g., C:\Windows) + /// 2. 
The system directory (e.g., `C:\Windows\System32`) + /// 3. The 16-bit system directory (e.g., `C:\Windows\System`) + /// 4. The Windows directory (e.g., `C:\Windows`) /// 5. The current directory - /// 6. Directories listed in the PATH environment variable + /// 6. Directories listed in the `PATH` environment variable /// - /// See: https://stefanoborini.com/windows-dll-search-path/ - /// See: https://stmxcsr.com/dll-search-order.html + /// References: + /// - <https://stefanoborini.com/windows-dll-search-path/> + /// - <https://stmxcsr.com/dll-search-order.html> fn load_pe_paths(&mut self, dylib_path: &Path) -> Result<(), Error> { let root_str = self.root.display().to_string(); let root_str = root_str.strip_suffix('/').unwrap_or(&root_str); @@ -465,7 +520,7 @@ impl DependencyAnalyzer { } } - // 5-6. Current directory and PATH environment variable + // 5-6. Current directory and PATH environment variable. // Only use these when analyzing against the real filesystem root, // since they contain absolute paths that don't make sense with a // custom sysroot. This mirrors how ELF only uses LD_LIBRARY_PATH @@ -501,29 +556,33 @@ impl DependencyAnalyzer { /// Try to locate a `lib_name` that is compatible to `dylib`. /// /// Dispatches to format-specific find logic based on the binary format. + /// `loader_path` and `rpaths` provide per-dependency context for MachO/ELF + /// resolution (see `analyze_dylib` for how they are threaded through the + /// dependency graph). fn find_library( &self, dylib: &impl InspectDylib, lib_name: &str, loader_path: &Path, + rpaths: &[String], ) -> Result { match dylib.format() { - BinaryFormat::MachO => self.find_macho_library(dylib, lib_name, loader_path), + BinaryFormat::MachO => self.find_macho_library(dylib, lib_name, loader_path, rpaths), BinaryFormat::PE => self.find_pe_library(dylib, lib_name), - BinaryFormat::Elf => self.find_elf_library(dylib, lib_name), + BinaryFormat::Elf => self.find_elf_library(dylib, lib_name, rpaths), } } /// Try to locate an ELF library.
/// - /// Search order: rpaths, LD_LIBRARY_PATH, ld.so.conf paths, additional paths. + /// Search order: rpaths, `LD_LIBRARY_PATH`, `ld.so.conf` paths, additional paths. fn find_elf_library( &self, dylib: &impl InspectDylib, lib_name: &str, + rpaths: &[String], ) -> Result { - let candidates: Vec = self - .rpaths + let candidates: Vec = rpaths .iter() .chain(self.env_ld_paths.iter()) .chain(self.conf_ld_paths.iter()) @@ -543,46 +602,55 @@ impl DependencyAnalyzer { /// Try to locate a Mach-O library. /// - /// Handles @rpath/, @loader_path/, @executable_path/ prefixes. + /// Handles `@rpath/`, `@loader_path/`, `@executable_path/` prefixes. + /// /// Search order: - /// 1. DYLD_LIBRARY_PATH - /// 2. @rpath expansion (if lib_name starts with @rpath/) - /// 3. @executable_path / @loader_path resolution - /// 4. Direct path (absolute install name) - /// 5. DYLD_FALLBACK_LIBRARY_PATH - /// 6. Additional user-provided paths + /// 1. `DYLD_LIBRARY_PATH` (leaf filename only) + /// 2. `@rpath` expansion — each rpath from the *depending library* is tried + /// 3. `@executable_path` / `@loader_path` resolution, or direct absolute path + /// 4. `DYLD_FALLBACK_LIBRARY_PATH` (leaf filename only) + /// 5. Additional user-provided paths + /// + /// The `rpaths` parameter contains the rpaths from the library that depends on + /// `lib_name`, NOT the top-level binary. This is critical for transitive deps: + /// if A (rpaths=[/a/lib]) depends on B (rpaths=[/b/lib]) which depends on + /// `@rpath/libC.dylib`, we search /b/lib (B's rpaths), not /a/lib (A's rpaths). fn find_macho_library( &self, dylib: &impl InspectDylib, lib_name: &str, loader_path: &Path, + rpaths: &[String], ) -> Result { let mut candidates: Vec = Vec::new(); - // Extract the filename for searching in DYLD_LIBRARY_PATH etc. + // Extract the leaf filename for searching flat directories. 
+ // Install names like "/usr/lib/libSystem.B.dylib" → "libSystem.B.dylib" let file_name = Path::new(lib_name) .file_name() .unwrap_or_default() .to_str() .unwrap_or(lib_name); - // 1. DYLD_LIBRARY_PATH (searched first, using just the leaf filename) + // 1. DYLD_LIBRARY_PATH — searched first, using just the leaf filename for path in &self.env_ld_paths { candidates.push(PathBuf::from(path).join(file_name)); } - // 2-3. Handle path variable prefixes + // 2-3. Handle @-prefixed install names if let Some(rest) = lib_name.strip_prefix("@rpath/") { - // Search each rpath for the library - for rpath in &self.rpaths { + // @rpath/foo.dylib → try each rpath directory with the suffix. + // rpaths come from the *depending* library, already resolved by read_rpath + // (so @loader_path/@executable_path within rpaths are already expanded). + for rpath in rpaths { candidates.push(PathBuf::from(rpath).join(rest)); } } else if let Some(resolved) = self.resolve_macho_path(lib_name, loader_path) { - // @executable_path, @loader_path, or absolute path + // @executable_path/..., @loader_path/..., or absolute path candidates.push(resolved); } - // 4. DYLD_FALLBACK_LIBRARY_PATH (using just the leaf filename) + // 4. DYLD_FALLBACK_LIBRARY_PATH — searched last, using leaf filename for path in &self.conf_ld_paths { candidates.push(PathBuf::from(path).join(file_name)); } @@ -597,27 +665,38 @@ impl DependencyAnalyzer { /// Try to locate a PE library (DLL). /// + /// Uses case-insensitive filename matching because Windows filesystems are + /// case-insensitive but this tool may run on a case-sensitive filesystem + /// (e.g., Linux analyzing a Windows sysroot). Without this, a PE importing + /// "KERNEL32.dll" would fail to match a file named "kernel32.dll". + /// /// Search order: - /// 1. Application directory (from env_ld_paths) - /// 2. System directories (from conf_ld_paths) - /// 3. PATH directories (from conf_ld_paths) + /// 1. Application directory (from `env_ld_paths`) + /// 2. 
System directories (from `conf_ld_paths`) + /// 3. `PATH` directories (from `conf_ld_paths`) /// 4. Additional user-provided paths fn find_pe_library(&self, dylib: &impl InspectDylib, lib_name: &str) -> Result { - let candidates: Vec = self - .env_ld_paths - .iter() - .chain(self.conf_ld_paths.iter()) - .map(|ld_path| PathBuf::from(ld_path).join(lib_name)) - .chain( - self.additional_ld_paths - .iter() - .map(|ld_path| ld_path.join(lib_name)), - ) - .collect(); - self.try_library_candidates(dylib, lib_name, &candidates) + for dir_str in self.env_ld_paths.iter().chain(self.conf_ld_paths.iter()) { + let dir = Path::new(dir_str); + if let Some(lib_path) = find_file_case_insensitive(dir, lib_name) { + if let Some(lib) = self.try_single_candidate(dylib, lib_name, &lib_path)? { + return Ok(lib); + } + } + } + for dir in &self.additional_ld_paths { + if let Some(lib_path) = find_file_case_insensitive(dir, lib_name) { + if let Some(lib) = self.try_single_candidate(dylib, lib_name, &lib_path)? { + return Ok(lib); + } + } + } + Ok(not_found_library(lib_name)) } /// Try a list of candidate paths and return the first compatible library found. + /// + /// Used by ELF and MachO library finding, which generate candidate paths directly. fn try_library_candidates( &self, dylib: &impl InspectDylib, @@ -628,73 +707,185 @@ impl DependencyAnalyzer { if !lib_path.exists() { continue; } - let file = match fs::File::open(lib_path) { - Ok(f) => f, - Err(_) => continue, - }; - // SAFETY: The file is memory-mapped read-only and we only perform read operations - // on the mapped bytes. 
- let bytes = match unsafe { Mmap::map(&file) } { - Ok(m) => m, - Err(_) => continue, - }; - if let Ok(obj) = Object::parse(&bytes) { - if let Some((rpath, needed)) = match obj { - Object::Elf(ref elf) => { - if dylib.compatible(&obj) { - Some(( - self.read_rpath(elf, lib_path)?, - elf.libraries().iter().map(ToString::to_string).collect(), - )) - } else { - None - } - } - Object::Mach(ref mach) => match mach { - Mach::Fat(_) => None, - Mach::Binary(ref macho) => { - if dylib.compatible(&obj) { - Some(( - self.read_rpath(macho, lib_path)?, - macho.libraries().iter().map(ToString::to_string).collect(), - )) - } else { - None + if let Some(lib) = self.try_single_candidate(dylib, lib_name, lib_path)? { + return Ok(lib); + } + } + Ok(not_found_library(lib_name)) + } + + /// Try to parse a single candidate file and check compatibility. + /// + /// Opens the file, memory-maps it, parses the binary format, checks that it is + /// compatible with the main binary, and extracts rpaths and needed libraries. + /// + /// For fat/universal Mach-O binaries, iterates through architecture slices to find + /// one that is compatible with the main binary. This is important because dependent + /// libraries on macOS are often distributed as universal binaries containing + /// multiple architectures (e.g., x86_64 + arm64), and we need to pick the right + /// slice to extract the correct rpaths and dependency list. + fn try_single_candidate( + &self, + dylib: &impl InspectDylib, + lib_name: &str, + lib_path: &Path, + ) -> Result, Error> { + let file = match fs::File::open(lib_path) { + Ok(f) => f, + Err(_) => return Ok(None), + }; + // SAFETY: The file is memory-mapped read-only and we only perform read operations + // on the mapped bytes. 
+ let bytes = match unsafe { Mmap::map(&file) } { + Ok(m) => m, + Err(_) => return Ok(None), + }; + let obj = match Object::parse(&bytes) { + Ok(o) => o, + Err(_) => return Ok(None), + }; + + let info = match obj { + Object::Elf(ref elf) => { + if dylib.compatible(&obj) { + Some(( + self.read_rpath(elf, lib_path)?, + elf.libraries().iter().map(ToString::to_string).collect(), + )) + } else { + None + } + } + Object::Mach(ref mach) => match mach { + Mach::Fat(ref fat) => { + // Fat/universal Mach-O: iterate through architecture slices to find + // one that is compatible with the main binary. We construct a + // temporary Object for each slice to reuse the compatible() trait + // method, which checks cputype, bitness, and endianness. + // + // MultiArch::into_iter() re-parses from the underlying byte buffer + // on each call, so the fat binary can be iterated multiple times + // (compatible() may have already iterated it). + let mut found = None; + for arch in fat.into_iter() { + if let Ok(goblin::mach::SingleArch::MachO(inner)) = arch { + let inner_obj = Object::Mach(Mach::Binary(inner)); + if dylib.compatible(&inner_obj) { + // Extract the inner MachO back from the wrapper. + // This is safe because we just constructed inner_obj above. 
+ if let Object::Mach(Mach::Binary(ref macho)) = inner_obj { + found = Some(( + self.read_rpath(macho, lib_path)?, + macho.libraries().iter().map(ToString::to_string).collect(), + )); + } + break; } } - }, - Object::PE(ref pe) => { - if dylib.compatible(&obj) { - Some(( - self.read_rpath(pe, lib_path)?, - pe.libraries().iter().map(ToString::to_string).collect(), - )) - } else { - None - } } - _ => None, - } { - return Ok(Library { - name: lib_name.to_string(), - path: lib_path.to_path_buf(), - realpath: fs::canonicalize(lib_path).ok(), - needed, - rpath, - }); + found + } + Mach::Binary(ref macho) => { + if dylib.compatible(&obj) { + Some(( + self.read_rpath(macho, lib_path)?, + macho.libraries().iter().map(ToString::to_string).collect(), + )) + } else { + None + } + } + }, + Object::PE(ref pe) => { + if dylib.compatible(&obj) { + Some(( + self.read_rpath(pe, lib_path)?, + pe.libraries().iter().map(ToString::to_string).collect(), + )) + } else { + None } } + _ => None, + }; + + if let Some((rpath, needed)) = info { + Ok(Some(Library { + name: lib_name.to_string(), + path: lib_path.to_path_buf(), + realpath: fs::canonicalize(lib_path).ok(), + needed, + rpath, + })) + } else { + Ok(None) } - Ok(Library { - name: lib_name.to_string(), - path: PathBuf::from(lib_name), - realpath: None, - needed: Vec::new(), - rpath: Vec::new(), - }) } } +/// Create a not-found library entry. +/// +/// Used when a library cannot be located on disk (or is a virtual library like +/// Windows API sets). The library is recorded with `realpath: None` so callers +/// can detect it via `Library::found()`. +fn not_found_library(lib_name: &str) -> Library { + Library { + name: lib_name.to_string(), + path: PathBuf::from(lib_name), + realpath: None, + needed: Vec::new(), + rpath: Vec::new(), + } +} + +/// Check if a DLL name is a Windows API set. 
+/// +/// API sets (e.g., `api-ms-win-crt-runtime-l1-1-0.dll`) and extension API sets +/// (e.g., `ext-ms-win-ntuser-draw-l1-1-0.dll`) are virtual DLL names that Windows +/// resolves to real host DLLs at runtime via an API set schema. They never exist +/// as files on disk. Trying to locate them is pointless and expensive. +/// +/// References: +/// - +/// - delvewheel uses `re.compile('api-.*')` to skip these +fn is_api_set_dll(name: &str) -> bool { + let lower = name.to_lowercase(); + lower.starts_with("api-") || lower.starts_with("ext-ms-") +} + +/// Find a file in a directory using case-insensitive name matching. +/// +/// Windows filesystems (NTFS, FAT32) are case-insensitive: `KERNEL32.dll`, +/// `kernel32.dll`, and `Kernel32.DLL` all refer to the same file. However, when +/// analyzing a Windows sysroot on a case-sensitive filesystem (e.g., Linux ext4), +/// an exact-case lookup for `KERNEL32.dll` will fail if the file is stored as +/// `kernel32.dll`. This function handles that mismatch by falling back to a +/// directory scan with case-insensitive comparison when the exact match fails. +fn find_file_case_insensitive(dir: &Path, name: &str) -> Option { + // Fast path: try exact match first (also handles case-insensitive filesystems + // like macOS HFS+ and Windows NTFS natively) + let exact = dir.join(name); + if exact.exists() { + return Some(exact); + } + // Slow path: scan directory entries for case-insensitive match. + // This is O(n) in the number of directory entries, but only runs when the + // exact match fails (i.e., on case-sensitive filesystems with case mismatches). 
+ let name_lower = name.to_lowercase(); + let entries = match std::fs::read_dir(dir) { + Ok(e) => e, + Err(_) => return None, + }; + for entry in entries.flatten() { + if let Some(file_name) = entry.file_name().to_str() { + if file_name.to_lowercase() == name_lower { + return Some(entry.path()); + } + } + } + None +} + /// Find musl libc path fn find_musl_libc() -> Result, Error> { match glob::glob("/lib/libc.musl-*.so.1") @@ -705,3 +896,19 @@ fn find_musl_libc() -> Result, Error> { _ => Ok(None), } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_api_set_dll() { + assert!(is_api_set_dll("api-ms-win-crt-runtime-l1-1-0.dll")); + assert!(is_api_set_dll("api-ms-win-core-synch-l1-2-0.dll")); + assert!(is_api_set_dll("API-MS-WIN-CRT-STDIO-L1-1-0.DLL")); + assert!(is_api_set_dll("ext-ms-win-ntuser-draw-l1-1-0.dll")); + assert!(!is_api_set_dll("KERNEL32.dll")); + assert!(!is_api_set_dll("vcruntime140.dll")); + assert!(!is_api_set_dll("libSystem.B.dylib")); + } +} From c24e63e658796421ebf11fa8de654bd6d6684d71 Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 14:42:50 +0800 Subject: [PATCH 13/19] Address remaining minor gaps vs delocate/delvewheel MachO - @rpath fallback with full suffix: When @rpath/ resolution fails against all rpaths, also try the rpath suffix (e.g., 'subdir/libfoo.dylib' from '@rpath/subdir/libfoo.dylib') against DYLD_FALLBACK_LIBRARY_PATH and additional paths. Previously only the leaf filename was tried in fallback, which would miss libraries installed in subdirectories. This matches delocate's behavior of appending /usr/local/lib and /usr/lib as fallback @rpath search dirs. Also restructured fallback search to only apply leaf-filename fallback for non-@rpath install names (absolute paths, @executable_path, etc.), keeping the two code paths clearly separated. PE - SysWOW64 support: Added Windows/SysWOW64 (and lowercase/Wine variants) to the system directory search list. 
On 64-bit Windows, System32 contains 64-bit DLLs while SysWOW64 contains 32-bit DLLs. The WoW64 File System Redirector transparently maps System32 accesses from 32-bit processes to SysWOW64, but since we don't emulate this redirector, we search both directories and rely on compatible() to select the correct architecture match. --- src/lib.rs | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 65f8a9d..b83163c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -502,16 +502,27 @@ impl DependencyAnalyzer { } // 2-4. System directories (relative to root) - // Try common Windows system directory layouts + // On 64-bit Windows, System32 contains 64-bit DLLs and SysWOW64 contains + // 32-bit DLLs. When a 32-bit process accesses System32, Windows transparently + // redirects to SysWOW64 (the "WoW64 File System Redirector"). Since we don't + // emulate this redirector, we include both directories and rely on the + // compatible() check to select the correct architecture. + // + // References: + // - https://learn.microsoft.com/en-us/windows/win32/winprog64/file-system-redirector + // - delvewheel's _translate_directory() handles System32 ↔ SysWOW64 ↔ Sysnative for sys_dir in &[ "Windows/System32", + "Windows/SysWOW64", "Windows/System", "Windows", "windows/system32", + "windows/syswow64", "windows/system", "windows", // Wine-style paths "drive_c/windows/system32", + "drive_c/windows/syswow64", "drive_c/windows", ] { let full_path = format!("{}/{}", root_str, sys_dir); @@ -645,19 +656,31 @@ impl DependencyAnalyzer { for rpath in rpaths { candidates.push(PathBuf::from(rpath).join(rest)); } + // Fallback: also try the @rpath suffix (not just the leaf filename) against + // DYLD_FALLBACK_LIBRARY_PATH. This matches delocate's behavior of appending + // /usr/local/lib and /usr/lib as fallback search directories for @rpath + // resolution. 
For @rpath/subdir/libfoo.dylib this correctly tries + // /usr/local/lib/subdir/libfoo.dylib rather than just /usr/local/lib/libfoo.dylib. + for path in &self.conf_ld_paths { + candidates.push(PathBuf::from(path).join(rest)); + } + for path in &self.additional_ld_paths { + candidates.push(path.join(rest)); + } } else if let Some(resolved) = self.resolve_macho_path(lib_name, loader_path) { // @executable_path/..., @loader_path/..., or absolute path candidates.push(resolved); - } - // 4. DYLD_FALLBACK_LIBRARY_PATH — searched last, using leaf filename - for path in &self.conf_ld_paths { - candidates.push(PathBuf::from(path).join(file_name)); - } + // 4. DYLD_FALLBACK_LIBRARY_PATH — for non-@rpath install names, search + // using the leaf filename (the path-less library name portion). + for path in &self.conf_ld_paths { + candidates.push(PathBuf::from(path).join(file_name)); + } - // 5. Additional user-provided paths - for path in &self.additional_ld_paths { - candidates.push(path.join(file_name)); + // 5. 
Additional user-provided paths + for path in &self.additional_ld_paths { + candidates.push(path.join(file_name)); + } } self.try_library_candidates(dylib, lib_name, &candidates) From 69fd397cfa38aace4d6485dc4f10229ba4e1e922 Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 14:47:29 +0800 Subject: [PATCH 14/19] Simplify: remove dead field, extract check_compatible, clean up fat handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unused 'format' field from DependencyAnalyzer (was set but never read; dylib.format() is used for dispatch instead) - Extract check_compatible() helper to deduplicate the identical 'if compatible → read_rpath + libraries' pattern across ELF, MachO::Binary, and PE arms in try_single_candidate - Replace redundant if-let with let-else in fat binary handling (the inner match is always true since we just constructed the Object) - Add LibInfo type alias to avoid clippy type_complexity warning --- src/lib.rs | 83 +++++++++++++++++++++++------------------------------- 1 file changed, 36 insertions(+), 47 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b83163c..2e43ded 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -87,8 +87,6 @@ pub struct DependencyAnalyzer { root: PathBuf, /// Path to the main executable being analyzed (used for @executable_path on macOS) executable_path: Option, - /// The detected binary format - format: Option, } impl Default for DependencyAnalyzer { @@ -97,6 +95,9 @@ impl Default for DependencyAnalyzer { } } +/// Extracted library info: (rpaths, needed library names). +type LibInfo = (Vec, Vec); + impl DependencyAnalyzer { /// Create a new dependency analyzer. pub fn new(root: PathBuf) -> DependencyAnalyzer { @@ -106,7 +107,6 @@ impl DependencyAnalyzer { additional_ld_paths: Vec::new(), root, executable_path: None, - format: None, } } @@ -170,12 +170,10 @@ impl DependencyAnalyzer { let bytes = unsafe { Mmap::map(&file)? 
}; let dep_tree = match Object::parse(&bytes)? { Object::Elf(elf) => { - self.format = Some(BinaryFormat::Elf); self.load_elf_paths(path)?; self.analyze_dylib(path, elf)? } Object::Mach(mach) => { - self.format = Some(BinaryFormat::MachO); self.load_macho_paths(path)?; match mach { Mach::Fat(fat) => { @@ -215,7 +213,6 @@ impl DependencyAnalyzer { } } Object::PE(pe) => { - self.format = Some(BinaryFormat::PE); self.load_pe_paths(path)?; self.analyze_dylib(path, pe)? } @@ -737,6 +734,25 @@ impl DependencyAnalyzer { Ok(not_found_library(lib_name)) } + /// Check if a parsed binary is compatible with the main binary and extract + /// its rpaths and needed libraries. + fn check_compatible( + &self, + dylib: &impl InspectDylib, + lib: &impl InspectDylib, + obj: &Object, + lib_path: &Path, + ) -> Result, Error> { + if dylib.compatible(obj) { + Ok(Some(( + self.read_rpath(lib, lib_path)?, + lib.libraries().iter().map(ToString::to_string).collect(), + ))) + } else { + Ok(None) + } + } + /// Try to parse a single candidate file and check compatibility. /// /// Opens the file, memory-maps it, parses the binary format, checks that it is @@ -769,16 +785,7 @@ impl DependencyAnalyzer { }; let info = match obj { - Object::Elf(ref elf) => { - if dylib.compatible(&obj) { - Some(( - self.read_rpath(elf, lib_path)?, - elf.libraries().iter().map(ToString::to_string).collect(), - )) - } else { - None - } - } + Object::Elf(ref elf) => self.check_compatible(dylib, elf, &obj, lib_path)?, Object::Mach(ref mach) => match mach { Mach::Fat(ref fat) => { // Fat/universal Mach-O: iterate through architecture slices to find @@ -786,49 +793,31 @@ impl DependencyAnalyzer { // temporary Object for each slice to reuse the compatible() trait // method, which checks cputype, bitness, and endianness. // - // MultiArch::into_iter() re-parses from the underlying byte buffer - // on each call, so the fat binary can be iterated multiple times - // (compatible() may have already iterated it). 
+ // MultiArch re-parses from the underlying byte buffer on each + // iteration, so the fat binary can be iterated multiple times. let mut found = None; for arch in fat.into_iter() { if let Ok(goblin::mach::SingleArch::MachO(inner)) = arch { + // Wrap in Object to reuse compatible(), then unwrap to + // extract rpaths/libraries from the matched architecture. let inner_obj = Object::Mach(Mach::Binary(inner)); if dylib.compatible(&inner_obj) { - // Extract the inner MachO back from the wrapper. - // This is safe because we just constructed inner_obj above. - if let Object::Mach(Mach::Binary(ref macho)) = inner_obj { - found = Some(( - self.read_rpath(macho, lib_path)?, - macho.libraries().iter().map(ToString::to_string).collect(), - )); - } + let Object::Mach(Mach::Binary(ref macho)) = inner_obj else { + unreachable!() + }; + found = Some(( + self.read_rpath(macho, lib_path)?, + macho.libraries().iter().map(ToString::to_string).collect(), + )); break; } } } found } - Mach::Binary(ref macho) => { - if dylib.compatible(&obj) { - Some(( - self.read_rpath(macho, lib_path)?, - macho.libraries().iter().map(ToString::to_string).collect(), - )) - } else { - None - } - } + Mach::Binary(ref macho) => self.check_compatible(dylib, macho, &obj, lib_path)?, }, - Object::PE(ref pe) => { - if dylib.compatible(&obj) { - Some(( - self.read_rpath(pe, lib_path)?, - pe.libraries().iter().map(ToString::to_string).collect(), - )) - } else { - None - } - } + Object::PE(ref pe) => self.check_compatible(dylib, pe, &obj, lib_path)?, _ => None, }; From 47ce101eedf52dda0bdd02a7add69353ad36ed1b Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 14:49:28 +0800 Subject: [PATCH 15/19] Fix test_pe to pass on Windows, macOS, and Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Windows, real system DLLs (KERNEL32.dll etc.) 
are found in C:\Windows\System32, parsed, and their transitive dependencies discovered — so libraries.len() grows well beyond the 4 direct deps. On Linux/macOS those system dirs don't exist, so all non-API-set libs are just recorded as not-found. Replace the fragile assert_eq!(libraries.len(), 4) with: - Assert all needed libs exist in the dependency map - Assert API set DLLs are never found (they're virtual) - Assert libraries.len() >= 4 (allows transitive deps on Windows) --- tests/test_lddtree.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/test_lddtree.rs b/tests/test_lddtree.rs index 4139bf7..77439e9 100644 --- a/tests/test_lddtree.rs +++ b/tests/test_lddtree.rs @@ -52,5 +52,20 @@ fn test_pe() { "api-ms-win-crt-stdio-l1-1-0.dll" ] ); - assert_eq!(deps.libraries.len(), 4); + // All directly needed libraries must appear in the dependency map + for name in &deps.needed { + assert!( + deps.libraries.contains_key(name.as_str()), + "missing library: {name}" + ); + } + // API set DLLs are virtual — they never exist as real files on disk + assert!(!deps.libraries["api-ms-win-crt-runtime-l1-1-0.dll"].found()); + assert!(!deps.libraries["api-ms-win-crt-stdio-l1-1-0.dll"].found()); + // On Windows, real system DLLs (e.g., KERNEL32.dll) are found on disk and + // their transitive dependencies are discovered, so the total library count + // exceeds the 4 direct deps. On Linux/macOS no Windows system directories + // exist, so all non-API-set libs are recorded as not-found and the count + // stays at 4. 
+ assert!(deps.libraries.len() >= 4); } From 7d694711ec415bf9080826713f451d684655d0f8 Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 14:50:05 +0800 Subject: [PATCH 16/19] Update CI to modern actions and add clippy job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - actions/checkout@v2 → @v4 - Replace deprecated actions-rs/toolchain@v1 and actions-rs/cargo@v1 with dtolnay/rust-toolchain@stable and direct cargo commands - Add clippy job with -D warnings - Install rustfmt/clippy via toolchain components instead of rustup --- .github/workflows/CI.yml | 48 +++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index e68a534..aca843c 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,4 +1,4 @@ -on: +on: push: branches: - main @@ -11,15 +11,9 @@ jobs: name: Check runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - uses: actions-rs/cargo@v1 - with: - command: check + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: cargo check test: name: Test Suite @@ -28,30 +22,28 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - uses: actions-rs/cargo@v1 + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: cargo test env: RUST_BACKTRACE: '1' - with: - command: test fmt: name: Rustfmt runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable with: - profile: minimal - toolchain: stable - override: true - - run: rustup component add rustfmt - - uses: actions-rs/cargo@v1 + components: 
rustfmt + - run: cargo fmt --all -- --check + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable with: - command: fmt - args: --all -- --check + components: clippy + - run: cargo clippy -- -D warnings From 13b6c6fa1fcc7d16c43a518ec6361626bf610de9 Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 14:51:53 +0800 Subject: [PATCH 17/19] Fix unused variable warning on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prefix dylib_path with underscore in load_elf_paths — the parameter is only used inside a #[cfg(unix)] block for LD_LIBRARY_PATH, so it's unused on Windows. --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2e43ded..5098b16 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -340,11 +340,11 @@ impl DependencyAnalyzer { Ok(paths) } - fn load_elf_paths(&mut self, dylib_path: &Path) -> Result<(), Error> { + fn load_elf_paths(&mut self, _dylib_path: &Path) -> Result<(), Error> { #[cfg(unix)] if let Ok(env_ld_path) = env::var("LD_LIBRARY_PATH") { if self.root == Path::new("/") { - self.env_ld_paths = self.parse_ld_paths(&env_ld_path, dylib_path)?; + self.env_ld_paths = self.parse_ld_paths(&env_ld_path, _dylib_path)?; } } // Load all the paths from a ldso config file From 0cf8902359d5ea2c69412c3a6552720eb445650a Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 15:37:03 +0800 Subject: [PATCH 18/19] Simplify fat Mach-O selection and PE library search - Fat binary: select arch directly in single pass instead of collecting into Vec, selecting by index, then re-iterating with nth() - PE find_library: merge duplicate search loops into single chained iterator - Fix clippy nonminimal_bool warning in cfg-gated arch detection --- src/lib.rs | 67 +++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git 
a/src/lib.rs b/src/lib.rs index 5098b16..c5fe1c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -180,32 +180,39 @@ impl DependencyAnalyzer { // Fat/universal binaries contain multiple architecture slices // (e.g., x86_64 + arm64). We select the best matching architecture: // prefer the native arch of the host, otherwise take the first one. - let arches: Vec<_> = fat.into_iter().collect(); - let mut selected = None; - for (i, arch) in arches.iter().enumerate() { - if let Ok(goblin::mach::SingleArch::MachO(ref macho)) = arch { - if selected.is_none() { - selected = Some(i); - } - #[cfg(target_arch = "x86_64")] - if macho.header.cputype == goblin::mach::cputype::CPU_TYPE_X86_64 { - selected = Some(i); + let mut best = None; + for arch in fat.into_iter() { + if let Ok(goblin::mach::SingleArch::MachO(macho)) = arch { + let is_native = { + #[cfg(target_arch = "x86_64")] + { + macho.header.cputype + == goblin::mach::cputype::CPU_TYPE_X86_64 + } + #[cfg(target_arch = "aarch64")] + { + macho.header.cputype + == goblin::mach::cputype::CPU_TYPE_ARM64 + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "aarch64" + )))] + { + false + } + }; + if is_native { + best = Some(macho); break; } - #[cfg(target_arch = "aarch64")] - if macho.header.cputype == goblin::mach::cputype::CPU_TYPE_ARM64 { - selected = Some(i); - break; + if best.is_none() { + best = Some(macho); } } } - match selected { - Some(idx) => match arches.into_iter().nth(idx) { - Some(Ok(goblin::mach::SingleArch::MachO(macho))) => { - self.analyze_dylib(path, macho)? - } - _ => return Err(Error::UnsupportedBinary), - }, + match best { + Some(macho) => self.analyze_dylib(path, macho)?, None => return Err(Error::UnsupportedBinary), } } @@ -696,16 +703,14 @@ impl DependencyAnalyzer { /// 3. `PATH` directories (from `conf_ld_paths`) /// 4. 
Additional user-provided paths fn find_pe_library(&self, dylib: &impl InspectDylib, lib_name: &str) -> Result { - for dir_str in self.env_ld_paths.iter().chain(self.conf_ld_paths.iter()) { - let dir = Path::new(dir_str); - if let Some(lib_path) = find_file_case_insensitive(dir, lib_name) { - if let Some(lib) = self.try_single_candidate(dylib, lib_name, &lib_path)? { - return Ok(lib); - } - } - } - for dir in &self.additional_ld_paths { - if let Some(lib_path) = find_file_case_insensitive(dir, lib_name) { + let search_dirs = self + .env_ld_paths + .iter() + .chain(self.conf_ld_paths.iter()) + .map(|s| Path::new(s.as_str()).to_path_buf()) + .chain(self.additional_ld_paths.iter().cloned()); + for dir in search_dirs { + if let Some(lib_path) = find_file_case_insensitive(&dir, lib_name) { if let Some(lib) = self.try_single_candidate(dylib, lib_name, &lib_path)? { return Ok(lib); } From 3549cd44ec0558ebdd980897a4c2ffa387dda257 Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 15 Feb 2026 19:46:58 +0800 Subject: [PATCH 19/19] Address PR review comments - Guard MachO::libraries() against empty self.libs to prevent panic - Use is_file() instead of exists() in find_file_case_insensitive to skip directories and non-regular files - Resolve Mach-O absolute install names through sysroot (root) for cross-compilation SDK support - Relax test_macho assertion to >= 4 (transitive deps found on macOS) - Add .gitattributes to mark test fixtures as binary (prevents line-ending corruption with core.autocrlf) --- .gitattributes | 4 ++++ src/lib.rs | 19 ++++++++++++++++--- src/macho.rs | 6 +++++- tests/test_lddtree.rs | 6 +++++- 4 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..d27a0fc --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +# Mark binary test fixtures to prevent line-ending corruption +tests/test.macho binary +tests/test.pe binary +tests/test.elf binary diff 
--git a/src/lib.rs b/src/lib.rs index c5fe1c5..e4a3a1c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -672,7 +672,17 @@ impl DependencyAnalyzer { candidates.push(path.join(rest)); } } else if let Some(resolved) = self.resolve_macho_path(lib_name, loader_path) { - // @executable_path/..., @loader_path/..., or absolute path + // @executable_path/..., @loader_path/..., or absolute path. + // For absolute paths, also probe through the sysroot so that a custom + // root (e.g., cross-compilation SDK) is searched instead of / on the host. + if resolved.is_absolute() { + if let Ok(relative) = resolved.strip_prefix("/") { + let sysroot_path = self.root.join(relative); + if sysroot_path != resolved { + candidates.push(sysroot_path); + } + } + } candidates.push(resolved); // 4. DYLD_FALLBACK_LIBRARY_PATH — for non-@rpath install names, search @@ -882,7 +892,7 @@ fn find_file_case_insensitive(dir: &Path, name: &str) -> Option { // Fast path: try exact match first (also handles case-insensitive filesystems // like macOS HFS+ and Windows NTFS natively) let exact = dir.join(name); - if exact.exists() { + if exact.is_file() { return Some(exact); } // Slow path: scan directory entries for case-insensitive match. 
@@ -896,7 +906,10 @@ fn find_file_case_insensitive(dir: &Path, name: &str) -> Option { for entry in entries.flatten() { if let Some(file_name) = entry.file_name().to_str() { if file_name.to_lowercase() == name_lower { - return Some(entry.path()); + let path = entry.path(); + if path.is_file() { + return Some(path); + } } } } diff --git a/src/macho.rs b/src/macho.rs index 8c22442..979a3ea 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -14,7 +14,11 @@ impl InspectDylib for MachO<'_> { // goblin always add `self` or dylib id as a needed library, so we need to remove it, see // https://github.com/m4b/goblin/blob/6fdaffdc411bacd5dd7095dc93cec66302ca2575/src/mach/mod.rs#L174 // https://github.com/m4b/goblin/blob/6fdaffdc411bacd5dd7095dc93cec66302ca2575/src/mach/mod.rs#L231-L235 - self.libs[1..].to_vec() + if self.libs.len() <= 1 { + Vec::new() + } else { + self.libs[1..].to_vec() + } } fn interpreter(&self) -> Option<&str> { diff --git a/tests/test_lddtree.rs b/tests/test_lddtree.rs index 77439e9..0f4a539 100644 --- a/tests/test_lddtree.rs +++ b/tests/test_lddtree.rs @@ -35,7 +35,11 @@ fn test_macho() { "/usr/lib/libSystem.B.dylib" ] ); - assert_eq!(deps.libraries.len(), 4); + // On macOS, these system libraries exist on disk (in the dyld shared cache), + // so transitive dependencies will be discovered, making the count >= 4. + // On other platforms, the install-name paths don't exist, so we get exactly 4 + // not-found entries. + assert!(deps.libraries.len() >= 4); } #[test]