diff --git a/crates/sandlock-core/src/arch.rs b/crates/sandlock-core/src/arch.rs new file mode 100644 index 0000000..6f1653f --- /dev/null +++ b/crates/sandlock-core/src/arch.rs @@ -0,0 +1,71 @@ +//! Architecture-specific syscall and seccomp helpers. + +#[cfg(target_arch = "x86_64")] +mod imp { + pub const AUDIT_ARCH: u32 = 0xC000_003E; + pub const SYS_SECCOMP: i64 = 317; + pub const SYS_MEMFD_CREATE: i64 = 319; + pub const SYS_PIDFD_OPEN: i64 = 434; + pub const SYS_PIDFD_GETFD: i64 = 438; + + pub const SYS_OPEN: Option = Some(libc::SYS_open); + pub const SYS_STAT: Option = Some(libc::SYS_stat); + pub const SYS_LSTAT: Option = Some(libc::SYS_lstat); + pub const SYS_ACCESS: Option = Some(libc::SYS_access); + pub const SYS_READLINK: Option = Some(libc::SYS_readlink); + pub const SYS_GETDENTS: Option = Some(libc::SYS_getdents); + pub const SYS_UNLINK: Option = Some(libc::SYS_unlink); + pub const SYS_RMDIR: Option = Some(libc::SYS_rmdir); + pub const SYS_MKDIR: Option = Some(libc::SYS_mkdir); + pub const SYS_RENAME: Option = Some(libc::SYS_rename); + pub const SYS_SYMLINK: Option = Some(libc::SYS_symlink); + pub const SYS_LINK: Option = Some(libc::SYS_link); + pub const SYS_CHMOD: Option = Some(libc::SYS_chmod); + pub const SYS_CHOWN: Option = Some(libc::SYS_chown); + pub const SYS_LCHOWN: Option = Some(libc::SYS_lchown); + pub const SYS_VFORK: Option = Some(libc::SYS_vfork); + pub const SYS_FUTIMESAT: Option = Some(libc::SYS_futimesat); + pub const SYS_FORK: Option = Some(libc::SYS_fork); + pub const SYS_IOPERM: Option = Some(libc::SYS_ioperm); + pub const SYS_IOPL: Option = Some(libc::SYS_iopl); + pub const SYS_TIME: Option = Some(libc::SYS_time); +} + +#[cfg(target_arch = "aarch64")] +mod imp { + pub const AUDIT_ARCH: u32 = 0xC000_00B7; + pub const SYS_SECCOMP: i64 = 277; + pub const SYS_MEMFD_CREATE: i64 = 279; + pub const SYS_PIDFD_OPEN: i64 = 434; + pub const SYS_PIDFD_GETFD: i64 = 438; + + pub const SYS_OPEN: Option = None; + pub const SYS_STAT: Option = None; + 
pub const SYS_LSTAT: Option = None; + pub const SYS_ACCESS: Option = None; + pub const SYS_READLINK: Option = None; + pub const SYS_GETDENTS: Option = None; + pub const SYS_UNLINK: Option = None; + pub const SYS_RMDIR: Option = None; + pub const SYS_MKDIR: Option = None; + pub const SYS_RENAME: Option = None; + pub const SYS_SYMLINK: Option = None; + pub const SYS_LINK: Option = None; + pub const SYS_CHMOD: Option = None; + pub const SYS_CHOWN: Option = None; + pub const SYS_LCHOWN: Option = None; + pub const SYS_VFORK: Option = None; + pub const SYS_FUTIMESAT: Option = None; + pub const SYS_FORK: Option = None; + pub const SYS_IOPERM: Option = None; + pub const SYS_IOPL: Option = None; + pub const SYS_TIME: Option = None; +} + +pub use imp::*; + +pub fn push_optional_syscall(v: &mut Vec, nr: Option) { + if let Some(nr) = nr { + v.push(nr as u32); + } +} diff --git a/crates/sandlock-core/src/checkpoint.rs b/crates/sandlock-core/src/checkpoint.rs index ba0f324..5e573e3 100644 --- a/crates/sandlock-core/src/checkpoint.rs +++ b/crates/sandlock-core/src/checkpoint.rs @@ -100,13 +100,51 @@ fn ptrace_detach(pid: i32) -> io::Result<()> { } fn ptrace_getregs(pid: i32) -> io::Result> { - // user_regs_struct is 27 u64 fields on x86_64 (216 bytes) - let mut regs = vec![0u64; 27]; - let ret = unsafe { libc::ptrace(libc::PTRACE_GETREGS, pid, 0, regs.as_mut_ptr()) }; - if ret < 0 { - return Err(io::Error::last_os_error()); + #[cfg(target_arch = "x86_64")] + { + // user_regs_struct is 27 u64 fields on x86_64 (216 bytes) + let mut regs = vec![0u64; 27]; + let ret = unsafe { libc::ptrace(libc::PTRACE_GETREGS, pid, 0, regs.as_mut_ptr()) }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + Ok(regs) + } + + #[cfg(target_arch = "aarch64")] + { + // Linux arm64 exposes general-purpose registers through + // PTRACE_GETREGSET/NT_PRSTATUS. user_pt_regs is: + // x0-x30, sp, pc, pstate (34 u64 values). 
+ const NT_PRSTATUS: libc::c_int = 1; + let mut regs = vec![0u64; 34]; + let mut iov = libc::iovec { + iov_base: regs.as_mut_ptr() as *mut libc::c_void, + iov_len: regs.len() * std::mem::size_of::(), + }; + let ret = unsafe { + libc::ptrace( + libc::PTRACE_GETREGSET, + pid, + NT_PRSTATUS as usize as *mut libc::c_void, + &mut iov as *mut libc::iovec as *mut libc::c_void, + ) + }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + regs.truncate(iov.iov_len / std::mem::size_of::()); + Ok(regs) + } + + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + let _ = pid; + Err(io::Error::new( + io::ErrorKind::Unsupported, + "checkpoint register capture is not implemented on this architecture", + )) } - Ok(regs) } // --------------------------------------------------------------------------- diff --git a/crates/sandlock-core/src/context.rs b/crates/sandlock-core/src/context.rs index a89f2a4..83f643f 100644 --- a/crates/sandlock-core/src/context.rs +++ b/crates/sandlock-core/src/context.rs @@ -5,6 +5,7 @@ use std::ffi::CString; use std::io; use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}; +use crate::arch; use crate::policy::{FsIsolation, Policy}; use crate::seccomp::bpf::{self, stmt, jump}; use crate::sys::structs::{ @@ -151,8 +152,8 @@ pub fn syscall_name_to_nr(name: &str) -> Option { "process_vm_writev" => libc::SYS_process_vm_writev, "open_by_handle_at" => libc::SYS_open_by_handle_at, "name_to_handle_at" => libc::SYS_name_to_handle_at, - "ioperm" => libc::SYS_ioperm, - "iopl" => libc::SYS_iopl, + "ioperm" => arch::SYS_IOPERM?, + "iopl" => arch::SYS_IOPL?, "quotactl" => libc::SYS_quotactl, "acct" => libc::SYS_acct, "lookup_dcookie" => libc::SYS_lookup_dcookie, @@ -164,7 +165,7 @@ pub fn syscall_name_to_nr(name: &str) -> Option { // Additional syscalls for notif/arg filters "clone" => libc::SYS_clone, "clone3" => libc::SYS_clone3, - "vfork" => libc::SYS_vfork, + "vfork" => arch::SYS_VFORK?, "mmap" => libc::SYS_mmap, "munmap" => 
libc::SYS_munmap, "brk" => libc::SYS_brk, @@ -177,14 +178,14 @@ pub fn syscall_name_to_nr(name: &str) -> Option { "prctl" => libc::SYS_prctl, "getrandom" => libc::SYS_getrandom, "openat" => libc::SYS_openat, - "open" => libc::SYS_open, + "open" => arch::SYS_OPEN?, "getdents64" => libc::SYS_getdents64, - "getdents" => libc::SYS_getdents, + "getdents" => arch::SYS_GETDENTS?, "bind" => libc::SYS_bind, "getsockname" => libc::SYS_getsockname, "clock_gettime" => libc::SYS_clock_gettime, "gettimeofday" => libc::SYS_gettimeofday, - "time" => libc::SYS_time, + "time" => arch::SYS_TIME?, "clock_nanosleep" => libc::SYS_clock_nanosleep, "timerfd_settime" => libc::SYS_timerfd_settime, "timer_settime" => libc::SYS_timer_settime, @@ -204,21 +205,21 @@ pub fn syscall_name_to_nr(name: &str) -> Option { "readlinkat" => libc::SYS_readlinkat, "truncate" => libc::SYS_truncate, "utimensat" => libc::SYS_utimensat, - "unlink" => libc::SYS_unlink, - "rmdir" => libc::SYS_rmdir, - "mkdir" => libc::SYS_mkdir, - "rename" => libc::SYS_rename, - "stat" => libc::SYS_stat, - "lstat" => libc::SYS_lstat, - "access" => libc::SYS_access, - "symlink" => libc::SYS_symlink, - "link" => libc::SYS_link, - "chmod" => libc::SYS_chmod, - "chown" => libc::SYS_chown, - "lchown" => libc::SYS_lchown, - "readlink" => libc::SYS_readlink, - "futimesat" => libc::SYS_futimesat, - "fork" => libc::SYS_fork, + "unlink" => arch::SYS_UNLINK?, + "rmdir" => arch::SYS_RMDIR?, + "mkdir" => arch::SYS_MKDIR?, + "rename" => arch::SYS_RENAME?, + "stat" => arch::SYS_STAT?, + "lstat" => arch::SYS_LSTAT?, + "access" => arch::SYS_ACCESS?, + "symlink" => arch::SYS_SYMLINK?, + "link" => arch::SYS_LINK?, + "chmod" => arch::SYS_CHMOD?, + "chown" => arch::SYS_CHOWN?, + "lchown" => arch::SYS_LCHOWN?, + "readlink" => arch::SYS_READLINK?, + "futimesat" => arch::SYS_FUTIMESAT?, + "fork" => arch::SYS_FORK?, _ => return None, }; Some(nr as u32) @@ -233,10 +234,10 @@ pub fn notif_syscalls(policy: &Policy) -> Vec { let mut nrs = vec![ 
libc::SYS_clone as u32, libc::SYS_clone3 as u32, - libc::SYS_vfork as u32, libc::SYS_wait4 as u32, libc::SYS_waitid as u32, ]; + arch::push_optional_syscall(&mut nrs, arch::SYS_VFORK); if policy.max_memory.is_some() { nrs.push(libc::SYS_mmap as u32); @@ -276,10 +277,8 @@ pub fn notif_syscalls(policy: &Policy) -> Vec { // /proc virtualization (always on: PID filtering, sensitive path blocking) nrs.push(libc::SYS_openat as u32); - nrs.extend_from_slice(&[ - libc::SYS_getdents64 as u32, - libc::SYS_getdents as u32, - ]); + nrs.push(libc::SYS_getdents64 as u32); + arch::push_optional_syscall(&mut nrs, arch::SYS_GETDENTS); // Netlink virtualization (always on): // socket, bind, getsockname — swap in a unix socketpair for AF_NETLINK @@ -308,45 +307,38 @@ pub fn notif_syscalls(policy: &Policy) -> Vec { if policy.workdir.is_some() && policy.fs_isolation == FsIsolation::None { nrs.extend_from_slice(&[ libc::SYS_openat as u32, - libc::SYS_open as u32, libc::SYS_unlinkat as u32, - libc::SYS_unlink as u32, - libc::SYS_rmdir as u32, libc::SYS_mkdirat as u32, - libc::SYS_mkdir as u32, libc::SYS_renameat2 as u32, - libc::SYS_rename as u32, libc::SYS_symlinkat as u32, - libc::SYS_symlink as u32, libc::SYS_linkat as u32, - libc::SYS_link as u32, libc::SYS_fchmodat as u32, - libc::SYS_chmod as u32, libc::SYS_fchownat as u32, - libc::SYS_chown as u32, - libc::SYS_lchown as u32, libc::SYS_truncate as u32, libc::SYS_utimensat as u32, libc::SYS_newfstatat as u32, - libc::SYS_stat as u32, - libc::SYS_lstat as u32, libc::SYS_statx as u32, libc::SYS_faccessat as u32, 439u32, // SYS_faccessat2 — glibc 2.33+ uses this instead of faccessat - libc::SYS_access as u32, libc::SYS_readlinkat as u32, - libc::SYS_readlink as u32, libc::SYS_getdents64 as u32, - libc::SYS_getdents as u32, libc::SYS_chdir as u32, + libc::SYS_getcwd as u32, ]); + for nr in [ + arch::SYS_OPEN, arch::SYS_UNLINK, arch::SYS_RMDIR, arch::SYS_MKDIR, + arch::SYS_RENAME, arch::SYS_SYMLINK, arch::SYS_LINK, arch::SYS_CHMOD, + 
arch::SYS_CHOWN, arch::SYS_LCHOWN, arch::SYS_STAT, arch::SYS_LSTAT, + arch::SYS_ACCESS, arch::SYS_READLINK, arch::SYS_GETDENTS, + ] { + arch::push_optional_syscall(&mut nrs, nr); + } } // Chroot path interception if policy.chroot.is_some() { nrs.extend_from_slice(&[ libc::SYS_openat as u32, - libc::SYS_open as u32, // musl uses open(2) instead of openat libc::SYS_execve as u32, libc::SYS_execveat as u32, libc::SYS_unlinkat as u32, @@ -358,46 +350,40 @@ pub fn notif_syscalls(policy: &Policy) -> Vec { libc::SYS_fchownat as u32, libc::SYS_truncate as u32, libc::SYS_newfstatat as u32, - libc::SYS_stat as u32, // musl uses stat(2) instead of newfstatat - libc::SYS_lstat as u32, // musl uses lstat(2) instead of newfstatat libc::SYS_statx as u32, libc::SYS_faccessat as u32, 439u32, // SYS_faccessat2 — glibc 2.33+ uses this instead of faccessat - libc::SYS_access as u32, // musl uses access(2) instead of faccessat libc::SYS_readlinkat as u32, - libc::SYS_readlink as u32, // musl uses readlink(2) instead of readlinkat libc::SYS_getdents64 as u32, - libc::SYS_getdents as u32, libc::SYS_chdir as u32, libc::SYS_getcwd as u32, libc::SYS_statfs as u32, libc::SYS_utimensat as u32, - libc::SYS_unlink as u32, // musl uses unlink(2) instead of unlinkat - libc::SYS_rmdir as u32, // musl uses rmdir(2) instead of unlinkat - libc::SYS_mkdir as u32, // musl uses mkdir(2) instead of mkdirat - libc::SYS_rename as u32, // musl uses rename(2) instead of renameat2 - libc::SYS_symlink as u32, // musl uses symlink(2) instead of symlinkat - libc::SYS_link as u32, // musl uses link(2) instead of linkat - libc::SYS_chmod as u32, // musl uses chmod(2) instead of fchmodat - libc::SYS_chown as u32, // musl uses chown(2)/lchown(2) instead of fchownat - libc::SYS_lchown as u32, ]); + for nr in [ + arch::SYS_OPEN, arch::SYS_STAT, arch::SYS_LSTAT, arch::SYS_ACCESS, + arch::SYS_READLINK, arch::SYS_GETDENTS, arch::SYS_UNLINK, + arch::SYS_RMDIR, arch::SYS_MKDIR, arch::SYS_RENAME, + arch::SYS_SYMLINK, 
arch::SYS_LINK, arch::SYS_CHMOD, + arch::SYS_CHOWN, arch::SYS_LCHOWN, + ] { + arch::push_optional_syscall(&mut nrs, nr); + } } // Explicit deny-paths need path-bearing syscalls intercepted. if !policy.fs_denied.is_empty() { nrs.extend_from_slice(&[ libc::SYS_openat as u32, - libc::SYS_open as u32, libc::SYS_execve as u32, libc::SYS_execveat as u32, libc::SYS_linkat as u32, - libc::SYS_link as u32, libc::SYS_renameat2 as u32, - libc::SYS_rename as u32, libc::SYS_symlinkat as u32, - libc::SYS_symlink as u32, ]); + for nr in [arch::SYS_OPEN, arch::SYS_LINK, arch::SYS_RENAME, arch::SYS_SYMLINK] { + arch::push_optional_syscall(&mut nrs, nr); + } } // Dynamic policy callback — intercept key syscalls for event emission. @@ -995,7 +981,9 @@ mod tests { let nrs = notif_syscalls(&policy); assert!(nrs.contains(&(libc::SYS_clone as u32))); assert!(nrs.contains(&(libc::SYS_clone3 as u32))); - assert!(nrs.contains(&(libc::SYS_vfork as u32))); + if let Some(vfork) = arch::SYS_VFORK { + assert!(nrs.contains(&(vfork as u32))); + } } #[test] @@ -1145,17 +1133,29 @@ mod tests { #[test] fn test_syscall_name_to_nr_covers_defaults() { - // Every name in DEFAULT_DENY_SYSCALLS except nfsservctl should resolve + // Every name in DEFAULT_DENY_SYSCALLS should resolve unless the + // running architecture does not expose that syscall. 
+ let expected_unresolved: &[&str] = &[ + "nfsservctl", + #[cfg(target_arch = "aarch64")] + "ioperm", + #[cfg(target_arch = "aarch64")] + "iopl", + ]; let mut skipped = 0; for name in DEFAULT_DENY_SYSCALLS { match syscall_name_to_nr(name) { Some(_) => {} None => { - assert_eq!(*name, "nfsservctl", "unexpected unresolved syscall: {}", name); + assert!( + expected_unresolved.contains(name), + "unexpected unresolved syscall: {}", + name + ); skipped += 1; } } } - assert_eq!(skipped, 1); // only nfsservctl + assert_eq!(skipped, expected_unresolved.len()); } } diff --git a/crates/sandlock-core/src/cow/dispatch.rs b/crates/sandlock-core/src/cow/dispatch.rs index 8e8e009..a183f46 100644 --- a/crates/sandlock-core/src/cow/dispatch.rs +++ b/crates/sandlock-core/src/cow/dispatch.rs @@ -4,13 +4,16 @@ //! and injects results (fds, stat structs, readlink strings, dirents) back. use std::os::unix::io::{FromRawFd, OwnedFd, RawFd}; +use std::path::{Component, Path, PathBuf}; use std::sync::Arc; use tokio::sync::Mutex; +use crate::arch; +use crate::cow::seccomp::SeccompCowBranch; use crate::procfs::{build_dirent64, DT_DIR, DT_LNK, DT_REG}; use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction}; -use crate::seccomp::state::CowState; +use crate::seccomp::state::{CowState, PidKey}; use crate::sys::structs::SeccompNotif; /// Read a NUL-terminated path from child memory (up to 4096 bytes for filesystem paths). @@ -41,28 +44,85 @@ fn read_path(notif: &SeccompNotif, addr: u64, notif_fd: RawFd) -> Option /// Resolve a path that may be relative to a dirfd. /// For AT_FDCWD (-100), returns the path as-is (assumed absolute or cwd-relative). /// For other dirfds, reads /proc/{pid}/fd/{dirfd} to get the base path. 
-fn resolve_at_path(notif: &SeccompNotif, dirfd: i64, path: &str) -> String { - if std::path::Path::new(path).is_absolute() { - return path.to_string(); +fn normalize_path(path: PathBuf) -> PathBuf { + let mut out = PathBuf::new(); + for component in path.components() { + match component { + Component::Prefix(prefix) => out.push(prefix.as_os_str()), + Component::RootDir => out.push(Path::new("/")), + Component::CurDir => {} + Component::ParentDir => { + out.pop(); + } + Component::Normal(part) => out.push(part), + } + } + out +} + +fn resolve_at_path_with_virtual( + notif: &SeccompNotif, + dirfd: i64, + path: &str, + virtual_cwd: Option<&str>, +) -> String { + if Path::new(path).is_absolute() { + return normalize_path(PathBuf::from(path)).to_string_lossy().into_owned(); } // dirfd is stored as u64 in seccomp_data.args but AT_FDCWD is a negative i32. // Truncate to i32 for correct sign comparison. let dirfd32 = dirfd as i32; if dirfd32 == libc::AT_FDCWD { + if let Some(cwd) = virtual_cwd { + return normalize_path(Path::new(cwd).join(path)) + .to_string_lossy() + .into_owned(); + } // Relative to cwd — read /proc/{pid}/cwd if let Ok(cwd) = std::fs::read_link(format!("/proc/{}/cwd", notif.pid)) { - return format!("{}/{}", cwd.display(), path); + return normalize_path(cwd.join(path)).to_string_lossy().into_owned(); } return path.to_string(); } // Relative to dirfd if let Ok(base) = std::fs::read_link(format!("/proc/{}/fd/{}", notif.pid, dirfd)) { - format!("{}/{}", base.display(), path) + normalize_path(base.join(path)).to_string_lossy().into_owned() } else { path.to_string() } } +fn map_cow_upper_path(cow: &SeccompCowBranch, path: &str) -> String { + let path = PathBuf::from(path); + if let Ok(rel) = path.strip_prefix(cow.upper_dir()) { + return normalize_path(cow.workdir().join(rel)).to_string_lossy().into_owned(); + } + normalize_path(path).to_string_lossy().into_owned() +} + +fn read_pid_start_time(pid: u32) -> Option { + let stat = 
std::fs::read_to_string(format!("/proc/{}/stat", pid)).ok()?; + let rest = stat.rsplit_once(") ")?.1; + // starttime is field 22; after "pid (comm)" the first token is field 3. + rest.split_whitespace().nth(19)?.parse().ok() +} + +fn cow_pid_key(pid: u32) -> Option { + Some(PidKey { + pid: i32::try_from(pid).ok()?, + start_time: read_pid_start_time(pid)?, + }) +} + +fn current_virtual_cwd(st: &mut CowState, pid: u32) -> Option { + if st.virtual_cwds.is_empty() { + return None; + } + let pid_key = cow_pid_key(pid)?; + st.prune_reused_pid(pid_key); + st.virtual_cwds.get(&pid_key).cloned() +} + // ============================================================ // openat handler // ============================================================ @@ -80,7 +140,7 @@ pub(crate) async fn handle_cow_open( // open(path, flags, mode): args[0]=path, args[1]=flags // openat(dirfd, path, flags): args[0]=dirfd, args[1]=path, args[2]=flags - let (path_ptr, dirfd, flags) = if nr == libc::SYS_open as i64 { + let (path_ptr, dirfd, flags) = if Some(nr) == arch::SYS_OPEN { (notif.data.args[0], libc::AT_FDCWD as i64, notif.data.args[1]) } else { (notif.data.args[1], notif.data.args[0] as i64, notif.data.args[2]) @@ -90,7 +150,13 @@ pub(crate) async fn handle_cow_open( Some(p) => p, None => return NotifAction::Continue, }; - let path = resolve_at_path(notif, dirfd, &rel_path); + let virtual_cwd = if (dirfd as i32) == libc::AT_FDCWD && !Path::new(&rel_path).is_absolute() { + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) + } else { + None + }; + let mut path = resolve_at_path_with_virtual(notif, dirfd, &rel_path, virtual_cwd.as_deref()); // Phase 1: determine plan under lock (no heavy I/O) let plan = { @@ -100,13 +166,18 @@ pub(crate) async fn handle_cow_open( None => return NotifAction::Continue, }; + path = map_cow_upper_path(cow, &path); if !cow.matches(&path) { return NotifAction::Continue; } // Read-only opens don't need interception unless the file was // 
modified or deleted in the COW layer. - const WRITE_FLAGS: u64 = 0o1 | 0o2 | 0o100 | 0o1000 | 0o2000; + const WRITE_FLAGS: u64 = (libc::O_WRONLY + | libc::O_RDWR + | libc::O_CREAT + | libc::O_TRUNC + | libc::O_APPEND) as u64; let is_write = flags & WRITE_FLAGS != 0; if !is_write && !cow.needs_read_intercept(&path) { return NotifAction::Continue; @@ -185,6 +256,28 @@ enum CowWriteOp { Truncate { path: String, length: i64 }, } +impl CowWriteOp { + fn remap_upper_paths(&mut self, cow: &SeccompCowBranch) { + match self { + CowWriteOp::Unlink { path, .. } + | CowWriteOp::Mkdir { path } + | CowWriteOp::Chmod { path, .. } + | CowWriteOp::Chown { path, .. } + | CowWriteOp::Truncate { path, .. } => { + *path = map_cow_upper_path(cow, path); + } + CowWriteOp::Rename { old_path, new_path } + | CowWriteOp::Link { old_path, new_path } => { + *old_path = map_cow_upper_path(cow, old_path); + *new_path = map_cow_upper_path(cow, new_path); + } + CowWriteOp::Symlink { linkpath, .. } => { + *linkpath = map_cow_upper_path(cow, linkpath); + } + } + } +} + /// Read and resolve a path argument. For *at syscalls, pass the dirfd arg index; /// for legacy syscalls, pass None to use the raw path. fn read_resolved( @@ -192,89 +285,114 @@ fn read_resolved( path_arg: usize, dirfd_arg: Option, notif_fd: RawFd, + virtual_cwd: Option<&str>, ) -> Option { let raw = read_path(notif, notif.data.args[path_arg], notif_fd)?; match dirfd_arg { - Some(i) => Some(resolve_at_path(notif, notif.data.args[i] as i64, &raw)), - None => Some(raw), + Some(i) => Some(resolve_at_path_with_virtual( + notif, + notif.data.args[i] as i64, + &raw, + virtual_cwd, + )), + None => Some(resolve_at_path_with_virtual( + notif, + libc::AT_FDCWD as i64, + &raw, + virtual_cwd, + )), } } /// Parse the syscall into a CowWriteOp, reading and resolving paths from child memory. 
-fn parse_cow_write(notif: &SeccompNotif, notif_fd: RawFd) -> Option { +fn parse_cow_write( + notif: &SeccompNotif, + notif_fd: RawFd, + virtual_cwd: Option<&str>, +) -> Option { let nr = notif.data.nr as i64; // *at variants (dirfd in args[0], path in args[1]) if nr == libc::SYS_unlinkat { - let path = read_resolved(notif, 1, Some(0), notif_fd)?; + let path = read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?; let is_dir = (notif.data.args[2] & libc::AT_REMOVEDIR as u64) != 0; return Some(CowWriteOp::Unlink { path, is_dir }); } if nr == libc::SYS_mkdirat { - return Some(CowWriteOp::Mkdir { path: read_resolved(notif, 1, Some(0), notif_fd)? }); + return Some(CowWriteOp::Mkdir { + path: read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?, + }); } if nr == libc::SYS_renameat2 { - let old_path = read_resolved(notif, 1, Some(0), notif_fd)?; - let new_path = read_resolved(notif, 3, Some(2), notif_fd)?; + let old_path = read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?; + let new_path = read_resolved(notif, 3, Some(2), notif_fd, virtual_cwd)?; return Some(CowWriteOp::Rename { old_path, new_path }); } if nr == libc::SYS_symlinkat { // symlinkat(target, newdirfd, linkpath): target is raw, linkpath is resolved let target = read_path(notif, notif.data.args[0], notif_fd)?; - let linkpath = read_resolved(notif, 2, Some(1), notif_fd)?; + let linkpath = read_resolved(notif, 2, Some(1), notif_fd, virtual_cwd)?; return Some(CowWriteOp::Symlink { target, linkpath }); } if nr == libc::SYS_linkat { - let old_path = read_resolved(notif, 1, Some(0), notif_fd)?; - let new_path = read_resolved(notif, 3, Some(2), notif_fd)?; + let old_path = read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?; + let new_path = read_resolved(notif, 3, Some(2), notif_fd, virtual_cwd)?; return Some(CowWriteOp::Link { old_path, new_path }); } if nr == libc::SYS_fchmodat { - let path = read_resolved(notif, 1, Some(0), notif_fd)?; + let path = read_resolved(notif, 1, Some(0), notif_fd, 
virtual_cwd)?; return Some(CowWriteOp::Chmod { path, mode: (notif.data.args[2] & 0o7777) as u32 }); } if nr == libc::SYS_fchownat { - let path = read_resolved(notif, 1, Some(0), notif_fd)?; + let path = read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?; return Some(CowWriteOp::Chown { path, uid: notif.data.args[2] as u32, gid: notif.data.args[3] as u32 }); } // Legacy variants (path in args[0], no dirfd) - if nr == libc::SYS_unlink as i64 { - return Some(CowWriteOp::Unlink { path: read_resolved(notif, 0, None, notif_fd)?, is_dir: false }); + if Some(nr) == arch::SYS_UNLINK { + return Some(CowWriteOp::Unlink { + path: read_resolved(notif, 0, None, notif_fd, virtual_cwd)?, + is_dir: false, + }); } - if nr == libc::SYS_rmdir as i64 { - return Some(CowWriteOp::Unlink { path: read_resolved(notif, 0, None, notif_fd)?, is_dir: true }); + if Some(nr) == arch::SYS_RMDIR { + return Some(CowWriteOp::Unlink { + path: read_resolved(notif, 0, None, notif_fd, virtual_cwd)?, + is_dir: true, + }); } - if nr == libc::SYS_mkdir as i64 { - return Some(CowWriteOp::Mkdir { path: read_resolved(notif, 0, None, notif_fd)? 
}); + if Some(nr) == arch::SYS_MKDIR { + return Some(CowWriteOp::Mkdir { + path: read_resolved(notif, 0, None, notif_fd, virtual_cwd)?, + }); } - if nr == libc::SYS_rename as i64 { - let old_path = read_resolved(notif, 0, None, notif_fd)?; - let new_path = read_resolved(notif, 1, None, notif_fd)?; + if Some(nr) == arch::SYS_RENAME { + let old_path = read_resolved(notif, 0, None, notif_fd, virtual_cwd)?; + let new_path = read_resolved(notif, 1, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Rename { old_path, new_path }); } - if nr == libc::SYS_symlink as i64 { + if Some(nr) == arch::SYS_SYMLINK { let target = read_path(notif, notif.data.args[0], notif_fd)?; - let linkpath = read_resolved(notif, 1, None, notif_fd)?; + let linkpath = read_resolved(notif, 1, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Symlink { target, linkpath }); } - if nr == libc::SYS_link as i64 { - let old_path = read_resolved(notif, 0, None, notif_fd)?; - let new_path = read_resolved(notif, 1, None, notif_fd)?; + if Some(nr) == arch::SYS_LINK { + let old_path = read_resolved(notif, 0, None, notif_fd, virtual_cwd)?; + let new_path = read_resolved(notif, 1, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Link { old_path, new_path }); } - if nr == libc::SYS_chmod as i64 { - let path = read_resolved(notif, 0, None, notif_fd)?; + if Some(nr) == arch::SYS_CHMOD { + let path = read_resolved(notif, 0, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Chmod { path, mode: (notif.data.args[1] & 0o7777) as u32 }); } - if nr == libc::SYS_chown as i64 || nr == libc::SYS_lchown as i64 { - let path = read_resolved(notif, 0, None, notif_fd)?; + if Some(nr) == arch::SYS_CHOWN || Some(nr) == arch::SYS_LCHOWN { + let path = read_resolved(notif, 0, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Chown { path, uid: notif.data.args[1] as u32, gid: notif.data.args[2] as u32 }); } // truncate (legacy only, path in args[0]) if nr == libc::SYS_truncate { - let path = 
read_resolved(notif, 0, None, notif_fd)?; + let path = read_resolved(notif, 0, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Truncate { path, length: notif.data.args[1] as i64 }); } @@ -360,7 +478,11 @@ pub(crate) async fn handle_cow_write( cow_state: &Arc>, notif_fd: RawFd, ) -> NotifAction { - let op = match parse_cow_write(notif, notif_fd) { + let virtual_cwd = { + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) + }; + let mut op = match parse_cow_write(notif, notif_fd, virtual_cwd.as_deref()) { Some(op) => op, None => return NotifAction::Continue, }; @@ -373,6 +495,7 @@ pub(crate) async fn handle_cow_write( None => return NotifAction::Continue, }; + op.remap_upper_paths(cow); match cow_copy_rel(&op, cow) { Some((_match_path, ref rel)) => { match cow.prepare_copy(rel) { @@ -456,16 +579,29 @@ pub(crate) async fn handle_cow_access( // access(pathname, mode): args[0]=path, args[1]=mode // faccessat(dirfd, pathname, mode, flags): args[0]=dirfd, args[1]=path, args[2]=mode - let (path, mode) = if nr == libc::SYS_access as i64 { + let (path, mode) = if Some(nr) == arch::SYS_ACCESS { + let virtual_cwd = { + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) + }; let p = match read_path(notif, notif.data.args[0], notif_fd) { - Some(p) => p, + Some(p) => resolve_at_path_with_virtual( + notif, + libc::AT_FDCWD as i64, + &p, + virtual_cwd.as_deref(), + ), None => return NotifAction::Continue, }; (p, notif.data.args[1] as i32) } else { let dirfd = notif.data.args[0] as i64; + let virtual_cwd = { + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) + }; let p = match read_path(notif, notif.data.args[1], notif_fd) { - Some(p) => resolve_at_path(notif, dirfd, &p), + Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), None => return NotifAction::Continue, }; (p, notif.data.args[2] as i32) @@ -482,6 +618,7 @@ pub(crate) async fn handle_cow_access( None => 
return NotifAction::Continue, }; + let path = map_cow_upper_path(cow, &path); if !cow.matches(&path) { return NotifAction::Continue; } @@ -489,7 +626,7 @@ pub(crate) async fn handle_cow_access( // Path is under workdir and W_OK was requested — writes will be // redirected to the COW upper layer, so report success. // Check the path actually exists on the real filesystem. - if std::path::Path::new(&path).exists() { + if std::path::Path::new(&path).exists() || cow.handle_stat(&path).is_some() { return NotifAction::ReturnValue(0); } @@ -516,8 +653,12 @@ pub(crate) async fn handle_cow_utimensat( return NotifAction::Continue; } + let virtual_cwd = { + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) + }; let path = match read_path(notif, path_ptr, notif_fd) { - Some(p) => resolve_at_path(notif, dirfd, &p), + Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), None => return NotifAction::Continue, }; @@ -527,6 +668,7 @@ pub(crate) async fn handle_cow_utimensat( Some(c) => c, None => return NotifAction::Continue, }; + let path = map_cow_upper_path(cow, &path); if !cow.matches(&path) { return NotifAction::Continue; } @@ -582,8 +724,12 @@ pub(crate) async fn handle_cow_stat( // newfstatat(dirfd, pathname, statbuf, flags) // faccessat(dirfd, pathname, mode, flags) let dirfd = notif.data.args[0] as i64; + let virtual_cwd = { + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) + }; let path = match read_path(notif, notif.data.args[1], notif_fd) { - Some(p) => resolve_at_path(notif, dirfd, &p), + Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), None => return NotifAction::Continue, }; @@ -593,6 +739,7 @@ pub(crate) async fn handle_cow_stat( None => return NotifAction::Continue, }; + let path = map_cow_upper_path(cow, &path); if !cow.has_changes() || !cow.matches(&path) { return NotifAction::Continue; } @@ -613,52 +760,30 @@ pub(crate) async fn 
handle_cow_stat( return NotifAction::Errno(libc::ENOENT); } - // newfstatat — stat the resolved path and write to child's buffer + // newfstatat — stat the resolved path and write the native libc layout + // back to the child. Do not hand-pack struct stat; its layout is + // architecture-specific. let statbuf_addr = notif.data.args[2]; - let flags = notif.data.args[3]; - let follow = (flags & libc::AT_SYMLINK_NOFOLLOW as u64) == 0; + let flags = (notif.data.args[3] & 0xFFFF_FFFF) as i32; + let c_path = match std::ffi::CString::new(real_path.to_str().unwrap_or("")) { + Ok(c) => c, + Err(_) => return NotifAction::Continue, + }; + let mut statbuf: libc::stat = unsafe { std::mem::zeroed() }; + if unsafe { libc::fstatat(libc::AT_FDCWD, c_path.as_ptr(), &mut statbuf, flags) } < 0 { + let errno = std::io::Error::last_os_error() + .raw_os_error() + .unwrap_or(libc::EIO); + return NotifAction::Errno(errno); + } + let buf = unsafe { + std::slice::from_raw_parts( + &statbuf as *const libc::stat as *const u8, + std::mem::size_of::(), + ) + }; - let meta = if follow { - std::fs::metadata(&real_path) - } else { - std::fs::symlink_metadata(&real_path) - }; - - let meta = match meta { - Ok(m) => m, - Err(_) => return NotifAction::Errno(libc::ENOENT), - }; - - // Pack struct stat (x86_64 layout, 144 bytes) - use std::os::unix::fs::MetadataExt; - let mut buf = vec![0u8; 144]; - // struct stat { st_dev(8), st_ino(8), st_nlink(8), st_mode(4), st_uid(4), st_gid(4), __pad0(4), - // st_rdev(8), st_size(8), st_blksize(8), st_blocks(8), - // st_atime(8), st_atime_nsec(8), st_mtime(8), st_mtime_nsec(8), - // st_ctime(8), st_ctime_nsec(8), __unused[3](24) } - let mut off = 0; - macro_rules! pack_u64 { ($v:expr) => { buf[off..off+8].copy_from_slice(&($v as u64).to_ne_bytes()); off += 8; } } - macro_rules! 
pack_u32 { ($v:expr) => { buf[off..off+4].copy_from_slice(&($v as u32).to_ne_bytes()); off += 4; } } - pack_u64!(meta.dev()); - pack_u64!(meta.ino()); - pack_u64!(meta.nlink()); - pack_u32!(meta.mode()); - pack_u32!(meta.uid()); - pack_u32!(meta.gid()); - pack_u32!(0u32); // __pad0 - pack_u64!(meta.rdev()); - pack_u64!(meta.size() as u64); - pack_u64!(meta.blksize()); - pack_u64!(meta.blocks() as u64); - pack_u64!(meta.atime() as u64); - pack_u64!(meta.atime_nsec() as u64); - pack_u64!(meta.mtime() as u64); - pack_u64!(meta.mtime_nsec() as u64); - pack_u64!(meta.ctime() as u64); - pack_u64!(meta.ctime_nsec() as u64); - let _ = off; - - if write_child_mem(notif_fd, notif.id, notif.pid, statbuf_addr, &buf).is_err() { + if write_child_mem(notif_fd, notif.id, notif.pid, statbuf_addr, buf).is_err() { return NotifAction::Continue; } @@ -673,8 +798,12 @@ pub(crate) async fn handle_cow_statx( ) -> NotifAction { // statx(dirfd, pathname, flags, mask, statxbuf) let dirfd = notif.data.args[0] as i64; + let virtual_cwd = { + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) + }; let path = match read_path(notif, notif.data.args[1], notif_fd) { - Some(p) => resolve_at_path(notif, dirfd, &p), + Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), None => return NotifAction::Continue, }; @@ -684,6 +813,7 @@ pub(crate) async fn handle_cow_statx( None => return NotifAction::Continue, }; + let path = map_cow_upper_path(cow, &path); if !cow.has_changes() || !cow.matches(&path) { return NotifAction::Continue; } @@ -702,8 +832,12 @@ pub(crate) async fn handle_cow_readlink( ) -> NotifAction { // readlinkat(dirfd, pathname, buf, bufsiz) let dirfd = notif.data.args[0] as i64; + let virtual_cwd = { + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) + }; let path = match read_path(notif, notif.data.args[1], notif_fd) { - Some(p) => resolve_at_path(notif, dirfd, &p), + Some(p) => 
resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), None => return NotifAction::Continue, }; let buf_addr = notif.data.args[2]; @@ -715,6 +849,7 @@ pub(crate) async fn handle_cow_readlink( None => return NotifAction::Continue, }; + let path = map_cow_upper_path(cow, &path); if !cow.has_changes() || !cow.matches(&path) { return NotifAction::Continue; } @@ -746,6 +881,10 @@ pub(crate) async fn handle_cow_getdents( let child_fd = (notif.data.args[0] & 0xFFFFFFFF) as u32; let buf_addr = notif.data.args[1]; let buf_size = (notif.data.args[2] & 0xFFFFFFFF) as usize; + let pid_key = match cow_pid_key(pid) { + Some(key) => key, + None => return NotifAction::Continue, + }; // Check if fd points to a COW-managed directory let link_path = format!("/proc/{}/fd/{}", pid, child_fd); @@ -755,17 +894,32 @@ pub(crate) async fn handle_cow_getdents( }; let mut st = cow_state.lock().await; + st.prune_reused_pid(pid_key); let cow = match st.branch.as_ref() { Some(c) => c, None => return NotifAction::Continue, }; - if !cow.has_changes() || !cow.matches(&target) { + if !cow.has_changes() { return NotifAction::Continue; } + let target_path = Path::new(&target); + let rel_path = if cow.matches(&target) { + cow.safe_rel(&target).unwrap_or_else(|| ".".to_string()) + } else if let Ok(rel) = target_path.strip_prefix(cow.upper_dir()) { + let rel = rel.to_string_lossy(); + if rel.is_empty() { + ".".to_string() + } else { + rel.into_owned() + } + } else { + return NotifAction::Continue; + }; + // Build cache on first call; invalidate if fd was reused for a different dir. - let cache_key = (pid as i32, child_fd); + let cache_key = (pid_key, child_fd); if let Some((cached_target, entries)) = st.dir_cache.get(&cache_key) { if *cached_target != target { // fd reused for a different directory — rebuild. 
@@ -778,7 +932,6 @@ pub(crate) async fn handle_cow_getdents( } if !st.dir_cache.contains_key(&cache_key) { let cow = st.branch.as_ref().unwrap(); - let rel_path = cow.safe_rel(&target).unwrap_or_else(|| ".".to_string()); let merged = cow.list_merged_dir(&rel_path); let upper_dir = cow.upper_dir().join(&rel_path); @@ -862,22 +1015,20 @@ pub(crate) async fn handle_cow_chdir( }; let orig_path_buf_len = path.len() + 1; // NUL-terminated size in child memory - // Resolve relative paths against the process's cwd. - let abs_path = if std::path::Path::new(&path).is_absolute() { - path - } else { - match std::fs::read_link(format!("/proc/{}/cwd", notif.pid)) { - Ok(cwd) => cwd.join(&path).to_string_lossy().into_owned(), - Err(_) => return NotifAction::Continue, - } - }; - - let st = cow_state.lock().await; + let mut st = cow_state.lock().await; + let virtual_cwd = current_virtual_cwd(&mut st, notif.pid); + let abs_path = resolve_at_path_with_virtual( + notif, + libc::AT_FDCWD as i64, + &path, + virtual_cwd.as_deref(), + ); let cow = match st.branch.as_ref() { Some(c) => c, None => return NotifAction::Continue, }; + let abs_path = map_cow_upper_path(cow, &abs_path); if !cow.matches(&abs_path) { return NotifAction::Continue; } @@ -945,5 +1096,55 @@ pub(crate) async fn handle_cow_chdir( return NotifAction::Errno(libc::EFAULT); } + if let Some(pid_key) = cow_pid_key(notif.pid) { + let mut st = cow_state.lock().await; + st.prune_reused_pid(pid_key); + st.virtual_cwds.insert(pid_key, abs_path); + } + NotifAction::Continue } + +/// Handle getcwd after chdir into a COW-only directory. 
+pub(crate) async fn handle_cow_getcwd( + notif: &SeccompNotif, + cow_state: &Arc<Mutex<CowState>>, + notif_fd: RawFd, +) -> NotifAction { + let buf_addr = notif.data.args[0]; + let buf_size = (notif.data.args[1] & 0xFFFF_FFFF) as usize; + + let mut st = cow_state.lock().await; + let cached_virtual_cwd = current_virtual_cwd(&mut st, notif.pid); + let cow = match st.branch.as_ref() { + Some(c) => c, + None => return NotifAction::Continue, + }; + + let virtual_cwd = if let Some(cwd) = cached_virtual_cwd { + cwd + } else { + let cwd = match std::fs::read_link(format!("/proc/{}/cwd", notif.pid)) { + Ok(c) => c, + Err(_) => return NotifAction::Continue, + }; + match cwd.strip_prefix(cow.upper_dir()) { + Ok(rel) => cow.workdir().join(rel).to_string_lossy().into_owned(), + Err(_) => return NotifAction::Continue, + } + }; + drop(st); + + let cwd_bytes = virtual_cwd.as_bytes(); + if cwd_bytes.len() + 1 > buf_size { + return NotifAction::Errno(libc::ERANGE); + } + + let mut write_buf = cwd_bytes.to_vec(); + write_buf.push(0); + + if write_child_mem(notif_fd, notif.id, notif.pid, buf_addr, &write_buf).is_err() { + return NotifAction::Continue; + } + NotifAction::ReturnValue(write_buf.len() as i64) +} diff --git a/crates/sandlock-core/src/cow/seccomp.rs b/crates/sandlock-core/src/cow/seccomp.rs index 33c4768..09fd164 100644 --- a/crates/sandlock-core/src/cow/seccomp.rs +++ b/crates/sandlock-core/src/cow/seccomp.rs @@ -10,14 +10,14 @@ use std::path::{Path, PathBuf}; use crate::error::BranchError; -/// O_* flags for detecting writes. -const O_WRONLY: u64 = 0o1; -const O_RDWR: u64 = 0o2; -const O_CREAT: u64 = 0o100; -const O_TRUNC: u64 = 0o1000; -const O_APPEND: u64 = 0o2000; -const O_EXCL: u64 = 0o200; -const O_DIRECTORY: u64 = 0o200000; +/// O_* flags for detecting writes. These differ across Linux architectures. 
+const O_WRONLY: u64 = libc::O_WRONLY as u64; +const O_RDWR: u64 = libc::O_RDWR as u64; +const O_CREAT: u64 = libc::O_CREAT as u64; +const O_TRUNC: u64 = libc::O_TRUNC as u64; +const O_APPEND: u64 = libc::O_APPEND as u64; +const O_EXCL: u64 = libc::O_EXCL as u64; +const O_DIRECTORY: u64 = libc::O_DIRECTORY as u64; const WRITE_FLAGS: u64 = O_WRONLY | O_RDWR | O_CREAT | O_TRUNC | O_APPEND; /// Plan for a COW copy — returned by `prepare_copy()` to separate metadata diff --git a/crates/sandlock-core/src/fork.rs b/crates/sandlock-core/src/fork.rs index f02def5..14ca53f 100644 --- a/crates/sandlock-core/src/fork.rs +++ b/crates/sandlock-core/src/fork.rs @@ -19,8 +19,6 @@ use std::os::unix::io::RawFd; fn raw_fork() -> std::io::Result { #[cfg(target_arch = "x86_64")] const NR_FORK: i64 = 57; - #[cfg(target_arch = "aarch64")] - const NR_FORK: i64 = -1; // aarch64 has no fork — use clone with minimal flags #[cfg(target_arch = "x86_64")] { diff --git a/crates/sandlock-core/src/lib.rs b/crates/sandlock-core/src/lib.rs index caf5654..dd37de9 100644 --- a/crates/sandlock-core/src/lib.rs +++ b/crates/sandlock-core/src/lib.rs @@ -3,6 +3,7 @@ pub mod policy; pub mod profile; pub mod result; pub mod sandbox; +pub(crate) mod arch; pub(crate) mod sys; pub mod landlock; pub mod seccomp; diff --git a/crates/sandlock-core/src/policy.rs b/crates/sandlock-core/src/policy.rs index 65bcb28..d920b01 100644 --- a/crates/sandlock-core/src/policy.rs +++ b/crates/sandlock-core/src/policy.rs @@ -418,6 +418,15 @@ impl PolicyBuilder { self } + pub fn fs_read_if_exists(self, path: impl Into) -> Self { + let path = path.into(); + if path.exists() { + self.fs_read(path) + } else { + self + } + } + pub fn fs_deny(mut self, path: impl Into) -> Self { self.fs_denied.push(path.into()); self diff --git a/crates/sandlock-core/src/procfs.rs b/crates/sandlock-core/src/procfs.rs index 4a5a522..ffbbffa 100644 --- a/crates/sandlock-core/src/procfs.rs +++ b/crates/sandlock-core/src/procfs.rs @@ -11,7 +11,7 @@ use 
std::sync::Arc; use tokio::sync::Mutex; -use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction, NotifPolicy}; +use crate::seccomp::notif::{read_child_cstr, write_child_mem, NotifAction, NotifPolicy}; use crate::seccomp::state::{NetworkState, ProcfsState}; use crate::sys::structs::{SeccompNotif, EACCES}; use crate::sys::syscall; @@ -346,13 +346,7 @@ fn inject_memfd(content: &[u8]) -> NotifAction { /// Read a NUL-terminated path string from child memory. fn read_path(notif: &SeccompNotif, addr: u64, notif_fd: RawFd) -> Option<String> { - if addr == 0 { - return None; - } - // Read up to 256 bytes — enough for any /proc path we care about. - let bytes = read_child_mem(notif_fd, notif.id, notif.pid, addr, 256).ok()?; - let nul_pos = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len()); - String::from_utf8(bytes[..nul_pos].to_vec()).ok() + read_child_cstr(notif_fd, notif.id, notif.pid, addr, 4096) } // ============================================================ @@ -598,24 +592,40 @@ pub(crate) async fn handle_sorted_getdents( let buf_addr = notif.data.args[1]; let buf_size = (notif.data.args[2] & 0xFFFF_FFFF) as usize; - let cache_key = (pid as i32, child_fd); + let link_path = format!("/proc/{}/fd/{}", pid, child_fd); + let dir_path = match std::fs::read_link(&link_path) { + Ok(t) => t, + Err(_) => return NotifAction::Continue, + }; + let cache_key = ( + pid as i32, + child_fd, + dir_path.to_string_lossy().into_owned(), + ); let mut pfs = procfs.lock().await; - // Build and cache sorted entries on first call for this (pid, fd) pair. - // An empty Vec means "already fully consumed" — return 0 (EOF). + // Build and cache sorted entries on first call for this open directory. + // Remove an empty cache on EOF so later fd reuse can rebuild entries. 
if !pfs.getdents_cache.contains_key(&cache_key) { - let link_path = format!("/proc/{}/fd/{}", pid, child_fd); - let dir_path = match std::fs::read_link(&link_path) { - Ok(t) => t, - Err(_) => return NotifAction::Continue, - }; - let dir = match std::fs::read_dir(&dir_path) { Ok(d) => d, Err(_) => return NotifAction::Continue, }; - let mut names: Vec<_> = dir + let mut names: Vec<_> = Vec::new(); + { + use std::os::unix::fs::MetadataExt; + let dot_ino = std::fs::symlink_metadata(&dir_path).map(|m| m.ino()).unwrap_or(0); + let dotdot_ino = dir_path + .parent() + .and_then(|p| std::fs::symlink_metadata(p).ok()) + .map(|m| m.ino()) + .unwrap_or(dot_ino); + names.push((".".to_string(), DT_DIR, dot_ino)); + names.push(("..".to_string(), DT_DIR, dotdot_ino)); + } + + names.extend(dir .filter_map(|e| e.ok()) .map(|e| { let name = e.file_name().to_string_lossy().into_owned(); @@ -629,8 +639,7 @@ pub(crate) async fn handle_sorted_getdents( e.metadata().map(|m| m.st_ino()).unwrap_or(0) }; (name, d_type, d_ino) - }) - .collect(); + })); names.sort_by(|a, b| a.0.cmp(&b.0)); @@ -642,7 +651,7 @@ pub(crate) async fn handle_sorted_getdents( }) .collect(); - pfs.getdents_cache.insert(cache_key, entries); + pfs.getdents_cache.insert(cache_key.clone(), entries); } let entries = match pfs.getdents_cache.get_mut(&cache_key) { @@ -652,6 +661,7 @@ pub(crate) async fn handle_sorted_getdents( // Empty cache = already fully drained on a prior call → return 0 (EOF). if entries.is_empty() { + pfs.getdents_cache.remove(&cache_key); return NotifAction::ReturnValue(0); } @@ -776,13 +786,13 @@ pub(crate) async fn handle_getdents( return NotifAction::Continue; } - let cache_key = (pid as i32, child_fd); + let cache_key = (pid as i32, child_fd, target.to_string_lossy().into_owned()); let mut pfs = procfs.lock().await; // Build and cache entries on first call for this (pid, fd) pair. 
if !pfs.getdents_cache.contains_key(&cache_key) { let entries = build_filtered_dirents(&pfs.proc_pids); - pfs.getdents_cache.insert(cache_key, entries); + pfs.getdents_cache.insert(cache_key.clone(), entries); } let entries = match pfs.getdents_cache.get_mut(&cache_key) { @@ -803,6 +813,7 @@ pub(crate) async fn handle_getdents( // Empty cache = already fully drained on a prior call → return 0 (EOF). if entries.is_empty() { + pfs.getdents_cache.remove(&cache_key); return NotifAction::ReturnValue(0); } diff --git a/crates/sandlock-core/src/random.rs b/crates/sandlock-core/src/random.rs index 4a9f609..e23c794 100644 --- a/crates/sandlock-core/src/random.rs +++ b/crates/sandlock-core/src/random.rs @@ -8,7 +8,7 @@ use std::io::{Seek, SeekFrom, Write}; use std::os::fd::RawFd; use std::os::unix::io::{AsRawFd, FromRawFd}; -use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction}; +use crate::seccomp::notif::{read_child_cstr, write_child_mem, NotifAction}; use crate::sys::structs::SeccompNotif; use crate::sys::syscall; @@ -56,12 +56,9 @@ pub(crate) fn handle_random_open( return None; } - // Read the path from child memory. - let bytes = read_child_mem(notif_fd, notif.id, notif.pid, path_ptr, 256).ok()?; - let nul_pos = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len()); - let path = std::str::from_utf8(&bytes[..nul_pos]).ok()?; + let path = read_child_cstr(notif_fd, notif.id, notif.pid, path_ptr, 4096)?; - if path != "/dev/urandom" && path != "/dev/random" { + if path.as_str() != "/dev/urandom" && path.as_str() != "/dev/random" { return None; } diff --git a/crates/sandlock-core/src/resource.rs b/crates/sandlock-core/src/resource.rs index c7937c9..cab2be8 100644 --- a/crates/sandlock-core/src/resource.rs +++ b/crates/sandlock-core/src/resource.rs @@ -30,7 +30,7 @@ pub(crate) async fn handle_fork( let args = &notif.data.args; // For clone/vfork: check namespace flags in args[0]. 
- if nr == libc::SYS_clone || nr == libc::SYS_vfork { + if nr == libc::SYS_clone || Some(nr) == crate::arch::SYS_VFORK { if nr == libc::SYS_clone && (args[0] & CLONE_NS_FLAGS) != 0 { return NotifAction::Errno(EPERM); } diff --git a/crates/sandlock-core/src/seccomp/bpf.rs b/crates/sandlock-core/src/seccomp/bpf.rs index 48b7790..e7d8e9c 100644 --- a/crates/sandlock-core/src/seccomp/bpf.rs +++ b/crates/sandlock-core/src/seccomp/bpf.rs @@ -14,7 +14,6 @@ use std::os::unix::io::{FromRawFd, OwnedFd}; use crate::sys::structs::{ - AUDIT_ARCH_X86_64, BPF_ABS, BPF_JEQ, BPF_JMP, BPF_K, BPF_LD, BPF_RET, BPF_W, EPERM, OFFSET_ARCH, OFFSET_NR, @@ -71,7 +70,7 @@ pub fn assemble_filter( // ---- 1. Arch check block ---- prog.push(stmt(BPF_LD | BPF_W | BPF_ABS, OFFSET_ARCH)); let arch_jf = (ret_kill_idx - 2) as u8; - prog.push(jump(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, arch_jf)); + prog.push(jump(BPF_JMP | BPF_JEQ | BPF_K, crate::arch::AUDIT_ARCH, 0, arch_jf)); // ---- 2. Pre-built arg filter block ---- prog.extend_from_slice(arg_block); @@ -187,7 +186,7 @@ mod tests { // prog[1] is the JEQ arch check; jf should reach the KILL return. let arch_jeq = &prog[1]; assert_eq!(arch_jeq.code, BPF_JMP | BPF_JEQ | BPF_K); - assert_eq!(arch_jeq.k, AUDIT_ARCH_X86_64); + assert_eq!(arch_jeq.k, crate::arch::AUDIT_ARCH); // The instruction following prog[1] is prog[2]. // KILL is the last instruction. 
let kill_idx = prog.len() - 1; diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs index 558492f..8e1479f 100644 --- a/crates/sandlock-core/src/seccomp/dispatch.rs +++ b/crates/sandlock-core/src/seccomp/dispatch.rs @@ -13,6 +13,7 @@ use std::sync::Arc; use super::ctx::SupervisorCtx; use super::notif::{NotifAction, NotifPolicy}; use super::state::ResourceState; +use crate::arch; use crate::sys::structs::SeccompNotif; use tokio::sync::Mutex; @@ -95,7 +96,11 @@ pub fn build_dispatch_table( // ------------------------------------------------------------------ // Fork/clone family (always on) // ------------------------------------------------------------------ - for &nr in &[libc::SYS_clone, libc::SYS_clone3, libc::SYS_vfork] { + let mut fork_nrs = vec![libc::SYS_clone, libc::SYS_clone3]; + if let Some(vfork) = arch::SYS_VFORK { + fork_nrs.push(vfork); + } + for nr in fork_nrs { let policy = Arc::clone(policy); let resource = Arc::clone(resource); table.register(nr, Box::new(move |notif, ctx, _notif_fd| { @@ -235,7 +240,11 @@ pub fn build_dispatch_table( }) })); } - for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] { + let mut getdents_nrs = vec![libc::SYS_getdents64]; + if let Some(getdents) = arch::SYS_GETDENTS { + getdents_nrs.push(getdents); + } + for nr in getdents_nrs { let policy = Arc::clone(policy); table.register(nr, Box::new(move |notif, ctx, notif_fd| { let policy = Arc::clone(&policy); @@ -302,7 +311,11 @@ pub fn build_dispatch_table( // Deterministic directory listing // ------------------------------------------------------------------ if policy.deterministic_dirs { - for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] { + let mut getdents_nrs = vec![libc::SYS_getdents64]; + if let Some(getdents) = arch::SYS_GETDENTS { + getdents_nrs.push(getdents); + } + for nr in getdents_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let procfs_inner = Arc::clone(&ctx.procfs); 
Box::pin(async move { @@ -444,8 +457,10 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc crate::chroot::dispatch::handle_chroot_open)); // open (legacy) — fallthrough if Continue - table.register(libc::SYS_open as i64, chroot_handler_fallthrough!(policy, - crate::chroot::dispatch::handle_chroot_legacy_open)); + if let Some(open) = arch::SYS_OPEN { + table.register(open, chroot_handler_fallthrough!(policy, + crate::chroot::dispatch::handle_chroot_legacy_open)); + } // execve, execveat — unconditional return for &nr in &[libc::SYS_execve, libc::SYS_execveat] { @@ -464,25 +479,39 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc } // Legacy write syscalls - table.register(libc::SYS_unlink as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_unlink)); - table.register(libc::SYS_rmdir as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_rmdir)); - table.register(libc::SYS_mkdir as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_mkdir)); - table.register(libc::SYS_rename as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_rename)); - table.register(libc::SYS_symlink as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_symlink)); - table.register(libc::SYS_link as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_link)); - table.register(libc::SYS_chmod as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_chmod)); + if let Some(nr) = arch::SYS_UNLINK { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_unlink)); + } + if let Some(nr) = arch::SYS_RMDIR { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_rmdir)); + } + if let Some(nr) = arch::SYS_MKDIR { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_mkdir)); + } + 
if let Some(nr) = arch::SYS_RENAME { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_rename)); + } + if let Some(nr) = arch::SYS_SYMLINK { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_symlink)); + } + if let Some(nr) = arch::SYS_LINK { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_link)); + } + if let Some(nr) = arch::SYS_CHMOD { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_chmod)); + } // chown — non-follow - { + if let Some(chown) = arch::SYS_CHOWN { let policy = Arc::clone(policy); - table.register(libc::SYS_chown as i64, Box::new(move |notif, ctx, notif_fd| { + table.register(chown, Box::new(move |notif, ctx, notif_fd| { let policy = Arc::clone(&policy); Box::pin(async move { let chroot_ctx = ChrootCtx { @@ -498,9 +527,9 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc } // lchown — follow - { + if let Some(lchown) = arch::SYS_LCHOWN { let policy = Arc::clone(policy); - table.register(libc::SYS_lchown as i64, Box::new(move |notif, ctx, notif_fd| { + table.register(lchown, Box::new(move |notif, ctx, notif_fd| { let policy = Arc::clone(&policy); Box::pin(async move { let chroot_ctx = ChrootCtx { @@ -526,12 +555,18 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc } // Legacy stat - table.register(libc::SYS_stat as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_stat)); - table.register(libc::SYS_lstat as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_lstat)); - table.register(libc::SYS_access as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_access)); + if let Some(nr) = arch::SYS_STAT { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_stat)); + } + if let Some(nr) = arch::SYS_LSTAT { + table.register(nr, 
chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_lstat)); + } + if let Some(nr) = arch::SYS_ACCESS { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_access)); + } // statx table.register(libc::SYS_statx, chroot_handler!(policy, @@ -540,11 +575,17 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc // readlink table.register(libc::SYS_readlinkat, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_readlink)); - table.register(libc::SYS_readlink as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_readlink)); + if let Some(nr) = arch::SYS_READLINK { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_readlink)); + } // getdents - for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] { + let mut getdents_nrs = vec![libc::SYS_getdents64]; + if let Some(getdents) = arch::SYS_GETDENTS { + getdents_nrs.push(getdents); + } + for nr in getdents_nrs { table.register(nr, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_getdents)); } @@ -566,16 +607,17 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc fn register_cow_handlers(table: &mut DispatchTable) { // Write syscalls (*at variants + legacy) - for &nr in &[ + let mut write_nrs = vec![ libc::SYS_unlinkat, libc::SYS_mkdirat, libc::SYS_renameat2, libc::SYS_symlinkat, libc::SYS_linkat, libc::SYS_fchmodat, libc::SYS_fchownat, libc::SYS_truncate, - libc::SYS_unlink as i64, libc::SYS_rmdir as i64, - libc::SYS_mkdir as i64, libc::SYS_rename as i64, - libc::SYS_symlink as i64, libc::SYS_link as i64, - libc::SYS_chmod as i64, libc::SYS_chown as i64, - libc::SYS_lchown as i64, - ] { + ]; + write_nrs.extend([ + arch::SYS_UNLINK, arch::SYS_RMDIR, arch::SYS_MKDIR, arch::SYS_RENAME, + arch::SYS_SYMLINK, arch::SYS_LINK, arch::SYS_CHMOD, arch::SYS_CHOWN, + arch::SYS_LCHOWN, + ].into_iter().flatten()); + for nr in write_nrs { 
table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -593,11 +635,12 @@ fn register_cow_handlers(table: &mut DispatchTable) { })); // faccessat/access — fallthrough - for &nr in &[ + let mut access_nrs = vec![ libc::SYS_faccessat, crate::cow::dispatch::SYS_FACCESSAT2, - libc::SYS_access as i64, - ] { + ]; + access_nrs.extend(arch::SYS_ACCESS); + for nr in access_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -607,7 +650,9 @@ fn register_cow_handlers(table: &mut DispatchTable) { } // openat/open — fallthrough - for &nr in &[libc::SYS_openat, libc::SYS_open as i64] { + let mut open_nrs = vec![libc::SYS_openat]; + open_nrs.extend(arch::SYS_OPEN); + for nr in open_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -617,11 +662,11 @@ fn register_cow_handlers(table: &mut DispatchTable) { } // stat family — fallthrough - for &nr in &[ + let mut stat_nrs = vec![ libc::SYS_newfstatat, libc::SYS_faccessat, - libc::SYS_stat as i64, libc::SYS_lstat as i64, - libc::SYS_access as i64, - ] { + ]; + stat_nrs.extend([arch::SYS_STAT, arch::SYS_LSTAT, arch::SYS_ACCESS].into_iter().flatten()); + for nr in stat_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -639,7 +684,9 @@ fn register_cow_handlers(table: &mut DispatchTable) { })); // readlink — fallthrough - for &nr in &[libc::SYS_readlinkat, libc::SYS_readlink as i64] { + let mut readlink_nrs = vec![libc::SYS_readlinkat]; + readlink_nrs.extend(arch::SYS_READLINK); + for nr in readlink_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -649,7 +696,9 @@ fn register_cow_handlers(table: &mut DispatchTable) { } // getdents — fallthrough - for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] { + let mut 
getdents_nrs = vec![libc::SYS_getdents64]; + getdents_nrs.extend(arch::SYS_GETDENTS); + for nr in getdents_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -665,4 +714,12 @@ fn register_cow_handlers(table: &mut DispatchTable) { crate::cow::dispatch::handle_cow_chdir(&notif, &cow, notif_fd).await }) })); + + // getcwd — return logical workdir path after chdir into a COW-only dir + table.register(libc::SYS_getcwd, Box::new(|notif, ctx, notif_fd| { + let cow = Arc::clone(&ctx.cow); + Box::pin(async move { + crate::cow::dispatch::handle_cow_getcwd(&notif, &cow, notif_fd).await + }) + })); } diff --git a/crates/sandlock-core/src/seccomp/notif.rs b/crates/sandlock-core/src/seccomp/notif.rs index 1d47a21..03a1981 100644 --- a/crates/sandlock-core/src/seccomp/notif.rs +++ b/crates/sandlock-core/src/seccomp/notif.rs @@ -9,6 +9,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd, OwnedFd, RawFd}; use std::sync::Arc; use crate::error::NotifError; +use crate::arch; use crate::sys::structs::{ SeccompNotif, SeccompNotifAddfd, SeccompNotifResp, SECCOMP_ADDFD_FLAG_SEND, SECCOMP_IOCTL_NOTIF_ADDFD, SECCOMP_IOCTL_NOTIF_ID_VALID, SECCOMP_IOCTL_NOTIF_RECV, @@ -387,6 +388,38 @@ pub(crate) fn read_child_mem( Ok(result) } +/// Read a NUL-terminated string from child memory without crossing unmapped +/// page boundaries in a single `process_vm_readv` call. 
+pub(crate) fn read_child_cstr( + notif_fd: RawFd, + id: u64, + pid: u32, + addr: u64, + max_len: usize, +) -> Option<String> { + if addr == 0 || max_len == 0 { + return None; + } + + const PAGE_SIZE: u64 = 4096; + let mut result = Vec::with_capacity(max_len.min(256)); + let mut cur = addr; + while result.len() < max_len { + let page_remaining = PAGE_SIZE - (cur % PAGE_SIZE); + let remaining = max_len - result.len(); + let to_read = page_remaining.min(remaining as u64) as usize; + let bytes = read_child_mem(notif_fd, id, pid, cur, to_read).ok()?; + if let Some(nul) = bytes.iter().position(|&b| b == 0) { + result.extend_from_slice(&bytes[..nul]); + return String::from_utf8(result).ok(); + } + result.extend_from_slice(&bytes); + cur += to_read as u64; + } + + String::from_utf8(result).ok() +} + /// Write bytes to a child process via process_vm_writev. /// /// Performs TOCTOU validation by calling `id_valid` before and after @@ -480,7 +513,7 @@ fn syscall_name(nr: i64) -> &'static str { n if n == libc::SYS_bind => "bind", n if n == libc::SYS_clone => "clone", n if n == libc::SYS_clone3 => "clone3", - n if n == libc::SYS_vfork => "vfork", + n if Some(n) == arch::SYS_VFORK => "vfork", n if n == libc::SYS_execve => "execve", n if n == libc::SYS_execveat => "execveat", n if n == libc::SYS_mmap => "mmap", @@ -504,12 +537,12 @@ fn syscall_category(nr: i64) -> crate::policy_fn::SyscallCategory { || n == libc::SYS_truncate || n == libc::SYS_readlinkat || n == libc::SYS_newfstatat || n == libc::SYS_statx || n == libc::SYS_faccessat || n == libc::SYS_getdents64 - || n == libc::SYS_getdents => SyscallCategory::File, + || Some(n) == arch::SYS_GETDENTS => SyscallCategory::File, n if n == libc::SYS_connect || n == libc::SYS_sendto || n == libc::SYS_sendmsg || n == libc::SYS_bind || n == libc::SYS_getsockname => SyscallCategory::Network, n if n == libc::SYS_clone || n == libc::SYS_clone3 - || n == libc::SYS_vfork || n == libc::SYS_execve + || Some(n) == arch::SYS_VFORK || n == 
libc::SYS_execve || n == libc::SYS_execveat => SyscallCategory::Process, n if n == libc::SYS_mmap || n == libc::SYS_munmap || n == libc::SYS_brk || n == libc::SYS_mremap @@ -590,7 +623,7 @@ fn resolve_path_for_notif(notif: &SeccompNotif, notif_fd: RawFd) -> Option { + n if Some(n) == arch::SYS_OPEN || n == libc::SYS_execve => { let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path) } @@ -618,17 +651,17 @@ fn resolve_path_for_notif(notif: &SeccompNotif, notif_fd: RawFd) -> Option { + n if Some(n) == arch::SYS_LINK => { let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path) } // rename(oldpath, newpath) — legacy, AT_FDCWD implied for both - n if n == libc::SYS_rename => { + n if Some(n) == arch::SYS_RENAME => { let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path) } // symlink(target, linkpath) — legacy - n if n == libc::SYS_symlink => { + n if Some(n) == arch::SYS_SYMLINK => { let target = read_path_for_event(notif, notif.data.args[0], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &target) } @@ -655,12 +688,12 @@ fn resolve_second_path_for_notif(notif: &SeccompNotif, notif_fd: RawFd) -> Optio resolve_at_path_for_event(notif, notif.data.args[2] as i64, &path) } // rename(oldpath, newpath) — legacy - n if n == libc::SYS_rename => { + n if Some(n) == arch::SYS_RENAME => { let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path) } // link(oldpath, newpath) — legacy - n if n == libc::SYS_link => { + n if Some(n) == arch::SYS_LINK => { let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path) } @@ -747,7 +780,7 @@ async fn emit_policy_event( let mut size = None; let mut 
argv = None; - if nr == libc::SYS_openat || nr == libc::SYS_execve || nr == libc::SYS_execveat { + if nr == libc::SYS_openat || Some(nr) == arch::SYS_OPEN || nr == libc::SYS_execve || nr == libc::SYS_execveat { // openat(dirfd, pathname, ...): args[1] = path ptr // execve(pathname, argv, envp): args[0] = path ptr, args[1] = argv ptr let path_ptr = if nr == libc::SYS_openat { @@ -838,13 +871,15 @@ async fn handle_notification( // Check dynamic path denials before dispatch let mut action = { let nr = notif.data.nr as i64; + let mut path_check_nrs = vec![ + libc::SYS_openat, libc::SYS_execve, libc::SYS_execveat, + libc::SYS_linkat, libc::SYS_renameat2, libc::SYS_symlinkat, + ]; + path_check_nrs.extend([ + arch::SYS_OPEN, arch::SYS_LINK, arch::SYS_RENAME, arch::SYS_SYMLINK, + ].into_iter().flatten()); let should_precheck_denied = policy.chroot_root.is_none() - && [ - libc::SYS_openat, libc::SYS_open, libc::SYS_execve, libc::SYS_execveat, - libc::SYS_linkat, libc::SYS_link, - libc::SYS_renameat2, libc::SYS_rename, - libc::SYS_symlinkat, libc::SYS_symlink, - ].contains(&nr); + && path_check_nrs.contains(&nr); if should_precheck_denied { let pfs = ctx.policy_fn.lock().await; if is_path_denied_for_notif(&pfs, ¬if, fd) { diff --git a/crates/sandlock-core/src/seccomp/state.rs b/crates/sandlock-core/src/seccomp/state.rs index ed46c74..b9058e2 100644 --- a/crates/sandlock-core/src/seccomp/state.rs +++ b/crates/sandlock-core/src/seccomp/state.rs @@ -50,9 +50,9 @@ impl ResourceState { pub struct ProcfsState { /// PIDs belonging to the sandbox (for /proc PID filtering). pub proc_pids: HashSet, - /// Cache of filtered dirent entries keyed by (pid, fd). + /// Cache of filtered dirent entries keyed by (pid, fd, directory target). /// Populated on first getdents64 call for a /proc directory, drained on subsequent calls. 
- pub getdents_cache: HashMap<(i32, u32), Vec>>, + pub getdents_cache: HashMap<(i32, u32, String), Vec>>, /// Base address of the last vDSO we patched (0 = not yet patched). pub vdso_patched_addr: u64, } @@ -71,13 +71,24 @@ impl ProcfsState { // CowState — copy-on-write filesystem state // ============================================================ +/// Stable process identity for per-process COW state. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct PidKey { + /// Numeric PID observed by seccomp notification. + pub pid: i32, + /// Process start time from /proc//stat field 22. + pub start_time: u64, +} + /// Copy-on-write filesystem state. pub struct CowState { /// Seccomp-based COW branch (None if COW disabled). pub branch: Option, /// Getdents cache for COW directories. /// Value is (host_path, entries) to detect fd reuse and invalidate stale entries. - pub dir_cache: HashMap<(i32, u32), (String, Vec>)>, + pub dir_cache: HashMap<(PidKey, u32), (String, Vec>)>, + /// Logical cwd for processes that chdir into COW-only directories. + pub virtual_cwds: HashMap, } impl CowState { @@ -85,8 +96,42 @@ impl CowState { Self { branch: None, dir_cache: HashMap::new(), + virtual_cwds: HashMap::new(), } } + + /// Drop COW per-process entries for an older process that used the same numeric PID. 
+ pub(crate) fn prune_reused_pid(&mut self, current: PidKey) { + self.virtual_cwds + .retain(|key, _| key.pid != current.pid || *key == current); + self.dir_cache + .retain(|(key, _), _| key.pid != current.pid || *key == current); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn cow_state_prunes_entries_for_reused_pid() { + let old = PidKey { pid: 42, start_time: 1 }; + let current = PidKey { pid: 42, start_time: 2 }; + let other = PidKey { pid: 43, start_time: 1 }; + let mut state = CowState::new(); + + state.virtual_cwds.insert(old, "/old".to_string()); + state.virtual_cwds.insert(other, "/other".to_string()); + state.dir_cache.insert((old, 7), ("/old".to_string(), Vec::new())); + state.dir_cache.insert((other, 7), ("/other".to_string(), Vec::new())); + + state.prune_reused_pid(current); + + assert!(!state.virtual_cwds.contains_key(&old)); + assert!(!state.dir_cache.contains_key(&(old, 7))); + assert_eq!(state.virtual_cwds.get(&other), Some(&"/other".to_string())); + assert!(state.dir_cache.contains_key(&(other, 7))); + } } // ============================================================ diff --git a/crates/sandlock-core/src/sys/structs.rs b/crates/sandlock-core/src/sys/structs.rs index 4fd89d0..2c85021 100644 --- a/crates/sandlock-core/src/sys/structs.rs +++ b/crates/sandlock-core/src/sys/structs.rs @@ -167,12 +167,6 @@ pub const SECCOMP_IOCTL_NOTIF_ID_VALID: u64 = 0x4008_2102; pub const SECCOMP_IOCTL_NOTIF_ADDFD: u64 = 0xc018_2103; pub const SECCOMP_IOCTL_NOTIF_SET_FLAGS: u64 = 0x4008_2104; -// ============================================================ -// Architecture -// ============================================================ - -pub const AUDIT_ARCH_X86_64: u32 = 0xC000_003E; - // ============================================================ // BPF opcodes // ============================================================ diff --git a/crates/sandlock-core/src/sys/syscall.rs b/crates/sandlock-core/src/sys/syscall.rs index e6bf46f..7954868 
100644 --- a/crates/sandlock-core/src/sys/syscall.rs +++ b/crates/sandlock-core/src/sys/syscall.rs @@ -8,15 +8,16 @@ use super::structs::{ }; // ============================================================ -// Core raw syscall wrappers (x86_64 ABI) +// Core raw syscall wrappers // ============================================================ -/// Raw 3-argument syscall using x86_64 ABI. +/// Raw 3-argument syscall. /// /// # Safety /// Caller must ensure arguments are valid for the given syscall number. pub unsafe fn syscall3(nr: i64, a1: u64, a2: u64, a3: u64) -> io::Result { let ret: i64; + #[cfg(target_arch = "x86_64")] std::arch::asm!( "syscall", inlateout("rax") nr => ret, @@ -27,6 +28,50 @@ pub unsafe fn syscall3(nr: i64, a1: u64, a2: u64, a3: u64) -> io::Result { lateout("r11") _, options(nostack), ); + #[cfg(target_arch = "aarch64")] + std::arch::asm!( + "svc #0", + inlateout("x8") nr => _, + inlateout("x0") a1 as i64 => ret, + in("x1") a2, + in("x2") a3, + options(nostack), + ); + if ret < 0 && ret >= -4095 { + Err(io::Error::from_raw_os_error(-ret as i32)) + } else { + Ok(ret) + } +} + +/// Raw 4-argument syscall. +/// +/// # Safety +/// Caller must ensure arguments are valid for the given syscall number. 
+pub unsafe fn syscall4(nr: i64, a1: u64, a2: u64, a3: u64, a4: u64) -> io::Result { + let ret: i64; + #[cfg(target_arch = "x86_64")] + std::arch::asm!( + "syscall", + inlateout("rax") nr => ret, + in("rdi") a1, + in("rsi") a2, + in("rdx") a3, + in("r10") a4, + lateout("rcx") _, + lateout("r11") _, + options(nostack), + ); + #[cfg(target_arch = "aarch64")] + std::arch::asm!( + "svc #0", + inlateout("x8") nr => _, + inlateout("x0") a1 as i64 => ret, + in("x1") a2, + in("x2") a3, + in("x3") a4, + options(nostack), + ); if ret < 0 && ret >= -4095 { Err(io::Error::from_raw_os_error(-ret as i32)) } else { @@ -73,14 +118,13 @@ pub fn landlock_add_rule( ) -> io::Result<()> { use std::os::unix::io::AsRawFd; unsafe { - syscall3( + syscall4( SYS_LANDLOCK_ADD_RULE, ruleset_fd.as_raw_fd() as u64, rule_type as u64, rule_attr as u64, + flags as u64, )?; - // flags is in arg4; re-issue as 4-arg syscall via inline asm - let _ = flags; // flags documented as must be 0 in current kernel ABI } Ok(()) } @@ -104,8 +148,7 @@ pub fn landlock_restrict_self(ruleset_fd: &OwnedFd, flags: u32) -> io::Result<() /// Raw seccomp(2) syscall (syscall 317 on x86_64). pub fn seccomp(operation: u32, flags: u64, args: *const std::ffi::c_void) -> io::Result { - const SYS_SECCOMP: i64 = 317; - unsafe { syscall3(SYS_SECCOMP, operation as u64, flags, args as u64) } + unsafe { syscall3(crate::arch::SYS_SECCOMP, operation as u64, flags, args as u64) } } // ============================================================ @@ -114,18 +157,16 @@ pub fn seccomp(operation: u32, flags: u64, args: *const std::ffi::c_void) -> io: /// Open a pidfd for a process (syscall 434). pub fn pidfd_open(pid: u32, flags: u32) -> io::Result { - const SYS_PIDFD_OPEN: i64 = 434; - let fd = unsafe { syscall2(SYS_PIDFD_OPEN, pid as u64, flags as u64)? }; + let fd = unsafe { syscall2(crate::arch::SYS_PIDFD_OPEN, pid as u64, flags as u64)? 
}; Ok(unsafe { OwnedFd::from_raw_fd(fd as i32) }) } /// Duplicate a file descriptor from another process via pidfd (syscall 438). pub fn pidfd_getfd(pidfd: &OwnedFd, targetfd: i32, flags: u32) -> io::Result { use std::os::unix::io::AsRawFd; - const SYS_PIDFD_GETFD: i64 = 438; let fd = unsafe { syscall3( - SYS_PIDFD_GETFD, + crate::arch::SYS_PIDFD_GETFD, pidfd.as_raw_fd() as u64, targetfd as u64, flags as u64, @@ -138,10 +179,15 @@ pub fn pidfd_getfd(pidfd: &OwnedFd, targetfd: i32, flags: u32) -> io::Result io::Result { - const SYS_MEMFD_CREATE: i64 = 319; let cname = CString::new(name).map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?; - let fd = unsafe { syscall2(SYS_MEMFD_CREATE, cname.as_ptr() as u64, flags as u64)? }; + let fd = unsafe { + syscall2( + crate::arch::SYS_MEMFD_CREATE, + cname.as_ptr() as u64, + flags as u64, + )? + }; Ok(unsafe { OwnedFd::from_raw_fd(fd as i32) }) } diff --git a/crates/sandlock-core/src/vdso.rs b/crates/sandlock-core/src/vdso.rs index 389295c..a9fb709 100644 --- a/crates/sandlock-core/src/vdso.rs +++ b/crates/sandlock-core/src/vdso.rs @@ -59,7 +59,31 @@ fn parse_vdso_symbols(vdso_bytes: &[u8]) -> HashMap { symbols } +#[cfg(target_arch = "aarch64")] +fn push_insn(stub: &mut Vec, insn: u32) { + stub.extend_from_slice(&insn.to_le_bytes()); +} + +#[cfg(target_arch = "aarch64")] +fn movz_x(reg: u32, imm16: u16, shift: u32) -> u32 { + 0xD280_0000 | (((shift / 16) & 0x3) << 21) | ((imm16 as u32) << 5) | reg +} + +#[cfg(target_arch = "aarch64")] +fn movk_x(reg: u32, imm16: u16, shift: u32) -> u32 { + 0xF280_0000 | (((shift / 16) & 0x3) << 21) | ((imm16 as u32) << 5) | reg +} + +#[cfg(target_arch = "aarch64")] +fn load_imm64(stub: &mut Vec, reg: u32, value: u64) { + push_insn(stub, movz_x(reg, (value & 0xffff) as u16, 0)); + push_insn(stub, movk_x(reg, ((value >> 16) & 0xffff) as u16, 16)); + push_insn(stub, movk_x(reg, ((value >> 32) & 0xffff) as u16, 32)); + push_insn(stub, movk_x(reg, ((value >> 48) & 0xffff) as u16, 48)); +} + 
/// Generate a simple stub that forces a real syscall (replacing the vDSO fast path). +#[cfg(target_arch = "x86_64")] /// Layout: mov eax, imm32 / syscall / ret — 8 bytes total. fn simple_stub(syscall_nr: u32) -> Vec { let mut stub = Vec::new(); @@ -70,9 +94,19 @@ fn simple_stub(syscall_nr: u32) -> Vec { stub // 8 bytes total } +#[cfg(target_arch = "aarch64")] +fn simple_stub(syscall_nr: u32) -> Vec { + let mut stub = Vec::new(); + push_insn(&mut stub, movz_x(8, syscall_nr as u16, 0)); // mov x8, syscall_nr + push_insn(&mut stub, 0xD400_0001); // svc #0 + push_insn(&mut stub, 0xD65F_03C0); // ret + stub +} + /// Generate an offset stub for clock_gettime that forces a real syscall, /// then adds a time offset to the result for CLOCK_REALTIME and CLOCK_REALTIME_COARSE. /// +#[cfg(target_arch = "x86_64")] /// Layout (x86-64): /// push rdi / push rsi /// mov eax, 228 / syscall ; do the real syscall @@ -109,8 +143,28 @@ fn offset_stub_clock_gettime(offset_secs: i64) -> Vec { stub } +#[cfg(target_arch = "aarch64")] +fn offset_stub_clock_gettime(offset_secs: i64) -> Vec { + let mut stub = Vec::new(); + push_insn(&mut stub, 0xAA00_03E9); // mov x9, x0 (clock id) + push_insn(&mut stub, 0xAA01_03EA); // mov x10, x1 (timespec*) + push_insn(&mut stub, movz_x(8, libc::SYS_clock_gettime as u16, 0)); + push_insn(&mut stub, 0xD400_0001); // svc #0 + push_insn(&mut stub, 0x7100_013F); // cmp w9, #0 (CLOCK_REALTIME) + push_insn(&mut stub, 0x5400_0060); // b.eq +3 instructions + push_insn(&mut stub, 0x7100_153F); // cmp w9, #5 (CLOCK_REALTIME_COARSE) + push_insn(&mut stub, 0x5400_0101); // b.ne +8 instructions, to ret + load_imm64(&mut stub, 11, offset_secs as u64); // x11 = offset + push_insn(&mut stub, 0xF940_014C); // ldr x12, [x10] + push_insn(&mut stub, 0x8B0B_018C); // add x12, x12, x11 + push_insn(&mut stub, 0xF900_014C); // str x12, [x10] + push_insn(&mut stub, 0xD65F_03C0); // ret + stub +} + /// Generate an offset stub for gettimeofday that forces a real syscall, /// then 
adds a time offset to tv_sec. +#[cfg(target_arch = "x86_64")] fn offset_stub_gettimeofday(offset_secs: i64) -> Vec { let mut stub = Vec::new(); stub.extend_from_slice(&[0x57, 0x56]); // push rdi, push rsi @@ -124,6 +178,38 @@ fn offset_stub_gettimeofday(offset_secs: i64) -> Vec { stub } +#[cfg(target_arch = "aarch64")] +fn offset_stub_gettimeofday(offset_secs: i64) -> Vec { + let mut stub = Vec::new(); + push_insn(&mut stub, 0xAA00_03EA); // mov x10, x0 (timeval*) + push_insn(&mut stub, movz_x(8, libc::SYS_gettimeofday as u16, 0)); + push_insn(&mut stub, 0xD400_0001); // svc #0 + push_insn(&mut stub, 0xB400_010A); // cbz x10, +8 instructions, to ret + load_imm64(&mut stub, 11, offset_secs as u64); // x11 = offset + push_insn(&mut stub, 0xF940_014C); // ldr x12, [x10] + push_insn(&mut stub, 0x8B0B_018C); // add x12, x12, x11 + push_insn(&mut stub, 0xF900_014C); // str x12, [x10] + push_insn(&mut stub, 0xD65F_03C0); // ret + stub +} + +#[cfg(target_arch = "x86_64")] +fn vdso_targets() -> Vec<(&'static str, &'static str, u32)> { + vec![ + ("clock_gettime", "__vdso_clock_gettime", libc::SYS_clock_gettime as u32), + ("gettimeofday", "__vdso_gettimeofday", libc::SYS_gettimeofday as u32), + ("time", "__vdso_time", libc::SYS_time as u32), + ] +} + +#[cfg(target_arch = "aarch64")] +fn vdso_targets() -> Vec<(&'static str, &'static str, u32)> { + vec![ + ("clock_gettime", "__kernel_clock_gettime", libc::SYS_clock_gettime as u32), + ("gettimeofday", "__kernel_gettimeofday", libc::SYS_gettimeofday as u32), + ] +} + /// Patch the vDSO of a target process to force real syscalls (interceptable by seccomp). /// If `time_offset_secs` is provided, clock_gettime and gettimeofday stubs will add /// the offset to the returned time. 
@@ -149,19 +235,13 @@ pub(crate) fn patch( SandlockError::MemoryProtect(format!("failed to open /proc/{}/mem: {}", pid, e)) })?; - let targets = [ - ("clock_gettime", "__vdso_clock_gettime", 228u32), - ("gettimeofday", "__vdso_gettimeofday", 96u32), - ("time", "__vdso_time", 201u32), - ]; - - for (name, alt_name, syscall_nr) in &targets { - if let Some(&offset) = symbols.get(*name).or_else(|| symbols.get(*alt_name)) { + for (name, alt_name, syscall_nr) in vdso_targets() { + if let Some(&offset) = symbols.get(name).or_else(|| symbols.get(alt_name)) { let addr = base + offset; - let stub = match (time_offset_secs, *name) { + let stub = match (time_offset_secs, name) { (Some(off), "clock_gettime") => offset_stub_clock_gettime(off), (Some(off), "gettimeofday") => offset_stub_gettimeofday(off), - _ => simple_stub(*syscall_nr), + _ => simple_stub(syscall_nr), }; mem.seek(SeekFrom::Start(addr)).map_err(|e| { SandlockError::MemoryProtect(format!( @@ -200,7 +280,8 @@ mod tests { // Should find at least clock_gettime assert!( symbols.contains_key("clock_gettime") - || symbols.contains_key("__vdso_clock_gettime"), + || symbols.contains_key("__vdso_clock_gettime") + || symbols.contains_key("__kernel_clock_gettime"), "Expected clock_gettime in vDSO symbols, found: {:?}", symbols.keys().collect::>() ); diff --git a/crates/sandlock-core/tests/integration/test_checkpoint.rs b/crates/sandlock-core/tests/integration/test_checkpoint.rs index fd23158..328a3b0 100644 --- a/crates/sandlock-core/tests/integration/test_checkpoint.rs +++ b/crates/sandlock-core/tests/integration/test_checkpoint.rs @@ -4,7 +4,7 @@ use sandlock_core::{Policy, Sandbox, Checkpoint}; #[tokio::test] async fn test_checkpoint_save_load() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .build().unwrap(); @@ -53,7 +53,7 @@ async fn 
test_checkpoint_save_load() { #[tokio::test] async fn test_checkpoint_memory_maps() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .build().unwrap(); @@ -80,7 +80,7 @@ async fn test_checkpoint_memory_maps() { #[tokio::test] async fn test_checkpoint_app_state_roundtrip() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .build().unwrap(); @@ -113,7 +113,7 @@ async fn test_checkpoint_app_state_roundtrip() { #[tokio::test] async fn test_checkpoint_no_app_state_file() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .build().unwrap(); @@ -140,7 +140,7 @@ async fn test_checkpoint_no_app_state_file() { #[tokio::test] async fn test_checkpoint_process_info() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .build().unwrap(); diff --git a/crates/sandlock-core/tests/integration/test_cow.rs b/crates/sandlock-core/tests/integration/test_cow.rs index 886edaa..243f41b 100644 --- a/crates/sandlock-core/tests/integration/test_cow.rs +++ b/crates/sandlock-core/tests/integration/test_cow.rs @@ -17,7 +17,7 @@ async fn test_overlayfs_basic_commands() { fs::write(workdir.join("hello.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + 
.fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .fs_isolation(FsIsolation::OverlayFs) @@ -45,7 +45,7 @@ async fn test_overlayfs_write_isolation() { fs::write(workdir.join("data.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .fs_isolation(FsIsolation::OverlayFs) @@ -79,7 +79,7 @@ async fn test_overlayfs_commit() { fs::write(workdir.join("data.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .fs_isolation(FsIsolation::OverlayFs) @@ -124,7 +124,7 @@ async fn test_seccomp_cow_create_file() { fs::write(workdir.join("existing.txt"), "hello").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .workdir(&workdir) // FsIsolation::None is default → seccomp COW @@ -154,7 +154,7 @@ async fn test_seccomp_cow_abort() { fs::write(workdir.join("existing.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .workdir(&workdir) @@ -191,7 +191,7 @@ async fn test_seccomp_cow_relative_path_abort() { fs::write(workdir.join("orig.txt"), "original\n").unwrap(); let policy = Policy::builder() - 
.fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -226,7 +226,7 @@ async fn test_seccomp_cow_relative_path_commit() { fs::write(workdir.join("orig.txt"), "original\n").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -266,7 +266,7 @@ async fn test_seccomp_cow_open_directory() { let out_file = workdir.join("opendir_ok.txt"); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -310,7 +310,7 @@ async fn test_seccomp_cow_chdir_to_created_dir() { let out_file = workdir.join("chdir_ok.txt"); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -319,9 +319,18 @@ async fn test_seccomp_cow_chdir_to_created_dir() { .build() .unwrap(); - // mkdir creates the dir in COW upper only; cd must see it via interception. + // Create a nested directory through a dirfd so the COW handler must map the + // upper-layer fd target back to the logical workdir before mkdirat. + // Use physical pwd so the assertion covers getcwd virtualization. 
let script = format!( - "mkdir -p subdir/deep && cd subdir/deep && pwd > {}", + concat!( + "mkdir -p subdir && python3 -c \"", + "import os; ", + "fd = os.open('subdir', os.O_RDONLY | os.O_DIRECTORY); ", + "os.mkdir('deep', dir_fd=fd); ", + "os.close(fd)\" && ", + "cd subdir/deep && pwd -P > {}" + ), out_file.display() ); let result = Sandbox::run(&policy, &["sh", "-c", &script]).await; @@ -341,12 +350,13 @@ async fn test_seccomp_cow_chdir_to_created_dir() { let _ = fs::remove_dir_all(&workdir); } -/// Test that the legacy open() syscall works correctly with COW. +/// Test that the raw open syscall ABI works correctly with COW. /// /// Regression test: handle_cow_open always read args in openat() layout /// (dirfd=args[0], path=args[1], flags=args[2]), but open() uses /// (path=args[0], flags=args[1], mode=args[2]). This caused COW to miss -/// all legacy open() calls, falling through to the kernel. +/// all legacy open() calls on x86_64, falling through to the kernel. ARM64 +/// does not provide SYS_open, so it uses the equivalent raw openat ABI. #[tokio::test] async fn test_seccomp_cow_legacy_open_syscall() { let workdir = temp_dir("seccomp-legacy-open"); @@ -355,7 +365,7 @@ async fn test_seccomp_cow_legacy_open_syscall() { )); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir).fs_write("/tmp") .workdir(&workdir) @@ -364,18 +374,21 @@ async fn test_seccomp_cow_legacy_open_syscall() { .build() .unwrap(); - // Use raw SYS_open syscall (not openat) to create a file, then verify - // it's visible during the run but discarded on abort. + // Use raw syscall ABI to create a file, then verify it's visible during + // the run but discarded on abort. x86_64 uses legacy SYS_open; ARM64 uses + // the equivalent openat(AT_FDCWD, ...) ABI. 
let script = format!(concat!( - "import ctypes, os\n", + "import ctypes, os, platform\n", "libc = ctypes.CDLL('libc.so.6', use_errno=True)\n", - "SYS_open = 2\n", "O_WRONLY = 1; O_CREAT = 64; O_TRUNC = 512\n", "path = b'{wd}/newfile.txt'\n", - "fd = libc.syscall(SYS_open, path, O_WRONLY | O_CREAT | O_TRUNC, 0o644)\n", + "if platform.machine() == 'aarch64':\n", + " fd = libc.syscall(56, -100, path, O_WRONLY | O_CREAT | O_TRUNC, 0o644)\n", + "else:\n", + " fd = libc.syscall(2, path, O_WRONLY | O_CREAT | O_TRUNC, 0o644)\n", "err = ctypes.get_errno()\n", "if fd >= 0:\n", - " os.write(fd, b'created via SYS_open')\n", + " os.write(fd, b'created via raw open')\n", " os.close(fd)\n", " content = open('{wd}/newfile.txt').read()\n", " open('{out}', 'w').write(content)\n", @@ -386,7 +399,7 @@ async fn test_seccomp_cow_legacy_open_syscall() { let result = Sandbox::run(&policy, &["python3", "-c", &script]).await.unwrap(); assert!(result.success(), "exit={:?}, stderr={}", result.code(), result.stderr_str().unwrap_or("")); let content = fs::read_to_string(&out_file).unwrap_or_default(); - assert_eq!(content, "created via SYS_open", "SYS_open should work with COW"); + assert_eq!(content, "created via raw open", "raw open ABI should work with COW"); // After abort, the file should not exist on the real filesystem assert!(!workdir.join("newfile.txt").exists(), "newfile.txt should not exist after abort"); @@ -410,7 +423,7 @@ async fn test_seccomp_cow_excl_after_unlink() { fs::write(workdir.join("target.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir).fs_write("/tmp") .workdir(&workdir) @@ -419,9 +432,9 @@ async fn test_seccomp_cow_excl_after_unlink() { .build() .unwrap(); - // Unlink the file, then recreate it with O_CREAT|O_EXCL via SYS_open 
+ // Unlink the file, then recreate it with O_CREAT|O_EXCL via raw open ABI. let script = format!(concat!( - "import ctypes, os\n", + "import ctypes, os, platform\n", "libc = ctypes.CDLL('libc.so.6', use_errno=True)\n", "path = b'{wd}/target.txt'\n", "ret = libc.unlink(path)\n", @@ -429,7 +442,10 @@ async fn test_seccomp_cow_excl_after_unlink() { " open('{out}', 'w').write(f'UNLINK_FAILED:{{ctypes.get_errno()}}')\n", " raise SystemExit(1)\n", "O_WRONLY = 1; O_CREAT = 64; O_EXCL = 128\n", - "fd = libc.syscall(2, path, O_WRONLY | O_CREAT | O_EXCL, 0o644)\n", + "if platform.machine() == 'aarch64':\n", + " fd = libc.syscall(56, -100, path, O_WRONLY | O_CREAT | O_EXCL, 0o644)\n", + "else:\n", + " fd = libc.syscall(2, path, O_WRONLY | O_CREAT | O_EXCL, 0o644)\n", "err = ctypes.get_errno()\n", "if fd >= 0:\n", " os.write(fd, b'recreated')\n", @@ -458,7 +474,7 @@ async fn test_seccomp_cow_read_existing() { fs::write(workdir.join("data.txt"), "hello world").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .workdir(&workdir) diff --git a/crates/sandlock-core/tests/integration/test_determinism.rs b/crates/sandlock-core/tests/integration/test_determinism.rs index 34ea5a1..11d165b 100644 --- a/crates/sandlock-core/tests/integration/test_determinism.rs +++ b/crates/sandlock-core/tests/integration/test_determinism.rs @@ -8,7 +8,7 @@ async fn test_random_seed_deterministic() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -47,7 +47,7 @@ async fn test_random_seed_different_seeds() { let p1 = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/dev") @@ -57,7 
+57,7 @@ async fn test_random_seed_different_seeds() { let p2 = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/dev") @@ -93,7 +93,7 @@ async fn test_time_start_frozen() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -114,7 +114,7 @@ async fn test_time_start_basic_commands_work() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .time_start(past) @@ -132,7 +132,7 @@ async fn test_combined_determinism() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -146,13 +146,13 @@ async fn test_combined_determinism() { } /// Test that deterministic_dirs produces sorted directory listings. -/// Run ls twice — output should match and be sorted. +/// Run directory iteration twice — output should match and be sorted. #[tokio::test] async fn test_deterministic_dirs() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -160,11 +160,22 @@ async fn test_deterministic_dirs() { .build() .unwrap(); - // Use ls -f -1 to preserve raw getdents order (no re-sorting by ls). - let r1 = Sandbox::run(&policy, &["ls", "-f", "-1", "/etc"]).await.unwrap(); - let r2 = Sandbox::run(&policy, &["ls", "-f", "-1", "/etc"]).await.unwrap(); - assert!(r1.success(), "First ls failed"); - assert!(r2.success(), "Second ls failed"); + // Read directory entries without userland sorting so the assertion covers + // the sandbox's getdents virtualization. 
Some minimal ls implementations + // do not support `-f`, so avoid depending on ls option support here. + let scan = "python3 - <<'PY'\nimport os\nprint('\\n'.join(e.name for e in os.scandir('/etc')))\nPY"; + let r1 = Sandbox::run(&policy, &["sh", "-c", scan]).await.unwrap(); + let r2 = Sandbox::run(&policy, &["sh", "-c", scan]).await.unwrap(); + assert!( + r1.success(), + "First directory scan failed: {}", + String::from_utf8_lossy(r1.stderr.as_deref().unwrap_or_default()) + ); + assert!( + r2.success(), + "Second directory scan failed: {}", + String::from_utf8_lossy(r2.stderr.as_deref().unwrap_or_default()) + ); let out1 = String::from_utf8_lossy(r1.stdout.as_deref().unwrap_or_default()); let out2 = String::from_utf8_lossy(r2.stdout.as_deref().unwrap_or_default()); @@ -174,10 +185,11 @@ async fn test_deterministic_dirs() { ); assert_eq!( out1, out2, - "Two ls -f runs should produce identical output with deterministic_dirs" + "Two directory scans should produce identical output with deterministic_dirs" ); - // Verify the output is actually sorted (skip . and .. entries from ls -f). + // Verify the output is actually sorted (skip dot entries when the runtime + // exposes them). let lines: Vec<&str> = out1.lines() .filter(|l| *l != "." 
&& *l != "..") .collect(); @@ -192,7 +204,7 @@ async fn test_hostname_virtualization() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .hostname("mybox") diff --git a/crates/sandlock-core/tests/integration/test_dry_run.rs b/crates/sandlock-core/tests/integration/test_dry_run.rs index be9bb41..71d3cd1 100644 --- a/crates/sandlock-core/tests/integration/test_dry_run.rs +++ b/crates/sandlock-core/tests/integration/test_dry_run.rs @@ -15,7 +15,7 @@ async fn test_dry_run_reports_added_file() { fs::write(workdir.join("existing.txt"), "hello").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -46,7 +46,7 @@ async fn test_dry_run_reports_modified_file() { fs::write(workdir.join("data.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -77,7 +77,7 @@ async fn test_dry_run_reports_deleted_file() { fs::write(workdir.join("victim.txt"), "delete me").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) diff --git a/crates/sandlock-core/tests/integration/test_fork.rs b/crates/sandlock-core/tests/integration/test_fork.rs index 8514a7a..33e046f 100644 --- a/crates/sandlock-core/tests/integration/test_fork.rs +++ 
b/crates/sandlock-core/tests/integration/test_fork.rs @@ -3,7 +3,7 @@ use std::sync::atomic::{AtomicU32, Ordering}; fn base_policy() -> Policy { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .build() diff --git a/crates/sandlock-core/tests/integration/test_http_acl.rs b/crates/sandlock-core/tests/integration/test_http_acl.rs index adea897..4542f66 100644 --- a/crates/sandlock-core/tests/integration/test_http_acl.rs +++ b/crates/sandlock-core/tests/integration/test_http_acl.rs @@ -16,7 +16,7 @@ fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") diff --git a/crates/sandlock-core/tests/integration/test_landlock.rs b/crates/sandlock-core/tests/integration/test_landlock.rs index 2478490..79bc318 100644 --- a/crates/sandlock-core/tests/integration/test_landlock.rs +++ b/crates/sandlock-core/tests/integration/test_landlock.rs @@ -21,7 +21,7 @@ async fn test_can_read_allowed_path() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -49,7 +49,7 @@ async fn test_cannot_read_outside_allowed() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/proc") .fs_read("/dev") @@ -71,7 +71,7 @@ async fn test_can_write_to_writable_path() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -102,7 +102,7 @@ async fn test_cannot_write_to_readonly_path() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") 
- .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -132,7 +132,7 @@ async fn test_denied_path_blocks_read() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -156,7 +156,7 @@ async fn test_denied_path_blocks_exec() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -233,7 +233,7 @@ async fn test_isolate_ipc() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -287,7 +287,7 @@ async fn test_isolate_signals_blocks_parent() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -338,7 +338,7 @@ async fn test_isolate_signals_allows_self() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") diff --git a/crates/sandlock-core/tests/integration/test_netlink_virt.rs b/crates/sandlock-core/tests/integration/test_netlink_virt.rs index a162780..c721a85 100644 --- a/crates/sandlock-core/tests/integration/test_netlink_virt.rs +++ b/crates/sandlock-core/tests/integration/test_netlink_virt.rs @@ -3,7 +3,7 @@ use sandlock_core::{Policy, Sandbox}; fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/etc").fs_read("/proc") .fs_read("/dev").fs_write("/tmp") } diff --git a/crates/sandlock-core/tests/integration/test_network.rs 
b/crates/sandlock-core/tests/integration/test_network.rs index f08b33a..29e43f9 100644 --- a/crates/sandlock-core/tests/integration/test_network.rs +++ b/crates/sandlock-core/tests/integration/test_network.rs @@ -8,7 +8,7 @@ fn temp_file(name: &str) -> PathBuf { fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") } diff --git a/crates/sandlock-core/tests/integration/test_pipeline.rs b/crates/sandlock-core/tests/integration/test_pipeline.rs index 85a4277..2db49fd 100644 --- a/crates/sandlock-core/tests/integration/test_pipeline.rs +++ b/crates/sandlock-core/tests/integration/test_pipeline.rs @@ -4,7 +4,7 @@ use std::time::Duration; fn base_policy() -> Policy { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .build() @@ -79,7 +79,7 @@ async fn test_disjoint_policies() { // Stage 1: can read the temp dir let reader_policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_read(&tmp) .build() @@ -173,7 +173,7 @@ async fn test_xoa_data_flow() { // Executor: can read workspace let executor_policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_read(&tmp) .build() @@ -245,7 +245,7 @@ async fn test_gather_disjoint_policies() { // Data source: can read the file let data_policy = Policy::builder() - 
.fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_read(&tmp) .build() diff --git a/crates/sandlock-core/tests/integration/test_policy_fn.rs b/crates/sandlock-core/tests/integration/test_policy_fn.rs index b64f488..2b7960d 100644 --- a/crates/sandlock-core/tests/integration/test_policy_fn.rs +++ b/crates/sandlock-core/tests/integration/test_policy_fn.rs @@ -9,7 +9,7 @@ fn temp_file(name: &str) -> PathBuf { fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") } diff --git a/crates/sandlock-core/tests/integration/test_port_remap.rs b/crates/sandlock-core/tests/integration/test_port_remap.rs index 075f16f..c09403d 100644 --- a/crates/sandlock-core/tests/integration/test_port_remap.rs +++ b/crates/sandlock-core/tests/integration/test_port_remap.rs @@ -13,7 +13,7 @@ fn temp_file(name: &str) -> PathBuf { fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") } diff --git a/crates/sandlock-core/tests/integration/test_privileged.rs b/crates/sandlock-core/tests/integration/test_privileged.rs index 40ef9cb..4856567 100644 --- a/crates/sandlock-core/tests/integration/test_privileged.rs +++ b/crates/sandlock-core/tests/integration/test_privileged.rs @@ -39,7 +39,7 @@ async fn test_uid_zero() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -64,7 +64,7 @@ 
async fn test_uid_zero_gid_zero() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -84,7 +84,7 @@ async fn test_no_uid_keeps_real_uid() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -106,7 +106,7 @@ async fn test_uid_zero_echo() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .uid(0) @@ -130,7 +130,7 @@ async fn test_uid_custom() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") diff --git a/crates/sandlock-core/tests/integration/test_procfs.rs b/crates/sandlock-core/tests/integration/test_procfs.rs index 2263533..8909763 100644 --- a/crates/sandlock-core/tests/integration/test_procfs.rs +++ b/crates/sandlock-core/tests/integration/test_procfs.rs @@ -7,7 +7,7 @@ async fn test_num_cpus_virtualization() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -34,7 +34,7 @@ async fn test_meminfo_virtualization() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -59,7 +59,7 @@ async fn test_sensitive_proc_blocked() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -78,7 +78,7 @@ async fn test_no_proc_virt_still_works() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") 
.fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -103,7 +103,7 @@ async fn test_proc_net_tcp_filtered() { drop(listener); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .net_bind_port(port) @@ -142,7 +142,7 @@ async fn test_proc_net_tcp_filtered() { #[tokio::test] async fn test_proc_mounts_virtualized() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .build() .unwrap(); @@ -161,7 +161,7 @@ async fn test_proc_mounts_virtualized() { #[tokio::test] async fn test_proc_self_mountinfo_virtualized() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .build() .unwrap(); @@ -183,7 +183,7 @@ async fn test_proc_parent_pid_blocked() { )); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .build() @@ -229,7 +229,7 @@ async fn test_proc_net_tcp_hides_host_ports() { )); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .port_remap(true) diff --git a/crates/sandlock-core/tests/integration/test_resource.rs b/crates/sandlock-core/tests/integration/test_resource.rs index e0a4524..5d29f9d 100644 --- 
a/crates/sandlock-core/tests/integration/test_resource.rs +++ b/crates/sandlock-core/tests/integration/test_resource.rs @@ -10,7 +10,7 @@ fn base_policy() -> sandlock_core::policy::PolicyBuilder { Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") diff --git a/crates/sandlock-core/tests/integration/test_sandbox.rs b/crates/sandlock-core/tests/integration/test_sandbox.rs index 739234a..5fb6e5f 100644 --- a/crates/sandlock-core/tests/integration/test_sandbox.rs +++ b/crates/sandlock-core/tests/integration/test_sandbox.rs @@ -5,7 +5,7 @@ async fn test_echo() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -21,7 +21,7 @@ async fn test_exit_code() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/proc") .build() @@ -36,7 +36,7 @@ async fn test_denied_path() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/proc") .build() @@ -50,7 +50,7 @@ async fn test_denied_syscall() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -95,7 +95,7 @@ async fn test_default_policy_runs_ls() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/proc") .fs_read("/etc") @@ -117,7 +117,7 @@ async fn test_default_policy_runs_ls() { async fn test_nested_sandbox() { // Outer: allows /etc let outer = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") 
.fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .build() @@ -125,7 +125,7 @@ async fn test_nested_sandbox() { // Inner: does NOT allow /etc — run cat /etc/hostname, should fail let inner = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/proc") .build() .unwrap(); @@ -168,10 +168,15 @@ async fn test_nested_sandbox_via_cli() { } }; let bin = sandlock_bin.to_str().unwrap(); + let lib64_arg = if std::path::Path::new("/lib64").exists() { + " -r /lib64" + } else { + "" + }; // Outer allows /etc + sandlock binary; inner does not allow /etc let outer = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_read(sandlock_bin.parent().unwrap()) .fs_write("/tmp") @@ -179,8 +184,8 @@ async fn test_nested_sandbox_via_cli() { .unwrap(); let inner_cmd = format!( - "{} run -r /usr -r /lib -r /lib64 -r /bin -r /proc -- cat /etc/hostname", - bin + "{} run -r /usr -r /lib{} -r /bin -r /proc -- cat /etc/hostname", + bin, lib64_arg ); let result = Sandbox::run_interactive( &outer, &["sh", "-c", &inner_cmd], @@ -189,8 +194,8 @@ async fn test_nested_sandbox_via_cli() { // Inner with /etc allowed — should succeed let inner_cmd = format!( - "{} run -r /usr -r /lib -r /lib64 -r /bin -r /etc -r /proc -- echo nested-ok", - bin + "{} run -r /usr -r /lib{} -r /bin -r /etc -r /proc -- echo nested-ok", + bin, lib64_arg ); let result = Sandbox::run_interactive( &outer, &["sh", "-c", &inner_cmd], @@ -209,7 +214,7 @@ async fn test_denied_path_hardlink_blocked() { std::fs::write(&secret, "TOP_SECRET").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") 
.fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_write(tmp.path()) @@ -238,7 +243,7 @@ async fn test_denied_path_rename_blocked() { std::fs::write(&secret, "TOP_SECRET").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_write(tmp.path()) @@ -267,7 +272,7 @@ async fn test_denied_path_symlink_blocked() { std::fs::write(&secret, "TOP_SECRET").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_write(tmp.path()) @@ -301,7 +306,7 @@ async fn test_denied_path_preexisting_symlink_blocked() { std::os::unix::fs::symlink(&secret, &link).unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_deny(&secret) @@ -330,7 +335,7 @@ async fn test_denied_path_chained_symlinks_blocked() { std::os::unix::fs::symlink("link1", &link2).unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_deny(&secret) @@ -353,7 +358,7 @@ async fn test_denied_path_allows_normal_writes() { std::fs::write(&secret, "TOP_SECRET").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_write(tmp.path()) diff --git a/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs 
b/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs index 30f970c..108ac58 100644 --- a/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs +++ b/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs @@ -7,7 +7,7 @@ fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") diff --git a/python/src/sandlock/_sdk.py b/python/src/sandlock/_sdk.py index 4e9675a..ce3350b 100644 --- a/python/src/sandlock/_sdk.py +++ b/python/src/sandlock/_sdk.py @@ -743,6 +743,8 @@ def _build_from_policy(policy: PolicyDataclass, override_hostname=None): b = _lib.sandlock_policy_builder_new() for p in (policy.fs_readable or []): + if str(p) == "/lib64" and not os.path.exists("/lib64"): + continue b = _b_fs_read(b, _encode(str(p))) for p in (policy.fs_writable or []): b = _b_fs_write(b, _encode(str(p))) @@ -1485,4 +1487,3 @@ def run( error=error, ) - diff --git a/python/src/sandlock/mcp/_policy.py b/python/src/sandlock/mcp/_policy.py index d1c1278..5404117 100644 --- a/python/src/sandlock/mcp/_policy.py +++ b/python/src/sandlock/mcp/_policy.py @@ -26,7 +26,7 @@ # Resolve the Python interpreter's installation prefix so that sandboxed # processes can always exec the current interpreter, even when it lives # outside the standard system paths (e.g. /opt on CI, virtualenvs, etc.). -_PYTHON_PREFIX = os.path.dirname(os.path.dirname(os.path.realpath(sys.executable))) +_PYTHON_PREFIX = sys.prefix _POLICY_FIELDS = frozenset(f.name for f in fields(Policy)) diff --git a/python/tests/test_chroot_legacy_syscalls.py b/python/tests/test_chroot_legacy_syscalls.py index a43aa3d..a60b912 100644 --- a/python/tests/test_chroot_legacy_syscalls.py +++ b/python/tests/test_chroot_legacy_syscalls.py @@ -1,10 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 -"""Tests for chroot interception of legacy (non-*at) syscalls. 
+"""Tests for chroot interception of raw path syscall ABIs. musl libc uses stat/lstat/open/access/readlink instead of their *at variants (newfstatat/openat/etc.). These tests invoke the legacy -syscalls via the rootfs-helper binary to verify the chroot dispatcher -handles them correctly. +syscalls via the rootfs-helper binary on architectures that provide +them. On ARM64 the helper uses equivalent raw *at syscalls, because +Linux ARM64 does not expose the legacy non-*at path syscall ABI. """ import os diff --git a/python/tests/test_mcp_integration.py b/python/tests/test_mcp_integration.py index e9a9fbc..fa00acf 100644 --- a/python/tests/test_mcp_integration.py +++ b/python/tests/test_mcp_integration.py @@ -129,7 +129,7 @@ def test_write_then_read(self, tmp_path): class TestMcpSandboxLocalTools: def _run(self, coro): - return asyncio.get_event_loop().run_until_complete(coro) + return asyncio.run(coro) def test_read_only_by_default(self, tmp_path): workspace = str(tmp_path) diff --git a/python/tests/test_pipeline.py b/python/tests/test_pipeline.py index c7ea829..43c1615 100644 --- a/python/tests/test_pipeline.py +++ b/python/tests/test_pipeline.py @@ -12,7 +12,7 @@ # --- Helpers --- -_PYTHON_PREFIX = os.path.dirname(os.path.dirname(os.path.realpath(sys.executable))) +_PYTHON_PREFIX = sys.prefix def _policy(**overrides): """Minimal policy for testing.""" @@ -315,10 +315,18 @@ def test_gather_disjoint_policies(self): result = ( Sandbox(data_policy).cmd(["cat", secret]).as_("data") + Sandbox(code_policy).cmd( - ["echo", "tr a-z A-Z <&3"] + ["echo", "upper"] ).as_("code") | Sandbox(consumer_policy).cmd( - ["sh", "-c", 'eval "$(cat)"'] + [sys.executable, "-c", + "import os, sys\n" + "code = sys.stdin.read().strip()\n" + "with os.fdopen(3) as data_fd:\n" + " data = data_fd.read()\n" + "if code == 'upper':\n" + " sys.stdout.write(data.upper())\n" + "else:\n" + " sys.stdout.write(data)\n"] ) ).run() assert result.success, f"stderr={result.stderr}" diff --git 
a/python/tests/test_policy_fn.py b/python/tests/test_policy_fn.py index f091e5e..0b84238 100644 --- a/python/tests/test_policy_fn.py +++ b/python/tests/test_policy_fn.py @@ -189,7 +189,8 @@ def on_event(event, ctx): f"s.close()\n" ]) assert result.success - content = open(out).read() + with open(out) as f: + content = f.read() assert content == "ERR:13", f"expected EACCES (13), got: {content}" os.unlink(out) @@ -243,7 +244,8 @@ def on_event(event, ctx): ] ) assert result.success - content = open(out).read() + with open(out) as f: + content = f.read() assert content == "BLOCKED", f"expected BLOCKED, got: {content}" os.unlink(out) diff --git a/tests/rootfs-helper.c b/tests/rootfs-helper.c index a0a814b..68e5135 100644 --- a/tests/rootfs-helper.c +++ b/tests/rootfs-helper.c @@ -224,6 +224,14 @@ static int cmd_access(int argc, char **argv) { /* ── legacy syscall wrappers (for testing chroot handler) ──── */ +#if defined(SYS_stat) && defined(SYS_lstat) && defined(SYS_open) && \ + defined(SYS_access) && defined(SYS_readlink) && defined(SYS_mkdir) && \ + defined(SYS_rmdir) && defined(SYS_unlink) && defined(SYS_rename) && \ + defined(SYS_symlink) && defined(SYS_chmod) +#define HAVE_LEGACY_PATH_SYSCALLS 1 +#endif + +#ifdef HAVE_LEGACY_PATH_SYSCALLS static int cmd_legacy_stat(int argc, char **argv) { if (argc < 1) return 1; struct stat st; @@ -357,6 +365,141 @@ static int cmd_legacy_chmod(int argc, char **argv) { printf("OK\n"); return 0; } +#else +static int cmd_legacy_stat(int argc, char **argv) { + if (argc < 1) return 1; + struct stat st; + long ret = syscall(SYS_newfstatat, AT_FDCWD, argv[0], &st, 0); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK size=%ld mode=%o\n", (long)st.st_size, st.st_mode & 07777); + return 0; +} + +static int cmd_legacy_lstat(int argc, char **argv) { + if (argc < 1) return 1; + struct stat st; + long ret = syscall(SYS_newfstatat, AT_FDCWD, argv[0], &st, AT_SYMLINK_NOFOLLOW); + if (ret < 0) { + printf("ERR %d\n", 
errno); + return 1; + } + printf("OK size=%ld mode=%o type=%s\n", (long)st.st_size, st.st_mode & 07777, + S_ISDIR(st.st_mode) ? "dir" : S_ISLNK(st.st_mode) ? "link" : "file"); + return 0; +} + +static int cmd_legacy_open(int argc, char **argv) { + if (argc < 1) return 1; + int fd = (int)syscall(SYS_openat, AT_FDCWD, argv[0], O_RDONLY); + if (fd < 0) { + printf("ERR %d\n", errno); + return 1; + } + char buf[4096]; + ssize_t n = read(fd, buf, sizeof(buf)); + close(fd); + printf("OK "); + if (n > 0) { + write(STDOUT_FILENO, buf, n); + } + return 0; +} + +static int cmd_legacy_access(int argc, char **argv) { + if (argc < 1) return 1; + long ret = syscall(SYS_faccessat, AT_FDCWD, argv[0], F_OK, 0); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_readlink(int argc, char **argv) { + if (argc < 1) return 1; + char buf[4096]; + long n = syscall(SYS_readlinkat, AT_FDCWD, argv[0], buf, sizeof(buf) - 1); + if (n < 0) { + printf("ERR %d\n", errno); + return 1; + } + buf[n] = '\0'; + printf("OK %s\n", buf); + return 0; +} + +static int cmd_legacy_mkdir(int argc, char **argv) { + if (argc < 1) return 1; + long ret = syscall(SYS_mkdirat, AT_FDCWD, argv[0], 0755); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_rmdir(int argc, char **argv) { + if (argc < 1) return 1; + long ret = syscall(SYS_unlinkat, AT_FDCWD, argv[0], AT_REMOVEDIR); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_unlink(int argc, char **argv) { + if (argc < 1) return 1; + long ret = syscall(SYS_unlinkat, AT_FDCWD, argv[0], 0); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_rename(int argc, char **argv) { + if (argc < 2) return 1; + long ret = syscall(SYS_renameat2, AT_FDCWD, argv[0], AT_FDCWD, argv[1], 0); + if (ret < 0) { + 
printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_symlink(int argc, char **argv) { + if (argc < 2) return 1; + long ret = syscall(SYS_symlinkat, argv[0], AT_FDCWD, argv[1]); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_chmod(int argc, char **argv) { + if (argc < 2) return 1; + unsigned mode; + if (sscanf(argv[0], "%o", &mode) != 1) return 1; + long ret = syscall(SYS_fchmodat, AT_FDCWD, argv[1], mode, 0); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} +#endif /* ── dispatch ───────────────────────────────────────────────── */ @@ -385,7 +528,7 @@ static int dispatch(const char *cmd, int argc, char **argv) { if (strcmp(cmd, "true") == 0) return 0; if (strcmp(cmd, "false") == 0) return 1; - /* Legacy syscall variants */ + /* Legacy syscall variants on x86_64; equivalent raw *at ABI elsewhere. */ if (strcmp(cmd, "legacy-stat") == 0) return cmd_legacy_stat(argc, argv); if (strcmp(cmd, "legacy-lstat") == 0) return cmd_legacy_lstat(argc, argv); if (strcmp(cmd, "legacy-open") == 0) return cmd_legacy_open(argc, argv);