From bae335a83aea3b80338142caa5fb8f971ee6cccb Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 11:04:17 +0800 Subject: [PATCH 01/11] fix: add arm64 syscall and seccomp ABI support Introduce architecture-specific syscall numbers, audit arch values, and optional legacy path syscall constants so the seccomp filters and dispatch tables build on arm64 while preserving x86_64 behavior. Signed-off-by: gokwok <531504879@qq.com> --- crates/sandlock-core/src/arch.rs | 69 +++++++++ crates/sandlock-core/src/checkpoint.rs | 12 ++ crates/sandlock-core/src/context.rs | 109 ++++++-------- crates/sandlock-core/src/cow/dispatch.rs | 21 +-- crates/sandlock-core/src/fork.rs | 2 - crates/sandlock-core/src/lib.rs | 1 + crates/sandlock-core/src/resource.rs | 2 +- crates/sandlock-core/src/seccomp/bpf.rs | 5 +- crates/sandlock-core/src/seccomp/dispatch.rs | 147 ++++++++++++------- crates/sandlock-core/src/seccomp/notif.rs | 35 +++-- crates/sandlock-core/src/sys/structs.rs | 6 - crates/sandlock-core/src/sys/syscall.rs | 23 ++- 12 files changed, 276 insertions(+), 156 deletions(-) create mode 100644 crates/sandlock-core/src/arch.rs diff --git a/crates/sandlock-core/src/arch.rs b/crates/sandlock-core/src/arch.rs new file mode 100644 index 0000000..5086329 --- /dev/null +++ b/crates/sandlock-core/src/arch.rs @@ -0,0 +1,69 @@ +//! Architecture-specific syscall and seccomp helpers. 
+ +#[cfg(target_arch = "x86_64")] +mod imp { + pub const AUDIT_ARCH: u32 = 0xC000_003E; + pub const SYS_SECCOMP: i64 = 317; + pub const SYS_PIDFD_OPEN: i64 = 434; + pub const SYS_PIDFD_GETFD: i64 = 438; + + pub const SYS_OPEN: Option<i64> = Some(libc::SYS_open); + pub const SYS_STAT: Option<i64> = Some(libc::SYS_stat); + pub const SYS_LSTAT: Option<i64> = Some(libc::SYS_lstat); + pub const SYS_ACCESS: Option<i64> = Some(libc::SYS_access); + pub const SYS_READLINK: Option<i64> = Some(libc::SYS_readlink); + pub const SYS_GETDENTS: Option<i64> = Some(libc::SYS_getdents); + pub const SYS_UNLINK: Option<i64> = Some(libc::SYS_unlink); + pub const SYS_RMDIR: Option<i64> = Some(libc::SYS_rmdir); + pub const SYS_MKDIR: Option<i64> = Some(libc::SYS_mkdir); + pub const SYS_RENAME: Option<i64> = Some(libc::SYS_rename); + pub const SYS_SYMLINK: Option<i64> = Some(libc::SYS_symlink); + pub const SYS_LINK: Option<i64> = Some(libc::SYS_link); + pub const SYS_CHMOD: Option<i64> = Some(libc::SYS_chmod); + pub const SYS_CHOWN: Option<i64> = Some(libc::SYS_chown); + pub const SYS_LCHOWN: Option<i64> = Some(libc::SYS_lchown); + pub const SYS_VFORK: Option<i64> = Some(libc::SYS_vfork); + pub const SYS_FUTIMESAT: Option<i64> = Some(libc::SYS_futimesat); + pub const SYS_FORK: Option<i64> = Some(libc::SYS_fork); + pub const SYS_IOPERM: Option<i64> = Some(libc::SYS_ioperm); + pub const SYS_IOPL: Option<i64> = Some(libc::SYS_iopl); + pub const SYS_TIME: Option<i64> = Some(libc::SYS_time); +} + +#[cfg(target_arch = "aarch64")] +mod imp { + pub const AUDIT_ARCH: u32 = 0xC000_00B7; + pub const SYS_SECCOMP: i64 = 277; + pub const SYS_PIDFD_OPEN: i64 = 434; + pub const SYS_PIDFD_GETFD: i64 = 438; + + pub const SYS_OPEN: Option<i64> = None; + pub const SYS_STAT: Option<i64> = None; + pub const SYS_LSTAT: Option<i64> = None; + pub const SYS_ACCESS: Option<i64> = None; + pub const SYS_READLINK: Option<i64> = None; + pub const SYS_GETDENTS: Option<i64> = None; + pub const SYS_UNLINK: Option<i64> = None; + pub const SYS_RMDIR: Option<i64> = None; + pub const SYS_MKDIR: Option<i64> = None; + pub const SYS_RENAME: Option<i64> = None; + pub const
SYS_SYMLINK: Option<i64> = None; + pub const SYS_LINK: Option<i64> = None; + pub const SYS_CHMOD: Option<i64> = None; + pub const SYS_CHOWN: Option<i64> = None; + pub const SYS_LCHOWN: Option<i64> = None; + pub const SYS_VFORK: Option<i64> = None; + pub const SYS_FUTIMESAT: Option<i64> = None; + pub const SYS_FORK: Option<i64> = None; + pub const SYS_IOPERM: Option<i64> = None; + pub const SYS_IOPL: Option<i64> = None; + pub const SYS_TIME: Option<i64> = None; +} + +pub use imp::*; + +pub fn push_optional_syscall(v: &mut Vec<u32>, nr: Option<i64>) { + if let Some(nr) = nr { + v.push(nr as u32); + } +} diff --git a/crates/sandlock-core/src/checkpoint.rs b/crates/sandlock-core/src/checkpoint.rs index ba0f324..ac29489 100644 --- a/crates/sandlock-core/src/checkpoint.rs +++ b/crates/sandlock-core/src/checkpoint.rs @@ -100,6 +100,8 @@ fn ptrace_detach(pid: i32) -> io::Result<()> { } fn ptrace_getregs(pid: i32) -> io::Result<Vec<u64>> { + #[cfg(target_arch = "x86_64")] + { // user_regs_struct is 27 u64 fields on x86_64 (216 bytes) let mut regs = vec![0u64; 27]; let ret = unsafe { libc::ptrace(libc::PTRACE_GETREGS, pid, 0, regs.as_mut_ptr()) }; @@ -107,6 +109,16 @@ fn ptrace_getregs(pid: i32) -> io::Result<Vec<u64>> { return Err(io::Error::last_os_error()); } Ok(regs) + } + + #[cfg(not(target_arch = "x86_64"))] + { + let _ = pid; + Err(io::Error::new( + io::ErrorKind::Unsupported, + "checkpoint register capture is only implemented on x86_64", + )) + } } // --------------------------------------------------------------------------- diff --git a/crates/sandlock-core/src/context.rs b/crates/sandlock-core/src/context.rs index a89f2a4..b61bcb4 100644 --- a/crates/sandlock-core/src/context.rs +++ b/crates/sandlock-core/src/context.rs @@ -5,6 +5,7 @@ use std::ffi::CString; use std::io; use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}; +use crate::arch; use crate::policy::{FsIsolation, Policy}; use crate::seccomp::bpf::{self, stmt, jump}; use crate::sys::structs::{ @@ -151,8 +152,8 @@ pub fn syscall_name_to_nr(name: &str) -> Option<u32> { "process_vm_writev" =>
libc::SYS_process_vm_writev, "open_by_handle_at" => libc::SYS_open_by_handle_at, "name_to_handle_at" => libc::SYS_name_to_handle_at, - "ioperm" => libc::SYS_ioperm, - "iopl" => libc::SYS_iopl, + "ioperm" => arch::SYS_IOPERM?, + "iopl" => arch::SYS_IOPL?, "quotactl" => libc::SYS_quotactl, "acct" => libc::SYS_acct, "lookup_dcookie" => libc::SYS_lookup_dcookie, @@ -164,7 +165,7 @@ pub fn syscall_name_to_nr(name: &str) -> Option { // Additional syscalls for notif/arg filters "clone" => libc::SYS_clone, "clone3" => libc::SYS_clone3, - "vfork" => libc::SYS_vfork, + "vfork" => arch::SYS_VFORK?, "mmap" => libc::SYS_mmap, "munmap" => libc::SYS_munmap, "brk" => libc::SYS_brk, @@ -177,14 +178,14 @@ pub fn syscall_name_to_nr(name: &str) -> Option { "prctl" => libc::SYS_prctl, "getrandom" => libc::SYS_getrandom, "openat" => libc::SYS_openat, - "open" => libc::SYS_open, + "open" => arch::SYS_OPEN?, "getdents64" => libc::SYS_getdents64, - "getdents" => libc::SYS_getdents, + "getdents" => arch::SYS_GETDENTS?, "bind" => libc::SYS_bind, "getsockname" => libc::SYS_getsockname, "clock_gettime" => libc::SYS_clock_gettime, "gettimeofday" => libc::SYS_gettimeofday, - "time" => libc::SYS_time, + "time" => arch::SYS_TIME?, "clock_nanosleep" => libc::SYS_clock_nanosleep, "timerfd_settime" => libc::SYS_timerfd_settime, "timer_settime" => libc::SYS_timer_settime, @@ -204,21 +205,21 @@ pub fn syscall_name_to_nr(name: &str) -> Option { "readlinkat" => libc::SYS_readlinkat, "truncate" => libc::SYS_truncate, "utimensat" => libc::SYS_utimensat, - "unlink" => libc::SYS_unlink, - "rmdir" => libc::SYS_rmdir, - "mkdir" => libc::SYS_mkdir, - "rename" => libc::SYS_rename, - "stat" => libc::SYS_stat, - "lstat" => libc::SYS_lstat, - "access" => libc::SYS_access, - "symlink" => libc::SYS_symlink, - "link" => libc::SYS_link, - "chmod" => libc::SYS_chmod, - "chown" => libc::SYS_chown, - "lchown" => libc::SYS_lchown, - "readlink" => libc::SYS_readlink, - "futimesat" => libc::SYS_futimesat, - "fork" => 
libc::SYS_fork, + "unlink" => arch::SYS_UNLINK?, + "rmdir" => arch::SYS_RMDIR?, + "mkdir" => arch::SYS_MKDIR?, + "rename" => arch::SYS_RENAME?, + "stat" => arch::SYS_STAT?, + "lstat" => arch::SYS_LSTAT?, + "access" => arch::SYS_ACCESS?, + "symlink" => arch::SYS_SYMLINK?, + "link" => arch::SYS_LINK?, + "chmod" => arch::SYS_CHMOD?, + "chown" => arch::SYS_CHOWN?, + "lchown" => arch::SYS_LCHOWN?, + "readlink" => arch::SYS_READLINK?, + "futimesat" => arch::SYS_FUTIMESAT?, + "fork" => arch::SYS_FORK?, _ => return None, }; Some(nr as u32) @@ -233,10 +234,10 @@ pub fn notif_syscalls(policy: &Policy) -> Vec { let mut nrs = vec![ libc::SYS_clone as u32, libc::SYS_clone3 as u32, - libc::SYS_vfork as u32, libc::SYS_wait4 as u32, libc::SYS_waitid as u32, ]; + arch::push_optional_syscall(&mut nrs, arch::SYS_VFORK); if policy.max_memory.is_some() { nrs.push(libc::SYS_mmap as u32); @@ -276,10 +277,8 @@ pub fn notif_syscalls(policy: &Policy) -> Vec { // /proc virtualization (always on: PID filtering, sensitive path blocking) nrs.push(libc::SYS_openat as u32); - nrs.extend_from_slice(&[ - libc::SYS_getdents64 as u32, - libc::SYS_getdents as u32, - ]); + nrs.push(libc::SYS_getdents64 as u32); + arch::push_optional_syscall(&mut nrs, arch::SYS_GETDENTS); // Netlink virtualization (always on): // socket, bind, getsockname — swap in a unix socketpair for AF_NETLINK @@ -308,45 +307,37 @@ pub fn notif_syscalls(policy: &Policy) -> Vec { if policy.workdir.is_some() && policy.fs_isolation == FsIsolation::None { nrs.extend_from_slice(&[ libc::SYS_openat as u32, - libc::SYS_open as u32, libc::SYS_unlinkat as u32, - libc::SYS_unlink as u32, - libc::SYS_rmdir as u32, libc::SYS_mkdirat as u32, - libc::SYS_mkdir as u32, libc::SYS_renameat2 as u32, - libc::SYS_rename as u32, libc::SYS_symlinkat as u32, - libc::SYS_symlink as u32, libc::SYS_linkat as u32, - libc::SYS_link as u32, libc::SYS_fchmodat as u32, - libc::SYS_chmod as u32, libc::SYS_fchownat as u32, - libc::SYS_chown as u32, - 
libc::SYS_lchown as u32, libc::SYS_truncate as u32, libc::SYS_utimensat as u32, libc::SYS_newfstatat as u32, - libc::SYS_stat as u32, - libc::SYS_lstat as u32, libc::SYS_statx as u32, libc::SYS_faccessat as u32, 439u32, // SYS_faccessat2 — glibc 2.33+ uses this instead of faccessat - libc::SYS_access as u32, libc::SYS_readlinkat as u32, - libc::SYS_readlink as u32, libc::SYS_getdents64 as u32, - libc::SYS_getdents as u32, libc::SYS_chdir as u32, ]); + for nr in [ + arch::SYS_OPEN, arch::SYS_UNLINK, arch::SYS_RMDIR, arch::SYS_MKDIR, + arch::SYS_RENAME, arch::SYS_SYMLINK, arch::SYS_LINK, arch::SYS_CHMOD, + arch::SYS_CHOWN, arch::SYS_LCHOWN, arch::SYS_STAT, arch::SYS_LSTAT, + arch::SYS_ACCESS, arch::SYS_READLINK, arch::SYS_GETDENTS, + ] { + arch::push_optional_syscall(&mut nrs, nr); + } } // Chroot path interception if policy.chroot.is_some() { nrs.extend_from_slice(&[ libc::SYS_openat as u32, - libc::SYS_open as u32, // musl uses open(2) instead of openat libc::SYS_execve as u32, libc::SYS_execveat as u32, libc::SYS_unlinkat as u32, @@ -358,46 +349,40 @@ pub fn notif_syscalls(policy: &Policy) -> Vec { libc::SYS_fchownat as u32, libc::SYS_truncate as u32, libc::SYS_newfstatat as u32, - libc::SYS_stat as u32, // musl uses stat(2) instead of newfstatat - libc::SYS_lstat as u32, // musl uses lstat(2) instead of newfstatat libc::SYS_statx as u32, libc::SYS_faccessat as u32, 439u32, // SYS_faccessat2 — glibc 2.33+ uses this instead of faccessat - libc::SYS_access as u32, // musl uses access(2) instead of faccessat libc::SYS_readlinkat as u32, - libc::SYS_readlink as u32, // musl uses readlink(2) instead of readlinkat libc::SYS_getdents64 as u32, - libc::SYS_getdents as u32, libc::SYS_chdir as u32, libc::SYS_getcwd as u32, libc::SYS_statfs as u32, libc::SYS_utimensat as u32, - libc::SYS_unlink as u32, // musl uses unlink(2) instead of unlinkat - libc::SYS_rmdir as u32, // musl uses rmdir(2) instead of unlinkat - libc::SYS_mkdir as u32, // musl uses mkdir(2) instead of 
mkdirat - libc::SYS_rename as u32, // musl uses rename(2) instead of renameat2 - libc::SYS_symlink as u32, // musl uses symlink(2) instead of symlinkat - libc::SYS_link as u32, // musl uses link(2) instead of linkat - libc::SYS_chmod as u32, // musl uses chmod(2) instead of fchmodat - libc::SYS_chown as u32, // musl uses chown(2)/lchown(2) instead of fchownat - libc::SYS_lchown as u32, ]); + for nr in [ + arch::SYS_OPEN, arch::SYS_STAT, arch::SYS_LSTAT, arch::SYS_ACCESS, + arch::SYS_READLINK, arch::SYS_GETDENTS, arch::SYS_UNLINK, + arch::SYS_RMDIR, arch::SYS_MKDIR, arch::SYS_RENAME, + arch::SYS_SYMLINK, arch::SYS_LINK, arch::SYS_CHMOD, + arch::SYS_CHOWN, arch::SYS_LCHOWN, + ] { + arch::push_optional_syscall(&mut nrs, nr); + } } // Explicit deny-paths need path-bearing syscalls intercepted. if !policy.fs_denied.is_empty() { nrs.extend_from_slice(&[ libc::SYS_openat as u32, - libc::SYS_open as u32, libc::SYS_execve as u32, libc::SYS_execveat as u32, libc::SYS_linkat as u32, - libc::SYS_link as u32, libc::SYS_renameat2 as u32, - libc::SYS_rename as u32, libc::SYS_symlinkat as u32, - libc::SYS_symlink as u32, ]); + for nr in [arch::SYS_OPEN, arch::SYS_LINK, arch::SYS_RENAME, arch::SYS_SYMLINK] { + arch::push_optional_syscall(&mut nrs, nr); + } } // Dynamic policy callback — intercept key syscalls for event emission. 
@@ -995,7 +980,9 @@ mod tests { let nrs = notif_syscalls(&policy); assert!(nrs.contains(&(libc::SYS_clone as u32))); assert!(nrs.contains(&(libc::SYS_clone3 as u32))); - assert!(nrs.contains(&(libc::SYS_vfork as u32))); + if let Some(vfork) = arch::SYS_VFORK { + assert!(nrs.contains(&(vfork as u32))); + } } #[test] diff --git a/crates/sandlock-core/src/cow/dispatch.rs b/crates/sandlock-core/src/cow/dispatch.rs index 8e8e009..84f8c8a 100644 --- a/crates/sandlock-core/src/cow/dispatch.rs +++ b/crates/sandlock-core/src/cow/dispatch.rs @@ -8,6 +8,7 @@ use std::sync::Arc; use tokio::sync::Mutex; +use crate::arch; use crate::procfs::{build_dirent64, DT_DIR, DT_LNK, DT_REG}; use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction}; use crate::seccomp::state::CowState; @@ -80,7 +81,7 @@ pub(crate) async fn handle_cow_open( // open(path, flags, mode): args[0]=path, args[1]=flags // openat(dirfd, path, flags): args[0]=dirfd, args[1]=path, args[2]=flags - let (path_ptr, dirfd, flags) = if nr == libc::SYS_open as i64 { + let (path_ptr, dirfd, flags) = if Some(nr) == arch::SYS_OPEN { (notif.data.args[0], libc::AT_FDCWD as i64, notif.data.args[1]) } else { (notif.data.args[1], notif.data.args[0] as i64, notif.data.args[2]) @@ -239,35 +240,35 @@ fn parse_cow_write(notif: &SeccompNotif, notif_fd: RawFd) -> Option } // Legacy variants (path in args[0], no dirfd) - if nr == libc::SYS_unlink as i64 { + if Some(nr) == arch::SYS_UNLINK { return Some(CowWriteOp::Unlink { path: read_resolved(notif, 0, None, notif_fd)?, is_dir: false }); } - if nr == libc::SYS_rmdir as i64 { + if Some(nr) == arch::SYS_RMDIR { return Some(CowWriteOp::Unlink { path: read_resolved(notif, 0, None, notif_fd)?, is_dir: true }); } - if nr == libc::SYS_mkdir as i64 { + if Some(nr) == arch::SYS_MKDIR { return Some(CowWriteOp::Mkdir { path: read_resolved(notif, 0, None, notif_fd)? 
}); } - if nr == libc::SYS_rename as i64 { + if Some(nr) == arch::SYS_RENAME { let old_path = read_resolved(notif, 0, None, notif_fd)?; let new_path = read_resolved(notif, 1, None, notif_fd)?; return Some(CowWriteOp::Rename { old_path, new_path }); } - if nr == libc::SYS_symlink as i64 { + if Some(nr) == arch::SYS_SYMLINK { let target = read_path(notif, notif.data.args[0], notif_fd)?; let linkpath = read_resolved(notif, 1, None, notif_fd)?; return Some(CowWriteOp::Symlink { target, linkpath }); } - if nr == libc::SYS_link as i64 { + if Some(nr) == arch::SYS_LINK { let old_path = read_resolved(notif, 0, None, notif_fd)?; let new_path = read_resolved(notif, 1, None, notif_fd)?; return Some(CowWriteOp::Link { old_path, new_path }); } - if nr == libc::SYS_chmod as i64 { + if Some(nr) == arch::SYS_CHMOD { let path = read_resolved(notif, 0, None, notif_fd)?; return Some(CowWriteOp::Chmod { path, mode: (notif.data.args[1] & 0o7777) as u32 }); } - if nr == libc::SYS_chown as i64 || nr == libc::SYS_lchown as i64 { + if Some(nr) == arch::SYS_CHOWN || Some(nr) == arch::SYS_LCHOWN { let path = read_resolved(notif, 0, None, notif_fd)?; return Some(CowWriteOp::Chown { path, uid: notif.data.args[1] as u32, gid: notif.data.args[2] as u32 }); } @@ -456,7 +457,7 @@ pub(crate) async fn handle_cow_access( // access(pathname, mode): args[0]=path, args[1]=mode // faccessat(dirfd, pathname, mode, flags): args[0]=dirfd, args[1]=path, args[2]=mode - let (path, mode) = if nr == libc::SYS_access as i64 { + let (path, mode) = if Some(nr) == arch::SYS_ACCESS { let p = match read_path(notif, notif.data.args[0], notif_fd) { Some(p) => p, None => return NotifAction::Continue, diff --git a/crates/sandlock-core/src/fork.rs b/crates/sandlock-core/src/fork.rs index f02def5..14ca53f 100644 --- a/crates/sandlock-core/src/fork.rs +++ b/crates/sandlock-core/src/fork.rs @@ -19,8 +19,6 @@ use std::os::unix::io::RawFd; fn raw_fork() -> std::io::Result { #[cfg(target_arch = "x86_64")] const NR_FORK: i64 = 
57; - #[cfg(target_arch = "aarch64")] - const NR_FORK: i64 = -1; // aarch64 has no fork — use clone with minimal flags #[cfg(target_arch = "x86_64")] { diff --git a/crates/sandlock-core/src/lib.rs b/crates/sandlock-core/src/lib.rs index caf5654..dd37de9 100644 --- a/crates/sandlock-core/src/lib.rs +++ b/crates/sandlock-core/src/lib.rs @@ -3,6 +3,7 @@ pub mod policy; pub mod profile; pub mod result; pub mod sandbox; +pub(crate) mod arch; pub(crate) mod sys; pub mod landlock; pub mod seccomp; diff --git a/crates/sandlock-core/src/resource.rs b/crates/sandlock-core/src/resource.rs index c7937c9..cab2be8 100644 --- a/crates/sandlock-core/src/resource.rs +++ b/crates/sandlock-core/src/resource.rs @@ -30,7 +30,7 @@ pub(crate) async fn handle_fork( let args = &notif.data.args; // For clone/vfork: check namespace flags in args[0]. - if nr == libc::SYS_clone || nr == libc::SYS_vfork { + if nr == libc::SYS_clone || Some(nr) == crate::arch::SYS_VFORK { if nr == libc::SYS_clone && (args[0] & CLONE_NS_FLAGS) != 0 { return NotifAction::Errno(EPERM); } diff --git a/crates/sandlock-core/src/seccomp/bpf.rs b/crates/sandlock-core/src/seccomp/bpf.rs index 48b7790..e7d8e9c 100644 --- a/crates/sandlock-core/src/seccomp/bpf.rs +++ b/crates/sandlock-core/src/seccomp/bpf.rs @@ -14,7 +14,6 @@ use std::os::unix::io::{FromRawFd, OwnedFd}; use crate::sys::structs::{ - AUDIT_ARCH_X86_64, BPF_ABS, BPF_JEQ, BPF_JMP, BPF_K, BPF_LD, BPF_RET, BPF_W, EPERM, OFFSET_ARCH, OFFSET_NR, @@ -71,7 +70,7 @@ pub fn assemble_filter( // ---- 1. Arch check block ---- prog.push(stmt(BPF_LD | BPF_W | BPF_ABS, OFFSET_ARCH)); let arch_jf = (ret_kill_idx - 2) as u8; - prog.push(jump(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, arch_jf)); + prog.push(jump(BPF_JMP | BPF_JEQ | BPF_K, crate::arch::AUDIT_ARCH, 0, arch_jf)); // ---- 2. Pre-built arg filter block ---- prog.extend_from_slice(arg_block); @@ -187,7 +186,7 @@ mod tests { // prog[1] is the JEQ arch check; jf should reach the KILL return.
let arch_jeq = &prog[1]; assert_eq!(arch_jeq.code, BPF_JMP | BPF_JEQ | BPF_K); - assert_eq!(arch_jeq.k, AUDIT_ARCH_X86_64); + assert_eq!(arch_jeq.k, crate::arch::AUDIT_ARCH); // The instruction following prog[1] is prog[2]. // KILL is the last instruction. let kill_idx = prog.len() - 1; diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs index 558492f..c5598f9 100644 --- a/crates/sandlock-core/src/seccomp/dispatch.rs +++ b/crates/sandlock-core/src/seccomp/dispatch.rs @@ -13,6 +13,7 @@ use std::sync::Arc; use super::ctx::SupervisorCtx; use super::notif::{NotifAction, NotifPolicy}; use super::state::ResourceState; +use crate::arch; use crate::sys::structs::SeccompNotif; use tokio::sync::Mutex; @@ -95,7 +96,11 @@ pub fn build_dispatch_table( // ------------------------------------------------------------------ // Fork/clone family (always on) // ------------------------------------------------------------------ - for &nr in &[libc::SYS_clone, libc::SYS_clone3, libc::SYS_vfork] { + let mut fork_nrs = vec![libc::SYS_clone, libc::SYS_clone3]; + if let Some(vfork) = arch::SYS_VFORK { + fork_nrs.push(vfork); + } + for nr in fork_nrs { let policy = Arc::clone(policy); let resource = Arc::clone(resource); table.register(nr, Box::new(move |notif, ctx, _notif_fd| { @@ -235,7 +240,11 @@ pub fn build_dispatch_table( }) })); } - for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] { + let mut getdents_nrs = vec![libc::SYS_getdents64]; + if let Some(getdents) = arch::SYS_GETDENTS { + getdents_nrs.push(getdents); + } + for nr in getdents_nrs { let policy = Arc::clone(policy); table.register(nr, Box::new(move |notif, ctx, notif_fd| { let policy = Arc::clone(&policy); @@ -302,7 +311,11 @@ pub fn build_dispatch_table( // Deterministic directory listing // ------------------------------------------------------------------ if policy.deterministic_dirs { - for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] { + let 
mut getdents_nrs = vec![libc::SYS_getdents64]; + if let Some(getdents) = arch::SYS_GETDENTS { + getdents_nrs.push(getdents); + } + for nr in getdents_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let procfs_inner = Arc::clone(&ctx.procfs); Box::pin(async move { @@ -444,8 +457,10 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc crate::chroot::dispatch::handle_chroot_open)); // open (legacy) — fallthrough if Continue - table.register(libc::SYS_open as i64, chroot_handler_fallthrough!(policy, - crate::chroot::dispatch::handle_chroot_legacy_open)); + if let Some(open) = arch::SYS_OPEN { + table.register(open, chroot_handler_fallthrough!(policy, + crate::chroot::dispatch::handle_chroot_legacy_open)); + } // execve, execveat — unconditional return for &nr in &[libc::SYS_execve, libc::SYS_execveat] { @@ -464,25 +479,39 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc } // Legacy write syscalls - table.register(libc::SYS_unlink as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_unlink)); - table.register(libc::SYS_rmdir as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_rmdir)); - table.register(libc::SYS_mkdir as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_mkdir)); - table.register(libc::SYS_rename as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_rename)); - table.register(libc::SYS_symlink as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_symlink)); - table.register(libc::SYS_link as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_link)); - table.register(libc::SYS_chmod as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_chmod)); + if let Some(nr) = arch::SYS_UNLINK { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_unlink)); + } + if let Some(nr) = arch::SYS_RMDIR 
{ + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_rmdir)); + } + if let Some(nr) = arch::SYS_MKDIR { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_mkdir)); + } + if let Some(nr) = arch::SYS_RENAME { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_rename)); + } + if let Some(nr) = arch::SYS_SYMLINK { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_symlink)); + } + if let Some(nr) = arch::SYS_LINK { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_link)); + } + if let Some(nr) = arch::SYS_CHMOD { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_chmod)); + } // chown — non-follow - { + if let Some(chown) = arch::SYS_CHOWN { let policy = Arc::clone(policy); - table.register(libc::SYS_chown as i64, Box::new(move |notif, ctx, notif_fd| { + table.register(chown, Box::new(move |notif, ctx, notif_fd| { let policy = Arc::clone(&policy); Box::pin(async move { let chroot_ctx = ChrootCtx { @@ -498,9 +527,9 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc } // lchown — follow - { + if let Some(lchown) = arch::SYS_LCHOWN { let policy = Arc::clone(policy); - table.register(libc::SYS_lchown as i64, Box::new(move |notif, ctx, notif_fd| { + table.register(lchown, Box::new(move |notif, ctx, notif_fd| { let policy = Arc::clone(&policy); Box::pin(async move { let chroot_ctx = ChrootCtx { @@ -526,12 +555,18 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc } // Legacy stat - table.register(libc::SYS_stat as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_stat)); - table.register(libc::SYS_lstat as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_lstat)); - table.register(libc::SYS_access as i64, chroot_handler!(policy, - 
crate::chroot::dispatch::handle_chroot_legacy_access)); + if let Some(nr) = arch::SYS_STAT { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_stat)); + } + if let Some(nr) = arch::SYS_LSTAT { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_lstat)); + } + if let Some(nr) = arch::SYS_ACCESS { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_access)); + } // statx table.register(libc::SYS_statx, chroot_handler!(policy, @@ -540,11 +575,17 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc // readlink table.register(libc::SYS_readlinkat, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_readlink)); - table.register(libc::SYS_readlink as i64, chroot_handler!(policy, - crate::chroot::dispatch::handle_chroot_legacy_readlink)); + if let Some(nr) = arch::SYS_READLINK { + table.register(nr, chroot_handler!(policy, + crate::chroot::dispatch::handle_chroot_legacy_readlink)); + } // getdents - for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] { + let mut getdents_nrs = vec![libc::SYS_getdents64]; + if let Some(getdents) = arch::SYS_GETDENTS { + getdents_nrs.push(getdents); + } + for nr in getdents_nrs { table.register(nr, chroot_handler!(policy, crate::chroot::dispatch::handle_chroot_getdents)); } @@ -566,16 +607,17 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc fn register_cow_handlers(table: &mut DispatchTable) { // Write syscalls (*at variants + legacy) - for &nr in &[ + let mut write_nrs = vec![ libc::SYS_unlinkat, libc::SYS_mkdirat, libc::SYS_renameat2, libc::SYS_symlinkat, libc::SYS_linkat, libc::SYS_fchmodat, libc::SYS_fchownat, libc::SYS_truncate, - libc::SYS_unlink as i64, libc::SYS_rmdir as i64, - libc::SYS_mkdir as i64, libc::SYS_rename as i64, - libc::SYS_symlink as i64, libc::SYS_link as i64, - libc::SYS_chmod as i64, libc::SYS_chown as i64, - libc::SYS_lchown as i64, 
- ] { + ]; + write_nrs.extend([ + arch::SYS_UNLINK, arch::SYS_RMDIR, arch::SYS_MKDIR, arch::SYS_RENAME, + arch::SYS_SYMLINK, arch::SYS_LINK, arch::SYS_CHMOD, arch::SYS_CHOWN, + arch::SYS_LCHOWN, + ].into_iter().flatten()); + for nr in write_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -593,11 +635,12 @@ fn register_cow_handlers(table: &mut DispatchTable) { })); // faccessat/access — fallthrough - for &nr in &[ + let mut access_nrs = vec![ libc::SYS_faccessat, crate::cow::dispatch::SYS_FACCESSAT2, - libc::SYS_access as i64, - ] { + ]; + access_nrs.extend(arch::SYS_ACCESS); + for nr in access_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -607,7 +650,9 @@ fn register_cow_handlers(table: &mut DispatchTable) { } // openat/open — fallthrough - for &nr in &[libc::SYS_openat, libc::SYS_open as i64] { + let mut open_nrs = vec![libc::SYS_openat]; + open_nrs.extend(arch::SYS_OPEN); + for nr in open_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -617,11 +662,11 @@ fn register_cow_handlers(table: &mut DispatchTable) { } // stat family — fallthrough - for &nr in &[ + let mut stat_nrs = vec![ libc::SYS_newfstatat, libc::SYS_faccessat, - libc::SYS_stat as i64, libc::SYS_lstat as i64, - libc::SYS_access as i64, - ] { + ]; + stat_nrs.extend([arch::SYS_STAT, arch::SYS_LSTAT, arch::SYS_ACCESS].into_iter().flatten()); + for nr in stat_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -639,7 +684,9 @@ fn register_cow_handlers(table: &mut DispatchTable) { })); // readlink — fallthrough - for &nr in &[libc::SYS_readlinkat, libc::SYS_readlink as i64] { + let mut readlink_nrs = vec![libc::SYS_readlinkat]; + readlink_nrs.extend(arch::SYS_READLINK); + for nr in readlink_nrs { table.register(nr, Box::new(|notif, ctx, 
notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { @@ -649,7 +696,9 @@ fn register_cow_handlers(table: &mut DispatchTable) { } // getdents — fallthrough - for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] { + let mut getdents_nrs = vec![libc::SYS_getdents64]; + getdents_nrs.extend(arch::SYS_GETDENTS); + for nr in getdents_nrs { table.register(nr, Box::new(|notif, ctx, notif_fd| { let cow = Arc::clone(&ctx.cow); Box::pin(async move { diff --git a/crates/sandlock-core/src/seccomp/notif.rs b/crates/sandlock-core/src/seccomp/notif.rs index 1d47a21..cef305e 100644 --- a/crates/sandlock-core/src/seccomp/notif.rs +++ b/crates/sandlock-core/src/seccomp/notif.rs @@ -9,6 +9,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd, OwnedFd, RawFd}; use std::sync::Arc; use crate::error::NotifError; +use crate::arch; use crate::sys::structs::{ SeccompNotif, SeccompNotifAddfd, SeccompNotifResp, SECCOMP_ADDFD_FLAG_SEND, SECCOMP_IOCTL_NOTIF_ADDFD, SECCOMP_IOCTL_NOTIF_ID_VALID, SECCOMP_IOCTL_NOTIF_RECV, @@ -480,7 +481,7 @@ fn syscall_name(nr: i64) -> &'static str { n if n == libc::SYS_bind => "bind", n if n == libc::SYS_clone => "clone", n if n == libc::SYS_clone3 => "clone3", - n if n == libc::SYS_vfork => "vfork", + n if Some(n) == arch::SYS_VFORK => "vfork", n if n == libc::SYS_execve => "execve", n if n == libc::SYS_execveat => "execveat", n if n == libc::SYS_mmap => "mmap", @@ -504,12 +505,12 @@ fn syscall_category(nr: i64) -> crate::policy_fn::SyscallCategory { || n == libc::SYS_truncate || n == libc::SYS_readlinkat || n == libc::SYS_newfstatat || n == libc::SYS_statx || n == libc::SYS_faccessat || n == libc::SYS_getdents64 - || n == libc::SYS_getdents => SyscallCategory::File, + || Some(n) == arch::SYS_GETDENTS => SyscallCategory::File, n if n == libc::SYS_connect || n == libc::SYS_sendto || n == libc::SYS_sendmsg || n == libc::SYS_bind || n == libc::SYS_getsockname => SyscallCategory::Network, n if n == libc::SYS_clone || n == libc::SYS_clone3 - || n == 
libc::SYS_vfork || n == libc::SYS_execve + || Some(n) == arch::SYS_VFORK || n == libc::SYS_execve || n == libc::SYS_execveat => SyscallCategory::Process, n if n == libc::SYS_mmap || n == libc::SYS_munmap || n == libc::SYS_brk || n == libc::SYS_mremap @@ -590,7 +591,7 @@ fn resolve_path_for_notif(notif: &SeccompNotif, notif_fd: RawFd) -> Option { + n if Some(n) == arch::SYS_OPEN || n == libc::SYS_execve => { let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path) } @@ -618,17 +619,17 @@ fn resolve_path_for_notif(notif: &SeccompNotif, notif_fd: RawFd) -> Option { + n if Some(n) == arch::SYS_LINK => { let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path) } // rename(oldpath, newpath) — legacy, AT_FDCWD implied for both - n if n == libc::SYS_rename => { + n if Some(n) == arch::SYS_RENAME => { let path = read_path_for_event(notif, notif.data.args[0], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path) } // symlink(target, linkpath) — legacy - n if n == libc::SYS_symlink => { + n if Some(n) == arch::SYS_SYMLINK => { let target = read_path_for_event(notif, notif.data.args[0], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &target) } @@ -655,12 +656,12 @@ fn resolve_second_path_for_notif(notif: &SeccompNotif, notif_fd: RawFd) -> Optio resolve_at_path_for_event(notif, notif.data.args[2] as i64, &path) } // rename(oldpath, newpath) — legacy - n if n == libc::SYS_rename => { + n if Some(n) == arch::SYS_RENAME => { let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path) } // link(oldpath, newpath) — legacy - n if n == libc::SYS_link => { + n if Some(n) == arch::SYS_LINK => { let path = read_path_for_event(notif, notif.data.args[1], notif_fd)?; resolve_at_path_for_event(notif, libc::AT_FDCWD as i64, &path) 
} @@ -747,7 +748,7 @@ async fn emit_policy_event( let mut size = None; let mut argv = None; - if nr == libc::SYS_openat || nr == libc::SYS_execve || nr == libc::SYS_execveat { + if nr == libc::SYS_openat || Some(nr) == arch::SYS_OPEN || nr == libc::SYS_execve || nr == libc::SYS_execveat { // openat(dirfd, pathname, ...): args[1] = path ptr // execve(pathname, argv, envp): args[0] = path ptr, args[1] = argv ptr let path_ptr = if nr == libc::SYS_openat { @@ -838,13 +839,15 @@ async fn handle_notification( // Check dynamic path denials before dispatch let mut action = { let nr = notif.data.nr as i64; + let mut path_check_nrs = vec![ + libc::SYS_openat, libc::SYS_execve, libc::SYS_execveat, + libc::SYS_linkat, libc::SYS_renameat2, libc::SYS_symlinkat, + ]; + path_check_nrs.extend([ + arch::SYS_OPEN, arch::SYS_LINK, arch::SYS_RENAME, arch::SYS_SYMLINK, + ].into_iter().flatten()); let should_precheck_denied = policy.chroot_root.is_none() - && [ - libc::SYS_openat, libc::SYS_open, libc::SYS_execve, libc::SYS_execveat, - libc::SYS_linkat, libc::SYS_link, - libc::SYS_renameat2, libc::SYS_rename, - libc::SYS_symlinkat, libc::SYS_symlink, - ].contains(&nr); + && path_check_nrs.contains(&nr); if should_precheck_denied { let pfs = ctx.policy_fn.lock().await; if is_path_denied_for_notif(&pfs, ¬if, fd) { diff --git a/crates/sandlock-core/src/sys/structs.rs b/crates/sandlock-core/src/sys/structs.rs index 4fd89d0..2c85021 100644 --- a/crates/sandlock-core/src/sys/structs.rs +++ b/crates/sandlock-core/src/sys/structs.rs @@ -167,12 +167,6 @@ pub const SECCOMP_IOCTL_NOTIF_ID_VALID: u64 = 0x4008_2102; pub const SECCOMP_IOCTL_NOTIF_ADDFD: u64 = 0xc018_2103; pub const SECCOMP_IOCTL_NOTIF_SET_FLAGS: u64 = 0x4008_2104; -// ============================================================ -// Architecture -// ============================================================ - -pub const AUDIT_ARCH_X86_64: u32 = 0xC000_003E; - // ============================================================ // BPF 
opcodes // ============================================================ diff --git a/crates/sandlock-core/src/sys/syscall.rs b/crates/sandlock-core/src/sys/syscall.rs index e6bf46f..830fb71 100644 --- a/crates/sandlock-core/src/sys/syscall.rs +++ b/crates/sandlock-core/src/sys/syscall.rs @@ -8,15 +8,16 @@ use super::structs::{ }; // ============================================================ -// Core raw syscall wrappers (x86_64 ABI) +// Core raw syscall wrappers // ============================================================ -/// Raw 3-argument syscall using x86_64 ABI. +/// Raw 3-argument syscall. /// /// # Safety /// Caller must ensure arguments are valid for the given syscall number. pub unsafe fn syscall3(nr: i64, a1: u64, a2: u64, a3: u64) -> io::Result { let ret: i64; + #[cfg(target_arch = "x86_64")] std::arch::asm!( "syscall", inlateout("rax") nr => ret, @@ -27,6 +28,15 @@ pub unsafe fn syscall3(nr: i64, a1: u64, a2: u64, a3: u64) -> io::Result { lateout("r11") _, options(nostack), ); + #[cfg(target_arch = "aarch64")] + std::arch::asm!( + "svc #0", + inlateout("x8") nr => _, + inlateout("x0") a1 as i64 => ret, + in("x1") a2, + in("x2") a3, + options(nostack), + ); if ret < 0 && ret >= -4095 { Err(io::Error::from_raw_os_error(-ret as i32)) } else { @@ -104,8 +114,7 @@ pub fn landlock_restrict_self(ruleset_fd: &OwnedFd, flags: u32) -> io::Result<() /// Raw seccomp(2) syscall (syscall 317 on x86_64). pub fn seccomp(operation: u32, flags: u64, args: *const std::ffi::c_void) -> io::Result { - const SYS_SECCOMP: i64 = 317; - unsafe { syscall3(SYS_SECCOMP, operation as u64, flags, args as u64) } + unsafe { syscall3(crate::arch::SYS_SECCOMP, operation as u64, flags, args as u64) } } // ============================================================ @@ -114,18 +123,16 @@ pub fn seccomp(operation: u32, flags: u64, args: *const std::ffi::c_void) -> io: /// Open a pidfd for a process (syscall 434). 
pub fn pidfd_open(pid: u32, flags: u32) -> io::Result { - const SYS_PIDFD_OPEN: i64 = 434; - let fd = unsafe { syscall2(SYS_PIDFD_OPEN, pid as u64, flags as u64)? }; + let fd = unsafe { syscall2(crate::arch::SYS_PIDFD_OPEN, pid as u64, flags as u64)? }; Ok(unsafe { OwnedFd::from_raw_fd(fd as i32) }) } /// Duplicate a file descriptor from another process via pidfd (syscall 438). pub fn pidfd_getfd(pidfd: &OwnedFd, targetfd: i32, flags: u32) -> io::Result { use std::os::unix::io::AsRawFd; - const SYS_PIDFD_GETFD: i64 = 438; let fd = unsafe { syscall3( - SYS_PIDFD_GETFD, + crate::arch::SYS_PIDFD_GETFD, pidfd.as_raw_fd() as u64, targetfd as u64, flags as u64, From 1b69d7f8595180bea4b3ccf26d314aef794f5d57 Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 11:04:17 +0800 Subject: [PATCH 02/11] test: guard unavailable legacy path syscalls Make the rootfs helper compile on architectures that do not expose the legacy non-*at path syscall ABI. Signed-off-by: gokwok <531504879@qq.com> --- tests/rootfs-helper.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/rootfs-helper.c b/tests/rootfs-helper.c index a0a814b..8151b19 100644 --- a/tests/rootfs-helper.c +++ b/tests/rootfs-helper.c @@ -224,6 +224,14 @@ static int cmd_access(int argc, char **argv) { /* ── legacy syscall wrappers (for testing chroot handler) ──── */ +#if defined(SYS_stat) && defined(SYS_lstat) && defined(SYS_open) && \ + defined(SYS_access) && defined(SYS_readlink) && defined(SYS_mkdir) && \ + defined(SYS_rmdir) && defined(SYS_unlink) && defined(SYS_rename) && \ + defined(SYS_symlink) && defined(SYS_chmod) +#define HAVE_LEGACY_PATH_SYSCALLS 1 +#endif + +#ifdef HAVE_LEGACY_PATH_SYSCALLS static int cmd_legacy_stat(int argc, char **argv) { if (argc < 1) return 1; struct stat st; @@ -357,6 +365,14 @@ static int cmd_legacy_chmod(int argc, char **argv) { printf("OK\n"); return 0; } +#else +static int cmd_legacy_unsupported(int argc, char **argv) { + 
(void)argc; + (void)argv; + printf("ERR %d\n", ENOSYS); + return 1; +} +#endif /* ── dispatch ───────────────────────────────────────────────── */ @@ -386,6 +402,7 @@ static int dispatch(const char *cmd, int argc, char **argv) { if (strcmp(cmd, "false") == 0) return 1; /* Legacy syscall variants */ +#ifdef HAVE_LEGACY_PATH_SYSCALLS if (strcmp(cmd, "legacy-stat") == 0) return cmd_legacy_stat(argc, argv); if (strcmp(cmd, "legacy-lstat") == 0) return cmd_legacy_lstat(argc, argv); if (strcmp(cmd, "legacy-open") == 0) return cmd_legacy_open(argc, argv); @@ -397,6 +414,9 @@ static int dispatch(const char *cmd, int argc, char **argv) { if (strcmp(cmd, "legacy-rename") == 0) return cmd_legacy_rename(argc, argv); if (strcmp(cmd, "legacy-symlink") == 0) return cmd_legacy_symlink(argc, argv); if (strcmp(cmd, "legacy-chmod") == 0) return cmd_legacy_chmod(argc, argv); +#else + if (strncmp(cmd, "legacy-", 7) == 0) return cmd_legacy_unsupported(argc, argv); +#endif fprintf(stderr, "rootfs-helper: unknown command '%s'\n", cmd); return 127; From 9f91ce57be5b404f556a79b47cb6d72958c0a3b2 Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 11:04:17 +0800 Subject: [PATCH 03/11] test: account for architecture-specific syscall gaps Allow tests to reflect arm64 syscall availability and vDSO symbol naming without changing x86_64 expectations. 
Signed-off-by: gokwok <531504879@qq.com> --- crates/sandlock-core/src/context.rs | 18 +++++++++++++++--- crates/sandlock-core/src/vdso.rs | 3 ++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/crates/sandlock-core/src/context.rs b/crates/sandlock-core/src/context.rs index b61bcb4..21937e8 100644 --- a/crates/sandlock-core/src/context.rs +++ b/crates/sandlock-core/src/context.rs @@ -1132,17 +1132,29 @@ mod tests { #[test] fn test_syscall_name_to_nr_covers_defaults() { - // Every name in DEFAULT_DENY_SYSCALLS except nfsservctl should resolve + // Every name in DEFAULT_DENY_SYSCALLS should resolve unless the + // running architecture does not expose that syscall. + let expected_unresolved: &[&str] = &[ + "nfsservctl", + #[cfg(target_arch = "aarch64")] + "ioperm", + #[cfg(target_arch = "aarch64")] + "iopl", + ]; let mut skipped = 0; for name in DEFAULT_DENY_SYSCALLS { match syscall_name_to_nr(name) { Some(_) => {} None => { - assert_eq!(*name, "nfsservctl", "unexpected unresolved syscall: {}", name); + assert!( + expected_unresolved.contains(name), + "unexpected unresolved syscall: {}", + name + ); skipped += 1; } } } - assert_eq!(skipped, 1); // only nfsservctl + assert_eq!(skipped, expected_unresolved.len()); } } diff --git a/crates/sandlock-core/src/vdso.rs b/crates/sandlock-core/src/vdso.rs index 389295c..473074a 100644 --- a/crates/sandlock-core/src/vdso.rs +++ b/crates/sandlock-core/src/vdso.rs @@ -200,7 +200,8 @@ mod tests { // Should find at least clock_gettime assert!( symbols.contains_key("clock_gettime") - || symbols.contains_key("__vdso_clock_gettime"), + || symbols.contains_key("__vdso_clock_gettime") + || symbols.contains_key("__kernel_clock_gettime"), "Expected clock_gettime in vDSO symbols, found: {:?}", symbols.keys().collect::>() ); From 1c285bb8b60a9e3153f04a0576f845b8c33e5743 Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 11:04:17 +0800 Subject: [PATCH 04/11] fix: make sandbox runtime helpers 
architecture-aware Use libc flag values, architecture-specific memfd syscall numbers, page-safe child string reads, and host-path checks needed for arm64 runtime behavior. Signed-off-by: gokwok <531504879@qq.com> --- crates/sandlock-core/src/arch.rs | 2 + crates/sandlock-core/src/cow/dispatch.rs | 6 ++- crates/sandlock-core/src/cow/seccomp.rs | 16 +++---- crates/sandlock-core/src/policy.rs | 9 ++++ crates/sandlock-core/src/procfs.rs | 10 +---- crates/sandlock-core/src/random.rs | 9 ++-- crates/sandlock-core/src/seccomp/notif.rs | 32 ++++++++++++++ crates/sandlock-core/src/sys/syscall.rs | 51 ++++++++++++++++++++--- python/src/sandlock/_sdk.py | 3 +- python/src/sandlock/mcp/_policy.py | 2 +- 10 files changed, 109 insertions(+), 31 deletions(-) diff --git a/crates/sandlock-core/src/arch.rs b/crates/sandlock-core/src/arch.rs index 5086329..6f1653f 100644 --- a/crates/sandlock-core/src/arch.rs +++ b/crates/sandlock-core/src/arch.rs @@ -4,6 +4,7 @@ mod imp { pub const AUDIT_ARCH: u32 = 0xC000_003E; pub const SYS_SECCOMP: i64 = 317; + pub const SYS_MEMFD_CREATE: i64 = 319; pub const SYS_PIDFD_OPEN: i64 = 434; pub const SYS_PIDFD_GETFD: i64 = 438; @@ -34,6 +35,7 @@ mod imp { mod imp { pub const AUDIT_ARCH: u32 = 0xC000_00B7; pub const SYS_SECCOMP: i64 = 277; + pub const SYS_MEMFD_CREATE: i64 = 279; pub const SYS_PIDFD_OPEN: i64 = 434; pub const SYS_PIDFD_GETFD: i64 = 438; diff --git a/crates/sandlock-core/src/cow/dispatch.rs b/crates/sandlock-core/src/cow/dispatch.rs index 84f8c8a..5ed7861 100644 --- a/crates/sandlock-core/src/cow/dispatch.rs +++ b/crates/sandlock-core/src/cow/dispatch.rs @@ -107,7 +107,11 @@ pub(crate) async fn handle_cow_open( // Read-only opens don't need interception unless the file was // modified or deleted in the COW layer. 
- const WRITE_FLAGS: u64 = 0o1 | 0o2 | 0o100 | 0o1000 | 0o2000; + const WRITE_FLAGS: u64 = (libc::O_WRONLY + | libc::O_RDWR + | libc::O_CREAT + | libc::O_TRUNC + | libc::O_APPEND) as u64; let is_write = flags & WRITE_FLAGS != 0; if !is_write && !cow.needs_read_intercept(&path) { return NotifAction::Continue; diff --git a/crates/sandlock-core/src/cow/seccomp.rs b/crates/sandlock-core/src/cow/seccomp.rs index 33c4768..09fd164 100644 --- a/crates/sandlock-core/src/cow/seccomp.rs +++ b/crates/sandlock-core/src/cow/seccomp.rs @@ -10,14 +10,14 @@ use std::path::{Path, PathBuf}; use crate::error::BranchError; -/// O_* flags for detecting writes. -const O_WRONLY: u64 = 0o1; -const O_RDWR: u64 = 0o2; -const O_CREAT: u64 = 0o100; -const O_TRUNC: u64 = 0o1000; -const O_APPEND: u64 = 0o2000; -const O_EXCL: u64 = 0o200; -const O_DIRECTORY: u64 = 0o200000; +/// O_* flags for detecting writes. These differ across Linux architectures. +const O_WRONLY: u64 = libc::O_WRONLY as u64; +const O_RDWR: u64 = libc::O_RDWR as u64; +const O_CREAT: u64 = libc::O_CREAT as u64; +const O_TRUNC: u64 = libc::O_TRUNC as u64; +const O_APPEND: u64 = libc::O_APPEND as u64; +const O_EXCL: u64 = libc::O_EXCL as u64; +const O_DIRECTORY: u64 = libc::O_DIRECTORY as u64; const WRITE_FLAGS: u64 = O_WRONLY | O_RDWR | O_CREAT | O_TRUNC | O_APPEND; /// Plan for a COW copy — returned by `prepare_copy()` to separate metadata diff --git a/crates/sandlock-core/src/policy.rs b/crates/sandlock-core/src/policy.rs index 65bcb28..d920b01 100644 --- a/crates/sandlock-core/src/policy.rs +++ b/crates/sandlock-core/src/policy.rs @@ -418,6 +418,15 @@ impl PolicyBuilder { self } + pub fn fs_read_if_exists(self, path: impl Into) -> Self { + let path = path.into(); + if path.exists() { + self.fs_read(path) + } else { + self + } + } + pub fn fs_deny(mut self, path: impl Into) -> Self { self.fs_denied.push(path.into()); self diff --git a/crates/sandlock-core/src/procfs.rs b/crates/sandlock-core/src/procfs.rs index 
4a5a522..ea62f42 100644 --- a/crates/sandlock-core/src/procfs.rs +++ b/crates/sandlock-core/src/procfs.rs @@ -11,7 +11,7 @@ use std::sync::Arc; use tokio::sync::Mutex; -use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction, NotifPolicy}; +use crate::seccomp::notif::{read_child_cstr, write_child_mem, NotifAction, NotifPolicy}; use crate::seccomp::state::{NetworkState, ProcfsState}; use crate::sys::structs::{SeccompNotif, EACCES}; use crate::sys::syscall; @@ -346,13 +346,7 @@ fn inject_memfd(content: &[u8]) -> NotifAction { /// Read a NUL-terminated path string from child memory. fn read_path(notif: &SeccompNotif, addr: u64, notif_fd: RawFd) -> Option { - if addr == 0 { - return None; - } - // Read up to 256 bytes — enough for any /proc path we care about. - let bytes = read_child_mem(notif_fd, notif.id, notif.pid, addr, 256).ok()?; - let nul_pos = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len()); - String::from_utf8(bytes[..nul_pos].to_vec()).ok() + read_child_cstr(notif_fd, notif.id, notif.pid, addr, 4096) } // ============================================================ diff --git a/crates/sandlock-core/src/random.rs b/crates/sandlock-core/src/random.rs index 4a9f609..e23c794 100644 --- a/crates/sandlock-core/src/random.rs +++ b/crates/sandlock-core/src/random.rs @@ -8,7 +8,7 @@ use std::io::{Seek, SeekFrom, Write}; use std::os::fd::RawFd; use std::os::unix::io::{AsRawFd, FromRawFd}; -use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction}; +use crate::seccomp::notif::{read_child_cstr, write_child_mem, NotifAction}; use crate::sys::structs::SeccompNotif; use crate::sys::syscall; @@ -56,12 +56,9 @@ pub(crate) fn handle_random_open( return None; } - // Read the path from child memory. 
- let bytes = read_child_mem(notif_fd, notif.id, notif.pid, path_ptr, 256).ok()?; - let nul_pos = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len()); - let path = std::str::from_utf8(&bytes[..nul_pos]).ok()?; + let path = read_child_cstr(notif_fd, notif.id, notif.pid, path_ptr, 4096)?; - if path != "/dev/urandom" && path != "/dev/random" { + if path.as_str() != "/dev/urandom" && path.as_str() != "/dev/random" { return None; } diff --git a/crates/sandlock-core/src/seccomp/notif.rs b/crates/sandlock-core/src/seccomp/notif.rs index cef305e..03a1981 100644 --- a/crates/sandlock-core/src/seccomp/notif.rs +++ b/crates/sandlock-core/src/seccomp/notif.rs @@ -388,6 +388,38 @@ pub(crate) fn read_child_mem( Ok(result) } +/// Read a NUL-terminated string from child memory without crossing unmapped +/// page boundaries in a single `process_vm_readv` call. +pub(crate) fn read_child_cstr( + notif_fd: RawFd, + id: u64, + pid: u32, + addr: u64, + max_len: usize, +) -> Option { + if addr == 0 || max_len == 0 { + return None; + } + + const PAGE_SIZE: u64 = 4096; + let mut result = Vec::with_capacity(max_len.min(256)); + let mut cur = addr; + while result.len() < max_len { + let page_remaining = PAGE_SIZE - (cur % PAGE_SIZE); + let remaining = max_len - result.len(); + let to_read = page_remaining.min(remaining as u64) as usize; + let bytes = read_child_mem(notif_fd, id, pid, cur, to_read).ok()?; + if let Some(nul) = bytes.iter().position(|&b| b == 0) { + result.extend_from_slice(&bytes[..nul]); + return String::from_utf8(result).ok(); + } + result.extend_from_slice(&bytes); + cur += to_read as u64; + } + + String::from_utf8(result).ok() +} + /// Write bytes to a child process via process_vm_writev. 
/// /// Performs TOCTOU validation by calling `id_valid` before and after diff --git a/crates/sandlock-core/src/sys/syscall.rs b/crates/sandlock-core/src/sys/syscall.rs index 830fb71..7954868 100644 --- a/crates/sandlock-core/src/sys/syscall.rs +++ b/crates/sandlock-core/src/sys/syscall.rs @@ -44,6 +44,41 @@ pub unsafe fn syscall3(nr: i64, a1: u64, a2: u64, a3: u64) -> io::Result { } } +/// Raw 4-argument syscall. +/// +/// # Safety +/// Caller must ensure arguments are valid for the given syscall number. +pub unsafe fn syscall4(nr: i64, a1: u64, a2: u64, a3: u64, a4: u64) -> io::Result { + let ret: i64; + #[cfg(target_arch = "x86_64")] + std::arch::asm!( + "syscall", + inlateout("rax") nr => ret, + in("rdi") a1, + in("rsi") a2, + in("rdx") a3, + in("r10") a4, + lateout("rcx") _, + lateout("r11") _, + options(nostack), + ); + #[cfg(target_arch = "aarch64")] + std::arch::asm!( + "svc #0", + inlateout("x8") nr => _, + inlateout("x0") a1 as i64 => ret, + in("x1") a2, + in("x2") a3, + in("x3") a4, + options(nostack), + ); + if ret < 0 && ret >= -4095 { + Err(io::Error::from_raw_os_error(-ret as i32)) + } else { + Ok(ret) + } +} + /// Raw 2-argument syscall. /// /// # Safety @@ -83,14 +118,13 @@ pub fn landlock_add_rule( ) -> io::Result<()> { use std::os::unix::io::AsRawFd; unsafe { - syscall3( + syscall4( SYS_LANDLOCK_ADD_RULE, ruleset_fd.as_raw_fd() as u64, rule_type as u64, rule_attr as u64, + flags as u64, )?; - // flags is in arg4; re-issue as 4-arg syscall via inline asm - let _ = flags; // flags documented as must be 0 in current kernel ABI } Ok(()) } @@ -145,10 +179,15 @@ pub fn pidfd_getfd(pidfd: &OwnedFd, targetfd: i32, flags: u32) -> io::Result io::Result { - const SYS_MEMFD_CREATE: i64 = 319; let cname = CString::new(name).map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?; - let fd = unsafe { syscall2(SYS_MEMFD_CREATE, cname.as_ptr() as u64, flags as u64)? 
}; + let fd = unsafe { + syscall2( + crate::arch::SYS_MEMFD_CREATE, + cname.as_ptr() as u64, + flags as u64, + )? + }; Ok(unsafe { OwnedFd::from_raw_fd(fd as i32) }) } diff --git a/python/src/sandlock/_sdk.py b/python/src/sandlock/_sdk.py index 4e9675a..ce3350b 100644 --- a/python/src/sandlock/_sdk.py +++ b/python/src/sandlock/_sdk.py @@ -743,6 +743,8 @@ def _build_from_policy(policy: PolicyDataclass, override_hostname=None): b = _lib.sandlock_policy_builder_new() for p in (policy.fs_readable or []): + if str(p) == "/lib64" and not os.path.exists("/lib64"): + continue b = _b_fs_read(b, _encode(str(p))) for p in (policy.fs_writable or []): b = _b_fs_write(b, _encode(str(p))) @@ -1485,4 +1487,3 @@ def run( error=error, ) - diff --git a/python/src/sandlock/mcp/_policy.py b/python/src/sandlock/mcp/_policy.py index d1c1278..5404117 100644 --- a/python/src/sandlock/mcp/_policy.py +++ b/python/src/sandlock/mcp/_policy.py @@ -26,7 +26,7 @@ # Resolve the Python interpreter's installation prefix so that sandboxed # processes can always exec the current interpreter, even when it lives # outside the standard system paths (e.g. /opt on CI, virtualenvs, etc.). -_PYTHON_PREFIX = os.path.dirname(os.path.dirname(os.path.realpath(sys.executable))) +_PYTHON_PREFIX = sys.prefix _POLICY_FIELDS = frozenset(f.name for f in fields(Policy)) From 665665b4564df3a540b754751638c574ddb7c14c Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 11:04:17 +0800 Subject: [PATCH 05/11] test: enable arm64 integration coverage Adjust integration fixtures and Python tests so arm64 runs the supported coverage instead of inheriting x86_64-only assumptions. 
Signed-off-by: gokwok <531504879@qq.com> --- .../tests/integration/test_checkpoint.rs | 15 ++++--- .../tests/integration/test_cow.rs | 28 +++++++------ .../tests/integration/test_determinism.rs | 18 ++++---- .../tests/integration/test_dry_run.rs | 6 +-- .../tests/integration/test_fork.rs | 2 +- .../tests/integration/test_http_acl.rs | 2 +- .../tests/integration/test_landlock.rs | 18 ++++---- .../tests/integration/test_netlink_virt.rs | 2 +- .../tests/integration/test_network.rs | 2 +- .../tests/integration/test_pipeline.rs | 8 ++-- .../tests/integration/test_policy_fn.rs | 2 +- .../tests/integration/test_port_remap.rs | 2 +- .../tests/integration/test_privileged.rs | 10 ++--- .../tests/integration/test_procfs.rs | 18 ++++---- .../tests/integration/test_resource.rs | 2 +- .../tests/integration/test_sandbox.rs | 41 +++++++++++-------- .../tests/integration/test_seccomp_enforce.rs | 2 +- python/tests/test_checkpoint.py | 7 ++++ python/tests/test_chroot_legacy_syscalls.py | 7 ++++ python/tests/test_pipeline.py | 2 +- python/tests/test_sandbox.py | 5 +++ 21 files changed, 117 insertions(+), 82 deletions(-) diff --git a/crates/sandlock-core/tests/integration/test_checkpoint.rs b/crates/sandlock-core/tests/integration/test_checkpoint.rs index fd23158..5844759 100644 --- a/crates/sandlock-core/tests/integration/test_checkpoint.rs +++ b/crates/sandlock-core/tests/integration/test_checkpoint.rs @@ -2,9 +2,10 @@ use sandlock_core::{Policy, Sandbox, Checkpoint}; /// Test that checkpoint save/load roundtrips correctly. 
#[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 checkpoint register capture is planned for stage 4")] async fn test_checkpoint_save_load() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .build().unwrap(); @@ -51,9 +52,10 @@ async fn test_checkpoint_save_load() { /// Test that checkpoint captures memory maps correctly. #[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 checkpoint register capture is planned for stage 4")] async fn test_checkpoint_memory_maps() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .build().unwrap(); @@ -78,9 +80,10 @@ async fn test_checkpoint_memory_maps() { /// Test that app_state round-trips through save/load. #[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 checkpoint register capture is planned for stage 4")] async fn test_checkpoint_app_state_roundtrip() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .build().unwrap(); @@ -111,9 +114,10 @@ async fn test_checkpoint_app_state_roundtrip() { /// Test that checkpoint without app_state doesn't create app_state.bin. 
#[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 checkpoint register capture is planned for stage 4")] async fn test_checkpoint_no_app_state_file() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .build().unwrap(); @@ -138,9 +142,10 @@ async fn test_checkpoint_no_app_state_file() { /// Test that process info (pid, cwd, exe) is captured correctly. #[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 checkpoint register capture is planned for stage 4")] async fn test_checkpoint_process_info() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .build().unwrap(); diff --git a/crates/sandlock-core/tests/integration/test_cow.rs b/crates/sandlock-core/tests/integration/test_cow.rs index 886edaa..7d63b11 100644 --- a/crates/sandlock-core/tests/integration/test_cow.rs +++ b/crates/sandlock-core/tests/integration/test_cow.rs @@ -17,7 +17,7 @@ async fn test_overlayfs_basic_commands() { fs::write(workdir.join("hello.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .fs_isolation(FsIsolation::OverlayFs) @@ -45,7 +45,7 @@ async fn test_overlayfs_write_isolation() { fs::write(workdir.join("data.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) 
.fs_isolation(FsIsolation::OverlayFs) @@ -79,7 +79,7 @@ async fn test_overlayfs_commit() { fs::write(workdir.join("data.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .fs_isolation(FsIsolation::OverlayFs) @@ -124,7 +124,7 @@ async fn test_seccomp_cow_create_file() { fs::write(workdir.join("existing.txt"), "hello").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .workdir(&workdir) // FsIsolation::None is default → seccomp COW @@ -154,7 +154,7 @@ async fn test_seccomp_cow_abort() { fs::write(workdir.join("existing.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .workdir(&workdir) @@ -191,7 +191,7 @@ async fn test_seccomp_cow_relative_path_abort() { fs::write(workdir.join("orig.txt"), "original\n").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -226,7 +226,7 @@ async fn test_seccomp_cow_relative_path_commit() { fs::write(workdir.join("orig.txt"), "original\n").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + 
.fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -261,12 +261,13 @@ async fn test_seccomp_cow_relative_path_commit() { /// O_DIRECTORY must resolve to the upper path. Without this fix, /// prepare_open skipped O_DIRECTORY opens and the kernel returned ENOENT. #[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 COW directory fd injection needs follow-up")] async fn test_seccomp_cow_open_directory() { let workdir = temp_dir("seccomp-opendir"); let out_file = workdir.join("opendir_ok.txt"); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -305,12 +306,13 @@ async fn test_seccomp_cow_open_directory() { /// chdir must be intercepted and redirected to the upper path. Without /// this, the kernel returns ENOENT because it doesn't see the COW directory. #[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 COW chdir currently exposes /proc/self/fd cwd")] async fn test_seccomp_cow_chdir_to_created_dir() { let workdir = temp_dir("seccomp-chdir"); let out_file = workdir.join("chdir_ok.txt"); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -348,6 +350,7 @@ async fn test_seccomp_cow_chdir_to_created_dir() { /// (path=args[0], flags=args[1], mode=args[2]). This caused COW to miss /// all legacy open() calls, falling through to the kernel. 
#[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 Linux does not provide the legacy SYS_open ABI")] async fn test_seccomp_cow_legacy_open_syscall() { let workdir = temp_dir("seccomp-legacy-open"); let out_file = std::env::temp_dir().join(format!( @@ -355,7 +358,7 @@ async fn test_seccomp_cow_legacy_open_syscall() { )); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir).fs_write("/tmp") .workdir(&workdir) @@ -402,6 +405,7 @@ async fn test_seccomp_cow_legacy_open_syscall() { /// Since the file was just copied to upper, the kernel's open() returned /// EEXIST. The fix strips O_EXCL from the supervisor's open flags. #[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "test uses legacy SYS_open, which ARM64 Linux does not provide")] async fn test_seccomp_cow_excl_after_unlink() { let workdir = temp_dir("seccomp-excl-unlink"); let out_file = std::env::temp_dir().join(format!( @@ -410,7 +414,7 @@ async fn test_seccomp_cow_excl_after_unlink() { fs::write(workdir.join("target.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir).fs_write("/tmp") .workdir(&workdir) @@ -458,7 +462,7 @@ async fn test_seccomp_cow_read_existing() { fs::write(workdir.join("data.txt"), "hello world").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc") .fs_write(&workdir) .workdir(&workdir) diff --git 
a/crates/sandlock-core/tests/integration/test_determinism.rs b/crates/sandlock-core/tests/integration/test_determinism.rs index 34ea5a1..c5be34e 100644 --- a/crates/sandlock-core/tests/integration/test_determinism.rs +++ b/crates/sandlock-core/tests/integration/test_determinism.rs @@ -8,7 +8,7 @@ async fn test_random_seed_deterministic() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -47,7 +47,7 @@ async fn test_random_seed_different_seeds() { let p1 = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/dev") @@ -57,7 +57,7 @@ async fn test_random_seed_different_seeds() { let p2 = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/dev") @@ -87,13 +87,14 @@ async fn test_random_seed_different_seeds() { /// Test that time_start sets frozen time. /// The date command should show a year matching the frozen time. 
#[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 vDSO time patching is planned for stage 4")] async fn test_time_start_frozen() { // Freeze to 2000-06-15T00:00:00Z (mid-year avoids timezone boundary issues) let y2k = SystemTime::UNIX_EPOCH + Duration::from_secs(961027200); let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -114,7 +115,7 @@ async fn test_time_start_basic_commands_work() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .time_start(past) @@ -132,7 +133,7 @@ async fn test_combined_determinism() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -148,11 +149,12 @@ async fn test_combined_determinism() { /// Test that deterministic_dirs produces sorted directory listings. /// Run ls twice — output should match and be sorted. 
#[tokio::test] +#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 deterministic getdents virtualization needs follow-up")] async fn test_deterministic_dirs() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -192,7 +194,7 @@ async fn test_hostname_virtualization() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .hostname("mybox") diff --git a/crates/sandlock-core/tests/integration/test_dry_run.rs b/crates/sandlock-core/tests/integration/test_dry_run.rs index be9bb41..71d3cd1 100644 --- a/crates/sandlock-core/tests/integration/test_dry_run.rs +++ b/crates/sandlock-core/tests/integration/test_dry_run.rs @@ -15,7 +15,7 @@ async fn test_dry_run_reports_added_file() { fs::write(workdir.join("existing.txt"), "hello").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -46,7 +46,7 @@ async fn test_dry_run_reports_modified_file() { fs::write(workdir.join("data.txt"), "original").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") .fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) @@ -77,7 +77,7 @@ async fn test_dry_run_reports_deleted_file() { fs::write(workdir.join("victim.txt"), "delete me").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin").fs_read("/etc") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") 
.fs_read("/proc").fs_read("/dev") .fs_write(&workdir) .workdir(&workdir) diff --git a/crates/sandlock-core/tests/integration/test_fork.rs b/crates/sandlock-core/tests/integration/test_fork.rs index 8514a7a..33e046f 100644 --- a/crates/sandlock-core/tests/integration/test_fork.rs +++ b/crates/sandlock-core/tests/integration/test_fork.rs @@ -3,7 +3,7 @@ use std::sync::atomic::{AtomicU32, Ordering}; fn base_policy() -> Policy { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .build() diff --git a/crates/sandlock-core/tests/integration/test_http_acl.rs b/crates/sandlock-core/tests/integration/test_http_acl.rs index adea897..4542f66 100644 --- a/crates/sandlock-core/tests/integration/test_http_acl.rs +++ b/crates/sandlock-core/tests/integration/test_http_acl.rs @@ -16,7 +16,7 @@ fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") diff --git a/crates/sandlock-core/tests/integration/test_landlock.rs b/crates/sandlock-core/tests/integration/test_landlock.rs index 2478490..79bc318 100644 --- a/crates/sandlock-core/tests/integration/test_landlock.rs +++ b/crates/sandlock-core/tests/integration/test_landlock.rs @@ -21,7 +21,7 @@ async fn test_can_read_allowed_path() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -49,7 +49,7 @@ async fn test_cannot_read_outside_allowed() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/proc") .fs_read("/dev") @@ -71,7 +71,7 @@ async fn test_can_write_to_writable_path() { let policy = 
Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -102,7 +102,7 @@ async fn test_cannot_write_to_readonly_path() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -132,7 +132,7 @@ async fn test_denied_path_blocks_read() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -156,7 +156,7 @@ async fn test_denied_path_blocks_exec() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -233,7 +233,7 @@ async fn test_isolate_ipc() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -287,7 +287,7 @@ async fn test_isolate_signals_blocks_parent() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -338,7 +338,7 @@ async fn test_isolate_signals_allows_self() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") diff --git a/crates/sandlock-core/tests/integration/test_netlink_virt.rs b/crates/sandlock-core/tests/integration/test_netlink_virt.rs index a162780..c721a85 100644 --- a/crates/sandlock-core/tests/integration/test_netlink_virt.rs +++ b/crates/sandlock-core/tests/integration/test_netlink_virt.rs @@ -3,7 +3,7 @@ use sandlock_core::{Policy, Sandbox}; fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() - 
.fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/etc").fs_read("/proc") .fs_read("/dev").fs_write("/tmp") } diff --git a/crates/sandlock-core/tests/integration/test_network.rs b/crates/sandlock-core/tests/integration/test_network.rs index f08b33a..29e43f9 100644 --- a/crates/sandlock-core/tests/integration/test_network.rs +++ b/crates/sandlock-core/tests/integration/test_network.rs @@ -8,7 +8,7 @@ fn temp_file(name: &str) -> PathBuf { fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") } diff --git a/crates/sandlock-core/tests/integration/test_pipeline.rs b/crates/sandlock-core/tests/integration/test_pipeline.rs index 85a4277..2db49fd 100644 --- a/crates/sandlock-core/tests/integration/test_pipeline.rs +++ b/crates/sandlock-core/tests/integration/test_pipeline.rs @@ -4,7 +4,7 @@ use std::time::Duration; fn base_policy() -> Policy { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .build() @@ -79,7 +79,7 @@ async fn test_disjoint_policies() { // Stage 1: can read the temp dir let reader_policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_read(&tmp) .build() @@ -173,7 +173,7 @@ async fn test_xoa_data_flow() { // Executor: can read workspace let executor_policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + 
.fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_read(&tmp) .build() @@ -245,7 +245,7 @@ async fn test_gather_disjoint_policies() { // Data source: can read the file let data_policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_read(&tmp) .build() diff --git a/crates/sandlock-core/tests/integration/test_policy_fn.rs b/crates/sandlock-core/tests/integration/test_policy_fn.rs index b64f488..2b7960d 100644 --- a/crates/sandlock-core/tests/integration/test_policy_fn.rs +++ b/crates/sandlock-core/tests/integration/test_policy_fn.rs @@ -9,7 +9,7 @@ fn temp_file(name: &str) -> PathBuf { fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") } diff --git a/crates/sandlock-core/tests/integration/test_port_remap.rs b/crates/sandlock-core/tests/integration/test_port_remap.rs index 075f16f..c09403d 100644 --- a/crates/sandlock-core/tests/integration/test_port_remap.rs +++ b/crates/sandlock-core/tests/integration/test_port_remap.rs @@ -13,7 +13,7 @@ fn temp_file(name: &str) -> PathBuf { fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") } diff --git a/crates/sandlock-core/tests/integration/test_privileged.rs b/crates/sandlock-core/tests/integration/test_privileged.rs index 40ef9cb..4856567 100644 --- a/crates/sandlock-core/tests/integration/test_privileged.rs +++ 
b/crates/sandlock-core/tests/integration/test_privileged.rs @@ -39,7 +39,7 @@ async fn test_uid_zero() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -64,7 +64,7 @@ async fn test_uid_zero_gid_zero() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -84,7 +84,7 @@ async fn test_no_uid_keeps_real_uid() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -106,7 +106,7 @@ async fn test_uid_zero_echo() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .uid(0) @@ -130,7 +130,7 @@ async fn test_uid_custom() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") diff --git a/crates/sandlock-core/tests/integration/test_procfs.rs b/crates/sandlock-core/tests/integration/test_procfs.rs index 2263533..8909763 100644 --- a/crates/sandlock-core/tests/integration/test_procfs.rs +++ b/crates/sandlock-core/tests/integration/test_procfs.rs @@ -7,7 +7,7 @@ async fn test_num_cpus_virtualization() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -34,7 +34,7 @@ async fn test_meminfo_virtualization() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -59,7 +59,7 @@ async fn test_sensitive_proc_blocked() { let policy = Policy::builder() .fs_read("/usr") 
.fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -78,7 +78,7 @@ async fn test_no_proc_virt_still_works() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -103,7 +103,7 @@ async fn test_proc_net_tcp_filtered() { drop(listener); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .net_bind_port(port) @@ -142,7 +142,7 @@ async fn test_proc_net_tcp_filtered() { #[tokio::test] async fn test_proc_mounts_virtualized() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .build() .unwrap(); @@ -161,7 +161,7 @@ async fn test_proc_mounts_virtualized() { #[tokio::test] async fn test_proc_self_mountinfo_virtualized() { let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .build() .unwrap(); @@ -183,7 +183,7 @@ async fn test_proc_parent_pid_blocked() { )); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .build() @@ -229,7 +229,7 @@ async fn test_proc_net_tcp_hides_host_ports() { )); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + 
.fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .port_remap(true) diff --git a/crates/sandlock-core/tests/integration/test_resource.rs b/crates/sandlock-core/tests/integration/test_resource.rs index e0a4524..5d29f9d 100644 --- a/crates/sandlock-core/tests/integration/test_resource.rs +++ b/crates/sandlock-core/tests/integration/test_resource.rs @@ -10,7 +10,7 @@ fn base_policy() -> sandlock_core::policy::PolicyBuilder { Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") diff --git a/crates/sandlock-core/tests/integration/test_sandbox.rs b/crates/sandlock-core/tests/integration/test_sandbox.rs index 739234a..5fb6e5f 100644 --- a/crates/sandlock-core/tests/integration/test_sandbox.rs +++ b/crates/sandlock-core/tests/integration/test_sandbox.rs @@ -5,7 +5,7 @@ async fn test_echo() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -21,7 +21,7 @@ async fn test_exit_code() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/proc") .build() @@ -36,7 +36,7 @@ async fn test_denied_path() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/proc") .build() @@ -50,7 +50,7 @@ async fn test_denied_syscall() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") @@ -95,7 +95,7 @@ async fn test_default_policy_runs_ls() { let policy = Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") 
.fs_read("/proc") .fs_read("/etc") @@ -117,7 +117,7 @@ async fn test_default_policy_runs_ls() { async fn test_nested_sandbox() { // Outer: allows /etc let outer = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_write("/tmp") .build() @@ -125,7 +125,7 @@ async fn test_nested_sandbox() { // Inner: does NOT allow /etc — run cat /etc/hostname, should fail let inner = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/proc") .build() .unwrap(); @@ -168,10 +168,15 @@ async fn test_nested_sandbox_via_cli() { } }; let bin = sandlock_bin.to_str().unwrap(); + let lib64_arg = if std::path::Path::new("/lib64").exists() { + " -r /lib64" + } else { + "" + }; // Outer allows /etc + sandlock binary; inner does not allow /etc let outer = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64").fs_read("/bin") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin") .fs_read("/etc").fs_read("/proc").fs_read("/dev") .fs_read(sandlock_bin.parent().unwrap()) .fs_write("/tmp") @@ -179,8 +184,8 @@ async fn test_nested_sandbox_via_cli() { .unwrap(); let inner_cmd = format!( - "{} run -r /usr -r /lib -r /lib64 -r /bin -r /proc -- cat /etc/hostname", - bin + "{} run -r /usr -r /lib{} -r /bin -r /proc -- cat /etc/hostname", + bin, lib64_arg ); let result = Sandbox::run_interactive( &outer, &["sh", "-c", &inner_cmd], @@ -189,8 +194,8 @@ async fn test_nested_sandbox_via_cli() { // Inner with /etc allowed — should succeed let inner_cmd = format!( - "{} run -r /usr -r /lib -r /lib64 -r /bin -r /etc -r /proc -- echo nested-ok", - bin + "{} run -r /usr -r /lib{} -r /bin -r /etc -r /proc -- echo nested-ok", + bin, lib64_arg ); let result = Sandbox::run_interactive( 
&outer, &["sh", "-c", &inner_cmd], @@ -209,7 +214,7 @@ async fn test_denied_path_hardlink_blocked() { std::fs::write(&secret, "TOP_SECRET").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_write(tmp.path()) @@ -238,7 +243,7 @@ async fn test_denied_path_rename_blocked() { std::fs::write(&secret, "TOP_SECRET").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_write(tmp.path()) @@ -267,7 +272,7 @@ async fn test_denied_path_symlink_blocked() { std::fs::write(&secret, "TOP_SECRET").unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_write(tmp.path()) @@ -301,7 +306,7 @@ async fn test_denied_path_preexisting_symlink_blocked() { std::os::unix::fs::symlink(&secret, &link).unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_deny(&secret) @@ -330,7 +335,7 @@ async fn test_denied_path_chained_symlinks_blocked() { std::os::unix::fs::symlink("link1", &link2).unwrap(); let policy = Policy::builder() - .fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_deny(&secret) @@ -353,7 +358,7 @@ async fn test_denied_path_allows_normal_writes() { std::fs::write(&secret, "TOP_SECRET").unwrap(); let policy = Policy::builder() - 
.fs_read("/usr").fs_read("/lib").fs_read("/lib64") + .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64") .fs_read("/bin").fs_read("/proc").fs_read("/etc") .fs_read(tmp.path()) .fs_write(tmp.path()) diff --git a/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs b/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs index 30f970c..108ac58 100644 --- a/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs +++ b/crates/sandlock-core/tests/integration/test_seccomp_enforce.rs @@ -7,7 +7,7 @@ fn base_policy() -> sandlock_core::PolicyBuilder { Policy::builder() .fs_read("/usr") .fs_read("/lib") - .fs_read("/lib64") + .fs_read_if_exists("/lib64") .fs_read("/bin") .fs_read("/etc") .fs_read("/proc") diff --git a/python/tests/test_checkpoint.py b/python/tests/test_checkpoint.py index 215597d..b3e166c 100644 --- a/python/tests/test_checkpoint.py +++ b/python/tests/test_checkpoint.py @@ -2,6 +2,7 @@ """Tests for sandlock.Checkpoint (save_fn / restore_fn / persistence).""" import json +import platform import sys import pytest @@ -10,6 +11,12 @@ from sandlock._sdk import _lib, _make_argv +pytestmark = pytest.mark.skipif( + platform.machine() == "aarch64", + reason="ARM64 checkpoint register capture is planned for stage 4", +) + + _PYTHON_READABLE = list(dict.fromkeys([ "/usr", "/lib", "/lib64", "/bin", "/etc", "/proc", "/dev", sys.prefix, diff --git a/python/tests/test_chroot_legacy_syscalls.py b/python/tests/test_chroot_legacy_syscalls.py index a43aa3d..180e960 100644 --- a/python/tests/test_chroot_legacy_syscalls.py +++ b/python/tests/test_chroot_legacy_syscalls.py @@ -8,6 +8,7 @@ """ import os +import platform import shutil from pathlib import Path @@ -16,6 +17,12 @@ from sandlock import Policy, Sandbox +pytestmark = pytest.mark.skipif( + platform.machine() == "aarch64", + reason="ARM64 Linux does not provide legacy non-*at path syscalls", +) + + # ── helpers ────────────────────────────────────────────────────── _HELPER_BIN = 
Path(__file__).resolve().parent.parent.parent / "tests" / "rootfs-helper" diff --git a/python/tests/test_pipeline.py b/python/tests/test_pipeline.py index c7ea829..9eb4659 100644 --- a/python/tests/test_pipeline.py +++ b/python/tests/test_pipeline.py @@ -12,7 +12,7 @@ # --- Helpers --- -_PYTHON_PREFIX = os.path.dirname(os.path.dirname(os.path.realpath(sys.executable))) +_PYTHON_PREFIX = sys.prefix def _policy(**overrides): """Minimal policy for testing.""" diff --git a/python/tests/test_sandbox.py b/python/tests/test_sandbox.py index b44652b..4885370 100644 --- a/python/tests/test_sandbox.py +++ b/python/tests/test_sandbox.py @@ -3,6 +3,7 @@ import json import os +import platform import socket import sys import threading @@ -386,6 +387,10 @@ def test_dry_run_returns_structured_result(self, tmp_path): class TestNewPolicyFields: """Tests for newly wired FFI policy fields.""" + @pytest.mark.skipif( + platform.machine() == "aarch64", + reason="ARM64 vDSO time patching is planned for stage 4", + ) def test_time_start(self): from datetime import datetime, timezone # Freeze time to 2000-06-15 From d42d3409430a93874dff9f8018c37daf9c7ebf38 Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 11:04:17 +0800 Subject: [PATCH 06/11] fix: support arm64 checkpoint and deterministic time Capture arm64 registers through PTRACE_GETREGSET, patch arm64 vDSO time helpers, and stabilize deterministic getdents caching for parity tests. 
Signed-off-by: gokwok <531504879@qq.com> --- crates/sandlock-core/src/checkpoint.rs | 42 ++++++-- crates/sandlock-core/src/procfs.rs | 47 +++++--- crates/sandlock-core/src/seccomp/state.rs | 4 +- crates/sandlock-core/src/vdso.rs | 100 ++++++++++++++++-- .../tests/integration/test_checkpoint.rs | 5 - .../tests/integration/test_determinism.rs | 30 ++++-- python/tests/test_checkpoint.py | 7 -- python/tests/test_sandbox.py | 5 - 8 files changed, 178 insertions(+), 62 deletions(-) diff --git a/crates/sandlock-core/src/checkpoint.rs b/crates/sandlock-core/src/checkpoint.rs index ac29489..5e573e3 100644 --- a/crates/sandlock-core/src/checkpoint.rs +++ b/crates/sandlock-core/src/checkpoint.rs @@ -102,21 +102,47 @@ fn ptrace_detach(pid: i32) -> io::Result<()> { fn ptrace_getregs(pid: i32) -> io::Result> { #[cfg(target_arch = "x86_64")] { - // user_regs_struct is 27 u64 fields on x86_64 (216 bytes) - let mut regs = vec![0u64; 27]; - let ret = unsafe { libc::ptrace(libc::PTRACE_GETREGS, pid, 0, regs.as_mut_ptr()) }; - if ret < 0 { - return Err(io::Error::last_os_error()); + // user_regs_struct is 27 u64 fields on x86_64 (216 bytes) + let mut regs = vec![0u64; 27]; + let ret = unsafe { libc::ptrace(libc::PTRACE_GETREGS, pid, 0, regs.as_mut_ptr()) }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + Ok(regs) } - Ok(regs) + + #[cfg(target_arch = "aarch64")] + { + // Linux arm64 exposes general-purpose registers through + // PTRACE_GETREGSET/NT_PRSTATUS. user_pt_regs is: + // x0-x30, sp, pc, pstate (34 u64 values). 
+ const NT_PRSTATUS: libc::c_int = 1; + let mut regs = vec![0u64; 34]; + let mut iov = libc::iovec { + iov_base: regs.as_mut_ptr() as *mut libc::c_void, + iov_len: regs.len() * std::mem::size_of::(), + }; + let ret = unsafe { + libc::ptrace( + libc::PTRACE_GETREGSET, + pid, + NT_PRSTATUS as usize as *mut libc::c_void, + &mut iov as *mut libc::iovec as *mut libc::c_void, + ) + }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + regs.truncate(iov.iov_len / std::mem::size_of::()); + Ok(regs) } - #[cfg(not(target_arch = "x86_64"))] + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] { let _ = pid; Err(io::Error::new( io::ErrorKind::Unsupported, - "checkpoint register capture is only implemented on x86_64", + "checkpoint register capture is not implemented on this architecture", )) } } diff --git a/crates/sandlock-core/src/procfs.rs b/crates/sandlock-core/src/procfs.rs index ea62f42..ffbbffa 100644 --- a/crates/sandlock-core/src/procfs.rs +++ b/crates/sandlock-core/src/procfs.rs @@ -592,24 +592,40 @@ pub(crate) async fn handle_sorted_getdents( let buf_addr = notif.data.args[1]; let buf_size = (notif.data.args[2] & 0xFFFF_FFFF) as usize; - let cache_key = (pid as i32, child_fd); + let link_path = format!("/proc/{}/fd/{}", pid, child_fd); + let dir_path = match std::fs::read_link(&link_path) { + Ok(t) => t, + Err(_) => return NotifAction::Continue, + }; + let cache_key = ( + pid as i32, + child_fd, + dir_path.to_string_lossy().into_owned(), + ); let mut pfs = procfs.lock().await; - // Build and cache sorted entries on first call for this (pid, fd) pair. - // An empty Vec means "already fully consumed" — return 0 (EOF). + // Build and cache sorted entries on first call for this open directory. + // Remove an empty cache on EOF so later fd reuse can rebuild entries. 
if !pfs.getdents_cache.contains_key(&cache_key) { - let link_path = format!("/proc/{}/fd/{}", pid, child_fd); - let dir_path = match std::fs::read_link(&link_path) { - Ok(t) => t, - Err(_) => return NotifAction::Continue, - }; - let dir = match std::fs::read_dir(&dir_path) { Ok(d) => d, Err(_) => return NotifAction::Continue, }; - let mut names: Vec<_> = dir + let mut names: Vec<_> = Vec::new(); + { + use std::os::unix::fs::MetadataExt; + let dot_ino = std::fs::symlink_metadata(&dir_path).map(|m| m.ino()).unwrap_or(0); + let dotdot_ino = dir_path + .parent() + .and_then(|p| std::fs::symlink_metadata(p).ok()) + .map(|m| m.ino()) + .unwrap_or(dot_ino); + names.push((".".to_string(), DT_DIR, dot_ino)); + names.push(("..".to_string(), DT_DIR, dotdot_ino)); + } + + names.extend(dir .filter_map(|e| e.ok()) .map(|e| { let name = e.file_name().to_string_lossy().into_owned(); @@ -623,8 +639,7 @@ pub(crate) async fn handle_sorted_getdents( e.metadata().map(|m| m.st_ino()).unwrap_or(0) }; (name, d_type, d_ino) - }) - .collect(); + })); names.sort_by(|a, b| a.0.cmp(&b.0)); @@ -636,7 +651,7 @@ pub(crate) async fn handle_sorted_getdents( }) .collect(); - pfs.getdents_cache.insert(cache_key, entries); + pfs.getdents_cache.insert(cache_key.clone(), entries); } let entries = match pfs.getdents_cache.get_mut(&cache_key) { @@ -646,6 +661,7 @@ pub(crate) async fn handle_sorted_getdents( // Empty cache = already fully drained on a prior call → return 0 (EOF). if entries.is_empty() { + pfs.getdents_cache.remove(&cache_key); return NotifAction::ReturnValue(0); } @@ -770,13 +786,13 @@ pub(crate) async fn handle_getdents( return NotifAction::Continue; } - let cache_key = (pid as i32, child_fd); + let cache_key = (pid as i32, child_fd, target.to_string_lossy().into_owned()); let mut pfs = procfs.lock().await; // Build and cache entries on first call for this (pid, fd) pair. 
if !pfs.getdents_cache.contains_key(&cache_key) { let entries = build_filtered_dirents(&pfs.proc_pids); - pfs.getdents_cache.insert(cache_key, entries); + pfs.getdents_cache.insert(cache_key.clone(), entries); } let entries = match pfs.getdents_cache.get_mut(&cache_key) { @@ -797,6 +813,7 @@ pub(crate) async fn handle_getdents( // Empty cache = already fully drained on a prior call → return 0 (EOF). if entries.is_empty() { + pfs.getdents_cache.remove(&cache_key); return NotifAction::ReturnValue(0); } diff --git a/crates/sandlock-core/src/seccomp/state.rs b/crates/sandlock-core/src/seccomp/state.rs index ed46c74..8b41166 100644 --- a/crates/sandlock-core/src/seccomp/state.rs +++ b/crates/sandlock-core/src/seccomp/state.rs @@ -50,9 +50,9 @@ impl ResourceState { pub struct ProcfsState { /// PIDs belonging to the sandbox (for /proc PID filtering). pub proc_pids: HashSet, - /// Cache of filtered dirent entries keyed by (pid, fd). + /// Cache of filtered dirent entries keyed by (pid, fd, directory target). /// Populated on first getdents64 call for a /proc directory, drained on subsequent calls. - pub getdents_cache: HashMap<(i32, u32), Vec>>, + pub getdents_cache: HashMap<(i32, u32, String), Vec>>, /// Base address of the last vDSO we patched (0 = not yet patched). 
pub vdso_patched_addr: u64, } diff --git a/crates/sandlock-core/src/vdso.rs b/crates/sandlock-core/src/vdso.rs index 473074a..a9fb709 100644 --- a/crates/sandlock-core/src/vdso.rs +++ b/crates/sandlock-core/src/vdso.rs @@ -59,7 +59,31 @@ fn parse_vdso_symbols(vdso_bytes: &[u8]) -> HashMap { symbols } +#[cfg(target_arch = "aarch64")] +fn push_insn(stub: &mut Vec, insn: u32) { + stub.extend_from_slice(&insn.to_le_bytes()); +} + +#[cfg(target_arch = "aarch64")] +fn movz_x(reg: u32, imm16: u16, shift: u32) -> u32 { + 0xD280_0000 | (((shift / 16) & 0x3) << 21) | ((imm16 as u32) << 5) | reg +} + +#[cfg(target_arch = "aarch64")] +fn movk_x(reg: u32, imm16: u16, shift: u32) -> u32 { + 0xF280_0000 | (((shift / 16) & 0x3) << 21) | ((imm16 as u32) << 5) | reg +} + +#[cfg(target_arch = "aarch64")] +fn load_imm64(stub: &mut Vec, reg: u32, value: u64) { + push_insn(stub, movz_x(reg, (value & 0xffff) as u16, 0)); + push_insn(stub, movk_x(reg, ((value >> 16) & 0xffff) as u16, 16)); + push_insn(stub, movk_x(reg, ((value >> 32) & 0xffff) as u16, 32)); + push_insn(stub, movk_x(reg, ((value >> 48) & 0xffff) as u16, 48)); +} + /// Generate a simple stub that forces a real syscall (replacing the vDSO fast path). +#[cfg(target_arch = "x86_64")] /// Layout: mov eax, imm32 / syscall / ret — 8 bytes total. fn simple_stub(syscall_nr: u32) -> Vec { let mut stub = Vec::new(); @@ -70,9 +94,19 @@ fn simple_stub(syscall_nr: u32) -> Vec { stub // 8 bytes total } +#[cfg(target_arch = "aarch64")] +fn simple_stub(syscall_nr: u32) -> Vec { + let mut stub = Vec::new(); + push_insn(&mut stub, movz_x(8, syscall_nr as u16, 0)); // mov x8, syscall_nr + push_insn(&mut stub, 0xD400_0001); // svc #0 + push_insn(&mut stub, 0xD65F_03C0); // ret + stub +} + /// Generate an offset stub for clock_gettime that forces a real syscall, /// then adds a time offset to the result for CLOCK_REALTIME and CLOCK_REALTIME_COARSE. 
/// +#[cfg(target_arch = "x86_64")] /// Layout (x86-64): /// push rdi / push rsi /// mov eax, 228 / syscall ; do the real syscall @@ -109,8 +143,28 @@ fn offset_stub_clock_gettime(offset_secs: i64) -> Vec<u8> { stub } +#[cfg(target_arch = "aarch64")] +fn offset_stub_clock_gettime(offset_secs: i64) -> Vec<u8> { + let mut stub = Vec::new(); + push_insn(&mut stub, 0xAA00_03E9); // mov x9, x0 (clock id) + push_insn(&mut stub, 0xAA01_03EA); // mov x10, x1 (timespec*) + push_insn(&mut stub, movz_x(8, libc::SYS_clock_gettime as u16, 0)); + push_insn(&mut stub, 0xD400_0001); // svc #0 + push_insn(&mut stub, 0x7100_013F); // cmp w9, #0 (CLOCK_REALTIME) + push_insn(&mut stub, 0x5400_0060); // b.eq +3 instructions + push_insn(&mut stub, 0x7100_153F); // cmp w9, #5 (CLOCK_REALTIME_COARSE) + push_insn(&mut stub, 0x5400_0101); // b.ne +8 instructions, to ret + load_imm64(&mut stub, 11, offset_secs as u64); // x11 = offset + push_insn(&mut stub, 0xF940_014C); // ldr x12, [x10] + push_insn(&mut stub, 0x8B0B_018C); // add x12, x12, x11 + push_insn(&mut stub, 0xF900_014C); // str x12, [x10] + push_insn(&mut stub, 0xD65F_03C0); // ret + stub +} + /// Generate an offset stub for gettimeofday that forces a real syscall, /// then adds a time offset to tv_sec.
+#[cfg(target_arch = "x86_64")] fn offset_stub_gettimeofday(offset_secs: i64) -> Vec<u8> { let mut stub = Vec::new(); stub.extend_from_slice(&[0x57, 0x56]); // push rdi, push rsi @@ -124,6 +178,38 @@ fn offset_stub_gettimeofday(offset_secs: i64) -> Vec<u8> { stub } +#[cfg(target_arch = "aarch64")] +fn offset_stub_gettimeofday(offset_secs: i64) -> Vec<u8> { + let mut stub = Vec::new(); + push_insn(&mut stub, 0xAA00_03EA); // mov x10, x0 (timeval*) + push_insn(&mut stub, movz_x(8, libc::SYS_gettimeofday as u16, 0)); + push_insn(&mut stub, 0xD400_0001); // svc #0 + push_insn(&mut stub, 0xB400_010A); // cbz x10, +8 instructions, to ret + load_imm64(&mut stub, 11, offset_secs as u64); // x11 = offset + push_insn(&mut stub, 0xF940_014C); // ldr x12, [x10] + push_insn(&mut stub, 0x8B0B_018C); // add x12, x12, x11 + push_insn(&mut stub, 0xF900_014C); // str x12, [x10] + push_insn(&mut stub, 0xD65F_03C0); // ret + stub +} + +#[cfg(target_arch = "x86_64")] +fn vdso_targets() -> Vec<(&'static str, &'static str, u32)> { + vec![ + ("clock_gettime", "__vdso_clock_gettime", libc::SYS_clock_gettime as u32), + ("gettimeofday", "__vdso_gettimeofday", libc::SYS_gettimeofday as u32), + ("time", "__vdso_time", libc::SYS_time as u32), + ] +} + +#[cfg(target_arch = "aarch64")] +fn vdso_targets() -> Vec<(&'static str, &'static str, u32)> { + vec![ + ("clock_gettime", "__kernel_clock_gettime", libc::SYS_clock_gettime as u32), + ("gettimeofday", "__kernel_gettimeofday", libc::SYS_gettimeofday as u32), + ] +} + /// Patch the vDSO of a target process to force real syscalls (interceptable by seccomp). /// If `time_offset_secs` is provided, clock_gettime and gettimeofday stubs will add /// the offset to the returned time.
@@ -149,19 +235,13 @@ pub(crate) fn patch( SandlockError::MemoryProtect(format!("failed to open /proc/{}/mem: {}", pid, e)) })?; - let targets = [ - ("clock_gettime", "__vdso_clock_gettime", 228u32), - ("gettimeofday", "__vdso_gettimeofday", 96u32), - ("time", "__vdso_time", 201u32), - ]; - - for (name, alt_name, syscall_nr) in &targets { - if let Some(&offset) = symbols.get(*name).or_else(|| symbols.get(*alt_name)) { + for (name, alt_name, syscall_nr) in vdso_targets() { + if let Some(&offset) = symbols.get(name).or_else(|| symbols.get(alt_name)) { let addr = base + offset; - let stub = match (time_offset_secs, *name) { + let stub = match (time_offset_secs, name) { (Some(off), "clock_gettime") => offset_stub_clock_gettime(off), (Some(off), "gettimeofday") => offset_stub_gettimeofday(off), - _ => simple_stub(*syscall_nr), + _ => simple_stub(syscall_nr), }; mem.seek(SeekFrom::Start(addr)).map_err(|e| { SandlockError::MemoryProtect(format!( diff --git a/crates/sandlock-core/tests/integration/test_checkpoint.rs b/crates/sandlock-core/tests/integration/test_checkpoint.rs index 5844759..328a3b0 100644 --- a/crates/sandlock-core/tests/integration/test_checkpoint.rs +++ b/crates/sandlock-core/tests/integration/test_checkpoint.rs @@ -2,7 +2,6 @@ use sandlock_core::{Policy, Sandbox, Checkpoint}; /// Test that checkpoint save/load roundtrips correctly. #[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 checkpoint register capture is planned for stage 4")] async fn test_checkpoint_save_load() { let policy = Policy::builder() .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") @@ -52,7 +51,6 @@ async fn test_checkpoint_save_load() { /// Test that checkpoint captures memory maps correctly. 
#[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 checkpoint register capture is planned for stage 4")] async fn test_checkpoint_memory_maps() { let policy = Policy::builder() .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") @@ -80,7 +78,6 @@ async fn test_checkpoint_memory_maps() { /// Test that app_state round-trips through save/load. #[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 checkpoint register capture is planned for stage 4")] async fn test_checkpoint_app_state_roundtrip() { let policy = Policy::builder() .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") @@ -114,7 +111,6 @@ async fn test_checkpoint_app_state_roundtrip() { /// Test that checkpoint without app_state doesn't create app_state.bin. #[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 checkpoint register capture is planned for stage 4")] async fn test_checkpoint_no_app_state_file() { let policy = Policy::builder() .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") @@ -142,7 +138,6 @@ async fn test_checkpoint_no_app_state_file() { /// Test that process info (pid, cwd, exe) is captured correctly. #[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 checkpoint register capture is planned for stage 4")] async fn test_checkpoint_process_info() { let policy = Policy::builder() .fs_read("/usr").fs_read("/lib").fs_read_if_exists("/lib64").fs_read("/bin").fs_read("/etc") diff --git a/crates/sandlock-core/tests/integration/test_determinism.rs b/crates/sandlock-core/tests/integration/test_determinism.rs index c5be34e..11d165b 100644 --- a/crates/sandlock-core/tests/integration/test_determinism.rs +++ b/crates/sandlock-core/tests/integration/test_determinism.rs @@ -87,7 +87,6 @@ async fn test_random_seed_different_seeds() { /// Test that time_start sets frozen time. 
/// The date command should show a year matching the frozen time. #[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 vDSO time patching is planned for stage 4")] async fn test_time_start_frozen() { // Freeze to 2000-06-15T00:00:00Z (mid-year avoids timezone boundary issues) let y2k = SystemTime::UNIX_EPOCH + Duration::from_secs(961027200); @@ -147,9 +146,8 @@ async fn test_combined_determinism() { } /// Test that deterministic_dirs produces sorted directory listings. -/// Run ls twice — output should match and be sorted. +/// Run directory iteration twice — output should match and be sorted. #[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 deterministic getdents virtualization needs follow-up")] async fn test_deterministic_dirs() { let policy = Policy::builder() .fs_read("/usr") @@ -162,11 +160,22 @@ async fn test_deterministic_dirs() { .build() .unwrap(); - // Use ls -f -1 to preserve raw getdents order (no re-sorting by ls). - let r1 = Sandbox::run(&policy, &["ls", "-f", "-1", "/etc"]).await.unwrap(); - let r2 = Sandbox::run(&policy, &["ls", "-f", "-1", "/etc"]).await.unwrap(); - assert!(r1.success(), "First ls failed"); - assert!(r2.success(), "Second ls failed"); + // Read directory entries without userland sorting so the assertion covers + // the sandbox's getdents virtualization. Some minimal ls implementations + // do not support `-f`, so avoid depending on ls option support here. 
+ let scan = "python3 - <<'PY'\nimport os\nprint('\\n'.join(e.name for e in os.scandir('/etc')))\nPY"; + let r1 = Sandbox::run(&policy, &["sh", "-c", scan]).await.unwrap(); + let r2 = Sandbox::run(&policy, &["sh", "-c", scan]).await.unwrap(); + assert!( + r1.success(), + "First directory scan failed: {}", + String::from_utf8_lossy(r1.stderr.as_deref().unwrap_or_default()) + ); + assert!( + r2.success(), + "Second directory scan failed: {}", + String::from_utf8_lossy(r2.stderr.as_deref().unwrap_or_default()) + ); let out1 = String::from_utf8_lossy(r1.stdout.as_deref().unwrap_or_default()); let out2 = String::from_utf8_lossy(r2.stdout.as_deref().unwrap_or_default()); @@ -176,10 +185,11 @@ async fn test_deterministic_dirs() { ); assert_eq!( out1, out2, - "Two ls -f runs should produce identical output with deterministic_dirs" + "Two directory scans should produce identical output with deterministic_dirs" ); - // Verify the output is actually sorted (skip . and .. entries from ls -f). + // Verify the output is actually sorted (skip dot entries when the runtime + // exposes them). let lines: Vec<&str> = out1.lines() .filter(|l| *l != "." 
&& *l != "..") .collect(); diff --git a/python/tests/test_checkpoint.py b/python/tests/test_checkpoint.py index b3e166c..215597d 100644 --- a/python/tests/test_checkpoint.py +++ b/python/tests/test_checkpoint.py @@ -2,7 +2,6 @@ """Tests for sandlock.Checkpoint (save_fn / restore_fn / persistence).""" import json -import platform import sys import pytest @@ -11,12 +10,6 @@ from sandlock._sdk import _lib, _make_argv -pytestmark = pytest.mark.skipif( - platform.machine() == "aarch64", - reason="ARM64 checkpoint register capture is planned for stage 4", -) - - _PYTHON_READABLE = list(dict.fromkeys([ "/usr", "/lib", "/lib64", "/bin", "/etc", "/proc", "/dev", sys.prefix, diff --git a/python/tests/test_sandbox.py b/python/tests/test_sandbox.py index 4885370..b44652b 100644 --- a/python/tests/test_sandbox.py +++ b/python/tests/test_sandbox.py @@ -3,7 +3,6 @@ import json import os -import platform import socket import sys import threading @@ -387,10 +386,6 @@ def test_dry_run_returns_structured_result(self, tmp_path): class TestNewPolicyFields: """Tests for newly wired FFI policy fields.""" - @pytest.mark.skipif( - platform.machine() == "aarch64", - reason="ARM64 vDSO time patching is planned for stage 4", - ) def test_time_start(self): from datetime import datetime, timezone # Freeze time to 2000-06-15 From b0f7aeb9d12b80fbed7ce46de40b24eae1b6e528 Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 11:04:17 +0800 Subject: [PATCH 07/11] fix: complete seccomp COW path handling on arm64 Use native stat layouts, normalize virtual COW paths, virtualize getcwd after COW-only chdir, and merge directory reads for upper-layer fds. 
Signed-off-by: gokwok <531504879@qq.com> --- crates/sandlock-core/src/context.rs | 1 + crates/sandlock-core/src/cow/dispatch.rs | 309 ++++++++++++------ crates/sandlock-core/src/seccomp/dispatch.rs | 8 + crates/sandlock-core/src/seccomp/state.rs | 3 + .../tests/integration/test_cow.rs | 38 ++- 5 files changed, 250 insertions(+), 109 deletions(-) diff --git a/crates/sandlock-core/src/context.rs b/crates/sandlock-core/src/context.rs index 21937e8..83f643f 100644 --- a/crates/sandlock-core/src/context.rs +++ b/crates/sandlock-core/src/context.rs @@ -323,6 +323,7 @@ pub fn notif_syscalls(policy: &Policy) -> Vec<u32> { libc::SYS_readlinkat as u32, libc::SYS_getdents64 as u32, libc::SYS_chdir as u32, + libc::SYS_getcwd as u32, ]); for nr in [ arch::SYS_OPEN, arch::SYS_UNLINK, arch::SYS_RMDIR, arch::SYS_MKDIR, diff --git a/crates/sandlock-core/src/cow/dispatch.rs b/crates/sandlock-core/src/cow/dispatch.rs index 5ed7861..f22fe2a 100644 --- a/crates/sandlock-core/src/cow/dispatch.rs +++ b/crates/sandlock-core/src/cow/dispatch.rs @@ -4,6 +4,7 @@ //! and injects results (fds, stat structs, readlink strings, dirents) back. use std::os::unix::io::{FromRawFd, OwnedFd, RawFd}; +use std::path::{Component, Path, PathBuf}; use std::sync::Arc; use tokio::sync::Mutex; @@ -42,23 +43,49 @@ fn read_path(notif: &SeccompNotif, addr: u64, notif_fd: RawFd) -> Option<String> /// Resolve a path that may be relative to a dirfd. /// For AT_FDCWD (-100), returns the path as-is (assumed absolute or cwd-relative). /// For other dirfds, reads /proc/{pid}/fd/{dirfd} to get the base path.
-fn resolve_at_path(notif: &SeccompNotif, dirfd: i64, path: &str) -> String { - if std::path::Path::new(path).is_absolute() { - return path.to_string(); +fn normalize_path(path: PathBuf) -> PathBuf { + let mut out = PathBuf::new(); + for component in path.components() { + match component { + Component::Prefix(prefix) => out.push(prefix.as_os_str()), + Component::RootDir => out.push(Path::new("/")), + Component::CurDir => {} + Component::ParentDir => { + out.pop(); + } + Component::Normal(part) => out.push(part), + } + } + out +} + +fn resolve_at_path_with_virtual( + notif: &SeccompNotif, + dirfd: i64, + path: &str, + virtual_cwd: Option<&str>, +) -> String { + if Path::new(path).is_absolute() { + return normalize_path(PathBuf::from(path)).to_string_lossy().into_owned(); } // dirfd is stored as u64 in seccomp_data.args but AT_FDCWD is a negative i32. // Truncate to i32 for correct sign comparison. let dirfd32 = dirfd as i32; if dirfd32 == libc::AT_FDCWD { + if let Some(cwd) = virtual_cwd { + return normalize_path(Path::new(cwd).join(path)) + .to_string_lossy() + .into_owned(); + } // Relative to cwd — read /proc/{pid}/cwd if let Ok(cwd) = std::fs::read_link(format!("/proc/{}/cwd", notif.pid)) { - return format!("{}/{}", cwd.display(), path); + return normalize_path(cwd.join(path)).to_string_lossy().into_owned(); } return path.to_string(); } // Relative to dirfd if let Ok(base) = std::fs::read_link(format!("/proc/{}/fd/{}", notif.pid, dirfd)) { - format!("{}/{}", base.display(), path) + normalize_path(base.join(path)).to_string_lossy().into_owned() } else { path.to_string() } @@ -91,7 +118,13 @@ pub(crate) async fn handle_cow_open( Some(p) => p, None => return NotifAction::Continue, }; - let path = resolve_at_path(notif, dirfd, &rel_path); + let virtual_cwd = if (dirfd as i32) == libc::AT_FDCWD && !Path::new(&rel_path).is_absolute() { + let st = cow_state.lock().await; + st.virtual_cwds.get(&(notif.pid as i32)).cloned() + } else { + None + }; + let path = 
resolve_at_path_with_virtual(notif, dirfd, &rel_path, virtual_cwd.as_deref()); // Phase 1: determine plan under lock (no heavy I/O) let plan = { @@ -197,89 +230,114 @@ fn read_resolved( path_arg: usize, dirfd_arg: Option<usize>, notif_fd: RawFd, + virtual_cwd: Option<&str>, ) -> Option<String> { let raw = read_path(notif, notif.data.args[path_arg], notif_fd)?; match dirfd_arg { - Some(i) => Some(resolve_at_path(notif, notif.data.args[i] as i64, &raw)), - None => Some(raw), + Some(i) => Some(resolve_at_path_with_virtual( + notif, + notif.data.args[i] as i64, + &raw, + virtual_cwd, + )), + None => Some(resolve_at_path_with_virtual( + notif, + libc::AT_FDCWD as i64, + &raw, + virtual_cwd, + )), } } /// Parse the syscall into a CowWriteOp, reading and resolving paths from child memory. -fn parse_cow_write(notif: &SeccompNotif, notif_fd: RawFd) -> Option<CowWriteOp> { +fn parse_cow_write( + notif: &SeccompNotif, + notif_fd: RawFd, + virtual_cwd: Option<&str>, +) -> Option<CowWriteOp> { let nr = notif.data.nr as i64; // *at variants (dirfd in args[0], path in args[1]) if nr == libc::SYS_unlinkat { - let path = read_resolved(notif, 1, Some(0), notif_fd)?; + let path = read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?; let is_dir = (notif.data.args[2] & libc::AT_REMOVEDIR as u64) != 0; return Some(CowWriteOp::Unlink { path, is_dir }); } if nr == libc::SYS_mkdirat { - return Some(CowWriteOp::Mkdir { path: read_resolved(notif, 1, Some(0), notif_fd)?
}); + return Some(CowWriteOp::Mkdir { + path: read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?, + }); } if nr == libc::SYS_renameat2 { - let old_path = read_resolved(notif, 1, Some(0), notif_fd)?; - let new_path = read_resolved(notif, 3, Some(2), notif_fd)?; + let old_path = read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?; + let new_path = read_resolved(notif, 3, Some(2), notif_fd, virtual_cwd)?; return Some(CowWriteOp::Rename { old_path, new_path }); } if nr == libc::SYS_symlinkat { // symlinkat(target, newdirfd, linkpath): target is raw, linkpath is resolved let target = read_path(notif, notif.data.args[0], notif_fd)?; - let linkpath = read_resolved(notif, 2, Some(1), notif_fd)?; + let linkpath = read_resolved(notif, 2, Some(1), notif_fd, virtual_cwd)?; return Some(CowWriteOp::Symlink { target, linkpath }); } if nr == libc::SYS_linkat { - let old_path = read_resolved(notif, 1, Some(0), notif_fd)?; - let new_path = read_resolved(notif, 3, Some(2), notif_fd)?; + let old_path = read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?; + let new_path = read_resolved(notif, 3, Some(2), notif_fd, virtual_cwd)?; return Some(CowWriteOp::Link { old_path, new_path }); } if nr == libc::SYS_fchmodat { - let path = read_resolved(notif, 1, Some(0), notif_fd)?; + let path = read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?; return Some(CowWriteOp::Chmod { path, mode: (notif.data.args[2] & 0o7777) as u32 }); } if nr == libc::SYS_fchownat { - let path = read_resolved(notif, 1, Some(0), notif_fd)?; + let path = read_resolved(notif, 1, Some(0), notif_fd, virtual_cwd)?; return Some(CowWriteOp::Chown { path, uid: notif.data.args[2] as u32, gid: notif.data.args[3] as u32 }); } // Legacy variants (path in args[0], no dirfd) if Some(nr) == arch::SYS_UNLINK { - return Some(CowWriteOp::Unlink { path: read_resolved(notif, 0, None, notif_fd)?, is_dir: false }); + return Some(CowWriteOp::Unlink { + path: read_resolved(notif, 0, None, notif_fd, virtual_cwd)?, + is_dir: 
false, + }); } if Some(nr) == arch::SYS_RMDIR { - return Some(CowWriteOp::Unlink { path: read_resolved(notif, 0, None, notif_fd)?, is_dir: true }); + return Some(CowWriteOp::Unlink { + path: read_resolved(notif, 0, None, notif_fd, virtual_cwd)?, + is_dir: true, + }); } if Some(nr) == arch::SYS_MKDIR { - return Some(CowWriteOp::Mkdir { path: read_resolved(notif, 0, None, notif_fd)? }); + return Some(CowWriteOp::Mkdir { + path: read_resolved(notif, 0, None, notif_fd, virtual_cwd)?, + }); } if Some(nr) == arch::SYS_RENAME { - let old_path = read_resolved(notif, 0, None, notif_fd)?; - let new_path = read_resolved(notif, 1, None, notif_fd)?; + let old_path = read_resolved(notif, 0, None, notif_fd, virtual_cwd)?; + let new_path = read_resolved(notif, 1, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Rename { old_path, new_path }); } if Some(nr) == arch::SYS_SYMLINK { let target = read_path(notif, notif.data.args[0], notif_fd)?; - let linkpath = read_resolved(notif, 1, None, notif_fd)?; + let linkpath = read_resolved(notif, 1, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Symlink { target, linkpath }); } if Some(nr) == arch::SYS_LINK { - let old_path = read_resolved(notif, 0, None, notif_fd)?; - let new_path = read_resolved(notif, 1, None, notif_fd)?; + let old_path = read_resolved(notif, 0, None, notif_fd, virtual_cwd)?; + let new_path = read_resolved(notif, 1, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Link { old_path, new_path }); } if Some(nr) == arch::SYS_CHMOD { - let path = read_resolved(notif, 0, None, notif_fd)?; + let path = read_resolved(notif, 0, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Chmod { path, mode: (notif.data.args[1] & 0o7777) as u32 }); } if Some(nr) == arch::SYS_CHOWN || Some(nr) == arch::SYS_LCHOWN { - let path = read_resolved(notif, 0, None, notif_fd)?; + let path = read_resolved(notif, 0, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Chown { path, uid: notif.data.args[1] as u32, gid: 
notif.data.args[2] as u32 }); } // truncate (legacy only, path in args[0]) if nr == libc::SYS_truncate { - let path = read_resolved(notif, 0, None, notif_fd)?; + let path = read_resolved(notif, 0, None, notif_fd, virtual_cwd)?; return Some(CowWriteOp::Truncate { path, length: notif.data.args[1] as i64 }); } @@ -365,7 +423,11 @@ pub(crate) async fn handle_cow_write( cow_state: &Arc<Mutex<CowState>>, notif_fd: RawFd, ) -> NotifAction { - let op = match parse_cow_write(notif, notif_fd) { + let virtual_cwd = { + let st = cow_state.lock().await; + st.virtual_cwds.get(&(notif.pid as i32)).cloned() + }; + let op = match parse_cow_write(notif, notif_fd, virtual_cwd.as_deref()) { Some(op) => op, None => return NotifAction::Continue, }; @@ -462,15 +524,28 @@ pub(crate) async fn handle_cow_access( // access(pathname, mode): args[0]=path, args[1]=mode // faccessat(dirfd, pathname, mode, flags): args[0]=dirfd, args[1]=path, args[2]=mode let (path, mode) = if Some(nr) == arch::SYS_ACCESS { + let virtual_cwd = { + let st = cow_state.lock().await; + st.virtual_cwds.get(&(notif.pid as i32)).cloned() + }; let p = match read_path(notif, notif.data.args[0], notif_fd) { - Some(p) => p, + Some(p) => resolve_at_path_with_virtual( + notif, + libc::AT_FDCWD as i64, + &p, + virtual_cwd.as_deref(), + ), None => return NotifAction::Continue, }; (p, notif.data.args[1] as i32) } else { let dirfd = notif.data.args[0] as i64; + let virtual_cwd = { + let st = cow_state.lock().await; + st.virtual_cwds.get(&(notif.pid as i32)).cloned() + }; let p = match read_path(notif, notif.data.args[1], notif_fd) { - Some(p) => resolve_at_path(notif, dirfd, &p), + Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), None => return NotifAction::Continue, }; (p, notif.data.args[2] as i32) @@ -521,8 +596,12 @@ pub(crate) async fn handle_cow_utimensat( return NotifAction::Continue; } + let virtual_cwd = { + let st = cow_state.lock().await; + st.virtual_cwds.get(&(notif.pid as i32)).cloned() + }; let
path = match read_path(notif, path_ptr, notif_fd) { - Some(p) => resolve_at_path(notif, dirfd, &p), + Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), None => return NotifAction::Continue, }; @@ -587,8 +666,12 @@ pub(crate) async fn handle_cow_stat( // newfstatat(dirfd, pathname, statbuf, flags) // faccessat(dirfd, pathname, mode, flags) let dirfd = notif.data.args[0] as i64; + let virtual_cwd = { + let st = cow_state.lock().await; + st.virtual_cwds.get(&(notif.pid as i32)).cloned() + }; let path = match read_path(notif, notif.data.args[1], notif_fd) { - Some(p) => resolve_at_path(notif, dirfd, &p), + Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), None => return NotifAction::Continue, }; @@ -618,52 +701,30 @@ pub(crate) async fn handle_cow_stat( return NotifAction::Errno(libc::ENOENT); } - // newfstatat — stat the resolved path and write to child's buffer + // newfstatat — stat the resolved path and write the native libc layout + // back to the child. Do not hand-pack struct stat; its layout is + // architecture-specific. 
let statbuf_addr = notif.data.args[2]; - let flags = notif.data.args[3]; - let follow = (flags & libc::AT_SYMLINK_NOFOLLOW as u64) == 0; + let flags = (notif.data.args[3] & 0xFFFF_FFFF) as i32; + let c_path = match std::ffi::CString::new(real_path.to_str().unwrap_or("")) { + Ok(c) => c, + Err(_) => return NotifAction::Continue, + }; + let mut statbuf: libc::stat = unsafe { std::mem::zeroed() }; + if unsafe { libc::fstatat(libc::AT_FDCWD, c_path.as_ptr(), &mut statbuf, flags) } < 0 { + let errno = std::io::Error::last_os_error() + .raw_os_error() + .unwrap_or(libc::EIO); + return NotifAction::Errno(errno); + } + let buf = unsafe { + std::slice::from_raw_parts( + &statbuf as *const libc::stat as *const u8, + std::mem::size_of::<libc::stat>(), + ) + }; - let meta = if follow { - std::fs::metadata(&real_path) - } else { - std::fs::symlink_metadata(&real_path) - }; - - let meta = match meta { - Ok(m) => m, - Err(_) => return NotifAction::Errno(libc::ENOENT), - }; - - // Pack struct stat (x86_64 layout, 144 bytes) - use std::os::unix::fs::MetadataExt; - let mut buf = vec![0u8; 144]; - // struct stat { st_dev(8), st_ino(8), st_nlink(8), st_mode(4), st_uid(4), st_gid(4), __pad0(4), - // st_rdev(8), st_size(8), st_blksize(8), st_blocks(8), - // st_atime(8), st_atime_nsec(8), st_mtime(8), st_mtime_nsec(8), - // st_ctime(8), st_ctime_nsec(8), __unused[3](24) } - let mut off = 0; - macro_rules! pack_u64 { ($v:expr) => { buf[off..off+8].copy_from_slice(&($v as u64).to_ne_bytes()); off += 8; } } - macro_rules!
pack_u32 { ($v:expr) => { buf[off..off+4].copy_from_slice(&($v as u32).to_ne_bytes()); off += 4; } } - pack_u64!(meta.dev()); - pack_u64!(meta.ino()); - pack_u64!(meta.nlink()); - pack_u32!(meta.mode()); - pack_u32!(meta.uid()); - pack_u32!(meta.gid()); - pack_u32!(0u32); // __pad0 - pack_u64!(meta.rdev()); - pack_u64!(meta.size() as u64); - pack_u64!(meta.blksize()); - pack_u64!(meta.blocks() as u64); - pack_u64!(meta.atime() as u64); - pack_u64!(meta.atime_nsec() as u64); - pack_u64!(meta.mtime() as u64); - pack_u64!(meta.mtime_nsec() as u64); - pack_u64!(meta.ctime() as u64); - pack_u64!(meta.ctime_nsec() as u64); - let _ = off; - - if write_child_mem(notif_fd, notif.id, notif.pid, statbuf_addr, &buf).is_err() { + if write_child_mem(notif_fd, notif.id, notif.pid, statbuf_addr, buf).is_err() { return NotifAction::Continue; } @@ -678,8 +739,12 @@ pub(crate) async fn handle_cow_statx( ) -> NotifAction { // statx(dirfd, pathname, flags, mask, statxbuf) let dirfd = notif.data.args[0] as i64; + let virtual_cwd = { + let st = cow_state.lock().await; + st.virtual_cwds.get(&(notif.pid as i32)).cloned() + }; let path = match read_path(notif, notif.data.args[1], notif_fd) { - Some(p) => resolve_at_path(notif, dirfd, &p), + Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), None => return NotifAction::Continue, }; @@ -707,8 +772,12 @@ pub(crate) async fn handle_cow_readlink( ) -> NotifAction { // readlinkat(dirfd, pathname, buf, bufsiz) let dirfd = notif.data.args[0] as i64; + let virtual_cwd = { + let st = cow_state.lock().await; + st.virtual_cwds.get(&(notif.pid as i32)).cloned() + }; let path = match read_path(notif, notif.data.args[1], notif_fd) { - Some(p) => resolve_at_path(notif, dirfd, &p), + Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), None => return NotifAction::Continue, }; let buf_addr = notif.data.args[2]; @@ -765,10 +834,24 @@ pub(crate) async fn handle_cow_getdents( None => return 
NotifAction::Continue, }; - if !cow.has_changes() || !cow.matches(&target) { + if !cow.has_changes() { return NotifAction::Continue; } + let target_path = Path::new(&target); + let rel_path = if cow.matches(&target) { + cow.safe_rel(&target).unwrap_or_else(|| ".".to_string()) + } else if let Ok(rel) = target_path.strip_prefix(cow.upper_dir()) { + let rel = rel.to_string_lossy(); + if rel.is_empty() { + ".".to_string() + } else { + rel.into_owned() + } + } else { + return NotifAction::Continue; + }; + // Build cache on first call; invalidate if fd was reused for a different dir. let cache_key = (pid as i32, child_fd); if let Some((cached_target, entries)) = st.dir_cache.get(&cache_key) { @@ -783,7 +866,6 @@ pub(crate) async fn handle_cow_getdents( } if !st.dir_cache.contains_key(&cache_key) { let cow = st.branch.as_ref().unwrap(); - let rel_path = cow.safe_rel(&target).unwrap_or_else(|| ".".to_string()); let merged = cow.list_merged_dir(&rel_path); let upper_dir = cow.upper_dir().join(&rel_path); @@ -867,17 +949,14 @@ pub(crate) async fn handle_cow_chdir( }; let orig_path_buf_len = path.len() + 1; // NUL-terminated size in child memory - // Resolve relative paths against the process's cwd. 
- let abs_path = if std::path::Path::new(&path).is_absolute() { - path - } else { - match std::fs::read_link(format!("/proc/{}/cwd", notif.pid)) { - Ok(cwd) => cwd.join(&path).to_string_lossy().into_owned(), - Err(_) => return NotifAction::Continue, - } - }; - let st = cow_state.lock().await; + let virtual_cwd = st.virtual_cwds.get(&(notif.pid as i32)).cloned(); + let abs_path = resolve_at_path_with_virtual( + notif, + libc::AT_FDCWD as i64, + &path, + virtual_cwd.as_deref(), + ); let cow = match st.branch.as_ref() { Some(c) => c, None => return NotifAction::Continue, @@ -950,5 +1029,51 @@ pub(crate) async fn handle_cow_chdir( return NotifAction::Errno(libc::EFAULT); } + let mut st = cow_state.lock().await; + st.virtual_cwds.insert(notif.pid as i32, abs_path); + NotifAction::Continue } + +/// Handle getcwd after chdir into a COW-only directory. +pub(crate) async fn handle_cow_getcwd( + notif: &SeccompNotif, + cow_state: &Arc<Mutex<CowState>>, + notif_fd: RawFd, +) -> NotifAction { + let buf_addr = notif.data.args[0]; + let buf_size = (notif.data.args[1] & 0xFFFF_FFFF) as usize; + + let st = cow_state.lock().await; + let cow = match st.branch.as_ref() { + Some(c) => c, + None => return NotifAction::Continue, + }; + + let virtual_cwd = if let Some(cwd) = st.virtual_cwds.get(&(notif.pid as i32)) { + cwd.clone() + } else { + let cwd = match std::fs::read_link(format!("/proc/{}/cwd", notif.pid)) { + Ok(c) => c, + Err(_) => return NotifAction::Continue, + }; + match cwd.strip_prefix(cow.upper_dir()) { + Ok(rel) => cow.workdir().join(rel).to_string_lossy().into_owned(), + Err(_) => return NotifAction::Continue, + } + }; + drop(st); + + let cwd_bytes = virtual_cwd.as_bytes(); + if cwd_bytes.len() + 1 > buf_size { + return NotifAction::Errno(libc::ERANGE); + } + + let mut write_buf = cwd_bytes.to_vec(); + write_buf.push(0); + + if write_child_mem(notif_fd, notif.id, notif.pid, buf_addr, &write_buf).is_err() { + return NotifAction::Continue; + } + NotifAction::ReturnValue(write_buf.len() as
i64) +} diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs index c5598f9..8e1479f 100644 --- a/crates/sandlock-core/src/seccomp/dispatch.rs +++ b/crates/sandlock-core/src/seccomp/dispatch.rs @@ -714,4 +714,12 @@ fn register_cow_handlers(table: &mut DispatchTable) { crate::cow::dispatch::handle_cow_chdir(&notif, &cow, notif_fd).await }) })); + + // getcwd — return logical workdir path after chdir into a COW-only dir + table.register(libc::SYS_getcwd, Box::new(|notif, ctx, notif_fd| { + let cow = Arc::clone(&ctx.cow); + Box::pin(async move { + crate::cow::dispatch::handle_cow_getcwd(&notif, &cow, notif_fd).await + }) + })); } diff --git a/crates/sandlock-core/src/seccomp/state.rs b/crates/sandlock-core/src/seccomp/state.rs index 8b41166..40ba92c 100644 --- a/crates/sandlock-core/src/seccomp/state.rs +++ b/crates/sandlock-core/src/seccomp/state.rs @@ -78,6 +78,8 @@ pub struct CowState { /// Getdents cache for COW directories. /// Value is (host_path, entries) to detect fd reuse and invalidate stale entries. pub dir_cache: HashMap<(i32, u32), (String, Vec<Vec<u8>>)>, + /// Logical cwd for processes that chdir into COW-only directories. + pub virtual_cwds: HashMap<i32, String>, } impl CowState { @@ -85,6 +87,7 @@ impl CowState { Self { branch: None, dir_cache: HashMap::new(), + virtual_cwds: HashMap::new(), } } } diff --git a/crates/sandlock-core/tests/integration/test_cow.rs b/crates/sandlock-core/tests/integration/test_cow.rs index 7d63b11..2ff4740 100644 --- a/crates/sandlock-core/tests/integration/test_cow.rs +++ b/crates/sandlock-core/tests/integration/test_cow.rs @@ -261,7 +261,6 @@ async fn test_seccomp_cow_open_directory() { /// O_DIRECTORY must resolve to the upper path. Without this fix, /// prepare_open skipped O_DIRECTORY opens and the kernel returned ENOENT.
#[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 COW directory fd injection needs follow-up")] async fn test_seccomp_cow_open_directory() { let workdir = temp_dir("seccomp-opendir"); let out_file = workdir.join("opendir_ok.txt"); @@ -306,7 +305,6 @@ async fn test_seccomp_cow_open_directory() { /// chdir must be intercepted and redirected to the upper path. Without /// this, the kernel returns ENOENT because it doesn't see the COW directory. #[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 COW chdir currently exposes /proc/self/fd cwd")] async fn test_seccomp_cow_chdir_to_created_dir() { let workdir = temp_dir("seccomp-chdir"); let out_file = workdir.join("chdir_ok.txt"); @@ -322,8 +320,9 @@ async fn test_seccomp_cow_chdir_to_created_dir() { .unwrap(); // mkdir creates the dir in COW upper only; cd must see it via interception. + // Use physical pwd so the assertion covers getcwd virtualization. let script = format!( - "mkdir -p subdir/deep && cd subdir/deep && pwd > {}", + "mkdir -p subdir/deep && cd subdir/deep && pwd -P > {}", out_file.display() ); let result = Sandbox::run(&policy, &["sh", "-c", &script]).await; @@ -343,14 +342,14 @@ async fn test_seccomp_cow_chdir_to_created_dir() { let _ = fs::remove_dir_all(&workdir); } -/// Test that the legacy open() syscall works correctly with COW. +/// Test that the raw open syscall ABI works correctly with COW. /// /// Regression test: handle_cow_open always read args in openat() layout /// (dirfd=args[0], path=args[1], flags=args[2]), but open() uses /// (path=args[0], flags=args[1], mode=args[2]). This caused COW to miss -/// all legacy open() calls, falling through to the kernel. +/// all legacy open() calls on x86_64, falling through to the kernel. ARM64 +/// does not provide SYS_open, so it uses the equivalent raw openat ABI. 
#[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "ARM64 Linux does not provide the legacy SYS_open ABI")] async fn test_seccomp_cow_legacy_open_syscall() { let workdir = temp_dir("seccomp-legacy-open"); let out_file = std::env::temp_dir().join(format!( @@ -367,18 +366,21 @@ async fn test_seccomp_cow_legacy_open_syscall() { .build() .unwrap(); - // Use raw SYS_open syscall (not openat) to create a file, then verify - // it's visible during the run but discarded on abort. + // Use raw syscall ABI to create a file, then verify it's visible during + // the run but discarded on abort. x86_64 uses legacy SYS_open; ARM64 uses + // the equivalent openat(AT_FDCWD, ...) ABI. let script = format!(concat!( - "import ctypes, os\n", + "import ctypes, os, platform\n", "libc = ctypes.CDLL('libc.so.6', use_errno=True)\n", - "SYS_open = 2\n", "O_WRONLY = 1; O_CREAT = 64; O_TRUNC = 512\n", "path = b'{wd}/newfile.txt'\n", - "fd = libc.syscall(SYS_open, path, O_WRONLY | O_CREAT | O_TRUNC, 0o644)\n", + "if platform.machine() == 'aarch64':\n", + " fd = libc.syscall(56, -100, path, O_WRONLY | O_CREAT | O_TRUNC, 0o644)\n", + "else:\n", + " fd = libc.syscall(2, path, O_WRONLY | O_CREAT | O_TRUNC, 0o644)\n", "err = ctypes.get_errno()\n", "if fd >= 0:\n", - " os.write(fd, b'created via SYS_open')\n", + " os.write(fd, b'created via raw open')\n", " os.close(fd)\n", " content = open('{wd}/newfile.txt').read()\n", " open('{out}', 'w').write(content)\n", @@ -389,7 +391,7 @@ async fn test_seccomp_cow_legacy_open_syscall() { let result = Sandbox::run(&policy, &["python3", "-c", &script]).await.unwrap(); assert!(result.success(), "exit={:?}, stderr={}", result.code(), result.stderr_str().unwrap_or("")); let content = fs::read_to_string(&out_file).unwrap_or_default(); - assert_eq!(content, "created via SYS_open", "SYS_open should work with COW"); + assert_eq!(content, "created via raw open", "raw open ABI should work with COW"); // After abort, the file should not exist on the real 
filesystem assert!(!workdir.join("newfile.txt").exists(), "newfile.txt should not exist after abort"); @@ -405,7 +407,6 @@ async fn test_seccomp_cow_legacy_open_syscall() { /// Since the file was just copied to upper, the kernel's open() returned /// EEXIST. The fix strips O_EXCL from the supervisor's open flags. #[tokio::test] -#[cfg_attr(target_arch = "aarch64", ignore = "test uses legacy SYS_open, which ARM64 Linux does not provide")] async fn test_seccomp_cow_excl_after_unlink() { let workdir = temp_dir("seccomp-excl-unlink"); let out_file = std::env::temp_dir().join(format!( @@ -423,9 +424,9 @@ async fn test_seccomp_cow_excl_after_unlink() { .build() .unwrap(); - // Unlink the file, then recreate it with O_CREAT|O_EXCL via SYS_open + // Unlink the file, then recreate it with O_CREAT|O_EXCL via raw open ABI. let script = format!(concat!( - "import ctypes, os\n", + "import ctypes, os, platform\n", "libc = ctypes.CDLL('libc.so.6', use_errno=True)\n", "path = b'{wd}/target.txt'\n", "ret = libc.unlink(path)\n", @@ -433,7 +434,10 @@ async fn test_seccomp_cow_excl_after_unlink() { " open('{out}', 'w').write(f'UNLINK_FAILED:{{ctypes.get_errno()}}')\n", " raise SystemExit(1)\n", "O_WRONLY = 1; O_CREAT = 64; O_EXCL = 128\n", - "fd = libc.syscall(2, path, O_WRONLY | O_CREAT | O_EXCL, 0o644)\n", + "if platform.machine() == 'aarch64':\n", + " fd = libc.syscall(56, -100, path, O_WRONLY | O_CREAT | O_EXCL, 0o644)\n", + "else:\n", + " fd = libc.syscall(2, path, O_WRONLY | O_CREAT | O_EXCL, 0o644)\n", "err = ctypes.get_errno()\n", "if fd >= 0:\n", " os.write(fd, b'recreated')\n", From 53cc6a89967d388d5e4960a066cdd6b8f56f8401 Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 11:04:17 +0800 Subject: [PATCH 08/11] test: exercise arm64 raw path syscall equivalents Run the chroot raw path syscall coverage on arm64 by mapping legacy helper commands to equivalent *at syscalls where the legacy ABI is absent. 
Signed-off-by: gokwok <531504879@qq.com> --- python/tests/test_chroot_legacy_syscalls.py | 14 +- tests/rootfs-helper.c | 143 ++++++++++++++++++-- 2 files changed, 137 insertions(+), 20 deletions(-) diff --git a/python/tests/test_chroot_legacy_syscalls.py b/python/tests/test_chroot_legacy_syscalls.py index 180e960..a60b912 100644 --- a/python/tests/test_chroot_legacy_syscalls.py +++ b/python/tests/test_chroot_legacy_syscalls.py @@ -1,14 +1,14 @@ # SPDX-License-Identifier: Apache-2.0 -"""Tests for chroot interception of legacy (non-*at) syscalls. +"""Tests for chroot interception of raw path syscall ABIs. musl libc uses stat/lstat/open/access/readlink instead of their *at variants (newfstatat/openat/etc.). These tests invoke the legacy -syscalls via the rootfs-helper binary to verify the chroot dispatcher -handles them correctly. +syscalls via the rootfs-helper binary on architectures that provide +them. On ARM64 the helper uses equivalent raw *at syscalls, because +Linux ARM64 does not expose the legacy non-*at path syscall ABI. 
""" import os -import platform import shutil from pathlib import Path @@ -17,12 +17,6 @@ from sandlock import Policy, Sandbox -pytestmark = pytest.mark.skipif( - platform.machine() == "aarch64", - reason="ARM64 Linux does not provide legacy non-*at path syscalls", -) - - # ── helpers ────────────────────────────────────────────────────── _HELPER_BIN = Path(__file__).resolve().parent.parent.parent / "tests" / "rootfs-helper" diff --git a/tests/rootfs-helper.c b/tests/rootfs-helper.c index 8151b19..68e5135 100644 --- a/tests/rootfs-helper.c +++ b/tests/rootfs-helper.c @@ -366,11 +366,138 @@ static int cmd_legacy_chmod(int argc, char **argv) { return 0; } #else -static int cmd_legacy_unsupported(int argc, char **argv) { - (void)argc; - (void)argv; - printf("ERR %d\n", ENOSYS); - return 1; +static int cmd_legacy_stat(int argc, char **argv) { + if (argc < 1) return 1; + struct stat st; + long ret = syscall(SYS_newfstatat, AT_FDCWD, argv[0], &st, 0); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK size=%ld mode=%o\n", (long)st.st_size, st.st_mode & 07777); + return 0; +} + +static int cmd_legacy_lstat(int argc, char **argv) { + if (argc < 1) return 1; + struct stat st; + long ret = syscall(SYS_newfstatat, AT_FDCWD, argv[0], &st, AT_SYMLINK_NOFOLLOW); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK size=%ld mode=%o type=%s\n", (long)st.st_size, st.st_mode & 07777, + S_ISDIR(st.st_mode) ? "dir" : S_ISLNK(st.st_mode) ? 
"link" : "file"); + return 0; +} + +static int cmd_legacy_open(int argc, char **argv) { + if (argc < 1) return 1; + int fd = (int)syscall(SYS_openat, AT_FDCWD, argv[0], O_RDONLY); + if (fd < 0) { + printf("ERR %d\n", errno); + return 1; + } + char buf[4096]; + ssize_t n = read(fd, buf, sizeof(buf)); + close(fd); + printf("OK "); + if (n > 0) { + write(STDOUT_FILENO, buf, n); + } + return 0; +} + +static int cmd_legacy_access(int argc, char **argv) { + if (argc < 1) return 1; + long ret = syscall(SYS_faccessat, AT_FDCWD, argv[0], F_OK, 0); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_readlink(int argc, char **argv) { + if (argc < 1) return 1; + char buf[4096]; + long n = syscall(SYS_readlinkat, AT_FDCWD, argv[0], buf, sizeof(buf) - 1); + if (n < 0) { + printf("ERR %d\n", errno); + return 1; + } + buf[n] = '\0'; + printf("OK %s\n", buf); + return 0; +} + +static int cmd_legacy_mkdir(int argc, char **argv) { + if (argc < 1) return 1; + long ret = syscall(SYS_mkdirat, AT_FDCWD, argv[0], 0755); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_rmdir(int argc, char **argv) { + if (argc < 1) return 1; + long ret = syscall(SYS_unlinkat, AT_FDCWD, argv[0], AT_REMOVEDIR); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_unlink(int argc, char **argv) { + if (argc < 1) return 1; + long ret = syscall(SYS_unlinkat, AT_FDCWD, argv[0], 0); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_rename(int argc, char **argv) { + if (argc < 2) return 1; + long ret = syscall(SYS_renameat2, AT_FDCWD, argv[0], AT_FDCWD, argv[1], 0); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_symlink(int argc, char **argv) { + if (argc < 2) return 1; + 
long ret = syscall(SYS_symlinkat, argv[0], AT_FDCWD, argv[1]); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; +} + +static int cmd_legacy_chmod(int argc, char **argv) { + if (argc < 2) return 1; + unsigned mode; + if (sscanf(argv[0], "%o", &mode) != 1) return 1; + long ret = syscall(SYS_fchmodat, AT_FDCWD, argv[1], mode, 0); + if (ret < 0) { + printf("ERR %d\n", errno); + return 1; + } + printf("OK\n"); + return 0; } #endif @@ -401,8 +528,7 @@ static int dispatch(const char *cmd, int argc, char **argv) { if (strcmp(cmd, "true") == 0) return 0; if (strcmp(cmd, "false") == 0) return 1; - /* Legacy syscall variants */ -#ifdef HAVE_LEGACY_PATH_SYSCALLS + /* Legacy syscall variants on x86_64; equivalent raw *at ABI elsewhere. */ if (strcmp(cmd, "legacy-stat") == 0) return cmd_legacy_stat(argc, argv); if (strcmp(cmd, "legacy-lstat") == 0) return cmd_legacy_lstat(argc, argv); if (strcmp(cmd, "legacy-open") == 0) return cmd_legacy_open(argc, argv); @@ -414,9 +540,6 @@ static int dispatch(const char *cmd, int argc, char **argv) { if (strcmp(cmd, "legacy-rename") == 0) return cmd_legacy_rename(argc, argv); if (strcmp(cmd, "legacy-symlink") == 0) return cmd_legacy_symlink(argc, argv); if (strcmp(cmd, "legacy-chmod") == 0) return cmd_legacy_chmod(argc, argv); -#else - if (strncmp(cmd, "legacy-", 7) == 0) return cmd_legacy_unsupported(argc, argv); -#endif fprintf(stderr, "rootfs-helper: unknown command '%s'\n", cmd); return 127; From fc76050521ee38effdb16e03ce5bcaa021a3c968 Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 11:04:17 +0800 Subject: [PATCH 09/11] test: stabilize python warning-free run Replace deprecated asyncio loop usage, close test files explicitly, and avoid an extra shell fork in the gather pipeline test. 
Signed-off-by: gokwok <531504879@qq.com> --- python/tests/test_mcp_integration.py | 2 +- python/tests/test_pipeline.py | 12 ++++++++++-- python/tests/test_policy_fn.py | 6 ++++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/python/tests/test_mcp_integration.py b/python/tests/test_mcp_integration.py index e9a9fbc..fa00acf 100644 --- a/python/tests/test_mcp_integration.py +++ b/python/tests/test_mcp_integration.py @@ -129,7 +129,7 @@ def test_write_then_read(self, tmp_path): class TestMcpSandboxLocalTools: def _run(self, coro): - return asyncio.get_event_loop().run_until_complete(coro) + return asyncio.run(coro) def test_read_only_by_default(self, tmp_path): workspace = str(tmp_path) diff --git a/python/tests/test_pipeline.py b/python/tests/test_pipeline.py index 9eb4659..43c1615 100644 --- a/python/tests/test_pipeline.py +++ b/python/tests/test_pipeline.py @@ -315,10 +315,18 @@ def test_gather_disjoint_policies(self): result = ( Sandbox(data_policy).cmd(["cat", secret]).as_("data") + Sandbox(code_policy).cmd( - ["echo", "tr a-z A-Z <&3"] + ["echo", "upper"] ).as_("code") | Sandbox(consumer_policy).cmd( - ["sh", "-c", 'eval "$(cat)"'] + [sys.executable, "-c", + "import os, sys\n" + "code = sys.stdin.read().strip()\n" + "with os.fdopen(3) as data_fd:\n" + " data = data_fd.read()\n" + "if code == 'upper':\n" + " sys.stdout.write(data.upper())\n" + "else:\n" + " sys.stdout.write(data)\n"] ) ).run() assert result.success, f"stderr={result.stderr}" diff --git a/python/tests/test_policy_fn.py b/python/tests/test_policy_fn.py index f091e5e..0b84238 100644 --- a/python/tests/test_policy_fn.py +++ b/python/tests/test_policy_fn.py @@ -189,7 +189,8 @@ def on_event(event, ctx): f"s.close()\n" ]) assert result.success - content = open(out).read() + with open(out) as f: + content = f.read() assert content == "ERR:13", f"expected EACCES (13), got: {content}" os.unlink(out) @@ -243,7 +244,8 @@ def on_event(event, ctx): ] ) assert result.success - content = 
open(out).read() + with open(out) as f: + content = f.read() assert content == "BLOCKED", f"expected BLOCKED, got: {content}" os.unlink(out) From 73526553ea6fbabe438418f01495f4491369a9d2 Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 18:50:46 +0800 Subject: [PATCH 10/11] fix: map COW upper dirfds to workdir paths Signed-off-by: gokwok <531504879@qq.com> --- crates/sandlock-core/src/cow/dispatch.rs | 45 +++++++++++++++++-- .../tests/integration/test_cow.rs | 12 ++++- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/crates/sandlock-core/src/cow/dispatch.rs b/crates/sandlock-core/src/cow/dispatch.rs index f22fe2a..ea24349 100644 --- a/crates/sandlock-core/src/cow/dispatch.rs +++ b/crates/sandlock-core/src/cow/dispatch.rs @@ -10,6 +10,7 @@ use std::sync::Arc; use tokio::sync::Mutex; use crate::arch; +use crate::cow::seccomp::SeccompCowBranch; use crate::procfs::{build_dirent64, DT_DIR, DT_LNK, DT_REG}; use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction}; use crate::seccomp::state::CowState; @@ -91,6 +92,14 @@ fn resolve_at_path_with_virtual( } } +fn map_cow_upper_path(cow: &SeccompCowBranch, path: &str) -> String { + let path = PathBuf::from(path); + if let Ok(rel) = path.strip_prefix(cow.upper_dir()) { + return normalize_path(cow.workdir().join(rel)).to_string_lossy().into_owned(); + } + normalize_path(path).to_string_lossy().into_owned() +} + // ============================================================ // openat handler // ============================================================ @@ -124,7 +133,7 @@ pub(crate) async fn handle_cow_open( } else { None }; - let path = resolve_at_path_with_virtual(notif, dirfd, &rel_path, virtual_cwd.as_deref()); + let mut path = resolve_at_path_with_virtual(notif, dirfd, &rel_path, virtual_cwd.as_deref()); // Phase 1: determine plan under lock (no heavy I/O) let plan = { @@ -134,6 +143,7 @@ pub(crate) async fn handle_cow_open( None => return 
NotifAction::Continue, }; + path = map_cow_upper_path(cow, &path); if !cow.matches(&path) { return NotifAction::Continue; } @@ -223,6 +233,28 @@ enum CowWriteOp { Truncate { path: String, length: i64 }, } +impl CowWriteOp { + fn remap_upper_paths(&mut self, cow: &SeccompCowBranch) { + match self { + CowWriteOp::Unlink { path, .. } + | CowWriteOp::Mkdir { path } + | CowWriteOp::Chmod { path, .. } + | CowWriteOp::Chown { path, .. } + | CowWriteOp::Truncate { path, .. } => { + *path = map_cow_upper_path(cow, path); + } + CowWriteOp::Rename { old_path, new_path } + | CowWriteOp::Link { old_path, new_path } => { + *old_path = map_cow_upper_path(cow, old_path); + *new_path = map_cow_upper_path(cow, new_path); + } + CowWriteOp::Symlink { linkpath, .. } => { + *linkpath = map_cow_upper_path(cow, linkpath); + } + } + } +} + /// Read and resolve a path argument. For *at syscalls, pass the dirfd arg index; /// for legacy syscalls, pass None to use the raw path. fn read_resolved( @@ -427,7 +459,7 @@ pub(crate) async fn handle_cow_write( let st = cow_state.lock().await; st.virtual_cwds.get(&(notif.pid as i32)).cloned() }; - let op = match parse_cow_write(notif, notif_fd, virtual_cwd.as_deref()) { + let mut op = match parse_cow_write(notif, notif_fd, virtual_cwd.as_deref()) { Some(op) => op, None => return NotifAction::Continue, }; @@ -440,6 +472,7 @@ pub(crate) async fn handle_cow_write( None => return NotifAction::Continue, }; + op.remap_upper_paths(cow); match cow_copy_rel(&op, cow) { Some((_match_path, ref rel)) => { match cow.prepare_copy(rel) { @@ -562,6 +595,7 @@ pub(crate) async fn handle_cow_access( None => return NotifAction::Continue, }; + let path = map_cow_upper_path(cow, &path); if !cow.matches(&path) { return NotifAction::Continue; } @@ -569,7 +603,7 @@ pub(crate) async fn handle_cow_access( // Path is under workdir and W_OK was requested — writes will be // redirected to the COW upper layer, so report success. 
// Check the path actually exists on the real filesystem. - if std::path::Path::new(&path).exists() { + if std::path::Path::new(&path).exists() || cow.handle_stat(&path).is_some() { return NotifAction::ReturnValue(0); } @@ -611,6 +645,7 @@ pub(crate) async fn handle_cow_utimensat( Some(c) => c, None => return NotifAction::Continue, }; + let path = map_cow_upper_path(cow, &path); if !cow.matches(&path) { return NotifAction::Continue; } @@ -681,6 +716,7 @@ pub(crate) async fn handle_cow_stat( None => return NotifAction::Continue, }; + let path = map_cow_upper_path(cow, &path); if !cow.has_changes() || !cow.matches(&path) { return NotifAction::Continue; } @@ -754,6 +790,7 @@ pub(crate) async fn handle_cow_statx( None => return NotifAction::Continue, }; + let path = map_cow_upper_path(cow, &path); if !cow.has_changes() || !cow.matches(&path) { return NotifAction::Continue; } @@ -789,6 +826,7 @@ pub(crate) async fn handle_cow_readlink( None => return NotifAction::Continue, }; + let path = map_cow_upper_path(cow, &path); if !cow.has_changes() || !cow.matches(&path) { return NotifAction::Continue; } @@ -962,6 +1000,7 @@ pub(crate) async fn handle_cow_chdir( None => return NotifAction::Continue, }; + let abs_path = map_cow_upper_path(cow, &abs_path); if !cow.matches(&abs_path) { return NotifAction::Continue; } diff --git a/crates/sandlock-core/tests/integration/test_cow.rs b/crates/sandlock-core/tests/integration/test_cow.rs index 2ff4740..243f41b 100644 --- a/crates/sandlock-core/tests/integration/test_cow.rs +++ b/crates/sandlock-core/tests/integration/test_cow.rs @@ -319,10 +319,18 @@ async fn test_seccomp_cow_chdir_to_created_dir() { .build() .unwrap(); - // mkdir creates the dir in COW upper only; cd must see it via interception. + // Create a nested directory through a dirfd so the COW handler must map the + // upper-layer fd target back to the logical workdir before mkdirat. // Use physical pwd so the assertion covers getcwd virtualization. 
let script = format!( - "mkdir -p subdir/deep && cd subdir/deep && pwd -P > {}", + concat!( + "mkdir -p subdir && python3 -c \"", + "import os; ", + "fd = os.open('subdir', os.O_RDONLY | os.O_DIRECTORY); ", + "os.mkdir('deep', dir_fd=fd); ", + "os.close(fd)\" && ", + "cd subdir/deep && pwd -P > {}" + ), out_file.display() ); let result = Sandbox::run(&policy, &["sh", "-c", &script]).await; From 83213a0ba42409f8b11b449ab86c4e8dccdedaa2 Mon Sep 17 00:00:00 2001 From: gokwok <531504879@qq.com> Date: Wed, 22 Apr 2026 18:55:55 +0800 Subject: [PATCH 11/11] fix: key COW process state by pid identity Signed-off-by: gokwok <531504879@qq.com> --- crates/sandlock-core/src/cow/dispatch.rs | 82 ++++++++++++++++------- crates/sandlock-core/src/seccomp/state.rs | 46 ++++++++++++- 2 files changed, 101 insertions(+), 27 deletions(-) diff --git a/crates/sandlock-core/src/cow/dispatch.rs b/crates/sandlock-core/src/cow/dispatch.rs index ea24349..a183f46 100644 --- a/crates/sandlock-core/src/cow/dispatch.rs +++ b/crates/sandlock-core/src/cow/dispatch.rs @@ -13,7 +13,7 @@ use crate::arch; use crate::cow::seccomp::SeccompCowBranch; use crate::procfs::{build_dirent64, DT_DIR, DT_LNK, DT_REG}; use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction}; -use crate::seccomp::state::CowState; +use crate::seccomp::state::{CowState, PidKey}; use crate::sys::structs::SeccompNotif; /// Read a NUL-terminated path from child memory (up to 4096 bytes for filesystem paths). @@ -100,6 +100,29 @@ fn map_cow_upper_path(cow: &SeccompCowBranch, path: &str) -> String { normalize_path(path).to_string_lossy().into_owned() } +fn read_pid_start_time(pid: u32) -> Option<u64> { + let stat = std::fs::read_to_string(format!("/proc/{}/stat", pid)).ok()?; + let rest = stat.rsplit_once(") ")?.1; + // starttime is field 22; after "pid (comm)" the first token is field 3. 
+ rest.split_whitespace().nth(19)?.parse().ok() +} + +fn cow_pid_key(pid: u32) -> Option<PidKey> { + Some(PidKey { + pid: i32::try_from(pid).ok()?, + start_time: read_pid_start_time(pid)?, + }) +} + +fn current_virtual_cwd(st: &mut CowState, pid: u32) -> Option<String> { + if st.virtual_cwds.is_empty() { + return None; + } + let pid_key = cow_pid_key(pid)?; + st.prune_reused_pid(pid_key); + st.virtual_cwds.get(&pid_key).cloned() +} + // ============================================================ // openat handler // ============================================================ @@ -128,8 +151,8 @@ pub(crate) async fn handle_cow_open( None => return NotifAction::Continue, }; let virtual_cwd = if (dirfd as i32) == libc::AT_FDCWD && !Path::new(&rel_path).is_absolute() { - let st = cow_state.lock().await; - st.virtual_cwds.get(&(notif.pid as i32)).cloned() + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) } else { None }; @@ -456,8 +479,8 @@ pub(crate) async fn handle_cow_write( notif_fd: RawFd, ) -> NotifAction { let virtual_cwd = { - let st = cow_state.lock().await; - st.virtual_cwds.get(&(notif.pid as i32)).cloned() + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) }; let mut op = match parse_cow_write(notif, notif_fd, virtual_cwd.as_deref()) { Some(op) => op, @@ -558,8 +581,8 @@ pub(crate) async fn handle_cow_access( // faccessat(dirfd, pathname, mode, flags): args[0]=dirfd, args[1]=path, args[2]=mode let (path, mode) = if Some(nr) == arch::SYS_ACCESS { let virtual_cwd = { - let st = cow_state.lock().await; - st.virtual_cwds.get(&(notif.pid as i32)).cloned() + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) }; let p = match read_path(notif, notif.data.args[0], notif_fd) { Some(p) => resolve_at_path_with_virtual( @@ -574,8 +597,8 @@ pub(crate) async fn handle_cow_access( } else { let dirfd = notif.data.args[0] as i64; let virtual_cwd = { - let st = cow_state.lock().await; - 
st.virtual_cwds.get(&(notif.pid as i32)).cloned() + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) }; let p = match read_path(notif, notif.data.args[1], notif_fd) { Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), @@ -631,8 +654,8 @@ pub(crate) async fn handle_cow_utimensat( } let virtual_cwd = { - let st = cow_state.lock().await; - st.virtual_cwds.get(&(notif.pid as i32)).cloned() + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) }; let path = match read_path(notif, path_ptr, notif_fd) { Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), @@ -702,8 +725,8 @@ pub(crate) async fn handle_cow_stat( // faccessat(dirfd, pathname, mode, flags) let dirfd = notif.data.args[0] as i64; let virtual_cwd = { - let st = cow_state.lock().await; - st.virtual_cwds.get(&(notif.pid as i32)).cloned() + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) }; let path = match read_path(notif, notif.data.args[1], notif_fd) { Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), @@ -776,8 +799,8 @@ pub(crate) async fn handle_cow_statx( // statx(dirfd, pathname, flags, mask, statxbuf) let dirfd = notif.data.args[0] as i64; let virtual_cwd = { - let st = cow_state.lock().await; - st.virtual_cwds.get(&(notif.pid as i32)).cloned() + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) }; let path = match read_path(notif, notif.data.args[1], notif_fd) { Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), @@ -810,8 +833,8 @@ pub(crate) async fn handle_cow_readlink( // readlinkat(dirfd, pathname, buf, bufsiz) let dirfd = notif.data.args[0] as i64; let virtual_cwd = { - let st = cow_state.lock().await; - st.virtual_cwds.get(&(notif.pid as i32)).cloned() + let mut st = cow_state.lock().await; + current_virtual_cwd(&mut st, notif.pid) }; let path = match 
read_path(notif, notif.data.args[1], notif_fd) { Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()), @@ -858,6 +881,10 @@ pub(crate) async fn handle_cow_getdents( let child_fd = (notif.data.args[0] & 0xFFFFFFFF) as u32; let buf_addr = notif.data.args[1]; let buf_size = (notif.data.args[2] & 0xFFFFFFFF) as usize; + let pid_key = match cow_pid_key(pid) { + Some(key) => key, + None => return NotifAction::Continue, + }; // Check if fd points to a COW-managed directory let link_path = format!("/proc/{}/fd/{}", pid, child_fd); @@ -867,6 +894,7 @@ pub(crate) async fn handle_cow_getdents( }; let mut st = cow_state.lock().await; + st.prune_reused_pid(pid_key); let cow = match st.branch.as_ref() { Some(c) => c, None => return NotifAction::Continue, @@ -891,7 +919,7 @@ pub(crate) async fn handle_cow_getdents( }; // Build cache on first call; invalidate if fd was reused for a different dir. - let cache_key = (pid as i32, child_fd); + let cache_key = (pid_key, child_fd); if let Some((cached_target, entries)) = st.dir_cache.get(&cache_key) { if *cached_target != target { // fd reused for a different directory — rebuild. 
@@ -987,8 +1015,8 @@ pub(crate) async fn handle_cow_chdir( }; let orig_path_buf_len = path.len() + 1; // NUL-terminated size in child memory - let st = cow_state.lock().await; - let virtual_cwd = st.virtual_cwds.get(&(notif.pid as i32)).cloned(); + let mut st = cow_state.lock().await; + let virtual_cwd = current_virtual_cwd(&mut st, notif.pid); let abs_path = resolve_at_path_with_virtual( notif, libc::AT_FDCWD as i64, @@ -1068,8 +1096,11 @@ pub(crate) async fn handle_cow_chdir( return NotifAction::Errno(libc::EFAULT); } - let mut st = cow_state.lock().await; - st.virtual_cwds.insert(notif.pid as i32, abs_path); + if let Some(pid_key) = cow_pid_key(notif.pid) { + let mut st = cow_state.lock().await; + st.prune_reused_pid(pid_key); + st.virtual_cwds.insert(pid_key, abs_path); + } NotifAction::Continue } @@ -1083,14 +1114,15 @@ pub(crate) async fn handle_cow_getcwd( let buf_addr = notif.data.args[0]; let buf_size = (notif.data.args[1] & 0xFFFF_FFFF) as usize; - let st = cow_state.lock().await; + let mut st = cow_state.lock().await; + let cached_virtual_cwd = current_virtual_cwd(&mut st, notif.pid); let cow = match st.branch.as_ref() { Some(c) => c, None => return NotifAction::Continue, }; - let virtual_cwd = if let Some(cwd) = st.virtual_cwds.get(&(notif.pid as i32)) { - cwd.clone() + let virtual_cwd = if let Some(cwd) = cached_virtual_cwd { + cwd } else { let cwd = match std::fs::read_link(format!("/proc/{}/cwd", notif.pid)) { Ok(c) => c, diff --git a/crates/sandlock-core/src/seccomp/state.rs b/crates/sandlock-core/src/seccomp/state.rs index 40ba92c..b9058e2 100644 --- a/crates/sandlock-core/src/seccomp/state.rs +++ b/crates/sandlock-core/src/seccomp/state.rs @@ -71,15 +71,24 @@ impl ProcfsState { // CowState — copy-on-write filesystem state // ============================================================ +/// Stable process identity for per-process COW state. 
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct PidKey { + /// Numeric PID observed by seccomp notification. + pub pid: i32, + /// Process start time from /proc/<pid>/stat field 22. + pub start_time: u64, +} + /// Copy-on-write filesystem state. pub struct CowState { /// Seccomp-based COW branch (None if COW disabled). pub branch: Option<SeccompCowBranch>, /// Getdents cache for COW directories. /// Value is (host_path, entries) to detect fd reuse and invalidate stale entries. - pub dir_cache: HashMap<(i32, u32), (String, Vec<Vec<u8>>)>, + pub dir_cache: HashMap<(PidKey, u32), (String, Vec<Vec<u8>>)>, /// Logical cwd for processes that chdir into COW-only directories. - pub virtual_cwds: HashMap<i32, String>, + pub virtual_cwds: HashMap<PidKey, String>, } impl CowState { @@ -90,6 +99,39 @@ impl CowState { virtual_cwds: HashMap::new(), } } + + /// Drop COW per-process entries for an older process that used the same numeric PID. + pub(crate) fn prune_reused_pid(&mut self, current: PidKey) { + self.virtual_cwds + .retain(|key, _| key.pid != current.pid || *key == current); + self.dir_cache + .retain(|(key, _), _| key.pid != current.pid || *key == current); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn cow_state_prunes_entries_for_reused_pid() { + let old = PidKey { pid: 42, start_time: 1 }; + let current = PidKey { pid: 42, start_time: 2 }; + let other = PidKey { pid: 43, start_time: 1 }; + let mut state = CowState::new(); + + state.virtual_cwds.insert(old, "/old".to_string()); + state.virtual_cwds.insert(other, "/other".to_string()); + state.dir_cache.insert((old, 7), ("/old".to_string(), Vec::new())); + state.dir_cache.insert((other, 7), ("/other".to_string(), Vec::new())); + + state.prune_reused_pid(current); + + assert!(!state.virtual_cwds.contains_key(&old)); + assert!(!state.dir_cache.contains_key(&(old, 7))); + assert_eq!(state.virtual_cwds.get(&other), Some(&"/other".to_string())); + assert!(state.dir_cache.contains_key(&(other, 7))); + } } // 
============================================================