diff --git a/Cargo.lock b/Cargo.lock index ad0c763ca..138a14b97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1531,12 +1531,14 @@ dependencies = [ name = "litebox_platform_linux_userland" version = "0.1.0" dependencies = [ + "arrayvec", "cfg-if", "getrandom 0.3.4", "libc", "litebox", "litebox_common_linux", "litebox_common_optee", + "seccompiler", "spin 0.9.8", "syscalls", "zerocopy", @@ -2521,6 +2523,15 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "seccompiler" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ae55de56877481d112a559bbc12667635fdaf5e005712fd4e2b2fa50ffc884" +dependencies = [ + "libc", +] + [[package]] name = "security-framework" version = "2.11.1" diff --git a/litebox_platform_linux_userland/Cargo.toml b/litebox_platform_linux_userland/Cargo.toml index 1c4d96ed6..a4e5ef9a2 100644 --- a/litebox_platform_linux_userland/Cargo.toml +++ b/litebox_platform_linux_userland/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2024" [dependencies] +arrayvec = { version = "0.7.6", default-features = false } cfg-if = "1.0.0" getrandom = "0.3.4" libc = { version = "0.2.169", default-features = false } @@ -13,6 +14,7 @@ litebox_common_optee = { path = "../litebox_common_optee", version = "0.1.0", de spin = "0.9.8" syscalls = { version = "0.6", default-features = false } zerocopy = { version = "0.8", default-features = false } +seccompiler = { version = "0.5.0" } [features] default = ["linux_syscall"] diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index a1ccf7fdc..a88432c90 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -412,6 +412,98 @@ impl LinuxUserland { ) }; } + + #[cfg(target_arch = "x86_64")] + #[allow( + clippy::missing_panics_doc, + reason = "the seccomp filter rules are hardcoded and not expected to fail" + )] + pub fn enable_seccomp_filter() { + use seccompiler::{ + BpfProgram, SeccompAction, SeccompCmpArgLen, SeccompCmpOp, SeccompCondition, + SeccompFilter, SeccompRule, + }; + + let rules = vec![ + // TUN and terminal + (libc::SYS_read, vec![]), + (libc::SYS_write, vec![]), + (libc::SYS_poll, vec![]), + // memory management + (libc::SYS_mmap, vec![]), + (libc::SYS_mprotect, vec![]), + (libc::SYS_munmap, vec![]), + (libc::SYS_mremap, vec![]), + // signal + (libc::SYS_rt_sigreturn, vec![]), + (libc::SYS_sigaltstack, vec![]), + (libc::SYS_tgkill, vec![]), + (libc::SYS_timer_create, vec![]), + (libc::SYS_timer_settime, vec![]), + (libc::SYS_timer_delete, vec![]), + // called by [pthread_create](https://codebrowser.dev/glibc/glibc/nptl/pthread_create.c.html#83) to set up signal handler + // to support setuid et.al. functions (which we probably don't need, but include them in debug mode to suppress the warnings + // about missing seccomp rules for these syscalls). + #[cfg(debug_assertions)] + (libc::SYS_rt_sigaction, vec![]), + // TODO: also called by `next_signal_handler`, but I'm not sure if it's really needed. + (libc::SYS_rt_sigprocmask, vec![]), + // thread management + (libc::SYS_exit, vec![]), + (libc::SYS_exit_group, vec![]), + (libc::SYS_clone3, vec![]), + // sync + (libc::SYS_futex, vec![]), + // misc + (libc::SYS_getrandom, vec![]), + // required by std spawn + (libc::SYS_rseq, vec![]), + (libc::SYS_set_robust_list, vec![]), + (libc::SYS_get_robust_list, vec![]), + (libc::SYS_sched_getaffinity, vec![]), + (libc::SYS_gettid, vec![]), + (libc::SYS_madvise, vec![]), + // required by libc allocator + (libc::SYS_brk, vec![]), + (libc::SYS_getpid, vec![]), + // TODO: could be removed if we pre-open files (see `try_allocate_cow_pages`) + ( + libc::SYS_open, + vec![ + SeccompRule::new(vec![ + SeccompCondition::new( + 1, + SeccompCmpArgLen::Dword, + SeccompCmpOp::Eq, + u64::from(OFlags::RDONLY.bits()), + ) + .unwrap(), + ]) + .unwrap(), + ], + ), + (libc::SYS_close, vec![]), + ]; + let rule_map: std::collections::BTreeMap> = + rules.into_iter().collect(); + let filter = SeccompFilter::new( + rule_map, + // In debug builds, log violations instead of silently returning an error so that + // it won't fail silently during development (which may be hard to debug). + if cfg!(debug_assertions) { + SeccompAction::Trap + } else { + SeccompAction::Errno(libc::EINVAL.cast_unsigned()) + }, + SeccompAction::Allow, + seccompiler::TargetArch::x86_64, + ) + .unwrap(); + // TODO: bpf program can be compiled offline + let bpf_prog: BpfProgram = filter.try_into().unwrap(); + + seccompiler::apply_filter(&bpf_prog).unwrap(); + } } impl litebox::platform::Provider for LinuxUserland {} @@ -1753,6 +1845,9 @@ fn register_exception_handlers() { libc::SIGFPE, libc::SIGILL, libc::SIGTRAP, + // We'd like to log forbidden syscalls in debug mode + #[cfg(debug_assertions)] + libc::SIGSYS, ]; for &sig in exception_signals { unsafe { @@ -1987,6 +2082,37 @@ unsafe extern "C" fn exception_signal_handler( info: &mut libc::siginfo_t, context: &mut libc::ucontext_t, ) { + // Return an error code for the syscall and log it in debug mode. + #[cfg(debug_assertions)] + if signum == libc::SIGSYS { + use core::fmt::Write as _; + #[cfg(target_arch = "x86_64")] + let eax_idx = libc::REG_RAX as usize; + let sysno = context.uc_mcontext.gregs[eax_idx]; + context.uc_mcontext.gregs[eax_idx] = i64::from(-libc::EINVAL); + // Signal-safe: format on the stack via arrayvec (no heap allocation). + let mut buf = arrayvec::ArrayString::<320>::new(); + if sysno == libc::SYS_openat { + #[cfg(target_arch = "x86_64")] + let rsi = context.uc_mcontext.gregs[libc::REG_RSI as usize] as *const i8; + let c_path = unsafe { core::ffi::CStr::from_ptr(rsi) }; + // libc may call `openat` for certain files that we can ignore, e.g., /proc/sys/vm/overcommit_memory. + // Log the paths in case we need to allow some of them in the future. + let _ = writeln!(buf, "INFO: openat with {c_path:?} is not allowed"); + } else { + let _ = writeln!(buf, "WARNING: disallowed syscall invoked: {sysno}"); + } + let _ = unsafe { + syscalls::syscall3( + syscalls::Sysno::write, + libc::STDERR_FILENO as usize, + buf.as_ptr() as usize, + buf.len(), + ) + }; + return; + } + let Some(regs) = signal_handler_exit_guest(context, false) else { return unsafe { next_signal_handler(signum, info, context) }; }; @@ -2277,7 +2403,7 @@ mod tests { use core::sync::atomic::AtomicU32; use std::thread::sleep; - use litebox::platform::RawMutex; + use litebox::{fs::OFlags, platform::RawMutex}; use crate::LinuxUserland; use litebox::platform::PageManagementProvider; @@ -2316,4 +2442,35 @@ mod tests { prev = page.end; } } + + #[test] + fn test_seccomp_filter() { + let _platform: &LinuxUserland = LinuxUserland::new(None); + LinuxUserland::enable_seccomp_filter(); + + let pathname = c"/tmp/test_seccomp"; + let mkdir_res = unsafe { + syscalls::syscall2(syscalls::Sysno::mkdir, pathname.as_ptr() as usize, 0o755) + }; + assert_eq!( + mkdir_res.unwrap_err(), + syscalls::Errno::EINVAL, + "mkdir should be blocked by seccomp filter" + ); + + let pathname = + std::ffi::CString::new(format!("{}/Cargo.toml", env!("CARGO_MANIFEST_DIR"))).unwrap(); + let open_res = unsafe { + syscalls::syscall2( + syscalls::Sysno::open, + pathname.as_ptr() as usize, + OFlags::RDWR.bits() as usize, + ) + }; + assert_eq!( + open_res.unwrap_err(), + syscalls::Errno::EINVAL, + "open with RDWR should be blocked by seccomp filter" + ); + } } diff --git a/litebox_runner_linux_userland/src/lib.rs b/litebox_runner_linux_userland/src/lib.rs index 90b24878b..d0cfe6e2c 100644 --- a/litebox_runner_linux_userland/src/lib.rs +++ b/litebox_runner_linux_userland/src/lib.rs @@ -380,13 +380,12 @@ pub fn run(cli_args: CliArgs) -> Result<()> { envp }; - let program = shim.load_program( - initial_file_system, - platform.init_task(), - prog_path, - argv, - envp, - )?; + let task_params = platform.init_task(); + + #[cfg(target_arch = "x86_64")] + litebox_platform_linux_userland::LinuxUserland::enable_seccomp_filter(); + + let program = shim.load_program(initial_file_system, task_params, prog_path, argv, envp)?; #[cfg(feature = "lock_tracing")] litebox::sync::start_recording();