diff --git a/litebox_shim_linux/src/syscalls/file.rs b/litebox_shim_linux/src/syscalls/file.rs index 1d158fb05..def2ede9b 100644 --- a/litebox_shim_linux/src/syscalls/file.rs +++ b/litebox_shim_linux/src/syscalls/file.rs @@ -21,6 +21,7 @@ use litebox_common_linux::{ IoWriteVec, IoctlArg, TimeParam, errno::Errno, }; use litebox_platform_multiplex::Platform; +use thiserror::Error; use crate::{ConstPtr, GlobalState, MutPtr, ShimFS, Task}; use core::sync::atomic::{AtomicUsize, Ordering}; @@ -506,58 +507,106 @@ impl Task { } pub(crate) fn do_close(&self, raw_fd: usize) -> Result<(), Errno> { + self.do_close_and_replace::(raw_fd, None) + } + + /// Close the file at `raw_fd` and optionally place a new file in the same slot. + /// + /// This function ensures `close` and `insert` are done atomically. + fn do_close_and_replace( + &self, + raw_fd: usize, + replace: Option>, + ) -> Result<(), Errno> { + enum ConsumedFd { + Fs(alloc::sync::Arc>), + Network(alloc::sync::Arc>>), + Pipes(alloc::sync::Arc>>), + Eventfd(alloc::sync::Arc>), + Epoll(alloc::sync::Arc>>), + Unix(alloc::sync::Arc>>), + } + let files = self.files.borrow(); let mut rds = files.raw_descriptor_store.write(); - match rds.fd_consume_raw_integer(raw_fd) { - Ok(fd) => { - drop(rds); - return files.fs.close(&fd).map_err(Errno::from); - } + let consumed: ConsumedFd = match rds.fd_consume_raw_integer::(raw_fd) { + Ok(fd) => ConsumedFd::Fs(fd), Err(litebox::fd::ErrRawIntFd::NotFound) => { + if let Some(new_fd) = replace { + let success = rds.fd_into_specific_raw_integer(new_fd, raw_fd); + assert!(success, "raw_fd slot is empty, so insert must succeed"); + } return Err(Errno::EBADF); } Err(litebox::fd::ErrRawIntFd::InvalidSubsystem) => { - // fallthrough + if let Ok(fd) = + rds.fd_consume_raw_integer::>(raw_fd) + { + ConsumedFd::Network(fd) + } else if let Ok(fd) = + rds.fd_consume_raw_integer::>(raw_fd) + { + ConsumedFd::Pipes(fd) + } else if let Ok(fd) = + rds.fd_consume_raw_integer::(raw_fd) + { + 
ConsumedFd::Eventfd(fd) + } else if let Ok(fd) = + rds.fd_consume_raw_integer::>(raw_fd) + { + ConsumedFd::Epoll(fd) + } else if let Ok(fd) = + rds.fd_consume_raw_integer::>(raw_fd) + { + ConsumedFd::Unix(fd) + } else { + unreachable!("all subsystems covered") + } } + }; + + // Insert the replacement into the now-vacated slot while still holding the lock. + if let Some(new_fd) = replace { + let success = rds.fd_into_specific_raw_integer(new_fd, raw_fd); + assert!( + success, + "we just consumed this raw_fd, so it must be available" + ); } - if let Ok(fd) = rds.fd_consume_raw_integer(raw_fd) { - drop(rds); - return self.global.close_socket(&self.wait_cx(), fd); - } - if let Ok(fd) = rds.fd_consume_raw_integer(raw_fd) { - drop(rds); - return self.global.pipes.close(&fd).map_err(Errno::from); - } - if let Ok(fd) = rds.fd_consume_raw_integer::(raw_fd) { - drop(rds); - let entry = { - let mut dt = self.global.litebox.descriptor_table_mut(); - dt.remove(&fd) - }; - drop(entry); - return Ok(()); - } - if let Ok(fd) = rds.fd_consume_raw_integer::>(raw_fd) { - drop(rds); - let entry = { - let mut dt = self.global.litebox.descriptor_table_mut(); - dt.remove(&fd) - }; - drop(entry); - return Ok(()); - } - if let Ok(fd) = rds.fd_consume_raw_integer::>(raw_fd) { - drop(rds); - let entry = { - let mut dt = self.global.litebox.descriptor_table_mut(); - dt.remove(&fd) - }; - drop(entry); - return Ok(()); + drop(rds); + + match consumed { + ConsumedFd::Fs(fd) => files.fs.close(&fd).map_err(Errno::from), + ConsumedFd::Network(fd) => self.global.close_socket(&self.wait_cx(), fd), + ConsumedFd::Pipes(fd) => self.global.pipes.close(&fd).map_err(Errno::from), + ConsumedFd::Eventfd(fd) => { + let entry = { + let mut dt = self.global.litebox.descriptor_table_mut(); + dt.remove(&fd) + }; + // do not hold any locks while dropping the entry + drop(entry); + Ok(()) + } + ConsumedFd::Epoll(fd) => { + let entry = { + let mut dt = self.global.litebox.descriptor_table_mut(); + dt.remove(&fd) + }; 
+ // do not hold any locks while dropping the entry + drop(entry); + Ok(()) + } + ConsumedFd::Unix(fd) => { + let entry = { + let mut dt = self.global.litebox.descriptor_table_mut(); + dt.remove(&fd) + }; + // do not hold any locks while dropping the entry + drop(entry); + Ok(()) + } } - // All the above cases should cover all the known subsystems, and we've already - // early-handled the "raw FD not found" case. - unreachable!() } /// Handle syscall `close` @@ -1218,22 +1267,21 @@ impl Task { .flatten() } FcntlArg::DUPFD { cloexec, min_fd } => { - let max_fd = self - .process() - .limits - .get_rlimit_cur(litebox_common_linux::RlimitResource::NOFILE); - if min_fd as usize >= max_fd { - return Err(Errno::EINVAL); - } - let new_file = self.do_dup_inner( - desc, - if cloexec { - OFlags::CLOEXEC - } else { - OFlags::empty() - }, - DupFdRequest::LowestAtOrAbove(min_fd as usize), - )?; + let new_file = self + .do_dup_inner( + desc, + if cloexec { + OFlags::CLOEXEC + } else { + OFlags::empty() + }, + DupFdRequest::LowestAtOrAbove(min_fd as usize), + ) + .map_err(|e| match e { + DupFdError::BadFd => Errno::EBADF, + DupFdError::TooManyFiles => Errno::EMFILE, + DupFdError::TargetFdExceedsLimit => Errno::EINVAL, + })?; Ok(new_file.try_into().unwrap()) } _ => unimplemented!(), @@ -1938,7 +1986,7 @@ impl Task { Ok(count) } - fn do_dup(&self, file: usize, flags: OFlags) -> Result { + fn do_dup(&self, file: usize, flags: OFlags) -> Result { self.do_dup_inner(file, flags, DupFdRequest::LowestAvailable) } @@ -1947,30 +1995,32 @@ impl Task { file: usize, flags: OFlags, target: DupFdRequest, - ) -> Result { + ) -> Result { fn dup( - global: &GlobalState, + task: &Task, files: &FilesState, fd: &TypedFd, close_on_exec: bool, target: DupFdRequest, - ) -> Result { - let mut dt = global.litebox.descriptor_table_mut(); - let fd: TypedFd<_> = dt.duplicate(fd).ok_or(Errno::EBADF)?; + ) -> Result { + let mut dt = task.global.litebox.descriptor_table_mut(); + let fd: TypedFd<_> = 
dt.duplicate(fd).ok_or(DupFdError::BadFd)?; if close_on_exec { let old = dt.set_fd_metadata(&fd, FileDescriptorFlags::FD_CLOEXEC); assert!(old.is_none()); } - let mut rds = files.raw_descriptor_store.write(); + drop(dt); match target { DupFdRequest::Exact(target) => { - if !rds.fd_into_specific_raw_integer(fd, target) { - return Err(Errno::EBADF); - } + let _ = task.do_close_and_replace(target, Some(fd)); Ok(target) } - DupFdRequest::LowestAvailable => Ok(rds.fd_into_raw_integer(fd)), + DupFdRequest::LowestAvailable => { + let rds = &mut *files.raw_descriptor_store.write(); + Ok(rds.fd_into_raw_integer(fd)) + } DupFdRequest::LowestAtOrAbove(min_fd) => { + let rds = &mut *files.raw_descriptor_store.write(); let mut raw_fd = min_fd; for occupied_raw_fd in rds.iter_alive().skip_while(|&fd| fd < min_fd) { if occupied_raw_fd != raw_fd { @@ -1984,29 +2034,35 @@ impl Task { } } } + let max_fd = self + .process() + .limits + .get_rlimit_cur(litebox_common_linux::RlimitResource::NOFILE); + match target { + DupFdRequest::Exact(fd) if fd >= max_fd => { + return Err(DupFdError::TargetFdExceedsLimit); + } + DupFdRequest::LowestAtOrAbove(min_fd) if min_fd >= max_fd => { + return Err(DupFdError::TargetFdExceedsLimit); + } + _ => {} + } let close_on_exec = flags.contains(OFlags::CLOEXEC); let files = self.files.borrow(); - let new_fd = files.run_on_raw_fd( - file, - |fd| dup(&self.global, &files, fd, close_on_exec, target), - |fd| dup(&self.global, &files, fd, close_on_exec, target), - |fd| dup(&self.global, &files, fd, close_on_exec, target), - |fd| dup(&self.global, &files, fd, close_on_exec, target), - |fd| dup(&self.global, &files, fd, close_on_exec, target), - |fd| dup(&self.global, &files, fd, close_on_exec, target), - )??; - if matches!( - target, - DupFdRequest::LowestAvailable | DupFdRequest::LowestAtOrAbove(_) - ) { - let max_fd = self - .process() - .limits - .get_rlimit_cur(litebox_common_linux::RlimitResource::NOFILE); - if new_fd >= max_fd { - self.do_close(new_fd)?; 
- return Err(Errno::EMFILE); - } + let new_fd = files + .run_on_raw_fd( + file, + |fd| dup(self, &files, fd, close_on_exec, target), + |fd| dup(self, &files, fd, close_on_exec, target), + |fd| dup(self, &files, fd, close_on_exec, target), + |fd| dup(self, &files, fd, close_on_exec, target), + |fd| dup(self, &files, fd, close_on_exec, target), + |fd| dup(self, &files, fd, close_on_exec, target), + ) + .map_err(|_| DupFdError::BadFd)??; + if new_fd >= max_fd { + let _ = self.do_close(new_fd); + return Err(DupFdError::TooManyFiles); } Ok(new_fd) } @@ -2051,20 +2107,21 @@ impl Task { Ok(oldfd) }; } - // Close whatever is at newfd before duping into it let newfd_usize = usize::try_from(newfd).or(Err(Errno::EBADF))?; - let _ = self.do_close(newfd_usize); self.do_dup_inner( oldfd_usize, flags.unwrap_or(OFlags::empty()), DupFdRequest::Exact(newfd_usize), - )?; - Ok(newfd) + ) } else { // dup - let new_file = self.do_dup(oldfd_usize, flags.unwrap_or(OFlags::empty()))?; - Ok(u32::try_from(new_file).unwrap()) + self.do_dup(oldfd_usize, flags.unwrap_or(OFlags::empty())) } + .map_err(|e| match e { + DupFdError::BadFd | DupFdError::TargetFdExceedsLimit => Errno::EBADF, + DupFdError::TooManyFiles => Errno::EMFILE, + }) + .map(|new_fd| u32::try_from(new_fd).unwrap()) } } @@ -2072,9 +2129,20 @@ impl Task { enum DupFdRequest { LowestAvailable, LowestAtOrAbove(usize), + /// Duplicate to the specified fd, closing it first if it's open. Exact(usize), } +#[derive(Error, Debug)] +enum DupFdError { + #[error("Bad file descriptor")] + BadFd, + #[error("Too many open files")] + TooManyFiles, + #[error("Target fd exceeds process limit")] + TargetFdExceedsLimit, +} + #[derive(Clone, Copy, Debug, Default)] struct Diroff(usize);